diff --git a/8224675-Late-GC-barrier-insertion-for-ZGC.patch b/8224675-Late-GC-barrier-insertion-for-ZGC.patch index 95dc4c9534025966ef3685ce9256373b8bcb7e46..50e15d1eb5affe295e471492566ff76974cabbb4 100644 --- a/8224675-Late-GC-barrier-insertion-for-ZGC.patch +++ b/8224675-Late-GC-barrier-insertion-for-ZGC.patch @@ -2812,7 +2812,7 @@ index 5454d1350..d7eb3996b 100644 --- a/src/hotspot/share/opto/loopnode.cpp +++ b/src/hotspot/share/opto/loopnode.cpp @@ -3017,9 +3018,7 @@ void PhaseIdealLoop::build_and_optimize() { - build_loop_late( visited, worklist, nstack ); + if (C->failing()) { return; } if (_verify_only) { - // restore major progress flag diff --git a/2000-Add-riscv64-support-based-on-bishengjdk-riscv-branch.patch b/Add-riscv64-support.patch similarity index 64% rename from 2000-Add-riscv64-support-based-on-bishengjdk-riscv-branch.patch rename to Add-riscv64-support.patch index 13815b71215d789e75976982277c6f8b05762627..59017ae02c5b6c185a441f428acd08dfc203eb6e 100644 --- a/2000-Add-riscv64-support-based-on-bishengjdk-riscv-branch.patch +++ b/Add-riscv64-support.patch @@ -1,228 +1,198 @@ -From 77eaf1804b7e56ed17a6c3a478e6ee9df89ea024 Mon Sep 17 00:00:00 2001 -From: misaka00251 -Date: Wed, 9 Aug 2023 02:24:23 +0800 -Subject: [PATCH] Add riscv64 support (based on bishengjdk riscv branch) +From dfa792539047c39d0d25244265bc8368163d5768 Mon Sep 17 00:00:00 2001 +From: Fei Yang +Date: Thu, 24 Mar 2022 09:22:46 +0000 +Subject: [PATCH 001/140] Cherry-picked JDK-8276799: initial load of RISC-V + backend (cannot pass compilation) --- - make/autoconf/build-aux/config.sub | 7 + + make/autoconf/build-aux/config.guess | 2 +- make/autoconf/hotspot.m4 | 3 +- - make/autoconf/libraries.m4 | 4 +- - make/autoconf/platform.m4 | 10 +- - make/hotspot/gensrc/GensrcAdlc.gmk | 16 +- - src/hotspot/cpu/aarch64/aarch64.ad | 40 +- - .../cpu/aarch64/c1_LIRAssembler_aarch64.cpp | 4 +- - .../cpu/aarch64/macroAssembler_aarch64.cpp | 64 + - .../cpu/aarch64/macroAssembler_aarch64.hpp | 3 + - src/hotspot/cpu/arm/arm.ad | 10 +- - src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp | 5 +- - src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp | 5 +- - src/hotspot/cpu/ppc/ppc.ad | 16 +- - .../cpu/riscv/abstractInterpreter_riscv.cpp | 185 + - src/hotspot/cpu/riscv/assembler_riscv.cpp | 365 + - src/hotspot/cpu/riscv/assembler_riscv.hpp | 2004 +++ + make/autoconf/libraries.m4 | 8 +- + make/autoconf/platform.m4 | 6 +- + make/hotspot/gensrc/GensrcAdlc.gmk | 9 +- + .../cpu/aarch64/c1_LIRAssembler_aarch64.cpp | 6 +- + src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp | 7 +- + src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp | 8 +- + .../cpu/riscv/abstractInterpreter_riscv.cpp | 177 + + src/hotspot/cpu/riscv/assembler_riscv.cpp | 372 + + src/hotspot/cpu/riscv/assembler_riscv.hpp | 3047 +++++ .../cpu/riscv/assembler_riscv.inline.hpp | 47 + - src/hotspot/cpu/riscv/bytes_riscv.hpp | 169 + - src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp | 352 + - src/hotspot/cpu/riscv/c1_Defs_riscv.hpp | 85 + - .../cpu/riscv/c1_FpuStackSim_riscv.cpp | 31 + - .../cpu/riscv/c1_FpuStackSim_riscv.hpp | 33 + - src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp | 391 + - src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp | 149 + - .../cpu/riscv/c1_LIRAssembler_arith_riscv.cpp | 287 + - .../cpu/riscv/c1_LIRAssembler_arith_riscv.hpp | 36 + - .../riscv/c1_LIRAssembler_arraycopy_riscv.cpp | 387 + - .../riscv/c1_LIRAssembler_arraycopy_riscv.hpp | 51 + - .../cpu/riscv/c1_LIRAssembler_riscv.cpp | 2275 ++++ + src/hotspot/cpu/riscv/bytes_riscv.hpp | 167 + + src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp | 353 + 
+ src/hotspot/cpu/riscv/c1_Defs_riscv.hpp | 84 + + .../cpu/riscv/c1_FpuStackSim_riscv.cpp | 30 + + .../cpu/riscv/c1_FpuStackSim_riscv.hpp | 32 + + src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp | 388 + + src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp | 148 + + .../cpu/riscv/c1_LIRAssembler_arith_riscv.cpp | 281 + + .../cpu/riscv/c1_LIRAssembler_arith_riscv.hpp | 37 + + .../riscv/c1_LIRAssembler_arraycopy_riscv.cpp | 388 + + .../riscv/c1_LIRAssembler_arraycopy_riscv.hpp | 52 + + .../cpu/riscv/c1_LIRAssembler_riscv.cpp | 2267 ++++ .../cpu/riscv/c1_LIRAssembler_riscv.hpp | 132 + - .../cpu/riscv/c1_LIRGenerator_riscv.cpp | 1083 ++ + .../cpu/riscv/c1_LIRGenerator_riscv.cpp | 1075 ++ src/hotspot/cpu/riscv/c1_LIR_riscv.cpp | 55 + src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp | 33 + - src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp | 85 + - .../cpu/riscv/c1_MacroAssembler_riscv.cpp | 441 + - .../cpu/riscv/c1_MacroAssembler_riscv.hpp | 121 + - src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp | 1206 ++ - src/hotspot/cpu/riscv/c1_globals_riscv.hpp | 72 + - src/hotspot/cpu/riscv/c2_globals_riscv.hpp | 91 + + src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp | 83 + + .../cpu/riscv/c1_MacroAssembler_riscv.cpp | 432 + + .../cpu/riscv/c1_MacroAssembler_riscv.hpp | 120 + + src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp | 1172 ++ + src/hotspot/cpu/riscv/c1_globals_riscv.hpp | 65 + + .../cpu/riscv/c2_MacroAssembler_riscv.cpp | 1646 +++ + .../cpu/riscv/c2_MacroAssembler_riscv.hpp | 193 + + src/hotspot/cpu/riscv/c2_globals_riscv.hpp | 83 + src/hotspot/cpu/riscv/c2_init_riscv.cpp | 38 + + .../riscv/c2_safepointPollStubTable_riscv.cpp | 47 + src/hotspot/cpu/riscv/codeBuffer_riscv.hpp | 36 + - src/hotspot/cpu/riscv/compiledIC_riscv.cpp | 154 + - src/hotspot/cpu/riscv/copy_riscv.hpp | 60 + - src/hotspot/cpu/riscv/depChecker_riscv.hpp | 32 + - src/hotspot/cpu/riscv/disassembler_riscv.hpp | 37 + - src/hotspot/cpu/riscv/frame_riscv.cpp | 683 + - src/hotspot/cpu/riscv/frame_riscv.hpp | 200 + - src/hotspot/cpu/riscv/frame_riscv.inline.hpp | 257 + - .../gc/g1/g1BarrierSetAssembler_riscv.cpp | 479 + + src/hotspot/cpu/riscv/compiledIC_riscv.cpp | 149 + + src/hotspot/cpu/riscv/copy_riscv.hpp | 136 + + src/hotspot/cpu/riscv/disassembler_riscv.hpp | 58 + + .../cpu/riscv/foreign_globals_riscv.cpp | 44 + + .../cpu/riscv/foreign_globals_riscv.hpp | 32 + + src/hotspot/cpu/riscv/frame_riscv.cpp | 697 + + src/hotspot/cpu/riscv/frame_riscv.hpp | 202 + + src/hotspot/cpu/riscv/frame_riscv.inline.hpp | 248 + + .../gc/g1/g1BarrierSetAssembler_riscv.cpp | 484 + .../gc/g1/g1BarrierSetAssembler_riscv.hpp | 78 + - .../gc/shared/barrierSetAssembler_riscv.cpp | 226 + - .../gc/shared/barrierSetAssembler_riscv.hpp | 75 + - .../cardTableBarrierSetAssembler_riscv.cpp | 120 + - .../cardTableBarrierSetAssembler_riscv.hpp | 43 + - .../modRefBarrierSetAssembler_riscv.cpp | 54 + + .../cpu/riscv/gc/g1/g1Globals_riscv.hpp | 31 + + .../gc/shared/barrierSetAssembler_riscv.cpp | 302 + + .../gc/shared/barrierSetAssembler_riscv.hpp | 79 + + .../gc/shared/barrierSetNMethod_riscv.cpp | 171 + + .../cardTableBarrierSetAssembler_riscv.cpp | 111 + + .../cardTableBarrierSetAssembler_riscv.hpp | 42 + + .../modRefBarrierSetAssembler_riscv.cpp | 55 + .../modRefBarrierSetAssembler_riscv.hpp | 55 + - .../c1/shenandoahBarrierSetC1_riscv.cpp | 124 + - .../shenandoahBarrierSetAssembler_riscv.cpp | 743 ++ - .../shenandoahBarrierSetAssembler_riscv.hpp | 92 + - .../riscv/gc/shenandoah/shenandoah_riscv64.ad | 188 + - .../cpu/riscv/globalDefinitions_riscv.hpp | 44 + - 
src/hotspot/cpu/riscv/globals_riscv.hpp | 120 + + .../c1/shenandoahBarrierSetC1_riscv.cpp | 117 + + .../shenandoahBarrierSetAssembler_riscv.cpp | 712 ++ + .../shenandoahBarrierSetAssembler_riscv.hpp | 88 + + .../riscv/gc/shenandoah/shenandoah_riscv64.ad | 285 + + .../riscv/gc/z/zBarrierSetAssembler_riscv.cpp | 441 + + .../riscv/gc/z/zBarrierSetAssembler_riscv.hpp | 101 + + src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp | 212 + + src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp | 36 + + src/hotspot/cpu/riscv/gc/z/z_riscv64.ad | 233 + + .../cpu/riscv/globalDefinitions_riscv.hpp | 52 + + src/hotspot/cpu/riscv/globals_riscv.hpp | 99 + src/hotspot/cpu/riscv/icBuffer_riscv.cpp | 79 + - src/hotspot/cpu/riscv/icache_riscv.cpp | 61 + + src/hotspot/cpu/riscv/icache_riscv.cpp | 51 + src/hotspot/cpu/riscv/icache_riscv.hpp | 42 + - src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 1932 +++ - src/hotspot/cpu/riscv/interp_masm_riscv.hpp | 283 + - src/hotspot/cpu/riscv/interpreterRT_riscv.cpp | 296 + + src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 1940 +++ + src/hotspot/cpu/riscv/interp_masm_riscv.hpp | 285 + + src/hotspot/cpu/riscv/interpreterRT_riscv.cpp | 295 + src/hotspot/cpu/riscv/interpreterRT_riscv.hpp | 68 + - .../cpu/riscv/javaFrameAnchor_riscv.hpp | 89 + - .../cpu/riscv/jniFastGetField_riscv.cpp | 193 + - src/hotspot/cpu/riscv/jniTypes_riscv.hpp | 108 + - .../cpu/riscv/macroAssembler_riscv.cpp | 5861 +++++++++ - .../cpu/riscv/macroAssembler_riscv.hpp | 975 ++ - .../cpu/riscv/macroAssembler_riscv.inline.hpp | 30 + - src/hotspot/cpu/riscv/methodHandles_riscv.cpp | 440 + - src/hotspot/cpu/riscv/methodHandles_riscv.hpp | 58 + - src/hotspot/cpu/riscv/nativeInst_riscv.cpp | 404 + - src/hotspot/cpu/riscv/nativeInst_riscv.hpp | 561 + - src/hotspot/cpu/riscv/registerMap_riscv.hpp | 46 + - .../cpu/riscv/register_definitions_riscv.cpp | 193 + - src/hotspot/cpu/riscv/register_riscv.cpp | 69 + - src/hotspot/cpu/riscv/register_riscv.hpp | 337 + + .../cpu/riscv/javaFrameAnchor_riscv.hpp | 86 + + .../cpu/riscv/jniFastGetField_riscv.cpp | 214 + + src/hotspot/cpu/riscv/jniTypes_riscv.hpp | 106 + + .../cpu/riscv/macroAssembler_riscv.cpp | 4016 ++++++ + .../cpu/riscv/macroAssembler_riscv.hpp | 858 ++ + .../cpu/riscv/macroAssembler_riscv.inline.hpp | 31 + + src/hotspot/cpu/riscv/matcher_riscv.hpp | 169 + + src/hotspot/cpu/riscv/methodHandles_riscv.cpp | 461 + + src/hotspot/cpu/riscv/methodHandles_riscv.hpp | 57 + + src/hotspot/cpu/riscv/nativeInst_riscv.cpp | 429 + + src/hotspot/cpu/riscv/nativeInst_riscv.hpp | 572 + + src/hotspot/cpu/riscv/registerMap_riscv.cpp | 45 + + src/hotspot/cpu/riscv/registerMap_riscv.hpp | 43 + + src/hotspot/cpu/riscv/register_riscv.cpp | 73 + + src/hotspot/cpu/riscv/register_riscv.hpp | 324 + src/hotspot/cpu/riscv/relocInfo_riscv.cpp | 113 + - src/hotspot/cpu/riscv/relocInfo_riscv.hpp | 45 + - src/hotspot/cpu/riscv/riscv.ad | 10685 ++++++++++++++++ - src/hotspot/cpu/riscv/riscv_b.ad | 605 + - src/hotspot/cpu/riscv/riscv_v.ad | 1723 +++ - src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 2738 ++++ - src/hotspot/cpu/riscv/stubGenerator_riscv.cpp | 3743 ++++++ - src/hotspot/cpu/riscv/stubRoutines_riscv.cpp | 60 + - src/hotspot/cpu/riscv/stubRoutines_riscv.hpp | 179 + - .../templateInterpreterGenerator_riscv.cpp | 1841 +++ - src/hotspot/cpu/riscv/templateTable_riscv.cpp | 4028 ++++++ + src/hotspot/cpu/riscv/relocInfo_riscv.hpp | 44 + + src/hotspot/cpu/riscv/riscv.ad | 10611 ++++++++++++++++ + src/hotspot/cpu/riscv/riscv_b.ad | 527 + + src/hotspot/cpu/riscv/riscv_v.ad | 2065 +++ + 
src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 2761 ++++ + src/hotspot/cpu/riscv/stubGenerator_riscv.cpp | 3864 ++++++ + src/hotspot/cpu/riscv/stubRoutines_riscv.cpp | 58 + + src/hotspot/cpu/riscv/stubRoutines_riscv.hpp | 161 + + .../templateInterpreterGenerator_riscv.cpp | 1794 +++ + src/hotspot/cpu/riscv/templateTable_riscv.cpp | 3951 ++++++ src/hotspot/cpu/riscv/templateTable_riscv.hpp | 42 + - src/hotspot/cpu/riscv/vmStructs_riscv.hpp | 43 + - .../cpu/riscv/vm_version_ext_riscv.cpp | 91 + - .../cpu/riscv/vm_version_ext_riscv.hpp | 55 + - src/hotspot/cpu/riscv/vm_version_riscv.cpp | 190 + - src/hotspot/cpu/riscv/vm_version_riscv.hpp | 65 + - src/hotspot/cpu/riscv/vmreg_riscv.cpp | 60 + - src/hotspot/cpu/riscv/vmreg_riscv.hpp | 64 + - src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp | 47 + + .../riscv/universalNativeInvoker_riscv.cpp | 33 + + .../cpu/riscv/universalUpcallHandle_riscv.cpp | 42 + + src/hotspot/cpu/riscv/vmStructs_riscv.hpp | 42 + + src/hotspot/cpu/riscv/vm_version_riscv.cpp | 230 + + src/hotspot/cpu/riscv/vm_version_riscv.hpp | 72 + + src/hotspot/cpu/riscv/vmreg_riscv.cpp | 64 + + src/hotspot/cpu/riscv/vmreg_riscv.hpp | 68 + + src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp | 46 + src/hotspot/cpu/riscv/vtableStubs_riscv.cpp | 260 + - src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp | 5 +- - src/hotspot/cpu/s390/s390.ad | 16 +- - src/hotspot/cpu/sparc/sparc.ad | 10 +- - src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp | 5 +- - src/hotspot/cpu/x86/macroAssembler_x86.cpp | 93 + - src/hotspot/cpu/x86/macroAssembler_x86.hpp | 2 + - src/hotspot/cpu/x86/x86.ad | 14 +- - src/hotspot/cpu/x86/x86_32.ad | 19 +- - src/hotspot/cpu/x86/x86_64.ad | 24 +- - src/hotspot/os/linux/os_linux.cpp | 11 +- - .../os_cpu/linux_riscv/atomic_linux_riscv.hpp | 113 + - .../linux_riscv/bytes_linux_riscv.inline.hpp | 44 + - .../linux_riscv/copy_linux_riscv.inline.hpp | 116 + + src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp | 9 +- + src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp | 7 +- + src/hotspot/os/linux/os_linux.cpp | 2 + + .../linux_riscv/assembler_linux_riscv.cpp | 26 + + .../os_cpu/linux_riscv/atomic_linux_riscv.hpp | 134 + + .../os_cpu/linux_riscv/bytes_linux_riscv.hpp | 45 + + .../os_cpu/linux_riscv/copy_linux_riscv.hpp | 31 + + .../linux_riscv/gc/z/zSyscall_linux_riscv.hpp | 42 + .../linux_riscv/globals_linux_riscv.hpp | 43 + - .../linux_riscv/orderAccess_linux_riscv.hpp | 73 + - .../os_cpu/linux_riscv/os_linux_riscv.cpp | 628 + - .../os_cpu/linux_riscv/os_linux_riscv.hpp | 40 + + .../linux_riscv/orderAccess_linux_riscv.hpp | 63 + + .../os_cpu/linux_riscv/os_linux_riscv.cpp | 466 + + .../os_cpu/linux_riscv/os_linux_riscv.hpp | 59 + .../prefetch_linux_riscv.inline.hpp | 38 + - .../os_cpu/linux_riscv/thread_linux_riscv.cpp | 103 + - .../os_cpu/linux_riscv/thread_linux_riscv.hpp | 67 + + .../os_cpu/linux_riscv/thread_linux_riscv.cpp | 92 + + .../os_cpu/linux_riscv/thread_linux_riscv.hpp | 48 + .../linux_riscv/vmStructs_linux_riscv.hpp | 55 + - .../linux_riscv/vm_version_linux_riscv.cpp | 116 + - src/hotspot/share/adlc/archDesc.cpp | 5 + - src/hotspot/share/adlc/formssel.cpp | 2 + - src/hotspot/share/c1/c1_LIR.cpp | 113 +- - src/hotspot/share/c1/c1_LIR.hpp | 208 +- + .../linux_riscv/vm_version_linux_riscv.cpp | 118 + + src/hotspot/share/c1/c1_LIR.cpp | 112 +- + src/hotspot/share/c1/c1_LIR.hpp | 209 +- src/hotspot/share/c1/c1_LIRAssembler.cpp | 15 +- - src/hotspot/share/c1/c1_LIRAssembler.hpp | 4 +- - src/hotspot/share/c1/c1_LinearScan.cpp | 14 +- - src/hotspot/share/classfile/vmSymbols.cpp | 2 + - 
src/hotspot/share/classfile/vmSymbols.hpp | 1 + - .../gc/shenandoah/shenandoahArguments.cpp | 2 +- + src/hotspot/share/c1/c1_LIRAssembler.hpp | 5 +- + src/hotspot/share/c1/c1_LinearScan.cpp | 18 +- + .../gc/shenandoah/shenandoahArguments.cpp | 4 +- + src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp | 4 +- .../share/jfr/utilities/jfrBigEndian.hpp | 2 +- - src/hotspot/share/opto/c2compiler.cpp | 1 + - src/hotspot/share/opto/chaitin.cpp | 90 +- - src/hotspot/share/opto/chaitin.hpp | 32 +- - src/hotspot/share/opto/intrinsicnode.hpp | 5 +- - src/hotspot/share/opto/library_call.cpp | 13 +- - src/hotspot/share/opto/machnode.cpp | 2 +- - src/hotspot/share/opto/machnode.hpp | 4 + - src/hotspot/share/opto/matcher.cpp | 41 +- - src/hotspot/share/opto/matcher.hpp | 6 +- - src/hotspot/share/opto/node.cpp | 21 + - src/hotspot/share/opto/node.hpp | 5 + - src/hotspot/share/opto/opcodes.cpp | 4 +- - src/hotspot/share/opto/opcodes.hpp | 2 + - src/hotspot/share/opto/phase.cpp | 2 + - src/hotspot/share/opto/phase.hpp | 1 + - src/hotspot/share/opto/postaloc.cpp | 53 +- - src/hotspot/share/opto/regmask.cpp | 46 +- - src/hotspot/share/opto/regmask.hpp | 10 +- - src/hotspot/share/opto/superword.cpp | 7 +- - src/hotspot/share/opto/type.cpp | 14 +- - src/hotspot/share/opto/type.hpp | 12 +- - src/hotspot/share/opto/vectornode.cpp | 4 +- - .../share/runtime/abstract_vm_version.cpp | 12 +- + src/hotspot/share/opto/regmask.hpp | 2 +- + .../share/runtime/abstract_vm_version.cpp | 3 +- + src/hotspot/share/runtime/synchronizer.cpp | 2 +- src/hotspot/share/runtime/thread.hpp | 2 +- - src/hotspot/share/runtime/thread.inline.hpp | 2 +- - src/hotspot/share/utilities/debug.cpp | 1 + + src/hotspot/share/runtime/thread.inline.hpp | 4 +- src/hotspot/share/utilities/macros.hpp | 26 + - .../share/classes/java/lang/StringLatin1.java | 5 + .../native/libsaproc/LinuxDebuggerLocal.c | 49 +- - .../linux/native/libsaproc/libproc.h | 2 + - .../linux/native/libsaproc/ps_proc.c | 4 + - .../classes/sun/jvm/hotspot/HotSpotAgent.java | 4 + + .../linux/native/libsaproc/libproc.h | 4 +- + .../classes/sun/jvm/hotspot/HotSpotAgent.java | 3 + .../debugger/MachineDescriptionRISCV64.java | 40 + - .../debugger/linux/LinuxCDebugger.java | 11 +- + .../debugger/linux/LinuxCDebugger.java | 13 +- .../linux/riscv64/LinuxRISCV64CFrame.java | 90 + .../riscv64/LinuxRISCV64ThreadContext.java | 48 + - .../debugger/proc/ProcDebuggerLocal.java | 6 + .../proc/riscv64/ProcRISCV64Thread.java | 88 + .../riscv64/ProcRISCV64ThreadContext.java | 48 + .../riscv64/ProcRISCV64ThreadFactory.java | 46 + .../remote/riscv64/RemoteRISCV64Thread.java | 55 + .../riscv64/RemoteRISCV64ThreadContext.java | 48 + .../riscv64/RemoteRISCV64ThreadFactory.java | 46 + - .../riscv64/RISCV64ThreadContext.java | 172 + - .../sun/jvm/hotspot/runtime/Threads.java | 3 + - .../LinuxRISCV64JavaThreadPDAccess.java | 132 + + .../debugger/risv64/RISCV64ThreadContext.java | 172 + + .../sun/jvm/hotspot/runtime/Threads.java | 5 +- + .../LinuxRISCV64JavaThreadPDAccess.java | 134 + .../riscv64/RISCV64CurrentFrameGuess.java | 223 + - .../hotspot/runtime/riscv64/RISCV64Frame.java | 554 + - .../riscv64/RISCV64JavaCallWrapper.java | 58 + + .../hotspot/runtime/riscv64/RISCV64Frame.java | 556 + + .../riscv64/RISCV64JavaCallWrapper.java | 61 + .../runtime/riscv64/RISCV64RegisterMap.java | 53 + - .../jvm/hotspot/utilities/PlatformInfo.java | 2 +- - src/utils/hsdis/hsdis.c | 6 +- - test/hotspot/jtreg/compiler/c2/TestBit.java | 6 +- - ...eSHA1IntrinsicsOptionOnUnsupportedCPU.java | 4 + - 
...HA256IntrinsicsOptionOnUnsupportedCPU.java | 4 + - ...HA512IntrinsicsOptionOnUnsupportedCPU.java | 4 + - .../cli/TestUseSHAOptionOnUnsupportedCPU.java | 4 + - .../testcases/GenericTestCaseForOtherCPU.java | 10 +- - ...nericTestCaseForUnsupportedRISCV64CPU.java | 102 + - .../string/TestStringLatin1IndexOfChar.java | 153 + - .../loopopts/superword/ProdRed_Double.java | 2 +- - .../loopopts/superword/ProdRed_Float.java | 2 +- - .../loopopts/superword/ProdRed_Int.java | 2 +- - .../loopopts/superword/ReductionPerf.java | 2 +- - .../superword/SumRedAbsNeg_Double.java | 2 +- - .../superword/SumRedAbsNeg_Float.java | 2 +- - .../loopopts/superword/SumRedSqrt_Double.java | 2 +- - .../loopopts/superword/SumRed_Double.java | 2 +- - .../loopopts/superword/SumRed_Float.java | 2 +- - .../loopopts/superword/SumRed_Int.java | 2 +- - .../argumentcorruption/CheckLongArgs.java | 2 +- - .../criticalnatives/lookup/LookUp.java | 2 +- - .../sha/predicate/IntrinsicPredicates.java | 9 +- - .../NMT/CheckForProperDetailStackTrace.java | 3 +- - .../ReservedStack/ReservedStackTest.java | 3 +- - test/hotspot/jtreg/test_env.sh | 5 + - ...stMutuallyExclusivePlatformPredicates.java | 3 +- - .../nsk/jvmti/GetThreadInfo/thrinfo001.java | 2 +- - .../jdk/jfr/event/os/TestCPUInformation.java | 5 +- - test/lib/jdk/test/lib/Platform.java | 5 + - .../bench/java/lang/StringIndexOfChar.java | 221 + - 218 files changed, 57653 insertions(+), 221 deletions(-) + .../jvm/hotspot/utilities/PlatformInfo.java | 4 +- + test/hotspot/jtreg/compiler/c2/TestBit.java | 7 +- + ...eSHA1IntrinsicsOptionOnUnsupportedCPU.java | 5 +- + ...HA256IntrinsicsOptionOnUnsupportedCPU.java | 5 +- + ...HA512IntrinsicsOptionOnUnsupportedCPU.java | 5 +- + .../cli/TestUseSHAOptionOnUnsupportedCPU.java | 5 +- + .../testcases/GenericTestCaseForOtherCPU.java | 11 +- + ...nericTestCaseForUnsupportedRISCV64CPU.java | 115 + + .../loopopts/superword/ProdRed_Double.java | 4 +- + .../loopopts/superword/ProdRed_Float.java | 4 +- + .../loopopts/superword/ProdRed_Int.java | 4 +- + .../loopopts/superword/ReductionPerf.java | 4 +- + .../superword/SumRedAbsNeg_Double.java | 4 +- + .../superword/SumRedAbsNeg_Float.java | 4 +- + .../loopopts/superword/SumRedSqrt_Double.java | 4 +- + .../loopopts/superword/SumRed_Double.java | 4 +- + .../loopopts/superword/SumRed_Float.java | 4 +- + .../loopopts/superword/SumRed_Int.java | 4 +- + .../sha/predicate/IntrinsicPredicates.java | 11 +- + .../NMT/CheckForProperDetailStackTrace.java | 4 +- + .../ReservedStack/ReservedStackTest.java | 4 +- + .../HeapMonitorEventsForTwoThreadsTest.java | 1 - + ...stMutuallyExclusivePlatformPredicates.java | 2 +- + .../jdk/jfr/event/os/TestCPUInformation.java | 6 +- + test/lib/jdk/test/lib/Platform.java | 4 + + 187 files changed, 59079 insertions(+), 189 deletions(-) create mode 100644 src/hotspot/cpu/riscv/abstractInterpreter_riscv.cpp create mode 100644 src/hotspot/cpu/riscv/assembler_riscv.cpp create mode 100644 src/hotspot/cpu/riscv/assembler_riscv.hpp @@ -248,20 +218,26 @@ Subject: [PATCH] Add riscv64 support (based on bishengjdk riscv branch) create mode 100644 src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp create mode 100644 src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp create mode 100644 src/hotspot/cpu/riscv/c1_globals_riscv.hpp + create mode 100644 src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp + create mode 100644 src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp create mode 100644 src/hotspot/cpu/riscv/c2_globals_riscv.hpp create mode 100644 src/hotspot/cpu/riscv/c2_init_riscv.cpp + create 
mode 100644 src/hotspot/cpu/riscv/c2_safepointPollStubTable_riscv.cpp create mode 100644 src/hotspot/cpu/riscv/codeBuffer_riscv.hpp create mode 100644 src/hotspot/cpu/riscv/compiledIC_riscv.cpp create mode 100644 src/hotspot/cpu/riscv/copy_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/depChecker_riscv.hpp create mode 100644 src/hotspot/cpu/riscv/disassembler_riscv.hpp + create mode 100644 src/hotspot/cpu/riscv/foreign_globals_riscv.cpp + create mode 100644 src/hotspot/cpu/riscv/foreign_globals_riscv.hpp create mode 100644 src/hotspot/cpu/riscv/frame_riscv.cpp create mode 100644 src/hotspot/cpu/riscv/frame_riscv.hpp create mode 100644 src/hotspot/cpu/riscv/frame_riscv.inline.hpp create mode 100644 src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp create mode 100644 src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp + create mode 100644 src/hotspot/cpu/riscv/gc/g1/g1Globals_riscv.hpp create mode 100644 src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp create mode 100644 src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp + create mode 100644 src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp create mode 100644 src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp create mode 100644 src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp create mode 100644 src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.cpp @@ -270,6 +246,11 @@ Subject: [PATCH] Add riscv64 support (based on bishengjdk riscv branch) create mode 100644 src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp create mode 100644 src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp create mode 100644 src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad + create mode 100644 src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp + create mode 100644 src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.hpp + create mode 100644 src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp + create mode 100644 src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp + create mode 100644 src/hotspot/cpu/riscv/gc/z/z_riscv64.ad create mode 100644 src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp create mode 100644 src/hotspot/cpu/riscv/globals_riscv.hpp create mode 100644 src/hotspot/cpu/riscv/icBuffer_riscv.cpp @@ -285,12 +266,13 @@ Subject: [PATCH] Add riscv64 support (based on bishengjdk riscv branch) create mode 100644 src/hotspot/cpu/riscv/macroAssembler_riscv.cpp create mode 100644 src/hotspot/cpu/riscv/macroAssembler_riscv.hpp create mode 100644 src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp + create mode 100644 src/hotspot/cpu/riscv/matcher_riscv.hpp create mode 100644 src/hotspot/cpu/riscv/methodHandles_riscv.cpp create mode 100644 src/hotspot/cpu/riscv/methodHandles_riscv.hpp create mode 100644 src/hotspot/cpu/riscv/nativeInst_riscv.cpp create mode 100644 src/hotspot/cpu/riscv/nativeInst_riscv.hpp + create mode 100644 src/hotspot/cpu/riscv/registerMap_riscv.cpp create mode 100644 src/hotspot/cpu/riscv/registerMap_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/register_definitions_riscv.cpp create mode 100644 src/hotspot/cpu/riscv/register_riscv.cpp create mode 100644 src/hotspot/cpu/riscv/register_riscv.hpp create mode 100644 src/hotspot/cpu/riscv/relocInfo_riscv.cpp @@ -305,18 +287,20 @@ Subject: [PATCH] Add riscv64 support (based on bishengjdk riscv branch) create mode 100644 src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp create mode 100644 
src/hotspot/cpu/riscv/templateTable_riscv.cpp create mode 100644 src/hotspot/cpu/riscv/templateTable_riscv.hpp + create mode 100644 src/hotspot/cpu/riscv/universalNativeInvoker_riscv.cpp + create mode 100644 src/hotspot/cpu/riscv/universalUpcallHandle_riscv.cpp create mode 100644 src/hotspot/cpu/riscv/vmStructs_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp create mode 100644 src/hotspot/cpu/riscv/vm_version_riscv.cpp create mode 100644 src/hotspot/cpu/riscv/vm_version_riscv.hpp create mode 100644 src/hotspot/cpu/riscv/vmreg_riscv.cpp create mode 100644 src/hotspot/cpu/riscv/vmreg_riscv.hpp create mode 100644 src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp create mode 100644 src/hotspot/cpu/riscv/vtableStubs_riscv.cpp + create mode 100644 src/hotspot/os_cpu/linux_riscv/assembler_linux_riscv.cpp create mode 100644 src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.inline.hpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp + create mode 100644 src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.hpp + create mode 100644 src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.hpp + create mode 100644 src/hotspot/os_cpu/linux_riscv/gc/z/zSyscall_linux_riscv.hpp create mode 100644 src/hotspot/os_cpu/linux_riscv/globals_linux_riscv.hpp create mode 100644 src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp create mode 100644 src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp @@ -335,101 +319,95 @@ Subject: [PATCH] Add riscv64 support (based on bishengjdk riscv branch) create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64Thread.java create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadContext.java create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadFactory.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/riscv64/RISCV64ThreadContext.java + create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/risv64/RISCV64ThreadContext.java create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64CurrentFrameGuess.java create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java create mode 100644 test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java - create mode 100644 test/hotspot/jtreg/compiler/intrinsics/string/TestStringLatin1IndexOfChar.java - create mode 100644 test/micro/org/openjdk/bench/java/lang/StringIndexOfChar.java - -diff --git a/make/autoconf/build-aux/config.sub b/make/autoconf/build-aux/config.sub -index 3c280ac7c..eda408e01 100644 ---- a/make/autoconf/build-aux/config.sub -+++ b/make/autoconf/build-aux/config.sub -@@ -48,6 +48,13 @@ if ! 
echo $* | grep '^aarch64-' >/dev/null ; then - exit - fi - -+# Canonicalize for riscv which autoconf-config.sub doesn't handle -+if echo $* | grep '^riscv\(32\|64\)-linux' > /dev/null ; then -+ result=`echo $@ | sed 's/linux/unknown-linux/'` -+ echo $result -+ exit -+fi -+ - while test $# -gt 0 ; do - case $1 in - -- ) # Stop option processing + +diff --git a/make/autoconf/build-aux/config.guess b/make/autoconf/build-aux/config.guess +index a88a9adec3f..15111d827ab 100644 +--- a/make/autoconf/build-aux/config.guess ++++ b/make/autoconf/build-aux/config.guess +@@ -1,6 +1,6 @@ + #!/bin/sh + # +-# Copyright (c) 2012, 2021, Oracle and/or its affiliates. All rights reserved. ++# Copyright (c) 2012, 2022, Oracle and/or its affiliates. All rights reserved. + # Copyright (c) 2021, Azul Systems, Inc. All rights reserved. + # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + # diff --git a/make/autoconf/hotspot.m4 b/make/autoconf/hotspot.m4 -index a3e1e00b2..01ef26c10 100644 +index 9bb34363e5c..f84e8f84c60 100644 --- a/make/autoconf/hotspot.m4 +++ b/make/autoconf/hotspot.m4 -@@ -367,7 +367,8 @@ AC_DEFUN_ONCE([HOTSPOT_SETUP_JVM_FEATURES], +@@ -370,7 +370,8 @@ AC_DEFUN_ONCE([HOTSPOT_SETUP_JVM_FEATURES], AC_MSG_CHECKING([if shenandoah can be built]) if HOTSPOT_CHECK_JVM_FEATURE(shenandoahgc); then if test "x$OPENJDK_TARGET_CPU_ARCH" = "xx86" || \ - test "x$OPENJDK_TARGET_CPU" = "xaarch64"; then + test "x$OPENJDK_TARGET_CPU" = "xaarch64" || \ -+ test "x$OPENJDK_TARGET_CPU" = "xriscv64" ; then ++ test "x$OPENJDK_TARGET_CPU" = "xriscv64"; then AC_MSG_RESULT([yes]) else DISABLED_JVM_FEATURES="$DISABLED_JVM_FEATURES shenandoahgc" diff --git a/make/autoconf/libraries.m4 b/make/autoconf/libraries.m4 -index 16e906bdc..c01fdbcce 100644 +index 16e906bdc6a..5c49fd9285d 100644 --- a/make/autoconf/libraries.m4 +++ b/make/autoconf/libraries.m4 -@@ -110,7 +110,7 @@ AC_DEFUN_ONCE([LIB_SETUP_LIBRARIES], - GLOBAL_LIBS="" - fi - -- BASIC_JDKLIB_LIBS="" -+ BASIC_JDKLIB_LIBS="-latomic" - if test "x$TOOLCHAIN_TYPE" != xmicrosoft; then - BASIC_JDKLIB_LIBS="-ljava -ljvm" - fi -@@ -147,6 +147,8 @@ AC_DEFUN_ONCE([LIB_SETUP_LIBRARIES], - wsock32.lib winmm.lib version.lib psapi.lib" +@@ -1,5 +1,5 @@ + # +-# Copyright (c) 2011, 2018, Oracle and/or its affiliates. All rights reserved. ++# Copyright (c) 2011, 2022, Oracle and/or its affiliates. All rights reserved. + # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + # + # This code is free software; you can redistribute it and/or modify it +@@ -130,6 +130,12 @@ AC_DEFUN_ONCE([LIB_SETUP_LIBRARIES], + BASIC_JVM_LIBS="$BASIC_JVM_LIBS -lthread" fi -+ BASIC_JVM_LIBS="$BASIC_JVM_LIBS -latomic" -+ - JDKLIB_LIBS="$BASIC_JDKLIB_LIBS" - JDKEXE_LIBS="" - JVM_LIBS="$BASIC_JVM_LIBS" ++ # Because RISC-V only has word-sized atomics, it requries libatomic where ++ # other common architectures do not. So link libatomic by default. 
++ if test "x$OPENJDK_TARGET_OS" = xlinux && test "x$OPENJDK_TARGET_CPU" = xriscv64; then ++ BASIC_JVM_LIBS="$BASIC_JVM_LIBS -latomic" ++ fi ++ + # perfstat lib + if test "x$OPENJDK_TARGET_OS" = xaix; then + BASIC_JVM_LIBS="$BASIC_JVM_LIBS -lperfstat" diff --git a/make/autoconf/platform.m4 b/make/autoconf/platform.m4 -index f89b22f5f..48d615992 100644 +index 26a58eb2ee8..67972d89248 100644 --- a/make/autoconf/platform.m4 +++ b/make/autoconf/platform.m4 -@@ -120,6 +120,12 @@ AC_DEFUN([PLATFORM_EXTRACT_VARS_FROM_CPU], - VAR_CPU_BITS=64 - VAR_CPU_ENDIAN=little - ;; -+ riscv32) -+ VAR_CPU=riscv32 -+ VAR_CPU_ARCH=riscv -+ VAR_CPU_BITS=32 -+ VAR_CPU_ENDIAN=little -+ ;; - riscv64) - VAR_CPU=riscv64 - VAR_CPU_ARCH=riscv -@@ -564,8 +570,10 @@ AC_DEFUN([PLATFORM_SETUP_LEGACY_VARS_HELPER], +@@ -1,5 +1,5 @@ + # +-# Copyright (c) 2011, 2021, Oracle and/or its affiliates. All rights reserved. ++# Copyright (c) 2011, 2022, Oracle and/or its affiliates. All rights reserved. + # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + # + # This code is free software; you can redistribute it and/or modify it +@@ -554,6 +554,8 @@ AC_DEFUN([PLATFORM_SETUP_LEGACY_VARS_HELPER], + HOTSPOT_$1_CPU_DEFINE=PPC64 + elif test "x$OPENJDK_$1_CPU" = xppc64le; then + HOTSPOT_$1_CPU_DEFINE=PPC64 ++ elif test "x$OPENJDK_$1_CPU" = xriscv64; then ++ HOTSPOT_$1_CPU_DEFINE=RISCV64 + + # The cpu defines below are for zero, we don't support them directly. + elif test "x$OPENJDK_$1_CPU" = xsparc; then +@@ -564,8 +566,6 @@ AC_DEFUN([PLATFORM_SETUP_LEGACY_VARS_HELPER], HOTSPOT_$1_CPU_DEFINE=S390 elif test "x$OPENJDK_$1_CPU" = xs390x; then HOTSPOT_$1_CPU_DEFINE=S390 -+ elif test "x$OPENJDK_$1_CPU" = xriscv32; then -+ HOTSPOT_$1_CPU_DEFINE=RISCV32 - elif test "x$OPENJDK_$1_CPU" = xriscv64; then +- elif test "x$OPENJDK_$1_CPU" = xriscv64; then - HOTSPOT_$1_CPU_DEFINE=RISCV -+ HOTSPOT_$1_CPU_DEFINE=RISCV64 + elif test "x$OPENJDK_$1_CPU" = xloongarch64; then + HOTSPOT_$1_CPU_DEFINE=LOONGARCH64 elif test "x$OPENJDK_$1_CPU" != x; then - HOTSPOT_$1_CPU_DEFINE=$(echo $OPENJDK_$1_CPU | tr a-z A-Z) - fi diff --git a/make/hotspot/gensrc/GensrcAdlc.gmk b/make/hotspot/gensrc/GensrcAdlc.gmk -index c5a3ac572..9de6f663c 100644 +index c5a3ac5724b..67f4c6f0574 100644 --- a/make/hotspot/gensrc/GensrcAdlc.gmk +++ b/make/hotspot/gensrc/GensrcAdlc.gmk @@ -1,5 +1,5 @@ @@ -439,17 +417,10 @@ index c5a3ac572..9de6f663c 100644 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
# # This code is free software; you can redistribute it and/or modify it -@@ -150,6 +150,20 @@ ifeq ($(call check-jvm-feature, compiler2), true) +@@ -150,6 +150,13 @@ ifeq ($(call check-jvm-feature, compiler2), true) $d/os_cpu/$(HOTSPOT_TARGET_OS)_$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_OS)_$(HOTSPOT_TARGET_CPU_ARCH).ad \ ))) -+ ifeq ($(HOTSPOT_TARGET_CPU_ARCH), aarch64) -+ AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \ -+ $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_neon.ad \ -+ $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_sve.ad \ -+ ))) -+ endif -+ + ifeq ($(HOTSPOT_TARGET_CPU_ARCH), riscv) + AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \ + $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_v.ad \ @@ -460,95 +431,17 @@ index c5a3ac572..9de6f663c 100644 ifeq ($(call check-jvm-feature, shenandoahgc), true) AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \ $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/gc/shenandoah/shenandoah_$(HOTSPOT_TARGET_CPU).ad \ -diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad -index 1e4ee33a9..ac5d56f0f 100644 ---- a/src/hotspot/cpu/aarch64/aarch64.ad -+++ b/src/hotspot/cpu/aarch64/aarch64.ad -@@ -2062,15 +2062,17 @@ const bool Matcher::match_rule_supported(int opcode) { - return true; // Per default match rules are supported. - } - --const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { -+const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { - - // TODO - // identify extra cases that we might want to provide match rules for - // e.g. Op_ vector nodes and other intrinsics while guarding with vlen -- bool ret_value = match_rule_supported(opcode); -+ if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) { -+ return false; -+ } - // Add rules here. - -- return ret_value; // Per default match rules are supported. -+ return true; // Per default match rules are supported. - } - - const bool Matcher::has_predicated_vectors(void) { -@@ -2129,6 +2131,14 @@ const int Matcher::min_vector_size(const BasicType bt) { - return size; - } - -+const bool Matcher::supports_scalable_vector() { -+ return false; -+} -+ -+const int Matcher::scalable_vector_reg_size(const BasicType bt) { -+ return -1; -+} -+ - // Vector ideal reg. 
- const uint Matcher::vector_ideal_reg(int len) { - switch(len) { -@@ -15515,15 +15525,16 @@ instruct string_indexof_conUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, - ins_pipe(pipe_class_memory); - %} - --instruct string_indexofU_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch, -+instruct string_indexof_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch, - iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, - iRegINoSp tmp3, rFlagsReg cr) - %{ - match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); -+ predicate(((StrIndexOfCharNode*)n) ->encoding() == StrIntrinsicNode::U); - effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, - TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); - -- format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result" %} -+ format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result" %} - - ins_encode %{ - __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, -@@ -15533,6 +15544,25 @@ instruct string_indexofU_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch, - ins_pipe(pipe_class_memory); - %} - -+instruct stringL_indexof_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch, -+ iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, -+ iRegINoSp tmp3, rFlagsReg cr) -+%{ -+ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); -+ predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L); -+ effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, -+ TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); -+ -+ format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result" %} -+ -+ ins_encode %{ -+ __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, -+ $result$$Register, $tmp1$$Register, $tmp2$$Register, -+ $tmp3$$Register); -+ %} -+ ins_pipe(pipe_class_memory); -+%} -+ - instruct string_equalsL(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt, - iRegI_R0 result, rFlagsReg cr) - %{ diff --git a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp -index fdd2c0ca3..1a35be210 100644 +index fdd2c0ca3d7..63f193de86e 100644 --- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * @@ -1593,7 +1593,9 @@ void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { } @@ -556,131 +449,21 @@ index fdd2c0ca3..1a35be210 100644 -void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { +void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type, + LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) { -+ assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp operands on aarch64"); ++ assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp oprs on aarch64"); Assembler::Condition acond, ncond; switch (condition) { -diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp -index 5753cc9a6..21c6fdf19 100644 ---- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp -+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp -@@ -4829,6 +4829,70 @@ void MacroAssembler::string_indexof_char(Register str1, Register cnt1, - BIND(DONE); - } - -+void MacroAssembler::stringL_indexof_char(Register str1, Register cnt1, -+ Register ch, Register result, -+ Register tmp1, Register tmp2, Register tmp3) -+{ -+ Label CH1_LOOP, HAS_ZERO, DO1_SHORT, DO1_LOOP, MATCH, NOMATCH, DONE; -+ Register cnt1_neg = cnt1; -+ Register ch1 = rscratch1; -+ Register result_tmp = rscratch2; -+ -+ cbz(cnt1, NOMATCH); -+ -+ cmp(cnt1, (u1)8); -+ br(LT, DO1_SHORT); -+ -+ orr(ch, ch, ch, LSL, 8); -+ orr(ch, ch, ch, LSL, 16); -+ orr(ch, ch, ch, LSL, 32); -+ -+ sub(cnt1, cnt1, 8); -+ mov(result_tmp, cnt1); -+ lea(str1, Address(str1, cnt1)); -+ sub(cnt1_neg, zr, cnt1); -+ -+ mov(tmp3, 0x0101010101010101); -+ -+ BIND(CH1_LOOP); -+ ldr(ch1, Address(str1, cnt1_neg)); -+ eor(ch1, ch, ch1); -+ sub(tmp1, ch1, tmp3); -+ orr(tmp2, ch1, 0x7f7f7f7f7f7f7f7f); -+ bics(tmp1, tmp1, tmp2); -+ br(NE, HAS_ZERO); -+ adds(cnt1_neg, cnt1_neg, 8); -+ br(LT, CH1_LOOP); -+ -+ cmp(cnt1_neg, (u1)8); -+ mov(cnt1_neg, 0); -+ br(LT, CH1_LOOP); -+ b(NOMATCH); -+ -+ BIND(HAS_ZERO); -+ rev(tmp1, tmp1); -+ clz(tmp1, tmp1); -+ add(cnt1_neg, cnt1_neg, tmp1, LSR, 3); -+ b(MATCH); -+ -+ BIND(DO1_SHORT); -+ mov(result_tmp, cnt1); -+ lea(str1, Address(str1, cnt1)); -+ sub(cnt1_neg, zr, cnt1); -+ BIND(DO1_LOOP); -+ ldrb(ch1, Address(str1, cnt1_neg)); -+ cmp(ch, ch1); -+ br(EQ, MATCH); -+ adds(cnt1_neg, cnt1_neg, 1); -+ br(LT, DO1_LOOP); -+ BIND(NOMATCH); -+ mov(result, -1); -+ b(DONE); -+ BIND(MATCH); -+ add(result, result_tmp, cnt1_neg); -+ BIND(DONE); -+} -+ - // Compare strings. 
- void MacroAssembler::string_compare(Register str1, Register str2, - Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2, -diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp -index 7e23c16a4..c3d472a9a 100644 ---- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp -+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp -@@ -1260,6 +1260,9 @@ public: - void string_indexof_char(Register str1, Register cnt1, - Register ch, Register result, - Register tmp1, Register tmp2, Register tmp3); -+ void stringL_indexof_char(Register str1, Register cnt1, -+ Register ch, Register result, -+ Register tmp1, Register tmp2, Register tmp3); - void fast_log(FloatRegister vtmp0, FloatRegister vtmp1, FloatRegister vtmp2, - FloatRegister vtmp3, FloatRegister vtmp4, FloatRegister vtmp5, - FloatRegister tmpC1, FloatRegister tmpC2, FloatRegister tmpC3, -diff --git a/src/hotspot/cpu/arm/arm.ad b/src/hotspot/cpu/arm/arm.ad -index 51f2d9ce7..71f83521e 100644 ---- a/src/hotspot/cpu/arm/arm.ad -+++ b/src/hotspot/cpu/arm/arm.ad -@@ -1093,7 +1093,7 @@ const bool Matcher::match_rule_supported(int opcode) { - return true; // Per default match rules are supported. - } - --const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { -+const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { - - // TODO - // identify extra cases that we might want to provide match rules for -@@ -1121,6 +1121,14 @@ const int Matcher::vector_width_in_bytes(BasicType bt) { - return MaxVectorSize; - } - -+const bool Matcher::supports_scalable_vector() { -+ return false; -+} -+ -+const int Matcher::scalable_vector_reg_size(const BasicType bt) { -+ return -1; -+} -+ - // Vector ideal reg corresponding to specified size in bytes - const uint Matcher::vector_ideal_reg(int size) { - assert(MaxVectorSize >= size, ""); diff --git a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp -index f0a7229aa..2d06d3d58 100644 +index f0a7229aa18..cb095052534 100644 --- a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp +++ b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2008, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2008, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -1824,7 +1824,10 @@ void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { } @@ -688,75 +471,44 @@ index f0a7229aa..2d06d3d58 100644 -void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { +void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type, + LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) { -+ assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp operands on arm"); ++ assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp oprs on arm"); + AsmCondition acond = al; AsmCondition ncond = nv; if (opr1 != opr2) { diff --git a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp -index 847f7d61d..d081116be 100644 +index 847f7d61d2f..d74db914331 100644 --- a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp +++ b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp -@@ -1554,7 +1554,10 @@ inline void load_to_reg(LIR_Assembler *lasm, LIR_Opr src, LIR_Opr dst) { +@@ -1,6 +1,6 @@ + /* +- * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2012, 2019, SAP SE. All rights reserved. ++ * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2012, 2021 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -1553,8 +1553,10 @@ inline void load_to_reg(LIR_Assembler *lasm, LIR_Opr src, LIR_Opr dst) { + } } - --void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { +void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type, + LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) { -+ assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp operands on ppc"); -+ ++ assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp oprs on ppc"); + +-void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { if (opr1->is_equal(opr2) || opr1->is_same_register(opr2)) { load_to_reg(this, opr1, result); // Condition doesn't matter. return; -diff --git a/src/hotspot/cpu/ppc/ppc.ad b/src/hotspot/cpu/ppc/ppc.ad -index ebbe80a26..df66a46dc 100644 ---- a/src/hotspot/cpu/ppc/ppc.ad -+++ b/src/hotspot/cpu/ppc/ppc.ad -@@ -2242,15 +2242,17 @@ const bool Matcher::match_rule_supported(int opcode) { - return true; // Per default match rules are supported. - } - --const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { -+const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { - - // TODO - // identify extra cases that we might want to provide match rules for - // e.g. Op_ vector nodes and other intrinsics while guarding with vlen -- bool ret_value = match_rule_supported(opcode); -+ if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) { -+ return false; -+ } - // Add rules here. - -- return ret_value; // Per default match rules are supported. -+ return true; // Per default match rules are supported. 
- } - - const bool Matcher::has_predicated_vectors(void) { -@@ -2310,6 +2312,14 @@ const int Matcher::min_vector_size(const BasicType bt) { - return max_vector_size(bt); // Same as max. - } - -+const bool Matcher::supports_scalable_vector() { -+ return false; -+} -+ -+const int Matcher::scalable_vector_reg_size(const BasicType bt) { -+ return -1; -+} -+ - // PPC implementation uses VSX load/store instructions (if - // SuperwordUseVSX) which support 4 byte but not arbitrary alignment - const bool Matcher::misaligned_vectors_ok() { diff --git a/src/hotspot/cpu/riscv/abstractInterpreter_riscv.cpp b/src/hotspot/cpu/riscv/abstractInterpreter_riscv.cpp new file mode 100644 -index 000000000..5661b7425 +index 00000000000..31c63abe71d --- /dev/null +++ b/src/hotspot/cpu/riscv/abstractInterpreter_riscv.cpp -@@ -0,0 +1,185 @@ +@@ -0,0 +1,177 @@ +/* -+ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -784,13 +536,13 @@ index 000000000..5661b7425 +#include "precompiled.hpp" +#include "interpreter/interpreter.hpp" +#include "oops/constMethod.hpp" ++#include "oops/klass.inline.hpp" +#include "oops/method.hpp" +#include "runtime/frame.inline.hpp" +#include "utilities/align.hpp" +#include "utilities/debug.hpp" +#include "utilities/macros.hpp" + -+ +int AbstractInterpreter::BasicType_as_index(BasicType type) { + int i = 0; + switch (type) { @@ -880,7 +632,6 @@ index 000000000..5661b7425 + // as determined by a previous call to the size_activation() method. + // It is also guaranteed to be walkable even though it is in a + // skeletal state -+ + assert_cond(method != NULL && caller != NULL && interpreter_frame != NULL); + int max_locals = method->max_locals() * Interpreter::stackElementWords; + int extra_locals = (method->max_locals() - method->size_of_parameters()) * @@ -894,14 +645,6 @@ index 000000000..5661b7425 + // NOTE the difference in using sender_sp and interpreter_frame_sender_sp + // interpreter_frame_sender_sp is the original sp of the caller (the unextended_sp) + // and sender_sp is fp -+ // -+ // The interpreted method entry on riscv aligns SP to 16 bytes -+ // before generating the fixed part of the activation frame. So there -+ // may be a gap between the locals block and the saved sender SP. For -+ // an interpreted caller we need to recreate this gap and exactly -+ // align the incoming parameters with the caller's temporary -+ // expression stack. For other types of caller frame it doesn't -+ // matter. 
+ intptr_t* locals = NULL; + if (caller->is_interpreted_frame()) { + locals = caller->interpreter_frame_last_sp() + caller_actual_parameters - 1; @@ -935,6 +678,7 @@ index 000000000..5661b7425 + interpreter_frame->set_interpreter_frame_sender_sp(caller->sp() + + extra_locals); + } ++ + *interpreter_frame->interpreter_frame_cache_addr() = + method->constants()->cache(); + *interpreter_frame->interpreter_frame_mirror_addr() = @@ -942,14 +686,14 @@ index 000000000..5661b7425 +} diff --git a/src/hotspot/cpu/riscv/assembler_riscv.cpp b/src/hotspot/cpu/riscv/assembler_riscv.cpp new file mode 100644 -index 000000000..40ecf1a6c +index 00000000000..f15ef5304c5 --- /dev/null +++ b/src/hotspot/cpu/riscv/assembler_riscv.cpp -@@ -0,0 +1,365 @@ +@@ -0,0 +1,372 @@ +/* + * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -969,6 +713,7 @@ index 000000000..40ecf1a6c + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. ++ * + */ + +#include @@ -983,8 +728,6 @@ index 000000000..40ecf1a6c +#include "runtime/interfaceSupport.inline.hpp" +#include "runtime/sharedRuntime.hpp" + -+#define __ _masm. -+ +int AbstractAssembler::code_fill_byte() { + return 0; +} @@ -999,7 +742,7 @@ index 000000000..40ecf1a6c + } +} + -+void Assembler::addw(Register Rd, Register Rn, int32_t increment, Register temp) { ++void Assembler::addw(Register Rd, Register Rn, int64_t increment, Register temp) { + if (is_imm_in_range(increment, 12, 0)) { + addiw(Rd, Rn, increment); + } else { @@ -1019,7 +762,7 @@ index 000000000..40ecf1a6c + } +} + -+void Assembler::subw(Register Rd, Register Rn, int32_t decrement, Register temp) { ++void Assembler::subw(Register Rd, Register Rn, int64_t decrement, Register temp) { + if (is_imm_in_range(-decrement, 12, 0)) { + addiw(Rd, Rn, -decrement); + } else { @@ -1033,11 +776,12 @@ index 000000000..40ecf1a6c + add_uw(Rd, Rs, zr); +} + -+void Assembler::li(Register Rd, int64_t imm) { ++void Assembler::_li(Register Rd, int64_t imm) { + // int64_t is in range 0x8000 0000 0000 0000 ~ 0x7fff ffff ffff ffff + int shift = 12; + int64_t upper = imm, lower = imm; -+ // Split imm to a lower 12-bit sign-extended part and the remainder, because addi will sign-extend the lower imm. ++ // Split imm to a lower 12-bit sign-extended part and the remainder, ++ // because addi will sign-extend the lower imm. + lower = ((int32_t)imm << 20) >> 20; + upper -= lower; + @@ -1051,8 +795,7 @@ index 000000000..40ecf1a6c + if (lower != 0) { + addi(Rd, Rd, lower); + } -+ } -+ else { ++ } else { + // 32-bit integer + Register hi_Rd = zr; + if (upper != 0) { @@ -1066,30 +809,30 @@ index 000000000..40ecf1a6c +} + +void Assembler::li64(Register Rd, int64_t imm) { -+ // Load upper 32 bits. Upper = imm[63:32], but if imm[31] = 1 or (imm[31:28] == 0x7ff && imm[19] == 1), -+ // upper = imm[63:32] + 1. 
-+ int64_t lower = imm & 0xffffffff; -+ lower -= ((lower << 44) >> 44); -+ int64_t tmp_imm = ((uint64_t)(imm & 0xffffffff00000000)) + (uint64_t)lower; -+ int32_t upper = (tmp_imm - (int32_t)lower) >> 32; -+ -+ // Load upper 32 bits -+ int64_t up = upper, lo = upper; -+ lo = (lo << 52) >> 52; -+ up -= lo; -+ up = (int32_t)up; -+ lui(Rd, up); -+ addi(Rd, Rd, lo); -+ -+ // Load the rest 32 bits. -+ slli(Rd, Rd, 12); -+ addi(Rd, Rd, (int32_t)lower >> 20); -+ slli(Rd, Rd, 12); -+ lower = ((int32_t)imm << 12) >> 20; -+ addi(Rd, Rd, lower); -+ slli(Rd, Rd, 8); -+ lower = imm & 0xff; -+ addi(Rd, Rd, lower); ++ // Load upper 32 bits. upper = imm[63:32], but if imm[31] == 1 or ++ // (imm[31:28] == 0x7ff && imm[19] == 1), upper = imm[63:32] + 1. ++ int64_t lower = imm & 0xffffffff; ++ lower -= ((lower << 44) >> 44); ++ int64_t tmp_imm = ((uint64_t)(imm & 0xffffffff00000000)) + (uint64_t)lower; ++ int32_t upper = (tmp_imm - (int32_t)lower) >> 32; ++ ++ // Load upper 32 bits ++ int64_t up = upper, lo = upper; ++ lo = (lo << 52) >> 52; ++ up -= lo; ++ up = (int32_t)up; ++ lui(Rd, up); ++ addi(Rd, Rd, lo); ++ ++ // Load the rest 32 bits. ++ slli(Rd, Rd, 12); ++ addi(Rd, Rd, (int32_t)lower >> 20); ++ slli(Rd, Rd, 12); ++ lower = ((int32_t)imm << 12) >> 20; ++ addi(Rd, Rd, lower); ++ slli(Rd, Rd, 8); ++ lower = imm & 0xff; ++ addi(Rd, Rd, lower); +} + +void Assembler::li32(Register Rd, int32_t imm) { @@ -1162,15 +905,16 @@ index 000000000..40ecf1a6c + +#define INSN(NAME, REGISTER) \ + void Assembler::NAME(const Address &adr, Register temp) { \ -+ switch(adr.getMode()) { \ ++ switch (adr.getMode()) { \ + case Address::literal: { \ + code_section()->relocate(pc(), adr.rspec()); \ + NAME(adr.target(), temp); \ + break; \ + } \ + case Address::base_plus_offset: { \ -+ Address tmp_adr = form_address(adr.base(), adr.offset(), 12, temp); \ -+ jalr(REGISTER, tmp_adr.base(), tmp_adr.offset()); \ ++ int32_t offset = 0; \ ++ baseOffset(temp, adr, offset); \ ++ jalr(REGISTER, temp, offset); \ + break; \ + } \ + default: \ @@ -1230,9 +974,9 @@ index 000000000..40ecf1a6c + } +#endif + assert(is_unsigned_imm_in_range(imm64, 47, 0) || (imm64 == (uintptr_t)-1), -+ "bit 47 overflows in address constant"); -+ // Load upper 31 bits -+ int32_t imm = imm64 >> 17; ++ "48-bit overflow in address constant"); ++ // Load upper 32 bits ++ int32_t imm = imm64 >> 16; + int64_t upper = imm, lower = imm; + lower = (lower << 52) >> 52; + upper -= lower; @@ -1240,13 +984,13 @@ index 000000000..40ecf1a6c + lui(Rd, upper); + addi(Rd, Rd, lower); + -+ // Load the rest 17 bits. ++ // Load the rest 16 bits. + slli(Rd, Rd, 11); -+ addi(Rd, Rd, (imm64 >> 6) & 0x7ff); -+ slli(Rd, Rd, 6); ++ addi(Rd, Rd, (imm64 >> 5) & 0x7ff); ++ slli(Rd, Rd, 5); + -+ // Here, remove the addi instruct and return the offset directly. This offset will be used by following jalr/ld. -+ offset = imm64 & 0x3f; ++ // This offset will be used by following jalr/ld. 
++ offset = imm64 & 0x1f; +} + +void Assembler::movptr(Register Rd, uintptr_t imm64) { @@ -1259,6 +1003,13 @@ index 000000000..40ecf1a6c + addi(Rd, Rd, offset); +} + ++void Assembler::ifence() { ++ fence_i(); ++ if (UseConservativeFence) { ++ fence(ir, ir); ++ } ++} ++ +#define INSN(NAME, NEG_INSN) \ + void Assembler::NAME(Register Rs, Register Rt, const address &dest) { \ + NEG_INSN(Rt, Rs, dest); \ @@ -1313,14 +1064,14 @@ index 000000000..40ecf1a6c +} diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp new file mode 100644 -index 000000000..d4da30ed6 +index 00000000000..4923962a496 --- /dev/null +++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp -@@ -0,0 +1,2004 @@ +@@ -0,0 +1,3047 @@ +/* -+ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2019, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -1348,6 +1099,7 @@ index 000000000..d4da30ed6 + +#include "asm/register.hpp" +#include "assembler_riscv.inline.hpp" ++#include "metaprogramming/enableIf.hpp" + +#define XLEN 64 + @@ -1359,10 +1111,10 @@ index 000000000..d4da30ed6 +class Argument { + public: + enum { -+ n_int_register_parameters_c = 8, // x10, x11, ... x17 (c_rarg0, c_rarg1, ...) -+ n_float_register_parameters_c = 8, // f10, f11, ... f17 (c_farg0, c_farg1, ... ) ++ n_int_register_parameters_c = 8, // x10, x11, ... x17 (c_rarg0, c_rarg1, ...) ++ n_float_register_parameters_c = 8, // f10, f11, ... f17 (c_farg0, c_farg1, ... ) + -+ n_int_register_parameters_j = 8, // x11, ... x17, x10 (rj_rarg0, j_rarg1, ...) ++ n_int_register_parameters_j = 8, // x11, ... x17, x10 (j_rarg0, j_rarg1, ...) + n_float_register_parameters_j = 8 // f10, f11, ... f17 (j_farg0, j_farg1, ...) + }; +}; @@ -1386,7 +1138,21 @@ index 000000000..d4da30ed6 +REGISTER_DECLARATION(FloatRegister, c_farg6, f16); +REGISTER_DECLARATION(FloatRegister, c_farg7, f17); + -+// java function register(caller-save registers) ++// Symbolically name the register arguments used by the Java calling convention. ++// We have control over the convention for java so we can do what we please. ++// What pleases us is to offset the java calling convention so that when ++// we call a suitable jni method the arguments are lined up and we don't ++// have to do much shuffling. A suitable jni method is non-static and a ++// small number of arguments. 
++// ++// |------------------------------------------------------------------------| ++// | c_rarg0 c_rarg1 c_rarg2 c_rarg3 c_rarg4 c_rarg5 c_rarg6 c_rarg7 | ++// |------------------------------------------------------------------------| ++// | x10 x11 x12 x13 x14 x15 x16 x17 | ++// |------------------------------------------------------------------------| ++// | j_rarg7 j_rarg0 j_rarg1 j_rarg2 j_rarg3 j_rarg4 j_rarg5 j_rarg6 | ++// |------------------------------------------------------------------------| ++ +REGISTER_DECLARATION(Register, j_rarg0, c_rarg1); +REGISTER_DECLARATION(Register, j_rarg1, c_rarg2); +REGISTER_DECLARATION(Register, j_rarg2, c_rarg3); @@ -1396,6 +1162,8 @@ index 000000000..d4da30ed6 +REGISTER_DECLARATION(Register, j_rarg6, c_rarg7); +REGISTER_DECLARATION(Register, j_rarg7, c_rarg0); + ++// Java floating args are passed as per C ++ +REGISTER_DECLARATION(FloatRegister, j_farg0, f10); +REGISTER_DECLARATION(FloatRegister, j_farg1, f11); +REGISTER_DECLARATION(FloatRegister, j_farg2, f12); @@ -1412,6 +1180,9 @@ index 000000000..d4da30ed6 +// thread pointer +REGISTER_DECLARATION(Register, tp, x4); + ++// registers used to hold VM data either temporarily within a method ++// or across method calls ++ +// volatile (caller-save) registers + +// current method -- must be in a call-clobbered register @@ -1434,9 +1205,6 @@ index 000000000..d4da30ed6 +// locals on stack +REGISTER_DECLARATION(Register, xlocals, x24); + -+/* If you use x4(tp) as java thread pointer according to the instruction manual, -+ * it overlaps with the register used by c++ thread. -+ */ +// java thread pointer +REGISTER_DECLARATION(Register, xthread, x23); +// bytecode pointer @@ -1446,13 +1214,13 @@ index 000000000..d4da30ed6 +// Java stack pointer +REGISTER_DECLARATION(Register, esp, x20); + -+// tempory register(caller-save registers) ++// temporary register(caller-save registers) +REGISTER_DECLARATION(Register, t0, x5); +REGISTER_DECLARATION(Register, t1, x6); +REGISTER_DECLARATION(Register, t2, x7); + +const Register g_INTArgReg[Argument::n_int_register_parameters_c] = { -+ c_rarg0, c_rarg1, c_rarg2, c_rarg3, c_rarg4, c_rarg5, c_rarg6, c_rarg7 ++ c_rarg0, c_rarg1, c_rarg2, c_rarg3, c_rarg4, c_rarg5, c_rarg6, c_rarg7 +}; + +const FloatRegister g_FPArgReg[Argument::n_float_register_parameters_c] = { @@ -1469,6 +1237,7 @@ index 000000000..d4da30ed6 + + private: + Register _base; ++ Register _index; + int64_t _offset; + enum mode _mode; + @@ -1481,46 +1250,40 @@ index 000000000..d4da30ed6 + + public: + Address() -+ : _base(noreg), _offset(0), _mode(no_mode), _target(NULL) { } ++ : _base(noreg), _index(noreg), _offset(0), _mode(no_mode), _target(NULL) { } + Address(Register r) -+ : _base(r), _offset(0), _mode(base_plus_offset), _target(NULL) { } -+ Address(Register r, int o) -+ : _base(r), _offset(o), _mode(base_plus_offset), _target(NULL) { } -+ Address(Register r, long o) -+ : _base(r), _offset(o), _mode(base_plus_offset), _target(NULL) { } -+ Address(Register r, long long o) -+ : _base(r), _offset(o), _mode(base_plus_offset), _target(NULL) { } -+ Address(Register r, unsigned int o) -+ : _base(r), _offset(o), _mode(base_plus_offset), _target(NULL) { } -+ Address(Register r, unsigned long o) -+ : _base(r), _offset(o), _mode(base_plus_offset), _target(NULL) { } -+ Address(Register r, unsigned long long o) -+ : _base(r), _offset(o), _mode(base_plus_offset), _target(NULL) { } -+#ifdef ASSERT ++ : _base(r), _index(noreg), _offset(0), _mode(base_plus_offset), _target(NULL) { } ++ ++ template::value)> ++ 
Address(Register r, T o) ++ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) {} ++ + Address(Register r, ByteSize disp) -+ : _base(r), _offset(in_bytes(disp)), _mode(base_plus_offset), _target(NULL) { } -+#endif ++ : Address(r, in_bytes(disp)) {} + Address(address target, RelocationHolder const& rspec) + : _base(noreg), ++ _index(noreg), + _offset(0), + _mode(literal), + _rspec(rspec), -+ _target(target) { } ++ _target(target) { } + Address(address target, relocInfo::relocType rtype = relocInfo::external_word_type); + + const Register base() const { -+ guarantee((_mode == base_plus_offset || _mode == pcrel || _mode == literal), "wrong mode"); ++ guarantee((_mode == base_plus_offset | _mode == pcrel | _mode == literal), "wrong mode"); + return _base; + } + long offset() const { + return _offset; + } -+ ++ Register index() const { ++ return _index; ++ } + mode getMode() const { + return _mode; + } + -+ bool uses(Register reg) const { return _base == reg;} ++ bool uses(Register reg) const { return _base == reg; } + const address target() const { return _target; } + const RelocationHolder& rspec() const { return _rspec; } + ~Address() { @@ -1575,6 +1338,14 @@ index 000000000..d4da30ed6 + + enum { instruction_size = 4 }; + ++ //---< calculate length of instruction >--- ++ // We just use the values set above. ++ // instruction must start at passed address ++ static unsigned int instr_len(unsigned char *instr) { return instruction_size; } ++ ++ //---< longest instructions >--- ++ static unsigned int instr_maxlen() { return instruction_size; } ++ + enum RoundingMode { + rne = 0b000, // round to Nearest, ties to Even + rtz = 0b001, // round towards Zero @@ -1584,34 +1355,41 @@ index 000000000..d4da30ed6 + rdy = 0b111, // in instruction's rm field, selects dynamic rounding mode.In Rounding Mode register, Invalid. 
+ }; + -+ Address form_address_complex(Register base, int64_t offset, int8_t expect_offbits, Register temp = t0) { -+ assert_different_registers(noreg, temp, base); -+ int64_t upper = offset, lower = offset; -+ -+ int8_t shift = 64 - expect_offbits; -+ lower = (offset << shift) >> shift; -+ upper -= lower; -+ -+ li(temp, upper); -+ add(temp, temp, base); -+ return Address(temp, lower); ++ void baseOffset32(Register Rd, const Address &adr, int32_t &offset) { ++ assert(Rd != noreg, "Rd must not be empty register!"); ++ guarantee(Rd != adr.base(), "should use different registers!"); ++ if (is_offset_in_range(adr.offset(), 32)) { ++ int32_t imm = adr.offset(); ++ int32_t upper = imm, lower = imm; ++ lower = (imm << 20) >> 20; ++ upper -= lower; ++ lui(Rd, upper); ++ offset = lower; ++ } else { ++ movptr_with_offset(Rd, (address)(uintptr_t)adr.offset(), offset); ++ } ++ add(Rd, Rd, adr.base()); + } + -+ Address form_address(Register base, int64_t offset, int8_t expect_offbits, Register temp = t0) { -+ if (is_offset_in_range(offset, expect_offbits)) { -+ return Address(base, offset); ++ void baseOffset(Register Rd, const Address &adr, int32_t &offset) { ++ if (is_offset_in_range(adr.offset(), 12)) { ++ assert(Rd != noreg, "Rd must not be empty register!"); ++ addi(Rd, adr.base(), adr.offset()); ++ offset = 0; ++ } else { ++ baseOffset32(Rd, adr, offset); + } -+ return form_address_complex(base, offset, expect_offbits, temp); + } + -+ void li(Register Rd, int64_t imm); // optimized load immediate ++ void _li(Register Rd, int64_t imm); // optimized load immediate + void li32(Register Rd, int32_t imm); + void li64(Register Rd, int64_t imm); + void movptr(Register Rd, address addr); + void movptr_with_offset(Register Rd, address addr, int32_t &offset); + void movptr(Register Rd, uintptr_t imm64); ++ void ifence(); + void j(const address &dest, Register temp = t0); -+ void j(const Address &adr, Register temp = t0) ; ++ void j(const Address &adr, Register temp = t0); + void j(Label &l, Register temp = t0); + void jal(Label &l, Register temp = t0); + void jal(const address &dest, Register temp = t0); @@ -1633,7 +1411,7 @@ index 000000000..d4da30ed6 + static inline uint32_t extract(uint32_t val, unsigned msb, unsigned lsb) { + assert_cond(msb >= lsb && msb <= 31); + unsigned nbits = msb - lsb + 1; -+ uint32_t mask = checked_cast(right_n_bits(nbits)); ++ uint32_t mask = (1U << nbits) - 1; + uint32_t result = val >> lsb; + result &= mask; + return result; @@ -1650,8 +1428,8 @@ index 000000000..d4da30ed6 + assert_cond(a != NULL); + assert_cond(msb >= lsb && msb <= 31); + unsigned nbits = msb - lsb + 1; -+ guarantee(val < (1ULL << nbits), "Field too big for insn"); -+ unsigned mask = checked_cast(right_n_bits(nbits)); ++ guarantee(val < (1U << nbits), "Field too big for insn"); ++ unsigned mask = (1U << nbits) - 1; + val <<= lsb; + mask <<= lsb; + unsigned target = *(unsigned *)a; @@ -1680,11 +1458,11 @@ index 000000000..d4da30ed6 + emit_int32((jint)insn); + } + -+ void halt() { ++ void _halt() { + emit_int32(0); + } + -+// Rigster Instruction ++// Register Instruction +#define INSN(NAME, op, funct3, funct7) \ + void NAME(Register Rd, Register Rs1, Register Rs2) { \ + unsigned insn = 0; \ @@ -1697,18 +1475,18 @@ index 000000000..d4da30ed6 + emit(insn); \ + } + -+ INSN(add, 0b0110011, 0b000, 0b0000000); -+ INSN(sub, 0b0110011, 0b000, 0b0100000); -+ INSN(andr, 0b0110011, 0b111, 0b0000000); -+ INSN(orr, 0b0110011, 0b110, 0b0000000); -+ INSN(xorr, 0b0110011, 0b100, 0b0000000); ++ INSN(_add, 0b0110011, 0b000, 
0b0000000); ++ INSN(_sub, 0b0110011, 0b000, 0b0100000); ++ INSN(_andr, 0b0110011, 0b111, 0b0000000); ++ INSN(_orr, 0b0110011, 0b110, 0b0000000); ++ INSN(_xorr, 0b0110011, 0b100, 0b0000000); + INSN(sll, 0b0110011, 0b001, 0b0000000); + INSN(sra, 0b0110011, 0b101, 0b0100000); + INSN(srl, 0b0110011, 0b101, 0b0000000); + INSN(slt, 0b0110011, 0b010, 0b0000000); + INSN(sltu, 0b0110011, 0b011, 0b0000000); -+ INSN(addw, 0b0111011, 0b000, 0b0000000); -+ INSN(subw, 0b0111011, 0b000, 0b0100000); ++ INSN(_addw, 0b0111011, 0b000, 0b0000000); ++ INSN(_subw, 0b0111011, 0b000, 0b0100000); + INSN(sllw, 0b0111011, 0b001, 0b0000000); + INSN(sraw, 0b0111011, 0b101, 0b0100000); + INSN(srlw, 0b0111011, 0b101, 0b0000000); @@ -1726,22 +1504,20 @@ index 000000000..d4da30ed6 + INSN(remw, 0b0111011, 0b110, 0b0000001); + INSN(remuw, 0b0111011, 0b111, 0b0000001); + -+ // Vector Configuration Instruction -+ INSN(vsetvl, 0b1010111, 0b111, 0b1000000); -+ +#undef INSN + +#define INSN_ENTRY_RELOC(result_type, header) \ + result_type header { \ ++ InstructionMark im(this); \ + guarantee(rtype == relocInfo::internal_word_type, \ + "only internal_word_type relocs make sense here"); \ -+ code_section()->relocate(pc(), InternalAddress(dest).rspec()); ++ code_section()->relocate(inst_mark(), InternalAddress(dest).rspec()); + + // Load/store register (all modes) +#define INSN(NAME, op, funct3) \ + void NAME(Register Rd, Register Rs, const int32_t offset) { \ -+ unsigned insn = 0; \ + guarantee(is_offset_in_range(offset, 12), "offset is invalid."); \ ++ unsigned insn = 0; \ + int32_t val = offset & 0xfff; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ @@ -1749,7 +1525,19 @@ index 000000000..d4da30ed6 + patch_reg((address)&insn, 7, Rd); \ + patch((address)&insn, 31, 20, val); \ + emit(insn); \ -+ } \ ++ } ++ ++ INSN(lb, 0b0000011, 0b000); ++ INSN(lbu, 0b0000011, 0b100); ++ INSN(lh, 0b0000011, 0b001); ++ INSN(lhu, 0b0000011, 0b101); ++ INSN(_lw, 0b0000011, 0b010); ++ INSN(lwu, 0b0000011, 0b110); ++ INSN(_ld, 0b0000011, 0b011); ++ ++#undef INSN ++ ++#define INSN(NAME) \ + void NAME(Register Rd, address dest) { \ + assert_cond(dest != NULL); \ + int64_t distance = (dest - pc()); \ @@ -1766,7 +1554,7 @@ index 000000000..d4da30ed6 + NAME(Rd, dest); \ + } \ + void NAME(Register Rd, const Address &adr, Register temp = t0) { \ -+ switch(adr.getMode()) { \ ++ switch (adr.getMode()) { \ + case Address::literal: { \ + code_section()->relocate(pc(), adr.rspec()); \ + NAME(Rd, adr.target()); \ @@ -1776,7 +1564,14 @@ index 000000000..d4da30ed6 + if (is_offset_in_range(adr.offset(), 12)) { \ + NAME(Rd, adr.base(), adr.offset()); \ + } else { \ -+ NAME(Rd, form_address_complex(adr.base(), adr.offset(), 12, Rd == adr.base() ? 
temp : Rd)); \ ++ int32_t offset = 0; \ ++ if (Rd == adr.base()) { \ ++ baseOffset32(temp, adr, offset); \ ++ NAME(Rd, temp, offset); \ ++ } else { \ ++ baseOffset32(Rd, adr, offset); \ ++ NAME(Rd, Rd, offset); \ ++ } \ + } \ + break; \ + } \ @@ -1788,20 +1583,20 @@ index 000000000..d4da30ed6 + wrap_label(Rd, L, &Assembler::NAME); \ + } + -+ INSN(lb, 0b0000011, 0b000); -+ INSN(lbu, 0b0000011, 0b100); -+ INSN(ld, 0b0000011, 0b011); -+ INSN(lh, 0b0000011, 0b001); -+ INSN(lhu, 0b0000011, 0b101); -+ INSN(lw, 0b0000011, 0b010); -+ INSN(lwu, 0b0000011, 0b110); ++ INSN(lb); ++ INSN(lbu); ++ INSN(lh); ++ INSN(lhu); ++ INSN(lw); ++ INSN(lwu); ++ INSN(ld); + +#undef INSN + +#define INSN(NAME, op, funct3) \ + void NAME(FloatRegister Rd, Register Rs, const int32_t offset) { \ -+ unsigned insn = 0; \ + guarantee(is_offset_in_range(offset, 12), "offset is invalid."); \ ++ unsigned insn = 0; \ + uint32_t val = offset & 0xfff; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ @@ -1809,7 +1604,14 @@ index 000000000..d4da30ed6 + patch_reg((address)&insn, 7, Rd); \ + patch((address)&insn, 31, 20, val); \ + emit(insn); \ -+ } \ ++ } ++ ++ INSN(flw, 0b0000111, 0b010); ++ INSN(_fld, 0b0000111, 0b011); ++ ++#undef INSN ++ ++#define INSN(NAME) \ + void NAME(FloatRegister Rd, address dest, Register temp = t0) { \ + assert_cond(dest != NULL); \ + int64_t distance = (dest - pc()); \ @@ -1826,7 +1628,7 @@ index 000000000..d4da30ed6 + NAME(Rd, dest, temp); \ + } \ + void NAME(FloatRegister Rd, const Address &adr, Register temp = t0) { \ -+ switch(adr.getMode()) { \ ++ switch (adr.getMode()) { \ + case Address::literal: { \ + code_section()->relocate(pc(), adr.rspec()); \ + NAME(Rd, adr.target(), temp); \ @@ -1836,7 +1638,9 @@ index 000000000..d4da30ed6 + if (is_offset_in_range(adr.offset(), 12)) { \ + NAME(Rd, adr.base(), adr.offset()); \ + } else { \ -+ NAME(Rd, form_address_complex(adr.base(), adr.offset(), 12, temp)); \ ++ int32_t offset = 0; \ ++ baseOffset32(temp, adr, offset); \ ++ NAME(Rd, temp, offset); \ + } \ + break; \ + } \ @@ -1845,14 +1649,14 @@ index 000000000..d4da30ed6 + } \ + } + -+ INSN(flw, 0b0000111, 0b010); -+ INSN(fld, 0b0000111, 0b011); ++ INSN(flw); ++ INSN(fld); +#undef INSN + +#define INSN(NAME, op, funct3) \ + void NAME(Register Rs1, Register Rs2, const int64_t offset) { \ -+ unsigned insn = 0; \ + guarantee(is_imm_in_range(offset, 12, 1), "offset is invalid."); \ ++ unsigned insn = 0; \ + uint32_t val = offset & 0x1fff; \ + uint32_t val11 = (val >> 11) & 0x1; \ + uint32_t val12 = (val >> 12) & 0x1; \ @@ -1867,7 +1671,18 @@ index 000000000..d4da30ed6 + patch((address)&insn, 30, 25, high); \ + patch((address)&insn, 31, val12); \ + emit(insn); \ -+ } \ ++ } ++ ++ INSN(_beq, 0b1100011, 0b000); ++ INSN(_bne, 0b1100011, 0b001); ++ INSN(bge, 0b1100011, 0b101); ++ INSN(bgeu, 0b1100011, 0b111); ++ INSN(blt, 0b1100011, 0b100); ++ INSN(bltu, 0b1100011, 0b110); ++ ++#undef INSN ++ ++#define INSN(NAME) \ + void NAME(Register Rs1, Register Rs2, const address dest) { \ + assert_cond(dest != NULL); \ + int64_t offset = (dest - pc()); \ @@ -1878,12 +1693,12 @@ index 000000000..d4da30ed6 + NAME(Rs1, Rs2, dest); \ + } + -+ INSN(beq, 0b1100011, 0b000); -+ INSN(bge, 0b1100011, 0b101); -+ INSN(bgeu, 0b1100011, 0b111); -+ INSN(blt, 0b1100011, 0b100); -+ INSN(bltu, 0b1100011, 0b110); -+ INSN(bne, 0b1100011, 0b001); ++ INSN(beq); ++ INSN(bne); ++ INSN(bge); ++ INSN(bgeu); ++ INSN(blt); ++ INSN(bltu); + +#undef INSN + @@ -1903,8 +1718,8 @@ index 000000000..d4da30ed6 + +#define 
INSN(NAME, REGISTER, op, funct3) \ + void NAME(REGISTER Rs1, Register Rs2, const int32_t offset) { \ -+ unsigned insn = 0; \ + guarantee(is_offset_in_range(offset, 12), "offset is invalid."); \ ++ unsigned insn = 0; \ + uint32_t val = offset & 0xfff; \ + uint32_t low = val & 0x1f; \ + uint32_t high = (val >> 5) & 0x7f; \ @@ -1916,16 +1731,27 @@ index 000000000..d4da30ed6 + patch((address)&insn, 31, 25, high); \ + emit(insn); \ + } \ ++ ++ INSN(sb, Register, 0b0100011, 0b000); ++ INSN(sh, Register, 0b0100011, 0b001); ++ INSN(_sw, Register, 0b0100011, 0b010); ++ INSN(_sd, Register, 0b0100011, 0b011); ++ INSN(fsw, FloatRegister, 0b0100111, 0b010); ++ INSN(_fsd, FloatRegister, 0b0100111, 0b011); ++ ++#undef INSN ++ ++#define INSN(NAME, REGISTER) \ + INSN_ENTRY_RELOC(void, NAME(REGISTER Rs, address dest, relocInfo::relocType rtype, Register temp = t0)) \ + NAME(Rs, dest, temp); \ + } + -+ INSN(sb, Register, 0b0100011, 0b000); -+ INSN(sh, Register, 0b0100011, 0b001); -+ INSN(sw, Register, 0b0100011, 0b010); -+ INSN(sd, Register, 0b0100011, 0b011); -+ INSN(fsw, FloatRegister, 0b0100111, 0b010); -+ INSN(fsd, FloatRegister, 0b0100111, 0b011); ++ INSN(sb, Register); ++ INSN(sh, Register); ++ INSN(sw, Register); ++ INSN(sd, Register); ++ INSN(fsw, FloatRegister); ++ INSN(fsd, FloatRegister); + +#undef INSN + @@ -1944,7 +1770,7 @@ index 000000000..d4da30ed6 + } \ + } \ + void NAME(Register Rs, const Address &adr, Register temp = t0) { \ -+ switch(adr.getMode()) { \ ++ switch (adr.getMode()) { \ + case Address::literal: { \ + assert_different_registers(Rs, temp); \ + code_section()->relocate(pc(), adr.rspec()); \ @@ -1955,8 +1781,10 @@ index 000000000..d4da30ed6 + if (is_offset_in_range(adr.offset(), 12)) { \ + NAME(Rs, adr.base(), adr.offset()); \ + } else { \ ++ int32_t offset= 0; \ + assert_different_registers(Rs, temp); \ -+ NAME(Rs, form_address_complex(adr.base(), adr.offset(), 12, temp)); \ ++ baseOffset32(temp, adr, offset); \ ++ NAME(Rs, temp, offset); \ + } \ + break; \ + } \ @@ -1986,7 +1814,7 @@ index 000000000..d4da30ed6 + } \ + } \ + void NAME(FloatRegister Rs, const Address &adr, Register temp = t0) { \ -+ switch(adr.getMode()) { \ ++ switch (adr.getMode()) { \ + case Address::literal: { \ + code_section()->relocate(pc(), adr.rspec()); \ + NAME(Rs, adr.target(), temp); \ @@ -1996,7 +1824,9 @@ index 000000000..d4da30ed6 + if (is_offset_in_range(adr.offset(), 12)) { \ + NAME(Rs, adr.base(), adr.offset()); \ + } else { \ -+ NAME(Rs, form_address_complex(adr.base(), adr.offset(), 12, temp)); \ ++ int32_t offset = 0; \ ++ baseOffset32(temp, adr, offset); \ ++ NAME(Rs, temp, offset); \ + } \ + break; \ + } \ @@ -2050,8 +1880,8 @@ index 000000000..d4da30ed6 + +#define INSN(NAME, op) \ + void NAME(Register Rd, const int32_t offset) { \ -+ unsigned insn = 0; \ + guarantee(is_imm_in_range(offset, 20, 1), "offset is invalid."); \ ++ unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch_reg((address)&insn, 7, Rd); \ + patch((address)&insn, 19, 12, (uint32_t)((offset >> 12) & 0xff)); \ @@ -2059,7 +1889,13 @@ index 000000000..d4da30ed6 + patch((address)&insn, 30, 21, (uint32_t)((offset >> 1) & 0x3ff)); \ + patch((address)&insn, 31, (uint32_t)((offset >> 20) & 0x1)); \ + emit(insn); \ -+ } \ ++ } ++ ++ INSN(_jal, 0b1101111); ++ ++#undef INSN ++ ++#define INSN(NAME) \ + void NAME(Register Rd, const address dest, Register temp = t0) { \ + assert_cond(dest != NULL); \ + int64_t offset = dest - pc(); \ @@ -2077,7 +1913,7 @@ index 000000000..d4da30ed6 + wrap_label(Rd, L, temp, &Assembler::NAME); 
\ + } + -+ INSN(jal, 0b1101111); ++ INSN(jal); + +#undef INSN + @@ -2085,8 +1921,8 @@ index 000000000..d4da30ed6 + +#define INSN(NAME, op, funct) \ + void NAME(Register Rd, Register Rs, const int32_t offset) { \ -+ unsigned insn = 0; \ + guarantee(is_offset_in_range(offset, 12), "offset is invalid."); \ ++ unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch_reg((address)&insn, 7, Rd); \ + patch((address)&insn, 14, 12, funct); \ @@ -2096,7 +1932,7 @@ index 000000000..d4da30ed6 + emit(insn); \ + } + -+ INSN(jalr, 0b1100111, 0b000); ++ INSN(_jalr, 0b1100111, 0b000); + +#undef INSN + @@ -2130,8 +1966,10 @@ index 000000000..d4da30ed6 + emit(insn); \ + } + ++ INSN(fence_i, 0b0001111, 0b001, 0b000000000000); + INSN(ecall, 0b1110011, 0b000, 0b000000000000); -+ INSN(ebreak, 0b1110011, 0b000, 0b000000000001); ++ INSN(_ebreak, 0b1110011, 0b000, 0b000000000001); ++ +#undef INSN + +enum Aqrl {relaxed = 0b00, rl = 0b01, aq = 0b10, aqrl = 0b11}; @@ -2239,12 +2077,12 @@ index 000000000..d4da30ed6 + emit(insn); \ + } + -+ INSN(addi, 0b0010011, 0b000); -+ INSN(slti, 0b0010011, 0b010); -+ INSN(addiw, 0b0011011, 0b000); -+ INSN(and_imm12, 0b0010011, 0b111); -+ INSN(ori, 0b0010011, 0b110); -+ INSN(xori, 0b0010011, 0b100); ++ INSN(_addi, 0b0010011, 0b000); ++ INSN(slti, 0b0010011, 0b010); ++ INSN(_addiw, 0b0011011, 0b000); ++ INSN(_and_imm12, 0b0010011, 0b111); ++ INSN(ori, 0b0010011, 0b110); ++ INSN(xori, 0b0010011, 0b100); + +#undef INSN + @@ -2278,9 +2116,9 @@ index 000000000..d4da30ed6 + emit(insn); \ + } + -+ INSN(slli, 0b0010011, 0b001, 0b000000); -+ INSN(srai, 0b0010011, 0b101, 0b010000); -+ INSN(srli, 0b0010011, 0b101, 0b000000); ++ INSN(_slli, 0b0010011, 0b001, 0b000000); ++ INSN(_srai, 0b0010011, 0b101, 0b010000); ++ INSN(_srli, 0b0010011, 0b101, 0b000000); + +#undef INSN + @@ -2316,7 +2154,7 @@ index 000000000..d4da30ed6 + emit(insn); \ + } + -+ INSN(lui, 0b0110111); ++ INSN(_lui, 0b0110111); + INSN(auipc, 0b0010111); + +#undef INSN @@ -2592,6 +2430,23 @@ index 000000000..d4da30ed6 + +#undef patch_vtype + ++#define INSN(NAME, op, funct3, funct7) \ ++ void NAME(Register Rd, Register Rs1, Register Rs2) { \ ++ unsigned insn = 0; \ ++ patch((address)&insn, 6, 0, op); \ ++ patch((address)&insn, 14, 12, funct3); \ ++ patch((address)&insn, 31, 25, funct7); \ ++ patch_reg((address)&insn, 7, Rd); \ ++ patch_reg((address)&insn, 15, Rs1); \ ++ patch_reg((address)&insn, 20, Rs2); \ ++ emit(insn); \ ++ } ++ ++ // Vector Configuration Instruction ++ INSN(vsetvl, 0b1010111, 0b111, 0b1000000); ++ ++#undef INSN ++ +enum VectorMask { + v0_t = 0b0, + unmasked = 0b1 @@ -3159,7 +3014,6 @@ index 000000000..d4da30ed6 + +// ==================================== +// RISC-V Bit-Manipulation Extension -+// Currently only support Zba and Zbb. +// ==================================== +#define INSN(NAME, op, funct3, funct7) \ + void NAME(Register Rd, Register Rs1, Register Rs2) { \ @@ -3238,7 +3092,7 @@ index 000000000..d4da30ed6 +#undef INSN + +#define INSN(NAME, op, funct3, funct7) \ -+ void NAME(Register Rd, Register Rs1, unsigned shamt){ \ ++ void NAME(Register Rd, Register Rs1, unsigned shamt) {\ + guarantee(shamt <= 0x1f, "Shamt is invalid"); \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ @@ -3251,9 +3105,966 @@ index 000000000..d4da30ed6 + } + + INSN(roriw, 0b0011011, 0b101, 0b0110000); -+ ++ ++#undef INSN ++ ++// ======================================== ++// RISC-V Compressed Instructions Extension ++// ======================================== ++// Note: ++// 1. 
When UseRVC is enabled, 32-bit instructions under 'CompressibleRegion's will be ++// transformed to 16-bit instructions if compressible. ++// 2. RVC instructions in Assembler always begin with 'c_' prefix, as 'c_li', ++// but most of time we have no need to explicitly use these instructions. ++// 3. 'CompressibleRegion' is introduced to hint instructions in this Region's RTTI range ++// are qualified to be compressed with their 2-byte versions. ++// An example: ++// ++// CompressibleRegion cr(_masm); ++// __ andr(...); // this instruction could change to c.and if able to ++// ++// 4. Using -XX:PrintAssemblyOptions=no-aliases could distinguish RVC instructions from ++// normal ones. ++// ++ ++private: ++ bool _in_compressible_region; ++public: ++ bool in_compressible_region() const { return _in_compressible_region; } ++ void set_in_compressible_region(bool b) { _in_compressible_region = b; } ++public: ++ ++ // a compressible region ++ class CompressibleRegion : public StackObj { ++ protected: ++ Assembler *_masm; ++ bool _saved_in_compressible_region; ++ public: ++ CompressibleRegion(Assembler *_masm) ++ : _masm(_masm) ++ , _saved_in_compressible_region(_masm->in_compressible_region()) { ++ _masm->set_in_compressible_region(true); ++ } ++ ~CompressibleRegion() { ++ _masm->set_in_compressible_region(_saved_in_compressible_region); ++ } ++ }; ++ ++ // patch a 16-bit instruction. ++ static void c_patch(address a, unsigned msb, unsigned lsb, uint16_t val) { ++ assert_cond(a != NULL); ++ assert_cond(msb >= lsb && msb <= 15); ++ unsigned nbits = msb - lsb + 1; ++ guarantee(val < (1U << nbits), "Field too big for insn"); ++ uint16_t mask = (1U << nbits) - 1; ++ val <<= lsb; ++ mask <<= lsb; ++ uint16_t target = *(uint16_t *)a; ++ target &= ~mask; ++ target |= val; ++ *(uint16_t *)a = target; ++ } ++ ++ static void c_patch(address a, unsigned bit, uint16_t val) { ++ c_patch(a, bit, bit, val); ++ } ++ ++ // patch a 16-bit instruction with a general purpose register ranging [0, 31] (5 bits) ++ static void c_patch_reg(address a, unsigned lsb, Register reg) { ++ c_patch(a, lsb + 4, lsb, reg->encoding_nocheck()); ++ } ++ ++ // patch a 16-bit instruction with a general purpose register ranging [8, 15] (3 bits) ++ static void c_patch_compressed_reg(address a, unsigned lsb, Register reg) { ++ c_patch(a, lsb + 2, lsb, reg->compressed_encoding_nocheck()); ++ } ++ ++ // patch a 16-bit instruction with a float register ranging [0, 31] (5 bits) ++ static void c_patch_reg(address a, unsigned lsb, FloatRegister reg) { ++ c_patch(a, lsb + 4, lsb, reg->encoding_nocheck()); ++ } ++ ++ // patch a 16-bit instruction with a float register ranging [8, 15] (3 bits) ++ static void c_patch_compressed_reg(address a, unsigned lsb, FloatRegister reg) { ++ c_patch(a, lsb + 2, lsb, reg->compressed_encoding_nocheck()); ++ } ++ ++// -------------- RVC Instruction Definitions -------------- ++ ++ void c_nop() { ++ c_addi(x0, 0); ++ } ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME(Register Rd_Rs1, int32_t imm) { \ ++ assert_cond(is_imm_in_range(imm, 6, 0)); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch((address)&insn, 6, 2, (imm & right_n_bits(5))); \ ++ c_patch_reg((address)&insn, 7, Rd_Rs1); \ ++ c_patch((address)&insn, 12, 12, (imm & nth_bit(5)) >> 5); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_addi, 0b000, 0b01); ++ INSN(c_addiw, 0b001, 0b01); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME(int32_t imm) { \ ++ 
assert_cond(is_imm_in_range(imm, 10, 0)); \ ++ assert_cond((imm & 0b1111) == 0); \ ++ assert_cond(imm != 0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch((address)&insn, 2, 2, (imm & nth_bit(5)) >> 5); \ ++ c_patch((address)&insn, 4, 3, (imm & right_n_bits(9)) >> 7); \ ++ c_patch((address)&insn, 5, 5, (imm & nth_bit(6)) >> 6); \ ++ c_patch((address)&insn, 6, 6, (imm & nth_bit(4)) >> 4); \ ++ c_patch_reg((address)&insn, 7, sp); \ ++ c_patch((address)&insn, 12, 12, (imm & nth_bit(9)) >> 9); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_addi16sp, 0b011, 0b01); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME(Register Rd, uint32_t uimm) { \ ++ assert_cond(is_unsigned_imm_in_range(uimm, 10, 0)); \ ++ assert_cond((uimm & 0b11) == 0); \ ++ assert_cond(uimm != 0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch_compressed_reg((address)&insn, 2, Rd); \ ++ c_patch((address)&insn, 5, 5, (uimm & nth_bit(3)) >> 3); \ ++ c_patch((address)&insn, 6, 6, (uimm & nth_bit(2)) >> 2); \ ++ c_patch((address)&insn, 10, 7, (uimm & right_n_bits(10)) >> 6); \ ++ c_patch((address)&insn, 12, 11, (uimm & right_n_bits(6)) >> 4); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_addi4spn, 0b000, 0b00); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME(Register Rd_Rs1, uint32_t shamt) { \ ++ assert_cond(is_unsigned_imm_in_range(shamt, 6, 0)); \ ++ assert_cond(shamt != 0); \ ++ assert_cond(Rd_Rs1 != x0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch((address)&insn, 6, 2, (shamt & right_n_bits(5))); \ ++ c_patch_reg((address)&insn, 7, Rd_Rs1); \ ++ c_patch((address)&insn, 12, 12, (shamt & nth_bit(5)) >> 5); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_slli, 0b000, 0b10); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, funct2, op) \ ++ void NAME(Register Rd_Rs1, uint32_t shamt) { \ ++ assert_cond(is_unsigned_imm_in_range(shamt, 6, 0)); \ ++ assert_cond(shamt != 0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch((address)&insn, 6, 2, (shamt & right_n_bits(5))); \ ++ c_patch_compressed_reg((address)&insn, 7, Rd_Rs1); \ ++ c_patch((address)&insn, 11, 10, funct2); \ ++ c_patch((address)&insn, 12, 12, (shamt & nth_bit(5)) >> 5); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_srli, 0b100, 0b00, 0b01); ++ INSN(c_srai, 0b100, 0b01, 0b01); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, funct2, op) \ ++ void NAME(Register Rd_Rs1, int32_t imm) { \ ++ assert_cond(is_imm_in_range(imm, 6, 0)); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch((address)&insn, 6, 2, (imm & right_n_bits(5))); \ ++ c_patch_compressed_reg((address)&insn, 7, Rd_Rs1); \ ++ c_patch((address)&insn, 11, 10, funct2); \ ++ c_patch((address)&insn, 12, 12, (imm & nth_bit(5)) >> 5); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_andi, 0b100, 0b10, 0b01); ++ ++#undef INSN ++ ++#define INSN(NAME, funct6, funct2, op) \ ++ void NAME(Register Rd_Rs1, Register Rs2) { \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch_compressed_reg((address)&insn, 2, Rs2); \ ++ c_patch((address)&insn, 6, 5, funct2); \ ++ c_patch_compressed_reg((address)&insn, 7, Rd_Rs1); \ ++ c_patch((address)&insn, 15, 10, funct6); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_sub, 0b100011, 
0b00, 0b01); ++ INSN(c_xor, 0b100011, 0b01, 0b01); ++ INSN(c_or, 0b100011, 0b10, 0b01); ++ INSN(c_and, 0b100011, 0b11, 0b01); ++ INSN(c_subw, 0b100111, 0b00, 0b01); ++ INSN(c_addw, 0b100111, 0b01, 0b01); ++ ++#undef INSN ++ ++#define INSN(NAME, funct4, op) \ ++ void NAME(Register Rd_Rs1, Register Rs2) { \ ++ assert_cond(Rd_Rs1 != x0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch_reg((address)&insn, 2, Rs2); \ ++ c_patch_reg((address)&insn, 7, Rd_Rs1); \ ++ c_patch((address)&insn, 15, 12, funct4); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_mv, 0b1000, 0b10); ++ INSN(c_add, 0b1001, 0b10); ++ ++#undef INSN ++ ++#define INSN(NAME, funct4, op) \ ++ void NAME(Register Rs1) { \ ++ assert_cond(Rs1 != x0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch_reg((address)&insn, 2, x0); \ ++ c_patch_reg((address)&insn, 7, Rs1); \ ++ c_patch((address)&insn, 15, 12, funct4); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_jr, 0b1000, 0b10); ++ INSN(c_jalr, 0b1001, 0b10); ++ ++#undef INSN ++ ++ typedef void (Assembler::* j_c_insn)(address dest); ++ typedef void (Assembler::* compare_and_branch_c_insn)(Register Rs1, address dest); ++ ++ void wrap_label(Label &L, j_c_insn insn) { ++ if (L.is_bound()) { ++ (this->*insn)(target(L)); ++ } else { ++ L.add_patch_at(code(), locator()); ++ (this->*insn)(pc()); ++ } ++ } ++ ++ void wrap_label(Label &L, Register r, compare_and_branch_c_insn insn) { ++ if (L.is_bound()) { ++ (this->*insn)(r, target(L)); ++ } else { ++ L.add_patch_at(code(), locator()); ++ (this->*insn)(r, pc()); ++ } ++ } ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME(int32_t offset) { \ ++ assert_cond(is_imm_in_range(offset, 11, 1)); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch((address)&insn, 2, 2, (offset & nth_bit(5)) >> 5); \ ++ c_patch((address)&insn, 5, 3, (offset & right_n_bits(4)) >> 1); \ ++ c_patch((address)&insn, 6, 6, (offset & nth_bit(7)) >> 7); \ ++ c_patch((address)&insn, 7, 7, (offset & nth_bit(6)) >> 6); \ ++ c_patch((address)&insn, 8, 8, (offset & nth_bit(10)) >> 10); \ ++ c_patch((address)&insn, 10, 9, (offset & right_n_bits(10)) >> 8); \ ++ c_patch((address)&insn, 11, 11, (offset & nth_bit(4)) >> 4); \ ++ c_patch((address)&insn, 12, 12, (offset & nth_bit(11)) >> 11); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } \ ++ void NAME(address dest) { \ ++ assert_cond(dest != NULL); \ ++ int64_t distance = dest - pc(); \ ++ assert_cond(is_imm_in_range(distance, 11, 1)); \ ++ c_j(distance); \ ++ } \ ++ void NAME(Label &L) { \ ++ wrap_label(L, &Assembler::NAME); \ ++ } ++ ++ INSN(c_j, 0b101, 0b01); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME(Register Rs1, int32_t imm) { \ ++ assert_cond(is_imm_in_range(imm, 8, 1)); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch((address)&insn, 2, 2, (imm & nth_bit(5)) >> 5); \ ++ c_patch((address)&insn, 4, 3, (imm & right_n_bits(3)) >> 1); \ ++ c_patch((address)&insn, 6, 5, (imm & right_n_bits(8)) >> 6); \ ++ c_patch_compressed_reg((address)&insn, 7, Rs1); \ ++ c_patch((address)&insn, 11, 10, (imm & right_n_bits(5)) >> 3); \ ++ c_patch((address)&insn, 12, 12, (imm & nth_bit(8)) >> 8); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } \ ++ void NAME(Register Rs1, address dest) { \ ++ assert_cond(dest != NULL); \ ++ int64_t distance = dest - pc(); \ ++ assert_cond(is_imm_in_range(distance, 8, 1)); \ ++ NAME(Rs1, distance); \ ++ } \ ++ void NAME(Register Rs1, 
Label &L) { \ ++ wrap_label(L, Rs1, &Assembler::NAME); \ ++ } ++ ++ INSN(c_beqz, 0b110, 0b01); ++ INSN(c_bnez, 0b111, 0b01); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME(Register Rd, int32_t imm) { \ ++ assert_cond(is_imm_in_range(imm, 18, 0)); \ ++ assert_cond((imm & 0xfff) == 0); \ ++ assert_cond(imm != 0); \ ++ assert_cond(Rd != x0 && Rd != x2); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch((address)&insn, 6, 2, (imm & right_n_bits(17)) >> 12); \ ++ c_patch_reg((address)&insn, 7, Rd); \ ++ c_patch((address)&insn, 12, 12, (imm & nth_bit(17)) >> 17); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_lui, 0b011, 0b01); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME(Register Rd, int32_t imm) { \ ++ assert_cond(is_imm_in_range(imm, 6, 0)); \ ++ assert_cond(Rd != x0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch((address)&insn, 6, 2, (imm & right_n_bits(5))); \ ++ c_patch_reg((address)&insn, 7, Rd); \ ++ c_patch((address)&insn, 12, 12, (imm & right_n_bits(6)) >> 5); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_li, 0b010, 0b01); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME(Register Rd, uint32_t uimm) { \ ++ assert_cond(is_unsigned_imm_in_range(uimm, 9, 0)); \ ++ assert_cond((uimm & 0b111) == 0); \ ++ assert_cond(Rd != x0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch((address)&insn, 4, 2, (uimm & right_n_bits(9)) >> 6); \ ++ c_patch((address)&insn, 6, 5, (uimm & right_n_bits(5)) >> 3); \ ++ c_patch_reg((address)&insn, 7, Rd); \ ++ c_patch((address)&insn, 12, 12, (uimm & nth_bit(5)) >> 5); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_ldsp, 0b011, 0b10); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME(FloatRegister Rd, uint32_t uimm) { \ ++ assert_cond(is_unsigned_imm_in_range(uimm, 9, 0)); \ ++ assert_cond((uimm & 0b111) == 0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch((address)&insn, 4, 2, (uimm & right_n_bits(9)) >> 6); \ ++ c_patch((address)&insn, 6, 5, (uimm & right_n_bits(5)) >> 3); \ ++ c_patch_reg((address)&insn, 7, Rd); \ ++ c_patch((address)&insn, 12, 12, (uimm & nth_bit(5)) >> 5); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_fldsp, 0b001, 0b10); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op, REGISTER_TYPE) \ ++ void NAME(REGISTER_TYPE Rd_Rs2, Register Rs1, uint32_t uimm) { \ ++ assert_cond(is_unsigned_imm_in_range(uimm, 8, 0)); \ ++ assert_cond((uimm & 0b111) == 0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch_compressed_reg((address)&insn, 2, Rd_Rs2); \ ++ c_patch((address)&insn, 6, 5, (uimm & right_n_bits(8)) >> 6); \ ++ c_patch_compressed_reg((address)&insn, 7, Rs1); \ ++ c_patch((address)&insn, 12, 10, (uimm & right_n_bits(6)) >> 3); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_ld, 0b011, 0b00, Register); ++ INSN(c_sd, 0b111, 0b00, Register); ++ INSN(c_fld, 0b001, 0b00, FloatRegister); ++ INSN(c_fsd, 0b101, 0b00, FloatRegister); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op, REGISTER_TYPE) \ ++ void NAME(REGISTER_TYPE Rs2, uint32_t uimm) { \ ++ assert_cond(is_unsigned_imm_in_range(uimm, 9, 0)); \ ++ assert_cond((uimm & 0b111) == 0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ 
c_patch_reg((address)&insn, 2, Rs2); \ ++ c_patch((address)&insn, 9, 7, (uimm & right_n_bits(9)) >> 6); \ ++ c_patch((address)&insn, 12, 10, (uimm & right_n_bits(6)) >> 3); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_sdsp, 0b111, 0b10, Register); ++ INSN(c_fsdsp, 0b101, 0b10, FloatRegister); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME(Register Rs2, uint32_t uimm) { \ ++ assert_cond(is_unsigned_imm_in_range(uimm, 8, 0)); \ ++ assert_cond((uimm & 0b11) == 0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch_reg((address)&insn, 2, Rs2); \ ++ c_patch((address)&insn, 8, 7, (uimm & right_n_bits(8)) >> 6); \ ++ c_patch((address)&insn, 12, 9, (uimm & right_n_bits(6)) >> 2); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_swsp, 0b110, 0b10); ++ +#undef INSN + ++#define INSN(NAME, funct3, op) \ ++ void NAME(Register Rd, uint32_t uimm) { \ ++ assert_cond(is_unsigned_imm_in_range(uimm, 8, 0)); \ ++ assert_cond((uimm & 0b11) == 0); \ ++ assert_cond(Rd != x0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch((address)&insn, 3, 2, (uimm & right_n_bits(8)) >> 6); \ ++ c_patch((address)&insn, 6, 4, (uimm & right_n_bits(5)) >> 2); \ ++ c_patch_reg((address)&insn, 7, Rd); \ ++ c_patch((address)&insn, 12, 12, (uimm & nth_bit(5)) >> 5); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_lwsp, 0b010, 0b10); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME(Register Rd_Rs2, Register Rs1, uint32_t uimm) { \ ++ assert_cond(is_unsigned_imm_in_range(uimm, 7, 0)); \ ++ assert_cond((uimm & 0b11) == 0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch_compressed_reg((address)&insn, 2, Rd_Rs2); \ ++ c_patch((address)&insn, 5, 5, (uimm & nth_bit(6)) >> 6); \ ++ c_patch((address)&insn, 6, 6, (uimm & nth_bit(2)) >> 2); \ ++ c_patch_compressed_reg((address)&insn, 7, Rs1); \ ++ c_patch((address)&insn, 12, 10, (uimm & right_n_bits(6)) >> 3); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_lw, 0b010, 0b00); ++ INSN(c_sw, 0b110, 0b00); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME() { \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch((address)&insn, 11, 2, 0x0); \ ++ c_patch((address)&insn, 12, 12, 0b1); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_ebreak, 0b100, 0b10); ++ ++#undef INSN ++ ++// -------------- RVC Transformation Functions -------------- ++ ++// -------------------------- ++// Register instructions ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(Register Rd, Register Rs1, Register Rs2) { \ ++ /* add -> c.add */ \ ++ if (do_compress()) { \ ++ Register src = noreg; \ ++ if (Rs1 != x0 && Rs2 != x0 && ((src = Rs1, Rs2 == Rd) || (src = Rs2, Rs1 == Rd))) { \ ++ c_add(Rd, src); \ ++ return; \ ++ } \ ++ } \ ++ _add(Rd, Rs1, Rs2); \ ++ } ++ ++ INSN(add); ++ ++#undef INSN ++ ++// -------------------------- ++#define INSN(NAME, C_NAME, NORMAL_NAME) \ ++ void NAME(Register Rd, Register Rs1, Register Rs2) { \ ++ /* sub/subw -> c.sub/c.subw */ \ ++ if (do_compress() && \ ++ (Rd == Rs1 && Rd->is_compressed_valid() && Rs2->is_compressed_valid())) { \ ++ C_NAME(Rd, Rs2); \ ++ return; \ ++ } \ ++ NORMAL_NAME(Rd, Rs1, Rs2); \ ++ } ++ ++ INSN(sub, c_sub, _sub); ++ INSN(subw, c_subw, _subw); ++ ++#undef INSN ++ ++// 
-------------------------- ++#define INSN(NAME, C_NAME, NORMAL_NAME) \ ++ void NAME(Register Rd, Register Rs1, Register Rs2) { \ ++ /* and/or/xor/addw -> c.and/c.or/c.xor/c.addw */ \ ++ if (do_compress()) { \ ++ Register src = noreg; \ ++ if (Rs1->is_compressed_valid() && Rs2->is_compressed_valid() && \ ++ ((src = Rs1, Rs2 == Rd) || (src = Rs2, Rs1 == Rd))) { \ ++ C_NAME(Rd, src); \ ++ return; \ ++ } \ ++ } \ ++ NORMAL_NAME(Rd, Rs1, Rs2); \ ++ } ++ ++ INSN(andr, c_and, _andr); ++ INSN(orr, c_or, _orr); ++ INSN(xorr, c_xor, _xorr); ++ INSN(addw, c_addw, _addw); ++ ++#undef INSN ++ ++private: ++// some helper functions ++ bool do_compress() const { ++ return UseRVC && in_compressible_region(); ++ } ++ ++#define FUNC(NAME, funct3, bits) \ ++ bool NAME(Register rs1, Register rd_rs2, int32_t imm12, bool ld) { \ ++ return rs1 == sp && \ ++ is_unsigned_imm_in_range(imm12, bits, 0) && \ ++ (intx(imm12) & funct3) == 0x0 && \ ++ (!ld || rd_rs2 != x0); \ ++ } \ ++ ++ FUNC(is_c_ldsdsp, 0b111, 9); ++ FUNC(is_c_lwswsp, 0b011, 8); ++ ++#undef FUNC ++ ++#define FUNC(NAME, funct3, bits) \ ++ bool NAME(Register rs1, int32_t imm12) { \ ++ return rs1 == sp && \ ++ is_unsigned_imm_in_range(imm12, bits, 0) && \ ++ (intx(imm12) & funct3) == 0x0; \ ++ } \ ++ ++ FUNC(is_c_fldsdsp, 0b111, 9); ++ ++#undef FUNC ++ ++#define FUNC(NAME, REG_TYPE, funct3, bits) \ ++ bool NAME(Register rs1, REG_TYPE rd_rs2, int32_t imm12) { \ ++ return rs1->is_compressed_valid() && \ ++ rd_rs2->is_compressed_valid() && \ ++ is_unsigned_imm_in_range(imm12, bits, 0) && \ ++ (intx(imm12) & funct3) == 0x0; \ ++ } \ ++ ++ FUNC(is_c_ldsd, Register, 0b111, 8); ++ FUNC(is_c_lwsw, Register, 0b011, 7); ++ FUNC(is_c_fldsd, FloatRegister, 0b111, 8); ++ ++#undef FUNC ++ ++public: ++// -------------------------- ++// Load/store register ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(Register Rd, Register Rs, const int32_t offset) { \ ++ /* lw -> c.lwsp/c.lw */ \ ++ if (do_compress()) { \ ++ if (is_c_lwswsp(Rs, Rd, offset, true)) { \ ++ c_lwsp(Rd, offset); \ ++ return; \ ++ } else if (is_c_lwsw(Rs, Rd, offset)) { \ ++ c_lw(Rd, Rs, offset); \ ++ return; \ ++ } \ ++ } \ ++ _lw(Rd, Rs, offset); \ ++ } ++ ++ INSN(lw); ++ ++#undef INSN ++ ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(Register Rd, Register Rs, const int32_t offset) { \ ++ /* ld -> c.ldsp/c.ld */ \ ++ if (do_compress()) { \ ++ if (is_c_ldsdsp(Rs, Rd, offset, true)) { \ ++ c_ldsp(Rd, offset); \ ++ return; \ ++ } else if (is_c_ldsd(Rs, Rd, offset)) { \ ++ c_ld(Rd, Rs, offset); \ ++ return; \ ++ } \ ++ } \ ++ _ld(Rd, Rs, offset); \ ++ } ++ ++ INSN(ld); ++ ++#undef INSN ++ ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(FloatRegister Rd, Register Rs, const int32_t offset) { \ ++ /* fld -> c.fldsp/c.fld */ \ ++ if (do_compress()) { \ ++ if (is_c_fldsdsp(Rs, offset)) { \ ++ c_fldsp(Rd, offset); \ ++ return; \ ++ } else if (is_c_fldsd(Rs, Rd, offset)) { \ ++ c_fld(Rd, Rs, offset); \ ++ return; \ ++ } \ ++ } \ ++ _fld(Rd, Rs, offset); \ ++ } ++ ++ INSN(fld); ++ ++#undef INSN ++ ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(Register Rd, Register Rs, const int32_t offset) { \ ++ /* sd -> c.sdsp/c.sd */ \ ++ if (do_compress()) { \ ++ if (is_c_ldsdsp(Rs, Rd, offset, false)) { \ ++ c_sdsp(Rd, offset); \ ++ return; \ ++ } else if (is_c_ldsd(Rs, Rd, offset)) { \ ++ c_sd(Rd, Rs, offset); \ ++ return; \ ++ } \ ++ } \ ++ _sd(Rd, Rs, offset); \ ++ } ++ ++ INSN(sd); ++ ++#undef INSN ++ ++// -------------------------- ++#define 
INSN(NAME) \ ++ void NAME(Register Rd, Register Rs, const int32_t offset) { \ ++ /* sw -> c.swsp/c.sw */ \ ++ if (do_compress()) { \ ++ if (is_c_lwswsp(Rs, Rd, offset, false)) { \ ++ c_swsp(Rd, offset); \ ++ return; \ ++ } else if (is_c_lwsw(Rs, Rd, offset)) { \ ++ c_sw(Rd, Rs, offset); \ ++ return; \ ++ } \ ++ } \ ++ _sw(Rd, Rs, offset); \ ++ } ++ ++ INSN(sw); ++ ++#undef INSN ++ ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(FloatRegister Rd, Register Rs, const int32_t offset) { \ ++ /* fsd -> c.fsdsp/c.fsd */ \ ++ if (do_compress()) { \ ++ if (is_c_fldsdsp(Rs, offset)) { \ ++ c_fsdsp(Rd, offset); \ ++ return; \ ++ } else if (is_c_fldsd(Rs, Rd, offset)) { \ ++ c_fsd(Rd, Rs, offset); \ ++ return; \ ++ } \ ++ } \ ++ _fsd(Rd, Rs, offset); \ ++ } ++ ++ INSN(fsd); ++ ++#undef INSN ++ ++// -------------------------- ++// Conditional branch instructions ++// -------------------------- ++#define INSN(NAME, C_NAME, NORMAL_NAME) \ ++ void NAME(Register Rs1, Register Rs2, const int64_t offset) { \ ++ /* beq/bne -> c.beqz/c.bnez */ \ ++ if (do_compress() && \ ++ (offset != 0 && Rs2 == x0 && Rs1->is_compressed_valid() && \ ++ is_imm_in_range(offset, 8, 1))) { \ ++ C_NAME(Rs1, offset); \ ++ return; \ ++ } \ ++ NORMAL_NAME(Rs1, Rs2, offset); \ ++ } ++ ++ INSN(beq, c_beqz, _beq); ++ INSN(bne, c_beqz, _bne); ++ ++#undef INSN ++ ++// -------------------------- ++// Unconditional branch instructions ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(Register Rd, const int32_t offset) { \ ++ /* jal -> c.j */ \ ++ if (do_compress() && offset != 0 && Rd == x0 && is_imm_in_range(offset, 11, 1)) { \ ++ c_j(offset); \ ++ return; \ ++ } \ ++ _jal(Rd, offset); \ ++ } ++ ++ INSN(jal); ++ ++#undef INSN ++ ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(Register Rd, Register Rs, const int32_t offset) { \ ++ /* jalr -> c.jr/c.jalr */ \ ++ if (do_compress() && (offset == 0 && Rs != x0)) { \ ++ if (Rd == x1) { \ ++ c_jalr(Rs); \ ++ return; \ ++ } else if (Rd == x0) { \ ++ c_jr(Rs); \ ++ return; \ ++ } \ ++ } \ ++ _jalr(Rd, Rs, offset); \ ++ } ++ ++ INSN(jalr); ++ ++#undef INSN ++ ++// -------------------------- ++// Miscellaneous Instructions ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME() { \ ++ /* ebreak -> c.ebreak */ \ ++ if (do_compress()) { \ ++ c_ebreak(); \ ++ return; \ ++ } \ ++ _ebreak(); \ ++ } ++ ++ INSN(ebreak); ++ ++#undef INSN ++ ++#define INSN(NAME) \ ++ void NAME() { \ ++ /* The illegal instruction in RVC is presented by a 16-bit 0. 
*/ \ ++ if (do_compress()) { \ ++ emit_int16(0); \ ++ return; \ ++ } \ ++ _halt(); \ ++ } ++ ++ INSN(halt); ++ ++#undef INSN ++ ++// -------------------------- ++// Immediate Instructions ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(Register Rd, int64_t imm) { \ ++ /* li -> c.li */ \ ++ if (do_compress() && (is_imm_in_range(imm, 6, 0) && Rd != x0)) { \ ++ c_li(Rd, imm); \ ++ return; \ ++ } \ ++ _li(Rd, imm); \ ++ } ++ ++ INSN(li); ++ ++#undef INSN ++ ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(Register Rd, Register Rs1, int32_t imm) { \ ++ /* addi -> c.addi/c.nop/c.mv/c.addi16sp/c.addi4spn */ \ ++ if (do_compress()) { \ ++ if (Rd == Rs1 && is_imm_in_range(imm, 6, 0)) { \ ++ c_addi(Rd, imm); \ ++ return; \ ++ } else if (imm == 0 && Rd != x0 && Rs1 != x0) { \ ++ c_mv(Rd, Rs1); \ ++ return; \ ++ } else if (Rs1 == sp && imm != 0) { \ ++ if (Rd == Rs1 && (imm & 0b1111) == 0x0 && is_imm_in_range(imm, 10, 0)) { \ ++ c_addi16sp(imm); \ ++ return; \ ++ } else if (Rd->is_compressed_valid() && (imm & 0b11) == 0x0 && is_unsigned_imm_in_range(imm, 10, 0)) { \ ++ c_addi4spn(Rd, imm); \ ++ return; \ ++ } \ ++ } \ ++ } \ ++ _addi(Rd, Rs1, imm); \ ++ } ++ ++ INSN(addi); ++ ++#undef INSN ++ ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(Register Rd, Register Rs1, int32_t imm) { \ ++ /* addiw -> c.addiw */ \ ++ if (do_compress() && (Rd == Rs1 && Rd != x0 && is_imm_in_range(imm, 6, 0))) { \ ++ c_addiw(Rd, imm); \ ++ return; \ ++ } \ ++ _addiw(Rd, Rs1, imm); \ ++ } ++ ++ INSN(addiw); ++ ++#undef INSN ++ ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(Register Rd, Register Rs1, int32_t imm) { \ ++ /* and_imm12 -> c.andi */ \ ++ if (do_compress() && \ ++ (Rd == Rs1 && Rd->is_compressed_valid() && is_imm_in_range(imm, 6, 0))) { \ ++ c_andi(Rd, imm); \ ++ return; \ ++ } \ ++ _and_imm12(Rd, Rs1, imm); \ ++ } ++ ++ INSN(and_imm12); ++ ++#undef INSN ++ ++// -------------------------- ++// Shift Immediate Instructions ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(Register Rd, Register Rs1, unsigned shamt) { \ ++ /* slli -> c.slli */ \ ++ if (do_compress() && (Rd == Rs1 && Rd != x0 && shamt != 0)) { \ ++ c_slli(Rd, shamt); \ ++ return; \ ++ } \ ++ _slli(Rd, Rs1, shamt); \ ++ } ++ ++ INSN(slli); ++ ++#undef INSN ++ ++// -------------------------- ++#define INSN(NAME, C_NAME, NORMAL_NAME) \ ++ void NAME(Register Rd, Register Rs1, unsigned shamt) { \ ++ /* srai/srli -> c.srai/c.srli */ \ ++ if (do_compress() && (Rd == Rs1 && Rd->is_compressed_valid() && shamt != 0)) { \ ++ C_NAME(Rd, shamt); \ ++ return; \ ++ } \ ++ NORMAL_NAME(Rd, Rs1, shamt); \ ++ } ++ ++ INSN(srai, c_srai, _srai); ++ INSN(srli, c_srli, _srli); ++ ++#undef INSN ++ ++// -------------------------- ++// Upper Immediate Instruction ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(Register Rd, int32_t imm) { \ ++ /* lui -> c.lui */ \ ++ if (do_compress() && (Rd != x0 && Rd != x2 && imm != 0 && is_imm_in_range(imm, 18, 0))) { \ ++ c_lui(Rd, imm); \ ++ return; \ ++ } \ ++ _lui(Rd, imm); \ ++ } ++ ++ INSN(lui); ++ ++#undef INSN ++ ++// --------------------------------------------------------------------------------------- ++ + void bgt(Register Rs, Register Rt, const address &dest); + void ble(Register Rs, Register Rt, const address &dest); + void bgtu(Register Rs, Register Rt, const address &dest); @@ -3273,25 +4084,17 @@ index 000000000..d4da30ed6 + void wrap_label(Register r, Label &L, Register t, load_insn_by_temp insn); + void 
wrap_label(Register r, Label &L, jal_jalr_insn insn); + -+ // Computational pseudo instructions ++ // calculate pseudoinstruction + void add(Register Rd, Register Rn, int64_t increment, Register temp = t0); -+ void addw(Register Rd, Register Rn, int32_t increment, Register temp = t0); -+ ++ void addw(Register Rd, Register Rn, int64_t increment, Register temp = t0); + void sub(Register Rd, Register Rn, int64_t decrement, Register temp = t0); -+ void subw(Register Rd, Register Rn, int32_t decrement, Register temp = t0); ++ void subw(Register Rd, Register Rn, int64_t decrement, Register temp = t0); + + // RVB pseudo instructions + // zero extend word + void zext_w(Register Rd, Register Rs); + -+ Assembler(CodeBuffer* code) : AbstractAssembler(code) { -+ } -+ -+ virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, -+ Register tmp, -+ int offset) { -+ ShouldNotCallThis(); -+ return RegisterOrConstant(); ++ Assembler(CodeBuffer* code) : AbstractAssembler(code), _in_compressible_region(false) { + } + + // Stack overflow checking @@ -3301,34 +4104,25 @@ index 000000000..d4da30ed6 + return is_imm_in_range(imm, 12, 0); + } + -+ // The maximum range of a branch is fixed for the riscv -+ // architecture. ++ // The maximum range of a branch is fixed for the RISCV architecture. + static const unsigned long branch_range = 1 * M; + + static bool reachable_from_branch_at(address branch, address target) { + return uabs(target - branch) < branch_range; + } + -+ static Assembler::SEW elemBytes_to_sew(int esize) { -+ assert(esize > 0 && esize <= 64 && is_power_of_2(esize), "unsupported element size"); -+ return (Assembler::SEW) exact_log2(esize); -+ } -+ + virtual ~Assembler() {} -+ +}; + -+class BiasedLockingCounters; -+ +#endif // CPU_RISCV_ASSEMBLER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/assembler_riscv.inline.hpp b/src/hotspot/cpu/riscv/assembler_riscv.inline.hpp new file mode 100644 -index 000000000..82b825db7 +index 00000000000..7ffe8803985 --- /dev/null +++ b/src/hotspot/cpu/riscv/assembler_riscv.inline.hpp @@ -0,0 +1,47 @@ +/* -+ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -3376,14 +4170,14 @@ index 000000000..82b825db7 +#endif // CPU_RISCV_ASSEMBLER_RISCV_INLINE_HPP diff --git a/src/hotspot/cpu/riscv/bytes_riscv.hpp b/src/hotspot/cpu/riscv/bytes_riscv.hpp new file mode 100644 -index 000000000..d0ac7ef46 +index 00000000000..23d982f9abd --- /dev/null +++ b/src/hotspot/cpu/riscv/bytes_riscv.hpp -@@ -0,0 +1,169 @@ +@@ -0,0 +1,167 @@ +/* -+ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2016 SAP SE. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -3409,7 +4203,7 @@ index 000000000..d0ac7ef46 +#ifndef CPU_RISCV_BYTES_RISCV_HPP +#define CPU_RISCV_BYTES_RISCV_HPP + -+#include "memory/allocation.hpp" ++#include "memory/allStatic.hpp" + +class Bytes: AllStatic { + public: @@ -3457,7 +4251,6 @@ index 000000000..d0ac7ef46 + ((u8)(((u4*)p)[0])); + + case 2: -+ case 6: + return ((u8)(((u2*)p)[3]) << 48) | + ((u8)(((u2*)p)[2]) << 32) | + ((u8)(((u2*)p)[1]) << 16) | @@ -3471,7 +4264,7 @@ index 000000000..d0ac7ef46 + ((u8)(p[3]) << 24) | + ((u8)(p[2]) << 16) | + ((u8)(p[1]) << 8) | -+ (u8)(p[0]); ++ ((u8)(p[0])); + } + } + @@ -3516,7 +4309,6 @@ index 000000000..d0ac7ef46 + break; + + case 2: -+ case 6: + ((u2*)p)[3] = x >> 48; + ((u2*)p)[2] = x >> 32; + ((u2*)p)[1] = x >> 16; @@ -3546,17 +4338,17 @@ index 000000000..d0ac7ef46 + static inline void put_Java_u8(address p, u8 x) { put_native_u8(p, swap_u8(x)); } +}; + -+#include OS_CPU_HEADER_INLINE(bytes) ++#include OS_CPU_HEADER(bytes) + +#endif // CPU_RISCV_BYTES_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp new file mode 100644 -index 000000000..522eedd29 +index 00000000000..dcd0472c540 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp -@@ -0,0 +1,352 @@ +@@ -0,0 +1,353 @@ +/* -+ * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -3588,6 +4380,7 @@ index 000000000..522eedd29 +#include "c1/c1_LIRAssembler.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "c1/c1_Runtime1.hpp" ++#include "classfile/javaClasses.hpp" +#include "nativeInst_riscv.hpp" +#include "runtime/sharedRuntime.hpp" +#include "vmreg_riscv.inline.hpp" @@ -3595,8 +4388,21 @@ index 000000000..522eedd29 + +#define __ ce->masm()-> + -+void CounterOverflowStub::emit_code(LIR_Assembler* ce) -+{ ++void C1SafepointPollStub::emit_code(LIR_Assembler* ce) { ++ __ bind(_entry); ++ InternalAddress safepoint_pc(__ pc() - __ offset() + safepoint_offset()); ++ __ code_section()->relocate(__ pc(), safepoint_pc.rspec()); ++ __ la(t0, safepoint_pc.target()); ++ __ sd(t0, Address(xthread, JavaThread::saved_exception_pc_offset())); ++ ++ assert(SharedRuntime::polling_page_return_handler_blob() != NULL, ++ "polling page return stub not created yet"); ++ address stub = SharedRuntime::polling_page_return_handler_blob()->entry_point(); ++ ++ __ far_jump(RuntimeAddress(stub)); ++} ++ ++void CounterOverflowStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + Metadata *m = _method->as_constant_ptr()->as_metadata(); + __ mov_metadata(t0, m); @@ -3608,22 +4414,19 @@ index 000000000..522eedd29 + __ j(_continuation); +} + -+RangeCheckStub::RangeCheckStub(CodeEmitInfo *info, LIR_Opr index, LIR_Opr array) -+ : _index(index), _array(array), _throw_index_out_of_bounds_exception(false) -+{ ++RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index, LIR_Opr array) ++ : _index(index), _array(array), _throw_index_out_of_bounds_exception(false) { + assert(info != NULL, "must have info"); + _info = new CodeEmitInfo(info); +} + +RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index) -+ : _index(index), _array(NULL), _throw_index_out_of_bounds_exception(true) -+{ ++ : 
_index(index), _array(), _throw_index_out_of_bounds_exception(true) { + assert(info != NULL, "must have info"); + _info = new CodeEmitInfo(info); +} + -+void RangeCheckStub::emit_code(LIR_Assembler* ce) -+{ ++void RangeCheckStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + if (_info->deoptimize_on_exception()) { + address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id); @@ -3643,7 +4446,7 @@ index 000000000..522eedd29 + if (_throw_index_out_of_bounds_exception) { + stub_id = Runtime1::throw_index_exception_id; + } else { -+ assert(_array != NULL, "sanity"); ++ assert(_array != LIR_Opr::nullOpr(), "sanity"); + __ mv(t1, _array->as_pointer_register()); + stub_id = Runtime1::throw_range_check_failed_id; + } @@ -3655,13 +4458,11 @@ index 000000000..522eedd29 + debug_only(__ should_not_reach_here()); +} + -+PredicateFailedStub::PredicateFailedStub(CodeEmitInfo* info) -+{ ++PredicateFailedStub::PredicateFailedStub(CodeEmitInfo* info) { + _info = new CodeEmitInfo(info); +} + -+void PredicateFailedStub::emit_code(LIR_Assembler* ce) -+{ ++void PredicateFailedStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id); + __ far_call(RuntimeAddress(a)); @@ -3670,8 +4471,7 @@ index 000000000..522eedd29 + debug_only(__ should_not_reach_here()); +} + -+void DivByZeroStub::emit_code(LIR_Assembler* ce) -+{ ++void DivByZeroStub::emit_code(LIR_Assembler* ce) { + if (_offset != -1) { + ce->compilation()->implicit_exception_table()->append(_offset, __ offset()); + } @@ -3685,21 +4485,19 @@ index 000000000..522eedd29 +} + +// Implementation of NewInstanceStub -+NewInstanceStub::NewInstanceStub(LIR_Opr klass_reg, LIR_Opr result, ciInstanceKlass* klass, CodeEmitInfo* info, Runtime1::StubID stub_id) -+{ ++NewInstanceStub::NewInstanceStub(LIR_Opr klass_reg, LIR_Opr result, ciInstanceKlass* klass, CodeEmitInfo* info, Runtime1::StubID stub_id) { + _result = result; + _klass = klass; + _klass_reg = klass_reg; + _info = new CodeEmitInfo(info); -+ assert(stub_id == Runtime1::new_instance_id || -+ stub_id == Runtime1::fast_new_instance_id || ++ assert(stub_id == Runtime1::new_instance_id || ++ stub_id == Runtime1::fast_new_instance_id || + stub_id == Runtime1::fast_new_instance_init_check_id, + "need new_instance id"); + _stub_id = stub_id; +} + -+void NewInstanceStub::emit_code(LIR_Assembler* ce) -+{ ++void NewInstanceStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + __ bind(_entry); + __ mv(x13, _klass_reg->as_register()); @@ -3711,16 +4509,14 @@ index 000000000..522eedd29 +} + +// Implementation of NewTypeArrayStub -+NewTypeArrayStub::NewTypeArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) -+{ ++NewTypeArrayStub::NewTypeArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) { + _klass_reg = klass_reg; + _length = length; + _result = result; + _info = new CodeEmitInfo(info); +} + -+void NewTypeArrayStub::emit_code(LIR_Assembler* ce) -+{ ++void NewTypeArrayStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + __ bind(_entry); + assert(_length->as_register() == x9, "length must in x9"); @@ -3733,16 +4529,14 @@ index 000000000..522eedd29 +} + +// Implementation of NewObjectArrayStub -+NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) -+{ ++NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr 
result, CodeEmitInfo* info) { + _klass_reg = klass_reg; + _result = result; + _length = length; + _info = new CodeEmitInfo(info); +} + -+void NewObjectArrayStub::emit_code(LIR_Assembler* ce) -+{ ++void NewObjectArrayStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + __ bind(_entry); + assert(_length->as_register() == x9, "length must in x9"); @@ -3756,13 +4550,11 @@ index 000000000..522eedd29 + +// Implementation of MonitorAccessStubs +MonitorEnterStub::MonitorEnterStub(LIR_Opr obj_reg, LIR_Opr lock_reg, CodeEmitInfo* info) -+: MonitorAccessStub(obj_reg, lock_reg) -+{ ++: MonitorAccessStub(obj_reg, lock_reg) { + _info = new CodeEmitInfo(info); +} + -+void MonitorEnterStub::emit_code(LIR_Assembler* ce) -+{ ++void MonitorEnterStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + __ bind(_entry); + ce->store_parameter(_obj_reg->as_register(), 1); @@ -3779,8 +4571,7 @@ index 000000000..522eedd29 + __ j(_continuation); +} + -+void MonitorExitStub::emit_code(LIR_Assembler* ce) -+{ ++void MonitorExitStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + if (_compute_lock) { + // lock_reg was destroyed by fast unlocking attempt => recompute it @@ -3798,18 +4589,23 @@ index 000000000..522eedd29 + __ far_jump(RuntimeAddress(Runtime1::entry_for(exit_id))); +} + ++// Implementation of patching: ++// - Copy the code at given offset to an inlined buffer (first the bytes, then the number of bytes) ++// - Replace original code with a call to the stub ++// At Runtime: ++// - call to stub, jump to runtime ++// - in runtime: preserve all registers (rspecially objects, i.e., source and destination object) ++// - in runtime: after initializing class, restore original code, reexecute instruction ++ +int PatchingStub::_patch_info_offset = -NativeGeneralJump::instruction_size; + +void PatchingStub::align_patch_site(MacroAssembler* masm) {} + -+// RISCV don't use C1 runtime patching. When need patch, just deoptimize. -+void PatchingStub::emit_code(LIR_Assembler* ce) -+{ ++void PatchingStub::emit_code(LIR_Assembler* ce) { + assert(false, "RISCV should not use C1 runtime patching"); +} + -+void DeoptimizeStub::emit_code(LIR_Assembler* ce) -+{ ++void DeoptimizeStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + ce->store_parameter(_trap_request, 0); + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::deoptimize_id))); @@ -3817,8 +4613,7 @@ index 000000000..522eedd29 + DEBUG_ONLY(__ should_not_reach_here()); +} + -+void ImplicitNullCheckStub::emit_code(LIR_Assembler* ce) -+{ ++void ImplicitNullCheckStub::emit_code(LIR_Assembler* ce) { + address a = NULL; + if (_info->deoptimize_on_exception()) { + // Deoptimize, do not throw the exception, because it is probably wrong to do it here. 
@@ -3835,8 +4630,7 @@ index 000000000..522eedd29 + debug_only(__ should_not_reach_here()); +} + -+void SimpleExceptionStub::emit_code(LIR_Assembler* ce) -+{ ++void SimpleExceptionStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + + __ bind(_entry); @@ -3845,32 +4639,29 @@ index 000000000..522eedd29 + if (_obj->is_cpu_register()) { + __ mv(t0, _obj->as_register()); + } -+ __ far_call(RuntimeAddress(Runtime1::entry_for(_stub)), t1); ++ __ far_call(RuntimeAddress(Runtime1::entry_for(_stub)), NULL, t1); + ce->add_call_info_here(_info); + debug_only(__ should_not_reach_here()); +} + -+void ArrayCopyStub::emit_code(LIR_Assembler* ce) -+{ ++void ArrayCopyStub::emit_code(LIR_Assembler* ce) { + // ---------------slow case: call to native----------------- + __ bind(_entry); + // Figure out where the args should go + // This should really convert the IntrinsicID to the Method* and signature + // but I don't know how to do that. -+ // + const int args_num = 5; + VMRegPair args[args_num]; + BasicType signature[args_num] = { T_OBJECT, T_INT, T_OBJECT, T_INT, T_INT }; -+ SharedRuntime::java_calling_convention(signature, args, args_num, true); ++ SharedRuntime::java_calling_convention(signature, args, args_num); + + // push parameters + Register r[args_num]; -+ int i = 0; -+ r[i++] = src()->as_register(); -+ r[i++] = src_pos()->as_register(); -+ r[i++] = dst()->as_register(); -+ r[i++] = dst_pos()->as_register(); -+ r[i++] = length()->as_register(); ++ r[0] = src()->as_register(); ++ r[1] = src_pos()->as_register(); ++ r[2] = dst()->as_register(); ++ r[3] = dst_pos()->as_register(); ++ r[4] = length()->as_register(); + + // next registers will get stored on the stack + for (int j = 0; j < args_num; j++) { @@ -3879,7 +4670,7 @@ index 000000000..522eedd29 + int st_off = r_1->reg2stack() * wordSize; + __ sd(r[j], Address(sp, st_off)); + } else { -+ assert(r[j] == args[j].first()->as_Register(), "Wrong register for arg "); ++ assert(r[j] == args[j].first()->as_Register(), "Wrong register for arg"); + } + } + @@ -3899,8 +4690,10 @@ index 000000000..522eedd29 + ce->add_call_info_here(info()); + +#ifndef PRODUCT -+ __ la(t1, ExternalAddress((address)&Runtime1::_arraycopy_slowcase_cnt)); -+ __ incrementw(Address(t1)); ++ if (PrintC1Statistics) { ++ __ la(t1, ExternalAddress((address)&Runtime1::_arraycopy_slowcase_cnt)); ++ __ add_memory_int32(Address(t1), 1); ++ } +#endif + + __ j(_continuation); @@ -3909,13 +4702,12 @@ index 000000000..522eedd29 +#undef __ diff --git a/src/hotspot/cpu/riscv/c1_Defs_riscv.hpp b/src/hotspot/cpu/riscv/c1_Defs_riscv.hpp new file mode 100644 -index 000000000..a0f411352 +index 00000000000..4417ad63091 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_Defs_riscv.hpp -@@ -0,0 +1,85 @@ +@@ -0,0 +1,84 @@ +/* -+ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * @@ -3973,7 +4765,7 @@ index 000000000..a0f411352 + + pd_nof_cpu_regs_linearscan = 32, // number of registers visible to linear scan + pd_nof_fpu_regs_linearscan = pd_nof_fpu_regs_frame_map, // number of float registers visible to linear scan -+ pd_nof_xmm_regs_linearscan = 0, // like sparc we don't have any of these ++ pd_nof_xmm_regs_linearscan = 0, // don't have vector registers + + pd_first_cpu_reg = 0, + pd_last_cpu_reg = pd_nof_cpu_regs_reg_alloc - 1, @@ -4000,13 +4792,12 @@ index 000000000..a0f411352 +#endif // CPU_RISCV_C1_DEFS_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.cpp b/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.cpp new file mode 100644 -index 000000000..d4876625c +index 00000000000..e3a2606c532 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.cpp -@@ -0,0 +1,31 @@ +@@ -0,0 +1,30 @@ +/* + * Copyright (c) 2005, 2017, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -4037,13 +4828,12 @@ index 000000000..d4876625c +// No FPU stack on RISCV diff --git a/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.hpp b/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.hpp new file mode 100644 -index 000000000..4b43bc4d7 +index 00000000000..7bc3d311501 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.hpp -@@ -0,0 +1,33 @@ +@@ -0,0 +1,32 @@ +/* -+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -4076,13 +4866,12 @@ index 000000000..4b43bc4d7 +#endif // CPU_RISCV_C1_FPUSTACKSIM_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp new file mode 100644 -index 000000000..94b4e0f0b +index 00000000000..172031941b2 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp -@@ -0,0 +1,391 @@ +@@ -0,0 +1,388 @@ +/* -+ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * @@ -4112,8 +4901,7 @@ index 000000000..94b4e0f0b +#include "runtime/sharedRuntime.hpp" +#include "vmreg_riscv.inline.hpp" + -+LIR_Opr FrameMap::map_to_opr(BasicType type, VMRegPair* reg, bool) -+{ ++LIR_Opr FrameMap::map_to_opr(BasicType type, VMRegPair* reg, bool) { + LIR_Opr opr = LIR_OprFact::illegalOpr; + VMReg r_1 = reg->first(); + VMReg r_2 = reg->second(); @@ -4129,7 +4917,7 @@ index 000000000..94b4e0f0b + Register reg2 = r_2->as_Register(); + assert(reg2 == reg1, "must be same register"); + opr = as_long_opr(reg1); -+ } else if (type == T_OBJECT || type == T_ARRAY) { ++ } else if (is_reference_type(type)) { + opr = as_oop_opr(reg1); + } else if (type == T_METADATA) { + opr = as_metadata_opr(reg1); @@ -4240,8 +5028,8 @@ index 000000000..94b4e0f0b +LIR_Opr FrameMap::fpu10_float_opr; +LIR_Opr FrameMap::fpu10_double_opr; + -+LIR_Opr FrameMap::_caller_save_cpu_regs[] = { 0, }; -+LIR_Opr FrameMap::_caller_save_fpu_regs[] = { 0, }; ++LIR_Opr FrameMap::_caller_save_cpu_regs[] = {}; ++LIR_Opr FrameMap::_caller_save_fpu_regs[] = {}; + +//-------------------------------------------------------- +// FrameMap @@ -4398,7 +5186,7 @@ index 000000000..94b4e0f0b + + VMRegPair regs; + BasicType sig_bt = T_OBJECT; -+ SharedRuntime::java_calling_convention(&sig_bt, ®s, 1, true); ++ SharedRuntime::java_calling_convention(&sig_bt, ®s, 1); + receiver_opr = as_oop_opr(regs.first()->as_Register()); + + for (i = 0; i < nof_caller_save_fpu_regs; i++) { @@ -4413,7 +5201,7 @@ index 000000000..94b4e0f0b + + +// ----------------mapping----------------------- -+// all mapping is based on rfp addressing, except for simple leaf methods where we access ++// all mapping is based on fp addressing, except for simple leaf methods where we access +// the locals sp based (and no frame is built) + + @@ -4430,7 +5218,7 @@ index 000000000..94b4e0f0b +// | .........| <- TOS +// | locals | +// +----------+ -+// | old fp, | ++// | old fp, | +// +----------+ +// | ret addr | +// +----------+ @@ -4458,8 +5246,7 @@ index 000000000..94b4e0f0b + return as_FloatRegister(n)->as_VMReg(); +} + -+LIR_Opr FrameMap::stack_pointer() -+{ ++LIR_Opr FrameMap::stack_pointer() { + return FrameMap::sp_opr; +} + @@ -4473,13 +5260,12 @@ index 000000000..94b4e0f0b +} diff --git a/src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp new file mode 100644 -index 000000000..f600c2f6f +index 00000000000..01281f5c9e1 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp -@@ -0,0 +1,149 @@ +@@ -0,0 +1,148 @@ +/* -+ * Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -4628,13 +5414,12 @@ index 000000000..f600c2f6f +#endif // CPU_RISCV_C1_FRAMEMAP_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp new file mode 100644 -index 000000000..a846d60ae +index 00000000000..4c1c13dc290 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp -@@ -0,0 +1,287 @@ +@@ -0,0 +1,281 @@ +/* + * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. 
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -4671,16 +5456,15 @@ index 000000000..a846d60ae + +#define __ _masm-> + -+void LIR_Assembler::arithmetic_idiv(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr illegal, LIR_Opr result, CodeEmitInfo* info) { -+ ++void LIR_Assembler::arithmetic_idiv(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr illegal, ++ LIR_Opr result, CodeEmitInfo* info) { + // opcode check + assert((code == lir_idiv) || (code == lir_irem), "opcode must be idiv or irem"); + bool is_irem = (code == lir_irem); -+ -+ // operand check -+ assert(left->is_single_cpu(), "left must be register"); -+ assert(right->is_single_cpu() || right->is_constant(), "right must be register or constant"); -+ assert(result->is_single_cpu(), "result must be register"); ++ // opreand check ++ assert(left->is_single_cpu(), "left must be a register"); ++ assert(right->is_single_cpu() || right->is_constant(), "right must be a register or constant"); ++ assert(result->is_single_cpu(), "result must be a register"); + Register lreg = left->as_register(); + Register dreg = result->as_register(); + @@ -4754,7 +5538,7 @@ index 000000000..a846d60ae + case lir_sub: __ subw(dreg, lreg, c); break; + default: ShouldNotReachHere(); + } -+ break; ++ break; + case T_OBJECT: // fall through + case T_ADDRESS: + switch (code) { @@ -4762,7 +5546,7 @@ index 000000000..a846d60ae + case lir_sub: __ sub(dreg, lreg, c); break; + default: ShouldNotReachHere(); + } -+ break; ++ break; + default: + ShouldNotReachHere(); + } @@ -4817,7 +5601,7 @@ index 000000000..a846d60ae + jlong c = right->as_constant_ptr()->as_jlong(); + Register dreg = as_reg(dest); + switch (code) { -+ case lir_add: ++ case lir_add: // fall through + case lir_sub: + if (c == 0 && dreg == lreg_lo) { + COMMENT("effective nop elided"); @@ -4831,7 +5615,7 @@ index 000000000..a846d60ae + // move lreg_lo to dreg if divisor is 1 + __ mv(dreg, lreg_lo); + } else { -+ unsigned int shift = exact_log2(c); ++ unsigned int shift = exact_log2_long(c); + // use t0 as intermediate result register + __ srai(t0, lreg_lo, 0x3f); + if (is_imm_in_range(c - 1, 12, 0)) { @@ -4849,7 +5633,7 @@ index 000000000..a846d60ae + // move 0 to dreg if divisor is 1 + __ mv(dreg, zr); + } else { -+ unsigned int shift = exact_log2(c); ++ unsigned int shift = exact_log2_long(c); + __ srai(t0, lreg_lo, 0x3f); + __ srli(t0, t0, BitsPerLong - shift); + __ add(t1, lreg_lo, t0); @@ -4874,9 +5658,7 @@ index 000000000..a846d60ae + switch (code) { + case lir_add: __ fadd_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; + case lir_sub: __ fsub_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; -+ case lir_mul_strictfp: // fall through + case lir_mul: __ fmul_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; -+ case lir_div_strictfp: // fall through + case lir_div: __ fdiv_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; + default: + ShouldNotReachHere(); @@ -4889,9 +5671,7 @@ index 000000000..a846d60ae + switch (code) { + case lir_add: __ fadd_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; + case lir_sub: __ fsub_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; -+ case lir_mul_strictfp: // fall through + case lir_mul: __ fmul_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; -+ case lir_div_strictfp: // fall through + case lir_div: __ fdiv_d(dest->as_double_reg(), 
left->as_double_reg(), right->as_double_reg()); break; + default: + ShouldNotReachHere(); @@ -4921,13 +5701,12 @@ index 000000000..a846d60ae +#undef __ diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.hpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.hpp new file mode 100644 -index 000000000..93530ef58 +index 00000000000..ab0a9963fc1 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.hpp -@@ -0,0 +1,36 @@ +@@ -0,0 +1,37 @@ +/* + * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -4950,6 +5729,7 @@ index 000000000..93530ef58 + * questions. + * + */ ++ +#ifndef CPU_RISCV_C1_LIRASSEMBLER_ARITH_RISCV_HPP +#define CPU_RISCV_C1_LIRASSEMBLER_ARITH_RISCV_HPP + @@ -4960,17 +5740,17 @@ index 000000000..93530ef58 + void arith_op_double_fpu(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest); + void arith_op_single_cpu_right_constant(LIR_Code code, LIR_Opr left, LIR_Opr right, Register lreg, Register dreg); + void arithmetic_idiv(LIR_Op3* op, bool is_irem); ++ +#endif // CPU_RISCV_C1_LIRASSEMBLER_ARITH_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp new file mode 100644 -index 000000000..31f8d6a4a +index 00000000000..b7f53e395f3 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp -@@ -0,0 +1,387 @@ +@@ -0,0 +1,388 @@ +/* + * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -4999,6 +5779,7 @@ index 000000000..31f8d6a4a +#include "c1/c1_MacroAssembler.hpp" +#include "ci/ciArrayKlass.hpp" +#include "oops/objArrayKlass.hpp" ++#include "runtime/stubRoutines.hpp" + +#define __ _masm-> + @@ -5026,7 +5807,7 @@ index 000000000..31f8d6a4a + __ mv(c_rarg4, j_rarg4); +#ifndef PRODUCT + if (PrintC1Statistics) { -+ __ incrementw(ExternalAddress((address)&Runtime1::_generic_arraycopystub_cnt)); ++ __ add_memory_int32(ExternalAddress((address)&Runtime1::_generic_arraycopystub_cnt), 1); + } +#endif + __ far_call(RuntimeAddress(copyfunc_addr)); @@ -5064,14 +5845,14 @@ index 000000000..31f8d6a4a + if (!(flags & LIR_OpArrayCopy::LIR_OpArrayCopy::dst_objarray)) { + __ load_klass(tmp, dst); + __ lw(t0, Address(tmp, in_bytes(Klass::layout_helper_offset()))); -+ __ mv(t1, Klass::_lh_neutral_value); ++ __ li(t1, Klass::_lh_neutral_value); + __ bge(t0, t1, *stub->entry(), /* is_far */ true); + } + + if (!(flags & LIR_OpArrayCopy::LIR_OpArrayCopy::src_objarray)) { + __ load_klass(tmp, src); + __ lw(t0, Address(tmp, in_bytes(Klass::layout_helper_offset()))); -+ __ mv(t1, Klass::_lh_neutral_value); ++ __ li(t1, Klass::_lh_neutral_value); + __ bge(t0, t1, *stub->entry(), /* is_far */ true); + } + } @@ -5133,7 +5914,7 @@ index 000000000..31f8d6a4a + if (PrintC1Statistics) { + Label failed; + __ bnez(x10, failed); -+ __ incrementw(ExternalAddress((address)&Runtime1::_arraycopy_checkcast_cnt)); ++ __ add_memory_int32(ExternalAddress((address)&Runtime1::_arraycopy_checkcast_cnt), 1); + __ bind(failed); + } +#endif @@ -5142,7 +5923,7 @@ index 000000000..31f8d6a4a + +#ifndef PRODUCT + if (PrintC1Statistics) { -+ __ incrementw(ExternalAddress((address)&Runtime1::_arraycopy_checkcast_attempt_cnt)); ++ __ add_memory_int32(ExternalAddress((address)&Runtime1::_arraycopy_checkcast_attempt_cnt), 1); + } +#endif + assert_different_registers(dst, dst_pos, length, src_pos, src, x10, t0); @@ -5214,6 +5995,7 @@ index 000000000..31f8d6a4a +void LIR_Assembler::arraycopy_assert(Register src, Register dst, Register tmp, ciArrayKlass *default_type, int flags) { + assert(default_type != NULL, "NULL default_type!"); + BasicType basic_type = default_type->element_type()->basic_type(); ++ + if (basic_type == T_ARRAY) { basic_type = T_OBJECT; } + if (basic_type != T_OBJECT || !(flags & LIR_OpArrayCopy::type_check)) { + // Sanity check the known type with the incoming class. For the @@ -5269,7 +6051,7 @@ index 000000000..31f8d6a4a + CodeStub* stub = op->stub(); + int flags = op->flags(); + BasicType basic_type = default_type != NULL ? 
default_type->element_type()->basic_type() : T_ILLEGAL; -+ if (basic_type == T_ARRAY) { basic_type = T_OBJECT; } ++ if (is_reference_type(basic_type)) { basic_type = T_OBJECT; } + + // if we don't know anything, just go through the generic arraycopy + if (default_type == NULL) { @@ -5292,7 +6074,7 @@ index 000000000..31f8d6a4a + +#ifndef PRODUCT + if (PrintC1Statistics) { -+ __ incrementw(ExternalAddress(Runtime1::arraycopy_count_address(basic_type))); ++ __ add_memory_int32(ExternalAddress(Runtime1::arraycopy_count_address(basic_type)), 1); + } +#endif + arraycopy_prepare_params(src, src_pos, length, dst, dst_pos, basic_type); @@ -5356,13 +6138,12 @@ index 000000000..31f8d6a4a +#undef __ diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.hpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.hpp new file mode 100644 -index 000000000..872fd2ef6 +index 00000000000..06a0f248ca6 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.hpp -@@ -0,0 +1,51 @@ +@@ -0,0 +1,52 @@ +/* + * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -5388,6 +6169,7 @@ index 000000000..872fd2ef6 + +#ifndef CPU_RISCV_C1_LIRASSEMBLER_ARRAYCOPY_RISCV_HPP +#define CPU_RISCV_C1_LIRASSEMBLER_ARRAYCOPY_RISCV_HPP ++ + // arraycopy sub functions + void generic_arraycopy(Register src, Register src_pos, Register length, + Register dst, Register dst_pos, CodeStub *stub); @@ -5410,17 +6192,18 @@ index 000000000..872fd2ef6 + Register dst, Register dst_pos); + void arraycopy_load_args(Register src, Register src_pos, Register length, + Register dst, Register dst_pos); ++ +#endif // CPU_RISCV_C1_LIRASSEMBLER_ARRAYCOPY_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp new file mode 100644 -index 000000000..222e3e97e +index 00000000000..742c2126e60 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -@@ -0,0 +1,2275 @@ +@@ -0,0 +1,2267 @@ +/* + * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -5455,14 +6238,12 @@ index 000000000..222e3e97e +#include "ci/ciArrayKlass.hpp" +#include "ci/ciInstance.hpp" +#include "code/compiledIC.hpp" -+#include "gc/shared/barrierSet.hpp" -+#include "gc/shared/cardTableBarrierSet.hpp" +#include "gc/shared/collectedHeap.hpp" +#include "nativeInst_riscv.hpp" +#include "oops/objArrayKlass.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/sharedRuntime.hpp" -+#include "utilities/macros.hpp" ++#include "utilities/powerOfTwo.hpp" +#include "vmreg_riscv.inline.hpp" + +#ifndef PRODUCT @@ -5512,6 +6293,17 @@ index 000000000..222e3e97e + +bool LIR_Assembler::is_small_constant(LIR_Opr opr) { Unimplemented(); return false; } + ++void LIR_Assembler::clinit_barrier(ciMethod* method) { ++ assert(VM_Version::supports_fast_class_init_checks(), "sanity"); ++ assert(!method->holder()->is_not_initialized(), "initialization should have been started"); ++ ++ Label L_skip_barrier; ++ ++ __ mov_metadata(t1, method->holder()->constant_encoding()); ++ __ clinit_barrier(t1, t0, &L_skip_barrier /* L_fast_path */); ++ __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); ++ __ bind(L_skip_barrier); ++} + +LIR_Opr LIR_Assembler::receiverOpr() { + return FrameMap::receiver_opr; @@ -5521,25 +6313,11 @@ index 000000000..222e3e97e + return FrameMap::as_pointer_opr(receiverOpr()->as_register()); +} + -+//--------------fpu register translations----------------------- -+void LIR_Assembler::set_24bit_FPU() { Unimplemented(); } -+ -+void LIR_Assembler::reset_FPU() { Unimplemented(); } -+ -+void LIR_Assembler::fpop() { Unimplemented(); } -+ -+void LIR_Assembler::fxch(int i) { Unimplemented(); } -+ -+void LIR_Assembler::fld(int i) { Unimplemented(); } -+ -+void LIR_Assembler::ffree(int i) { Unimplemented(); } -+ +void LIR_Assembler::breakpoint() { Unimplemented(); } + +void LIR_Assembler::push(LIR_Opr opr) { Unimplemented(); } + +void LIR_Assembler::pop(LIR_Opr opr) { Unimplemented(); } -+//------------------------------------------- + +static jlong as_long(LIR_Opr data) { + jlong result; @@ -5557,6 +6335,43 @@ index 000000000..222e3e97e + return result; +} + ++Address LIR_Assembler::as_Address(LIR_Address* addr, Register tmp) { ++ if (addr->base()->is_illegal()) { ++ assert(addr->index()->is_illegal(), "must be illegal too"); ++ __ movptr(tmp, addr->disp()); ++ return Address(tmp, 0); ++ } ++ ++ Register base = addr->base()->as_pointer_register(); ++ LIR_Opr index_opr = addr->index(); ++ ++ if (index_opr->is_illegal()) { ++ return Address(base, addr->disp()); ++ } ++ ++ int scale = addr->scale(); ++ if (index_opr->is_cpu_register()) { ++ Register index; ++ if (index_opr->is_single_cpu()) { ++ index = index_opr->as_register(); ++ } else { ++ index = index_opr->as_register_lo(); ++ } ++ if (scale != 0) { ++ __ shadd(tmp, index, base, tmp, scale); ++ } else { ++ __ add(tmp, base, index); ++ } ++ return Address(tmp, addr->disp()); ++ } else if (index_opr->is_constant()) { ++ intptr_t addr_offset = (((intptr_t)index_opr->as_constant_ptr()->as_jint()) << scale) + addr->disp(); ++ return Address(base, addr_offset); ++ } ++ ++ Unimplemented(); ++ return Address(); ++} ++ +Address LIR_Assembler::as_Address_hi(LIR_Address* addr) { + ShouldNotReachHere(); + return Address(); @@ -5572,7 +6387,7 @@ index 000000000..222e3e97e + +// Ensure a valid Address (base + offset) to a stack-slot. 
If stack access is +// not encodable as a base + (immediate) offset, generate an explicit address -+// calculation to hold the address in a temporary register. ++// calculation to hold the address in t0. +Address LIR_Assembler::stack_slot_address(int index, uint size, int adjust) { + precond(size == 4 || size == 8); + Address addr = frame_map()->address_for_slot(index, adjust); @@ -5690,10 +6505,7 @@ index 000000000..222e3e97e +int LIR_Assembler::initial_frame_size_in_bytes() const { + // if rounding, must let FrameMap know! + -+ // The frame_map records size in slots (32bit word) -+ -+ // subtract two words to account for return address and link -+ return (frame_map()->framesize() - (2 * VMRegImpl::slots_per_word)) * VMRegImpl::stack_slot_size; ++ return in_bytes(frame_map()->framesize_in_bytes()); +} + +int LIR_Assembler::emit_exception_handler() { @@ -5757,7 +6569,11 @@ index 000000000..222e3e97e + if (method()->is_synchronized()) { + monitor_address(0, FrameMap::r10_opr); + stub = new MonitorExitStub(FrameMap::r10_opr, true, 0); -+ __ unlock_object(x15, x14, x10, *stub->entry()); ++ if (UseHeavyMonitors) { ++ __ j(*stub->entry()); ++ } else { ++ __ unlock_object(x15, x14, x10, *stub->entry()); ++ } + __ bind(*stub->continuation()); + } + @@ -5810,7 +6626,7 @@ index 000000000..222e3e97e + return offset; +} + -+void LIR_Assembler::return_op(LIR_Opr result) { ++void LIR_Assembler::return_op(LIR_Opr result, C1SafepointPollStub* code_stub) { + assert(result->is_illegal() || !result->is_single_cpu() || result->as_register() == x10, "word returns are in x10"); + + // Pop the stack before the safepoint code @@ -5820,20 +6636,18 @@ index 000000000..222e3e97e + __ reserved_stack_check(); + } + -+ address polling_page(os::get_polling_page()); -+ __ read_polling_page(t0, polling_page, relocInfo::poll_return_type); ++ code_stub->set_safepoint_offset(__ offset()); ++ __ relocate(relocInfo::poll_return_type); ++ __ safepoint_poll(*code_stub->entry(), true /* at_return */, false /* acquire */, true /* in_nmethod */); + __ ret(); +} + +int LIR_Assembler::safepoint_poll(LIR_Opr tmp, CodeEmitInfo* info) { -+ address polling_page(os::get_polling_page()); + guarantee(info != NULL, "Shouldn't be NULL"); -+ assert(os::is_poll_address(polling_page), "should be"); -+ int32_t offset = 0; -+ __ get_polling_page(t0, polling_page, offset, relocInfo::poll_type); ++ __ get_polling_page(t0, relocInfo::poll_type); + add_debug_info_for_branch(info); // This isn't just debug info: + // it's the oop map -+ __ read_polling_page(t0, offset, relocInfo::poll_type); ++ __ read_polling_page(t0, 0, relocInfo::poll_type); + return __ offset(); +} + @@ -6007,7 +6821,7 @@ index 000000000..222e3e97e + } + move_regs(src->as_register(), dest->as_register()); + } else if (dest->is_double_cpu()) { -+ if (src->type() == T_OBJECT || src->type() == T_ARRAY) { ++ if (is_reference_type(src->type())) { + __ verify_oop(src->as_register()); + move_regs(src->as_register(), dest->as_register_lo()); + return; @@ -6064,8 +6878,7 @@ index 000000000..222e3e97e + } +} + -+void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, -+ bool pop_fpu_stack, bool wide, bool /* unaligned */) { ++void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool pop_fpu_stack, bool wide) { + LIR_Address* to_addr = dest->as_address_ptr(); + // t0 was used as tmp reg in as_Address, so we use t1 as compressed_src + Register compressed_src = t1; @@ 
-6075,7 +6888,7 @@ index 000000000..222e3e97e + return; + } + -+ if (type == T_ARRAY || type == T_OBJECT) { ++ if (is_reference_type(type)) { + __ verify_oop(src->as_register()); + + if (UseCompressedOops && !wide) { @@ -6187,8 +7000,7 @@ index 000000000..222e3e97e + reg2stack(temp, dest, dest->type(), false); +} + -+void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, -+ bool wide, bool /* unaligned */) { ++void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool wide) { + assert(src->is_address(), "should not call otherwise"); + assert(dest->is_register(), "should not call otherwise"); + @@ -6233,11 +7045,7 @@ index 000000000..222e3e97e + __ ld(dest->as_register(), as_Address(from_addr)); + break; + case T_ADDRESS: -+ if (UseCompressedClassPointers && addr->disp() == oopDesc::klass_offset_in_bytes()) { -+ __ lwu(dest->as_register(), as_Address(from_addr)); -+ } else { -+ __ ld(dest->as_register(), as_Address(from_addr)); -+ } ++ __ ld(dest->as_register(), as_Address(from_addr)); + break; + case T_INT: + __ lw(dest->as_register(), as_Address(from_addr)); @@ -6261,21 +7069,21 @@ index 000000000..222e3e97e + ShouldNotReachHere(); + } + -+ if (type == T_ARRAY || type == T_OBJECT) { ++ if (is_reference_type(type)) { + if (UseCompressedOops && !wide) { + __ decode_heap_oop(dest->as_register()); + } -+ __ verify_oop(dest->as_register()); -+ } else if (type == T_ADDRESS && addr->disp() == oopDesc::klass_offset_in_bytes()) { -+ if (UseCompressedClassPointers) { -+ __ decode_klass_not_null(dest->as_register()); ++ ++ if (!UseZGC) { ++ // Load barrier has not yet been applied, so ZGC can't verify the oop here ++ __ verify_oop(dest->as_register()); + } + } +} + +void LIR_Assembler::emit_op3(LIR_Op3* op) { + switch (op->code()) { -+ case lir_idiv: ++ case lir_idiv: // fall through + case lir_irem: + arithmetic_idiv(op->code(), + op->in_opr1(), @@ -6311,13 +7119,11 @@ index 000000000..222e3e97e + Label done; + move_op(opr2, result, type, lir_patch_none, NULL, + false, // pop_fpu_stack -+ false, // unaligned + false); // wide + __ j(done); + __ bind(label); + move_op(opr1, result, type, lir_patch_none, NULL, + false, // pop_fpu_stack -+ false, // unaligned + false); // wide + __ bind(done); +} @@ -6431,8 +7237,8 @@ index 000000000..222e3e97e + Register len = op->len()->as_register(); + + if (UseSlowPath || -+ (!UseFastNewObjectArray && (op->type() == T_OBJECT || op->type() == T_ARRAY)) || -+ (!UseFastNewTypeArray && (op->type() != T_OBJECT && op->type() != T_ARRAY))) { ++ (!UseFastNewObjectArray && is_reference_type(op->type())) || ++ (!UseFastNewTypeArray && !is_reference_type(op->type()))) { + __ j(*op->stub()->entry()); + } else { + Register tmp1 = op->tmp1()->as_register(); @@ -6467,7 +7273,7 @@ index 000000000..222e3e97e + __ ld(t1, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)))); + __ bne(recv, t1, next_test); + Address data_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i))); -+ __ increment(data_addr, DataLayout::counter_increment); ++ __ add_memory_int64(data_addr, DataLayout::counter_increment); + __ j(*update_done); + __ bind(next_test); + } @@ -6479,7 +7285,7 @@ index 000000000..222e3e97e + __ ld(t1, recv_addr); + __ bnez(t1, next_test); + __ sd(recv, recv_addr); -+ __ mv(t1, DataLayout::counter_increment); ++ __ li(t1, DataLayout::counter_increment); + __ sd(t1, Address(mdo, 
md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i)))); + __ j(*update_done); + __ bind(next_test); @@ -6505,7 +7311,7 @@ index 000000000..222e3e97e + __ load_klass(klass_RInfo, obj); + if (k->is_loaded()) { + // See if we get an immediate positive hit -+ __ ld(t0, Address(klass_RInfo, long(k->super_check_offset()))); ++ __ ld(t0, Address(klass_RInfo, int64_t(k->super_check_offset()))); + if ((juint)in_bytes(Klass::secondary_super_cache_offset()) != k->super_check_offset()) { + __ bne(k_RInfo, t0, *failure_target, /* is_far */ true); + // successful cast, fall through to profile or jump @@ -6550,10 +7356,7 @@ index 000000000..222e3e97e + // Object is null, update MDO and exit + Register mdo = klass_RInfo; + __ mov_metadata(mdo, md->constant_encoding()); -+ Address data_addr = __ form_address(mdo, /* base */ -+ md->byte_offset_of_slot(data, DataLayout::flags_offset()), /* offset */ -+ 12, /* expect offset bits */ -+ t1); /* temp reg */ ++ Address data_addr = __ form_address(t1, mdo, md->byte_offset_of_slot(data, DataLayout::flags_offset())); + __ lbu(t0, data_addr); + __ ori(t0, t0, BitData::null_seen_byte_constant()); + __ sb(t0, data_addr); @@ -6667,7 +7470,7 @@ index 000000000..222e3e97e + assert(op->addr()->is_address(), "what else?"); + LIR_Address* addr_ptr = op->addr()->as_address_ptr(); + assert(addr_ptr->disp() == 0, "need 0 disp"); -+ assert(addr_ptr->index() == LIR_OprDesc::illegalOpr(), "need 0 index"); ++ assert(addr_ptr->index() == LIR_Opr::illegalOpr(), "need 0 index"); + addr = as_reg(addr_ptr->base()); + } + Register newval = as_reg(op->new_value()); @@ -6758,7 +7561,12 @@ index 000000000..222e3e97e + } +} + -+void LIR_Assembler::align_call(LIR_Code code) { } ++void LIR_Assembler::align_call(LIR_Code code) { ++ // With RVC a call instruction may get 2-byte aligned. ++ // The address of the call instruction needs to be 4-byte aligned to ++ // ensure that it does not span a cache line so that it can be patched. ++ __ align(4); ++} + +void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) { + address call = __ trampoline_call(Address(op->addr(), rtype)); @@ -6778,10 +7586,9 @@ index 000000000..222e3e97e + add_call_info(code_offset(), op->info()); +} + -+void LIR_Assembler::vtable_call(LIR_OpJavaCall* op) { ShouldNotReachHere(); } -+ +void LIR_Assembler::emit_static_call_stub() { + address call_pc = __ pc(); ++ assert((__ offset() % 4) == 0, "bad alignment"); + address stub = __ start_a_stub(call_stub_size()); + if (stub == NULL) { + bailout("static call stub overflow"); @@ -6793,7 +7600,8 @@ index 000000000..222e3e97e + __ relocate(static_stub_Relocation::spec(call_pc)); + __ emit_static_call_stub(); + -+ assert(__ offset() - start + CompiledStaticCall::to_trampoline_stub_size() <= call_stub_size(), "stub too big"); ++ assert(__ offset() - start + CompiledStaticCall::to_trampoline_stub_size() ++ <= call_stub_size(), "stub too big"); + __ end_a_stub(); +} + @@ -6838,7 +7646,6 @@ index 000000000..222e3e97e + __ j(_unwind_handler_entry); +} + -+ +void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, LIR_Opr count, LIR_Opr dest, LIR_Opr tmp) { + Register left_reg = left->is_single_cpu() ? left->as_register() : left->as_register_lo(); + Register dest_reg = dest->is_single_cpu() ? dest->as_register() : dest->as_register_lo(); @@ -6866,7 +7673,6 @@ index 000000000..222e3e97e + } +} + -+ +void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, jint count, LIR_Opr dest) { + Register left_reg = left->is_single_cpu() ? 
left->as_register() : left->as_register_lo(); + Register dest_reg = dest->is_single_cpu() ? dest->as_register() : dest->as_register_lo(); @@ -6901,22 +7707,16 @@ index 000000000..222e3e97e + } +} + -+ -+ +void LIR_Assembler::emit_lock(LIR_OpLock* op) { + Register obj = op->obj_opr()->as_register(); // may not be an oop + Register hdr = op->hdr_opr()->as_register(); + Register lock = op->lock_opr()->as_register(); -+ if (!UseFastLocking) { ++ if (UseHeavyMonitors) { + __ j(*op->stub()->entry()); + } else if (op->code() == lir_lock) { -+ Register scratch = noreg; -+ if (UseBiasedLocking) { -+ scratch = op->scratch_opr()->as_register(); -+ } + assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header"); + // add debug info for NullPointerException only if one is possible -+ int null_check_offset = __ lock_object(hdr, obj, lock, scratch, *op->stub()->entry()); ++ int null_check_offset = __ lock_object(hdr, obj, lock, *op->stub()->entry()); + if (op->info() != NULL) { + add_debug_info_for_null_check(null_check_offset, op->info()); + } @@ -6929,6 +7729,23 @@ index 000000000..222e3e97e + __ bind(*op->stub()->continuation()); +} + ++void LIR_Assembler::emit_load_klass(LIR_OpLoadKlass* op) { ++ Register obj = op->obj()->as_pointer_register(); ++ Register result = op->result_opr()->as_pointer_register(); ++ ++ CodeEmitInfo* info = op->info(); ++ if (info != NULL) { ++ add_debug_info_for_null_check_here(info); ++ } ++ ++ if (UseCompressedClassPointers) { ++ __ lwu(result, Address(obj, oopDesc::klass_offset_in_bytes())); ++ __ decode_klass_not_null(result); ++ } else { ++ __ ld(result, Address(obj, oopDesc::klass_offset_in_bytes())); ++ } ++} ++ +void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { + ciMethod* method = op->profiled_method(); + int bci = op->profiled_bci(); @@ -6962,7 +7779,7 @@ index 000000000..222e3e97e + ciKlass* receiver = vc_data->receiver(i); + if (known_klass->equals(receiver)) { + Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); -+ __ increment(data_addr, DataLayout::counter_increment); ++ __ add_memory_int64(data_addr, DataLayout::counter_increment); + return; + } + } @@ -6978,7 +7795,7 @@ index 000000000..222e3e97e + __ mov_metadata(t1, known_klass->constant_encoding()); + __ sd(t1, recv_addr); + Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); -+ __ increment(data_addr, DataLayout::counter_increment); ++ __ add_memory_int64(data_addr, DataLayout::counter_increment); + return; + } + } @@ -6988,13 +7805,13 @@ index 000000000..222e3e97e + type_profile_helper(mdo, md, data, recv, &update_done); + // Receiver did not match any saved receiver and there is no empty row for it. + // Increment total counter to indicate polymorphic case. 
-+ __ increment(counter_addr, DataLayout::counter_increment); ++ __ add_memory_int64(counter_addr, DataLayout::counter_increment); + + __ bind(update_done); + } + } else { + // Static call -+ __ increment(counter_addr, DataLayout::counter_increment); ++ __ add_memory_int64(counter_addr, DataLayout::counter_increment); + } +} + @@ -7029,7 +7846,7 @@ index 000000000..222e3e97e + + if (TypeEntries::is_type_none(current_klass)) { + __ beqz(t1, none); -+ __ mv(t0, (u1)TypeEntries::null_seen); ++ __ li(t0, (u1)TypeEntries::null_seen); + __ beq(t0, t1, none); + // There is a chance that the checks above (re-reading profiling + // data from memory) fail if another thread has just set the @@ -7079,7 +7896,7 @@ index 000000000..222e3e97e + Label ok; + __ ld(t0, mdo_addr); + __ beqz(t0, ok); -+ __ mv(t1, (u1)TypeEntries::null_seen); ++ __ li(t1, (u1)TypeEntries::null_seen); + __ beq(t0, t1, ok); + // may have been set by another thread + __ membar(MacroAssembler::LoadLoad); @@ -7199,32 +8016,30 @@ index 000000000..222e3e97e + + +void LIR_Assembler::leal(LIR_Opr addr, LIR_Opr dest, LIR_PatchCode patch_code, CodeEmitInfo* info) { -+#if INCLUDE_SHENANDOAHGC -+ if (UseShenandoahGC && patch_code != lir_patch_none) { ++ if (patch_code != lir_patch_none) { + deoptimize_trap(info); + return; + } -+#endif -+ assert(patch_code == lir_patch_none, "Patch code not supported"); ++ + LIR_Address* adr = addr->as_address_ptr(); + Register dst = dest->as_register_lo(); + + assert_different_registers(dst, t0); -+ if(adr->base()->is_valid() && dst == adr->base()->as_pointer_register() && (!adr->index()->is_cpu_register())) { -+ ++ if (adr->base()->is_valid() && dst == adr->base()->as_pointer_register() && (!adr->index()->is_cpu_register())) { ++ int scale = adr->scale(); + intptr_t offset = adr->disp(); + LIR_Opr index_op = adr->index(); -+ int scale = adr->scale(); -+ if(index_op->is_constant()) { ++ if (index_op->is_constant()) { + offset += ((intptr_t)index_op->as_constant_ptr()->as_jint()) << scale; + } + -+ if(!is_imm_in_range(offset, 12, 0)) { ++ if (!is_imm_in_range(offset, 12, 0)) { + __ la(t0, as_Address(adr)); + __ mv(dst, t0); + return; + } + } ++ + __ la(dst, as_Address(adr)); +} + @@ -7248,8 +8063,7 @@ index 000000000..222e3e97e + +void LIR_Assembler::volatile_move_op(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info) { + if (dest->is_address() || src->is_address()) { -+ move_op(src, dest, type, lir_patch_none, info, /* pop_fpu_stack */ false, -+ /* unaligned */ false, /* wide */ false); ++ move_op(src, dest, type, lir_patch_none, info, /* pop_fpu_stack */ false, /* wide */ false); + } else { + ShouldNotReachHere(); + } @@ -7326,7 +8140,7 @@ index 000000000..222e3e97e +void LIR_Assembler::atomic_op(LIR_Code code, LIR_Opr src, LIR_Opr data, LIR_Opr dest, LIR_Opr tmp_op) { + Address addr = as_Address(src->as_address_ptr()); + BasicType type = src->type(); -+ bool is_oop = type == T_OBJECT || type == T_ARRAY; ++ bool is_oop = is_reference_type(type); + + get_op(type); + @@ -7376,41 +8190,6 @@ index 000000000..222e3e97e + return exact_log2(elem_size); +} + -+Address LIR_Assembler::as_Address(LIR_Address* addr, Register tmp) { -+ if (addr->base()->is_illegal()) { -+ assert(addr->index()->is_illegal(), "must be illegal too"); -+ __ movptr(tmp, addr->disp()); -+ return Address(tmp, 0); -+ } -+ -+ Register base = addr->base()->as_pointer_register(); -+ LIR_Opr index_op = addr->index(); -+ int scale = addr->scale(); -+ -+ if (index_op->is_illegal()) { -+ return Address(base, addr->disp()); -+ } else if 
(index_op->is_cpu_register()) { -+ Register index; -+ if (index_op->is_single_cpu()) { -+ index = index_op->as_register(); -+ } else { -+ index = index_op->as_register_lo(); -+ } -+ if (scale != 0) { -+ __ shadd(tmp, index, base, tmp, scale); -+ } else { -+ __ add(tmp, base, index); -+ } -+ return Address(tmp, addr->disp()); -+ } else if (index_op->is_constant()) { -+ intptr_t addr_offset = (((intptr_t)index_op->as_constant_ptr()->as_jint()) << scale) + addr->disp(); -+ return Address(base, addr_offset); -+ } -+ -+ Unimplemented(); -+ return Address(); -+} -+ +// helper functions which checks for overflow and sets bailout if it +// occurs. Always returns a valid embeddable pointer but in the +// bailout case the pointer won't be to unique storage. @@ -7444,16 +8223,6 @@ index 000000000..222e3e97e + } +} + -+void LIR_Assembler::add_debug_info_for_branch(address adr, CodeEmitInfo* info) { -+ _masm->code_section()->relocate(adr, relocInfo::poll_type); -+ int pc_offset = code_offset(); -+ flush_debug_info(pc_offset); -+ info->record_debug_info(compilation()->debug_info_recorder(), pc_offset); -+ if (info->exception_handlers() != NULL) { -+ compilation()->add_exception_handlers_for_pco(pc_offset, info->exception_handlers()); -+ } -+} -+ +void LIR_Assembler::casw(Register addr, Register newval, Register cmpval) { + __ cmpxchg(addr, cmpval, newval, Assembler::int32, Assembler::aq /* acquire */, + Assembler::rl /* release */, t0, true /* result as bool */); @@ -7498,7 +8267,6 @@ index 000000000..222e3e97e + add_call_info_here(info); +} + -+ +void LIR_Assembler::check_exact_klass(Register tmp, ciKlass* exact_klass) { + Label ok; + __ load_klass(tmp, tmp); @@ -7588,6 +8356,16 @@ index 000000000..222e3e97e + __ bind(done); +} + ++void LIR_Assembler::add_debug_info_for_branch(address adr, CodeEmitInfo* info) { ++ _masm->code_section()->relocate(adr, relocInfo::poll_type); ++ int pc_offset = code_offset(); ++ flush_debug_info(pc_offset); ++ info->record_debug_info(compilation()->debug_info_recorder(), pc_offset); ++ if (info->exception_handlers() != NULL) { ++ compilation()->add_exception_handlers_for_pco(pc_offset, info->exception_handlers()); ++ } ++} ++ +void LIR_Assembler::type_profile(Register obj, ciMethodData* md, Register klass_RInfo, Register k_RInfo, + ciProfileData* data, Label* success, Label* failure, + Label& profile_cast_success, Label& profile_cast_failure) { @@ -7602,10 +8380,7 @@ index 000000000..222e3e97e + + __ bind(profile_cast_failure); + __ mov_metadata(mdo, md->constant_encoding()); -+ Address counter_addr = __ form_address(mdo, /* base */ -+ md->byte_offset_of_slot(data, CounterData::count_offset()), /* offset */ -+ 12, /* expect offset bits */ -+ t1); /* temp reg */ ++ Address counter_addr = __ form_address(t1, mdo, md->byte_offset_of_slot(data, CounterData::count_offset())); + __ ld(t0, counter_addr); + __ addi(t0, t0, -DataLayout::counter_increment); + __ sd(t0, counter_addr); @@ -7687,21 +8462,21 @@ index 000000000..222e3e97e + assert(offset_from_rsp_in_words >= 0, "invalid offset from rsp"); + int offset_from_rsp_in_bytes = offset_from_rsp_in_words * BytesPerWord; + assert(offset_from_rsp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset"); -+ __ mv(t0, c); ++ __ li(t0, c); + __ sd(t0, Address(sp, offset_from_rsp_in_bytes)); +} + +#undef __ diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp new file mode 100644 -index 000000000..11a47fd6e +index 00000000000..051328c3a8a --- /dev/null +++ 
b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp @@ -0,0 +1,132 @@ +/* -+ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -7743,9 +8518,6 @@ index 000000000..11a47fd6e + + Address as_Address(LIR_Address* addr, Register tmp); + -+ // Ensure we have a valid Address (base+offset) to a stack-slot. -+ Address stack_slot_address(int index, uint shift, int adjust = 0); -+ + // helper functions which checks for overflow and sets bailout if it + // occurs. Always returns a valid embeddable pointer but in the + // bailout case the pointer won't be to unique storage. @@ -7753,6 +8525,9 @@ index 000000000..11a47fd6e + address double_constant(double d); + address int_constant(jlong n); + ++ // Ensure we have a valid Address (base + offset) to a stack-slot. ++ Address stack_slot_address(int index, uint shift, int adjust = 0); ++ + // Record the type of the receiver in ReceiverTypeData + void type_profile_helper(Register mdo, + ciMethodData *md, ciProfileData *data, @@ -7768,17 +8543,15 @@ index 000000000..11a47fd6e + + void deoptimize_trap(CodeEmitInfo *info); + -+ enum -+ { -+ // see emit_static_call_stub for detail: ++ enum { ++ // See emit_static_call_stub for detail + // CompiledStaticCall::to_interp_stub_size() (14) + CompiledStaticCall::to_trampoline_stub_size() (1 + 3 + address) + _call_stub_size = 14 * NativeInstruction::instruction_size + + (NativeInstruction::instruction_size + NativeCallTrampolineStub::instruction_size), -+ _call_aot_stub_size = 0, -+ // see emit_exception_handler for detail: ++ // See emit_exception_handler for detail + // verify_not_null_oop + far_call + should_not_reach_here + invalidate_registers(DEBUG_ONLY) + _exception_handler_size = DEBUG_ONLY(584) NOT_DEBUG(548), // or smaller -+ // see emit_deopt_handler for detail ++ // See emit_deopt_handler for detail + // auipc (1) + far_jump (6 or 2) + _deopt_handler_size = 1 * NativeInstruction::instruction_size + + 6 * NativeInstruction::instruction_size // or smaller @@ -7789,10 +8562,12 @@ index 000000000..11a47fd6e + void check_no_conflict(ciKlass* exact_klass, intptr_t current_klass, Register tmp, Address mdo_addr, Label &next); + + void check_exact_klass(Register tmp, ciKlass* exact_klass); ++ + void check_null(Register tmp, Label &update, intptr_t current_klass, Address mdo_addr, bool do_update, Label &next); + + void (MacroAssembler::*add)(Register prev, RegisterOrConstant incr, Register addr); + void (MacroAssembler::*xchg)(Register prev, Register newv, Register addr); ++ + void get_op(BasicType type); + + // emit_typecheck_helper sub functions @@ -7832,12 +8607,12 @@ index 000000000..11a47fd6e +#endif // CPU_RISCV_C1_LIRASSEMBLER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp new file mode 100644 -index 000000000..8ba9ed66d +index 00000000000..e126f148cdf --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -@@ -0,0 +1,1083 @@ +@@ -0,0 +1,1075 @@ +/* -+ * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. 
++ * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -7876,6 +8651,7 @@ index 000000000..8ba9ed66d +#include "ci/ciTypeArrayKlass.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" ++#include "utilities/powerOfTwo.hpp" +#include "vmreg_riscv.inline.hpp" + +#ifdef ASSERT @@ -7980,7 +8756,6 @@ index 000000000..8ba9ed66d + return false; +} + -+ +bool LIRGenerator::can_inline_as_constant(LIR_Const* c) const { + if (c->as_constant() != NULL) { + long constant = 0; @@ -7996,7 +8771,6 @@ index 000000000..8ba9ed66d + return false; +} + -+ +LIR_Opr LIRGenerator::safepoint_poll_register() { + return LIR_OprFact::illegalOpr; +} @@ -8004,7 +8778,7 @@ index 000000000..8ba9ed66d +LIR_Address* LIRGenerator::generate_address(LIR_Opr base, LIR_Opr index, + int shift, int disp, BasicType type) { + assert(base->is_register(), "must be"); -+ ++ + if (index->is_constant()) { + LIR_Const *constant = index->as_constant_ptr(); + jlong c; @@ -8031,17 +8805,22 @@ index 000000000..8ba9ed66d + int offset_in_bytes = arrayOopDesc::base_offset_in_bytes(type); + int elem_size = type2aelembytes(type); + int shift = exact_log2(elem_size); -+ + return generate_address(array_opr, index_opr, shift, offset_in_bytes, type); +} + +LIR_Opr LIRGenerator::load_immediate(int x, BasicType type) { ++ LIR_Opr r; + switch (type) { -+ case T_LONG: return LIR_OprFact::longConst(x); -+ case T_INT: return LIR_OprFact::intConst(x); -+ default: ShouldNotReachHere(); ++ case T_LONG: ++ r = LIR_OprFact::longConst(x); ++ break; ++ case T_INT: ++ r = LIR_OprFact::intConst(x); ++ break; ++ default: ++ ShouldNotReachHere(); + } -+ return NULL; ++ return r; +} + +void LIRGenerator::increment_counter(address counter, BasicType type, int step) { @@ -8111,11 +8890,6 @@ index 000000000..8ba9ed66d + + // "lock" stores the address of the monitor stack slot, so this is not an oop + LIR_Opr lock = new_register(T_INT); -+ // Need a tmp register for biased locking -+ LIR_Opr tmp = LIR_OprFact::illegalOpr; -+ if (UseBiasedLocking) { -+ tmp = new_register(T_INT); -+ } + + CodeEmitInfo* info_for_exception = NULL; + if (x->needs_null_check()) { @@ -8124,7 +8898,7 @@ index 000000000..8ba9ed66d + // this CodeEmitInfo must not have the xhandlers because here the + // object is already locked (xhandlers expect object to be unlocked) + CodeEmitInfo* info = state_for(x, x->state(), true); -+ monitor_enter(obj.result(), lock, syncTempOpr(), tmp, ++ monitor_enter(obj.result(), lock, syncTempOpr(), LIR_OprFact::illegalOpr, + x->monitor_no(), info_for_exception, info); +} + @@ -8194,12 +8968,7 @@ index 000000000..8ba9ed66d + right.load_item(); + + LIR_Opr reg = rlock(x); -+ LIR_Opr tmp = LIR_OprFact::illegalOpr; -+ if (x->is_strictfp() && (x->op() == Bytecodes::_dmul || x->op() == Bytecodes::_ddiv)) { -+ tmp = new_register(T_DOUBLE); -+ } -+ -+ arithmetic_op_fpu(x->op(), reg, left.result(), right.result(), x->is_strictfp()); ++ arithmetic_op_fpu(x->op(), reg, left.result(), right.result()); + + set_result(x, round_item(reg)); +} @@ -8208,7 +8977,7 @@ index 000000000..8ba9ed66d +void LIRGenerator::do_ArithmeticOp_Long(ArithmeticOp* x) { + + // missing test if instr is commutative and if we should swap -+ LIRItem left(x->x(), this); ++ LIRItem left(x->x(), this); + LIRItem right(x->y(), this); + + if (x->op() == 
Bytecodes::_ldiv || x->op() == Bytecodes::_lrem) { @@ -8232,7 +9001,7 @@ index 000000000..8ba9ed66d + if (need_zero_check) { + CodeEmitInfo* info = state_for(x); + __ cmp(lir_cond_equal, right.result(), LIR_OprFact::longConst(0)); -+ __ branch(lir_cond_equal, right.result()->type(), new DivByZeroStub(info)); ++ __ branch(lir_cond_equal, new DivByZeroStub(info)); + } + + rlock_result(x); @@ -8306,16 +9075,16 @@ index 000000000..8ba9ed66d + if (need_zero_check) { + CodeEmitInfo* info = state_for(x); + __ cmp(lir_cond_equal, right_arg->result(), LIR_OprFact::longConst(0)); -+ __ branch(lir_cond_equal, right.result()->type(), new DivByZeroStub(info)); ++ __ branch(lir_cond_equal, new DivByZeroStub(info)); + } + + LIR_Opr ill = LIR_OprFact::illegalOpr; -+ + if (x->op() == Bytecodes::_irem) { + __ irem(left_arg->result(), right_arg->result(), x->operand(), ill, NULL); + } else if (x->op() == Bytecodes::_idiv) { + __ idiv(left_arg->result(), right_arg->result(), x->operand(), ill, NULL); + } ++ + } else if (x->op() == Bytecodes::_iadd || x->op() == Bytecodes::_isub) { + if (right.is_constant() && + ((x->op() == Bytecodes::_iadd && !Assembler::operand_valid_for_add_immediate(right.get_jint_constant())) || @@ -8389,7 +9158,7 @@ index 000000000..8ba9ed66d + left.load_item(); + rlock_result(x); + ValueTag tag = right.type()->tag(); -+ if(right.is_constant() && ++ if (right.is_constant() && + ((tag == longTag && Assembler::operand_valid_for_add_immediate(right.get_jlong_constant())) || + (tag == intTag && Assembler::operand_valid_for_add_immediate(right.get_jint_constant())))) { + right.dont_load_item(); @@ -8438,7 +9207,7 @@ index 000000000..8ba9ed66d + new_value.load_item(); + cmp_value.load_item(); + LIR_Opr result = new_register(T_INT); -+ if (type == T_OBJECT || type == T_ARRAY) { ++ if (is_reference_type(type)) { + __ cas_obj(addr, cmp_value.result(), new_value.result(), new_register(T_INT), new_register(T_INT), result); + } else if (type == T_INT) { + __ cas_int(addr->as_address_ptr()->base(), cmp_value.result(), new_value.result(), ill, ill); @@ -8452,7 +9221,7 @@ index 000000000..8ba9ed66d +} + +LIR_Opr LIRGenerator::atomic_xchg(BasicType type, LIR_Opr addr, LIRItem& value) { -+ bool is_oop = type == T_OBJECT || type == T_ARRAY; ++ bool is_oop = is_reference_type(type); + LIR_Opr result = new_register(type); + value.load_item(); + assert(type == T_INT || is_oop LP64_ONLY( || type == T_LONG ), "unexpected type"); @@ -8485,14 +9254,16 @@ index 000000000..8ba9ed66d + do_LibmIntrinsic(x); + break; + case vmIntrinsics::_dabs: // fall through -+ case vmIntrinsics::_dsqrt: { ++ case vmIntrinsics::_dsqrt: // fall through ++ case vmIntrinsics::_dsqrt_strict: { + assert(x->number_of_arguments() == 1, "wrong type"); + LIRItem value(x->argument_at(0), this); + value.load_item(); + LIR_Opr dst = rlock_result(x); + + switch (x->id()) { -+ case vmIntrinsics::_dsqrt: { ++ case vmIntrinsics::_dsqrt: // fall through ++ case vmIntrinsics::_dsqrt_strict: { + __ sqrt(value.result(), dst, LIR_OprFact::illegalOpr); + break; + } @@ -8892,9 +9663,9 @@ index 000000000..8ba9ed66d + profile_branch(x, cond); + move_to_phi(x->state()); + if (x->x()->type()->is_float_kind()) { -+ __ branch(lir_cond(cond), right->type(), x->tsux(), x->usux()); ++ __ branch(lir_cond(cond), x->tsux(), x->usux()); + } else { -+ __ branch(lir_cond(cond), right->type(), x->tsux()); ++ __ branch(lir_cond(cond), x->tsux()); + } + assert(x->default_sux() == x->fsux(), "wrong destination above"); + __ jump(x->default_sux()); @@ -8913,20 +9684,16 @@ 
index 000000000..8ba9ed66d + +void LIRGenerator::volatile_field_load(LIR_Address* address, LIR_Opr result, + CodeEmitInfo* info) { -+ if (!UseBarriersForVolatile) { -+ __ membar(); -+ } -+ + __ volatile_load_mem_reg(address, result, info); +} diff --git a/src/hotspot/cpu/riscv/c1_LIR_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIR_riscv.cpp new file mode 100644 -index 000000000..00e33e882 +index 00000000000..5f1c394ab3d --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LIR_riscv.cpp @@ -0,0 +1,55 @@ +/* -+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2016, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -8954,40 +9721,40 @@ index 000000000..00e33e882 +#include "asm/register.hpp" +#include "c1/c1_LIR.hpp" + -+FloatRegister LIR_OprDesc::as_float_reg() const { ++FloatRegister LIR_Opr::as_float_reg() const { + return as_FloatRegister(fpu_regnr()); +} + -+FloatRegister LIR_OprDesc::as_double_reg() const { ++FloatRegister LIR_Opr::as_double_reg() const { + return as_FloatRegister(fpu_regnrLo()); +} + +// Reg2 unused. +LIR_Opr LIR_OprFact::double_fpu(int reg1, int reg2) { + assert(as_FloatRegister(reg2) == fnoreg, "Not used on this platform"); -+ return (LIR_Opr)(intptr_t)((reg1 << LIR_OprDesc::reg1_shift) | -+ (reg1 << LIR_OprDesc::reg2_shift) | -+ LIR_OprDesc::double_type | -+ LIR_OprDesc::fpu_register | -+ LIR_OprDesc::double_size); ++ return (LIR_Opr)(intptr_t)((reg1 << LIR_Opr::reg1_shift) | ++ (reg1 << LIR_Opr::reg2_shift) | ++ LIR_Opr::double_type | ++ LIR_Opr::fpu_register | ++ LIR_Opr::double_size); +} + +#ifndef PRODUCT +void LIR_Address::verify() const { + assert(base()->is_cpu_register(), "wrong base operand"); + assert(index()->is_illegal() || index()->is_double_cpu() || index()->is_single_cpu(), "wrong index operand"); -+ assert(base()->type() == T_OBJECT || base()->type() == T_LONG || base()->type() == T_METADATA, -+ "wrong type for addresses"); ++ assert(base()->type() == T_ADDRESS || base()->type() == T_OBJECT || base()->type() == T_LONG || ++ base()->type() == T_METADATA, "wrong type for addresses"); +} +#endif // PRODUCT diff --git a/src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp b/src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp new file mode 100644 -index 000000000..60dcdc0e1 +index 00000000000..78a61128bdd --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp @@ -0,0 +1,33 @@ +/* -+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -9021,14 +9788,14 @@ index 000000000..60dcdc0e1 +} diff --git a/src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp b/src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp new file mode 100644 -index 000000000..f0aa08a39 +index 00000000000..d7ca7b0fd05 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp -@@ -0,0 +1,85 @@ +@@ -0,0 +1,83 @@ +/* -+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. 
++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -9063,7 +9830,6 @@ index 000000000..f0aa08a39 + return 1; +} + -+ +inline bool LinearScan::requires_adjacent_regs(BasicType type) { + return false; +} @@ -9085,8 +9851,8 @@ index 000000000..f0aa08a39 + return false; +} + -+ +inline void LinearScan::pd_add_temps(LIR_Op* op) { ++ // No special case behaviours yet +} + + @@ -9099,8 +9865,8 @@ index 000000000..f0aa08a39 + _first_reg = pd_first_callee_saved_reg; + _last_reg = pd_last_callee_saved_reg; + return true; -+ } else if (cur->type() == T_INT || cur->type() == T_LONG || -+ cur->type() == T_OBJECT || cur->type() == T_ADDRESS || cur->type() == T_METADATA) { ++ } else if (cur->type() == T_INT || cur->type() == T_LONG || cur->type() == T_OBJECT || ++ cur->type() == T_ADDRESS || cur->type() == T_METADATA) { + _first_reg = pd_first_cpu_reg; + _last_reg = pd_last_allocatable_cpu_reg; + return true; @@ -9108,18 +9874,17 @@ index 000000000..f0aa08a39 + return false; +} + -+ +#endif // CPU_RISCV_C1_LINEARSCAN_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp new file mode 100644 -index 000000000..370ec45c6 +index 00000000000..6f656c8c533 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -@@ -0,0 +1,441 @@ +@@ -0,0 +1,432 @@ +/* -+ * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -9143,15 +9908,16 @@ index 000000000..370ec45c6 + */ + +#include "precompiled.hpp" ++#include "c1/c1_LIR.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "c1/c1_Runtime1.hpp" +#include "classfile/systemDictionary.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" +#include "gc/shared/collectedHeap.hpp" +#include "interpreter/interpreter.hpp" +#include "oops/arrayOop.hpp" -+#include "oops/markOop.hpp" ++#include "oops/markWord.hpp" +#include "runtime/basicLock.hpp" -+#include "runtime/biasedLocking.hpp" +#include "runtime/os.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" @@ -9167,7 +9933,7 @@ index 000000000..370ec45c6 + } +} + -+int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Register tmp, Label& slow_case) { ++int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Label& slow_case) { + const int aligned_mask = BytesPerWord - 1; + const int hdr_offset = oopDesc::mark_offset_in_bytes(); + assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); @@ -9179,17 +9945,19 @@ index 000000000..370ec45c6 + // save object being locked into the BasicObjectLock + sd(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); + -+ if (UseBiasedLocking) { -+ assert(tmp != noreg, "should have tmp register at this point"); -+ null_check_offset = biased_locking_enter(disp_hdr, obj, hdr, tmp, false, done, &slow_case); -+ } else { -+ null_check_offset = offset(); ++ null_check_offset = offset(); ++ ++ if (DiagnoseSyncOnValueBasedClasses != 0) { ++ load_klass(hdr, obj); ++ lwu(hdr, Address(hdr, Klass::access_flags_offset())); ++ andi(t0, hdr, JVM_ACC_IS_VALUE_BASED_CLASS); ++ bnez(t0, slow_case, true /* is_far */); + } + + // Load object header + ld(hdr, Address(obj, hdr_offset)); + // and mark it as unlocked -+ ori(hdr, hdr, markOopDesc::unlocked_value); ++ ori(hdr, hdr, markWord::unlocked_value); + // save unlocked object header into the displaced header location on the stack + sd(hdr, Address(disp_hdr, 0)); + // test if object header is still the same (i.e. 
unlocked), and if so, store the @@ -9212,7 +9980,7 @@ index 000000000..370ec45c6 + // assuming both the stack pointer and page_size have their least + // significant 2 bits cleared and page_size is a power of 2 + sub(hdr, hdr, sp); -+ mv(t0, aligned_mask - os::vm_page_size()); ++ li(t0, aligned_mask - os::vm_page_size()); + andr(hdr, hdr, t0); + // for recursive locking, the result is zero => save it in the displaced header + // location (NULL in the displaced hdr location indicates recursive locking) @@ -9220,10 +9988,6 @@ index 000000000..370ec45c6 + // otherwise we don't care about the result and handle locking via runtime call + bnez(hdr, slow_case, /* is_far */ true); + bind(done); -+ if (PrintBiasedLockingStatistics) { -+ la(t1, ExternalAddress((address)BiasedLocking::fast_path_entry_count_addr())); -+ incrementw(Address(t1, 0)); -+ } + return null_check_offset; +} + @@ -9233,21 +9997,13 @@ index 000000000..370ec45c6 + assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); + Label done; + -+ if (UseBiasedLocking) { -+ // load object -+ ld(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); -+ biased_locking_exit(obj, hdr, done); -+ } -+ + // load displaced header + ld(hdr, Address(disp_hdr, 0)); + // if the loaded hdr is NULL we had recursive locking + // if we had recursive locking, we are done + beqz(hdr, done); -+ if (!UseBiasedLocking) { -+ // load object -+ ld(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); -+ } ++ // load object ++ ld(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); + verify_oop(obj); + // test if object header is pointing to the displaced header, and if so, restore + // the displaced header in the object - if the object header is not pointing to @@ -9274,13 +10030,8 @@ index 000000000..370ec45c6 + +void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, Register tmp1, Register tmp2) { + assert_different_registers(obj, klass, len); -+ if (UseBiasedLocking && !len->is_valid()) { -+ assert_different_registers(obj, klass, len, tmp1, tmp2); -+ ld(tmp1, Address(klass, Klass::prototype_header_offset())); -+ } else { -+ // This assumes that all prototype bits fitr in an int32_t -+ mv(tmp1, (int32_t)(intptr_t)markOopDesc::prototype()); -+ } ++ // This assumes that all prototype bits fitr in an int32_t ++ mv(tmp1, (int32_t)(intptr_t)markWord::prototype().value()); + sd(tmp1, Address(obj, oopDesc::mark_offset_in_bytes())); + + if (UseCompressedClassPointers) { // Take care not to kill klass @@ -9298,7 +10049,7 @@ index 000000000..370ec45c6 +} + +// preserves obj, destroys len_in_bytes -+void C1_MacroAssembler::initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register tmp1) { ++void C1_MacroAssembler::initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register tmp) { + assert(hdr_size_in_bytes >= 0, "header size must be positive or 0"); + Label done; + @@ -9310,7 +10061,7 @@ index 000000000..370ec45c6 + if (hdr_size_in_bytes) { + add(obj, obj, hdr_size_in_bytes); + } -+ zero_memory(obj, len_in_bytes, tmp1); ++ zero_memory(obj, len_in_bytes, tmp); + if (hdr_size_in_bytes) { + sub(obj, obj, hdr_size_in_bytes); + } @@ -9434,24 +10185,29 @@ index 000000000..370ec45c6 +} + +void C1_MacroAssembler::build_frame(int framesize, int bang_size_in_bytes) { -+ // If we have to make this method not-entrant we'll overwrite its -+ // first instruction with a jump. 
For this action to be legal we -+ // must ensure that this first instruction is a J, JAL or NOP. -+ // Make it a NOP. -+ nop(); + assert(bang_size_in_bytes >= framesize, "stack bang size incorrect"); + // Make sure there is enough stack space for this method's activation. -+ // Note that we do this before doing an enter(). ++ // Note that we do this before creating a frame. + generate_stack_overflow_check(bang_size_in_bytes); -+ MacroAssembler::build_frame(framesize + 2 * wordSize); // 2: multipler for wordSize ++ MacroAssembler::build_frame(framesize); ++ ++ // Insert nmethod entry barrier into frame. ++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->nmethod_entry_barrier(this); +} + +void C1_MacroAssembler::remove_frame(int framesize) { -+ MacroAssembler::remove_frame(framesize + 2 * wordSize); // 2: multiper for wordSize ++ MacroAssembler::remove_frame(framesize); +} + + -+void C1_MacroAssembler::verified_entry() { ++void C1_MacroAssembler::verified_entry(bool breakAtEntry) { ++ // If we have to make this method not-entrant we'll overwrite its ++ // first instruction with a jump. For this action to be legal we ++ // must ensure that this first instruction is a J, JAL or NOP. ++ // Make it a NOP. ++ ++ nop(); +} + +void C1_MacroAssembler::load_parameter(int offset_in_words, Register reg) { @@ -9539,9 +10295,9 @@ index 000000000..370ec45c6 + if (type == T_OBJECT || type == T_ARRAY) { + assert(cmpFlag == lir_cond_equal || cmpFlag == lir_cond_notEqual, "Should be equal or notEqual"); + if (cmpFlag == lir_cond_equal) { -+ oop_equal(op1, op2, label, is_far); ++ beq(op1, op2, label, is_far); + } else { -+ oop_nequal(op1, op2, label, is_far); ++ bne(op1, op2, label, is_far); + } + } else { + assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(c1_cond_branch) / sizeof(c1_cond_branch[0])), @@ -9559,14 +10315,14 @@ index 000000000..370ec45c6 +} diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp new file mode 100644 -index 000000000..5d0cefe89 +index 00000000000..dfd3c17d7c7 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp -@@ -0,0 +1,121 @@ +@@ -0,0 +1,120 @@ +/* -+ * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -9614,7 +10370,7 @@ index 000000000..5d0cefe89 + ); + + void initialize_header(Register obj, Register klass, Register len, Register tmp1, Register tmp2); -+ void initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register tmp1); ++ void initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register tmp); + + void float_cmp(bool is_float, int unordered_result, + FloatRegister f0, FloatRegister f1, @@ -9624,9 +10380,8 @@ index 000000000..5d0cefe89 + // hdr : must be x10, contents destroyed + // obj : must point to the object to lock, contents preserved + // disp_hdr: must point to the displaced header location, contents preserved -+ // tmp : temporary register, contents destroyed + // returns code offset at which to add null check debug information -+ int lock_object (Register swap, Register obj, Register disp_hdr, Register tmp, Label& slow_case); ++ int lock_object (Register swap, Register obj, Register disp_hdr, Label& slow_case); + + // unlocking + // hdr : contents destroyed @@ -9686,14 +10441,14 @@ index 000000000..5d0cefe89 +#endif // CPU_RISCV_C1_MACROASSEMBLER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp new file mode 100644 -index 000000000..f06e7b51c +index 00000000000..f523c9ed50a --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp -@@ -0,0 +1,1206 @@ +@@ -0,0 +1,1172 @@ +/* + * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -9723,9 +10478,11 @@ index 000000000..f06e7b51c +#include "c1/c1_MacroAssembler.hpp" +#include "c1/c1_Runtime1.hpp" +#include "compiler/disassembler.hpp" ++#include "compiler/oopMap.hpp" +#include "gc/shared/cardTable.hpp" +#include "gc/shared/cardTableBarrierSet.hpp" +#include "interpreter/interpreter.hpp" ++#include "memory/universe.hpp" +#include "nativeInst_riscv.hpp" +#include "oops/compiledICHolder.hpp" +#include "oops/oop.inline.hpp" @@ -9733,18 +10490,20 @@ index 000000000..f06e7b51c +#include "register_riscv.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/signature.hpp" ++#include "runtime/stubRoutines.hpp" +#include "runtime/vframe.hpp" +#include "runtime/vframeArray.hpp" ++#include "utilities/powerOfTwo.hpp" +#include "vmreg_riscv.inline.hpp" + + +// Implementation of StubAssembler + -+int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, int args_size) { ++int StubAssembler::call_RT(Register oop_result, Register metadata_result, address entry, int args_size) { + // setup registers -+ assert(!(oop_result1->is_valid() || metadata_result->is_valid()) || oop_result1 != metadata_result, ++ assert(!(oop_result->is_valid() || metadata_result->is_valid()) || oop_result != metadata_result, + "registers must be different"); -+ assert(oop_result1 != xthread && metadata_result != xthread, "registers must be different"); ++ assert(oop_result != xthread && metadata_result != xthread, "registers must be different"); + assert(args_size >= 0, "illegal args_size"); + bool align_stack = false; + @@ -9780,7 +10539,7 @@ index 000000000..f06e7b51c + beqz(t0, L); + // exception pending => remove activation and forward to exception handler + // make sure that the vm_results are cleared -+ if (oop_result1->is_valid()) { ++ if (oop_result->is_valid()) { + sd(zr, Address(xthread, JavaThread::vm_result_offset())); + } + if (metadata_result->is_valid()) { @@ -9797,8 +10556,8 @@ index 000000000..f06e7b51c + bind(L); + } + // get oop results if there are any and reset the values in the thread -+ if (oop_result1->is_valid()) { -+ get_vm_result(oop_result1, xthread); ++ if (oop_result->is_valid()) { ++ get_vm_result(oop_result, xthread); + } + if (metadata_result->is_valid()) { + get_vm_result_2(metadata_result, xthread); @@ -9806,12 +10565,12 @@ index 000000000..f06e7b51c + return call_offset; +} + -+int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, Register arg1) { ++int StubAssembler::call_RT(Register oop_result, Register metadata_result, address entry, Register arg1) { + mv(c_rarg1, arg1); -+ return call_RT(oop_result1, metadata_result, entry, 1); ++ return call_RT(oop_result, metadata_result, entry, 1); +} + -+int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, Register arg1, Register arg2) { ++int StubAssembler::call_RT(Register oop_result, Register metadata_result, address entry, Register arg1, Register arg2) { + const int arg_num = 2; + if (c_rarg1 == arg2) { + if (c_rarg2 == arg1) { @@ -9826,10 +10585,10 @@ index 000000000..f06e7b51c + mv(c_rarg1, arg1); + mv(c_rarg2, arg2); + } -+ return call_RT(oop_result1, metadata_result, entry, arg_num); ++ return call_RT(oop_result, metadata_result, entry, arg_num); +} + -+int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, Register arg1, Register arg2, Register arg3) { ++int StubAssembler::call_RT(Register oop_result, 
Register metadata_result, address entry, Register arg1, Register arg2, Register arg3) { + const int arg_num = 3; + // if there is any conflict use the stack + if (arg1 == c_rarg2 || arg1 == c_rarg3 || @@ -9838,31 +10597,36 @@ index 000000000..f06e7b51c + const int arg1_sp_offset = 0; + const int arg2_sp_offset = 1; + const int arg3_sp_offset = 2; -+ addi(sp, sp, -(arg_num * wordSize)); -+ sd(arg3, Address(sp, arg3_sp_offset * wordSize)); -+ sd(arg2, Address(sp, arg2_sp_offset * wordSize)); ++ addi(sp, sp, -(arg_num + 1) * wordSize); + sd(arg1, Address(sp, arg1_sp_offset * wordSize)); ++ sd(arg2, Address(sp, arg2_sp_offset * wordSize)); ++ sd(arg3, Address(sp, arg3_sp_offset * wordSize)); + + ld(c_rarg1, Address(sp, arg1_sp_offset * wordSize)); + ld(c_rarg2, Address(sp, arg2_sp_offset * wordSize)); + ld(c_rarg3, Address(sp, arg3_sp_offset * wordSize)); -+ addi(sp, sp, arg_num * wordSize); ++ addi(sp, sp, (arg_num + 1) * wordSize); + } else { + mv(c_rarg1, arg1); + mv(c_rarg2, arg2); + mv(c_rarg3, arg3); + } -+ return call_RT(oop_result1, metadata_result, entry, arg_num); ++ return call_RT(oop_result, metadata_result, entry, arg_num); +} + ++enum return_state_t { ++ does_not_return, requires_return ++}; ++ +// Implementation of StubFrame + +class StubFrame: public StackObj { + private: + StubAssembler* _sasm; ++ bool _return_state; + + public: -+ StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments); ++ StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments, return_state_t return_state=requires_return); + void load_argument(int offset_in_words, Register reg); + + ~StubFrame(); @@ -9880,8 +10644,9 @@ index 000000000..f06e7b51c + +#define __ _sasm-> + -+StubFrame::StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments) { ++StubFrame::StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments, return_state_t return_state) { + _sasm = sasm; ++ _return_state = return_state; + __ prologue(name, must_gc_arguments); +} + @@ -9893,7 +10658,11 @@ index 000000000..f06e7b51c + + +StubFrame::~StubFrame() { -+ __ epilogue(); ++ if (_return_state == requires_return) { ++ __ epilogue(); ++ } else { ++ __ should_not_reach_here(); ++ } + _sasm = NULL; +} + @@ -9919,7 +10688,7 @@ index 000000000..f06e7b51c +}; + +// Save off registers which might be killed by calls into the runtime. -+// Tries to smart of about FP registers. In particular we separate ++// Tries to smart of about FPU registers. In particular we separate +// saving and describing the FPU registers for deoptimization since we +// have to save the FPU registers twice if we describe them. The +// deopt blob is the only thing which needs to describe FPU registers. @@ -9936,11 +10705,12 @@ index 000000000..f06e7b51c + OopMap* oop_map = new OopMap(frame_size_in_slots, 0); + assert_cond(oop_map != NULL); + -+ // cpu_regs, caller save registers only, see FrameMap::initialize ++ // caller save registers only, see FrameMap::initialize + // in c1_FrameMap_riscv.cpp for detail. 
-+ const static Register caller_save_cpu_regs[FrameMap::max_nof_caller_save_cpu_regs] = {x7, x10, x11, x12, -+ x13, x14, x15, x16, x17, -+ x28, x29, x30, x31}; ++ const static Register caller_save_cpu_regs[FrameMap::max_nof_caller_save_cpu_regs] = { ++ x7, x10, x11, x12, x13, x14, x15, x16, x17, x28, x29, x30, x31 ++ }; ++ + for (int i = 0; i < FrameMap::max_nof_caller_save_cpu_regs; i++) { + Register r = caller_save_cpu_regs[i]; + int sp_offset = cpu_reg_save_offsets[r->encoding()]; @@ -10055,7 +10825,6 @@ index 000000000..f06e7b51c + assert_cond(oop_maps != NULL); + oop_maps->add_gc_map(call_offset, oop_map); + -+ __ should_not_reach_here(); + return oop_maps; +} + @@ -10103,9 +10872,7 @@ index 000000000..f06e7b51c + sasm->set_frame_size(frame_size); + break; + } -+ default: -+ __ should_not_reach_here(); -+ break; ++ default: ShouldNotReachHere(); + } + + // verify that only x10 and x13 are valid at this time @@ -10161,11 +10928,8 @@ index 000000000..f06e7b51c + restore_live_registers(sasm, id != handle_exception_nofpu_id); + break; + case handle_exception_from_callee_id: -+ // Pop the return address. -+ __ leave(); -+ __ ret(); // jump to exception handler + break; -+ default: ShouldNotReachHere(); ++ default: ShouldNotReachHere(); + } + + return oop_maps; @@ -10268,80 +11032,37 @@ index 000000000..f06e7b51c +#endif + __ reset_last_Java_frame(true); + -+ // check for pending exceptions -+ { -+ Label L; -+ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); -+ __ beqz(t0, L); -+ // exception pending => remove activation and forward to exception handler -+ -+ { Label L1; -+ __ bnez(x10, L1); // have we deoptimized? -+ __ far_jump(RuntimeAddress(Runtime1::entry_for(Runtime1::forward_exception_id))); -+ __ bind(L1); -+ } -+ -+ // the deopt blob expects exceptions in the special fields of -+ // JavaThread, so copy and clear pending exception. -+ -+ // load and clear pending exception -+ __ ld(x10, Address(xthread, Thread::pending_exception_offset())); -+ __ sd(zr, Address(xthread, Thread::pending_exception_offset())); -+ -+ // check that there is really a valid exception -+ __ verify_not_null_oop(x10); -+ -+ // load throwing pc: this is the return address of the stub -+ __ ld(x13, Address(fp, wordSize)); -+ +#ifdef ASSERT -+ // check that fields in JavaThread for exception oop and issuing pc are empty -+ Label oop_empty; -+ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); -+ __ beqz(t0, oop_empty); -+ __ stop("exception oop must be empty"); -+ __ bind(oop_empty); ++ // Check that fields in JavaThread for exception oop and issuing pc are empty ++ Label oop_empty; ++ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); ++ __ beqz(t0, oop_empty); ++ __ stop("exception oop must be empty"); ++ __ bind(oop_empty); + -+ Label pc_empty; -+ __ ld(t0, Address(xthread, JavaThread::exception_pc_offset())); -+ __ beqz(t0, pc_empty); -+ __ stop("exception pc must be empty"); -+ __ bind(pc_empty); ++ Label pc_empty; ++ __ ld(t0, Address(xthread, JavaThread::exception_pc_offset())); ++ __ beqz(t0, pc_empty); ++ __ stop("exception pc must be empty"); ++ __ bind(pc_empty); +#endif + -+ // store exception oop and throwing pc to JavaThread -+ __ sd(x10, Address(xthread, JavaThread::exception_oop_offset())); -+ __ sd(x13, Address(xthread, JavaThread::exception_pc_offset())); -+ -+ restore_live_registers(sasm); -+ -+ __ leave(); -+ -+ // Forward the exception directly to deopt blob. We can blow no -+ // registers and must leave throwing pc on the stack. 
A patch may -+ // have values live in registers so the entry point with the -+ // exception in tls. -+ __ far_jump(RuntimeAddress(deopt_blob->unpack_with_exception_in_tls())); -+ -+ __ bind(L); -+ } -+ -+ // Runtime will return true if the nmethod has been deoptimized during -+ // the patching process. In that case we must do a deopt reexecute instead. -+ Label cont; ++ // Runtime will return true if the nmethod has been deoptimized, this is the ++ // expected scenario and anything else is an error. Note that we maintain a ++ // check on the result purely as a defensive measure. ++ Label no_deopt; ++ __ beqz(x10, no_deopt); // Have we deoptimized? + -+ __ beqz(x10, cont); // have we deoptimized? ++ // Perform a re-execute. The proper return address is already on the stack, ++ // we just need to restore registers, pop all of our frames but the return ++ // address and jump to the deopt blob. + -+ // Will reexecute. Proper return address is already on the stack we just restore -+ // registers, pop all of our frame but the return address and jump to the deopt blob + restore_live_registers(sasm); + __ leave(); + __ far_jump(RuntimeAddress(deopt_blob->unpack_with_reexecution())); + -+ __ bind(cont); -+ restore_live_registers(sasm); -+ __ leave(); -+ __ ret(); ++ __ bind(no_deopt); ++ __ stop("deopt not performed"); + + return oop_maps; +} @@ -10367,13 +11088,13 @@ index 000000000..f06e7b51c + + case throw_div0_exception_id: + { -+ StubFrame f(sasm, "throw_div0_exception", dont_gc_arguments); ++ StubFrame f(sasm, "throw_div0_exception", dont_gc_arguments, does_not_return); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_div0_exception), false); + } + break; + + case throw_null_pointer_exception_id: -+ { StubFrame f(sasm, "throw_null_pointer_exception", dont_gc_arguments); ++ { StubFrame f(sasm, "throw_null_pointer_exception", dont_gc_arguments, does_not_return); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_null_pointer_exception), false); + } + break; @@ -10652,14 +11373,14 @@ index 000000000..f06e7b51c + + case throw_class_cast_exception_id: + { -+ StubFrame f(sasm, "throw_class_cast_exception", dont_gc_arguments); ++ StubFrame f(sasm, "throw_class_cast_exception", dont_gc_arguments, does_not_return); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_class_cast_exception), true); + } + break; + + case throw_incompatible_class_change_error_id: + { -+ StubFrame f(sasm, "throw_incompatible_class_cast_exception", dont_gc_arguments); ++ StubFrame f(sasm, "throw_incompatible_class_cast_exception", dont_gc_arguments, does_not_return); + oop_maps = generate_exception_throw(sasm, + CAST_FROM_FN_PTR(address, throw_incompatible_class_change_error), false); + } @@ -10693,7 +11414,7 @@ index 000000000..f06e7b51c + __ check_klass_subtype_slow_path(x14, x10, x12, x15, NULL, &miss); + + // fallthrough on success: -+ __ mv(t0, 1); ++ __ li(t0, 1); + __ sd(t0, Address(sp, (result_off) * VMRegImpl::stack_slot_size)); // result + __ pop_reg(RegSet::of(x10, x12, x14, x15), sp); + __ ret(); @@ -10753,7 +11474,7 @@ index 000000000..f06e7b51c + + case deoptimize_id: + { -+ StubFrame f(sasm, "deoptimize", dont_gc_arguments); ++ StubFrame f(sasm, "deoptimize", dont_gc_arguments, does_not_return); + OopMap* oop_map = save_live_registers(sasm); + assert_cond(oop_map != NULL); + f.load_argument(0, c_rarg1); @@ -10772,7 +11493,7 @@ index 000000000..f06e7b51c + + case throw_range_check_failed_id: + { -+ StubFrame f(sasm, 
"range_check_failed", dont_gc_arguments); ++ StubFrame f(sasm, "range_check_failed", dont_gc_arguments, does_not_return); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_range_check_exception), true); + } + break; @@ -10788,7 +11509,7 @@ index 000000000..f06e7b51c + + case access_field_patching_id: + { -+ StubFrame f(sasm, "access_field_patching", dont_gc_arguments); ++ StubFrame f(sasm, "access_field_patching", dont_gc_arguments, does_not_return); + // we should set up register map + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, access_field_patching)); + } @@ -10796,7 +11517,7 @@ index 000000000..f06e7b51c + + case load_klass_patching_id: + { -+ StubFrame f(sasm, "load_klass_patching", dont_gc_arguments); ++ StubFrame f(sasm, "load_klass_patching", dont_gc_arguments, does_not_return); + // we should set up register map + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_klass_patching)); + } @@ -10804,7 +11525,7 @@ index 000000000..f06e7b51c + + case load_mirror_patching_id: + { -+ StubFrame f(sasm, "load_mirror_patching", dont_gc_arguments); ++ StubFrame f(sasm, "load_mirror_patching", dont_gc_arguments, does_not_return); + // we should set up register map + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_mirror_patching)); + } @@ -10812,7 +11533,7 @@ index 000000000..f06e7b51c + + case load_appendix_patching_id: + { -+ StubFrame f(sasm, "load_appendix_patching", dont_gc_arguments); ++ StubFrame f(sasm, "load_appendix_patching", dont_gc_arguments, does_not_return); + // we should set up register map + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_appendix_patching)); + } @@ -10835,14 +11556,14 @@ index 000000000..f06e7b51c + + case throw_index_exception_id: + { -+ StubFrame f(sasm, "index_range_check_failed", dont_gc_arguments); ++ StubFrame f(sasm, "index_range_check_failed", dont_gc_arguments, does_not_return); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_index_exception), true); + } + break; + + case throw_array_store_exception_id: + { -+ StubFrame f(sasm, "throw_array_store_exception", dont_gc_arguments); ++ StubFrame f(sasm, "throw_array_store_exception", dont_gc_arguments, does_not_return); + // tos + 0: link + // + 1: return address + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_array_store_exception), true); @@ -10851,7 +11572,7 @@ index 000000000..f06e7b51c + + case predicate_failed_trap_id: + { -+ StubFrame f(sasm, "predicate_failed_trap", dont_gc_arguments); ++ StubFrame f(sasm, "predicate_failed_trap", dont_gc_arguments, does_not_return); + + OopMap* map = save_live_registers(sasm); + assert_cond(map != NULL); @@ -10874,7 +11595,7 @@ index 000000000..f06e7b51c + StubFrame f(sasm, "dtrace_object_alloc", dont_gc_arguments); + save_live_registers(sasm); + -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), c_rarg0); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, static_cast(SharedRuntime::dtrace_object_alloc)), c_rarg0); + + restore_live_registers(sasm); + } @@ -10882,8 +11603,8 @@ index 000000000..f06e7b51c + + default: + { -+ StubFrame f(sasm, "unimplemented entry", dont_gc_arguments); -+ __ mv(x10, (int)id); ++ StubFrame f(sasm, "unimplemented entry", dont_gc_arguments, does_not_return); ++ __ li(x10, (int) id); + __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), x10); + __ should_not_reach_here(); + } @@ -10898,14 +11619,13 @@ index 000000000..f06e7b51c +const char 
*Runtime1::pd_name_for_address(address entry) { Unimplemented(); return 0; } diff --git a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp new file mode 100644 -index 000000000..974c8fe76 +index 00000000000..fe46f7b21c8 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -@@ -0,0 +1,72 @@ +@@ -0,0 +1,65 @@ +/* -+ * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -10937,10 +11657,8 @@ index 000000000..974c8fe76 +// Sets the default values for platform dependent flags used by the client compiler. +// (see c1_globals.hpp) + -+#ifndef TIERED ++#ifndef COMPILER2 +define_pd_global(bool, BackgroundCompilation, true ); -+define_pd_global(bool, UseTLAB, true ); -+define_pd_global(bool, ResizeTLAB, true ); +define_pd_global(bool, InlineIntrinsics, true ); +define_pd_global(bool, PreferInterpreterNativeStubs, false); +define_pd_global(bool, ProfileTraps, false); @@ -10949,7 +11667,6 @@ index 000000000..974c8fe76 +define_pd_global(intx, CompileThreshold, 1500 ); + +define_pd_global(intx, OnStackReplacePercentage, 933 ); -+define_pd_global(intx, FreqInlineSize, 325 ); +define_pd_global(intx, NewSizeThreadIncrease, 4*K ); +define_pd_global(intx, InitialCodeCacheSize, 160*K); +define_pd_global(intx, ReservedCodeCacheSize, 32*M ); @@ -10960,126 +11677,25 @@ index 000000000..974c8fe76 +define_pd_global(intx, CodeCacheExpansionSize, 32*K ); +define_pd_global(uintx, CodeCacheMinBlockLength, 1); +define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); -+define_pd_global(uintx, MetaspaceSize, 12*M ); +define_pd_global(bool, NeverActAsServerClassMachine, true ); -+define_pd_global(uint64_t,MaxRAM, 1ULL*G); ++define_pd_global(uint64_t, MaxRAM, 1ULL*G); +define_pd_global(bool, CICompileOSR, true ); -+#endif // !TIERED ++#endif // !COMPILER2 +define_pd_global(bool, UseTypeProfile, false); -+define_pd_global(bool, RoundFPResults, true ); + -+define_pd_global(bool, LIRFillDelaySlots, false); +define_pd_global(bool, OptimizeSinglePrecision, true ); +define_pd_global(bool, CSEArrayLength, false); -+define_pd_global(bool, TwoOperandLIRForm, false ); ++define_pd_global(bool, TwoOperandLIRForm, false); + +#endif // CPU_RISCV_C1_GLOBALS_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp -new file mode 100644 -index 000000000..bf4efa629 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp -@@ -0,0 +1,91 @@ -+/* -+ * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. 
-+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ -+ -+#ifndef CPU_RISCV_C2_GLOBALS_RISCV_HPP -+#define CPU_RISCV_C2_GLOBALS_RISCV_HPP -+ -+#include "utilities/globalDefinitions.hpp" -+#include "utilities/macros.hpp" -+ -+// Sets the default values for platform dependent flags used by the server compiler. -+// (see c2_globals.hpp). Alpha-sorted. -+ -+define_pd_global(bool, BackgroundCompilation, true); -+define_pd_global(bool, UseTLAB, true); -+define_pd_global(bool, ResizeTLAB, true); -+define_pd_global(bool, CICompileOSR, true); -+define_pd_global(bool, InlineIntrinsics, true); -+define_pd_global(bool, PreferInterpreterNativeStubs, false); -+define_pd_global(bool, ProfileTraps, true); -+define_pd_global(bool, UseOnStackReplacement, true); -+define_pd_global(bool, ProfileInterpreter, true); -+define_pd_global(bool, TieredCompilation, trueInTiered); -+define_pd_global(intx, CompileThreshold, 10000); -+ -+define_pd_global(intx, OnStackReplacePercentage, 140); -+define_pd_global(intx, ConditionalMoveLimit, 0); -+define_pd_global(intx, FLOATPRESSURE, 64); -+define_pd_global(intx, FreqInlineSize, 325); -+define_pd_global(intx, MinJumpTableSize, 10); -+define_pd_global(intx, INTPRESSURE, 24); -+define_pd_global(intx, InteriorEntryAlignment, 16); -+define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K)); -+define_pd_global(intx, LoopUnrollLimit, 60); -+define_pd_global(intx, LoopPercentProfileLimit, 10); -+// InitialCodeCacheSize derived from specjbb2000 run. -+define_pd_global(intx, InitialCodeCacheSize, 2496*K); // Integral multiple of CodeCacheExpansionSize -+define_pd_global(intx, CodeCacheExpansionSize, 64*K); -+ -+// Ergonomics related flags -+define_pd_global(uint64_t,MaxRAM, 128ULL*G); -+define_pd_global(intx, RegisterCostAreaRatio, 16000); -+ -+// Peephole and CISC spilling both break the graph, and so makes the -+// scheduler sick. -+define_pd_global(bool, OptoPeephole, false); -+define_pd_global(bool, UseCISCSpill, false); -+define_pd_global(bool, OptoScheduling, true); -+define_pd_global(bool, OptoBundling, false); -+define_pd_global(bool, OptoRegScheduling, false); -+define_pd_global(bool, SuperWordLoopUnrollAnalysis, true); -+define_pd_global(bool, IdealizeClearArrayNode, true); -+ -+define_pd_global(intx, ReservedCodeCacheSize, 48*M); -+define_pd_global(intx, NonProfiledCodeHeapSize, 21*M); -+define_pd_global(intx, ProfiledCodeHeapSize, 22*M); -+define_pd_global(intx, NonNMethodCodeHeapSize, 5*M ); -+define_pd_global(uintx, CodeCacheMinBlockLength, 4); -+define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); -+ -+// Heap related flags -+define_pd_global(uintx,MetaspaceSize, ScaleForWordSize(16*M)); -+ -+// Ergonomics related flags -+define_pd_global(bool, NeverActAsServerClassMachine, false); -+ -+define_pd_global(bool, TrapBasedRangeChecks, false); // Not needed. 
-+ -+#endif // CPU_RISCV_C2_GLOBALS_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/c2_init_riscv.cpp b/src/hotspot/cpu/riscv/c2_init_riscv.cpp +diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp new file mode 100644 -index 000000000..3cb4a4995 +index 00000000000..27770dc17aa --- /dev/null -+++ b/src/hotspot/cpu/riscv/c2_init_riscv.cpp -@@ -0,0 +1,38 @@ ++++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp +@@ -0,0 +1,1646 @@ +/* -+ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -11104,1270 +11720,1634 @@ index 000000000..3cb4a4995 + */ + +#include "precompiled.hpp" -+#include "opto/compile.hpp" -+#include "opto/node.hpp" ++#include "asm/assembler.hpp" ++#include "asm/assembler.inline.hpp" ++#include "opto/c2_MacroAssembler.hpp" ++#include "opto/intrinsicnode.hpp" ++#include "opto/subnode.hpp" ++#include "runtime/stubRoutines.hpp" + -+// processor dependent initialization for riscv ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#define STOP(error) stop(error) ++#else ++#define BLOCK_COMMENT(str) block_comment(str) ++#define STOP(error) block_comment(error); stop(error) ++#endif + -+extern void reg_mask_init(); ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") + -+void Compile::pd_compiler2_init() { -+ guarantee(CodeEntryAlignment >= InteriorEntryAlignment, "" ); -+ reg_mask_init(); -+} -diff --git a/src/hotspot/cpu/riscv/codeBuffer_riscv.hpp b/src/hotspot/cpu/riscv/codeBuffer_riscv.hpp -new file mode 100644 -index 000000000..881900892 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/codeBuffer_riscv.hpp -@@ -0,0 +1,36 @@ -+/* -+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ ++// short string ++// StringUTF16.indexOfChar ++// StringLatin1.indexOfChar ++void C2_MacroAssembler::string_indexof_char_short(Register str1, Register cnt1, ++ Register ch, Register result, ++ bool isL) ++{ ++ Register ch1 = t0; ++ Register index = t1; + -+#ifndef CPU_RISCV_CODEBUFFER_RISCV_HPP -+#define CPU_RISCV_CODEBUFFER_RISCV_HPP ++ BLOCK_COMMENT("string_indexof_char_short {"); + -+private: -+ void pd_initialize() {} ++ Label LOOP, LOOP1, LOOP4, LOOP8; ++ Label MATCH, MATCH1, MATCH2, MATCH3, ++ MATCH4, MATCH5, MATCH6, MATCH7, NOMATCH; + -+public: -+ void flush_bundle(bool start_new_bundle) {} ++ mv(result, -1); ++ mv(index, zr); + -+#endif // CPU_RISCV_CODEBUFFER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp -new file mode 100644 -index 000000000..0354a93a0 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp -@@ -0,0 +1,154 @@ -+/* -+ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2018, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++ bind(LOOP); ++ addi(t0, index, 8); ++ ble(t0, cnt1, LOOP8); ++ addi(t0, index, 4); ++ ble(t0, cnt1, LOOP4); ++ j(LOOP1); + -+#include "precompiled.hpp" -+#include "asm/macroAssembler.inline.hpp" -+#include "code/compiledIC.hpp" -+#include "code/icBuffer.hpp" -+#include "code/nmethod.hpp" -+#include "memory/resourceArea.hpp" -+#include "runtime/mutexLocker.hpp" -+#include "runtime/safepoint.hpp" ++ bind(LOOP8); ++ isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); ++ beq(ch, ch1, MATCH); ++ isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); ++ beq(ch, ch1, MATCH1); ++ isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); ++ beq(ch, ch1, MATCH2); ++ isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); ++ beq(ch, ch1, MATCH3); ++ isL ? lbu(ch1, Address(str1, 4)) : lhu(ch1, Address(str1, 8)); ++ beq(ch, ch1, MATCH4); ++ isL ? lbu(ch1, Address(str1, 5)) : lhu(ch1, Address(str1, 10)); ++ beq(ch, ch1, MATCH5); ++ isL ? lbu(ch1, Address(str1, 6)) : lhu(ch1, Address(str1, 12)); ++ beq(ch, ch1, MATCH6); ++ isL ? lbu(ch1, Address(str1, 7)) : lhu(ch1, Address(str1, 14)); ++ beq(ch, ch1, MATCH7); ++ addi(index, index, 8); ++ addi(str1, str1, isL ? 8 : 16); ++ blt(index, cnt1, LOOP); ++ j(NOMATCH); + -+// ---------------------------------------------------------------------------- ++ bind(LOOP4); ++ isL ? 
lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); ++ beq(ch, ch1, MATCH); ++ isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); ++ beq(ch, ch1, MATCH1); ++ isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); ++ beq(ch, ch1, MATCH2); ++ isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); ++ beq(ch, ch1, MATCH3); ++ addi(index, index, 4); ++ addi(str1, str1, isL ? 4 : 8); ++ bge(index, cnt1, NOMATCH); + -+#define __ _masm. -+address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark) { -+ precond(cbuf.stubs()->start() != badAddress); -+ precond(cbuf.stubs()->end() != badAddress); -+ // Stub is fixed up when the corresponding call is converted from -+ // calling compiled code to calling interpreted code. -+ // mv xmethod, 0 -+ // jalr -4 # to self ++ bind(LOOP1); ++ isL ? lbu(ch1, Address(str1)) : lhu(ch1, Address(str1)); ++ beq(ch, ch1, MATCH); ++ addi(index, index, 1); ++ addi(str1, str1, isL ? 1 : 2); ++ blt(index, cnt1, LOOP1); ++ j(NOMATCH); + -+ if (mark == NULL) { -+ mark = cbuf.insts_mark(); // Get mark within main instrs section. -+ } ++ bind(MATCH1); ++ addi(index, index, 1); ++ j(MATCH); + -+ // Note that the code buffer's insts_mark is always relative to insts. -+ // That's why we must use the macroassembler to generate a stub. -+ MacroAssembler _masm(&cbuf); ++ bind(MATCH2); ++ addi(index, index, 2); ++ j(MATCH); + -+ address base = __ start_a_stub(to_interp_stub_size()); -+ int offset = __ offset(); -+ if (base == NULL) { -+ return NULL; // CodeBuffer::expand failed -+ } -+ // static stub relocation stores the instruction address of the call -+ __ relocate(static_stub_Relocation::spec(mark)); ++ bind(MATCH3); ++ addi(index, index, 3); ++ j(MATCH); + -+ __ emit_static_call_stub(); ++ bind(MATCH4); ++ addi(index, index, 4); ++ j(MATCH); + -+ assert((__ offset() - offset) <= (int)to_interp_stub_size(), "stub too big"); -+ __ end_a_stub(); -+ return base; -+} -+#undef __ ++ bind(MATCH5); ++ addi(index, index, 5); ++ j(MATCH); + -+int CompiledStaticCall::to_interp_stub_size() { -+ // (lui, addi, slli, addi, slli, addi) + (lui, addi, slli, addi, slli) + jalr -+ return 12 * NativeInstruction::instruction_size; -+} ++ bind(MATCH6); ++ addi(index, index, 6); ++ j(MATCH); + -+int CompiledStaticCall::to_trampoline_stub_size() { -+ // Somewhat pessimistically, we count four instructions here (although -+ // there are only three) because we sometimes emit an alignment nop. -+ // Trampoline stubs are always word aligned. -+ return NativeInstruction::instruction_size + NativeCallTrampolineStub::instruction_size; -+} ++ bind(MATCH7); ++ addi(index, index, 7); + -+// Relocation entries for call stub, compiled java to interpreter. -+int CompiledStaticCall::reloc_to_interp_stub() { -+ return 4; // 3 in emit_to_interp_stub + 1 in emit_call ++ bind(MATCH); ++ mv(result, index); ++ bind(NOMATCH); ++ BLOCK_COMMENT("} string_indexof_char_short"); +} + -+void CompiledDirectStaticCall::set_to_interpreted(const methodHandle& callee, address entry) { -+ address stub = find_stub(false /* is_aot */); -+ guarantee(stub != NULL, "stub not found"); -+ -+ if (TraceICs) { -+ ResourceMark rm; -+ tty->print_cr("CompiledDirectStaticCall@" INTPTR_FORMAT ": set_to_interpreted %s", -+ p2i(instruction_address()), -+ callee->name_and_sig_as_C_string()); -+ } -+ -+ // Creation also verifies the object. 
-+ NativeMovConstReg* method_holder -+ = nativeMovConstReg_at(stub); -+#ifndef PRODUCT -+ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); -+ -+ // read the value once -+ volatile intptr_t data = method_holder->data(); -+ assert(data == 0 || data == (intptr_t)callee(), -+ "a) MT-unsafe modification of inline cache"); -+ assert(data == 0 || jump->jump_destination() == entry, -+ "b) MT-unsafe modification of inline cache"); -+#endif -+ // Update stub. -+ method_holder->set_data((intptr_t)callee()); -+ NativeGeneralJump::insert_unconditional(method_holder->next_instruction_address(), entry); -+ ICache::invalidate_range(stub, to_interp_stub_size()); -+ // Update jump to call. -+ set_destination_mt_safe(stub); -+} ++// StringUTF16.indexOfChar ++// StringLatin1.indexOfChar ++void C2_MacroAssembler::string_indexof_char(Register str1, Register cnt1, ++ Register ch, Register result, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ bool isL) ++{ ++ Label CH1_LOOP, HIT, NOMATCH, DONE, DO_LONG; ++ Register ch1 = t0; ++ Register orig_cnt = t1; ++ Register mask1 = tmp3; ++ Register mask2 = tmp2; ++ Register match_mask = tmp1; ++ Register trailing_char = tmp4; ++ Register unaligned_elems = tmp4; + -+void CompiledDirectStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub) { -+ assert (CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "mt unsafe call"); -+ // Reset stub. -+ address stub = static_stub->addr(); -+ assert(stub != NULL, "stub not found"); -+ // Creation also verifies the object. -+ NativeMovConstReg* method_holder -+ = nativeMovConstReg_at(stub); -+ method_holder->set_data(0); -+} ++ BLOCK_COMMENT("string_indexof_char {"); ++ beqz(cnt1, NOMATCH); + -+//----------------------------------------------------------------------------- -+// Non-product mode code -+#ifndef PRODUCT ++ addi(t0, cnt1, isL ? -32 : -16); ++ bgtz(t0, DO_LONG); ++ string_indexof_char_short(str1, cnt1, ch, result, isL); ++ j(DONE); + -+void CompiledDirectStaticCall::verify() { -+ // Verify call. -+ _call->verify(); -+ if (os::is_MP()) { -+ _call->verify_alignment(); ++ bind(DO_LONG); ++ mv(orig_cnt, cnt1); ++ if (AvoidUnalignedAccesses) { ++ Label ALIGNED; ++ andi(unaligned_elems, str1, 0x7); ++ beqz(unaligned_elems, ALIGNED); ++ sub(unaligned_elems, unaligned_elems, 8); ++ neg(unaligned_elems, unaligned_elems); ++ if (!isL) { ++ srli(unaligned_elems, unaligned_elems, 1); ++ } ++ // do unaligned part per element ++ string_indexof_char_short(str1, unaligned_elems, ch, result, isL); ++ bgez(result, DONE); ++ mv(orig_cnt, cnt1); ++ sub(cnt1, cnt1, unaligned_elems); ++ bind(ALIGNED); + } + -+ // Verify stub. -+ address stub = find_stub(false /* is_aot */); -+ assert(stub != NULL, "no stub found for static call"); -+ // Creation also verifies the object. -+ NativeMovConstReg* method_holder -+ = nativeMovConstReg_at(stub); -+ NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); ++ // duplicate ch ++ if (isL) { ++ slli(ch1, ch, 8); ++ orr(ch, ch1, ch); ++ } ++ slli(ch1, ch, 16); ++ orr(ch, ch1, ch); ++ slli(ch1, ch, 32); ++ orr(ch, ch1, ch); + -+ // Verify state. 
-+ assert(is_clean() || is_call_to_compiled() || is_call_to_interpreted(), "sanity check"); -+} ++ if (!isL) { ++ slli(cnt1, cnt1, 1); ++ } + -+#endif // !PRODUCT -diff --git a/src/hotspot/cpu/riscv/copy_riscv.hpp b/src/hotspot/cpu/riscv/copy_riscv.hpp -new file mode 100644 -index 000000000..011e965ad ---- /dev/null -+++ b/src/hotspot/cpu/riscv/copy_riscv.hpp -@@ -0,0 +1,60 @@ -+/* -+ * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++ uint64_t mask0101 = UCONST64(0x0101010101010101); ++ uint64_t mask0001 = UCONST64(0x0001000100010001); ++ mv(mask1, isL ? mask0101 : mask0001); ++ uint64_t mask7f7f = UCONST64(0x7f7f7f7f7f7f7f7f); ++ uint64_t mask7fff = UCONST64(0x7fff7fff7fff7fff); ++ mv(mask2, isL ? mask7f7f : mask7fff); + -+#ifndef CPU_RISCV_COPY_RISCV_HPP -+#define CPU_RISCV_COPY_RISCV_HPP ++ bind(CH1_LOOP); ++ ld(ch1, Address(str1)); ++ addi(str1, str1, 8); ++ addi(cnt1, cnt1, -8); ++ compute_match_mask(ch1, ch, match_mask, mask1, mask2); ++ bnez(match_mask, HIT); ++ bgtz(cnt1, CH1_LOOP); ++ j(NOMATCH); + -+// Inline functions for memory copy and fill. 
++ bind(HIT); ++ ctzc_bit(trailing_char, match_mask, isL, ch1, result); ++ srli(trailing_char, trailing_char, 3); ++ addi(cnt1, cnt1, 8); ++ ble(cnt1, trailing_char, NOMATCH); ++ // match case ++ if (!isL) { ++ srli(cnt1, cnt1, 1); ++ srli(trailing_char, trailing_char, 1); ++ } + -+// Contains inline asm implementations -+#include OS_CPU_HEADER_INLINE(copy) ++ sub(result, orig_cnt, cnt1); ++ add(result, result, trailing_char); ++ j(DONE); + ++ bind(NOMATCH); ++ mv(result, -1); + -+static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) { -+ julong* to = (julong*) tohw; -+ julong v = ((julong) value << 32) | value; -+ while (count-- > 0) { -+ *to++ = v; -+ } ++ bind(DONE); ++ BLOCK_COMMENT("} string_indexof_char"); +} + -+static void pd_fill_to_aligned_words(HeapWord* tohw, size_t count, juint value) { -+ pd_fill_to_words(tohw, count, value); -+} ++typedef void (MacroAssembler::* load_chr_insn)(Register rd, const Address &adr, Register temp); + -+static void pd_fill_to_bytes(void* to, size_t count, jubyte value) { -+ (void)memset(to, value, count); -+} ++// Search for needle in haystack and return index or -1 ++// x10: result ++// x11: haystack ++// x12: haystack_len ++// x13: needle ++// x14: needle_len ++void C2_MacroAssembler::string_indexof(Register haystack, Register needle, ++ Register haystack_len, Register needle_len, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ Register tmp5, Register tmp6, ++ Register result, int ae) ++{ ++ assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); + -+static void pd_zero_to_words(HeapWord* tohw, size_t count) { -+ pd_fill_to_words(tohw, count, 0); -+} ++ Label LINEARSEARCH, LINEARSTUB, DONE, NOMATCH; + -+static void pd_zero_to_bytes(void* to, size_t count) { -+ (void)memset(to, 0, count); -+} ++ Register ch1 = t0; ++ Register ch2 = t1; ++ Register nlen_tmp = tmp1; // needle len tmp ++ Register hlen_tmp = tmp2; // haystack len tmp ++ Register result_tmp = tmp4; + -+#endif // CPU_RISCV_COPY_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/depChecker_riscv.hpp b/src/hotspot/cpu/riscv/depChecker_riscv.hpp -new file mode 100644 -index 000000000..31cee7103 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/depChecker_riscv.hpp -@@ -0,0 +1,32 @@ -+/* -+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ ++ bool isLL = ae == StrIntrinsicNode::LL; + -+#ifndef CPU_RISCV_VM_DEPCHECKER_RISCV_HPP -+#define CPU_RISCV_VM_DEPCHECKER_RISCV_HPP ++ bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; ++ bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; ++ int needle_chr_shift = needle_isL ? 0 : 1; ++ int haystack_chr_shift = haystack_isL ? 0 : 1; ++ int needle_chr_size = needle_isL ? 1 : 2; ++ int haystack_chr_size = haystack_isL ? 1 : 2; ++ load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu : ++ (load_chr_insn)&MacroAssembler::lhu; ++ load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu : ++ (load_chr_insn)&MacroAssembler::lhu; + -+// Nothing to do on riscv ++ BLOCK_COMMENT("string_indexof {"); + -+#endif // CPU_RISCV_VM_DEPCHECKER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/disassembler_riscv.hpp b/src/hotspot/cpu/riscv/disassembler_riscv.hpp -new file mode 100644 -index 000000000..e97b89327 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/disassembler_riscv.hpp -@@ -0,0 +1,37 @@ -+/* -+ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++ // Note, inline_string_indexOf() generates checks: ++ // if (pattern.count > src.count) return -1; ++ // if (pattern.count == 0) return 0; + -+#ifndef CPU_RISCV_DISASSEMBLER_RISCV_HPP -+#define CPU_RISCV_DISASSEMBLER_RISCV_HPP ++ // We have two strings, a source string in haystack, haystack_len and a pattern string ++ // in needle, needle_len. Find the first occurence of pattern in source or return -1. + -+ static int pd_instruction_alignment() { -+ return 1; -+ } ++ // For larger pattern and source we use a simplified Boyer Moore algorithm. ++ // With a small pattern and source we use linear scan. + -+ static const char* pd_cpu_opts() { -+ return ""; -+ } ++ // needle_len >=8 && needle_len < 256 && needle_len < haystack_len/4, use bmh algorithm. 
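++ // result_tmp = haystack_len - needle_len is the last index at which a match
++ // can start; both the linear-scan fallback and the Boyer-Moore-Horspool code
++ // below use it to bound their outer loops.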
++ sub(result_tmp, haystack_len, needle_len); ++ // needle_len < 8, use linear scan ++ sub(t0, needle_len, 8); ++ bltz(t0, LINEARSEARCH); ++ // needle_len >= 256, use linear scan ++ sub(t0, needle_len, 256); ++ bgez(t0, LINEARSTUB); ++ // needle_len >= haystack_len/4, use linear scan ++ srli(t0, haystack_len, 2); ++ bge(needle_len, t0, LINEARSTUB); + -+#endif // CPU_RISCV_DISASSEMBLER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp -new file mode 100644 -index 000000000..be6f1a67f ---- /dev/null -+++ b/src/hotspot/cpu/riscv/frame_riscv.cpp -@@ -0,0 +1,683 @@ -+/* -+ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++ // Boyer-Moore-Horspool introduction: ++ // The Boyer Moore alogorithm is based on the description here:- ++ // ++ // http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm ++ // ++ // This describes and algorithm with 2 shift rules. The 'Bad Character' rule ++ // and the 'Good Suffix' rule. ++ // ++ // These rules are essentially heuristics for how far we can shift the ++ // pattern along the search string. ++ // ++ // The implementation here uses the 'Bad Character' rule only because of the ++ // complexity of initialisation for the 'Good Suffix' rule. ++ // ++ // This is also known as the Boyer-Moore-Horspool algorithm: ++ // ++ // http://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm ++ // ++ // #define ASIZE 256 ++ // ++ // int bm(unsigned char *pattern, int m, unsigned char *src, int n) { ++ // int i, j; ++ // unsigned c; ++ // unsigned char bc[ASIZE]; ++ // ++ // /* Preprocessing */ ++ // for (i = 0; i < ASIZE; ++i) ++ // bc[i] = m; ++ // for (i = 0; i < m - 1; ) { ++ // c = pattern[i]; ++ // ++i; ++ // // c < 256 for Latin1 string, so, no need for branch ++ // #ifdef PATTERN_STRING_IS_LATIN1 ++ // bc[c] = m - i; ++ // #else ++ // if (c < ASIZE) bc[c] = m - i; ++ // #endif ++ // } ++ // ++ // /* Searching */ ++ // j = 0; ++ // while (j <= n - m) { ++ // c = src[i+j]; ++ // if (pattern[m-1] == c) ++ // int k; ++ // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); ++ // if (k < 0) return j; ++ // // c < 256 for Latin1 string, so, no need for branch ++ // #ifdef SOURCE_STRING_IS_LATIN1_AND_PATTERN_STRING_IS_LATIN1 ++ // // LL case: (c< 256) always true. 
Remove branch ++ // j += bc[pattern[j+m-1]]; ++ // #endif ++ // #ifdef SOURCE_STRING_IS_UTF_AND_PATTERN_STRING_IS_UTF ++ // // UU case: need if (c if not. ++ // if (c < ASIZE) ++ // j += bc[pattern[j+m-1]]; ++ // else ++ // j += m ++ // #endif ++ // } ++ // return -1; ++ // } + -+#include "precompiled.hpp" -+#include "interpreter/interpreter.hpp" -+#include "memory/resourceArea.hpp" -+#include "oops/markOop.hpp" -+#include "oops/method.hpp" -+#include "oops/oop.inline.hpp" -+#include "prims/methodHandles.hpp" -+#include "runtime/frame.inline.hpp" -+#include "runtime/handles.inline.hpp" -+#include "runtime/javaCalls.hpp" -+#include "runtime/monitorChunk.hpp" -+#include "runtime/os.hpp" -+#include "runtime/signature.hpp" -+#include "runtime/stubCodeGenerator.hpp" -+#include "runtime/stubRoutines.hpp" -+#include "vmreg_riscv.inline.hpp" -+#ifdef COMPILER1 -+#include "c1/c1_Runtime1.hpp" -+#include "runtime/vframeArray.hpp" -+#endif ++ // temp register:t0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, result ++ Label BCLOOP, BCSKIP, BMLOOPSTR2, BMLOOPSTR1, BMSKIP, BMADV, BMMATCH, ++ BMLOOPSTR1_LASTCMP, BMLOOPSTR1_CMP, BMLOOPSTR1_AFTER_LOAD, BM_INIT_LOOP; + -+#ifdef ASSERT -+void RegisterMap::check_location_valid() { -+} -+#endif ++ Register haystack_end = haystack_len; ++ Register skipch = tmp2; + ++ // pattern length is >=8, so, we can read at least 1 register for cases when ++ // UTF->Latin1 conversion is not needed(8 LL or 4UU) and half register for ++ // UL case. We'll re-read last character in inner pre-loop code to have ++ // single outer pre-loop load ++ const int firstStep = isLL ? 7 : 3; + -+// Profiling/safepoint support ++ const int ASIZE = 256; ++ const int STORE_BYTES = 8; // 8 bytes stored per instruction(sd) + -+bool frame::safe_for_sender(JavaThread *thread) { -+ address addr_sp = (address)_sp; -+ address addr_fp = (address)_fp; -+ address unextended_sp = (address)_unextended_sp; ++ sub(sp, sp, ASIZE); + -+ // consider stack guards when trying to determine "safe" stack pointers -+ static size_t stack_guard_size = os::uses_stack_guard_pages() ? -+ (JavaThread::stack_red_zone_size() + JavaThread::stack_yellow_zone_size()) : 0; -+ assert_cond(thread != NULL); -+ size_t usable_stack_size = thread->stack_size() - stack_guard_size; ++ // init BC offset table with default value: needle_len ++ slli(t0, needle_len, 8); ++ orr(t0, t0, needle_len); // [63...16][needle_len][needle_len] ++ slli(tmp1, t0, 16); ++ orr(t0, tmp1, t0); // [63...32][needle_len][needle_len][needle_len][needle_len] ++ slli(tmp1, t0, 32); ++ orr(tmp5, tmp1, t0); // tmp5: 8 elements [needle_len] + -+ // sp must be within the usable part of the stack (not in guards) -+ bool sp_safe = (addr_sp < thread->stack_base()) && -+ (addr_sp >= thread->stack_base() - usable_stack_size); ++ mv(ch1, sp); // ch1 is t0 ++ mv(tmp6, ASIZE / STORE_BYTES); // loop iterations + -+ if (!sp_safe) { -+ return false; ++ bind(BM_INIT_LOOP); ++ // for (i = 0; i < ASIZE; ++i) ++ // bc[i] = m; ++ for (int i = 0; i < 4; i++) { ++ sd(tmp5, Address(ch1, i * wordSize)); + } ++ add(ch1, ch1, 32); ++ sub(tmp6, tmp6, 4); ++ bgtz(tmp6, BM_INIT_LOOP); + -+ // When we are running interpreted code the machine stack pointer, SP, is -+ // set low enough so that the Java expression stack can grow and shrink -+ // without ever exceeding the machine stack bounds. So, ESP >= SP. -+ -+ // When we call out of an interpreted method, SP is incremented so that -+ // the space between SP and ESP is removed. 
The SP saved in the callee's -+ // frame is the SP *before* this increment. So, when we walk a stack of -+ // interpreter frames the sender's SP saved in a frame might be less than -+ // the SP at the point of call. -+ -+ // So unextended sp must be within the stack but we need not to check -+ // that unextended sp >= sp -+ -+ bool unextended_sp_safe = (unextended_sp < thread->stack_base()); ++ sub(nlen_tmp, needle_len, 1); // m - 1, index of the last element in pattern ++ Register orig_haystack = tmp5; ++ mv(orig_haystack, haystack); ++ // result_tmp = tmp4 ++ shadd(haystack_end, result_tmp, haystack, haystack_end, haystack_chr_shift); ++ sub(ch2, needle_len, 1); // bc offset init value, ch2 is t1 ++ mv(tmp3, needle); + -+ if (!unextended_sp_safe) { -+ return false; ++ // for (i = 0; i < m - 1; ) { ++ // c = pattern[i]; ++ // ++i; ++ // // c < 256 for Latin1 string, so, no need for branch ++ // #ifdef PATTERN_STRING_IS_LATIN1 ++ // bc[c] = m - i; ++ // #else ++ // if (c < ASIZE) bc[c] = m - i; ++ // #endif ++ // } ++ bind(BCLOOP); ++ (this->*needle_load_1chr)(ch1, Address(tmp3), noreg); ++ add(tmp3, tmp3, needle_chr_size); ++ if (!needle_isL) { ++ // ae == StrIntrinsicNode::UU ++ mv(tmp6, ASIZE); ++ bgeu(ch1, tmp6, BCSKIP); + } ++ add(tmp4, sp, ch1); ++ sb(ch2, Address(tmp4)); // store skip offset to BC offset table + -+ // an fp must be within the stack and above (but not equal) sp -+ // second evaluation on fp+ is added to handle situation where fp is -1 -+ bool fp_safe = (addr_fp < thread->stack_base() && (addr_fp > addr_sp) && -+ (((addr_fp + (return_addr_offset * sizeof(void*))) < thread->stack_base()))); ++ bind(BCSKIP); ++ sub(ch2, ch2, 1); // for next pattern element, skip distance -1 ++ bgtz(ch2, BCLOOP); + -+ // We know sp/unextended_sp are safe only fp is questionable here ++ // tmp6: pattern end, address after needle ++ shadd(tmp6, needle_len, needle, tmp6, needle_chr_shift); ++ if (needle_isL == haystack_isL) { ++ // load last 8 bytes (8LL/4UU symbols) ++ ld(tmp6, Address(tmp6, -wordSize)); ++ } else { ++ // UL: from UTF-16(source) search Latin1(pattern) ++ lwu(tmp6, Address(tmp6, -wordSize / 2)); // load last 4 bytes(4 symbols) ++ // convert Latin1 to UTF. eg: 0x0000abcd -> 0x0a0b0c0d ++ // We'll have to wait until load completed, but it's still faster than per-character loads+checks ++ srli(tmp3, tmp6, BitsPerByte * (wordSize / 2 - needle_chr_size)); // pattern[m-1], eg:0x0000000a ++ slli(ch2, tmp6, XLEN - 24); ++ srli(ch2, ch2, XLEN - 8); // pattern[m-2], 0x0000000b ++ slli(ch1, tmp6, XLEN - 16); ++ srli(ch1, ch1, XLEN - 8); // pattern[m-3], 0x0000000c ++ andi(tmp6, tmp6, 0xff); // pattern[m-4], 0x0000000d ++ slli(ch2, ch2, 16); ++ orr(ch2, ch2, ch1); // 0x00000b0c ++ slli(result, tmp3, 48); // use result as temp register ++ orr(tmp6, tmp6, result); // 0x0a00000d ++ slli(result, ch2, 16); ++ orr(tmp6, tmp6, result); // UTF-16:0x0a0b0c0d ++ } + -+ // If the current frame is known to the code cache then we can attempt to -+ // to construct the sender and do some validation of it. 
This goes a long way -+ // toward eliminating issues when we get in frame construction code ++ // i = m - 1; ++ // skipch = j + i; ++ // if (skipch == pattern[m - 1] ++ // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); ++ // else ++ // move j with bad char offset table ++ bind(BMLOOPSTR2); ++ // compare pattern to source string backward ++ shadd(result, nlen_tmp, haystack, result, haystack_chr_shift); ++ (this->*haystack_load_1chr)(skipch, Address(result), noreg); ++ sub(nlen_tmp, nlen_tmp, firstStep); // nlen_tmp is positive here, because needle_len >= 8 ++ if (needle_isL == haystack_isL) { ++ // re-init tmp3. It's for free because it's executed in parallel with ++ // load above. Alternative is to initialize it before loop, but it'll ++ // affect performance on in-order systems with 2 or more ld/st pipelines ++ srli(tmp3, tmp6, BitsPerByte * (wordSize - needle_chr_size)); // UU/LL: pattern[m-1] ++ } ++ if (!isLL) { // UU/UL case ++ slli(ch2, nlen_tmp, 1); // offsets in bytes ++ } ++ bne(tmp3, skipch, BMSKIP); // if not equal, skipch is bad char ++ add(result, haystack, isLL ? nlen_tmp : ch2); ++ ld(ch2, Address(result)); // load 8 bytes from source string ++ mv(ch1, tmp6); ++ if (isLL) { ++ j(BMLOOPSTR1_AFTER_LOAD); ++ } else { ++ sub(nlen_tmp, nlen_tmp, 1); // no need to branch for UU/UL case. cnt1 >= 8 ++ j(BMLOOPSTR1_CMP); ++ } + -+ if (_cb != NULL) { ++ bind(BMLOOPSTR1); ++ shadd(ch1, nlen_tmp, needle, ch1, needle_chr_shift); ++ (this->*needle_load_1chr)(ch1, Address(ch1), noreg); ++ shadd(ch2, nlen_tmp, haystack, ch2, haystack_chr_shift); ++ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); + -+ // First check if frame is complete and tester is reliable -+ // Unfortunately we can only check frame complete for runtime stubs and nmethod -+ // other generic buffer blobs are more problematic so we just assume they are -+ // ok. adapter blobs never have a frame complete and are never ok. ++ bind(BMLOOPSTR1_AFTER_LOAD); ++ sub(nlen_tmp, nlen_tmp, 1); ++ bltz(nlen_tmp, BMLOOPSTR1_LASTCMP); + -+ if (!_cb->is_frame_complete_at(_pc)) { -+ if (_cb->is_nmethod() || _cb->is_adapter_blob() || _cb->is_runtime_stub()) { -+ return false; -+ } -+ } ++ bind(BMLOOPSTR1_CMP); ++ beq(ch1, ch2, BMLOOPSTR1); + -+ // Could just be some random pointer within the codeBlob -+ if (!_cb->code_contains(_pc)) { -+ return false; ++ bind(BMSKIP); ++ if (!isLL) { ++ // if we've met UTF symbol while searching Latin1 pattern, then we can ++ // skip needle_len symbols ++ if (needle_isL != haystack_isL) { ++ mv(result_tmp, needle_len); ++ } else { ++ mv(result_tmp, 1); + } ++ mv(t0, ASIZE); ++ bgeu(skipch, t0, BMADV); ++ } ++ add(result_tmp, sp, skipch); ++ lbu(result_tmp, Address(result_tmp)); // load skip offset + -+ // Entry frame checks -+ if (is_entry_frame()) { -+ // an entry frame must have a valid fp. 
-+ return fp_safe && is_entry_frame_valid(thread); -+ } ++ bind(BMADV); ++ sub(nlen_tmp, needle_len, 1); ++ // move haystack after bad char skip offset ++ shadd(haystack, result_tmp, haystack, result, haystack_chr_shift); ++ ble(haystack, haystack_end, BMLOOPSTR2); ++ add(sp, sp, ASIZE); ++ j(NOMATCH); + -+ intptr_t* sender_sp = NULL; -+ intptr_t* sender_unextended_sp = NULL; -+ address sender_pc = NULL; -+ intptr_t* saved_fp = NULL; ++ bind(BMLOOPSTR1_LASTCMP); ++ bne(ch1, ch2, BMSKIP); + -+ if (is_interpreted_frame()) { -+ // fp must be safe -+ if (!fp_safe) { -+ return false; -+ } ++ bind(BMMATCH); ++ sub(result, haystack, orig_haystack); ++ if (!haystack_isL) { ++ srli(result, result, 1); ++ } ++ add(sp, sp, ASIZE); ++ j(DONE); + -+ sender_pc = (address)this->fp()[return_addr_offset]; -+ // for interpreted frames, the value below is the sender "raw" sp, -+ // which can be different from the sender unextended sp (the sp seen -+ // by the sender) because of current frame local variables -+ sender_sp = (intptr_t*) addr_at(sender_sp_offset); -+ sender_unextended_sp = (intptr_t*) this->fp()[interpreter_frame_sender_sp_offset]; -+ saved_fp = (intptr_t*) this->fp()[link_offset]; -+ } else { -+ // must be some sort of compiled/runtime frame -+ // fp does not have to be safe (although it could be check for c1?) ++ bind(LINEARSTUB); ++ sub(t0, needle_len, 16); // small patterns still should be handled by simple algorithm ++ bltz(t0, LINEARSEARCH); ++ mv(result, zr); ++ RuntimeAddress stub = NULL; ++ if (isLL) { ++ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ll()); ++ assert(stub.target() != NULL, "string_indexof_linear_ll stub has not been generated"); ++ } else if (needle_isL) { ++ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ul()); ++ assert(stub.target() != NULL, "string_indexof_linear_ul stub has not been generated"); ++ } else { ++ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_uu()); ++ assert(stub.target() != NULL, "string_indexof_linear_uu stub has not been generated"); ++ } ++ trampoline_call(stub); ++ j(DONE); + -+ // check for a valid frame_size, otherwise we are unlikely to get a valid sender_pc -+ if (_cb->frame_size() <= 0) { -+ return false; -+ } ++ bind(NOMATCH); ++ mv(result, -1); ++ j(DONE); + -+ sender_sp = _unextended_sp + _cb->frame_size(); -+ // Is sender_sp safe? 
-+ if ((address)sender_sp >= thread->stack_base()) { -+ return false; -+ } -+ sender_unextended_sp = sender_sp; -+ sender_pc = (address) *(sender_sp + frame::return_addr_offset); -+ saved_fp = (intptr_t*) *(sender_sp + frame::link_offset); -+ } ++ bind(LINEARSEARCH); ++ string_indexof_linearscan(haystack, needle, haystack_len, needle_len, tmp1, tmp2, tmp3, tmp4, -1, result, ae); + ++ bind(DONE); ++ BLOCK_COMMENT("} string_indexof"); ++} + -+ // If the potential sender is the interpreter then we can do some more checking -+ if (Interpreter::contains(sender_pc)) { ++// string_indexof ++// result: x10 ++// src: x11 ++// src_count: x12 ++// pattern: x13 ++// pattern_count: x14 or 1/2/3/4 ++void C2_MacroAssembler::string_indexof_linearscan(Register haystack, Register needle, ++ Register haystack_len, Register needle_len, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ int needle_con_cnt, Register result, int ae) ++{ ++ // Note: ++ // needle_con_cnt > 0 means needle_len register is invalid, needle length is constant ++ // for UU/LL: needle_con_cnt[1, 4], UL: needle_con_cnt = 1 ++ assert(needle_con_cnt <= 4, "Invalid needle constant count"); ++ assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); + -+ // fp is always saved in a recognizable place in any code we generate. However -+ // only if the sender is interpreted/call_stub (c1 too?) are we certain that the saved fp -+ // is really a frame pointer. -+ bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); ++ Register ch1 = t0; ++ Register ch2 = t1; ++ Register hlen_neg = haystack_len, nlen_neg = needle_len; ++ Register nlen_tmp = tmp1, hlen_tmp = tmp2, result_tmp = tmp4; + -+ if (!saved_fp_safe) { -+ return false; -+ } ++ bool isLL = ae == StrIntrinsicNode::LL; + -+ // construct the potential sender -+ frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); ++ bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; ++ bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; ++ int needle_chr_shift = needle_isL ? 0 : 1; ++ int haystack_chr_shift = haystack_isL ? 0 : 1; ++ int needle_chr_size = needle_isL ? 1 : 2; ++ int haystack_chr_size = haystack_isL ? 1 : 2; + -+ return sender.is_interpreted_frame_valid(thread); -+ } ++ load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu : ++ (load_chr_insn)&MacroAssembler::lhu; ++ load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu : ++ (load_chr_insn)&MacroAssembler::lhu; ++ load_chr_insn load_2chr = isLL ? (load_chr_insn)&MacroAssembler::lhu : (load_chr_insn)&MacroAssembler::lwu; ++ load_chr_insn load_4chr = isLL ? 
(load_chr_insn)&MacroAssembler::lwu : (load_chr_insn)&MacroAssembler::ld; + -+ // We must always be able to find a recognizable pc -+ CodeBlob* sender_blob = CodeCache::find_blob_unsafe(sender_pc); -+ if (sender_pc == NULL || sender_blob == NULL) { -+ return false; -+ } ++ Label DO1, DO2, DO3, MATCH, NOMATCH, DONE; + -+ // Could be a zombie method -+ if (sender_blob->is_zombie() || sender_blob->is_unloaded()) { -+ return false; -+ } ++ Register first = tmp3; + -+ // Could just be some random pointer within the codeBlob -+ if (!sender_blob->code_contains(sender_pc)) { -+ return false; -+ } ++ if (needle_con_cnt == -1) { ++ Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT; + -+ // We should never be able to see an adapter if the current frame is something from code cache -+ if (sender_blob->is_adapter_blob()) { -+ return false; -+ } ++ sub(t0, needle_len, needle_isL == haystack_isL ? 4 : 2); ++ bltz(t0, DOSHORT); + -+ // Could be the call_stub -+ if (StubRoutines::returns_to_call_stub(sender_pc)) { -+ bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); ++ (this->*needle_load_1chr)(first, Address(needle), noreg); ++ slli(t0, needle_len, needle_chr_shift); ++ add(needle, needle, t0); ++ neg(nlen_neg, t0); ++ slli(t0, result_tmp, haystack_chr_shift); ++ add(haystack, haystack, t0); ++ neg(hlen_neg, t0); + -+ if (!saved_fp_safe) { -+ return false; -+ } ++ bind(FIRST_LOOP); ++ add(t0, haystack, hlen_neg); ++ (this->*haystack_load_1chr)(ch2, Address(t0), noreg); ++ beq(first, ch2, STR1_LOOP); + -+ // construct the potential sender -+ frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); ++ bind(STR2_NEXT); ++ add(hlen_neg, hlen_neg, haystack_chr_size); ++ blez(hlen_neg, FIRST_LOOP); ++ j(NOMATCH); + -+ // Validate the JavaCallWrapper an entry frame must have -+ address jcw = (address)sender.entry_frame_call_wrapper(); ++ bind(STR1_LOOP); ++ add(nlen_tmp, nlen_neg, needle_chr_size); ++ add(hlen_tmp, hlen_neg, haystack_chr_size); ++ bgez(nlen_tmp, MATCH); + -+ bool jcw_safe = (jcw < thread->stack_base()) && (jcw > (address)sender.fp()); ++ bind(STR1_NEXT); ++ add(ch1, needle, nlen_tmp); ++ (this->*needle_load_1chr)(ch1, Address(ch1), noreg); ++ add(ch2, haystack, hlen_tmp); ++ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); ++ bne(ch1, ch2, STR2_NEXT); ++ add(nlen_tmp, nlen_tmp, needle_chr_size); ++ add(hlen_tmp, hlen_tmp, haystack_chr_size); ++ bltz(nlen_tmp, STR1_NEXT); ++ j(MATCH); + -+ return jcw_safe; ++ bind(DOSHORT); ++ if (needle_isL == haystack_isL) { ++ sub(t0, needle_len, 2); ++ bltz(t0, DO1); ++ bgtz(t0, DO3); + } ++ } + -+ CompiledMethod* nm = sender_blob->as_compiled_method_or_null(); -+ if (nm != NULL) { -+ if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc) || -+ nm->method()->is_method_handle_intrinsic()) { -+ return false; -+ } -+ } ++ if (needle_con_cnt == 4) { ++ Label CH1_LOOP; ++ (this->*load_4chr)(ch1, Address(needle), noreg); ++ sub(result_tmp, haystack_len, 4); ++ slli(tmp3, result_tmp, haystack_chr_shift); // result as tmp ++ add(haystack, haystack, tmp3); ++ neg(hlen_neg, tmp3); + -+ // If the frame size is 0 something (or less) is bad because every nmethod has a non-zero frame size -+ // because the return address counts against the callee's frame. 
-+ if (sender_blob->frame_size() <= 0) { -+ assert(!sender_blob->is_compiled(), "should count return address at least"); -+ return false; -+ } ++ bind(CH1_LOOP); ++ add(ch2, haystack, hlen_neg); ++ (this->*load_4chr)(ch2, Address(ch2), noreg); ++ beq(ch1, ch2, MATCH); ++ add(hlen_neg, hlen_neg, haystack_chr_size); ++ blez(hlen_neg, CH1_LOOP); ++ j(NOMATCH); ++ } + -+ // We should never be able to see anything here except an nmethod. If something in the -+ // code cache (current frame) is called by an entity within the code cache that entity -+ // should not be anything but the call stub (already covered), the interpreter (already covered) -+ // or an nmethod. -+ if (!sender_blob->is_compiled()) { -+ return false; ++ if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 2) { ++ Label CH1_LOOP; ++ BLOCK_COMMENT("string_indexof DO2 {"); ++ bind(DO2); ++ (this->*load_2chr)(ch1, Address(needle), noreg); ++ if (needle_con_cnt == 2) { ++ sub(result_tmp, haystack_len, 2); + } ++ slli(tmp3, result_tmp, haystack_chr_shift); ++ add(haystack, haystack, tmp3); ++ neg(hlen_neg, tmp3); + -+ // Could put some more validation for the potential non-interpreted sender -+ // frame we'd create by calling sender if I could think of any. Wait for next crash in forte... -+ -+ // One idea is seeing if the sender_pc we have is one that we'd expect to call to current cb -+ -+ // We've validated the potential sender that would be created -+ return true; ++ bind(CH1_LOOP); ++ add(tmp3, haystack, hlen_neg); ++ (this->*load_2chr)(ch2, Address(tmp3), noreg); ++ beq(ch1, ch2, MATCH); ++ add(hlen_neg, hlen_neg, haystack_chr_size); ++ blez(hlen_neg, CH1_LOOP); ++ j(NOMATCH); ++ BLOCK_COMMENT("} string_indexof DO2"); + } + -+ // Must be native-compiled frame. Since sender will try and use fp to find -+ // linkages it must be safe -+ if (!fp_safe) { -+ return false; -+ } ++ if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 3) { ++ Label FIRST_LOOP, STR2_NEXT, STR1_LOOP; ++ BLOCK_COMMENT("string_indexof DO3 {"); + -+ // Will the pc we fetch be non-zero (which we'll find at the oldest frame) -+ if ((address)this->fp()[return_addr_offset] == NULL) { return false; } ++ bind(DO3); ++ (this->*load_2chr)(first, Address(needle), noreg); ++ (this->*needle_load_1chr)(ch1, Address(needle, 2 * needle_chr_size), noreg); ++ if (needle_con_cnt == 3) { ++ sub(result_tmp, haystack_len, 3); ++ } ++ slli(hlen_tmp, result_tmp, haystack_chr_shift); ++ add(haystack, haystack, hlen_tmp); ++ neg(hlen_neg, hlen_tmp); + -+ return true; -+} ++ bind(FIRST_LOOP); ++ add(ch2, haystack, hlen_neg); ++ (this->*load_2chr)(ch2, Address(ch2), noreg); ++ beq(first, ch2, STR1_LOOP); + -+void frame::patch_pc(Thread* thread, address pc) { -+ address* pc_addr = &(((address*) sp())[-1]); -+ if (TracePcPatching) { -+ tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "]", -+ p2i(pc_addr), p2i(*pc_addr), p2i(pc)); -+ } -+ // Either the return address is the original one or we are going to -+ // patch in the same address that's already there. 
-+ assert(_pc == *pc_addr || pc == *pc_addr, "must be"); -+ *pc_addr = pc; -+ _cb = CodeCache::find_blob(pc); -+ address original_pc = CompiledMethod::get_deopt_original_pc(this); -+ if (original_pc != NULL) { -+ assert(original_pc == _pc, "expected original PC to be stored before patching"); -+ _deopt_state = is_deoptimized; -+ // leave _pc as is -+ } else { -+ _deopt_state = not_deoptimized; -+ _pc = pc; ++ bind(STR2_NEXT); ++ add(hlen_neg, hlen_neg, haystack_chr_size); ++ blez(hlen_neg, FIRST_LOOP); ++ j(NOMATCH); ++ ++ bind(STR1_LOOP); ++ add(hlen_tmp, hlen_neg, 2 * haystack_chr_size); ++ add(ch2, haystack, hlen_tmp); ++ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); ++ bne(ch1, ch2, STR2_NEXT); ++ j(MATCH); ++ BLOCK_COMMENT("} string_indexof DO3"); + } -+} + -+bool frame::is_interpreted_frame() const { -+ return Interpreter::contains(pc()); -+} ++ if (needle_con_cnt == -1 || needle_con_cnt == 1) { ++ Label DO1_LOOP; + -+int frame::frame_size(RegisterMap* map) const { -+ frame sender = this->sender(map); -+ return sender.sp() - sp(); -+} ++ BLOCK_COMMENT("string_indexof DO1 {"); ++ bind(DO1); ++ (this->*needle_load_1chr)(ch1, Address(needle), noreg); ++ sub(result_tmp, haystack_len, 1); ++ mv(tmp3, result_tmp); ++ if (haystack_chr_shift) { ++ slli(tmp3, result_tmp, haystack_chr_shift); ++ } ++ add(haystack, haystack, tmp3); ++ neg(hlen_neg, tmp3); + -+intptr_t* frame::entry_frame_argument_at(int offset) const { -+ // convert offset to index to deal with tsi -+ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); -+ // Entry frame's arguments are always in relation to unextended_sp() -+ return &unextended_sp()[index]; -+} ++ bind(DO1_LOOP); ++ add(tmp3, haystack, hlen_neg); ++ (this->*haystack_load_1chr)(ch2, Address(tmp3), noreg); ++ beq(ch1, ch2, MATCH); ++ add(hlen_neg, hlen_neg, haystack_chr_size); ++ blez(hlen_neg, DO1_LOOP); ++ BLOCK_COMMENT("} string_indexof DO1"); ++ } + -+// sender_sp -+intptr_t* frame::interpreter_frame_sender_sp() const { -+ assert(is_interpreted_frame(), "interpreted frame expected"); -+ return (intptr_t*) at(interpreter_frame_sender_sp_offset); -+} ++ bind(NOMATCH); ++ mv(result, -1); ++ j(DONE); + -+void frame::set_interpreter_frame_sender_sp(intptr_t* sender_sp) { -+ assert(is_interpreted_frame(), "interpreted frame expected"); -+ ptr_at_put(interpreter_frame_sender_sp_offset, (intptr_t) sender_sp); ++ bind(MATCH); ++ srai(t0, hlen_neg, haystack_chr_shift); ++ add(result, result_tmp, t0); ++ ++ bind(DONE); +} + ++// Compare strings. 
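++// result is zero when the strings are equal; otherwise it is the signed
++// difference between the first pair of characters that differ, or between
++// the two lengths (in characters) when the shorter string is a prefix of
++// the longer one.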
++void C2_MacroAssembler::string_compare(Register str1, Register str2, ++ Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2, ++ Register tmp3, int ae) ++{ ++ Label DONE, SHORT_LOOP, SHORT_STRING, SHORT_LAST, TAIL, STUB, ++ DIFFERENCE, NEXT_WORD, SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT, ++ SHORT_LOOP_START, TAIL_CHECK, L; + -+// monitor elements ++ const int STUB_THRESHOLD = 64 + 8; ++ bool isLL = ae == StrIntrinsicNode::LL; ++ bool isLU = ae == StrIntrinsicNode::LU; ++ bool isUL = ae == StrIntrinsicNode::UL; + -+BasicObjectLock* frame::interpreter_frame_monitor_begin() const { -+ return (BasicObjectLock*) addr_at(interpreter_frame_monitor_block_bottom_offset); -+} ++ bool str1_isL = isLL || isLU; ++ bool str2_isL = isLL || isUL; + -+BasicObjectLock* frame::interpreter_frame_monitor_end() const { -+ BasicObjectLock* result = (BasicObjectLock*) *addr_at(interpreter_frame_monitor_block_top_offset); -+ // make sure the pointer points inside the frame -+ assert(sp() <= (intptr_t*) result, "monitor end should be above the stack pointer"); -+ assert((intptr_t*) result < fp(), "monitor end should be strictly below the frame pointer"); -+ return result; -+} ++ // for L strings, 1 byte for 1 character ++ // for U strings, 2 bytes for 1 character ++ int str1_chr_size = str1_isL ? 1 : 2; ++ int str2_chr_size = str2_isL ? 1 : 2; ++ int minCharsInWord = isLL ? wordSize : wordSize / 2; + -+void frame::interpreter_frame_set_monitor_end(BasicObjectLock* value) { -+ *((BasicObjectLock**)addr_at(interpreter_frame_monitor_block_top_offset)) = value; -+} ++ load_chr_insn str1_load_chr = str1_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; ++ load_chr_insn str2_load_chr = str2_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; + -+// Used by template based interpreter deoptimization -+void frame::interpreter_frame_set_last_sp(intptr_t* last_sp) { -+ *((intptr_t**)addr_at(interpreter_frame_last_sp_offset)) = last_sp; -+} ++ BLOCK_COMMENT("string_compare {"); + -+frame frame::sender_for_entry_frame(RegisterMap* map) const { -+ assert(map != NULL, "map must be set"); -+ // Java frame called from C; skip all C frames and return top C -+ // frame of that chunk as the sender -+ JavaFrameAnchor* jfa = entry_frame_call_wrapper()->anchor(); -+ assert(!entry_frame_is_first(), "next Java fp must be non zero"); -+ assert(jfa->last_Java_sp() > sp(), "must be above this frame on stack"); -+ // Since we are walking the stack now this nested anchor is obviously walkable -+ // even if it wasn't when it was stacked. -+ if (!jfa->walkable()) { -+ // Capture _last_Java_pc (if needed) and mark anchor walkable. -+ jfa->capture_last_Java_pc(); ++ // Bizzarely, the counts are passed in bytes, regardless of whether they ++ // are L or U strings, however the result is always in characters. ++ if (!str1_isL) { ++ sraiw(cnt1, cnt1, 1); ++ } ++ if (!str2_isL) { ++ sraiw(cnt2, cnt2, 1); + } -+ map->clear(); -+ assert(map->include_argument_oops(), "should be set by clear"); -+ vmassert(jfa->last_Java_pc() != NULL, "not walkable"); -+ frame fr(jfa->last_Java_sp(), jfa->last_Java_fp(), jfa->last_Java_pc()); -+ return fr; -+} -+ -+//------------------------------------------------------------------------------ -+// frame::verify_deopt_original_pc -+// -+// Verifies the calculated original PC of a deoptimization PC for the -+// given unextended SP. 
-+#ifdef ASSERT -+void frame::verify_deopt_original_pc(CompiledMethod* nm, intptr_t* unextended_sp) { -+ frame fr; -+ -+ // This is ugly but it's better than to change {get,set}_original_pc -+ // to take an SP value as argument. And it's only a debugging -+ // method anyway. -+ fr._unextended_sp = unextended_sp; + -+ assert_cond(nm != NULL); -+ address original_pc = nm->get_original_pc(&fr); -+ assert(nm->insts_contains_inclusive(original_pc), -+ "original PC must be in the main code section of the the compiled method (or must be immediately following it)"); -+} -+#endif ++ // Compute the minimum of the string lengths and save the difference in result. ++ sub(result, cnt1, cnt2); ++ bgt(cnt1, cnt2, L); ++ mv(cnt2, cnt1); ++ bind(L); + -+//------------------------------------------------------------------------------ -+// frame::adjust_unextended_sp -+void frame::adjust_unextended_sp() { -+ // On riscv, sites calling method handle intrinsics and lambda forms are treated -+ // as any other call site. Therefore, no special action is needed when we are -+ // returning to any of these call sites. ++ // A very short string ++ li(t0, minCharsInWord); ++ ble(cnt2, t0, SHORT_STRING); + -+ if (_cb != NULL) { -+ CompiledMethod* sender_cm = _cb->as_compiled_method_or_null(); -+ if (sender_cm != NULL) { -+ // If the sender PC is a deoptimization point, get the original PC. -+ if (sender_cm->is_deopt_entry(_pc) || -+ sender_cm->is_deopt_mh_entry(_pc)) { -+ DEBUG_ONLY(verify_deopt_original_pc(sender_cm, _unextended_sp)); ++ // Compare longwords ++ // load first parts of strings and finish initialization while loading ++ { ++ if (str1_isL == str2_isL) { // LL or UU ++ // load 8 bytes once to compare ++ ld(tmp1, Address(str1)); ++ beq(str1, str2, DONE); ++ ld(tmp2, Address(str2)); ++ li(t0, STUB_THRESHOLD); ++ bge(cnt2, t0, STUB); ++ sub(cnt2, cnt2, minCharsInWord); ++ beqz(cnt2, TAIL_CHECK); ++ // convert cnt2 from characters to bytes ++ if (!str1_isL) { ++ slli(cnt2, cnt2, 1); + } ++ add(str2, str2, cnt2); ++ add(str1, str1, cnt2); ++ sub(cnt2, zr, cnt2); ++ } else if (isLU) { // LU case ++ lwu(tmp1, Address(str1)); ++ ld(tmp2, Address(str2)); ++ li(t0, STUB_THRESHOLD); ++ bge(cnt2, t0, STUB); ++ addi(cnt2, cnt2, -4); ++ add(str1, str1, cnt2); ++ sub(cnt1, zr, cnt2); ++ slli(cnt2, cnt2, 1); ++ add(str2, str2, cnt2); ++ inflate_lo32(tmp3, tmp1); ++ mv(tmp1, tmp3); ++ sub(cnt2, zr, cnt2); ++ addi(cnt1, cnt1, 4); ++ } else { // UL case ++ ld(tmp1, Address(str1)); ++ lwu(tmp2, Address(str2)); ++ li(t0, STUB_THRESHOLD); ++ bge(cnt2, t0, STUB); ++ addi(cnt2, cnt2, -4); ++ slli(t0, cnt2, 1); ++ sub(cnt1, zr, t0); ++ add(str1, str1, t0); ++ add(str2, str2, cnt2); ++ inflate_lo32(tmp3, tmp2); ++ mv(tmp2, tmp3); ++ sub(cnt2, zr, cnt2); ++ addi(cnt1, cnt1, 8); + } -+ } -+} -+ -+//------------------------------------------------------------------------------ -+// frame::update_map_with_saved_link -+void frame::update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr) { -+ // The interpreter and compiler(s) always save fp in a known -+ // location on entry. We must record where that location is -+ // so that if fp was live on callout from c2 we can find -+ // the saved copy no matter what it called. -+ -+ // Since the interpreter always saves fp if we record where it is then -+ // we don't have to always save fp on entry and exit to c2 compiled -+ // code, on entry will be enough. 
-+ assert(map != NULL, "map must be set"); -+ map->set_location(::fp->as_VMReg(), (address) link_addr); -+ // this is weird "H" ought to be at a higher address however the -+ // oopMaps seems to have the "H" regs at the same address and the -+ // vanilla register. -+ map->set_location(::fp->as_VMReg()->next(), (address) link_addr); -+} ++ addi(cnt2, cnt2, isUL ? 4 : 8); ++ bgez(cnt2, TAIL); ++ xorr(tmp3, tmp1, tmp2); ++ bnez(tmp3, DIFFERENCE); + ++ // main loop ++ bind(NEXT_WORD); ++ if (str1_isL == str2_isL) { // LL or UU ++ add(t0, str1, cnt2); ++ ld(tmp1, Address(t0)); ++ add(t0, str2, cnt2); ++ ld(tmp2, Address(t0)); ++ addi(cnt2, cnt2, 8); ++ } else if (isLU) { // LU case ++ add(t0, str1, cnt1); ++ lwu(tmp1, Address(t0)); ++ add(t0, str2, cnt2); ++ ld(tmp2, Address(t0)); ++ addi(cnt1, cnt1, 4); ++ inflate_lo32(tmp3, tmp1); ++ mv(tmp1, tmp3); ++ addi(cnt2, cnt2, 8); ++ } else { // UL case ++ add(t0, str2, cnt2); ++ lwu(tmp2, Address(t0)); ++ add(t0, str1, cnt1); ++ ld(tmp1, Address(t0)); ++ inflate_lo32(tmp3, tmp2); ++ mv(tmp2, tmp3); ++ addi(cnt1, cnt1, 8); ++ addi(cnt2, cnt2, 4); ++ } ++ bgez(cnt2, TAIL); + -+//------------------------------------------------------------------------------ -+// frame::sender_for_interpreter_frame -+frame frame::sender_for_interpreter_frame(RegisterMap* map) const { -+ // SP is the raw SP from the sender after adapter or interpreter -+ // extension. -+ intptr_t* sender_sp = this->sender_sp(); ++ xorr(tmp3, tmp1, tmp2); ++ beqz(tmp3, NEXT_WORD); ++ j(DIFFERENCE); ++ bind(TAIL); ++ xorr(tmp3, tmp1, tmp2); ++ bnez(tmp3, DIFFERENCE); ++ // Last longword. In the case where length == 4 we compare the ++ // same longword twice, but that's still faster than another ++ // conditional branch. ++ if (str1_isL == str2_isL) { // LL or UU ++ ld(tmp1, Address(str1)); ++ ld(tmp2, Address(str2)); ++ } else if (isLU) { // LU case ++ lwu(tmp1, Address(str1)); ++ ld(tmp2, Address(str2)); ++ inflate_lo32(tmp3, tmp1); ++ mv(tmp1, tmp3); ++ } else { // UL case ++ lwu(tmp2, Address(str2)); ++ ld(tmp1, Address(str1)); ++ inflate_lo32(tmp3, tmp2); ++ mv(tmp2, tmp3); ++ } ++ bind(TAIL_CHECK); ++ xorr(tmp3, tmp1, tmp2); ++ beqz(tmp3, DONE); + -+ // This is the sp before any possible extension (adapter/locals). -+ intptr_t* unextended_sp = interpreter_frame_sender_sp(); ++ // Find the first different characters in the longwords and ++ // compute their difference. 
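++ // tmp3 holds tmp1 ^ tmp2 and is non-zero on every path that reaches
++ // DIFFERENCE; its lowest set bit lies inside the first character that
++ // differs, so the shift/mask sequence below isolates that character in
++ // each operand before the signed subtraction.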
++ bind(DIFFERENCE); ++ ctzc_bit(result, tmp3, isLL); // count zero from lsb to msb ++ srl(tmp1, tmp1, result); ++ srl(tmp2, tmp2, result); ++ if (isLL) { ++ andi(tmp1, tmp1, 0xFF); ++ andi(tmp2, tmp2, 0xFF); ++ } else { ++ andi(tmp1, tmp1, 0xFFFF); ++ andi(tmp2, tmp2, 0xFFFF); ++ } ++ sub(result, tmp1, tmp2); ++ j(DONE); ++ } + -+#ifdef COMPILER2 -+ assert(map != NULL, "map must be set"); -+ if (map->update_map()) { -+ update_map_with_saved_link(map, (intptr_t**) addr_at(link_offset)); ++ bind(STUB); ++ RuntimeAddress stub = NULL; ++ switch (ae) { ++ case StrIntrinsicNode::LL: ++ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LL()); ++ break; ++ case StrIntrinsicNode::UU: ++ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UU()); ++ break; ++ case StrIntrinsicNode::LU: ++ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LU()); ++ break; ++ case StrIntrinsicNode::UL: ++ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UL()); ++ break; ++ default: ++ ShouldNotReachHere(); + } -+#endif // COMPILER2 ++ assert(stub.target() != NULL, "compare_long_string stub has not been generated"); ++ trampoline_call(stub); ++ j(DONE); + -+ return frame(sender_sp, unextended_sp, link(), sender_pc()); -+} ++ bind(SHORT_STRING); ++ // Is the minimum length zero? ++ beqz(cnt2, DONE); ++ // arrange code to do most branches while loading and loading next characters ++ // while comparing previous ++ (this->*str1_load_chr)(tmp1, Address(str1), t0); ++ addi(str1, str1, str1_chr_size); ++ addi(cnt2, cnt2, -1); ++ beqz(cnt2, SHORT_LAST_INIT); ++ (this->*str2_load_chr)(cnt1, Address(str2), t0); ++ addi(str2, str2, str2_chr_size); ++ j(SHORT_LOOP_START); ++ bind(SHORT_LOOP); ++ addi(cnt2, cnt2, -1); ++ beqz(cnt2, SHORT_LAST); ++ bind(SHORT_LOOP_START); ++ (this->*str1_load_chr)(tmp2, Address(str1), t0); ++ addi(str1, str1, str1_chr_size); ++ (this->*str2_load_chr)(t0, Address(str2), t0); ++ addi(str2, str2, str2_chr_size); ++ bne(tmp1, cnt1, SHORT_LOOP_TAIL); ++ addi(cnt2, cnt2, -1); ++ beqz(cnt2, SHORT_LAST2); ++ (this->*str1_load_chr)(tmp1, Address(str1), t0); ++ addi(str1, str1, str1_chr_size); ++ (this->*str2_load_chr)(cnt1, Address(str2), t0); ++ addi(str2, str2, str2_chr_size); ++ beq(tmp2, t0, SHORT_LOOP); ++ sub(result, tmp2, t0); ++ j(DONE); ++ bind(SHORT_LOOP_TAIL); ++ sub(result, tmp1, cnt1); ++ j(DONE); ++ bind(SHORT_LAST2); ++ beq(tmp2, t0, DONE); ++ sub(result, tmp2, t0); + ++ j(DONE); ++ bind(SHORT_LAST_INIT); ++ (this->*str2_load_chr)(cnt1, Address(str2), t0); ++ addi(str2, str2, str2_chr_size); ++ bind(SHORT_LAST); ++ beq(tmp1, cnt1, DONE); ++ sub(result, tmp1, cnt1); + -+//------------------------------------------------------------------------------ -+// frame::sender_for_compiled_frame -+frame frame::sender_for_compiled_frame(RegisterMap* map) const { -+ // we cannot rely upon the last fp having been saved to the thread -+ // in C2 code but it will have been pushed onto the stack. 
so we -+ // have to find it relative to the unextended sp ++ bind(DONE); + -+ assert(_cb->frame_size() >= 0, "must have non-zero frame size"); -+ intptr_t* l_sender_sp = unextended_sp() + _cb->frame_size(); -+ intptr_t* unextended_sp = l_sender_sp; ++ BLOCK_COMMENT("} string_compare"); ++} + -+ // the return_address is always the word on the stack -+ address sender_pc = (address) *(l_sender_sp + frame::return_addr_offset); ++void C2_MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3, ++ Register tmp4, Register tmp5, Register tmp6, Register result, ++ Register cnt1, int elem_size) { ++ Label DONE, SAME, NEXT_DWORD, SHORT, TAIL, TAIL2, IS_TMP5_ZR; ++ Register tmp1 = t0; ++ Register tmp2 = t1; ++ Register cnt2 = tmp2; // cnt2 only used in array length compare ++ Register elem_per_word = tmp6; ++ int log_elem_size = exact_log2(elem_size); ++ int length_offset = arrayOopDesc::length_offset_in_bytes(); ++ int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE); + -+ intptr_t** saved_fp_addr = (intptr_t**) (l_sender_sp + frame::link_offset); ++ assert(elem_size == 1 || elem_size == 2, "must be char or byte"); ++ assert_different_registers(a1, a2, result, cnt1, t0, t1, tmp3, tmp4, tmp5, tmp6); ++ li(elem_per_word, wordSize / elem_size); + -+ assert(map != NULL, "map must be set"); -+ if (map->update_map()) { -+ // Tell GC to use argument oopmaps for some runtime stubs that need it. -+ // For C1, the runtime stub might not have oop maps, so set this flag -+ // outside of update_register_map. -+ map->set_include_argument_oops(_cb->caller_must_gc_arguments(map->thread())); -+ if (_cb->oop_maps() != NULL) { -+ OopMapSet::update_register_map(this, map); -+ } ++ BLOCK_COMMENT("arrays_equals {"); + -+ // Since the prolog does the save and restore of FP there is no -+ // oopmap for it so we must fill in its location as if there was -+ // an oopmap entry since if our caller was compiled code there -+ // could be live jvm state in it. -+ update_map_with_saved_link(map, saved_fp_addr); -+ } ++ // if (a1 == a2), return true ++ beq(a1, a2, SAME); + -+ return frame(l_sender_sp, unextended_sp, *saved_fp_addr, sender_pc); -+} ++ mv(result, false); ++ beqz(a1, DONE); ++ beqz(a2, DONE); ++ lwu(cnt1, Address(a1, length_offset)); ++ lwu(cnt2, Address(a2, length_offset)); ++ bne(cnt2, cnt1, DONE); ++ beqz(cnt1, SAME); + -+//------------------------------------------------------------------------------ -+// frame::sender -+frame frame::sender(RegisterMap* map) const { -+ // Default is we done have to follow them. 
The sender_for_xxx will -+ // update it accordingly -+ assert(map != NULL, "map must be set"); -+ map->set_include_argument_oops(false); ++ slli(tmp5, cnt1, 3 + log_elem_size); ++ sub(tmp5, zr, tmp5); ++ add(a1, a1, base_offset); ++ add(a2, a2, base_offset); ++ ld(tmp3, Address(a1, 0)); ++ ld(tmp4, Address(a2, 0)); ++ ble(cnt1, elem_per_word, SHORT); // short or same + -+ if (is_entry_frame()) { -+ return sender_for_entry_frame(map); -+ } -+ if (is_interpreted_frame()) { -+ return sender_for_interpreter_frame(map); -+ } -+ assert(_cb == CodeCache::find_blob(pc()),"Must be the same"); ++ // Main 16 byte comparison loop with 2 exits ++ bind(NEXT_DWORD); { ++ ld(tmp1, Address(a1, wordSize)); ++ ld(tmp2, Address(a2, wordSize)); ++ sub(cnt1, cnt1, 2 * wordSize / elem_size); ++ blez(cnt1, TAIL); ++ bne(tmp3, tmp4, DONE); ++ ld(tmp3, Address(a1, 2 * wordSize)); ++ ld(tmp4, Address(a2, 2 * wordSize)); ++ add(a1, a1, 2 * wordSize); ++ add(a2, a2, 2 * wordSize); ++ ble(cnt1, elem_per_word, TAIL2); ++ } beq(tmp1, tmp2, NEXT_DWORD); ++ j(DONE); + -+ // This test looks odd: why is it not is_compiled_frame() ? That's -+ // because stubs also have OOP maps. -+ if (_cb != NULL) { -+ return sender_for_compiled_frame(map); -+ } ++ bind(TAIL); ++ xorr(tmp4, tmp3, tmp4); ++ xorr(tmp2, tmp1, tmp2); ++ sll(tmp2, tmp2, tmp5); ++ orr(tmp5, tmp4, tmp2); ++ j(IS_TMP5_ZR); + -+ // Must be native-compiled frame, i.e. the marshaling code for native -+ // methods that exists in the core system. -+ return frame(sender_sp(), link(), sender_pc()); ++ bind(TAIL2); ++ bne(tmp1, tmp2, DONE); ++ ++ bind(SHORT); ++ xorr(tmp4, tmp3, tmp4); ++ sll(tmp5, tmp4, tmp5); ++ ++ bind(IS_TMP5_ZR); ++ bnez(tmp5, DONE); ++ ++ bind(SAME); ++ mv(result, true); ++ // That's it. ++ bind(DONE); ++ ++ BLOCK_COMMENT("} array_equals"); +} + -+bool frame::is_interpreted_frame_valid(JavaThread* thread) const { -+ assert(is_interpreted_frame(), "Not an interpreted frame"); -+ // These are reasonable sanity checks -+ if (fp() == NULL || (intptr_t(fp()) & (wordSize-1)) != 0) { -+ return false; -+ } -+ if (sp() == NULL || (intptr_t(sp()) & (wordSize-1)) != 0) { -+ return false; -+ } -+ if (fp() + interpreter_frame_initial_sp_offset < sp()) { -+ return false; -+ } -+ // These are hacks to keep us out of trouble. -+ // The problem with these is that they mask other problems -+ if (fp() <= sp()) { // this attempts to deal with unsigned comparison above -+ return false; -+ } ++// Compare Strings + -+ // do some validation of frame elements ++// For Strings we're passed the address of the first characters in a1 ++// and a2 and the length in cnt1. ++// elem_size is the element size in bytes: either 1 or 2. ++// There are two implementations. For arrays >= 8 bytes, all ++// comparisons (including the final one, which may overlap) are ++// performed 8 bytes at a time. For strings < 8 bytes, we compare a ++// halfword, then a short, and then a byte. 
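++// For example, an 11-byte Latin1 comparison loads bytes [0, 8) in the main
++// loop and bytes [3, 11) in the final overlapping load, so every byte is
++// covered using only two 8-byte loads per string.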
+ -+ // first the method ++void C2_MacroAssembler::string_equals(Register a1, Register a2, ++ Register result, Register cnt1, int elem_size) ++{ ++ Label SAME, DONE, SHORT, NEXT_WORD; ++ Register tmp1 = t0; ++ Register tmp2 = t1; + -+ Method* m = *interpreter_frame_method_addr(); ++ assert(elem_size == 1 || elem_size == 2, "must be 2 or 1 byte"); ++ assert_different_registers(a1, a2, result, cnt1, t0, t1); + -+ // validate the method we'd find in this potential sender -+ if (!Method::is_valid_method(m)) { -+ return false; -+ } -+ // stack frames shouldn't be much larger than max_stack elements -+ // this test requires the use of unextended_sp which is the sp as seen by -+ // the current frame, and not sp which is the "raw" pc which could point -+ // further because of local variables of the callee method inserted after -+ // method arguments -+ if (fp() - unextended_sp() > 1024 + m->max_stack()*Interpreter::stackElementSize) { -+ return false; -+ } ++ BLOCK_COMMENT("string_equals {"); + -+ // validate bci/bcx -+ address bcp = interpreter_frame_bcp(); -+ if (m->validate_bci_from_bcp(bcp) < 0) { -+ return false; -+ } ++ mv(result, false); + -+ // validate constantPoolCache* -+ ConstantPoolCache* cp = *interpreter_frame_cache_addr(); -+ if (MetaspaceObj::is_valid(cp) == false) { -+ return false; -+ } -+ // validate locals -+ address locals = (address) *interpreter_frame_locals_addr(); ++ // Check for short strings, i.e. smaller than wordSize. ++ sub(cnt1, cnt1, wordSize); ++ bltz(cnt1, SHORT); + -+ if (locals > thread->stack_base() || locals < (address) fp()) { -+ return false; -+ } -+ // We'd have to be pretty unlucky to be mislead at this point -+ return true; -+} ++ // Main 8 byte comparison loop. ++ bind(NEXT_WORD); { ++ ld(tmp1, Address(a1, 0)); ++ add(a1, a1, wordSize); ++ ld(tmp2, Address(a2, 0)); ++ add(a2, a2, wordSize); ++ sub(cnt1, cnt1, wordSize); ++ bne(tmp1, tmp2, DONE); ++ } bgtz(cnt1, NEXT_WORD); + -+BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) { -+ assert(is_interpreted_frame(), "interpreted frame expected"); -+ Method* method = interpreter_frame_method(); -+ BasicType type = method->result_type(); ++ // Last longword. In the case where length == 4 we compare the ++ // same longword twice, but that's still faster than another ++ // conditional branch. ++ // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when ++ // length == 4. ++ add(tmp1, a1, cnt1); ++ ld(tmp1, Address(tmp1, 0)); ++ add(tmp2, a2, cnt1); ++ ld(tmp2, Address(tmp2, 0)); ++ bne(tmp1, tmp2, DONE); ++ j(SAME); + -+ intptr_t* tos_addr = NULL; -+ if (method->is_native()) { -+ tos_addr = (intptr_t*)sp(); -+ if (type == T_FLOAT || type == T_DOUBLE) { -+ // This is because we do a push(ltos) after push(dtos) in generate_native_entry. -+ tos_addr += 2 * Interpreter::stackElementWords; -+ } -+ } else { -+ tos_addr = (intptr_t*)interpreter_frame_tos_address(); ++ bind(SHORT); ++ Label TAIL03, TAIL01; ++ ++ // 0-7 bytes left. ++ andi(t0, cnt1, 4); ++ beqz(t0, TAIL03); ++ { ++ lwu(tmp1, Address(a1, 0)); ++ add(a1, a1, 4); ++ lwu(tmp2, Address(a2, 0)); ++ add(a2, a2, 4); ++ bne(tmp1, tmp2, DONE); + } + -+ switch (type) { -+ case T_OBJECT : -+ case T_ARRAY : { -+ oop obj; -+ if (method->is_native()) { -+ obj = cast_to_oop(at(interpreter_frame_oop_temp_offset)); -+ } else { -+ oop* obj_p = (oop*)tos_addr; -+ obj = (obj_p == NULL) ? 
(oop)NULL : *obj_p; -+ } -+ assert(obj == NULL || Universe::heap()->is_in(obj), "sanity check"); -+ *oop_result = obj; -+ break; -+ } -+ case T_BOOLEAN : value_result->z = *(jboolean*)tos_addr; break; -+ case T_BYTE : value_result->b = *(jbyte*)tos_addr; break; -+ case T_CHAR : value_result->c = *(jchar*)tos_addr; break; -+ case T_SHORT : value_result->s = *(jshort*)tos_addr; break; -+ case T_INT : value_result->i = *(jint*)tos_addr; break; -+ case T_LONG : value_result->j = *(jlong*)tos_addr; break; -+ case T_FLOAT : { -+ value_result->f = *(jfloat*)tos_addr; -+ break; ++ bind(TAIL03); ++ // 0-3 bytes left. ++ andi(t0, cnt1, 2); ++ beqz(t0, TAIL01); ++ { ++ lhu(tmp1, Address(a1, 0)); ++ add(a1, a1, 2); ++ lhu(tmp2, Address(a2, 0)); ++ add(a2, a2, 2); ++ bne(tmp1, tmp2, DONE); ++ } ++ ++ bind(TAIL01); ++ if (elem_size == 1) { // Only needed when comparing 1-byte elements ++ // 0-1 bytes left. ++ andi(t0, cnt1, 1); ++ beqz(t0, SAME); ++ { ++ lbu(tmp1, a1, 0); ++ lbu(tmp2, a2, 0); ++ bne(tmp1, tmp2, DONE); + } -+ case T_DOUBLE : value_result->d = *(jdouble*)tos_addr; break; -+ case T_VOID : /* Nothing to do */ break; -+ default : ShouldNotReachHere(); + } + -+ return type; ++ // Arrays are equal. ++ bind(SAME); ++ mv(result, true); ++ ++ // That's it. ++ bind(DONE); ++ BLOCK_COMMENT("} string_equals"); +} + ++typedef void (Assembler::*conditional_branch_insn)(Register op1, Register op2, Label& label, bool is_far); ++typedef void (MacroAssembler::*float_conditional_branch_insn)(FloatRegister op1, FloatRegister op2, Label& label, ++ bool is_far, bool is_unordered); ++ ++static conditional_branch_insn conditional_branches[] = ++{ ++ /* SHORT branches */ ++ (conditional_branch_insn)&Assembler::beq, ++ (conditional_branch_insn)&Assembler::bgt, ++ NULL, // BoolTest::overflow ++ (conditional_branch_insn)&Assembler::blt, ++ (conditional_branch_insn)&Assembler::bne, ++ (conditional_branch_insn)&Assembler::ble, ++ NULL, // BoolTest::no_overflow ++ (conditional_branch_insn)&Assembler::bge, + -+intptr_t* frame::interpreter_frame_tos_at(jint offset) const { -+ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); -+ return &interpreter_frame_tos_address()[index]; -+} ++ /* UNSIGNED branches */ ++ (conditional_branch_insn)&Assembler::beq, ++ (conditional_branch_insn)&Assembler::bgtu, ++ NULL, ++ (conditional_branch_insn)&Assembler::bltu, ++ (conditional_branch_insn)&Assembler::bne, ++ (conditional_branch_insn)&Assembler::bleu, ++ NULL, ++ (conditional_branch_insn)&Assembler::bgeu ++}; + -+#ifndef PRODUCT ++static float_conditional_branch_insn float_conditional_branches[] = ++{ ++ /* FLOAT SHORT branches */ ++ (float_conditional_branch_insn)&MacroAssembler::float_beq, ++ (float_conditional_branch_insn)&MacroAssembler::float_bgt, ++ NULL, // BoolTest::overflow ++ (float_conditional_branch_insn)&MacroAssembler::float_blt, ++ (float_conditional_branch_insn)&MacroAssembler::float_bne, ++ (float_conditional_branch_insn)&MacroAssembler::float_ble, ++ NULL, // BoolTest::no_overflow ++ (float_conditional_branch_insn)&MacroAssembler::float_bge, + -+#define DESCRIBE_FP_OFFSET(name) \ -+ values.describe(frame_no, fp() + frame::name##_offset, #name) ++ /* DOUBLE SHORT branches */ ++ (float_conditional_branch_insn)&MacroAssembler::double_beq, ++ (float_conditional_branch_insn)&MacroAssembler::double_bgt, ++ NULL, ++ (float_conditional_branch_insn)&MacroAssembler::double_blt, ++ (float_conditional_branch_insn)&MacroAssembler::double_bne, ++ (float_conditional_branch_insn)&MacroAssembler::double_ble, ++ NULL, 
++ (float_conditional_branch_insn)&MacroAssembler::double_bge ++}; + -+void frame::describe_pd(FrameValues& values, int frame_no) { -+ if (is_interpreted_frame()) { -+ DESCRIBE_FP_OFFSET(interpreter_frame_sender_sp); -+ DESCRIBE_FP_OFFSET(interpreter_frame_last_sp); -+ DESCRIBE_FP_OFFSET(interpreter_frame_method); -+ DESCRIBE_FP_OFFSET(interpreter_frame_mdp); -+ DESCRIBE_FP_OFFSET(interpreter_frame_mirror); -+ DESCRIBE_FP_OFFSET(interpreter_frame_cache); -+ DESCRIBE_FP_OFFSET(interpreter_frame_locals); -+ DESCRIBE_FP_OFFSET(interpreter_frame_bcp); -+ DESCRIBE_FP_OFFSET(interpreter_frame_initial_sp); -+ } ++void C2_MacroAssembler::cmp_branch(int cmpFlag, Register op1, Register op2, Label& label, bool is_far) { ++ assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(conditional_branches) / sizeof(conditional_branches[0])), ++ "invalid conditional branch index"); ++ (this->*conditional_branches[cmpFlag])(op1, op2, label, is_far); +} -+#endif + -+intptr_t *frame::initial_deoptimization_info() { -+ // Not used on riscv, but we must return something. -+ return NULL; ++// This is a function should only be used by C2. Flip the unordered when unordered-greater, C2 would use ++// unordered-lesser instead of unordered-greater. Finally, commute the result bits at function do_one_bytecode(). ++void C2_MacroAssembler::float_cmp_branch(int cmpFlag, FloatRegister op1, FloatRegister op2, Label& label, bool is_far) { ++ assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(float_conditional_branches) / sizeof(float_conditional_branches[0])), ++ "invalid float conditional branch index"); ++ int booltest_flag = cmpFlag & ~(C2_MacroAssembler::double_branch_mask); ++ (this->*float_conditional_branches[cmpFlag])(op1, op2, label, is_far, ++ (booltest_flag == (BoolTest::ge) || booltest_flag == (BoolTest::gt)) ? false : true); +} + -+intptr_t* frame::real_fp() const { -+ if (_cb != NULL) { -+ // use the frame size if valid -+ int size = _cb->frame_size(); -+ if (size > 0) { -+ return unextended_sp() + size; -+ } ++void C2_MacroAssembler::enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { ++ switch (cmpFlag) { ++ case BoolTest::eq: ++ case BoolTest::le: ++ beqz(op1, L, is_far); ++ break; ++ case BoolTest::ne: ++ case BoolTest::gt: ++ bnez(op1, L, is_far); ++ break; ++ default: ++ ShouldNotReachHere(); + } -+ // else rely on fp() -+ assert(!is_compiled_frame(), "unknown compiled frame size"); -+ return fp(); +} + -+#undef DESCRIBE_FP_OFFSET ++void C2_MacroAssembler::enc_cmpEqNe_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { ++ switch (cmpFlag) { ++ case BoolTest::eq: ++ beqz(op1, L, is_far); ++ break; ++ case BoolTest::ne: ++ bnez(op1, L, is_far); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} + -+#ifndef PRODUCT -+// This is a generic constructor which is only used by pns() in debug.cpp. -+frame::frame(void* ptr_sp, void* ptr_fp, void* pc) { -+ init((intptr_t*)ptr_sp, (intptr_t*)ptr_fp, (address)pc); ++void C2_MacroAssembler::enc_cmove(int cmpFlag, Register op1, Register op2, Register dst, Register src) { ++ Label L; ++ cmp_branch(cmpFlag ^ (1 << neg_cond_bits), op1, op2, L); ++ mv(dst, src); ++ bind(L); +} + -+void frame::pd_ps() {} -+#endif ++// Set dst to NaN if any NaN input. ++void C2_MacroAssembler::minmax_FD(FloatRegister dst, FloatRegister src1, FloatRegister src2, ++ bool is_double, bool is_min) { ++ assert_different_registers(dst, src1, src2); + -+void JavaFrameAnchor::make_walkable(JavaThread* thread) { -+ // last frame set? 
-+ if (last_Java_sp() == NULL) { return; } -+ // already walkable? -+ if (walkable()) { return; } -+ vmassert(Thread::current() == (Thread*)thread, "not current thread"); -+ vmassert(last_Java_sp() != NULL, "not called from Java code?"); -+ vmassert(last_Java_pc() == NULL, "already walkable"); -+ capture_last_Java_pc(); -+ vmassert(walkable(), "something went wrong"); -+} ++ Label Done; ++ fsflags(zr); ++ if (is_double) { ++ is_min ? fmin_d(dst, src1, src2) ++ : fmax_d(dst, src1, src2); ++ // Checking NaNs ++ flt_d(zr, src1, src2); ++ } else { ++ is_min ? fmin_s(dst, src1, src2) ++ : fmax_s(dst, src1, src2); ++ // Checking NaNs ++ flt_s(zr, src1, src2); ++ } + -+void JavaFrameAnchor::capture_last_Java_pc() { -+ vmassert(_last_Java_sp != NULL, "no last frame set"); -+ vmassert(_last_Java_pc == NULL, "already walkable"); -+ _last_Java_pc = (address)_last_Java_sp[-1]; ++ frflags(t0); ++ beqz(t0, Done); ++ ++ // In case of NaNs ++ is_double ? fadd_d(dst, src1, src2) ++ : fadd_s(dst, src1, src2); ++ ++ bind(Done); +} -diff --git a/src/hotspot/cpu/riscv/frame_riscv.hpp b/src/hotspot/cpu/riscv/frame_riscv.hpp -new file mode 100644 -index 000000000..7acabcbba ---- /dev/null -+++ b/src/hotspot/cpu/riscv/frame_riscv.hpp -@@ -0,0 +1,200 @@ -+/* -+ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ + -+#ifndef CPU_RISCV_FRAME_RISCV_HPP -+#define CPU_RISCV_FRAME_RISCV_HPP ++void C2_MacroAssembler::element_compare(Register a1, Register a2, Register result, Register cnt, Register tmp1, Register tmp2, ++ VectorRegister vr1, VectorRegister vr2, VectorRegister vrs, bool islatin, Label &DONE) { ++ Label loop; ++ Assembler::SEW sew = islatin ? Assembler::e8 : Assembler::e16; + -+#include "runtime/synchronizer.hpp" ++ bind(loop); ++ vsetvli(tmp1, cnt, sew, Assembler::m2); ++ vlex_v(vr1, a1, sew); ++ vlex_v(vr2, a2, sew); ++ vmsne_vv(vrs, vr1, vr2); ++ vfirst_m(tmp2, vrs); ++ bgez(tmp2, DONE); ++ sub(cnt, cnt, tmp1); ++ if (!islatin) { ++ slli(tmp1, tmp1, 1); // get byte counts ++ } ++ add(a1, a1, tmp1); ++ add(a2, a2, tmp1); ++ bnez(cnt, loop); + -+// A frame represents a physical stack frame (an activation). Frames can be -+// C or Java frames, and the Java frames can be interpreted or compiled. 
-+// In contrast, vframes represent source-level activations, so that one physical frame -+// can correspond to multiple source level frames because of inlining. -+// A frame is comprised of {pc, fp, sp} -+// ------------------------------ Asm interpreter ---------------------------------------- -+// Layout of asm interpreter frame: -+// [expression stack ] * <- sp ++ mv(result, true); ++} + -+// [monitors[0] ] \ -+// ... | monitor block size = k -+// [monitors[k-1] ] / -+// [frame initial esp ] ( == &monitors[0], initially here) initial_sp_offset -+// [byte code index/pointr] = bcx() bcx_offset ++void C2_MacroAssembler::string_equals_v(Register a1, Register a2, Register result, Register cnt, int elem_size) { ++ Label DONE; ++ Register tmp1 = t0; ++ Register tmp2 = t1; + -+// [pointer to locals ] = locals() locals_offset -+// [constant pool cache ] = cache() cache_offset ++ BLOCK_COMMENT("string_equals_v {"); + -+// [klass of method ] = mirror() mirror_offset -+// [padding ] ++ mv(result, false); + -+// [methodData ] = mdp() mdx_offset -+// [methodOop ] = method() method_offset ++ if (elem_size == 2) { ++ srli(cnt, cnt, 1); ++ } + -+// [last esp ] = last_sp() last_sp_offset -+// [old stack pointer ] (sender_sp) sender_sp_offset ++ element_compare(a1, a2, result, cnt, tmp1, tmp2, v0, v2, v0, elem_size == 1, DONE); + -+// [old frame pointer ] -+// [return pc ] ++ bind(DONE); ++ BLOCK_COMMENT("} string_equals_v"); ++} + -+// [last sp ] <- fp = link() -+// [oop temp ] (only for native calls) ++// used by C2 ClearArray patterns. ++// base: Address of a buffer to be zeroed ++// cnt: Count in HeapWords ++// ++// base, cnt, v0, v1 and t0 are clobbered. ++void C2_MacroAssembler::clear_array_v(Register base, Register cnt) { ++ Label loop; + -+// [padding ] (to preserve machine SP alignment) -+// [locals and parameters ] -+// <- sender sp -+// ------------------------------ Asm interpreter ---------------------------------------- ++ // making zero words ++ vsetvli(t0, cnt, Assembler::e64, Assembler::m4); ++ vxor_vv(v0, v0, v0); + -+// ------------------------------ C Frame ------------------------------------------------ -+// Stack: gcc with -fno-omit-frame-pointer -+// . -+// . -+// +-> . -+// | +-----------------+ | -+// | | return address | | -+// | | previous fp ------+ -+// | | saved registers | -+// | | local variables | -+// | | ... | <-+ -+// | +-----------------+ | -+// | | return address | | -+// +------ previous fp | | -+// | saved registers | | -+// | local variables | | -+// +-> | ... | | -+// | +-----------------+ | -+// | | return address | | -+// | | previous fp ------+ -+// | | saved registers | -+// | | local variables | -+// | | ... | <-+ -+// | +-----------------+ | -+// | | return address | | -+// +------ previous fp | | -+// | saved registers | | -+// | local variables | | -+// $fp --> | ... 
| | -+// +-----------------+ | -+// | return address | | -+// | previous fp ------+ -+// | saved registers | -+// $sp --> | local variables | -+// +-----------------+ -+// ------------------------------ C Frame ------------------------------------------------ ++ bind(loop); ++ vsetvli(t0, cnt, Assembler::e64, Assembler::m4); ++ vse64_v(v0, base); ++ sub(cnt, cnt, t0); ++ shadd(base, t0, base, t0, 3); ++ bnez(cnt, loop); ++} + -+ public: -+ enum { -+ pc_return_offset = 0, -+ // All frames -+ link_offset = -2, -+ return_addr_offset = -1, -+ sender_sp_offset = 0, -+ // Interpreter frames -+ interpreter_frame_oop_temp_offset = 1, // for native calls only ++void C2_MacroAssembler::arrays_equals_v(Register a1, Register a2, Register result, ++ Register cnt1, int elem_size) { ++ Label DONE; ++ Register tmp1 = t0; ++ Register tmp2 = t1; ++ Register cnt2 = tmp2; ++ int length_offset = arrayOopDesc::length_offset_in_bytes(); ++ int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE); + -+ interpreter_frame_sender_sp_offset = -3, -+ // outgoing sp before a call to an invoked method -+ interpreter_frame_last_sp_offset = interpreter_frame_sender_sp_offset - 1, -+ interpreter_frame_method_offset = interpreter_frame_last_sp_offset - 1, -+ interpreter_frame_mdp_offset = interpreter_frame_method_offset - 1, -+ interpreter_frame_padding_offset = interpreter_frame_mdp_offset - 1, -+ interpreter_frame_mirror_offset = interpreter_frame_padding_offset - 1, -+ interpreter_frame_cache_offset = interpreter_frame_mirror_offset - 1, -+ interpreter_frame_locals_offset = interpreter_frame_cache_offset - 1, -+ interpreter_frame_bcp_offset = interpreter_frame_locals_offset - 1, -+ interpreter_frame_initial_sp_offset = interpreter_frame_bcp_offset - 1, ++ BLOCK_COMMENT("arrays_equals_v {"); + -+ interpreter_frame_monitor_block_top_offset = interpreter_frame_initial_sp_offset, -+ interpreter_frame_monitor_block_bottom_offset = interpreter_frame_initial_sp_offset, ++ // if (a1 == a2), return true ++ mv(result, true); ++ beq(a1, a2, DONE); + -+ // Entry frames -+ // n.b. these values are determined by the layout defined in -+ // stubGenerator for the Java call stub -+ entry_frame_after_call_words = 34, -+ entry_frame_call_wrapper_offset = -10, ++ mv(result, false); ++ // if a1 == null or a2 == null, return false ++ beqz(a1, DONE); ++ beqz(a2, DONE); ++ // if (a1.length != a2.length), return false ++ lwu(cnt1, Address(a1, length_offset)); ++ lwu(cnt2, Address(a2, length_offset)); ++ bne(cnt1, cnt2, DONE); + -+ // we don't need a save area -+ arg_reg_save_area_bytes = 0 -+ }; ++ la(a1, Address(a1, base_offset)); ++ la(a2, Address(a2, base_offset)); + -+ intptr_t ptr_at(int offset) const { -+ return *ptr_at_addr(offset); -+ } ++ element_compare(a1, a2, result, cnt1, tmp1, tmp2, v0, v2, v0, elem_size == 1, DONE); + -+ void ptr_at_put(int offset, intptr_t value) { -+ *ptr_at_addr(offset) = value; -+ } ++ bind(DONE); + -+ private: -+ // an additional field beyond _sp and _pc: -+ intptr_t* _fp; // frame pointer -+ // The interpreter and adapters will extend the frame of the caller. -+ // Since oopMaps are based on the sp of the caller before extension -+ // we need to know that value. However in order to compute the address -+ // of the return address we need the real "raw" sp. Since sparc already -+ // uses sp() to mean "raw" sp and unextended_sp() to mean the caller's -+ // original sp we use that convention. 
++ BLOCK_COMMENT("} arrays_equals_v"); ++} + -+ intptr_t* _unextended_sp; -+ void adjust_unextended_sp(); ++void C2_MacroAssembler::string_compare_v(Register str1, Register str2, Register cnt1, Register cnt2, ++ Register result, Register tmp1, Register tmp2, int encForm) { ++ Label DIFFERENCE, DONE, L, loop; ++ bool encLL = encForm == StrIntrinsicNode::LL; ++ bool encLU = encForm == StrIntrinsicNode::LU; ++ bool encUL = encForm == StrIntrinsicNode::UL; + -+ intptr_t* ptr_at_addr(int offset) const { -+ return (intptr_t*) addr_at(offset); ++ bool str1_isL = encLL || encLU; ++ bool str2_isL = encLL || encUL; ++ ++ int minCharsInWord = encLL ? wordSize : wordSize / 2; ++ ++ BLOCK_COMMENT("string_compare {"); ++ ++ // for Lating strings, 1 byte for 1 character ++ // for UTF16 strings, 2 bytes for 1 character ++ if (!str1_isL) ++ sraiw(cnt1, cnt1, 1); ++ if (!str2_isL) ++ sraiw(cnt2, cnt2, 1); ++ ++ // if str1 == str2, return the difference ++ // save the minimum of the string lengths in cnt2. ++ sub(result, cnt1, cnt2); ++ bgt(cnt1, cnt2, L); ++ mv(cnt2, cnt1); ++ bind(L); ++ ++ if (str1_isL == str2_isL) { // LL or UU ++ element_compare(str1, str2, zr, cnt2, tmp1, tmp2, v2, v4, v1, encLL, DIFFERENCE); ++ j(DONE); ++ } else { // LU or UL ++ Register strL = encLU ? str1 : str2; ++ Register strU = encLU ? str2 : str1; ++ VectorRegister vstr1 = encLU ? v4 : v0; ++ VectorRegister vstr2 = encLU ? v0 : v4; ++ ++ bind(loop); ++ vsetvli(tmp1, cnt2, Assembler::e8, Assembler::m2); ++ vle8_v(vstr1, strL); ++ vsetvli(tmp1, cnt2, Assembler::e16, Assembler::m4); ++ vzext_vf2(vstr2, vstr1); ++ vle16_v(vstr1, strU); ++ vmsne_vv(v0, vstr2, vstr1); ++ vfirst_m(tmp2, v0); ++ bgez(tmp2, DIFFERENCE); ++ sub(cnt2, cnt2, tmp1); ++ add(strL, strL, tmp1); ++ shadd(strU, tmp1, strU, tmp1, 1); ++ bnez(cnt2, loop); ++ j(DONE); + } ++ bind(DIFFERENCE); ++ slli(tmp1, tmp2, 1); ++ add(str1, str1, str1_isL ? tmp2 : tmp1); ++ add(str2, str2, str2_isL ? tmp2 : tmp1); ++ str1_isL ? lbu(tmp1, Address(str1, 0)) : lhu(tmp1, Address(str1, 0)); ++ str2_isL ? lbu(tmp2, Address(str2, 0)) : lhu(tmp2, Address(str2, 0)); ++ sub(result, tmp1, tmp2); + -+#ifdef ASSERT -+ // Used in frame::sender_for_{interpreter,compiled}_frame -+ static void verify_deopt_original_pc( CompiledMethod* nm, intptr_t* unextended_sp); -+#endif ++ bind(DONE); ++} + -+ public: -+ // Constructors ++void C2_MacroAssembler::byte_array_inflate_v(Register src, Register dst, Register len, Register tmp) { ++ Label loop; ++ assert_different_registers(src, dst, len, tmp, t0); + -+ frame(intptr_t* ptr_sp, intptr_t* ptr_fp, address pc); ++ BLOCK_COMMENT("byte_array_inflate_v {"); ++ bind(loop); ++ vsetvli(tmp, len, Assembler::e8, Assembler::m2); ++ vle8_v(v2, src); ++ vsetvli(t0, len, Assembler::e16, Assembler::m4); ++ vzext_vf2(v0, v2); ++ vse16_v(v0, dst); ++ sub(len, len, tmp); ++ add(src, src, tmp); ++ shadd(dst, tmp, dst, tmp, 1); ++ bnez(len, loop); ++ BLOCK_COMMENT("} byte_array_inflate_v"); ++} + -+ frame(intptr_t* ptr_sp, intptr_t* unextended_sp, intptr_t* ptr_fp, address pc); ++// Compress char[] array to byte[]. ++// result: the array length if every element in array can be encoded; 0, otherwise. ++void C2_MacroAssembler::char_array_compress_v(Register src, Register dst, Register len, Register result, Register tmp) { ++ Label done; ++ encode_iso_array_v(src, dst, len, result, tmp); ++ beqz(len, done); ++ mv(result, zr); ++ bind(done); ++} + -+ frame(intptr_t* ptr_sp, intptr_t* ptr_fp); ++// result: the number of elements had been encoded. 
++void C2_MacroAssembler::encode_iso_array_v(Register src, Register dst, Register len, Register result, Register tmp) { ++ Label loop, DIFFERENCE, DONE; + -+ void init(intptr_t* ptr_sp, intptr_t* ptr_fp, address pc); ++ BLOCK_COMMENT("encode_iso_array_v {"); ++ mv(result, 0); + -+ // accessors for the instance variables -+ // Note: not necessarily the real 'frame pointer' (see real_fp) -+ intptr_t* fp() const { return _fp; } ++ bind(loop); ++ mv(tmp, 0xff); ++ vsetvli(t0, len, Assembler::e16, Assembler::m2); ++ vle16_v(v2, src); ++ // if element > 0xff, stop ++ vmsgtu_vx(v1, v2, tmp); ++ vfirst_m(tmp, v1); ++ vmsbf_m(v0, v1); ++ // compress char to byte ++ vsetvli(t0, len, Assembler::e8); ++ vncvt_x_x_w(v1, v2, Assembler::v0_t); ++ vse8_v(v1, dst, Assembler::v0_t); + -+ inline address* sender_pc_addr() const; ++ bgez(tmp, DIFFERENCE); ++ add(result, result, t0); ++ add(dst, dst, t0); ++ sub(len, len, t0); ++ shadd(src, t0, src, t0, 1); ++ bnez(len, loop); ++ j(DONE); + -+ // expression stack tos if we are nested in a java call -+ intptr_t* interpreter_frame_last_sp() const; ++ bind(DIFFERENCE); ++ add(result, result, tmp); + -+ // helper to update a map with callee-saved RBP -+ static void update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr); ++ bind(DONE); ++ BLOCK_COMMENT("} encode_iso_array_v"); ++} + -+ // deoptimization support -+ void interpreter_frame_set_last_sp(intptr_t* ptr_sp); ++void C2_MacroAssembler::count_positives_v(Register ary, Register len, Register result, Register tmp) { ++ Label LOOP, SET_RESULT, DONE; + -+ static jint interpreter_frame_expression_stack_direction() { return -1; } ++ BLOCK_COMMENT("count_positives_v {"); ++ mv(result, zr); + -+#endif // CPU_RISCV_FRAME_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/frame_riscv.inline.hpp b/src/hotspot/cpu/riscv/frame_riscv.inline.hpp ++ bind(LOOP); ++ vsetvli(t0, len, Assembler::e8, Assembler::m4); ++ vle8_v(v0, ary); ++ vmslt_vx(v0, v0, zr); ++ vfirst_m(tmp, v0); ++ bgez(tmp, SET_RESULT); ++ // if tmp == -1, all bytes are positive ++ add(result, result, t0); ++ ++ sub(len, len, t0); ++ add(ary, ary, t0); ++ bnez(len, LOOP); ++ j(DONE); ++ ++ // add remaining positive bytes count ++ bind(SET_RESULT); ++ add(result, result, tmp); ++ ++ bind(DONE); ++ BLOCK_COMMENT("} count_positives_v"); ++} ++ ++void C2_MacroAssembler::string_indexof_char_v(Register str1, Register cnt1, ++ Register ch, Register result, ++ Register tmp1, Register tmp2, ++ bool isL) { ++ mv(result, zr); ++ ++ Label loop, MATCH, DONE; ++ Assembler::SEW sew = isL ? Assembler::e8 : Assembler::e16; ++ bind(loop); ++ vsetvli(tmp1, cnt1, sew, Assembler::m4); ++ vlex_v(v0, str1, sew); ++ vmseq_vx(v0, v0, ch); ++ vfirst_m(tmp2, v0); ++ bgez(tmp2, MATCH); // if equal, return index ++ ++ add(result, result, tmp1); ++ sub(cnt1, cnt1, tmp1); ++ if (!isL) slli(tmp1, tmp1, 1); ++ add(str1, str1, tmp1); ++ bnez(cnt1, loop); ++ ++ mv(result, -1); ++ j(DONE); ++ ++ bind(MATCH); ++ add(result, result, tmp2); ++ ++ bind(DONE); ++} ++ ++// Set dst to NaN if any NaN input. ++void C2_MacroAssembler::minmax_FD_v(VectorRegister dst, VectorRegister src1, VectorRegister src2, ++ bool is_double, bool is_min) { ++ assert_different_registers(dst, src1, src2); ++ ++ vsetvli(t0, x0, is_double ? Assembler::e64 : Assembler::e32); ++ ++ is_min ? 
vfmin_vv(dst, src1, src2) ++ : vfmax_vv(dst, src1, src2); ++ ++ vmfne_vv(v0, src1, src1); ++ vfadd_vv(dst, src1, src1, Assembler::v0_t); ++ vmfne_vv(v0, src2, src2); ++ vfadd_vv(dst, src2, src2, Assembler::v0_t); ++} ++ ++// Set dst to NaN if any NaN input. ++void C2_MacroAssembler::reduce_minmax_FD_v(FloatRegister dst, ++ FloatRegister src1, VectorRegister src2, ++ VectorRegister tmp1, VectorRegister tmp2, ++ bool is_double, bool is_min) { ++ assert_different_registers(src2, tmp1, tmp2); ++ ++ Label L_done, L_NaN; ++ vsetvli(t0, x0, is_double ? Assembler::e64 : Assembler::e32); ++ vfmv_s_f(tmp2, src1); ++ ++ is_min ? vfredmin_vs(tmp1, src2, tmp2) ++ : vfredmax_vs(tmp1, src2, tmp2); ++ ++ fsflags(zr); ++ // Checking NaNs ++ vmflt_vf(tmp2, src2, src1); ++ frflags(t0); ++ bnez(t0, L_NaN); ++ j(L_done); ++ ++ bind(L_NaN); ++ vfmv_s_f(tmp2, src1); ++ vfredsum_vs(tmp1, src2, tmp2); ++ ++ bind(L_done); ++ vfmv_f_s(dst, tmp1); ++} +diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp new file mode 100644 -index 000000000..5bc6b430c +index 00000000000..c71df4c101b --- /dev/null -+++ b/src/hotspot/cpu/riscv/frame_riscv.inline.hpp -@@ -0,0 +1,257 @@ ++++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp +@@ -0,0 +1,193 @@ +/* -+ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -12391,246 +13371,183 @@ index 000000000..5bc6b430c + * + */ + -+#ifndef CPU_RISCV_FRAME_RISCV_INLINE_HPP -+#define CPU_RISCV_FRAME_RISCV_INLINE_HPP -+ -+#include "code/codeCache.hpp" -+#include "code/vmreg.inline.hpp" -+ -+// Inline functions for RISCV frames: -+ -+// Constructors: -+ -+inline frame::frame() { -+ _pc = NULL; -+ _sp = NULL; -+ _unextended_sp = NULL; -+ _fp = NULL; -+ _cb = NULL; -+ _deopt_state = unknown; -+} -+ -+static int spin; -+ -+inline void frame::init(intptr_t* ptr_sp, intptr_t* ptr_fp, address pc) { -+ intptr_t a = intptr_t(ptr_sp); -+ intptr_t b = intptr_t(ptr_fp); -+ _sp = ptr_sp; -+ _unextended_sp = ptr_sp; -+ _fp = ptr_fp; -+ _pc = pc; -+ assert(pc != NULL, "no pc?"); -+ _cb = CodeCache::find_blob(pc); -+ adjust_unextended_sp(); -+ -+ address original_pc = CompiledMethod::get_deopt_original_pc(this); -+ if (original_pc != NULL) { -+ _pc = original_pc; -+ _deopt_state = is_deoptimized; -+ } else { -+ _deopt_state = not_deoptimized; -+ } -+} -+ -+inline frame::frame(intptr_t* ptr_sp, intptr_t* ptr_fp, address pc) { -+ init(ptr_sp, ptr_fp, pc); -+} -+ -+inline frame::frame(intptr_t* ptr_sp, intptr_t* unextended_sp, intptr_t* ptr_fp, address pc) { -+ intptr_t a = intptr_t(ptr_sp); -+ intptr_t b = intptr_t(ptr_fp); -+ _sp = ptr_sp; -+ _unextended_sp = unextended_sp; -+ _fp = ptr_fp; -+ _pc = pc; -+ assert(pc != NULL, "no pc?"); -+ _cb = CodeCache::find_blob(pc); -+ adjust_unextended_sp(); -+ -+ address original_pc = CompiledMethod::get_deopt_original_pc(this); -+ if (original_pc != NULL) { -+ _pc = original_pc; -+ assert(_cb->as_compiled_method()->insts_contains_inclusive(_pc), -+ "original PC must be in the main code section of the the compiled method (or must be immediately following it)"); -+ _deopt_state = is_deoptimized; -+ } else { -+ _deopt_state = not_deoptimized; -+ } -+} -+ -+inline frame::frame(intptr_t* ptr_sp, 
intptr_t* ptr_fp) { -+ intptr_t a = intptr_t(ptr_sp); -+ intptr_t b = intptr_t(ptr_fp); -+ _sp = ptr_sp; -+ _unextended_sp = ptr_sp; -+ _fp = ptr_fp; -+ _pc = (address)(ptr_sp[-1]); -+ -+ // Here's a sticky one. This constructor can be called via AsyncGetCallTrace -+ // when last_Java_sp is non-null but the pc fetched is junk. If we are truly -+ // unlucky the junk value could be to a zombied method and we'll die on the -+ // find_blob call. This is also why we can have no asserts on the validity -+ // of the pc we find here. AsyncGetCallTrace -> pd_get_top_frame_for_signal_handler -+ // -> pd_last_frame should use a specialized version of pd_last_frame which could -+ // call a specilaized frame constructor instead of this one. -+ // Then we could use the assert below. However this assert is of somewhat dubious -+ // value. ++#ifndef CPU_RISCV_C2_MACROASSEMBLER_RISCV_HPP ++#define CPU_RISCV_C2_MACROASSEMBLER_RISCV_HPP + -+ _cb = CodeCache::find_blob(_pc); -+ adjust_unextended_sp(); ++// C2_MacroAssembler contains high-level macros for C2 + -+ address original_pc = CompiledMethod::get_deopt_original_pc(this); -+ if (original_pc != NULL) { -+ _pc = original_pc; -+ _deopt_state = is_deoptimized; -+ } else { -+ _deopt_state = not_deoptimized; -+ } -+} ++ private: ++ void element_compare(Register r1, Register r2, ++ Register result, Register cnt, ++ Register tmp1, Register tmp2, ++ VectorRegister vr1, VectorRegister vr2, ++ VectorRegister vrs, ++ bool is_latin, Label& DONE); ++ public: + -+// Accessors ++ void string_compare(Register str1, Register str2, ++ Register cnt1, Register cnt2, Register result, ++ Register tmp1, Register tmp2, Register tmp3, ++ int ae); + -+inline bool frame::equal(frame other) const { -+ bool ret = sp() == other.sp() && -+ unextended_sp() == other.unextended_sp() && -+ fp() == other.fp() && -+ pc() == other.pc(); -+ assert(!ret || ret && cb() == other.cb() && _deopt_state == other._deopt_state, "inconsistent construction"); -+ return ret; -+} ++ void string_indexof_char_short(Register str1, Register cnt1, ++ Register ch, Register result, ++ bool isL); + -+// Return unique id for this frame. The id must have a value where we can distinguish -+// identity and younger/older relationship. NULL represents an invalid (incomparable) -+// frame. 
-+inline intptr_t* frame::id(void) const { return unextended_sp(); } ++ void string_indexof_char(Register str1, Register cnt1, ++ Register ch, Register result, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ bool isL); + -+// Relationals on frames based ++ void string_indexof(Register str1, Register str2, ++ Register cnt1, Register cnt2, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ Register tmp5, Register tmp6, ++ Register result, int ae); + -+// Return true if the frame is younger (more recent activation) than the frame represented by id -+inline bool frame::is_younger(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); -+ return this->id() < id ; } -+// Return true if the frame is older (less recent activation) than the frame represented by id -+inline bool frame::is_older(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); -+ return this->id() > id ; } ++ void string_indexof_linearscan(Register haystack, Register needle, ++ Register haystack_len, Register needle_len, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ int needle_con_cnt, Register result, int ae); + -+inline intptr_t* frame::link() const { return (intptr_t*) *(intptr_t **)addr_at(link_offset); } ++ void arrays_equals(Register r1, Register r2, ++ Register tmp3, Register tmp4, ++ Register tmp5, Register tmp6, ++ Register result, Register cnt1, ++ int elem_size); + -+inline intptr_t* frame::link_or_null() const { -+ intptr_t** ptr = (intptr_t **)addr_at(link_offset); -+ return os::is_readable_pointer(ptr) ? *ptr : NULL; -+} ++ void string_equals(Register r1, Register r2, ++ Register result, Register cnt1, ++ int elem_size); + -+inline intptr_t* frame::unextended_sp() const { return _unextended_sp; } ++ // refer to conditional_branches and float_conditional_branches ++ static const int bool_test_bits = 3; ++ static const int neg_cond_bits = 2; ++ static const int unsigned_branch_mask = 1 << bool_test_bits; ++ static const int double_branch_mask = 1 << bool_test_bits; + -+// Return address -+inline address* frame::sender_pc_addr() const { return (address*) addr_at(return_addr_offset); } -+inline address frame::sender_pc() const { return *sender_pc_addr(); } -+inline intptr_t* frame::sender_sp() const { return addr_at(sender_sp_offset); } ++ // cmp ++ void cmp_branch(int cmpFlag, ++ Register op1, Register op2, ++ Label& label, bool is_far = false); + -+inline intptr_t** frame::interpreter_frame_locals_addr() const { -+ return (intptr_t**)addr_at(interpreter_frame_locals_offset); -+} ++ void float_cmp_branch(int cmpFlag, ++ FloatRegister op1, FloatRegister op2, ++ Label& label, bool is_far = false); + -+inline intptr_t* frame::interpreter_frame_last_sp() const { -+ return *(intptr_t**)addr_at(interpreter_frame_last_sp_offset); -+} ++ void enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op, ++ Label& L, bool is_far = false); + -+inline intptr_t* frame::interpreter_frame_bcp_addr() const { -+ return (intptr_t*)addr_at(interpreter_frame_bcp_offset); -+} ++ void enc_cmpEqNe_imm0_branch(int cmpFlag, Register op, ++ Label& L, bool is_far = false); + -+inline intptr_t* frame::interpreter_frame_mdp_addr() const { -+ return (intptr_t*)addr_at(interpreter_frame_mdp_offset); -+} ++ void enc_cmove(int cmpFlag, ++ Register op1, Register op2, ++ Register dst, Register src); + ++ void spill(Register r, bool is64, int offset) { ++ is64 ? 
sd(r, Address(sp, offset)) ++ : sw(r, Address(sp, offset)); ++ } + -+// Constant pool cache ++ void spill(FloatRegister f, bool is64, int offset) { ++ is64 ? fsd(f, Address(sp, offset)) ++ : fsw(f, Address(sp, offset)); ++ } + -+inline ConstantPoolCache** frame::interpreter_frame_cache_addr() const { -+ return (ConstantPoolCache**)addr_at(interpreter_frame_cache_offset); -+} ++ void spill(VectorRegister v, int offset) { ++ add(t0, sp, offset); ++ vs1r_v(v, t0); ++ } + -+// Method ++ void unspill(Register r, bool is64, int offset) { ++ is64 ? ld(r, Address(sp, offset)) ++ : lw(r, Address(sp, offset)); ++ } + -+inline Method** frame::interpreter_frame_method_addr() const { -+ return (Method**)addr_at(interpreter_frame_method_offset); -+} ++ void unspillu(Register r, bool is64, int offset) { ++ is64 ? ld(r, Address(sp, offset)) ++ : lwu(r, Address(sp, offset)); ++ } + -+// Mirror ++ void unspill(FloatRegister f, bool is64, int offset) { ++ is64 ? fld(f, Address(sp, offset)) ++ : flw(f, Address(sp, offset)); ++ } + -+inline oop* frame::interpreter_frame_mirror_addr() const { -+ return (oop*)addr_at(interpreter_frame_mirror_offset); -+} ++ void unspill(VectorRegister v, int offset) { ++ add(t0, sp, offset); ++ vl1r_v(v, t0); ++ } + -+// top of expression stack -+inline intptr_t* frame::interpreter_frame_tos_address() const { -+ intptr_t* last_sp = interpreter_frame_last_sp(); -+ if (last_sp == NULL) { -+ return sp(); -+ } else { -+ // sp() may have been extended or shrunk by an adapter. At least -+ // check that we don't fall behind the legal region. -+ // For top deoptimized frame last_sp == interpreter_frame_monitor_end. -+ assert(last_sp <= (intptr_t*) interpreter_frame_monitor_end(), "bad tos"); -+ return last_sp; ++ void spill_copy_vector_stack_to_stack(int src_offset, int dst_offset, int vec_reg_size_in_bytes) { ++ assert(vec_reg_size_in_bytes % 16 == 0, "unexpected vector reg size"); ++ unspill(v0, src_offset); ++ spill(v0, dst_offset); + } -+} + -+inline oop* frame::interpreter_frame_temp_oop_addr() const { -+ return (oop *)(fp() + interpreter_frame_oop_temp_offset); -+} ++ void minmax_FD(FloatRegister dst, ++ FloatRegister src1, FloatRegister src2, ++ bool is_double, bool is_min); + -+inline int frame::interpreter_frame_monitor_size() { -+ return BasicObjectLock::size(); -+} ++ // intrinsic methods implemented by rvv instructions ++ void string_equals_v(Register r1, Register r2, ++ Register result, Register cnt1, ++ int elem_size); + ++ void arrays_equals_v(Register r1, Register r2, ++ Register result, Register cnt1, ++ int elem_size); + -+// expression stack -+// (the max_stack arguments are used by the GC; see class FrameClosure) ++ void string_compare_v(Register str1, Register str2, ++ Register cnt1, Register cnt2, ++ Register result, ++ Register tmp1, Register tmp2, ++ int encForm); + -+inline intptr_t* frame::interpreter_frame_expression_stack() const { -+ intptr_t* monitor_end = (intptr_t*) interpreter_frame_monitor_end(); -+ return monitor_end-1; -+} ++ void clear_array_v(Register base, Register cnt); + ++ void byte_array_inflate_v(Register src, Register dst, ++ Register len, Register tmp); + -+// Entry frames ++ void char_array_compress_v(Register src, Register dst, ++ Register len, Register result, ++ Register tmp); + -+inline JavaCallWrapper** frame::entry_frame_call_wrapper_addr() const { -+ return (JavaCallWrapper**)addr_at(entry_frame_call_wrapper_offset); -+} ++ void encode_iso_array_v(Register src, Register dst, ++ Register len, Register result, ++ Register tmp); + ++ 
void count_positives_v(Register ary, Register len, ++ Register result, Register tmp); + -+// Compiled frames -+inline oop frame::saved_oop_result(RegisterMap* map) const { -+ oop* result_adr = (oop *)map->location(x10->as_VMReg()); -+ if(result_adr != NULL) { -+ return (*result_adr); -+ } else { -+ ShouldNotReachHere(); -+ return NULL; -+ } -+} ++ void string_indexof_char_v(Register str1, Register cnt1, ++ Register ch, Register result, ++ Register tmp1, Register tmp2, ++ bool isL); + -+inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) { -+ oop* result_adr = (oop *)map->location(x10->as_VMReg()); -+ if(result_adr != NULL) { -+ *result_adr = obj; -+ } else { -+ ShouldNotReachHere(); -+ } -+} ++ void minmax_FD_v(VectorRegister dst, ++ VectorRegister src1, VectorRegister src2, ++ bool is_double, bool is_min); + -+#endif // CPU_RISCV_FRAME_RISCV_INLINE_HPP -diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp ++ void reduce_minmax_FD_v(FloatRegister dst, ++ FloatRegister src1, VectorRegister src2, ++ VectorRegister tmp1, VectorRegister tmp2, ++ bool is_double, bool is_min); ++ ++#endif // CPU_RISCV_C2_MACROASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp new file mode 100644 -index 000000000..6f778956d +index 00000000000..53a41665f4b --- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp -@@ -0,0 +1,479 @@ ++++ b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp +@@ -0,0 +1,83 @@ +/* -+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -12653,468 +13570,73 @@ index 000000000..6f778956d + * + */ + -+#include "precompiled.hpp" -+#include "asm/macroAssembler.inline.hpp" -+#include "gc/g1/g1BarrierSet.hpp" -+#include "gc/g1/g1BarrierSetAssembler.hpp" -+#include "gc/g1/g1BarrierSetRuntime.hpp" -+#include "gc/g1/g1CardTable.hpp" -+#include "gc/g1/g1ThreadLocalData.hpp" -+#include "gc/g1/heapRegion.hpp" -+#include "gc/shared/collectedHeap.hpp" -+#include "interpreter/interp_masm.hpp" -+#include "runtime/sharedRuntime.hpp" -+#include "runtime/thread.hpp" -+#ifdef COMPILER1 -+#include "c1/c1_LIRAssembler.hpp" -+#include "c1/c1_MacroAssembler.hpp" -+#include "gc/g1/c1/g1BarrierSetC1.hpp" -+#endif ++#ifndef CPU_RISCV_C2_GLOBALS_RISCV_HPP ++#define CPU_RISCV_C2_GLOBALS_RISCV_HPP + -+#define __ masm-> ++#include "utilities/globalDefinitions.hpp" ++#include "utilities/macros.hpp" + -+void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, -+ Register addr, Register count, RegSet saved_regs) { -+ bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; -+ if (!dest_uninitialized) { -+ Label done; -+ Address in_progress(xthread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); ++// Sets the default values for platform dependent flags used by the server compiler. ++// (see c2_globals.hpp). Alpha-sorted. + -+ // Is marking active? 
-+ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { -+ __ lwu(t0, in_progress); -+ } else { -+ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); -+ __ lbu(t0, in_progress); -+ } -+ __ beqz(t0, done); ++define_pd_global(bool, BackgroundCompilation, true); ++define_pd_global(bool, CICompileOSR, true); ++define_pd_global(bool, InlineIntrinsics, true); ++define_pd_global(bool, PreferInterpreterNativeStubs, false); ++define_pd_global(bool, ProfileTraps, true); ++define_pd_global(bool, UseOnStackReplacement, true); ++define_pd_global(bool, ProfileInterpreter, true); ++define_pd_global(bool, TieredCompilation, COMPILER1_PRESENT(true) NOT_COMPILER1(false)); ++define_pd_global(intx, CompileThreshold, 10000); + -+ __ push_reg(saved_regs, sp); -+ if (count == c_rarg0) { -+ if (addr == c_rarg1) { -+ // exactly backwards!! -+ __ mv(t0, c_rarg0); -+ __ mv(c_rarg0, c_rarg1); -+ __ mv(c_rarg1, t0); -+ } else { -+ __ mv(c_rarg1, count); -+ __ mv(c_rarg0, addr); -+ } -+ } else { -+ __ mv(c_rarg0, addr); -+ __ mv(c_rarg1, count); -+ } -+ if (UseCompressedOops) { -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_narrow_oop_entry), 2); -+ } else { -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_oop_entry), 2); -+ } -+ __ pop_reg(saved_regs, sp); -+ -+ __ bind(done); -+ } -+} -+ -+void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, -+ Register start, Register count, Register tmp, RegSet saved_regs) { -+ __ push_reg(saved_regs, sp); -+ assert_different_registers(start, count, tmp); -+ assert_different_registers(c_rarg0, count); -+ __ mv(c_rarg0, start); -+ __ mv(c_rarg1, count); -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_post_entry), 2); -+ __ pop_reg(saved_regs, sp); -+} -+ -+void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, -+ Register obj, -+ Register pre_val, -+ Register thread, -+ Register tmp, -+ bool tosca_live, -+ bool expand_call) { -+ // If expand_call is true then we expand the call_VM_leaf macro -+ // directly to skip generating the check by -+ // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. -+ -+ assert(thread == xthread, "must be"); -+ -+ Label done; -+ Label runtime; -+ -+ assert_different_registers(obj, pre_val, tmp, t0); -+ assert(pre_val != noreg && tmp != noreg, "expecting a register"); -+ -+ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); -+ Address index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); -+ Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); -+ -+ // Is marking active? -+ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { // 4-byte width -+ __ lwu(tmp, in_progress); -+ } else { -+ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); -+ __ lbu(tmp, in_progress); -+ } -+ __ beqz(tmp, done); -+ -+ // Do we need to load the previous value? -+ if (obj != noreg) { -+ __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW); -+ } -+ -+ // Is the previous value null? -+ __ beqz(pre_val, done); -+ -+ // Can we store original value in the thread's buffer? -+ // Is index == 0? -+ // (The index field is typed as size_t.) -+ -+ __ ld(tmp, index); // tmp := *index_adr -+ __ beqz(tmp, runtime); // tmp == 0? 
-+ // If yes, goto runtime -+ -+ __ sub(tmp, tmp, wordSize); // tmp := tmp - wordSize -+ __ sd(tmp, index); // *index_adr := tmp -+ __ ld(t0, buffer); -+ __ add(tmp, tmp, t0); // tmp := tmp + *buffer_adr -+ -+ // Record the previous value -+ __ sd(pre_val, Address(tmp, 0)); -+ __ j(done); -+ -+ __ bind(runtime); -+ // save the live input values -+ RegSet saved = RegSet::of(pre_val); -+ if (tosca_live) { saved += RegSet::of(x10); } -+ if (obj != noreg) { saved += RegSet::of(obj); } -+ -+ __ push_reg(saved, sp); -+ -+ if (expand_call) { -+ assert(pre_val != c_rarg1, "smashed arg"); -+ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); -+ } else { -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); -+ } -+ -+ __ pop_reg(saved, sp); -+ -+ __ bind(done); -+ -+} -+ -+void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, -+ Register store_addr, -+ Register new_val, -+ Register thread, -+ Register tmp, -+ Register tmp2) { -+ assert(thread == xthread, "must be"); -+ assert_different_registers(store_addr, new_val, thread, tmp, tmp2, -+ t0); -+ assert(store_addr != noreg && new_val != noreg && tmp != noreg && -+ tmp2 != noreg, "expecting a register"); -+ -+ Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset())); -+ Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); -+ -+ BarrierSet* bs = BarrierSet::barrier_set(); -+ CardTableBarrierSet* ctbs = barrier_set_cast(bs); -+ CardTable* ct = ctbs->card_table(); -+ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); -+ -+ Label done; -+ Label runtime; -+ -+ // Does store cross heap regions? -+ -+ __ xorr(tmp, store_addr, new_val); -+ __ srli(tmp, tmp, HeapRegion::LogOfHRGrainBytes); -+ __ beqz(tmp, done); -+ -+ // crosses regions, storing NULL? -+ -+ __ beqz(new_val, done); -+ -+ // storing region crossing non-NULL, is card already dirty? -+ -+ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); -+ const Register card_addr = tmp; -+ -+ __ srli(card_addr, store_addr, CardTable::card_shift); -+ -+ // get the address of the card -+ __ load_byte_map_base(tmp2); -+ __ add(card_addr, card_addr, tmp2); -+ __ lbu(tmp2, Address(card_addr)); -+ __ mv(t0, (int)G1CardTable::g1_young_card_val()); -+ __ beq(tmp2, t0, done); -+ -+ assert((int)CardTable::dirty_card_val() == 0, "must be 0"); -+ -+ __ membar(MacroAssembler::StoreLoad); -+ -+ __ lbu(tmp2, Address(card_addr)); -+ __ beqz(tmp2, done); -+ -+ // storing a region crossing, non-NULL oop, card is clean. -+ // dirty card and log. 
-+ -+ __ sb(zr, Address(card_addr)); -+ -+ __ ld(t0, queue_index); -+ __ beqz(t0, runtime); -+ __ sub(t0, t0, wordSize); -+ __ sd(t0, queue_index); -+ -+ __ ld(tmp2, buffer); -+ __ add(t0, tmp2, t0); -+ __ sd(card_addr, Address(t0, 0)); -+ __ j(done); -+ -+ __ bind(runtime); -+ // save the live input values -+ RegSet saved = RegSet::of(store_addr, new_val); -+ __ push_reg(saved, sp); -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread); -+ __ pop_reg(saved, sp); -+ -+ __ bind(done); -+} -+ -+void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Register dst, Address src, Register tmp1, Register tmp_thread) { -+ bool on_oop = type == T_OBJECT || type == T_ARRAY; -+ bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0; -+ bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0; -+ bool on_reference = on_weak || on_phantom; -+ ModRefBarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); -+ if (on_oop && on_reference) { -+ // RA is live. It must be saved around calls. -+ __ enter(); // barrier may call runtime -+ // Generate the G1 pre-barrier code to log the value of -+ // the referent field in an SATB buffer. -+ g1_write_barrier_pre(masm /* masm */, -+ noreg /* obj */, -+ dst /* pre_val */, -+ xthread /* thread */, -+ tmp1 /* tmp */, -+ true /* tosca_live */, -+ true /* expand_call */); -+ __ leave(); -+ } -+} -+ -+void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) { -+ // flatten object address if needed -+ if (dst.offset() == 0) { -+ __ mv(tmp3, dst.base()); -+ } else { -+ __ la(tmp3, dst); -+ } -+ -+ g1_write_barrier_pre(masm, -+ tmp3 /* obj */, -+ tmp2 /* pre_val */, -+ xthread /* thread */, -+ tmp1 /* tmp */, -+ val != noreg /* tosca_live */, -+ false /* expand_call */); -+ -+ if (val == noreg) { -+ BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), noreg, noreg, noreg, noreg); -+ } else { -+ // G1 barrier needs uncompressed oop for region cross check. -+ Register new_val = val; -+ if (UseCompressedOops) { -+ new_val = t1; -+ __ mv(new_val, val); -+ } -+ BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), val, noreg, noreg, noreg); -+ g1_write_barrier_post(masm, -+ tmp3 /* store_adr */, -+ new_val /* new_val */, -+ xthread /* thread */, -+ tmp1 /* tmp */, -+ tmp2 /* tmp2 */); -+ } -+} -+ -+#ifdef COMPILER1 -+ -+#undef __ -+#define __ ce->masm()-> -+ -+void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub) { -+ G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); -+ -+ // At this point we know that marking is in progress. -+ // If do_load() is true then we have to emit the -+ // load of the previous value; otherwise it has already -+ // been loaded into _pre_val. 
-+ __ bind(*stub->entry()); -+ -+ assert(stub->pre_val()->is_register(), "Precondition."); -+ -+ Register pre_val_reg = stub->pre_val()->as_register(); -+ -+ if (stub->do_load()) { -+ ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), -+ false /* wide */, false /* unaligned */); -+ } -+ __ beqz(pre_val_reg, *stub->continuation(), /* is_far */ true); -+ ce->store_parameter(stub->pre_val()->as_register(), 0); -+ __ far_call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin())); -+ __ j(*stub->continuation()); -+} -+ -+void G1BarrierSetAssembler::gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub) { -+ G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); -+ __ bind(*stub->entry()); -+ assert(stub->addr()->is_register(), "Precondition"); -+ assert(stub->new_val()->is_register(), "Precondition"); -+ Register new_val_reg = stub->new_val()->as_register(); -+ __ beqz(new_val_reg, *stub->continuation(), /* is_far */ true); -+ ce->store_parameter(stub->addr()->as_pointer_register(), 0); -+ __ far_call(RuntimeAddress(bs->post_barrier_c1_runtime_code_blob()->code_begin())); -+ __ j(*stub->continuation()); -+} -+ -+#undef __ -+ -+#define __ sasm-> -+ -+void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) { -+ __ prologue("g1_pre_barrier", false); -+ -+ BarrierSet* bs = BarrierSet::barrier_set(); -+ -+ // arg0 : previous value of memory -+ const Register pre_val = x10; -+ const Register thread = xthread; -+ const Register tmp = t0; -+ -+ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); -+ Address queue_index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); -+ Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); -+ -+ Label done; -+ Label runtime; -+ -+ // Is marking still active? -+ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { // 4-byte width -+ __ lwu(tmp, in_progress); -+ } else { -+ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); -+ __ lbu(tmp, in_progress); -+ } -+ __ beqz(tmp, done); -+ -+ // Can we store original value in the thread's buffer? -+ __ ld(tmp, queue_index); -+ __ beqz(tmp, runtime); -+ -+ __ sub(tmp, tmp, wordSize); -+ __ sd(tmp, queue_index); -+ __ ld(t1, buffer); -+ __ add(tmp, tmp, t1); -+ __ load_parameter(0, t1); -+ __ sd(t1, Address(tmp, 0)); -+ __ j(done); -+ -+ __ bind(runtime); -+ __ push_call_clobbered_registers(); -+ __ load_parameter(0, pre_val); -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); -+ __ pop_call_clobbered_registers(); -+ __ bind(done); -+ -+ __ epilogue(); -+} -+ -+void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* sasm) { -+ __ prologue("g1_post_barrier", false); -+ -+ // arg0 : store_address -+ Address store_addr(fp, 2 * BytesPerWord); // 2 BytesPerWord from fp -+ -+ BarrierSet* bs = BarrierSet::barrier_set(); -+ CardTableBarrierSet* ctbs = barrier_set_cast(bs); -+ CardTable* ct = ctbs->card_table(); -+ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); -+ -+ Label done; -+ Label runtime; -+ -+ // At this point we know new_value is non-NULL and the new_value crosses regions. 
-+ // Must check to see if card is already dirty -+ const Register thread = xthread; -+ -+ Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset())); -+ Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); -+ -+ const Register card_offset = t1; -+ // RA is free here, so we can use it to hold the byte_map_base. -+ const Register byte_map_base = ra; -+ -+ assert_different_registers(card_offset, byte_map_base, t0); -+ -+ __ load_parameter(0, card_offset); -+ __ srli(card_offset, card_offset, CardTable::card_shift); -+ __ load_byte_map_base(byte_map_base); -+ -+ // Convert card offset into an address in card_addr -+ Register card_addr = card_offset; -+ __ add(card_addr, byte_map_base, card_addr); -+ -+ __ lbu(t0, Address(card_addr, 0)); -+ __ sub(t0, t0, (int)G1CardTable::g1_young_card_val()); -+ __ beqz(t0, done); -+ -+ assert((int)CardTable::dirty_card_val() == 0, "must be 0"); -+ -+ __ membar(MacroAssembler::StoreLoad); -+ __ lbu(t0, Address(card_addr, 0)); -+ __ beqz(t0, done); -+ -+ // storing region crossing non-NULL, card is clean. -+ // dirty card and log. -+ __ sb(zr, Address(card_addr, 0)); ++define_pd_global(intx, OnStackReplacePercentage, 140); ++define_pd_global(intx, ConditionalMoveLimit, 0); ++define_pd_global(intx, FreqInlineSize, 325); ++define_pd_global(intx, MinJumpTableSize, 10); ++define_pd_global(intx, InteriorEntryAlignment, 16); ++define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K)); ++define_pd_global(intx, LoopUnrollLimit, 60); ++define_pd_global(intx, LoopPercentProfileLimit, 10); ++// InitialCodeCacheSize derived from specjbb2000 run. ++define_pd_global(intx, InitialCodeCacheSize, 2496*K); // Integral multiple of CodeCacheExpansionSize ++define_pd_global(intx, CodeCacheExpansionSize, 64*K); + -+ __ ld(t0, queue_index); -+ __ beqz(t0, runtime); -+ __ sub(t0, t0, wordSize); -+ __ sd(t0, queue_index); ++// Ergonomics related flags ++define_pd_global(uint64_t,MaxRAM, 128ULL*G); ++define_pd_global(intx, RegisterCostAreaRatio, 16000); + -+ // Reuse RA to hold buffer_addr -+ const Register buffer_addr = ra; ++// Peephole and CISC spilling both break the graph, and so makes the ++// scheduler sick. ++define_pd_global(bool, OptoPeephole, false); ++define_pd_global(bool, UseCISCSpill, false); ++define_pd_global(bool, OptoScheduling, true); ++define_pd_global(bool, OptoBundling, false); ++define_pd_global(bool, OptoRegScheduling, false); ++define_pd_global(bool, SuperWordLoopUnrollAnalysis, true); ++define_pd_global(bool, IdealizeClearArrayNode, true); + -+ __ ld(buffer_addr, buffer); -+ __ add(t0, buffer_addr, t0); -+ __ sd(card_addr, Address(t0, 0)); -+ __ j(done); ++define_pd_global(intx, ReservedCodeCacheSize, 48*M); ++define_pd_global(intx, NonProfiledCodeHeapSize, 21*M); ++define_pd_global(intx, ProfiledCodeHeapSize, 22*M); ++define_pd_global(intx, NonNMethodCodeHeapSize, 5*M ); ++define_pd_global(uintx, CodeCacheMinBlockLength, 6); ++define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); + -+ __ bind(runtime); -+ __ push_call_clobbered_registers(); -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread); -+ __ pop_call_clobbered_registers(); -+ __ bind(done); -+ __ epilogue(); -+} ++// Ergonomics related flags ++define_pd_global(bool, NeverActAsServerClassMachine, false); + -+#undef __ ++define_pd_global(bool, TrapBasedRangeChecks, false); // Not needed. 
+ -+#endif // COMPILER1 -diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp ++#endif // CPU_RISCV_C2_GLOBALS_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/c2_init_riscv.cpp b/src/hotspot/cpu/riscv/c2_init_riscv.cpp new file mode 100644 -index 000000000..7f85e002d +index 00000000000..cdbd69807be --- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp -@@ -0,0 +1,78 @@ ++++ b/src/hotspot/cpu/riscv/c2_init_riscv.cpp +@@ -0,0 +1,38 @@ +/* -+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2019, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -13138,68 +13660,27 @@ index 000000000..7f85e002d + * + */ + -+#ifndef CPU_RISCV_GC_G1_G1BARRIERSETASSEMBLER_RISCV_HPP -+#define CPU_RISCV_GC_G1_G1BARRIERSETASSEMBLER_RISCV_HPP -+ -+#include "asm/macroAssembler.hpp" -+#include "gc/shared/modRefBarrierSetAssembler.hpp" -+#include "utilities/macros.hpp" -+ -+#ifdef COMPILER1 -+class LIR_Assembler; -+#endif -+class StubAssembler; -+class G1PreBarrierStub; -+class G1PostBarrierStub; -+ -+class G1BarrierSetAssembler: public ModRefBarrierSetAssembler { -+protected: -+ void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, -+ Register addr, Register count, RegSet saved_regs); -+ void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, -+ Register start, Register count, Register tmp, RegSet saved_regs); -+ -+ void g1_write_barrier_pre(MacroAssembler* masm, -+ Register obj, -+ Register pre_val, -+ Register thread, -+ Register tmp, -+ bool tosca_live, -+ bool expand_call); -+ -+ void g1_write_barrier_post(MacroAssembler* masm, -+ Register store_addr, -+ Register new_val, -+ Register thread, -+ Register tmp, -+ Register tmp2); -+ -+ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3); -+ -+public: -+#ifdef COMPILER1 -+ void gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub); -+ void gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub); ++#include "precompiled.hpp" ++#include "opto/compile.hpp" ++#include "opto/node.hpp" + -+ void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); -+ void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm); -+#endif ++// processor dependent initialization for riscv + -+ void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Register dst, Address src, Register tmp1, Register tmp_thread); -+}; ++extern void reg_mask_init(); + -+#endif // CPU_RISCV_GC_G1_G1BARRIERSETASSEMBLER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp ++void Compile::pd_compiler2_init() { ++ guarantee(CodeEntryAlignment >= InteriorEntryAlignment, "" ); ++ reg_mask_init(); ++} +diff --git a/src/hotspot/cpu/riscv/c2_safepointPollStubTable_riscv.cpp b/src/hotspot/cpu/riscv/c2_safepointPollStubTable_riscv.cpp new file mode 100644 -index 000000000..203b82744 +index 00000000000..a90d9fdc160 --- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp -@@ -0,0 +1,226 @@ ++++ 
b/src/hotspot/cpu/riscv/c2_safepointPollStubTable_riscv.cpp +@@ -0,0 +1,47 @@ +/* -+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -13223,215 +13704,37 @@ index 000000000..203b82744 + */ + +#include "precompiled.hpp" -+#include "gc/shared/barrierSetAssembler.hpp" -+#include "gc/shared/collectedHeap.hpp" -+#include "runtime/jniHandles.hpp" -+#include "runtime/thread.hpp" -+ -+#define __ masm-> ++#include "asm/macroAssembler.hpp" ++#include "opto/compile.hpp" ++#include "opto/node.hpp" ++#include "opto/output.hpp" ++#include "runtime/sharedRuntime.hpp" + -+void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Register dst, Address src, Register tmp1, Register tmp_thread) { -+ // RA is live. It must be saved around calls. ++#define __ masm. ++void C2SafepointPollStubTable::emit_stub_impl(MacroAssembler& masm, C2SafepointPollStub* entry) const { ++ assert(SharedRuntime::polling_page_return_handler_blob() != NULL, ++ "polling page return stub not created yet"); ++ address stub = SharedRuntime::polling_page_return_handler_blob()->entry_point(); ++ RuntimeAddress callback_addr(stub); + -+ bool in_heap = (decorators & IN_HEAP) != 0; -+ bool in_native = (decorators & IN_NATIVE) != 0; -+ bool is_not_null = (decorators & IS_NOT_NULL) != 0; -+ switch (type) { -+ case T_OBJECT: // fall through -+ case T_ARRAY: { -+ if (in_heap) { -+ if (UseCompressedOops) { -+ __ lwu(dst, src); -+ if (is_not_null) { -+ __ decode_heap_oop_not_null(dst); -+ } else { -+ __ decode_heap_oop(dst); -+ } -+ } else { -+ __ ld(dst, src); -+ } -+ } else { -+ assert(in_native, "why else?"); -+ __ ld(dst, src); -+ } -+ break; -+ } -+ case T_BOOLEAN: __ load_unsigned_byte (dst, src); break; -+ case T_BYTE: __ load_signed_byte (dst, src); break; -+ case T_CHAR: __ load_unsigned_short(dst, src); break; -+ case T_SHORT: __ load_signed_short (dst, src); break; -+ case T_INT: __ lw (dst, src); break; -+ case T_LONG: __ ld (dst, src); break; -+ case T_ADDRESS: __ ld (dst, src); break; -+ case T_FLOAT: __ flw (f10, src); break; -+ case T_DOUBLE: __ fld (f10, src); break; -+ default: Unimplemented(); -+ } -+} -+ -+void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) { -+ bool in_heap = (decorators & IN_HEAP) != 0; -+ bool in_native = (decorators & IN_NATIVE) != 0; -+ switch (type) { -+ case T_OBJECT: // fall through -+ case T_ARRAY: { -+ val = val == noreg ? 
zr : val; -+ if (in_heap) { -+ if (UseCompressedOops) { -+ assert(!dst.uses(val), "not enough registers"); -+ if (val != zr) { -+ __ encode_heap_oop(val); -+ } -+ __ sw(val, dst); -+ } else { -+ __ sd(val, dst); -+ } -+ } else { -+ assert(in_native, "why else?"); -+ __ sd(val, dst); -+ } -+ break; -+ } -+ case T_BOOLEAN: -+ __ andi(val, val, 0x1); // boolean is true if LSB is 1 -+ __ sb(val, dst); -+ break; -+ case T_BYTE: __ sb(val, dst); break; -+ case T_CHAR: __ sh(val, dst); break; -+ case T_SHORT: __ sh(val, dst); break; -+ case T_INT: __ sw(val, dst); break; -+ case T_LONG: __ sd(val, dst); break; -+ case T_ADDRESS: __ sd(val, dst); break; -+ case T_FLOAT: __ fsw(f10, dst); break; -+ case T_DOUBLE: __ fsd(f10, dst); break; -+ default: Unimplemented(); -+ } -+ -+} -+ -+void BarrierSetAssembler::obj_equals(MacroAssembler* masm, Register obj1, Register obj2, Label& equal, bool is_far) { -+ __ beq(obj1, obj2, equal, is_far); -+} -+ -+void BarrierSetAssembler::obj_nequals(MacroAssembler* masm, Register obj1, Register obj2, Label& nequal, bool is_far) { -+ __ bne(obj1, obj2, nequal, is_far); -+} -+ -+void BarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, -+ Register obj, Register tmp, Label& slowpath) { -+ // If mask changes we need to ensure that the inverse is still encodable as an immediate -+ STATIC_ASSERT(JNIHandles::weak_tag_mask == 1); -+ __ andi(obj, obj, ~JNIHandles::weak_tag_mask); -+ __ ld(obj, Address(obj, 0)); // *obj -+} -+ -+// Defines obj, preserves var_size_in_bytes, okay for tmp2 == var_size_in_bytes. -+void BarrierSetAssembler::tlab_allocate(MacroAssembler* masm, Register obj, -+ Register var_size_in_bytes, -+ int con_size_in_bytes, -+ Register tmp1, -+ Register tmp2, -+ Label& slow_case, -+ bool is_far) { -+ assert_different_registers(obj, tmp2); -+ assert_different_registers(obj, var_size_in_bytes); -+ Register end = tmp2; -+ -+ __ ld(obj, Address(xthread, JavaThread::tlab_top_offset())); -+ if (var_size_in_bytes == noreg) { -+ __ la(end, Address(obj, con_size_in_bytes)); -+ } else { -+ __ add(end, obj, var_size_in_bytes); -+ } -+ __ ld(t0, Address(xthread, JavaThread::tlab_end_offset())); -+ __ bgtu(end, t0, slow_case, is_far); -+ -+ // update the tlab top pointer -+ __ sd(end, Address(xthread, JavaThread::tlab_top_offset())); -+ -+ // recover var_size_in_bytes if necessary -+ if (var_size_in_bytes == end) { -+ __ sub(var_size_in_bytes, var_size_in_bytes, obj); -+ } -+} -+ -+// Defines obj, preserves var_size_in_bytes -+void BarrierSetAssembler::eden_allocate(MacroAssembler* masm, Register obj, -+ Register var_size_in_bytes, -+ int con_size_in_bytes, -+ Register tmp1, -+ Label& slow_case, -+ bool is_far) { -+ assert_different_registers(obj, var_size_in_bytes, tmp1); -+ if (!Universe::heap()->supports_inline_contig_alloc()) { -+ __ j(slow_case); -+ } else { -+ Register end = tmp1; -+ Label retry; -+ __ bind(retry); -+ -+ // Get the current end of the heap -+ ExternalAddress address_end((address) Universe::heap()->end_addr()); -+ { -+ int32_t offset; -+ __ la_patchable(t1, address_end, offset); -+ __ ld(t1, Address(t1, offset)); -+ } -+ -+ // Get the current top of the heap -+ ExternalAddress address_top((address) Universe::heap()->top_addr()); -+ { -+ int32_t offset; -+ __ la_patchable(t0, address_top, offset); -+ __ addi(t0, t0, offset); -+ __ lr_d(obj, t0, Assembler::aqrl); -+ } -+ -+ // Adjust it my the size of our new object -+ if (var_size_in_bytes == noreg) { -+ __ la(end, Address(obj, con_size_in_bytes)); -+ } else { -+ 
__ add(end, obj, var_size_in_bytes); -+ } -+ -+ // if end < obj then we wrapped around high memory -+ __ bltu(end, obj, slow_case, is_far); -+ -+ __ bgtu(end, t1, slow_case, is_far); -+ -+ // If heap_top hasn't been changed by some other thread, update it. -+ __ sc_d(t1, end, t0, Assembler::rl); -+ __ bnez(t1, retry); -+ -+ incr_allocated_bytes(masm, var_size_in_bytes, con_size_in_bytes, tmp1); -+ } -+} -+ -+void BarrierSetAssembler::incr_allocated_bytes(MacroAssembler* masm, -+ Register var_size_in_bytes, -+ int con_size_in_bytes, -+ Register tmp1) { -+ assert(tmp1->is_valid(), "need temp reg"); -+ -+ __ ld(tmp1, Address(xthread, in_bytes(JavaThread::allocated_bytes_offset()))); -+ if (var_size_in_bytes->is_valid()) { -+ __ add(tmp1, tmp1, var_size_in_bytes); -+ } else { -+ __ add(tmp1, tmp1, con_size_in_bytes); -+ } -+ __ sd(tmp1, Address(xthread, in_bytes(JavaThread::allocated_bytes_offset()))); ++ __ bind(entry->_stub_label); ++ InternalAddress safepoint_pc(masm.pc() - masm.offset() + entry->_safepoint_offset); ++ masm.code_section()->relocate(masm.pc(), safepoint_pc.rspec()); ++ __ la(t0, safepoint_pc.target()); ++ __ sd(t0, Address(xthread, JavaThread::saved_exception_pc_offset())); ++ __ far_jump(callback_addr); +} -diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp ++#undef __ +diff --git a/src/hotspot/cpu/riscv/codeBuffer_riscv.hpp b/src/hotspot/cpu/riscv/codeBuffer_riscv.hpp new file mode 100644 -index 000000000..964fc28be +index 00000000000..14a68b45026 --- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp -@@ -0,0 +1,75 @@ ++++ b/src/hotspot/cpu/riscv/codeBuffer_riscv.hpp +@@ -0,0 +1,36 @@ +/* -+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -13454,65 +13757,26 @@ index 000000000..964fc28be + * + */ + -+#ifndef CPU_RISCV_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP -+#define CPU_RISCV_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP -+ -+#include "asm/macroAssembler.hpp" -+#include "memory/allocation.hpp" -+#include "oops/access.hpp" ++#ifndef CPU_RISCV_CODEBUFFER_RISCV_HPP ++#define CPU_RISCV_CODEBUFFER_RISCV_HPP + -+class BarrierSetAssembler: public CHeapObj { +private: -+ void incr_allocated_bytes(MacroAssembler* masm, -+ Register var_size_in_bytes, int con_size_in_bytes, -+ Register t1 = noreg); ++ void pd_initialize() {} + +public: -+ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, -+ Register src, Register dst, Register count, RegSet saved_regs) {} -+ virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, -+ Register start, Register end, Register tmp, RegSet saved_regs) {} -+ virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Register dst, Address src, Register tmp1, Register tmp_thread); -+ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3); -+ virtual void obj_equals(MacroAssembler* masm, Register obj1, Register obj2, Label& equal, bool is_far = false); -+ virtual void obj_nequals(MacroAssembler* masm, Register obj1, Register obj2, Label& nequal, bool is_far = false); -+ virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, -+ Register obj, Register tmp, Label& slowpath); -+ -+ virtual void tlab_allocate(MacroAssembler* masm, -+ Register obj, // result: pointer to object after successful allocation -+ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise -+ int con_size_in_bytes, // object size in bytes if known at compile time -+ Register tmp1, // temp register -+ Register tmp2, // temp register -+ Label& slow_case, // continuation point if fast allocation fails -+ bool is_far = false // the distance of label slowcase could be more than 12KiB in C1 -+ ); -+ -+ void eden_allocate(MacroAssembler* masm, -+ Register obj, // result: pointer to object after successful allocation -+ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise -+ int con_size_in_bytes, // object size in bytes if known at compile time -+ Register tmp1, // temp register -+ Label& slow_case, // continuation point if fast allocation fails -+ bool is_far = false // the distance of label slowcase could be more than 12KiB in C1 -+ ); -+ virtual void barrier_stubs_init() {} -+ virtual ~BarrierSetAssembler() {} -+}; ++ void flush_bundle(bool start_new_bundle) {} + -+#endif // CPU_RISCV_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp ++#endif // CPU_RISCV_CODEBUFFER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp new file mode 100644 -index 000000000..1720488fb +index 00000000000..75bc4be7840 --- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp -@@ -0,0 +1,120 @@ ++++ b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp +@@ -0,0 +1,149 @@ +/* -+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. 
-+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2018, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -13537,107 +13801,136 @@ index 000000000..1720488fb + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" -+#include "gc/shared/barrierSet.hpp" -+#include "gc/shared/cardTable.hpp" -+#include "gc/shared/cardTableBarrierSet.hpp" -+#include "gc/shared/cardTableBarrierSetAssembler.hpp" -+#include "interpreter/interp_masm.hpp" -+ -+#define __ masm-> ++#include "code/compiledIC.hpp" ++#include "code/icBuffer.hpp" ++#include "code/nmethod.hpp" ++#include "memory/resourceArea.hpp" ++#include "runtime/mutexLocker.hpp" ++#include "runtime/safepoint.hpp" + ++// ---------------------------------------------------------------------------- + -+void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register obj, Register tmp) { -+ assert_different_registers(obj, tmp); -+ BarrierSet* bs = BarrierSet::barrier_set(); -+ assert(bs->kind() == BarrierSet::CardTableBarrierSet, "Wrong barrier set kind"); ++#define __ _masm. ++address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark) { ++ precond(cbuf.stubs()->start() != badAddress); ++ precond(cbuf.stubs()->end() != badAddress); ++ // Stub is fixed up when the corresponding call is converted from ++ // calling compiled code to calling interpreted code. ++ // mv xmethod, 0 ++ // jalr -4 # to self + -+ CardTableBarrierSet* ctbs = barrier_set_cast(bs); -+ CardTable* ct = ctbs->card_table(); -+ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); ++ if (mark == NULL) { ++ mark = cbuf.insts_mark(); // Get mark within main instrs section. ++ } + -+ __ srli(obj, obj, CardTable::card_shift); ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a stub. 
++ MacroAssembler _masm(&cbuf); + -+ assert(CardTable::dirty_card_val() == 0, "must be"); ++ address base = __ start_a_stub(to_interp_stub_size()); ++ int offset = __ offset(); ++ if (base == NULL) { ++ return NULL; // CodeBuffer::expand failed ++ } ++ // static stub relocation stores the instruction address of the call ++ __ relocate(static_stub_Relocation::spec(mark)); + -+ __ load_byte_map_base(tmp); -+ __ add(tmp, obj, tmp); ++ __ emit_static_call_stub(); + -+ if (UseCondCardMark) { -+ Label L_already_dirty; -+ __ membar(MacroAssembler::StoreLoad); -+ __ lbu(t1, Address(tmp)); -+ __ beqz(t1, L_already_dirty); -+ __ sb(zr, Address(tmp)); -+ __ bind(L_already_dirty); -+ } else { -+ if (ct->scanned_concurrently()) { -+ __ membar(MacroAssembler::StoreStore); -+ } -+ __ sb(zr, Address(tmp)); -+ } ++ assert((__ offset() - offset) <= (int)to_interp_stub_size(), "stub too big"); ++ __ end_a_stub(); ++ return base; +} ++#undef __ + -+void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, -+ Register start, Register count, Register tmp, RegSet saved_regs) { -+ assert_different_registers(start, tmp); -+ assert_different_registers(count, tmp); -+ BarrierSet* bs = BarrierSet::barrier_set(); -+ CardTableBarrierSet* ctbs = barrier_set_cast(bs); -+ CardTable* ct = ctbs->card_table(); ++int CompiledStaticCall::to_interp_stub_size() { ++ // fence_i + fence* + (lui, addi, slli, addi, slli, addi) + (lui, addi, slli, addi, slli) + jalr ++ return NativeFenceI::instruction_size() + 12 * NativeInstruction::instruction_size; ++} + -+ Label L_loop, L_done; -+ const Register end = count; ++int CompiledStaticCall::to_trampoline_stub_size() { ++ // Somewhat pessimistically, we count 4 instructions here (although ++ // there are only 3) because we sometimes emit an alignment nop. ++ // Trampoline stubs are always word aligned. ++ return NativeInstruction::instruction_size + NativeCallTrampolineStub::instruction_size; ++} + -+ __ beqz(count, L_done); // zero count - nothing to do -+ // end = start + count << LogBytesPerHeapOop -+ __ shadd(end, count, start, count, LogBytesPerHeapOop); -+ __ sub(end, end, BytesPerHeapOop); // last element address to make inclusive ++// Relocation entries for call stub, compiled java to interpreter. ++int CompiledStaticCall::reloc_to_interp_stub() { ++ return 4; // 3 in emit_to_interp_stub + 1 in emit_call ++} + -+ __ srli(start, start, CardTable::card_shift); -+ __ srli(end, end, CardTable::card_shift); -+ __ sub(count, end, start); // number of bytes to copy ++void CompiledDirectStaticCall::set_to_interpreted(const methodHandle& callee, address entry) { ++ address stub = find_stub(); ++ guarantee(stub != NULL, "stub not found"); + -+ __ load_byte_map_base(tmp); -+ __ add(start, start, tmp); -+ if (ct->scanned_concurrently()) { -+ __ membar(MacroAssembler::StoreStore); ++ if (TraceICs) { ++ ResourceMark rm; ++ tty->print_cr("CompiledDirectStaticCall@" INTPTR_FORMAT ": set_to_interpreted %s", ++ p2i(instruction_address()), ++ callee->name_and_sig_as_C_string()); + } + -+ __ bind(L_loop); -+ __ add(tmp, start, count); -+ __ sb(zr, Address(tmp)); -+ __ sub(count, count, 1); -+ __ bgez(count, L_loop); -+ __ bind(L_done); ++ // Creation also verifies the object. 
++ NativeMovConstReg* method_holder ++ = nativeMovConstReg_at(stub + NativeFenceI::instruction_size()); ++#ifdef ASSERT ++ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); ++ ++ verify_mt_safe(callee, entry, method_holder, jump); ++#endif ++ // Update stub. ++ method_holder->set_data((intptr_t)callee()); ++ NativeGeneralJump::insert_unconditional(method_holder->next_instruction_address(), entry); ++ ICache::invalidate_range(stub, to_interp_stub_size()); ++ // Update jump to call. ++ set_destination_mt_safe(stub); +} + -+void CardTableBarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) { -+ bool in_heap = (decorators & IN_HEAP) != 0; -+ bool is_array = (decorators & IS_ARRAY) != 0; -+ bool on_anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0; -+ bool precise = is_array || on_anonymous; ++void CompiledDirectStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub) { ++ // Reset stub. ++ address stub = static_stub->addr(); ++ assert(stub != NULL, "stub not found"); ++ assert(CompiledICLocker::is_safe(stub), "mt unsafe call"); ++ // Creation also verifies the object. ++ NativeMovConstReg* method_holder ++ = nativeMovConstReg_at(stub + NativeFenceI::instruction_size()); ++ method_holder->set_data(0); ++ NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); ++ jump->set_jump_destination((address)-1); ++} + -+ bool needs_post_barrier = val != noreg && in_heap; -+ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, noreg, noreg, noreg); -+ if (needs_post_barrier) { -+ // flatten object address if needed -+ if (!precise || dst.offset() == 0) { -+ store_check(masm, dst.base(), tmp3); -+ } else { -+ __ la(tmp3, dst); -+ store_check(masm, tmp3, t0); -+ } -+ } ++//----------------------------------------------------------------------------- ++// Non-product mode code ++#ifndef PRODUCT ++ ++void CompiledDirectStaticCall::verify() { ++ // Verify call. ++ _call->verify(); ++ _call->verify_alignment(); ++ ++ // Verify stub. ++ address stub = find_stub(); ++ assert(stub != NULL, "no stub found for static call"); ++ // Creation also verifies the object. ++ NativeMovConstReg* method_holder ++ = nativeMovConstReg_at(stub + NativeFenceI::instruction_size()); ++ NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); ++ ++ // Verify state. ++ assert(is_clean() || is_call_to_compiled() || is_call_to_interpreted(), "sanity check"); +} -diff --git a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp ++ ++#endif // !PRODUCT +diff --git a/src/hotspot/cpu/riscv/copy_riscv.hpp b/src/hotspot/cpu/riscv/copy_riscv.hpp new file mode 100644 -index 000000000..a5b3f9fe8 +index 00000000000..bceadcc5dcc --- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp -@@ -0,0 +1,43 @@ ++++ b/src/hotspot/cpu/riscv/copy_riscv.hpp +@@ -0,0 +1,136 @@ +/* -+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * @@ -13661,92 +13954,125 @@ index 000000000..a5b3f9fe8 + * + */ + -+#ifndef CPU_RISCV_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_RISCV_HPP -+#define CPU_RISCV_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_RISCV_HPP ++#ifndef CPU_RISCV_COPY_RISCV_HPP ++#define CPU_RISCV_COPY_RISCV_HPP + -+#include "asm/macroAssembler.hpp" -+#include "gc/shared/modRefBarrierSetAssembler.hpp" ++#include OS_CPU_HEADER(copy) + -+class CardTableBarrierSetAssembler: public ModRefBarrierSetAssembler { -+protected: -+ void store_check(MacroAssembler* masm, Register obj, Register tmp); ++static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) { ++ julong* to = (julong*) tohw; ++ julong v = ((julong) value << 32) | value; ++ while (count-- > 0) { ++ *to++ = v; ++ } ++} + -+ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, -+ Register start, Register count, Register tmp, RegSet saved_regs); -+ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3); ++static void pd_fill_to_aligned_words(HeapWord* tohw, size_t count, juint value) { ++ pd_fill_to_words(tohw, count, value); ++} + -+}; ++static void pd_fill_to_bytes(void* to, size_t count, jubyte value) { ++ (void)memset(to, value, count); ++} + -+#endif // #ifndef CPU_RISCV_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.cpp -new file mode 100644 -index 000000000..b82275297 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.cpp -@@ -0,0 +1,54 @@ -+/* -+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ ++static void pd_zero_to_words(HeapWord* tohw, size_t count) { ++ pd_fill_to_words(tohw, count, 0); ++} + -+#include "precompiled.hpp" -+#include "asm/macroAssembler.inline.hpp" -+#include "gc/shared/modRefBarrierSetAssembler.hpp" ++static void pd_zero_to_bytes(void* to, size_t count) { ++ (void)memset(to, 0, count); ++} + -+#define __ masm-> ++static void pd_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ (void)memmove(to, from, count * HeapWordSize); ++} + -+void ModRefBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, -+ Register src, Register dst, Register count, RegSet saved_regs) { -+ if (is_oop) { -+ gen_write_ref_array_pre_barrier(masm, decorators, dst, count, saved_regs); ++static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ switch (count) { ++ case 8: to[7] = from[7]; // fall through ++ case 7: to[6] = from[6]; // fall through ++ case 6: to[5] = from[5]; // fall through ++ case 5: to[4] = from[4]; // fall through ++ case 4: to[3] = from[3]; // fall through ++ case 3: to[2] = from[2]; // fall through ++ case 2: to[1] = from[1]; // fall through ++ case 1: to[0] = from[0]; // fall through ++ case 0: break; ++ default: ++ memcpy(to, from, count * HeapWordSize); ++ break; + } +} + -+void ModRefBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, -+ Register start, Register count, Register tmp, -+ RegSet saved_regs) { -+ if (is_oop) { -+ gen_write_ref_array_post_barrier(masm, decorators, start, count, tmp, saved_regs); -+ } ++static void pd_disjoint_words_atomic(const HeapWord* from, HeapWord* to, size_t count) { ++ shared_disjoint_words_atomic(from, to, count); +} + -+void ModRefBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) { -+ if (type == T_OBJECT || type == T_ARRAY) { -+ oop_store_at(masm, decorators, type, dst, val, tmp1, tmp2, tmp3); -+ } else { -+ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2, tmp3); -+ } ++static void pd_aligned_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_words(from, to, count); +} -diff --git a/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp -new file mode 100644 -index 000000000..df206cc87 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp -@@ -0,0 +1,55 @@ -+/* -+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. 
++ ++static void pd_aligned_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_disjoint_words(from, to, count); ++} ++ ++static void pd_conjoint_bytes(const void* from, void* to, size_t count) { ++ (void)memmove(to, from, count); ++} ++ ++static void pd_conjoint_bytes_atomic(const void* from, void* to, size_t count) { ++ pd_conjoint_bytes(from, to, count); ++} ++ ++static void pd_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) { ++ _Copy_conjoint_jshorts_atomic(from, to, count); ++} ++ ++static void pd_conjoint_jints_atomic(const jint* from, jint* to, size_t count) { ++ _Copy_conjoint_jints_atomic(from, to, count); ++} ++ ++static void pd_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) { ++ _Copy_conjoint_jlongs_atomic(from, to, count); ++} ++ ++static void pd_conjoint_oops_atomic(const oop* from, oop* to, size_t count) { ++ assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size."); ++ _Copy_conjoint_jlongs_atomic((const jlong*)from, (jlong*)to, count); ++} ++ ++static void pd_arrayof_conjoint_bytes(const HeapWord* from, HeapWord* to, size_t count) { ++ _Copy_arrayof_conjoint_bytes(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_jshorts(const HeapWord* from, HeapWord* to, size_t count) { ++ _Copy_arrayof_conjoint_jshorts(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_jints(const HeapWord* from, HeapWord* to, size_t count) { ++ _Copy_arrayof_conjoint_jints(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_jlongs(const HeapWord* from, HeapWord* to, size_t count) { ++ _Copy_arrayof_conjoint_jlongs(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_oops(const HeapWord* from, HeapWord* to, size_t count) { ++ assert(!UseCompressedOops, "foo!"); ++ assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size"); ++ _Copy_arrayof_conjoint_jlongs(from, to, count); ++} ++ ++#endif // CPU_RISCV_COPY_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/disassembler_riscv.hpp b/src/hotspot/cpu/riscv/disassembler_riscv.hpp +new file mode 100644 +index 00000000000..b0e5560c906 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/disassembler_riscv.hpp +@@ -0,0 +1,58 @@ ++/* ++ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -13770,44 +14096,46 @@ index 000000000..df206cc87 + * + */ + -+#ifndef CPU_RISCV_GC_SHARED_MODREFBARRIERSETASSEMBLER_RISCV_HPP -+#define CPU_RISCV_GC_SHARED_MODREFBARRIERSETASSEMBLER_RISCV_HPP ++#ifndef CPU_RISCV_DISASSEMBLER_RISCV_HPP ++#define CPU_RISCV_DISASSEMBLER_RISCV_HPP + -+#include "asm/macroAssembler.hpp" -+#include "gc/shared/barrierSetAssembler.hpp" ++static int pd_instruction_alignment() { ++ return 1; ++} + -+// The ModRefBarrierSetAssembler filters away accesses on BasicTypes other -+// than T_OBJECT/T_ARRAY (oops). The oop accesses call one of the protected -+// accesses, which are overridden in the concrete BarrierSetAssembler. 
++static const char* pd_cpu_opts() { ++ return ""; ++} + -+class ModRefBarrierSetAssembler: public BarrierSetAssembler { -+protected: -+ virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, -+ Register addr, Register count, RegSet saved_regs) {} -+ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, -+ Register start, Register count, Register tmp, RegSet saved_regs) {} ++// Returns address of n-th instruction preceding addr, ++// NULL if no preceding instruction can be found. ++// On riscv, we assume a constant instruction length. ++// It might be beneficial to check "is_readable" as we do on ppc and s390. ++static address find_prev_instr(address addr, int n_instr) { ++ return addr - Assembler::instruction_size * n_instr; ++} + -+ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) = 0; ++// special-case instruction decoding. ++// There may be cases where the binutils disassembler doesn't do ++// the perfect job. In those cases, decode_instruction0 may kick in ++// and do it right. ++// If nothing had to be done, just return "here", otherwise return "here + instr_len(here)" ++static address decode_instruction0(address here, outputStream* st, address virtual_begin = NULL) { ++ return here; ++} + -+public: -+ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, -+ Register src, Register dst, Register count, RegSet saved_regs); -+ virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, -+ Register start, Register count, Register tmp, RegSet saved_regs); -+ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3); -+}; ++// platform-specific instruction annotations (like value of loaded constants) ++static void annotate(address pc, outputStream* st) {} + -+#endif // CPU_RISCV_GC_SHARED_MODREFBARRIERSETASSEMBLER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp ++#endif // CPU_RISCV_DISASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/foreign_globals_riscv.cpp b/src/hotspot/cpu/riscv/foreign_globals_riscv.cpp new file mode 100644 -index 000000000..6657f1be0 +index 00000000000..5c700be9c91 --- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp -@@ -0,0 +1,124 @@ ++++ b/src/hotspot/cpu/riscv/foreign_globals_riscv.cpp +@@ -0,0 +1,44 @@ +/* -+ * Copyright (c) 2018, Red Hat, Inc. All rights reserved. ++ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * @@ -13832,113 +14160,72 @@ index 000000000..6657f1be0 + */ + +#include "precompiled.hpp" -+#include "c1/c1_LIRAssembler.hpp" -+#include "c1/c1_MacroAssembler.hpp" -+#include "gc/shenandoah/shenandoahBarrierSet.hpp" -+#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" -+#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp" -+ -+#define __ masm->masm()-> -+ -+void LIR_OpShenandoahCompareAndSwap::emit_code(LIR_Assembler* masm) { -+ Register addr = _addr->as_register_lo(); -+ Register newval = _new_value->as_register(); -+ Register cmpval = _cmp_value->as_register(); -+ Register tmp1 = _tmp1->as_register(); -+ Register tmp2 = _tmp2->as_register(); -+ Register result = result_opr()->as_register(); -+ -+ ShenandoahBarrierSet::assembler()->iu_barrier(masm->masm(), newval, t1); -+ -+ if (UseCompressedOops) { -+ __ encode_heap_oop(tmp1, cmpval); -+ cmpval = tmp1; -+ __ encode_heap_oop(tmp2, newval); -+ newval = tmp2; -+ } ++#include "prims/foreign_globals.hpp" ++#include "utilities/debug.hpp" + -+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm->masm(), addr, cmpval, newval, /* acquire */ Assembler::aq, -+ /* release */ Assembler::rl, /* is_cae */ false, result); -+ if (UseBarriersForVolatile) { -+ // The membar here is necessary to prevent reordering between the -+ // release store in the CAS above and a subsequent volatile load. -+ // However for !UseBarriersForVolatile, C1 inserts a full barrier before -+ // volatile loads which means we don't need an additional barrier -+ // here (see LIRGenerator::volatile_field_load()). -+ __ membar(MacroAssembler::AnyAny); -+ } ++// Stubbed out, implement later ++const ABIDescriptor ForeignGlobals::parse_abi_descriptor_impl(jobject jabi) const { ++ Unimplemented(); ++ return {}; +} + -+#undef __ -+ -+#ifdef ASSERT -+#define __ gen->lir(__FILE__, __LINE__)-> -+#else -+#define __ gen->lir()-> -+#endif -+ -+LIR_Opr ShenandoahBarrierSetC1::atomic_cmpxchg_at_resolved(LIRAccess& access, LIRItem& cmp_value, LIRItem& new_value) { -+ BasicType bt = access.type(); -+ if (access.is_oop()) { -+ LIRGenerator *gen = access.gen(); -+ if (ShenandoahSATBBarrier) { -+ pre_barrier(gen, access.access_emit_info(), access.decorators(), access.resolved_addr(), -+ LIR_OprFact::illegalOpr /* pre_val */); -+ } -+ if (ShenandoahCASBarrier) { -+ cmp_value.load_item(); -+ new_value.load_item(); -+ -+ LIR_Opr tmp1 = gen->new_register(T_OBJECT); -+ LIR_Opr tmp2 = gen->new_register(T_OBJECT); -+ LIR_Opr addr = access.resolved_addr()->as_address_ptr()->base(); -+ LIR_Opr result = gen->new_register(T_INT); -+ -+ __ append(new LIR_OpShenandoahCompareAndSwap(addr, cmp_value.result(), new_value.result(), tmp1, tmp2, result)); -+ return result; -+ } -+ } -+ return BarrierSetC1::atomic_cmpxchg_at_resolved(access, cmp_value, new_value); ++const BufferLayout ForeignGlobals::parse_buffer_layout_impl(jobject jlayout) const { ++ Unimplemented(); ++ return {}; +} + -+LIR_Opr ShenandoahBarrierSetC1::atomic_xchg_at_resolved(LIRAccess& access, LIRItem& value) { -+ LIRGenerator* gen = access.gen(); -+ BasicType type = access.type(); -+ -+ LIR_Opr result = gen->new_register(type); -+ value.load_item(); -+ LIR_Opr value_opr = value.result(); -+ -+ if (access.is_oop()) { -+ value_opr = iu_barrier(access.gen(), value_opr, access.access_emit_info(), access.decorators()); -+ } ++const CallRegs ForeignGlobals::parse_call_regs_impl(jobject jconv) const { ++ ShouldNotCallThis(); ++ return {}; ++} +diff --git a/src/hotspot/cpu/riscv/foreign_globals_riscv.hpp 
b/src/hotspot/cpu/riscv/foreign_globals_riscv.hpp +new file mode 100644 +index 00000000000..3ac89752c27 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/foreign_globals_riscv.hpp +@@ -0,0 +1,32 @@ ++/* ++ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ assert(type == T_INT || is_reference_type(type) LP64_ONLY( || type == T_LONG ), "unexpected type"); -+ LIR_Opr tmp = gen->new_register(T_INT); -+ __ xchg(access.resolved_addr(), value_opr, result, tmp); ++#ifndef CPU_RISCV_FOREIGN_GLOBALS_RISCV_HPP ++#define CPU_RISCV_FOREIGN_GLOBALS_RISCV_HPP + -+ if (access.is_oop()) { -+ result = load_reference_barrier(access.gen(), result, LIR_OprFact::addressConst(0)); -+ LIR_Opr tmp_opr = gen->new_register(type); -+ __ move(result, tmp_opr); -+ result = tmp_opr; -+ if (ShenandoahSATBBarrier) { -+ pre_barrier(access.gen(), access.access_emit_info(), access.decorators(), LIR_OprFact::illegalOpr, -+ result /* pre_val */); -+ } -+ } ++class ABIDescriptor {}; ++class BufferLayout {}; + -+ return result; -+} -diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp ++#endif // CPU_RISCV_FOREIGN_GLOBALS_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp new file mode 100644 -index 000000000..1bc01e454 +index 00000000000..6e38960598a --- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp -@@ -0,0 +1,743 @@ ++++ b/src/hotspot/cpu/riscv/frame_riscv.cpp +@@ -0,0 +1,697 @@ +/* -+ * Copyright (c) 2018, 2020, Red Hat, Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -13962,731 +14249,684 @@ index 000000000..1bc01e454 + */ + +#include "precompiled.hpp" -+#include "gc/shenandoah/shenandoahBarrierSet.hpp" -+#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" -+#include "gc/shenandoah/shenandoahForwarding.hpp" -+#include "gc/shenandoah/shenandoahHeap.hpp" -+#include "gc/shenandoah/shenandoahHeapRegion.hpp" -+#include "gc/shenandoah/shenandoahRuntime.hpp" -+#include "gc/shenandoah/shenandoahThreadLocalData.hpp" -+#include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp" ++#include "compiler/oopMap.hpp" +#include "interpreter/interpreter.hpp" -+#include "interpreter/interp_masm.hpp" -+#include "runtime/sharedRuntime.hpp" -+#include "runtime/thread.hpp" ++#include "memory/resourceArea.hpp" ++#include "memory/universe.hpp" ++#include "oops/markWord.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/javaCalls.hpp" ++#include "runtime/monitorChunk.hpp" ++#include "runtime/os.inline.hpp" ++#include "runtime/signature.hpp" ++#include "runtime/stackWatermarkSet.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "vmreg_riscv.inline.hpp" +#ifdef COMPILER1 -+#include "c1/c1_LIRAssembler.hpp" -+#include "c1/c1_MacroAssembler.hpp" -+#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp" ++#include "c1/c1_Runtime1.hpp" ++#include "runtime/vframeArray.hpp" +#endif + -+#define __ masm-> ++#ifdef ASSERT ++void RegisterMap::check_location_valid() { ++} ++#endif + -+address ShenandoahBarrierSetAssembler::_shenandoah_lrb = NULL; + -+void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, -+ Register src, Register dst, Register count, RegSet saved_regs) { -+ if (is_oop) { -+ bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; -+ if ((ShenandoahSATBBarrier && !dest_uninitialized) || -+ ShenandoahIUBarrier || ShenandoahLoadRefBarrier) { -+ Label done; ++// Profiling/safepoint support + -+ // Avoid calling runtime if count == 0 -+ __ beqz(count, done); ++bool frame::safe_for_sender(JavaThread *thread) { ++ address addr_sp = (address)_sp; ++ address addr_fp = (address)_fp; ++ address unextended_sp = (address)_unextended_sp; + -+ // Is GC active? -+ Address gc_state(xthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); -+ assert_different_registers(src, dst, count, t0); ++ // consider stack guards when trying to determine "safe" stack pointers ++ // sp must be within the usable part of the stack (not in guards) ++ if (!thread->is_in_usable_stack(addr_sp)) { ++ return false; ++ } + -+ __ lbu(t0, gc_state); -+ if (ShenandoahSATBBarrier && dest_uninitialized) { -+ __ andi(t0, t0, ShenandoahHeap::HAS_FORWARDED); -+ __ beqz(t0, done); -+ } else { -+ __ andi(t0, t0, ShenandoahHeap::HAS_FORWARDED | ShenandoahHeap::MARKING); -+ __ beqz(t0, done); -+ } ++ // When we are running interpreted code the machine stack pointer, SP, is ++ // set low enough so that the Java expression stack can grow and shrink ++ // without ever exceeding the machine stack bounds. So, ESP >= SP. 
+ -+ __ push_reg(saved_regs, sp); -+ if (UseCompressedOops) { -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop_entry), -+ src, dst, count); -+ } else { -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop_entry), src, dst, count); -+ } -+ __ pop_reg(saved_regs, sp); -+ __ bind(done); -+ } -+ } -+} ++ // When we call out of an interpreted method, SP is incremented so that ++ // the space between SP and ESP is removed. The SP saved in the callee's ++ // frame is the SP *before* this increment. So, when we walk a stack of ++ // interpreter frames the sender's SP saved in a frame might be less than ++ // the SP at the point of call. + -+void ShenandoahBarrierSetAssembler::shenandoah_write_barrier_pre(MacroAssembler* masm, -+ Register obj, -+ Register pre_val, -+ Register thread, -+ Register tmp, -+ bool tosca_live, -+ bool expand_call) { -+ if (ShenandoahSATBBarrier) { -+ satb_write_barrier_pre(masm, obj, pre_val, thread, tmp, tosca_live, expand_call); ++ // So unextended sp must be within the stack but we need not to check ++ // that unextended sp >= sp ++ ++ if (!thread->is_in_full_stack_checked(unextended_sp)) { ++ return false; + } -+} + -+void ShenandoahBarrierSetAssembler::satb_write_barrier_pre(MacroAssembler* masm, -+ Register obj, -+ Register pre_val, -+ Register thread, -+ Register tmp, -+ bool tosca_live, -+ bool expand_call) { -+ // If expand_call is true then we expand the call_VM_leaf macro -+ // directly to skip generating the check by -+ // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. -+ assert(thread == xthread, "must be"); ++ // an fp must be within the stack and above (but not equal) sp ++ // second evaluation on fp+ is added to handle situation where fp is -1 ++ bool fp_safe = thread->is_in_stack_range_excl(addr_fp, addr_sp) && ++ thread->is_in_full_stack_checked(addr_fp + (return_addr_offset * sizeof(void*))); + -+ Label done; -+ Label runtime; ++ // We know sp/unextended_sp are safe only fp is questionable here + -+ assert_different_registers(obj, pre_val, tmp, t0); -+ assert(pre_val != noreg && tmp != noreg, "expecting a register"); ++ // If the current frame is known to the code cache then we can attempt to ++ // to construct the sender and do some validation of it. This goes a long way ++ // toward eliminating issues when we get in frame construction code + -+ Address in_progress(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_active_offset())); -+ Address index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset())); -+ Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset())); ++ if (_cb != NULL) { + -+ // Is marking active? -+ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { -+ __ lwu(tmp, in_progress); -+ } else { -+ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); -+ __ lbu(tmp, in_progress); -+ } -+ __ beqz(tmp, done); ++ // First check if frame is complete and tester is reliable ++ // Unfortunately we can only check frame complete for runtime stubs and nmethod ++ // other generic buffer blobs are more problematic so we just assume they are ++ // ok. adapter blobs never have a frame complete and are never ok. + -+ // Do we need to load the previous value? 
-+ if (obj != noreg) { -+ __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW); -+ } ++ if (!_cb->is_frame_complete_at(_pc)) { ++ if (_cb->is_nmethod() || _cb->is_adapter_blob() || _cb->is_runtime_stub()) { ++ return false; ++ } ++ } + -+ // Is the previous value null? -+ __ beqz(pre_val, done); ++ // Could just be some random pointer within the codeBlob ++ if (!_cb->code_contains(_pc)) { ++ return false; ++ } + -+ // Can we store original value in the thread's buffer? -+ // Is index == 0? -+ // (The index field is typed as size_t.) -+ __ ld(tmp, index); // tmp := *index_adr -+ __ beqz(tmp, runtime); // tmp == 0? If yes, goto runtime ++ // Entry frame checks ++ if (is_entry_frame()) { ++ // an entry frame must have a valid fp. ++ return fp_safe && is_entry_frame_valid(thread); ++ } + -+ __ sub(tmp, tmp, wordSize); // tmp := tmp - wordSize -+ __ sd(tmp, index); // *index_adr := tmp -+ __ ld(t0, buffer); -+ __ add(tmp, tmp, t0); // tmp := tmp + *buffer_adr ++ intptr_t* sender_sp = NULL; ++ intptr_t* sender_unextended_sp = NULL; ++ address sender_pc = NULL; ++ intptr_t* saved_fp = NULL; + -+ // Record the previous value -+ __ sd(pre_val, Address(tmp, 0)); -+ __ j(done); ++ if (is_interpreted_frame()) { ++ // fp must be safe ++ if (!fp_safe) { ++ return false; ++ } + -+ __ bind(runtime); -+ // save the live input values -+ RegSet saved = RegSet::of(pre_val); -+ if (tosca_live) saved += RegSet::of(x10); -+ if (obj != noreg) saved += RegSet::of(obj); ++ sender_pc = (address)this->fp()[return_addr_offset]; ++ // for interpreted frames, the value below is the sender "raw" sp, ++ // which can be different from the sender unextended sp (the sp seen ++ // by the sender) because of current frame local variables ++ sender_sp = (intptr_t*) addr_at(sender_sp_offset); ++ sender_unextended_sp = (intptr_t*) this->fp()[interpreter_frame_sender_sp_offset]; ++ saved_fp = (intptr_t*) this->fp()[link_offset]; ++ } else { ++ // must be some sort of compiled/runtime frame ++ // fp does not have to be safe (although it could be check for c1?) + -+ __ push_reg(saved, sp); ++ // check for a valid frame_size, otherwise we are unlikely to get a valid sender_pc ++ if (_cb->frame_size() <= 0) { ++ return false; ++ } + -+ // Calling the runtime using the regular call_VM_leaf mechanism generates -+ // code (generated by InterpreterMacroAssember::call_VM_leaf_base) -+ // that checks that the *(rfp+frame::interpreter_frame_last_sp) == NULL. -+ // -+ // If we care generating the pre-barrier without a frame (e.g. in the -+ // intrinsified Reference.get() routine) then ebp might be pointing to -+ // the caller frame and so this check will most likely fail at runtime. -+ // -+ // Expanding the call directly bypasses the generation of the check. -+ // So when we do not have have a full interpreter frame on the stack -+ // expand_call should be passed true. -+ if (expand_call) { -+ assert(pre_val != c_rarg1, "smashed arg"); -+ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread); -+ } else { -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread); -+ } ++ sender_sp = _unextended_sp + _cb->frame_size(); ++ // Is sender_sp safe? 
++ if (!thread->is_in_full_stack_checked((address)sender_sp)) { ++ return false; ++ } + -+ __ pop_reg(saved, sp); ++ sender_unextended_sp = sender_sp; ++ sender_pc = (address) *(sender_sp - 1); ++ saved_fp = (intptr_t*) *(sender_sp - 2); ++ } + -+ __ bind(done); -+} + -+void ShenandoahBarrierSetAssembler::resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp) { -+ assert(ShenandoahLoadRefBarrier || ShenandoahCASBarrier, "Should be enabled"); ++ // If the potential sender is the interpreter then we can do some more checking ++ if (Interpreter::contains(sender_pc)) { + -+ Label is_null; -+ __ beqz(dst, is_null); -+ resolve_forward_pointer_not_null(masm, dst, tmp); -+ __ bind(is_null); -+} ++ // fp is always saved in a recognizable place in any code we generate. However ++ // only if the sender is interpreted/call_stub (c1 too?) are we certain that the saved fp ++ // is really a frame pointer. ++ if (!thread->is_in_stack_range_excl((address)saved_fp, (address)sender_sp)) { ++ return false; ++ } + -+// IMPORTANT: This must preserve all registers, even t0 and t1, except those explicitely -+// passed in. -+void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp) { -+ assert(ShenandoahLoadRefBarrier || ShenandoahCASBarrier, "Should be enabled"); -+ // The below loads the mark word, checks if the lowest two bits are -+ // set, and if so, clear the lowest two bits and copy the result -+ // to dst. Otherwise it leaves dst alone. -+ // Implementing this is surprisingly awkward. I do it here by: -+ // - Inverting the mark word -+ // - Test lowest two bits == 0 -+ // - If so, set the lowest two bits -+ // - Invert the result back, and copy to dst -+ RegSet savedRegs = RegSet::of(t2); -+ bool borrow_reg = (tmp == noreg); -+ if (borrow_reg) { -+ // No free registers available. Make one useful. 
-+ tmp = t0; -+ if (tmp == dst) { -+ tmp = t1; ++ // construct the potential sender ++ frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); ++ ++ return sender.is_interpreted_frame_valid(thread); + } -+ savedRegs += RegSet::of(tmp); -+ } + -+ assert_different_registers(tmp, dst, t2); -+ __ push_reg(savedRegs, sp); ++ // We must always be able to find a recognizable pc ++ CodeBlob* sender_blob = CodeCache::find_blob_unsafe(sender_pc); ++ if (sender_pc == NULL || sender_blob == NULL) { ++ return false; ++ } + -+ Label done; -+ __ ld(tmp, Address(dst, oopDesc::mark_offset_in_bytes())); -+ __ xori(tmp, tmp, -1); // eon with 0 is equivalent to XOR with -1 -+ __ andi(t2, tmp, markOopDesc::lock_mask_in_place); -+ __ bnez(t2, done); -+ __ ori(tmp, tmp, markOopDesc::marked_value); -+ __ xori(dst, tmp, -1); // eon with 0 is equivalent to XOR with -1 -+ __ bind(done); ++ // Could be a zombie method ++ if (sender_blob->is_zombie() || sender_blob->is_unloaded()) { ++ return false; ++ } + -+ __ pop_reg(savedRegs, sp); -+} ++ // Could just be some random pointer within the codeBlob ++ if (!sender_blob->code_contains(sender_pc)) { ++ return false; ++ } + -+void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembler* masm, -+ Register dst, Address load_addr) { -+ assert(ShenandoahLoadRefBarrier, "Should be enabled"); -+ assert(dst != t1 && load_addr.base() != t1, "need t1"); -+ assert_different_registers(load_addr.base(), t1, t2); ++ // We should never be able to see an adapter if the current frame is something from code cache ++ if (sender_blob->is_adapter_blob()) { ++ return false; ++ } + -+ Label done; -+ __ enter(); -+ Address gc_state(xthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); -+ __ lbu(t1, gc_state); ++ // Could be the call_stub ++ if (StubRoutines::returns_to_call_stub(sender_pc)) { ++ if (!thread->is_in_stack_range_excl((address)saved_fp, (address)sender_sp)) { ++ return false; ++ } + -+ // Check for heap stability -+ __ andi(t1, t1, ShenandoahHeap::HAS_FORWARDED); -+ __ beqz(t1, done); -+ -+ // use x11 for load address -+ Register result_dst = dst; -+ if (dst == x11) { -+ __ mv(t1, dst); -+ dst = t1; -+ } -+ -+ // Save x10 and x11, unless it is an output register -+ RegSet to_save = RegSet::of(x10, x11) - result_dst; -+ __ push_reg(to_save, sp); -+ __ la(x11, load_addr); -+ __ mv(x10, dst); -+ -+ __ far_call(RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahBarrierSetAssembler::shenandoah_lrb()))); -+ -+ __ mv(result_dst, x10); -+ __ pop_reg(to_save, sp); -+ -+ __ bind(done); -+ __ leave(); -+} -+ -+void ShenandoahBarrierSetAssembler::iu_barrier(MacroAssembler* masm, Register dst, Register tmp) { -+ if (ShenandoahIUBarrier) { -+ __ push_call_clobbered_registers(); -+ satb_write_barrier_pre(masm, noreg, dst, xthread, tmp, true, false); -+ __ pop_call_clobbered_registers(); -+ } -+} ++ // construct the potential sender ++ frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); + -+void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr) { -+ if (ShenandoahLoadRefBarrier) { -+ Label is_null; -+ __ beqz(dst, is_null); -+ load_reference_barrier_not_null(masm, dst, load_addr); -+ __ bind(is_null); -+ } -+} ++ // Validate the JavaCallWrapper an entry frame must have ++ address jcw = (address)sender.entry_frame_call_wrapper(); + -+// -+// Arguments: -+// -+// Inputs: -+// src: oop location to load from, might be clobbered -+// -+// Output: -+// dst: oop loaded from src 
location -+// -+// Kill: -+// x30 (tmp reg) -+// -+// Alias: -+// dst: x30 (might use x30 as temporary output register to avoid clobbering src) -+// -+void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, -+ DecoratorSet decorators, -+ BasicType type, -+ Register dst, -+ Address src, -+ Register tmp1, -+ Register tmp_thread) { -+ // 1: non-reference load, no additional barrier is needed -+ if (!is_reference_type(type)) { -+ BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); -+ return; -+ } ++ bool jcw_safe = (jcw < thread->stack_base()) && (jcw > (address)sender.fp()); + -+ // 2: load a reference from src location and apply LRB if needed -+ if (ShenandoahBarrierSet::need_load_reference_barrier(decorators, type)) { -+ guarantee(dst != x30 && src.base() != x30, "load_at need x30"); -+ bool ist5 = (dst == src.base()); -+ if (ist5) { -+ __ push_reg(RegSet::of(x30), sp); ++ return jcw_safe; + } -+ Register result_dst = dst; + -+ // Preserve src location for LRB -+ if (dst == src.base()) { -+ dst = x30; ++ CompiledMethod* nm = sender_blob->as_compiled_method_or_null(); ++ if (nm != NULL) { ++ if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc) || ++ nm->method()->is_method_handle_intrinsic()) { ++ return false; ++ } + } -+ assert_different_registers(dst, src.base()); -+ -+ BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); -+ -+ load_reference_barrier(masm, dst, src); + -+ if (dst != result_dst) { -+ __ mv(result_dst, dst); -+ dst = result_dst; ++ // If the frame size is 0 something (or less) is bad because every nmethod has a non-zero frame size ++ // because the return address counts against the callee's frame. ++ if (sender_blob->frame_size() <= 0) { ++ assert(!sender_blob->is_compiled(), "should count return address at least"); ++ return false; + } + -+ if (ist5) { -+ __ pop_reg(RegSet::of(x30), sp); ++ // We should never be able to see anything here except an nmethod. If something in the ++ // code cache (current frame) is called by an entity within the code cache that entity ++ // should not be anything but the call stub (already covered), the interpreter (already covered) ++ // or an nmethod. ++ if (!sender_blob->is_compiled()) { ++ return false; + } -+ } else { -+ BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); -+ } + -+ // 3: apply keep-alive barrier if needed -+ if (ShenandoahBarrierSet::need_keep_alive_barrier(decorators, type)) { -+ __ enter(); -+ __ push_call_clobbered_registers(); -+ satb_write_barrier_pre(masm /* masm */, -+ noreg /* obj */, -+ dst /* pre_val */, -+ xthread /* thread */, -+ tmp1 /* tmp */, -+ true /* tosca_live */, -+ true /* expand_call */); -+ __ pop_call_clobbered_registers(); -+ __ leave(); -+ } -+} ++ // Could put some more validation for the potential non-interpreted sender ++ // frame we'd create by calling sender if I could think of any. Wait for next crash in forte... 
+ -+void ShenandoahBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) { -+ bool on_oop = is_reference_type(type); -+ if (!on_oop) { -+ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2, tmp3); -+ return; ++ // One idea is seeing if the sender_pc we have is one that we'd expect to call to current cb ++ ++ // We've validated the potential sender that would be created ++ return true; + } + -+ // flatten object address if needed -+ if (dst.offset() == 0) { -+ if (dst.base() != tmp3) { -+ __ mv(tmp3, dst.base()); -+ } -+ } else { -+ __ la(tmp3, dst); ++ // Must be native-compiled frame. Since sender will try and use fp to find ++ // linkages it must be safe ++ if (!fp_safe) { ++ return false; + } + -+ shenandoah_write_barrier_pre(masm, -+ tmp3 /* obj */, -+ tmp2 /* pre_val */, -+ xthread /* thread */, -+ tmp1 /* tmp */, -+ val != noreg /* tosca_live */, -+ false /* expand_call */); ++ // Will the pc we fetch be non-zero (which we'll find at the oldest frame) ++ if ((address)this->fp()[return_addr_offset] == NULL) { return false; } + -+ if (val == noreg) { -+ BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), noreg, noreg, noreg); ++ return true; ++} ++ ++void frame::patch_pc(Thread* thread, address pc) { ++ assert(_cb == CodeCache::find_blob(pc), "unexpected pc"); ++ address* pc_addr = &(((address*) sp())[-1]); ++ if (TracePcPatching) { ++ tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "]", ++ p2i(pc_addr), p2i(*pc_addr), p2i(pc)); ++ } ++ // Either the return address is the original one or we are going to ++ // patch in the same address that's already there. ++ assert(_pc == *pc_addr || pc == *pc_addr, "must be"); ++ *pc_addr = pc; ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ assert(original_pc == _pc, "expected original PC to be stored before patching"); ++ _deopt_state = is_deoptimized; ++ // leave _pc as is + } else { -+ iu_barrier(masm, val, tmp1); -+ // G1 barrier needs uncompressed oop for region cross check. -+ Register new_val = val; -+ if (UseCompressedOops) { -+ new_val = t1; -+ __ mv(new_val, val); -+ } -+ BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), val, noreg, noreg, noreg); ++ _deopt_state = not_deoptimized; ++ _pc = pc; + } +} + -+void ShenandoahBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, -+ Register obj, Register tmp, Label& slowpath) { -+ Label done; -+ // Resolve jobject -+ BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, obj, tmp, slowpath); -+ -+ // Check for null. 
-+ __ beqz(obj, done); ++bool frame::is_interpreted_frame() const { ++ return Interpreter::contains(pc()); ++} + -+ assert(obj != t1, "need t1"); -+ Address gc_state(jni_env, ShenandoahThreadLocalData::gc_state_offset() - JavaThread::jni_environment_offset()); -+ __ lbu(t1, gc_state); ++int frame::frame_size(RegisterMap* map) const { ++ frame sender = this->sender(map); ++ return sender.sp() - sp(); ++} + -+ // Check for heap in evacuation phase -+ __ andi(t0, t1, ShenandoahHeap::EVACUATION); -+ __ bnez(t0, slowpath); ++intptr_t* frame::entry_frame_argument_at(int offset) const { ++ // convert offset to index to deal with tsi ++ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); ++ // Entry frame's arguments are always in relation to unextended_sp() ++ return &unextended_sp()[index]; ++} + -+ __ bind(done); ++// sender_sp ++intptr_t* frame::interpreter_frame_sender_sp() const { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ return (intptr_t*) at(interpreter_frame_sender_sp_offset); +} + -+// Special Shenandoah CAS implementation that handles false negatives due -+// to concurrent evacuation. The service is more complex than a -+// traditional CAS operation because the CAS operation is intended to -+// succeed if the reference at addr exactly matches expected or if the -+// reference at addr holds a pointer to a from-space object that has -+// been relocated to the location named by expected. There are two -+// races that must be addressed: -+// a) A parallel thread may mutate the contents of addr so that it points -+// to a different object. In this case, the CAS operation should fail. -+// b) A parallel thread may heal the contents of addr, replacing a -+// from-space pointer held in addr with the to-space pointer -+// representing the new location of the object. -+// Upon entry to cmpxchg_oop, it is assured that new_val equals NULL -+// or it refers to an object that is not being evacuated out of -+// from-space, or it refers to the to-space version of an object that -+// is being evacuated out of from-space. -+// -+// By default, this operation implements sequential consistency and the -+// value held in the result register following execution of the -+// generated code sequence is 0 to indicate failure of CAS, non-zero -+// to indicate success. Arguments support variations on this theme: -+// -+// acquire: Allow relaxation of the memory ordering on CAS from -+// sequential consistency. This can be useful when -+// sequential consistency is not required, such as when -+// another sequentially consistent operation is already -+// present in the execution stream. If acquire, successful -+// execution has the side effect of assuring that memory -+// values updated by other threads and "released" will be -+// visible to any read operations perfomed by this thread -+// which follow this operation in program order. This is a -+// special optimization that should not be enabled by default. -+// release: Allow relaxation of the memory ordering on CAS from -+// sequential consistency. This can be useful when -+// sequential consistency is not required, such as when -+// another sequentially consistent operation is already -+// present in the execution stream. 
If release, successful -+// completion of this operation has the side effect of -+// assuring that all writes to memory performed by this -+// thread that precede this operation in program order are -+// visible to all other threads that subsequently "acquire" -+// before reading the respective memory values. This is a -+// special optimization that should not be enabled by default. -+// is_cae: This turns CAS (compare and swap) into CAE (compare and -+// exchange). This HotSpot convention is that CAE makes -+// available to the caller the "failure witness", which is -+// the value that was stored in memory which did not match -+// the expected value. If is_cae, the result is the value -+// most recently fetched from addr rather than a boolean -+// success indicator. -+// -+// Clobbers t0, t1 -+void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm, -+ Register addr, -+ Register expected, -+ Register new_val, -+ Assembler::Aqrl acquire, -+ Assembler::Aqrl release, -+ bool is_cae, -+ Register result) { -+ bool is_narrow = UseCompressedOops; -+ Assembler::operand_size size = is_narrow ? Assembler::uint32 : Assembler::int64; ++void frame::set_interpreter_frame_sender_sp(intptr_t* sender_sp) { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ ptr_at_put(interpreter_frame_sender_sp_offset, (intptr_t) sender_sp); ++} + -+ assert_different_registers(addr, expected, t0, t1); -+ assert_different_registers(addr, new_val, t0, t1); + -+ Label retry, success, fail, done; ++// monitor elements + -+ __ bind(retry); ++BasicObjectLock* frame::interpreter_frame_monitor_begin() const { ++ return (BasicObjectLock*) addr_at(interpreter_frame_monitor_block_bottom_offset); ++} + -+ // Step1: Try to CAS. -+ __ cmpxchg(addr, expected, new_val, size, acquire, release, /* result */ t1); ++BasicObjectLock* frame::interpreter_frame_monitor_end() const { ++ BasicObjectLock* result = (BasicObjectLock*) *addr_at(interpreter_frame_monitor_block_top_offset); ++ // make sure the pointer points inside the frame ++ assert(sp() <= (intptr_t*) result, "monitor end should be above the stack pointer"); ++ assert((intptr_t*) result < fp(), "monitor end should be strictly below the frame pointer"); ++ return result; ++} + -+ // If success, then we are done. -+ __ beq(expected, t1, success); ++void frame::interpreter_frame_set_monitor_end(BasicObjectLock* value) { ++ *((BasicObjectLock**)addr_at(interpreter_frame_monitor_block_top_offset)) = value; ++} + -+ // Step2: CAS failed, check the forwared pointer. -+ __ mv(t0, t1); ++// Used by template based interpreter deoptimization ++void frame::interpreter_frame_set_last_sp(intptr_t* last_sp) { ++ *((intptr_t**)addr_at(interpreter_frame_last_sp_offset)) = last_sp; ++} + -+ if (is_narrow) { -+ __ decode_heap_oop(t0, t0); ++frame frame::sender_for_entry_frame(RegisterMap* map) const { ++ assert(map != NULL, "map must be set"); ++ // Java frame called from C; skip all C frames and return top C ++ // frame of that chunk as the sender ++ JavaFrameAnchor* jfa = entry_frame_call_wrapper()->anchor(); ++ assert(!entry_frame_is_first(), "next Java fp must be non zero"); ++ assert(jfa->last_Java_sp() > sp(), "must be above this frame on stack"); ++ // Since we are walking the stack now this nested anchor is obviously walkable ++ // even if it wasn't when it was stacked. ++ if (!jfa->walkable()) { ++ // Capture _last_Java_pc (if needed) and mark anchor walkable. 
++ jfa->capture_last_Java_pc(); + } -+ resolve_forward_pointer(masm, t0); -+ -+ __ encode_heap_oop(t0, t0); -+ -+ // Report failure when the forwarded oop was not expected. -+ __ bne(t0, expected, fail); -+ -+ // Step 3: CAS again using the forwarded oop. -+ __ cmpxchg(addr, t1, new_val, size, acquire, release, /* result */ t0); -+ -+ // Retry when failed. -+ __ bne(t0, t1, retry); ++ map->clear(); ++ assert(map->include_argument_oops(), "should be set by clear"); ++ vmassert(jfa->last_Java_pc() != NULL, "not walkable"); ++ frame fr(jfa->last_Java_sp(), jfa->last_Java_fp(), jfa->last_Java_pc()); ++ return fr; ++} + -+ __ bind(success); -+ if (is_cae) { -+ __ mv(result, expected); -+ } else { -+ __ mv(result, 1); -+ } -+ __ j(done); ++OptimizedEntryBlob::FrameData* OptimizedEntryBlob::frame_data_for_frame(const frame& frame) const { ++ ShouldNotCallThis(); ++ return nullptr; ++} + -+ __ bind(fail); -+ if (is_cae) { -+ __ mv(result, t0); -+ } else { -+ __ mv(result, zr); -+ } ++bool frame::optimized_entry_frame_is_first() const { ++ ShouldNotCallThis(); ++ return false; ++} + -+ __ bind(done); ++frame frame::sender_for_optimized_entry_frame(RegisterMap* map) const { ++ ShouldNotCallThis(); ++ return {}; +} + -+#undef __ ++//------------------------------------------------------------------------------ ++// frame::verify_deopt_original_pc ++// ++// Verifies the calculated original PC of a deoptimization PC for the ++// given unextended SP. ++#ifdef ASSERT ++void frame::verify_deopt_original_pc(CompiledMethod* nm, intptr_t* unextended_sp) { ++ frame fr; + -+#ifdef COMPILER1 ++ // This is ugly but it's better than to change {get,set}_original_pc ++ // to take an SP value as argument. And it's only a debugging ++ // method anyway. ++ fr._unextended_sp = unextended_sp; + -+#define __ ce->masm()-> ++ assert_cond(nm != NULL); ++ address original_pc = nm->get_original_pc(&fr); ++ assert(nm->insts_contains_inclusive(original_pc), ++ "original PC must be in the main code section of the the compiled method (or must be immediately following it)"); ++} ++#endif + -+void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub) { -+ ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); -+ // At this point we know that marking is in progress. -+ // If do_load() is true then we have to emit the -+ // load of the previous value; otherwise it has already -+ // been loaded into _pre_val. -+ __ bind(*stub->entry()); ++//------------------------------------------------------------------------------ ++// frame::adjust_unextended_sp ++void frame::adjust_unextended_sp() { ++ // On riscv, sites calling method handle intrinsics and lambda forms are treated ++ // as any other call site. Therefore, no special action is needed when we are ++ // returning to any of these call sites. + -+ assert(stub->pre_val()->is_register(), "Precondition."); ++ if (_cb != NULL) { ++ CompiledMethod* sender_cm = _cb->as_compiled_method_or_null(); ++ if (sender_cm != NULL) { ++ // If the sender PC is a deoptimization point, get the original PC. 
++ if (sender_cm->is_deopt_entry(_pc) || ++ sender_cm->is_deopt_mh_entry(_pc)) { ++ DEBUG_ONLY(verify_deopt_original_pc(sender_cm, _unextended_sp)); ++ } ++ } ++ } ++} + -+ Register pre_val_reg = stub->pre_val()->as_register(); ++//------------------------------------------------------------------------------ ++// frame::update_map_with_saved_link ++void frame::update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr) { ++ // The interpreter and compiler(s) always save fp in a known ++ // location on entry. We must record where that location is ++ // so that if fp was live on callout from c2 we can find ++ // the saved copy no matter what it called. + -+ if (stub->do_load()) { -+ ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), -+ stub->info(), false /* wide */, false /* unaligned */); -+ } -+ __ beqz(pre_val_reg, *stub->continuation(), /* is_far */ true); -+ ce->store_parameter(stub->pre_val()->as_register(), 0); -+ __ far_call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin())); -+ __ j(*stub->continuation()); ++ // Since the interpreter always saves fp if we record where it is then ++ // we don't have to always save fp on entry and exit to c2 compiled ++ // code, on entry will be enough. ++ assert(map != NULL, "map must be set"); ++ map->set_location(::fp->as_VMReg(), (address) link_addr); ++ // this is weird "H" ought to be at a higher address however the ++ // oopMaps seems to have the "H" regs at the same address and the ++ // vanilla register. ++ map->set_location(::fp->as_VMReg()->next(), (address) link_addr); +} + -+void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assembler* ce, -+ ShenandoahLoadReferenceBarrierStub* stub) { -+ ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); -+ __ bind(*stub->entry()); + -+ Register obj = stub->obj()->as_register(); -+ Register res = stub->result()->as_register(); -+ Register addr = stub->addr()->as_pointer_register(); -+ Register tmp1 = stub->tmp1()->as_register(); -+ Register tmp2 = stub->tmp2()->as_register(); ++//------------------------------------------------------------------------------ ++// frame::sender_for_interpreter_frame ++frame frame::sender_for_interpreter_frame(RegisterMap* map) const { ++ // SP is the raw SP from the sender after adapter or interpreter ++ // extension. ++ intptr_t* sender_sp = this->sender_sp(); + -+ assert(res == x10, "result must arrive in x10"); -+ assert_different_registers(tmp1, tmp2, t0); ++ // This is the sp before any possible extension (adapter/locals). ++ intptr_t* unextended_sp = interpreter_frame_sender_sp(); + -+ if (res != obj) { -+ __ mv(res, obj); ++#ifdef COMPILER2 ++ assert(map != NULL, "map must be set"); ++ if (map->update_map()) { ++ update_map_with_saved_link(map, (intptr_t**) addr_at(link_offset)); + } ++#endif // COMPILER2 + -+ // Check for null. -+ __ beqz(res, *stub->continuation(), /* is_far */ true); ++ return frame(sender_sp, unextended_sp, link(), sender_pc()); ++} + -+ // Check for object in cset. -+ __ mv(tmp2, ShenandoahHeap::in_cset_fast_test_addr()); -+ __ srli(tmp1, res, ShenandoahHeapRegion::region_size_bytes_shift_jint()); -+ __ add(t0, tmp2, tmp1); -+ __ lb(tmp2, Address(t0)); -+ __ beqz(tmp2, *stub->continuation(), /* is_far */ true); + -+ // Check if object is already forwarded. 
-+ Label slow_path; -+ __ ld(tmp1, Address(res, oopDesc::mark_offset_in_bytes())); -+ __ xori(tmp1, tmp1, -1); -+ __ andi(t0, tmp1, markOopDesc::lock_mask_in_place); -+ __ bnez(t0, slow_path); ++//------------------------------------------------------------------------------ ++// frame::sender_for_compiled_frame ++frame frame::sender_for_compiled_frame(RegisterMap* map) const { ++ // we cannot rely upon the last fp having been saved to the thread ++ // in C2 code but it will have been pushed onto the stack. so we ++ // have to find it relative to the unextended sp + -+ // Decode forwarded object. -+ __ ori(tmp1, tmp1, markOopDesc::marked_value); -+ __ xori(res, tmp1, -1); -+ __ j(*stub->continuation()); ++ assert(_cb->frame_size() >= 0, "must have non-zero frame size"); ++ intptr_t* l_sender_sp = unextended_sp() + _cb->frame_size(); ++ intptr_t* unextended_sp = l_sender_sp; + -+ __ bind(slow_path); -+ ce->store_parameter(res, 0); -+ ce->store_parameter(addr, 1); -+ __ far_call(RuntimeAddress(bs->load_reference_barrier_rt_code_blob()->code_begin())); ++ // the return_address is always the word on the stack ++ address sender_pc = (address) *(l_sender_sp + frame::return_addr_offset); + -+ __ j(*stub->continuation()); -+} ++ intptr_t** saved_fp_addr = (intptr_t**) (l_sender_sp + frame::link_offset); + -+#undef __ ++ assert(map != NULL, "map must be set"); ++ if (map->update_map()) { ++ // Tell GC to use argument oopmaps for some runtime stubs that need it. ++ // For C1, the runtime stub might not have oop maps, so set this flag ++ // outside of update_register_map. ++ map->set_include_argument_oops(_cb->caller_must_gc_arguments(map->thread())); ++ if (_cb->oop_maps() != NULL) { ++ OopMapSet::update_register_map(this, map); ++ } + -+#define __ sasm-> ++ // Since the prolog does the save and restore of FP there is no ++ // oopmap for it so we must fill in its location as if there was ++ // an oopmap entry since if our caller was compiled code there ++ // could be live jvm state in it. ++ update_map_with_saved_link(map, saved_fp_addr); ++ } + -+void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) { -+ __ prologue("shenandoah_pre_barrier", false); ++ return frame(l_sender_sp, unextended_sp, *saved_fp_addr, sender_pc); ++} + -+ // arg0 : previous value of memory ++//------------------------------------------------------------------------------ ++// frame::sender_raw ++frame frame::sender_raw(RegisterMap* map) const { ++ // Default is we done have to follow them. The sender_for_xxx will ++ // update it accordingly ++ assert(map != NULL, "map must be set"); ++ map->set_include_argument_oops(false); + -+ BarrierSet* bs = BarrierSet::barrier_set(); ++ if (is_entry_frame()) { ++ return sender_for_entry_frame(map); ++ } ++ if (is_interpreted_frame()) { ++ return sender_for_interpreter_frame(map); ++ } ++ assert(_cb == CodeCache::find_blob(pc()),"Must be the same"); + -+ const Register pre_val = x10; -+ const Register thread = xthread; -+ const Register tmp = t0; ++ // This test looks odd: why is it not is_compiled_frame() ? That's ++ // because stubs also have OOP maps. ++ if (_cb != NULL) { ++ return sender_for_compiled_frame(map); ++ } + -+ Address queue_index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset())); -+ Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset())); ++ // Must be native-compiled frame, i.e. the marshaling code for native ++ // methods that exists in the core system. 
++ return frame(sender_sp(), link(), sender_pc()); ++} + -+ Label done; -+ Label runtime; ++frame frame::sender(RegisterMap* map) const { ++ frame result = sender_raw(map); + -+ // Is marking still active? -+ Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); -+ __ lb(tmp, gc_state); -+ __ andi(tmp, tmp, ShenandoahHeap::MARKING); -+ __ beqz(tmp, done); ++ if (map->process_frames()) { ++ StackWatermarkSet::on_iteration(map->thread(), result); ++ } + -+ // Can we store original value in the thread's buffer? -+ __ ld(tmp, queue_index); -+ __ beqz(tmp, runtime); ++ return result; ++} + -+ __ sub(tmp, tmp, wordSize); -+ __ sd(tmp, queue_index); -+ __ ld(t1, buffer); -+ __ add(tmp, tmp, t1); -+ __ load_parameter(0, t1); -+ __ sd(t1, Address(tmp, 0)); -+ __ j(done); ++bool frame::is_interpreted_frame_valid(JavaThread* thread) const { ++ assert(is_interpreted_frame(), "Not an interpreted frame"); ++ // These are reasonable sanity checks ++ if (fp() == NULL || (intptr_t(fp()) & (wordSize-1)) != 0) { ++ return false; ++ } ++ if (sp() == NULL || (intptr_t(sp()) & (wordSize-1)) != 0) { ++ return false; ++ } ++ if (fp() + interpreter_frame_initial_sp_offset < sp()) { ++ return false; ++ } ++ // These are hacks to keep us out of trouble. ++ // The problem with these is that they mask other problems ++ if (fp() <= sp()) { // this attempts to deal with unsigned comparison above ++ return false; ++ } + -+ __ bind(runtime); -+ __ push_call_clobbered_registers(); -+ __ load_parameter(0, pre_val); -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread); -+ __ pop_call_clobbered_registers(); -+ __ bind(done); ++ // do some validation of frame elements + -+ __ epilogue(); -+} ++ // first the method ++ Method* m = *interpreter_frame_method_addr(); ++ // validate the method we'd find in this potential sender ++ if (!Method::is_valid_method(m)) { ++ return false; ++ } + -+void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm) { -+ __ prologue("shenandoah_load_reference_barrier", false); -+ // arg0 : object to be resolved ++ // stack frames shouldn't be much larger than max_stack elements ++ // this test requires the use of unextended_sp which is the sp as seen by ++ // the current frame, and not sp which is the "raw" pc which could point ++ // further because of local variables of the callee method inserted after ++ // method arguments ++ if (fp() - unextended_sp() > 1024 + m->max_stack()*Interpreter::stackElementSize) { ++ return false; ++ } + -+ __ push_call_clobbered_registers(); -+ __ load_parameter(0, x10); -+ __ load_parameter(1, x11); -+ if (UseCompressedOops) { -+ __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow)); -+ } else { -+ __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier)); ++ // validate bci/bcx ++ address bcp = interpreter_frame_bcp(); ++ if (m->validate_bci_from_bcp(bcp) < 0) { ++ return false; + } -+ __ jalr(ra); -+ __ mv(t0, x10); -+ __ pop_call_clobbered_registers(); -+ __ mv(x10, t0); + -+ __ epilogue(); ++ // validate constantPoolCache* ++ ConstantPoolCache* cp = *interpreter_frame_cache_addr(); ++ if (MetaspaceObj::is_valid(cp) == false) { ++ return false; ++ } ++ ++ // validate locals ++ address locals = (address) *interpreter_frame_locals_addr(); ++ if (locals > thread->stack_base() || locals < (address) fp()) { ++ return false; ++ } ++ ++ // We'd have to be pretty unlucky to be mislead at 
this point ++ return true; +} + -+#undef __ ++BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ Method* method = interpreter_frame_method(); ++ BasicType type = method->result_type(); + -+#endif // COMPILER1 ++ intptr_t* tos_addr = NULL; ++ if (method->is_native()) { ++ tos_addr = (intptr_t*)sp(); ++ if (type == T_FLOAT || type == T_DOUBLE) { ++ // This is because we do a push(ltos) after push(dtos) in generate_native_entry. ++ tos_addr += 2 * Interpreter::stackElementWords; ++ } ++ } else { ++ tos_addr = (intptr_t*)interpreter_frame_tos_address(); ++ } + -+address ShenandoahBarrierSetAssembler::shenandoah_lrb() { -+ assert(_shenandoah_lrb != NULL, "need load reference barrier stub"); -+ return _shenandoah_lrb; ++ switch (type) { ++ case T_OBJECT : ++ case T_ARRAY : { ++ oop obj; ++ if (method->is_native()) { ++ obj = cast_to_oop(at(interpreter_frame_oop_temp_offset)); ++ } else { ++ oop* obj_p = (oop*)tos_addr; ++ obj = (obj_p == NULL) ? (oop)NULL : *obj_p; ++ } ++ assert(Universe::is_in_heap_or_null(obj), "sanity check"); ++ *oop_result = obj; ++ break; ++ } ++ case T_BOOLEAN : value_result->z = *(jboolean*)tos_addr; break; ++ case T_BYTE : value_result->b = *(jbyte*)tos_addr; break; ++ case T_CHAR : value_result->c = *(jchar*)tos_addr; break; ++ case T_SHORT : value_result->s = *(jshort*)tos_addr; break; ++ case T_INT : value_result->i = *(jint*)tos_addr; break; ++ case T_LONG : value_result->j = *(jlong*)tos_addr; break; ++ case T_FLOAT : { ++ value_result->f = *(jfloat*)tos_addr; ++ break; ++ } ++ case T_DOUBLE : value_result->d = *(jdouble*)tos_addr; break; ++ case T_VOID : /* Nothing to do */ break; ++ default : ShouldNotReachHere(); ++ } ++ ++ return type; +} + -+#define __ cgen->assembler()-> + -+// Shenandoah load reference barrier. -+// -+// Input: -+// x10: OOP to evacuate. Not null. -+// x11: load address -+// -+// Output: -+// x10: Pointer to evacuated OOP. -+// -+// Trash t0 t1 Preserve everything else. 
-+address ShenandoahBarrierSetAssembler::generate_shenandoah_lrb(StubCodeGenerator* cgen) { -+ __ align(6); -+ StubCodeMark mark(cgen, "StubRoutines", "shenandoah_lrb"); -+ address start = __ pc(); ++intptr_t* frame::interpreter_frame_tos_at(jint offset) const { ++ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); ++ return &interpreter_frame_tos_address()[index]; ++} + -+ Label slow_path; -+ __ mv(t1, ShenandoahHeap::in_cset_fast_test_addr()); -+ __ srli(t0, x10, ShenandoahHeapRegion::region_size_bytes_shift_jint()); -+ __ add(t1, t1, t0); -+ __ lbu(t1, Address(t1, 0)); -+ __ andi(t0, t1, 1); -+ __ bnez(t0, slow_path); -+ __ ret(); ++#ifndef PRODUCT + -+ __ bind(slow_path); -+ __ enter(); // required for proper stackwalking of RuntimeStub frame ++#define DESCRIBE_FP_OFFSET(name) \ ++ values.describe(frame_no, fp() + frame::name##_offset, #name) + -+ __ push_call_clobbered_registers(); ++void frame::describe_pd(FrameValues& values, int frame_no) { ++ if (is_interpreted_frame()) { ++ DESCRIBE_FP_OFFSET(interpreter_frame_sender_sp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_last_sp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_method); ++ DESCRIBE_FP_OFFSET(interpreter_frame_mdp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_mirror); ++ DESCRIBE_FP_OFFSET(interpreter_frame_cache); ++ DESCRIBE_FP_OFFSET(interpreter_frame_locals); ++ DESCRIBE_FP_OFFSET(interpreter_frame_bcp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_initial_sp); ++ } ++} ++#endif + -+ if (UseCompressedOops) { -+ __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow)); -+ } else { -+ __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier)); ++intptr_t *frame::initial_deoptimization_info() { ++ // Not used on riscv, but we must return something. ++ return NULL; ++} ++ ++intptr_t* frame::real_fp() const { ++ if (_cb != NULL) { ++ // use the frame size if valid ++ int size = _cb->frame_size(); ++ if (size > 0) { ++ return unextended_sp() + size; ++ } + } -+ __ jalr(ra); -+ __ mv(t0, x10); -+ __ pop_call_clobbered_registers(); -+ __ mv(x10, t0); ++ // else rely on fp() ++ assert(!is_compiled_frame(), "unknown compiled frame size"); ++ return fp(); ++} + -+ __ leave(); // required for proper stackwalking of RuntimeStub frame -+ __ ret(); ++#undef DESCRIBE_FP_OFFSET + -+ return start; ++#ifndef PRODUCT ++// This is a generic constructor which is only used by pns() in debug.cpp. ++frame::frame(void* ptr_sp, void* ptr_fp, void* pc) { ++ init((intptr_t*)ptr_sp, (intptr_t*)ptr_fp, (address)pc); +} + -+#undef __ ++#endif + -+void ShenandoahBarrierSetAssembler::barrier_stubs_init() { -+ if (ShenandoahLoadRefBarrier) { -+ int stub_code_size = 2048; -+ ResourceMark rm; -+ BufferBlob* bb = BufferBlob::create("shenandoah_barrier_stubs", stub_code_size); -+ CodeBuffer buf(bb); -+ StubCodeGenerator cgen(&buf); -+ _shenandoah_lrb = generate_shenandoah_lrb(&cgen); -+ } ++void JavaFrameAnchor::make_walkable(JavaThread* thread) { ++ // last frame set? ++ if (last_Java_sp() == NULL) { return; } ++ // already walkable? 
++ if (walkable()) { return; } ++ vmassert(Thread::current() == (Thread*)thread, "not current thread"); ++ vmassert(last_Java_sp() != NULL, "not called from Java code?"); ++ vmassert(last_Java_pc() == NULL, "already walkable"); ++ capture_last_Java_pc(); ++ vmassert(walkable(), "something went wrong"); +} -diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp ++ ++void JavaFrameAnchor::capture_last_Java_pc() { ++ vmassert(_last_Java_sp != NULL, "no last frame set"); ++ vmassert(_last_Java_pc == NULL, "already walkable"); ++ _last_Java_pc = (address)_last_Java_sp[-1]; ++} +diff --git a/src/hotspot/cpu/riscv/frame_riscv.hpp b/src/hotspot/cpu/riscv/frame_riscv.hpp new file mode 100644 -index 000000000..84bc55706 +index 00000000000..c06aaa9e391 --- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp -@@ -0,0 +1,92 @@ ++++ b/src/hotspot/cpu/riscv/frame_riscv.hpp +@@ -0,0 +1,202 @@ +/* -+ * Copyright (c) 2018, Red Hat, Inc. All rights reserved. ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -14710,277 +14950,193 @@ index 000000000..84bc55706 + * + */ + -+#ifndef CPU_RISCV_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_RISCV_HPP -+#define CPU_RISCV_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_RISCV_HPP ++#ifndef CPU_RISCV_FRAME_RISCV_HPP ++#define CPU_RISCV_FRAME_RISCV_HPP + -+#include "asm/macroAssembler.hpp" -+#include "gc/shared/barrierSetAssembler.hpp" -+#ifdef COMPILER1 -+class LIR_Assembler; -+class ShenandoahPreBarrierStub; -+class ShenandoahLoadReferenceBarrierStub; -+class StubAssembler; -+#endif -+class StubCodeGenerator; ++#include "runtime/synchronizer.hpp" + -+class ShenandoahBarrierSetAssembler: public BarrierSetAssembler { -+public: -+ static address shenandoah_lrb(); ++// A frame represents a physical stack frame (an activation). Frames can be ++// C or Java frames, and the Java frames can be interpreted or compiled. ++// In contrast, vframes represent source-level activations, so that one physical frame ++// can correspond to multiple source level frames because of inlining. ++// A frame is comprised of {pc, fp, sp} ++// ------------------------------ Asm interpreter ---------------------------------------- ++// Layout of asm interpreter frame: ++// [expression stack ] * <- sp + -+ void iu_barrier(MacroAssembler *masm, Register dst, Register tmp); ++// [monitors[0] ] \ ++// ... 
| monitor block size = k ++// [monitors[k-1] ] / ++// [frame initial esp ] ( == &monitors[0], initially here) initial_sp_offset ++// [byte code index/pointr] = bcx() bcx_offset + -+#ifdef COMPILER1 -+ void gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub); -+ void gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub); -+ void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); -+ void generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm); -+#endif ++// [pointer to locals ] = locals() locals_offset ++// [constant pool cache ] = cache() cache_offset + -+ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, -+ Register src, Register dst, Register count, RegSet saved_regs); -+ virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Register dst, Address src, Register tmp1, Register tmp_thread); -+ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3); -+ virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, -+ Register obj, Register tmp, Label& slowpath); -+ virtual void cmpxchg_oop(MacroAssembler* masm, Register addr, Register expected, Register new_val, -+ Assembler::Aqrl acquire, Assembler::Aqrl release, bool is_cae, Register result); ++// [klass of method ] = mirror() mirror_offset ++// [padding ] + -+ virtual void barrier_stubs_init(); ++// [methodData ] = mdp() mdx_offset ++// [Method ] = method() method_offset + -+private: ++// [last esp ] = last_sp() last_sp_offset ++// [old stack pointer ] (sender_sp) sender_sp_offset + -+ static address _shenandoah_lrb; ++// [old frame pointer ] ++// [return pc ] + -+ void satb_write_barrier_pre(MacroAssembler* masm, -+ Register obj, -+ Register pre_val, -+ Register thread, -+ Register tmp, -+ bool tosca_live, -+ bool expand_call); -+ void shenandoah_write_barrier_pre(MacroAssembler* masm, -+ Register obj, -+ Register pre_val, -+ Register thread, -+ Register tmp, -+ bool tosca_live, -+ bool expand_call); ++// [last sp ] <- fp = link() ++// [oop temp ] (only for native calls) + -+ void resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp = noreg); -+ void resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp = noreg); -+ void load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr); -+ void load_reference_barrier_not_null(MacroAssembler* masm, Register dst, Address load_addr); ++// [padding ] (to preserve machine SP alignment) ++// [locals and parameters ] ++// <- sender sp ++// ------------------------------ Asm interpreter ---------------------------------------- + -+ address generate_shenandoah_lrb(StubCodeGenerator* cgen); -+}; ++// ------------------------------ C Frame ------------------------------------------------ ++// Stack: gcc with -fno-omit-frame-pointer ++// . ++// . ++// +-> . ++// | +-----------------+ | ++// | | return address | | ++// | | previous fp ------+ ++// | | saved registers | ++// | | local variables | ++// | | ... | <-+ ++// | +-----------------+ | ++// | | return address | | ++// +------ previous fp | | ++// | saved registers | | ++// | local variables | | ++// +-> | ... | | ++// | +-----------------+ | ++// | | return address | | ++// | | previous fp ------+ ++// | | saved registers | ++// | | local variables | ++// | | ... 
| <-+ ++// | +-----------------+ | ++// | | return address | | ++// +------ previous fp | | ++// | saved registers | | ++// | local variables | | ++// $fp --> | ... | | ++// +-----------------+ | ++// | return address | | ++// | previous fp ------+ ++// | saved registers | ++// $sp --> | local variables | ++// +-----------------+ ++// ------------------------------ C Frame ------------------------------------------------ + -+#endif // CPU_RISCV_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad -new file mode 100644 -index 000000000..6e310697d ---- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad -@@ -0,0 +1,188 @@ -+// -+// Copyright (c) 2018, Red Hat, Inc. All rights reserved. -+// Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+// -+// This code is free software; you can redistribute it and/or modify it -+// under the terms of the GNU General Public License version 2 only, as -+// published by the Free Software Foundation. -+// -+// This code is distributed in the hope that it will be useful, but WITHOUT -+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+// version 2 for more details (a copy is included in the LICENSE file that -+// accompanied this code). -+// -+// You should have received a copy of the GNU General Public License version -+// 2 along with this work; if not, write to the Free Software Foundation, -+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+// -+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+// or visit www.oracle.com if you need additional information or have any -+// questions. -+// -+// ++ public: ++ enum { ++ pc_return_offset = 0, ++ // All frames ++ link_offset = -2, ++ return_addr_offset = -1, ++ sender_sp_offset = 0, ++ // Interpreter frames ++ interpreter_frame_oop_temp_offset = 1, // for native calls only + -+source_hpp %{ -+#include "gc/shenandoah/shenandoahBarrierSet.hpp" -+#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" -+%} ++ interpreter_frame_sender_sp_offset = -3, ++ // outgoing sp before a call to an invoked method ++ interpreter_frame_last_sp_offset = interpreter_frame_sender_sp_offset - 1, ++ interpreter_frame_method_offset = interpreter_frame_last_sp_offset - 1, ++ interpreter_frame_mdp_offset = interpreter_frame_method_offset - 1, ++ interpreter_frame_padding_offset = interpreter_frame_mdp_offset - 1, ++ interpreter_frame_mirror_offset = interpreter_frame_padding_offset - 1, ++ interpreter_frame_cache_offset = interpreter_frame_mirror_offset - 1, ++ interpreter_frame_locals_offset = interpreter_frame_cache_offset - 1, ++ interpreter_frame_bcp_offset = interpreter_frame_locals_offset - 1, ++ interpreter_frame_initial_sp_offset = interpreter_frame_bcp_offset - 1, + -+instruct compareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ -+ match(Set res (ShenandoahCompareAndSwapP mem (Binary oldval newval))); -+ ins_cost(10 * DEFAULT_COST); ++ interpreter_frame_monitor_block_top_offset = interpreter_frame_initial_sp_offset, ++ interpreter_frame_monitor_block_bottom_offset = interpreter_frame_initial_sp_offset, + -+ effect(TEMP tmp, KILL cr); ++ // Entry frames ++ // n.b. 
these values are determined by the layout defined in ++ // stubGenerator for the Java call stub ++ entry_frame_after_call_words = 22, ++ entry_frame_call_wrapper_offset = -10, + -+ format %{ -+ "cmpxchg_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapP_shenandoah" -+ %} ++ // we don't need a save area ++ arg_reg_save_area_bytes = 0 ++ }; + -+ ins_encode %{ -+ Register tmp = $tmp$$Register; -+ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. -+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, -+ Assembler::relaxed /* acquire */, Assembler::rl /* release */, -+ false /* is_cae */, $res$$Register); -+ %} ++ intptr_t ptr_at(int offset) const { ++ return *ptr_at_addr(offset); ++ } + -+ ins_pipe(pipe_slow); -+%} ++ void ptr_at_put(int offset, intptr_t value) { ++ *ptr_at_addr(offset) = value; ++ } + -+instruct compareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ -+ match(Set res (ShenandoahCompareAndSwapN mem (Binary oldval newval))); -+ ins_cost(10 * DEFAULT_COST); ++ private: ++ // an additional field beyond _sp and _pc: ++ intptr_t* _fp; // frame pointer ++ // The interpreter and adapters will extend the frame of the caller. ++ // Since oopMaps are based on the sp of the caller before extension ++ // we need to know that value. However in order to compute the address ++ // of the return address we need the real "raw" sp. Since sparc already ++ // uses sp() to mean "raw" sp and unextended_sp() to mean the caller's ++ // original sp we use that convention. + -+ effect(TEMP tmp, KILL cr); ++ intptr_t* _unextended_sp; ++ void adjust_unextended_sp(); + -+ format %{ -+ "cmpxchgw_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapN_shenandoah" -+ %} ++ intptr_t* ptr_at_addr(int offset) const { ++ return (intptr_t*) addr_at(offset); ++ } + -+ ins_encode %{ -+ Register tmp = $tmp$$Register; -+ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. -+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, -+ Assembler::relaxed /* acquire */, Assembler::rl /* release */, -+ false /* is_cae */, $res$$Register); -+ %} ++#ifdef ASSERT ++ // Used in frame::sender_for_{interpreter,compiled}_frame ++ static void verify_deopt_original_pc( CompiledMethod* nm, intptr_t* unextended_sp); ++#endif + -+ ins_pipe(pipe_slow); -+%} ++ public: ++ // Constructors + -+instruct compareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ match(Set res (ShenandoahCompareAndSwapP mem (Binary oldval newval))); -+ ins_cost(10 * DEFAULT_COST); ++ frame(intptr_t* ptr_sp, intptr_t* ptr_fp, address pc); + -+ effect(TEMP tmp, KILL cr); ++ frame(intptr_t* ptr_sp, intptr_t* unextended_sp, intptr_t* ptr_fp, address pc); + -+ format %{ -+ "cmpxchg_acq_shenandoah_oop $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapPAcq_shenandoah" -+ %} ++ frame(intptr_t* ptr_sp, intptr_t* ptr_fp); + -+ ins_encode %{ -+ Register tmp = $tmp$$Register; -+ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. 
-+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, -+ Assembler::aq /* acquire */, Assembler::rl /* release */, -+ false /* is_cae */, $res$$Register); -+ %} ++ void init(intptr_t* ptr_sp, intptr_t* ptr_fp, address pc); + -+ ins_pipe(pipe_slow); -+%} ++ // accessors for the instance variables ++ // Note: not necessarily the real 'frame pointer' (see real_fp) ++ intptr_t* fp() const { return _fp; } + -+instruct compareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ match(Set res (ShenandoahCompareAndSwapN mem (Binary oldval newval))); -+ ins_cost(10 * DEFAULT_COST); ++ inline address* sender_pc_addr() const; + -+ effect(TEMP tmp, KILL cr); ++ // expression stack tos if we are nested in a java call ++ intptr_t* interpreter_frame_last_sp() const; + -+ format %{ -+ "cmpxchgw_acq_shenandoah_narrow_oop $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapNAcq_shenandoah" -+ %} ++ // helper to update a map with callee-saved RBP ++ static void update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr); + -+ ins_encode %{ -+ Register tmp = $tmp$$Register; -+ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. -+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, -+ Assembler::aq /* acquire */, Assembler::rl /* release */, -+ false /* is_cae */, $res$$Register); -+ %} -+ -+ ins_pipe(pipe_slow); -+%} -+ -+instruct compareAndExchangeN_shenandoah(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ -+ match(Set res (ShenandoahCompareAndExchangeN mem (Binary oldval newval))); -+ ins_cost(10 * DEFAULT_COST); -+ effect(TEMP_DEF res, TEMP tmp, KILL cr); -+ format %{ -+ "cmpxchgw_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeN_shenandoah" -+ %} -+ ins_encode %{ -+ Register tmp = $tmp$$Register; -+ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. -+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, -+ Assembler::relaxed /* acquire */, Assembler::rl /* release */, -+ true /* is_cae */, $res$$Register); -+ %} -+ ins_pipe(pipe_slow); -+%} -+ -+instruct compareAndExchangeP_shenandoah(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ -+ match(Set res (ShenandoahCompareAndExchangeP mem (Binary oldval newval))); -+ ins_cost(10 * DEFAULT_COST); -+ -+ effect(TEMP_DEF res, TEMP tmp, KILL cr); -+ format %{ -+ "cmpxchg_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndExchangeP_shenandoah" -+ %} -+ ins_encode %{ -+ Register tmp = $tmp$$Register; -+ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. 
-+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, -+ Assembler::relaxed /* acquire */, Assembler::rl /* release */, -+ true /* is_cae */, $res$$Register); -+ %} -+ ins_pipe(pipe_slow); -+%} -+ -+instruct weakCompareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ -+ match(Set res (ShenandoahWeakCompareAndSwapN mem (Binary oldval newval))); -+ ins_cost(10 * DEFAULT_COST); ++ // deoptimization support ++ void interpreter_frame_set_last_sp(intptr_t* last_sp); + -+ effect(TEMP tmp, KILL cr); -+ format %{ -+ "cmpxchgw_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapN_shenandoah" -+ "mv $res, EQ\t# $res <-- (EQ ? 1 : 0)" -+ %} -+ ins_encode %{ -+ Register tmp = $tmp$$Register; -+ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. -+ // Weak is not current supported by ShenandoahBarrierSet::cmpxchg_oop -+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, -+ Assembler::relaxed /* acquire */, Assembler::rl /* release */, -+ false /* is_cae */, $res$$Register); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ static jint interpreter_frame_expression_stack_direction() { return -1; } + -+instruct weakCompareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ -+ match(Set res (ShenandoahWeakCompareAndSwapP mem (Binary oldval newval))); -+ ins_cost(10 * DEFAULT_COST); ++ // returns the sending frame, without applying any barriers ++ frame sender_raw(RegisterMap* map) const; + -+ effect(TEMP tmp, KILL cr); -+ format %{ -+ "cmpxchg_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapP_shenandoah" -+ %} -+ ins_encode %{ -+ Register tmp = $tmp$$Register; -+ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. -+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, -+ Assembler::relaxed /* acquire */, Assembler::rl /* release */, -+ false /* is_cae */, $res$$Register); -+ %} -+ ins_pipe(pipe_slow); -+%} -diff --git a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp ++#endif // CPU_RISCV_FRAME_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/frame_riscv.inline.hpp b/src/hotspot/cpu/riscv/frame_riscv.inline.hpp new file mode 100644 -index 000000000..96068e637 +index 00000000000..5ac1bf57f57 --- /dev/null -+++ b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp -@@ -0,0 +1,44 @@ ++++ b/src/hotspot/cpu/riscv/frame_riscv.inline.hpp +@@ -0,0 +1,248 @@ +/* -+ * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -15003,310 +15159,236 @@ index 000000000..96068e637 + * + */ + -+#ifndef CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP -+#define CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP ++#ifndef CPU_RISCV_FRAME_RISCV_INLINE_HPP ++#define CPU_RISCV_FRAME_RISCV_INLINE_HPP + -+const int StackAlignmentInBytes = 16; ++#include "code/codeCache.hpp" ++#include "code/vmreg.inline.hpp" + -+// Indicates whether the C calling conventions require that -+// 32-bit integer argument values are extended to 64 bits. -+const bool CCallingConventionRequiresIntsAsLongs = false; ++// Inline functions for RISCV frames: + -+#define DEOPTIMIZE_WHEN_PATCHING ++// Constructors: + -+#define SUPPORTS_NATIVE_CX8 ++inline frame::frame() { ++ _pc = NULL; ++ _sp = NULL; ++ _unextended_sp = NULL; ++ _fp = NULL; ++ _cb = NULL; ++ _deopt_state = unknown; ++} + -+#define SUPPORT_RESERVED_STACK_AREA ++static int spin; + -+#define THREAD_LOCAL_POLL ++inline void frame::init(intptr_t* ptr_sp, intptr_t* ptr_fp, address pc) { ++ intptr_t a = intptr_t(ptr_sp); ++ intptr_t b = intptr_t(ptr_fp); ++ _sp = ptr_sp; ++ _unextended_sp = ptr_sp; ++ _fp = ptr_fp; ++ _pc = pc; ++ assert(pc != NULL, "no pc?"); ++ _cb = CodeCache::find_blob(pc); ++ adjust_unextended_sp(); + -+#endif // CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp -new file mode 100644 -index 000000000..b46661a8f ---- /dev/null -+++ b/src/hotspot/cpu/riscv/globals_riscv.hpp -@@ -0,0 +1,120 @@ -+/* -+ * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} + -+#ifndef CPU_RISCV_GLOBALS_RISCV_HPP -+#define CPU_RISCV_GLOBALS_RISCV_HPP ++inline frame::frame(intptr_t* ptr_sp, intptr_t* ptr_fp, address pc) { ++ init(ptr_sp, ptr_fp, pc); ++} + -+#include "utilities/globalDefinitions.hpp" -+#include "utilities/macros.hpp" ++inline frame::frame(intptr_t* ptr_sp, intptr_t* unextended_sp, intptr_t* ptr_fp, address pc) { ++ intptr_t a = intptr_t(ptr_sp); ++ intptr_t b = intptr_t(ptr_fp); ++ _sp = ptr_sp; ++ _unextended_sp = unextended_sp; ++ _fp = ptr_fp; ++ _pc = pc; ++ assert(pc != NULL, "no pc?"); ++ _cb = CodeCache::find_blob(pc); ++ adjust_unextended_sp(); + -+// Sets the default values for platform dependent flags used by the runtime system. -+// (see globals.hpp) ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ assert(_cb->as_compiled_method()->insts_contains_inclusive(_pc), ++ "original PC must be in the main code section of the the compiled method (or must be immediately following it)"); ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} + -+define_pd_global(bool, NeedsDeoptSuspend, false); // only register window machines need this ++inline frame::frame(intptr_t* ptr_sp, intptr_t* ptr_fp) { ++ intptr_t a = intptr_t(ptr_sp); ++ intptr_t b = intptr_t(ptr_fp); ++ _sp = ptr_sp; ++ _unextended_sp = ptr_sp; ++ _fp = ptr_fp; ++ _pc = (address)(ptr_sp[-1]); + -+define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks -+define_pd_global(bool, TrapBasedNullChecks, false); -+define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs past to check cast ++ // Here's a sticky one. This constructor can be called via AsyncGetCallTrace ++ // when last_Java_sp is non-null but the pc fetched is junk. If we are truly ++ // unlucky the junk value could be to a zombied method and we'll die on the ++ // find_blob call. This is also why we can have no asserts on the validity ++ // of the pc we find here. AsyncGetCallTrace -> pd_get_top_frame_for_signal_handler ++ // -> pd_last_frame should use a specialized version of pd_last_frame which could ++ // call a specilaized frame constructor instead of this one. ++ // Then we could use the assert below. However this assert is of somewhat dubious ++ // value. + -+define_pd_global(uintx, CodeCacheSegmentSize, 64 TIERED_ONLY(+64)); // Tiered compilation has large code-entry alignment. -+define_pd_global(intx, CodeEntryAlignment, 64); -+define_pd_global(intx, OptoLoopAlignment, 16); -+define_pd_global(intx, InlineFrequencyCount, 100); ++ _cb = CodeCache::find_blob(_pc); ++ adjust_unextended_sp(); + -+#define DEFAULT_STACK_YELLOW_PAGES (2) -+#define DEFAULT_STACK_RED_PAGES (1) -+// Java_java_net_SocketOutputStream_socketWrite0() uses a 64k buffer on the -+// stack if compiled for unix and LP64. To pass stack overflow tests we need -+// 20 shadow pages. 
-+#define DEFAULT_STACK_SHADOW_PAGES (20 DEBUG_ONLY(+5)) -+#define DEFAULT_STACK_RESERVED_PAGES (1) ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} + -+#define MIN_STACK_YELLOW_PAGES DEFAULT_STACK_YELLOW_PAGES -+#define MIN_STACK_RED_PAGES DEFAULT_STACK_RED_PAGES -+#define MIN_STACK_SHADOW_PAGES DEFAULT_STACK_SHADOW_PAGES -+#define MIN_STACK_RESERVED_PAGES (0) ++// Accessors + -+define_pd_global(intx, StackYellowPages, DEFAULT_STACK_YELLOW_PAGES); -+define_pd_global(intx, StackRedPages, DEFAULT_STACK_RED_PAGES); -+define_pd_global(intx, StackShadowPages, DEFAULT_STACK_SHADOW_PAGES); -+define_pd_global(intx, StackReservedPages, DEFAULT_STACK_RESERVED_PAGES); ++inline bool frame::equal(frame other) const { ++ bool ret = sp() == other.sp() && ++ unextended_sp() == other.unextended_sp() && ++ fp() == other.fp() && ++ pc() == other.pc(); ++ assert(!ret || ret && cb() == other.cb() && _deopt_state == other._deopt_state, "inconsistent construction"); ++ return ret; ++} + -+define_pd_global(bool, RewriteBytecodes, true); -+define_pd_global(bool, RewriteFrequentPairs, true); ++// Return unique id for this frame. The id must have a value where we can distinguish ++// identity and younger/older relationship. NULL represents an invalid (incomparable) ++// frame. ++inline intptr_t* frame::id(void) const { return unextended_sp(); } + -+define_pd_global(bool, UseMembar, true); ++// Return true if the frame is older (less recent activation) than the frame represented by id ++inline bool frame::is_older(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); ++ return this->id() > id ; } + -+define_pd_global(bool, PreserveFramePointer, false); ++inline intptr_t* frame::link() const { return (intptr_t*) *(intptr_t **)addr_at(link_offset); } + -+// GC Ergo Flags -+define_pd_global(uintx, CMSYoungGenPerWorker, 64*M); // default max size of CMS young gen, per GC worker thread ++inline intptr_t* frame::link_or_null() const { ++ intptr_t** ptr = (intptr_t **)addr_at(link_offset); ++ return os::is_readable_pointer(ptr) ? 
*ptr : NULL; ++} + -+define_pd_global(uintx, TypeProfileLevel, 111); ++inline intptr_t* frame::unextended_sp() const { return _unextended_sp; } + -+define_pd_global(bool, CompactStrings, true); ++// Return address ++inline address* frame::sender_pc_addr() const { return (address*) addr_at(return_addr_offset); } ++inline address frame::sender_pc() const { return *sender_pc_addr(); } ++inline intptr_t* frame::sender_sp() const { return addr_at(sender_sp_offset); } + -+// Clear short arrays bigger than one word in an arch-specific way -+define_pd_global(intx, InitArrayShortSize, BytesPerLong); ++inline intptr_t** frame::interpreter_frame_locals_addr() const { ++ return (intptr_t**)addr_at(interpreter_frame_locals_offset); ++} + -+define_pd_global(bool, ThreadLocalHandshakes, true); ++inline intptr_t* frame::interpreter_frame_last_sp() const { ++ return *(intptr_t**)addr_at(interpreter_frame_last_sp_offset); ++} + -+define_pd_global(intx, InlineSmallCode, 1000); ++inline intptr_t* frame::interpreter_frame_bcp_addr() const { ++ return (intptr_t*)addr_at(interpreter_frame_bcp_offset); ++} + -+#define ARCH_FLAGS(develop, \ -+ product, \ -+ diagnostic, \ -+ experimental, \ -+ notproduct, \ -+ range, \ -+ constraint, \ -+ writeable) \ -+ \ -+ product(bool, NearCpool, true, \ -+ "constant pool is close to instructions") \ -+ product(bool, UseBarriersForVolatile, false, \ -+ "Use memory barriers to implement volatile accesses") \ -+ product(bool, UseCRC32, false, \ -+ "Use CRC32 instructions for CRC32 computation") \ -+ product(bool, UseBlockZeroing, true, \ -+ "Use DC ZVA for block zeroing") \ -+ product(intx, BlockZeroingLowLimit, 256, \ -+ "Minimum size in bytes when block zeroing will be used") \ -+ range(1, max_jint) \ -+ product(bool, TraceTraps, false, "Trace all traps the signal handler") \ -+ /* For now we're going to be safe and add the I/O bits to userspace fences. */ \ -+ product(bool, UseConservativeFence, true, \ -+ "Extend i for r and o for w in the pred/succ flags of fence") \ -+ product(bool, AvoidUnalignedAccesses, true, \ -+ "Avoid generating unaligned memory accesses") \ -+ product(intx, EagerArrayCopyThreshold, 128, \ -+ "Threshod of array length by bytes to " \ -+ "trigger the eager array copy") \ -+ range(0, 65535) \ -+ experimental(bool, UseRVV, false, "Use RVV instructions") \ -+ experimental(bool, UseZba, false, "Use Zba instructions") \ -+ experimental(bool, UseZbb, false, "Use Zbb instructions") ++inline intptr_t* frame::interpreter_frame_mdp_addr() const { ++ return (intptr_t*)addr_at(interpreter_frame_mdp_offset); ++} + -+#endif // CPU_RISCV_GLOBALS_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/icBuffer_riscv.cpp b/src/hotspot/cpu/riscv/icBuffer_riscv.cpp -new file mode 100644 -index 000000000..980b2a81b ---- /dev/null -+++ b/src/hotspot/cpu/riscv/icBuffer_riscv.cpp -@@ -0,0 +1,79 @@ -+/* -+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ + -+#include "precompiled.hpp" -+#include "asm/macroAssembler.hpp" -+#include "asm/macroAssembler.inline.hpp" -+#include "code/icBuffer.hpp" -+#include "gc/shared/collectedHeap.inline.hpp" -+#include "interpreter/bytecodes.hpp" -+#include "memory/resourceArea.hpp" -+#include "nativeInst_riscv.hpp" -+#include "oops/oop.inline.hpp" ++// Constant pool cache + -+int InlineCacheBuffer::ic_stub_code_size() { -+ // 6: auipc + ld + auipc + jalr + address(2 * instruction_size) -+ // 5: auipc + ld + j + address(2 * instruction_size ) -+ return (MacroAssembler::far_branches() ? 6 : 5) * NativeInstruction::instruction_size; ++inline ConstantPoolCache** frame::interpreter_frame_cache_addr() const { ++ return (ConstantPoolCache**)addr_at(interpreter_frame_cache_offset); +} + -+#define __ masm-> ++// Method + -+void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached_value, address entry_point) { -+ assert_cond(code_begin != NULL && entry_point != NULL); -+ ResourceMark rm; -+ CodeBuffer code(code_begin, ic_stub_code_size()); -+ MacroAssembler* masm = new MacroAssembler(&code); -+ // Note: even though the code contains an embedded value, we do not need reloc info -+ // because -+ // (1) the value is old (i.e., doesn't matter for scavenges) -+ // (2) these ICStubs are removed *before* a GC happens, so the roots disappear ++inline Method** frame::interpreter_frame_method_addr() const { ++ return (Method**)addr_at(interpreter_frame_method_offset); ++} + -+ address start = __ pc(); -+ Label l; -+ __ ld(t1, l); -+ __ far_jump(ExternalAddress(entry_point)); -+ __ align(wordSize); -+ __ bind(l); -+ __ emit_int64((intptr_t)cached_value); -+ // Only need to invalidate the 1st two instructions - not the whole ic stub -+ ICache::invalidate_range(code_begin, InlineCacheBuffer::ic_stub_code_size()); -+ assert(__ pc() - start == ic_stub_code_size(), "must be"); ++// Mirror ++ ++inline oop* frame::interpreter_frame_mirror_addr() const { ++ return (oop*)addr_at(interpreter_frame_mirror_offset); +} + -+address InlineCacheBuffer::ic_buffer_entry_point(address code_begin) { -+ NativeMovConstReg* move = nativeMovConstReg_at(code_begin); // creation also verifies the object -+ NativeJump* jump = nativeJump_at(move->next_instruction_address()); -+ return jump->jump_destination(); ++// top of expression stack ++inline intptr_t* frame::interpreter_frame_tos_address() const { ++ intptr_t* last_sp = interpreter_frame_last_sp(); ++ if (last_sp == NULL) { ++ return sp(); ++ } else { ++ // sp() may have been extended or shrunk by an adapter. At least ++ // check that we don't fall behind the legal region. ++ // For top deoptimized frame last_sp == interpreter_frame_monitor_end. 
++ assert(last_sp <= (intptr_t*) interpreter_frame_monitor_end(), "bad tos"); ++ return last_sp; ++ } +} + ++inline oop* frame::interpreter_frame_temp_oop_addr() const { ++ return (oop *)(fp() + interpreter_frame_oop_temp_offset); ++} + -+void* InlineCacheBuffer::ic_buffer_cached_value(address code_begin) { -+ // The word containing the cached value is at the end of this IC buffer -+ uintptr_t *p = (uintptr_t *)(code_begin + ic_stub_code_size() - wordSize); -+ void* o = (void*)*p; -+ return o; ++inline int frame::interpreter_frame_monitor_size() { ++ return BasicObjectLock::size(); +} -diff --git a/src/hotspot/cpu/riscv/icache_riscv.cpp b/src/hotspot/cpu/riscv/icache_riscv.cpp -new file mode 100644 -index 000000000..ed8022784 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/icache_riscv.cpp -@@ -0,0 +1,61 @@ -+/* -+ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ + -+#include "precompiled.hpp" -+#include "runtime/icache.hpp" -+#include "macroAssembler_riscv.hpp" + -+#define __ _masm-> ++// expression stack ++// (the max_stack arguments are used by the GC; see class FrameClosure) + -+static int icache_flush(address addr, int lines, int magic) { -+ // To make a store to instruction memory visible to all RISC-V harts, -+ // the writing hart has to execute a data FENCE before requesting that -+ // all remote RISC-V harts execute a FENCE.I -+ // -+ // No such-assurance is defined at the interface level of the builtin -+ // method, and so we should make sure it works. -+ __asm__ volatile("fence rw, rw" : : : "memory"); -+ -+ __builtin___clear_cache(addr, addr + (lines << ICache::log2_line_size)); -+ return magic; ++inline intptr_t* frame::interpreter_frame_expression_stack() const { ++ intptr_t* monitor_end = (intptr_t*) interpreter_frame_monitor_end(); ++ return monitor_end-1; +} + -+void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) { + -+ address start = (address)icache_flush; ++// Entry frames + -+ *flush_icache_stub = (ICache::flush_icache_stub_t)start; ++inline JavaCallWrapper** frame::entry_frame_call_wrapper_addr() const { ++ return (JavaCallWrapper**)addr_at(entry_frame_call_wrapper_offset); ++} + -+ // ICache::invalidate_range() contains explicit condition that the first -+ // call is invoked on the generated icache flush stub code range. 
-+ ICache::invalidate_range(start, 0); + -+ { -+ StubCodeMark mark(this, "ICache", "fake_stub_for_inlined_icache_flush"); -+ __ ret(); -+ } ++// Compiled frames ++PRAGMA_DIAG_PUSH ++PRAGMA_NONNULL_IGNORED ++inline oop frame::saved_oop_result(RegisterMap* map) const { ++ oop* result_adr = (oop *)map->location(x10->as_VMReg()); ++ guarantee(result_adr != NULL, "bad register save location"); ++ return (*result_adr); +} + -+#undef __ -diff --git a/src/hotspot/cpu/riscv/icache_riscv.hpp b/src/hotspot/cpu/riscv/icache_riscv.hpp ++inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) { ++ oop* result_adr = (oop *)map->location(x10->as_VMReg()); ++ guarantee(result_adr != NULL, "bad register save location"); ++ *result_adr = obj; ++} ++PRAGMA_DIAG_POP ++ ++#endif // CPU_RISCV_FRAME_RISCV_INLINE_HPP +diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp new file mode 100644 -index 000000000..a503d3be3 +index 00000000000..1c46b3947d3 --- /dev/null -+++ b/src/hotspot/cpu/riscv/icache_riscv.hpp -@@ -0,0 +1,42 @@ ++++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp +@@ -0,0 +1,484 @@ +/* -+ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -15330,2259 +15412,2292 @@ index 000000000..a503d3be3 + * + */ + -+#ifndef CPU_RISCV_ICACHE_RISCV_HPP -+#define CPU_RISCV_ICACHE_RISCV_HPP -+ -+// Interface for updating the instruction cache. Whenever the VM -+// modifies code, part of the processor instruction cache potentially -+// has to be flushed. ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/g1/g1BarrierSet.hpp" ++#include "gc/g1/g1BarrierSetAssembler.hpp" ++#include "gc/g1/g1BarrierSetRuntime.hpp" ++#include "gc/g1/g1CardTable.hpp" ++#include "gc/g1/g1ThreadLocalData.hpp" ++#include "gc/g1/heapRegion.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/thread.hpp" ++#ifdef COMPILER1 ++#include "c1/c1_LIRAssembler.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "gc/g1/c1/g1BarrierSetC1.hpp" ++#endif + -+class ICache : public AbstractICache { -+public: -+ enum { -+ stub_size = 16, // Size of the icache flush stub in bytes -+ line_size = BytesPerWord, // conservative -+ log2_line_size = LogBytesPerWord // log2(line_size) -+ }; -+}; ++#define __ masm-> + -+#endif // CPU_RISCV_ICACHE_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -new file mode 100644 -index 000000000..91deb0ae2 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -@@ -0,0 +1,1932 @@ -+/* -+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. 
-+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, RegSet saved_regs) { ++ assert_cond(masm != NULL); ++ bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; ++ if (!dest_uninitialized) { ++ Label done; ++ Address in_progress(xthread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); + -+#include "precompiled.hpp" -+#include "asm/macroAssembler.inline.hpp" -+#include "gc/shared/barrierSet.hpp" -+#include "gc/shared/barrierSetAssembler.hpp" -+#include "interp_masm_riscv.hpp" -+#include "interpreter/interpreter.hpp" -+#include "interpreter/interpreterRuntime.hpp" -+#include "logging/log.hpp" -+#include "oops/arrayOop.hpp" -+#include "oops/markOop.hpp" -+#include "oops/method.hpp" -+#include "oops/methodData.hpp" -+#include "prims/jvmtiExport.hpp" -+#include "prims/jvmtiThreadState.hpp" -+#include "runtime/basicLock.hpp" -+#include "runtime/biasedLocking.hpp" -+#include "runtime/frame.inline.hpp" -+#include "runtime/safepointMechanism.hpp" -+#include "runtime/sharedRuntime.hpp" -+#include "runtime/thread.inline.hpp" ++ // Is marking active? ++ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { ++ __ lwu(t0, in_progress); ++ } else { ++ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); ++ __ lbu(t0, in_progress); ++ } ++ __ beqz(t0, done); + ++ __ push_reg(saved_regs, sp); ++ if (count == c_rarg0) { ++ if (addr == c_rarg1) { ++ // exactly backwards!! 
++ __ mv(t0, c_rarg0); ++ __ mv(c_rarg0, c_rarg1); ++ __ mv(c_rarg1, t0); ++ } else { ++ __ mv(c_rarg1, count); ++ __ mv(c_rarg0, addr); ++ } ++ } else { ++ __ mv(c_rarg0, addr); ++ __ mv(c_rarg1, count); ++ } ++ if (UseCompressedOops) { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_narrow_oop_entry), 2); ++ } else { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_oop_entry), 2); ++ } ++ __ pop_reg(saved_regs, sp); + -+void InterpreterMacroAssembler::narrow(Register result) { -+ // Get method->_constMethod->_result_type -+ ld(t0, Address(fp, frame::interpreter_frame_method_offset * wordSize)); -+ ld(t0, Address(t0, Method::const_offset())); -+ lbu(t0, Address(t0, ConstMethod::result_type_offset())); ++ __ bind(done); ++ } ++} + -+ Label done, notBool, notByte, notChar; ++void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register start, Register count, Register tmp, RegSet saved_regs) { ++ assert_cond(masm != NULL); ++ __ push_reg(saved_regs, sp); ++ assert_different_registers(start, count, tmp); ++ assert_different_registers(c_rarg0, count); ++ __ mv(c_rarg0, start); ++ __ mv(c_rarg1, count); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_post_entry), 2); ++ __ pop_reg(saved_regs, sp); ++} + -+ // common case first -+ mv(t1, T_INT); -+ beq(t0, t1, done); ++void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call) { ++ // If expand_call is true then we expand the call_VM_leaf macro ++ // directly to skip generating the check by ++ // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. + -+ // mask integer result to narrower return type. -+ mv(t1, T_BOOLEAN); -+ bne(t0, t1, notBool); ++ assert_cond(masm != NULL); ++ assert(thread == xthread, "must be"); + -+ andi(result, result, 0x1); -+ j(done); ++ Label done; ++ Label runtime; + -+ bind(notBool); -+ mv(t1, T_BYTE); -+ bne(t0, t1, notByte); -+ sign_extend(result, result, 8); -+ j(done); ++ assert_different_registers(obj, pre_val, tmp, t0); ++ assert(pre_val != noreg && tmp != noreg, "expecting a register"); + -+ bind(notByte); -+ mv(t1, T_CHAR); -+ bne(t0, t1, notChar); -+ zero_extend(result, result, 16); -+ j(done); ++ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); ++ Address index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); + -+ bind(notChar); -+ sign_extend(result, result, 16); ++ // Is marking active? ++ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { // 4-byte width ++ __ lwu(tmp, in_progress); ++ } else { ++ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); ++ __ lbu(tmp, in_progress); ++ } ++ __ beqz(tmp, done); + -+ // Nothing to do for T_INT -+ bind(done); -+ addw(result, result, zr); -+} ++ // Do we need to load the previous value? ++ if (obj != noreg) { ++ __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW); ++ } + -+void InterpreterMacroAssembler::jump_to_entry(address entry) { -+ assert(entry != NULL, "Entry must have been generated by now"); -+ j(entry); -+} ++ // Is the previous value null? 
++ __ beqz(pre_val, done); + -+void InterpreterMacroAssembler::check_and_handle_popframe(Register java_thread) { -+ if (JvmtiExport::can_pop_frame()) { -+ Label L; -+ // Initiate popframe handling only if it is not already being -+ // processed. If the flag has the popframe_processing bit set, -+ // it means that this code is called *during* popframe handling - we -+ // don't want to reenter. -+ // This method is only called just after the call into the vm in -+ // call_VM_base, so the arg registers are available. -+ lwu(t1, Address(xthread, JavaThread::popframe_condition_offset())); -+ andi(t0, t1, JavaThread::popframe_pending_bit); -+ beqz(t0, L); -+ andi(t0, t1, JavaThread::popframe_processing_bit); -+ bnez(t0, L); -+ // Call Interpreter::remove_activation_preserving_args_entry() to get the -+ // address of the same-named entrypoint in the generated interpreter code. -+ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_preserving_args_entry)); -+ jr(x10); -+ bind(L); -+ } -+} ++ // Can we store original value in the thread's buffer? ++ // Is index == 0? ++ // (The index field is typed as size_t.) + ++ __ ld(tmp, index); // tmp := *index_adr ++ __ beqz(tmp, runtime); // tmp == 0? ++ // If yes, goto runtime + -+void InterpreterMacroAssembler::load_earlyret_value(TosState state) { -+ ld(x12, Address(xthread, JavaThread::jvmti_thread_state_offset())); -+ const Address tos_addr(x12, JvmtiThreadState::earlyret_tos_offset()); -+ const Address oop_addr(x12, JvmtiThreadState::earlyret_oop_offset()); -+ const Address val_addr(x12, JvmtiThreadState::earlyret_value_offset()); -+ switch (state) { -+ case atos: -+ ld(x10, oop_addr); -+ sd(zr, oop_addr); -+ verify_oop(x10); -+ break; -+ case ltos: -+ ld(x10, val_addr); -+ break; -+ case btos: // fall through -+ case ztos: // fall through -+ case ctos: // fall through -+ case stos: // fall through -+ case itos: -+ lwu(x10, val_addr); -+ break; -+ case ftos: -+ flw(f10, val_addr); -+ break; -+ case dtos: -+ fld(f10, val_addr); -+ break; -+ case vtos: -+ /* nothing to do */ -+ break; -+ default: -+ ShouldNotReachHere(); -+ } -+ // Clean up tos value in the thread object -+ mvw(t0, (int) ilgl); -+ sw(t0, tos_addr); -+ sw(zr, val_addr); -+} ++ __ sub(tmp, tmp, wordSize); // tmp := tmp - wordSize ++ __ sd(tmp, index); // *index_adr := tmp ++ __ ld(t0, buffer); ++ __ add(tmp, tmp, t0); // tmp := tmp + *buffer_adr + ++ // Record the previous value ++ __ sd(pre_val, Address(tmp, 0)); ++ __ j(done); + -+void InterpreterMacroAssembler::check_and_handle_earlyret(Register java_thread) { -+ if (JvmtiExport::can_force_early_return()) { -+ Label L; -+ ld(t0, Address(xthread, JavaThread::jvmti_thread_state_offset())); -+ beqz(t0, L); // if [thread->jvmti_thread_state() == NULL] then exit ++ __ bind(runtime); ++ // save the live input values ++ RegSet saved = RegSet::of(pre_val); ++ if (tosca_live) { saved += RegSet::of(x10); } ++ if (obj != noreg) { saved += RegSet::of(obj); } + -+ // Initiate earlyret handling only if it is not already being processed. -+ // If the flag has the earlyret_processing bit set, it means that this code -+ // is called *during* earlyret handling - we don't want to reenter. -+ lwu(t0, Address(t0, JvmtiThreadState::earlyret_state_offset())); -+ mv(t1, JvmtiThreadState::earlyret_pending); -+ bne(t0, t1, L); ++ __ push_reg(saved, sp); + -+ // Call Interpreter::remove_activation_early_entry() to get the address of the -+ // same-named entrypoint in the generated interpreter code. 
-+ ld(t0, Address(xthread, JavaThread::jvmti_thread_state_offset())); -+ lwu(t0, Address(t0, JvmtiThreadState::earlyret_tos_offset())); -+ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_early_entry), t0); -+ jr(x10); -+ bind(L); ++ if (expand_call) { ++ assert(pre_val != c_rarg1, "smashed arg"); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); ++ } else { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); + } -+} + -+void InterpreterMacroAssembler::get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset) { -+ assert(bcp_offset >= 0, "bcp is still pointing to start of bytecode"); -+ lhu(reg, Address(xbcp, bcp_offset)); -+ revb_h(reg, reg); -+} ++ __ pop_reg(saved, sp); + -+void InterpreterMacroAssembler::get_dispatch() { -+ int32_t offset = 0; -+ la_patchable(xdispatch, ExternalAddress((address)Interpreter::dispatch_table()), offset); -+ addi(xdispatch, xdispatch, offset); -+} ++ __ bind(done); + -+void InterpreterMacroAssembler::get_cache_index_at_bcp(Register index, -+ int bcp_offset, -+ size_t index_size) { -+ assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); -+ if (index_size == sizeof(u2)) { -+ load_unsigned_short(index, Address(xbcp, bcp_offset)); -+ } else if (index_size == sizeof(u4)) { -+ lwu(index, Address(xbcp, bcp_offset)); -+ // Check if the secondary index definition is still ~x, otherwise -+ // we have to change the following assembler code to calculate the -+ // plain index. -+ assert(ConstantPool::decode_invokedynamic_index(~123) == 123, "else change next line"); -+ xori(index, index, -1); -+ addw(index, index, zr); -+ } else if (index_size == sizeof(u1)) { -+ load_unsigned_byte(index, Address(xbcp, bcp_offset)); -+ } else { -+ ShouldNotReachHere(); -+ } +} + -+// Return -+// Rindex: index into constant pool -+// Rcache: address of cache entry - ConstantPoolCache::base_offset() -+// -+// A caller must add ConstantPoolCache::base_offset() to Rcache to get -+// the true address of the cache entry. -+// -+void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache, -+ Register index, -+ int bcp_offset, -+ size_t index_size) { -+ assert_different_registers(cache, index); -+ assert_different_registers(cache, xcpool); -+ get_cache_index_at_bcp(index, bcp_offset, index_size); -+ assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); -+ // Convert from field index to ConstantPoolCacheEntry -+ // riscv already has the cache in xcpool so there is no need to -+ // install it in cache. Instead we pre-add the indexed offset to -+ // xcpool and return it in cache. All clients of this method need to -+ // be modified accordingly. 
-+ shadd(cache, index, xcpool, cache, 5); -+} ++void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, ++ Register store_addr, ++ Register new_val, ++ Register thread, ++ Register tmp, ++ Register tmp2) { ++ assert_cond(masm != NULL); ++ assert(thread == xthread, "must be"); ++ assert_different_registers(store_addr, new_val, thread, tmp, tmp2, ++ t0); ++ assert(store_addr != noreg && new_val != noreg && tmp != noreg && ++ tmp2 != noreg, "expecting a register"); + ++ Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); + -+void InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register cache, -+ Register index, -+ Register bytecode, -+ int byte_no, -+ int bcp_offset, -+ size_t index_size) { -+ get_cache_and_index_at_bcp(cache, index, bcp_offset, index_size); -+ // We use a 32-bit load here since the layout of 64-bit words on -+ // little-endian machines allow us that. -+ // n.b. unlike x86 cache already includes the index offset -+ la(bytecode, Address(cache, -+ ConstantPoolCache::base_offset() + -+ ConstantPoolCacheEntry::indices_offset())); -+ membar(MacroAssembler::AnyAny); -+ lwu(bytecode, bytecode); -+ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); -+ const int shift_count = (1 + byte_no) * BitsPerByte; -+ slli(bytecode, bytecode, XLEN - (shift_count + BitsPerByte)); -+ srli(bytecode, bytecode, XLEN - BitsPerByte); -+} ++ BarrierSet* bs = BarrierSet::barrier_set(); ++ CardTableBarrierSet* ctbs = barrier_set_cast(bs); ++ CardTable* ct = ctbs->card_table(); + -+void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache, -+ Register tmp, -+ int bcp_offset, -+ size_t index_size) { -+ assert(cache != tmp, "must use different register"); -+ get_cache_index_at_bcp(tmp, bcp_offset, index_size); -+ assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); -+ // Convert from field index to ConstantPoolCacheEntry index -+ // and from word offset to byte offset -+ assert(exact_log2(in_bytes(ConstantPoolCacheEntry::size_in_bytes())) == 2 + LogBytesPerWord, "else change next line"); -+ ld(cache, Address(fp, frame::interpreter_frame_cache_offset * wordSize)); -+ // skip past the header -+ add(cache, cache, in_bytes(ConstantPoolCache::base_offset())); -+ // construct pointer to cache entry -+ shadd(cache, tmp, cache, tmp, 2 + LogBytesPerWord); -+} ++ Label done; ++ Label runtime; + -+// Load object from cpool->resolved_references(index) -+void InterpreterMacroAssembler::load_resolved_reference_at_index( -+ Register result, Register index, Register tmp) { -+ assert_different_registers(result, index); ++ // Does store cross heap regions? 
+ -+ get_constant_pool(result); -+ // Load pointer for resolved_references[] objArray -+ ld(result, Address(result, ConstantPool::cache_offset_in_bytes())); -+ ld(result, Address(result, ConstantPoolCache::resolved_references_offset_in_bytes())); -+ resolve_oop_handle(result, tmp); -+ // Add in the index -+ addi(index, index, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); -+ shadd(result, index, result, index, LogBytesPerHeapOop); -+ load_heap_oop(result, Address(result, 0)); -+} ++ __ xorr(tmp, store_addr, new_val); ++ __ srli(tmp, tmp, HeapRegion::LogOfHRGrainBytes); ++ __ beqz(tmp, done); + -+void InterpreterMacroAssembler::load_resolved_klass_at_offset( -+ Register cpool, Register index, Register klass, Register temp) { -+ shadd(temp, index, cpool, temp, LogBytesPerWord); -+ lhu(temp, Address(temp, sizeof(ConstantPool))); // temp = resolved_klass_index -+ ld(klass, Address(cpool, ConstantPool::resolved_klasses_offset_in_bytes())); // klass = cpool->_resolved_klasses -+ shadd(klass, temp, klass, temp, LogBytesPerWord); -+ ld(klass, Address(klass, Array::base_offset_in_bytes())); -+} ++ // crosses regions, storing NULL? + -+// Generate a subtype check: branch to ok_is_subtype if sub_klass is a -+// subtype of super_klass. -+// -+// Args: -+// x10: superklass -+// Rsub_klass: subklass -+// -+// Kills: -+// x12, x15 -+void InterpreterMacroAssembler::gen_subtype_check(Register Rsub_klass, -+ Label& ok_is_subtype) { -+ assert(Rsub_klass != x10, "x10 holds superklass"); -+ assert(Rsub_klass != x12, "x12 holds 2ndary super array length"); -+ assert(Rsub_klass != x15, "x15 holds 2ndary super array scan ptr"); ++ __ beqz(new_val, done); + -+ // Profile the not-null value's klass. -+ profile_typecheck(x12, Rsub_klass, x15); // blows x12, reloads x15 ++ // storing region crossing non-NULL, is card already dirty? + -+ // Do the check. -+ check_klass_subtype(Rsub_klass, x10, x12, ok_is_subtype); // blows x12 ++ ExternalAddress cardtable((address) ct->byte_map_base()); ++ const Register card_addr = tmp; + -+ // Profile the failure of the check. -+ profile_typecheck_failed(x12); // blows x12 -+} ++ __ srli(card_addr, store_addr, CardTable::card_shift()); + -+// Java Expression Stack ++ // get the address of the card ++ __ load_byte_map_base(tmp2); ++ __ add(card_addr, card_addr, tmp2); ++ __ lbu(tmp2, Address(card_addr)); ++ __ mv(t0, (int)G1CardTable::g1_young_card_val()); ++ __ beq(tmp2, t0, done); + -+void InterpreterMacroAssembler::pop_ptr(Register r) { -+ ld(r, Address(esp, 0)); -+ addi(esp, esp, wordSize); -+} ++ assert((int)CardTable::dirty_card_val() == 0, "must be 0"); + -+void InterpreterMacroAssembler::pop_i(Register r) { -+ lw(r, Address(esp, 0)); // lw do signed extended -+ addi(esp, esp, wordSize); -+} ++ __ membar(MacroAssembler::StoreLoad); + -+void InterpreterMacroAssembler::pop_l(Register r) { -+ ld(r, Address(esp, 0)); -+ addi(esp, esp, 2 * Interpreter::stackElementSize); -+} ++ __ lbu(tmp2, Address(card_addr)); ++ __ beqz(tmp2, done); + -+void InterpreterMacroAssembler::push_ptr(Register r) { -+ addi(esp, esp, -wordSize); -+ sd(r, Address(esp, 0)); -+} ++ // storing a region crossing, non-NULL oop, card is clean. ++ // dirty card and log. 
+ -+void InterpreterMacroAssembler::push_i(Register r) { -+ addi(esp, esp, -wordSize); -+ addw(r, r, zr); // signed extended -+ sd(r, Address(esp, 0)); -+} ++ __ sb(zr, Address(card_addr)); + -+void InterpreterMacroAssembler::push_l(Register r) { -+ addi(esp, esp, -2 * wordSize); -+ sd(zr, Address(esp, wordSize)); -+ sd(r, Address(esp)); -+} ++ __ ld(t0, queue_index); ++ __ beqz(t0, runtime); ++ __ sub(t0, t0, wordSize); ++ __ sd(t0, queue_index); + -+void InterpreterMacroAssembler::pop_f(FloatRegister r) { -+ flw(r, esp, 0); -+ addi(esp, esp, wordSize); -+} ++ __ ld(tmp2, buffer); ++ __ add(t0, tmp2, t0); ++ __ sd(card_addr, Address(t0, 0)); ++ __ j(done); + -+void InterpreterMacroAssembler::pop_d(FloatRegister r) { -+ fld(r, esp, 0); -+ addi(esp, esp, 2 * Interpreter::stackElementSize); ++ __ bind(runtime); ++ // save the live input values ++ RegSet saved = RegSet::of(store_addr); ++ __ push_reg(saved, sp); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread); ++ __ pop_reg(saved, sp); ++ ++ __ bind(done); +} + -+void InterpreterMacroAssembler::push_f(FloatRegister r) { -+ addi(esp, esp, -wordSize); -+ fsw(r, Address(esp, 0)); ++void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread) { ++ assert_cond(masm != NULL); ++ bool on_oop = is_reference_type(type); ++ bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0; ++ bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0; ++ bool on_reference = on_weak || on_phantom; ++ ModRefBarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); ++ if (on_oop && on_reference) { ++ // RA is live. It must be saved around calls. ++ __ enter(); // barrier may call runtime ++ // Generate the G1 pre-barrier code to log the value of ++ // the referent field in an SATB buffer. 
++ g1_write_barrier_pre(masm /* masm */, ++ noreg /* obj */, ++ dst /* pre_val */, ++ xthread /* thread */, ++ tmp1 /* tmp */, ++ true /* tosca_live */, ++ true /* expand_call */); ++ __ leave(); ++ } +} + -+void InterpreterMacroAssembler::push_d(FloatRegister r) { -+ addi(esp, esp, -2 * wordSize); -+ fsd(r, Address(esp, 0)); -+} -+ -+void InterpreterMacroAssembler::pop(TosState state) { -+ switch (state) { -+ case atos: -+ pop_ptr(); -+ verify_oop(x10); -+ break; -+ case btos: // fall through -+ case ztos: // fall through -+ case ctos: // fall through -+ case stos: // fall through -+ case itos: -+ pop_i(); -+ break; -+ case ltos: -+ pop_l(); -+ break; -+ case ftos: -+ pop_f(); -+ break; -+ case dtos: -+ pop_d(); -+ break; -+ case vtos: -+ /* nothing to do */ -+ break; -+ default: -+ ShouldNotReachHere(); ++void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ assert_cond(masm != NULL); ++ // flatten object address if needed ++ if (dst.offset() == 0) { ++ if (dst.base() != x13) { ++ __ mv(x13, dst.base()); ++ } ++ } else { ++ __ la(x13, dst); + } -+} + -+void InterpreterMacroAssembler::push(TosState state) { -+ switch (state) { -+ case atos: -+ verify_oop(x10); -+ push_ptr(); -+ break; -+ case btos: // fall through -+ case ztos: // fall through -+ case ctos: // fall through -+ case stos: // fall through -+ case itos: -+ push_i(); -+ break; -+ case ltos: -+ push_l(); -+ break; -+ case ftos: -+ push_f(); -+ break; -+ case dtos: -+ push_d(); -+ break; -+ case vtos: -+ /* nothing to do */ -+ break; -+ default: -+ ShouldNotReachHere(); ++ g1_write_barrier_pre(masm, ++ x13 /* obj */, ++ tmp2 /* pre_val */, ++ xthread /* thread */, ++ tmp1 /* tmp */, ++ val != noreg /* tosca_live */, ++ false /* expand_call */); ++ ++ if (val == noreg) { ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(x13, 0), noreg, noreg, noreg); ++ } else { ++ // G1 barrier needs uncompressed oop for region cross check. ++ Register new_val = val; ++ if (UseCompressedOops) { ++ new_val = t1; ++ __ mv(new_val, val); ++ } ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(x13, 0), val, noreg, noreg); ++ g1_write_barrier_post(masm, ++ x13 /* store_adr */, ++ new_val /* new_val */, ++ xthread /* thread */, ++ tmp1 /* tmp */, ++ tmp2 /* tmp2 */); + } +} + -+// Helpers for swap and dup -+void InterpreterMacroAssembler::load_ptr(int n, Register val) { -+ ld(val, Address(esp, Interpreter::expr_offset_in_bytes(n))); -+} ++#ifdef COMPILER1 + -+void InterpreterMacroAssembler::store_ptr(int n, Register val) { -+ sd(val, Address(esp, Interpreter::expr_offset_in_bytes(n))); -+} ++#undef __ ++#define __ ce->masm()-> + -+void InterpreterMacroAssembler::load_float(Address src) { -+ flw(f10, src); -+} ++void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub) { ++ G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); + -+void InterpreterMacroAssembler::load_double(Address src) { -+ fld(f10, src); -+} ++ // At this point we know that marking is in progress. ++ // If do_load() is true then we have to emit the ++ // load of the previous value; otherwise it has already ++ // been loaded into _pre_val. 
++ __ bind(*stub->entry()); + -+void InterpreterMacroAssembler::prepare_to_jump_from_interpreted() { -+ // set sender sp -+ mv(x30, sp); -+ // record last_sp -+ sd(esp, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); -+} ++ assert(stub->pre_val()->is_register(), "Precondition."); + -+// Jump to from_interpreted entry of a call unless single stepping is possible -+// in this thread in which case we must call the i2i entry -+void InterpreterMacroAssembler::jump_from_interpreted(Register method) { -+ prepare_to_jump_from_interpreted(); -+ if (JvmtiExport::can_post_interpreter_events()) { -+ Label run_compiled_code; -+ // JVMTI events, such as single-stepping, are implemented partly by avoiding running -+ // compiled code in threads for which the event is enabled. Check here for -+ // interp_only_mode if these events CAN be enabled. -+ lwu(t0, Address(xthread, JavaThread::interp_only_mode_offset())); -+ beqz(t0, run_compiled_code); -+ ld(t0, Address(method, Method::interpreter_entry_offset())); -+ jr(t0); -+ bind(run_compiled_code); -+ } ++ Register pre_val_reg = stub->pre_val()->as_register(); + -+ ld(t0, Address(method, Method::from_interpreted_offset())); -+ jr(t0); ++ if (stub->do_load()) { ++ ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /* wide */); ++ } ++ __ beqz(pre_val_reg, *stub->continuation(), /* is_far */ true); ++ ce->store_parameter(stub->pre_val()->as_register(), 0); ++ __ far_call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin())); ++ __ j(*stub->continuation()); +} + -+// The following two routines provide a hook so that an implementation -+// can schedule the dispatch in two parts. amd64 does not do this. -+void InterpreterMacroAssembler::dispatch_prolog(TosState state, int step) { ++void G1BarrierSetAssembler::gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub) { ++ G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); ++ __ bind(*stub->entry()); ++ assert(stub->addr()->is_register(), "Precondition"); ++ assert(stub->new_val()->is_register(), "Precondition"); ++ Register new_val_reg = stub->new_val()->as_register(); ++ __ beqz(new_val_reg, *stub->continuation(), /* is_far */ true); ++ ce->store_parameter(stub->addr()->as_pointer_register(), 0); ++ __ far_call(RuntimeAddress(bs->post_barrier_c1_runtime_code_blob()->code_begin())); ++ __ j(*stub->continuation()); +} + -+void InterpreterMacroAssembler::dispatch_epilog(TosState state, int step) { -+ dispatch_next(state, step); -+} ++#undef __ + -+void InterpreterMacroAssembler::dispatch_base(TosState state, -+ address* table, -+ bool verifyoop, -+ bool generate_poll, -+ Register Rs) { -+ // Pay attention to the argument Rs, which is acquiesce in t0. 
-+ if (VerifyActivationFrameSize) { -+ Unimplemented(); -+ } -+ if (verifyoop && state == atos) { -+ verify_oop(x10); -+ } ++#define __ sasm-> + -+ Label safepoint; -+ address* const safepoint_table = Interpreter::safept_table(state); -+ bool needs_thread_local_poll = generate_poll && -+ SafepointMechanism::uses_thread_local_poll() && table != safepoint_table; ++void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) { ++ __ prologue("g1_pre_barrier", false); + -+ if (needs_thread_local_poll) { -+ NOT_PRODUCT(block_comment("Thread-local Safepoint poll")); -+ ld(t1, Address(xthread, Thread::polling_page_offset())); -+ andi(t1, t1, 1 << exact_log2(SafepointMechanism::poll_bit())); -+ bnez(t1, safepoint); -+ } -+ if (table == Interpreter::dispatch_table(state)) { -+ mv(t1, Interpreter::distance_from_dispatch_table(state)); -+ add(t1, Rs, t1); -+ shadd(t1, t1, xdispatch, t1, 3); ++ BarrierSet* bs = BarrierSet::barrier_set(); ++ ++ // arg0 : previous value of memory ++ const Register pre_val = x10; ++ const Register thread = xthread; ++ const Register tmp = t0; ++ ++ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); ++ Address queue_index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); ++ ++ Label done; ++ Label runtime; ++ ++ // Is marking still active? ++ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { // 4-byte width ++ __ lwu(tmp, in_progress); + } else { -+ mv(t1, (address)table); -+ shadd(t1, Rs, t1, Rs, 3); ++ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); ++ __ lbu(tmp, in_progress); + } -+ ld(t1, Address(t1)); -+ jr(t1); ++ __ beqz(tmp, done); + -+ if (needs_thread_local_poll) { -+ bind(safepoint); -+ la(t1, ExternalAddress((address)safepoint_table)); -+ shadd(t1, Rs, t1, Rs, 3); -+ ld(t1, Address(t1)); -+ jr(t1); -+ } -+} ++ // Can we store original value in the thread's buffer? 
++ __ ld(tmp, queue_index); ++ __ beqz(tmp, runtime); + -+void InterpreterMacroAssembler::dispatch_only(TosState state, bool generate_poll, Register Rs) { -+ dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll, Rs); -+} ++ __ sub(tmp, tmp, wordSize); ++ __ sd(tmp, queue_index); ++ __ ld(t1, buffer); ++ __ add(tmp, tmp, t1); ++ __ load_parameter(0, t1); ++ __ sd(t1, Address(tmp, 0)); ++ __ j(done); + -+void InterpreterMacroAssembler::dispatch_only_normal(TosState state, Register Rs) { -+ dispatch_base(state, Interpreter::normal_table(state), Rs); -+} ++ __ bind(runtime); ++ __ push_call_clobbered_registers(); ++ __ load_parameter(0, pre_val); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); ++ __ pop_call_clobbered_registers(); ++ __ bind(done); + -+void InterpreterMacroAssembler::dispatch_only_noverify(TosState state, Register Rs) { -+ dispatch_base(state, Interpreter::normal_table(state), false, Rs); ++ __ epilogue(); +} + -+void InterpreterMacroAssembler::dispatch_next(TosState state, int step, bool generate_poll) { -+ // load next bytecode -+ load_unsigned_byte(t0, Address(xbcp, step)); -+ add(xbcp, xbcp, step); -+ dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll); -+} ++void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* sasm) { ++ __ prologue("g1_post_barrier", false); + -+void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) { -+ // load current bytecode -+ lbu(t0, Address(xbcp, 0)); -+ dispatch_base(state, table); -+} ++ // arg0 : store_address ++ Address store_addr(fp, 2 * BytesPerWord); // 2 BytesPerWord from fp + -+// remove activation -+// -+// Unlock the receiver if this is a synchronized method. -+// Unlock any Java monitors from syncronized blocks. -+// Remove the activation from the stack. -+// -+// If there are locked Java monitors -+// If throw_monitor_exception -+// throws IllegalMonitorStateException -+// Else if install_monitor_exception -+// installs IllegalMonitorStateException -+// Else -+// no error processing -+void InterpreterMacroAssembler::remove_activation( -+ TosState state, -+ bool throw_monitor_exception, -+ bool install_monitor_exception, -+ bool notify_jvmdi) { -+ // Note: Registers x13 may be in use for the -+ // result check if synchronized method -+ Label unlocked, unlock, no_unlock; ++ BarrierSet* bs = BarrierSet::barrier_set(); ++ CardTableBarrierSet* ctbs = barrier_set_cast(bs); ++ CardTable* ct = ctbs->card_table(); + -+ // get the value of _do_not_unlock_if_synchronized into x13 -+ const Address do_not_unlock_if_synchronized(xthread, -+ in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); -+ lbu(x13, do_not_unlock_if_synchronized); -+ sb(zr, do_not_unlock_if_synchronized); // reset the flag ++ Label done; ++ Label runtime; + -+ // get method access flags -+ ld(x11, Address(fp, frame::interpreter_frame_method_offset * wordSize)); -+ ld(x12, Address(x11, Method::access_flags_offset())); -+ andi(t0, x12, JVM_ACC_SYNCHRONIZED); -+ beqz(t0, unlocked); ++ // At this point we know new_value is non-NULL and the new_value crosses regions. ++ // Must check to see if card is already dirty ++ const Register thread = xthread; + -+ // Don't unlock anything if the _do_not_unlock_if_synchronized flag -+ // is set. 
-+ bnez(x13, no_unlock); ++ Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); + -+ // unlock monitor -+ push(state); // save result ++ const Register card_offset = t1; ++ // RA is free here, so we can use it to hold the byte_map_base. ++ const Register byte_map_base = ra; + -+ // BasicObjectLock will be first in list, since this is a -+ // synchronized method. However, need to check that the object has -+ // not been unlocked by an explicit monitorexit bytecode. -+ const Address monitor(fp, frame::interpreter_frame_initial_sp_offset * -+ wordSize - (int) sizeof(BasicObjectLock)); -+ // We use c_rarg1 so that if we go slow path it will be the correct -+ // register for unlock_object to pass to VM directly -+ la(c_rarg1, monitor); // address of first monitor ++ assert_different_registers(card_offset, byte_map_base, t0); + -+ ld(x10, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes())); -+ bnez(x10, unlock); ++ __ load_parameter(0, card_offset); ++ __ srli(card_offset, card_offset, CardTable::card_shift()); ++ __ load_byte_map_base(byte_map_base); + -+ pop(state); -+ if (throw_monitor_exception) { -+ // Entry already unlocked, need to throw exception -+ call_VM(noreg, CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::throw_illegal_monitor_state_exception)); -+ should_not_reach_here(); -+ } else { -+ // Monitor already unlocked during a stack unroll. If requested, -+ // install an illegal_monitor_state_exception. Continue with -+ // stack unrolling. -+ if (install_monitor_exception) { -+ call_VM(noreg, CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::new_illegal_monitor_state_exception)); -+ } -+ j(unlocked); -+ } ++ // Convert card offset into an address in card_addr ++ Register card_addr = card_offset; ++ __ add(card_addr, byte_map_base, card_addr); + -+ bind(unlock); -+ unlock_object(c_rarg1); -+ pop(state); ++ __ lbu(t0, Address(card_addr, 0)); ++ __ sub(t0, t0, (int)G1CardTable::g1_young_card_val()); ++ __ beqz(t0, done); + -+ // Check that for block-structured locking (i.e., that all locked -+ // objects has been unlocked) -+ bind(unlocked); ++ assert((int)CardTable::dirty_card_val() == 0, "must be 0"); + -+ // x10: Might contain return value ++ __ membar(MacroAssembler::StoreLoad); ++ __ lbu(t0, Address(card_addr, 0)); ++ __ beqz(t0, done); + -+ // Check that all monitors are unlocked -+ { -+ Label loop, exception, entry, restart; -+ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; -+ const Address monitor_block_top( -+ fp, frame::interpreter_frame_monitor_block_top_offset * wordSize); -+ const Address monitor_block_bot( -+ fp, frame::interpreter_frame_initial_sp_offset * wordSize); ++ // storing region crossing non-NULL, card is clean. ++ // dirty card and log. 
++ __ sb(zr, Address(card_addr, 0)); + -+ bind(restart); -+ // We use c_rarg1 so that if we go slow path it will be the correct -+ // register for unlock_object to pass to VM directly -+ ld(c_rarg1, monitor_block_top); // points to current entry, starting -+ // with top-most entry -+ la(x9, monitor_block_bot); // points to word before bottom of -+ // monitor block ++ __ ld(t0, queue_index); ++ __ beqz(t0, runtime); ++ __ sub(t0, t0, wordSize); ++ __ sd(t0, queue_index); + -+ j(entry); ++ // Reuse RA to hold buffer_addr ++ const Register buffer_addr = ra; + -+ // Entry already locked, need to throw exception -+ bind(exception); ++ __ ld(buffer_addr, buffer); ++ __ add(t0, buffer_addr, t0); ++ __ sd(card_addr, Address(t0, 0)); ++ __ j(done); + -+ if (throw_monitor_exception) { -+ // Throw exception -+ MacroAssembler::call_VM(noreg, -+ CAST_FROM_FN_PTR(address, InterpreterRuntime:: -+ throw_illegal_monitor_state_exception)); ++ __ bind(runtime); ++ __ push_call_clobbered_registers(); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread); ++ __ pop_call_clobbered_registers(); ++ __ bind(done); ++ __ epilogue(); ++} + -+ should_not_reach_here(); -+ } else { -+ // Stack unrolling. Unlock object and install illegal_monitor_exception. -+ // Unlock does not block, so don't have to worry about the frame. -+ // We don't have to preserve c_rarg1 since we are going to throw an exception. ++#undef __ + -+ push(state); -+ unlock_object(c_rarg1); -+ pop(state); ++#endif // COMPILER1 +diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp +new file mode 100644 +index 00000000000..37bc183f39c +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp +@@ -0,0 +1,78 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ + -+ if (install_monitor_exception) { -+ call_VM(noreg, CAST_FROM_FN_PTR(address, -+ InterpreterRuntime:: -+ new_illegal_monitor_state_exception)); -+ } ++#ifndef CPU_RISCV_GC_G1_G1BARRIERSETASSEMBLER_RISCV_HPP ++#define CPU_RISCV_GC_G1_G1BARRIERSETASSEMBLER_RISCV_HPP + -+ j(restart); -+ } ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/modRefBarrierSetAssembler.hpp" ++#include "utilities/macros.hpp" + -+ bind(loop); -+ // check if current entry is used -+ add(t0, c_rarg1, BasicObjectLock::obj_offset_in_bytes()); -+ ld(t0, Address(t0, 0)); -+ bnez(t0, exception); ++#ifdef COMPILER1 ++class LIR_Assembler; ++#endif ++class StubAssembler; ++class G1PreBarrierStub; ++class G1PostBarrierStub; + -+ add(c_rarg1, c_rarg1, entry_size); // otherwise advance to next entry -+ bind(entry); -+ bne(c_rarg1, x9, loop); // check if bottom reached if not at bottom then check this entry -+ } ++class G1BarrierSetAssembler: public ModRefBarrierSetAssembler { ++protected: ++ void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, RegSet saved_regs); ++ void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register start, Register count, Register tmp, RegSet saved_regs); + -+ bind(no_unlock); ++ void g1_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call); + -+ // jvmti support -+ if (notify_jvmdi) { -+ notify_method_exit(state, NotifyJVMTI); // preserve TOSCA ++ void g1_write_barrier_post(MacroAssembler* masm, ++ Register store_addr, ++ Register new_val, ++ Register thread, ++ Register tmp, ++ Register tmp2); + -+ } else { -+ notify_method_exit(state, SkipNotifyJVMTI); // preserve TOSCA -+ } ++ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); + -+ // remove activation -+ // get sender esp -+ ld(t1, -+ Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); -+ if (StackReservedPages > 0) { -+ // testing if reserved zone needs to be re-enabled -+ Label no_reserved_zone_enabling; ++public: ++#ifdef COMPILER1 ++ void gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub); ++ void gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub); + -+ ld(t0, Address(xthread, JavaThread::reserved_stack_activation_offset())); -+ ble(t1, t0, no_reserved_zone_enabling); ++ void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); ++ void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm); ++#endif + -+ call_VM_leaf( -+ CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), xthread); -+ call_VM(noreg, CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::throw_delayed_StackOverflowError)); -+ should_not_reach_here(); ++ void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread); ++}; + -+ bind(no_reserved_zone_enabling); -+ } -+ -+ // restore sender esp -+ mv(esp, t1); -+ // remove frame anchor -+ leave(); -+ // If we're returning to interpreted code we will shortly be -+ // adjusting SP to allow some space for ESP. If we're returning to -+ // compiled code the saved sender SP was saved in sender_sp, so this -+ // restores it. 
-+ andi(sp, esp, -16); -+} -+ -+// Lock object -+// -+// Args: -+// c_rarg1: BasicObjectLock to be used for locking -+// -+// Kills: -+// x10 -+// c_rarg0, c_rarg1, c_rarg2, c_rarg3, .. (param regs) -+// t0, t1 (temp regs) -+void InterpreterMacroAssembler::lock_object(Register lock_reg) -+{ -+ assert(lock_reg == c_rarg1, "The argument is only for looks. It must be c_rarg1"); -+ if (UseHeavyMonitors) { -+ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), -+ lock_reg); -+ } else { -+ Label done; -+ -+ const Register swap_reg = x10; -+ const Register tmp = c_rarg2; -+ const Register obj_reg = c_rarg3; // Will contain the oop ++#endif // CPU_RISCV_GC_G1_G1BARRIERSETASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/gc/g1/g1Globals_riscv.hpp b/src/hotspot/cpu/riscv/gc/g1/g1Globals_riscv.hpp +new file mode 100644 +index 00000000000..8735fd014ff +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/g1/g1Globals_riscv.hpp +@@ -0,0 +1,31 @@ ++/* ++ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ const int obj_offset = BasicObjectLock::obj_offset_in_bytes(); -+ const int lock_offset = BasicObjectLock::lock_offset_in_bytes (); -+ const int mark_offset = lock_offset + -+ BasicLock::displaced_header_offset_in_bytes(); ++#ifndef CPU_RISCV_GC_G1_G1GLOBALS_RISCV_HPP ++#define CPU_RISCV_GC_G1_G1GLOBALS_RISCV_HPP + -+ Label slow_case; ++const size_t G1MergeHeapRootsPrefetchCacheSize = 16; + -+ // Load object pointer into obj_reg c_rarg3 -+ ld(obj_reg, Address(lock_reg, obj_offset)); ++#endif // CPU_RISCV_GC_G1_G1GLOBALS_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp +new file mode 100644 +index 00000000000..3c115a2ea02 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp +@@ -0,0 +1,302 @@ ++/* ++ * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ if (UseBiasedLocking) { -+ biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp, false, done, &slow_case); -+ } ++#include "precompiled.hpp" ++#include "classfile/classLoaderData.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "gc/shared/barrierSetNMethod.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "memory/universe.hpp" ++#include "runtime/jniHandles.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.hpp" + -+ // Load (object->mark() | 1) into swap_reg -+ ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes())); -+ ori(swap_reg, t0, 1); ++#define __ masm-> + -+ // Save (object->mark() | 1) into BasicLock's displaced header -+ sd(swap_reg, Address(lock_reg, mark_offset)); ++void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread) { ++ assert_cond(masm != NULL); + -+ assert(lock_offset == 0, -+ "displached header must be first word in BasicObjectLock"); ++ // RA is live. It must be saved around calls. 
+ -+ if (PrintBiasedLockingStatistics) { -+ Label fail, fast; -+ cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, t0, fast, &fail); -+ bind(fast); -+ atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()), -+ t1, t0); -+ j(done); -+ bind(fail); -+ } else { -+ cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, t0, done, /*fallthrough*/NULL); ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ bool in_native = (decorators & IN_NATIVE) != 0; ++ bool is_not_null = (decorators & IS_NOT_NULL) != 0; ++ switch (type) { ++ case T_OBJECT: // fall through ++ case T_ARRAY: { ++ if (in_heap) { ++ if (UseCompressedOops) { ++ __ lwu(dst, src); ++ if (is_not_null) { ++ __ decode_heap_oop_not_null(dst); ++ } else { ++ __ decode_heap_oop(dst); ++ } ++ } else { ++ __ ld(dst, src); ++ } ++ } else { ++ assert(in_native, "why else?"); ++ __ ld(dst, src); ++ } ++ break; + } ++ case T_BOOLEAN: __ load_unsigned_byte (dst, src); break; ++ case T_BYTE: __ load_signed_byte (dst, src); break; ++ case T_CHAR: __ load_unsigned_short(dst, src); break; ++ case T_SHORT: __ load_signed_short (dst, src); break; ++ case T_INT: __ lw (dst, src); break; ++ case T_LONG: __ ld (dst, src); break; ++ case T_ADDRESS: __ ld (dst, src); break; ++ case T_FLOAT: __ flw (f10, src); break; ++ case T_DOUBLE: __ fld (f10, src); break; ++ default: Unimplemented(); ++ } ++} + -+ // Test if the oopMark is an obvious stack pointer, i.e., -+ // 1) (mark & 7) == 0, and -+ // 2) sp <= mark < mark + os::pagesize() -+ // -+ // These 3 tests can be done by evaluating the following -+ // expression: ((mark - sp) & (7 - os::vm_page_size())), -+ // assuming both stack pointer and pagesize have their -+ // least significant 3 bits clear. -+ // NOTE: the oopMark is in swap_reg x10 as the result of cmpxchg -+ sub(swap_reg, swap_reg, sp); -+ mv(t0, (int64_t)(7 - os::vm_page_size())); -+ andr(swap_reg, swap_reg, t0); -+ -+ // Save the test result, for recursive case, the result is zero -+ sd(swap_reg, Address(lock_reg, mark_offset)); -+ -+ if (PrintBiasedLockingStatistics) { -+ bnez(swap_reg, slow_case); -+ atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()), -+ t1, t0); ++void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ assert_cond(masm != NULL); ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ bool in_native = (decorators & IN_NATIVE) != 0; ++ switch (type) { ++ case T_OBJECT: // fall through ++ case T_ARRAY: { ++ val = val == noreg ? 
zr : val; ++ if (in_heap) { ++ if (UseCompressedOops) { ++ assert(!dst.uses(val), "not enough registers"); ++ if (val != zr) { ++ __ encode_heap_oop(val); ++ } ++ __ sw(val, dst); ++ } else { ++ __ sd(val, dst); ++ } ++ } else { ++ assert(in_native, "why else?"); ++ __ sd(val, dst); ++ } ++ break; + } -+ beqz(swap_reg, done); -+ -+ bind(slow_case); -+ -+ // Call the runtime routine for slow case -+ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), -+ lock_reg); -+ -+ bind(done); ++ case T_BOOLEAN: ++ __ andi(val, val, 0x1); // boolean is true if LSB is 1 ++ __ sb(val, dst); ++ break; ++ case T_BYTE: __ sb(val, dst); break; ++ case T_CHAR: __ sh(val, dst); break; ++ case T_SHORT: __ sh(val, dst); break; ++ case T_INT: __ sw(val, dst); break; ++ case T_LONG: __ sd(val, dst); break; ++ case T_ADDRESS: __ sd(val, dst); break; ++ case T_FLOAT: __ fsw(f10, dst); break; ++ case T_DOUBLE: __ fsd(f10, dst); break; ++ default: Unimplemented(); + } ++ +} + ++void BarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, ++ Register obj, Register tmp, Label& slowpath) { ++ assert_cond(masm != NULL); ++ // If mask changes we need to ensure that the inverse is still encodable as an immediate ++ STATIC_ASSERT(JNIHandles::weak_tag_mask == 1); ++ __ andi(obj, obj, ~JNIHandles::weak_tag_mask); ++ __ ld(obj, Address(obj, 0)); // *obj ++} + -+// Unlocks an object. Used in monitorexit bytecode and -+// remove_activation. Throws an IllegalMonitorException if object is -+// not locked by current thread. -+// -+// Args: -+// c_rarg1: BasicObjectLock for lock -+// -+// Kills: -+// x10 -+// c_rarg0, c_rarg1, c_rarg2, c_rarg3, ... (param regs) -+// t0, t1 (temp regs) -+void InterpreterMacroAssembler::unlock_object(Register lock_reg) -+{ -+ assert(lock_reg == c_rarg1, "The argument is only for looks. It must be rarg1"); ++// Defines obj, preserves var_size_in_bytes, okay for tmp2 == var_size_in_bytes. 
++void BarrierSetAssembler::tlab_allocate(MacroAssembler* masm, Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register tmp1, ++ Register tmp2, ++ Label& slow_case, ++ bool is_far) { ++ assert_cond(masm != NULL); ++ assert_different_registers(obj, tmp2); ++ assert_different_registers(obj, var_size_in_bytes); ++ Register end = tmp2; + -+ if (UseHeavyMonitors) { -+ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), -+ lock_reg); ++ __ ld(obj, Address(xthread, JavaThread::tlab_top_offset())); ++ if (var_size_in_bytes == noreg) { ++ __ la(end, Address(obj, con_size_in_bytes)); + } else { -+ Label done; -+ -+ const Register swap_reg = x10; -+ const Register header_reg = c_rarg2; // Will contain the old oopMark -+ const Register obj_reg = c_rarg3; // Will contain the oop -+ -+ save_bcp(); // Save in case of exception ++ __ add(end, obj, var_size_in_bytes); ++ } ++ __ ld(t0, Address(xthread, JavaThread::tlab_end_offset())); ++ __ bgtu(end, t0, slow_case, is_far); + -+ // Convert from BasicObjectLock structure to object and BasicLock -+ // structure Store the BasicLock address into x10 -+ la(swap_reg, Address(lock_reg, BasicObjectLock::lock_offset_in_bytes())); ++ // update the tlab top pointer ++ __ sd(end, Address(xthread, JavaThread::tlab_top_offset())); + -+ // Load oop into obj_reg(c_rarg3) -+ ld(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); ++ // recover var_size_in_bytes if necessary ++ if (var_size_in_bytes == end) { ++ __ sub(var_size_in_bytes, var_size_in_bytes, obj); ++ } ++} + -+ // Free entry -+ sd(zr, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); ++// Defines obj, preserves var_size_in_bytes ++void BarrierSetAssembler::eden_allocate(MacroAssembler* masm, Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register tmp1, ++ Label& slow_case, ++ bool is_far) { ++ assert_cond(masm != NULL); ++ assert_different_registers(obj, var_size_in_bytes, tmp1); ++ if (!Universe::heap()->supports_inline_contig_alloc()) { ++ __ j(slow_case); ++ } else { ++ Register end = tmp1; ++ Label retry; ++ __ bind(retry); + -+ if (UseBiasedLocking) { -+ biased_locking_exit(obj_reg, header_reg, done); ++ // Get the current end of the heap ++ ExternalAddress address_end((address) Universe::heap()->end_addr()); ++ { ++ int32_t offset; ++ __ la_patchable(t1, address_end, offset); ++ __ ld(t1, Address(t1, offset)); + } + -+ // Load the old header from BasicLock structure -+ ld(header_reg, Address(swap_reg, -+ BasicLock::displaced_header_offset_in_bytes())); ++ // Get the current top of the heap ++ ExternalAddress address_top((address) Universe::heap()->top_addr()); ++ { ++ int32_t offset; ++ __ la_patchable(t0, address_top, offset); ++ __ addi(t0, t0, offset); ++ __ lr_d(obj, t0, Assembler::aqrl); ++ } + -+ // Test for recursion -+ beqz(header_reg, done); ++ // Adjust it my the size of our new object ++ if (var_size_in_bytes == noreg) { ++ __ la(end, Address(obj, con_size_in_bytes)); ++ } else { ++ __ add(end, obj, var_size_in_bytes); ++ } + -+ // Atomic swap back the old header -+ cmpxchg_obj_header(swap_reg, header_reg, obj_reg, t0, done, /*fallthrough*/NULL); ++ // if end < obj then we wrapped around high memory ++ __ bltu(end, obj, slow_case, is_far); + -+ // Call the runtime routine for slow case. 
-+ sd(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); // restore obj -+ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), -+ lock_reg); ++ __ bgtu(end, t1, slow_case, is_far); + -+ bind(done); ++ // If heap_top hasn't been changed by some other thread, update it. ++ __ sc_d(t1, end, t0, Assembler::rl); ++ __ bnez(t1, retry); + -+ restore_bcp(); ++ incr_allocated_bytes(masm, var_size_in_bytes, con_size_in_bytes, tmp1); + } +} + ++void BarrierSetAssembler::incr_allocated_bytes(MacroAssembler* masm, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register tmp1) { ++ assert_cond(masm != NULL); ++ assert(tmp1->is_valid(), "need temp reg"); + -+void InterpreterMacroAssembler::test_method_data_pointer(Register mdp, -+ Label& zero_continue) { -+ assert(ProfileInterpreter, "must be profiling interpreter"); -+ ld(mdp, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); -+ beqz(mdp, zero_continue); ++ __ ld(tmp1, Address(xthread, in_bytes(JavaThread::allocated_bytes_offset()))); ++ if (var_size_in_bytes->is_valid()) { ++ __ add(tmp1, tmp1, var_size_in_bytes); ++ } else { ++ __ add(tmp1, tmp1, con_size_in_bytes); ++ } ++ __ sd(tmp1, Address(xthread, in_bytes(JavaThread::allocated_bytes_offset()))); +} + -+// Set the method data pointer for the current bcp. -+void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() { -+ assert(ProfileInterpreter, "must be profiling interpreter"); -+ Label set_mdp; -+ push_reg(RegSet::of(x10, x11), sp); // save x10, x11 ++void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm) { ++ BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod(); + -+ // Test MDO to avoid the call if it is NULL. -+ ld(x10, Address(xmethod, in_bytes(Method::method_data_offset()))); -+ beqz(x10, set_mdp); -+ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::bcp_to_di), xmethod, xbcp); -+ // x10: mdi -+ // mdo is guaranteed to be non-zero here, we checked for it before the call. -+ ld(x11, Address(xmethod, in_bytes(Method::method_data_offset()))); -+ la(x11, Address(x11, in_bytes(MethodData::data_offset()))); -+ add(x10, x11, x10); -+ sd(x10, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); -+ bind(set_mdp); -+ pop_reg(RegSet::of(x10, x11), sp); -+} ++ if (bs_nm == NULL) { ++ return; ++ } + -+void InterpreterMacroAssembler::verify_method_data_pointer() { -+ assert(ProfileInterpreter, "must be profiling interpreter"); -+#ifdef ASSERT -+ Label verify_continue; -+ add(sp, sp, -4 * wordSize); -+ sd(x10, Address(sp, 0)); -+ sd(x11, Address(sp, wordSize)); -+ sd(x12, Address(sp, 2 * wordSize)); -+ sd(x13, Address(sp, 3 * wordSize)); -+ test_method_data_pointer(x13, verify_continue); // If mdp is zero, continue -+ get_method(x11); ++ // RISCV atomic operations require that the memory address be naturally aligned. ++ __ align(4); + -+ // If the mdp is valid, it will point to a DataLayout header which is -+ // consistent with the bcp. The converse is highly probable also. 
-+ lh(x12, Address(x13, in_bytes(DataLayout::bci_offset()))); -+ ld(t0, Address(x11, Method::const_offset())); -+ add(x12, x12, t0); -+ la(x12, Address(x12, ConstMethod::codes_offset())); -+ beq(x12, xbcp, verify_continue); -+ // x10: method -+ // xbcp: bcp // xbcp == 22 -+ // x13: mdp -+ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::verify_mdp), -+ x11, xbcp, x13); -+ bind(verify_continue); -+ ld(x10, Address(sp, 0)); -+ ld(x11, Address(sp, wordSize)); -+ ld(x12, Address(sp, 2 * wordSize)); -+ ld(x13, Address(sp, 3 * wordSize)); -+ add(sp, sp, 4 * wordSize); -+#endif // ASSERT -+} ++ Label skip, guard; ++ Address thread_disarmed_addr(xthread, in_bytes(bs_nm->thread_disarmed_offset())); + ++ __ lwu(t0, guard); + -+void InterpreterMacroAssembler::set_mdp_data_at(Register mdp_in, -+ int constant, -+ Register value) { -+ assert(ProfileInterpreter, "must be profiling interpreter"); -+ Address data(mdp_in, constant); -+ sd(value, data); -+} ++ // Subsequent loads of oops must occur after load of guard value. ++ // BarrierSetNMethod::disarm sets guard with release semantics. ++ __ membar(MacroAssembler::LoadLoad); ++ __ lwu(t1, thread_disarmed_addr); ++ __ beq(t0, t1, skip); + ++ int32_t offset = 0; ++ __ movptr_with_offset(t0, StubRoutines::riscv::method_entry_barrier(), offset); ++ __ jalr(ra, t0, offset); ++ __ j(skip); + -+void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, -+ int constant, -+ bool decrement) { -+ increment_mdp_data_at(mdp_in, noreg, constant, decrement); -+} ++ __ bind(guard); + -+void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, -+ Register reg, -+ int constant, -+ bool decrement) { -+ assert(ProfileInterpreter, "must be profiling interpreter"); -+ // %%% this does 64bit counters at best it is wasting space -+ // at worst it is a rare bug when counters overflow ++ assert(__ offset() % 4 == 0, "bad alignment"); ++ __ emit_int32(0); // nmethod guard value. Skipped over in common case. + -+ assert_different_registers(t1, t0, mdp_in, reg); ++ __ bind(skip); ++} + -+ Address addr1(mdp_in, constant); -+ Address addr2(t1, 0); -+ Address &addr = addr1; -+ if (reg != noreg) { -+ la(t1, addr1); -+ add(t1, t1, reg); -+ addr = addr2; ++void BarrierSetAssembler::c2i_entry_barrier(MacroAssembler* masm) { ++ BarrierSetNMethod* bs = BarrierSet::barrier_set()->barrier_set_nmethod(); ++ if (bs == NULL) { ++ return; + } + -+ if (decrement) { -+ ld(t0, addr); -+ addi(t0, t0, -DataLayout::counter_increment); -+ Label L; -+ bltz(t0, L); // skip store if counter underflow -+ sd(t0, addr); -+ bind(L); -+ } else { -+ assert(DataLayout::counter_increment == 1, -+ "flow-free idiom only works with 1"); -+ ld(t0, addr); -+ addi(t0, t0, DataLayout::counter_increment); -+ Label L; -+ blez(t0, L); // skip store if counter overflow -+ sd(t0, addr); -+ bind(L); -+ } -+} ++ Label bad_call; ++ __ beqz(xmethod, bad_call); + -+void InterpreterMacroAssembler::set_mdp_flag_at(Register mdp_in, -+ int flag_byte_constant) { -+ assert(ProfileInterpreter, "must be profiling interpreter"); -+ int flags_offset = in_bytes(DataLayout::flags_offset()); -+ // Set the flag -+ lbu(t1, Address(mdp_in, flags_offset)); -+ ori(t1, t1, flag_byte_constant); -+ sb(t1, Address(mdp_in, flags_offset)); -+} ++ // Pointer chase to the method holder to find out if the method is concurrently unloading. ++ Label method_live; ++ __ load_method_holder_cld(t0, xmethod); + ++ // Is it a strong CLD? 
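The nmethod_entry_barrier emitted above boils down to a guard-word handshake: the nmethod carries a 32-bit guard behind the barrier code (the emit_int32(0) above), each entering thread loads it, orders subsequent oop loads after that load, and compares it against its per-thread disarmed value; only a mismatch takes the runtime stub. The c2i_entry_barrier that begins here continues with the CLD liveness check in the next hunk. A hedged C++ model of the handshake, pairing the acquire-style load with the release store that NativeNMethodBarrier::set_value performs later in this patch (names are illustrative, not HotSpot API):

    #include <atomic>
    #include <cstdint>

    // `guard` models the word after the barrier code; `thread_disarmed` models
    // the value at bs_nm->thread_disarmed_offset() in the entering thread.
    static bool entry_barrier_must_run(const std::atomic<int32_t>* guard,
                                       int32_t thread_disarmed) {
      // lwu t0, guard + membar(LoadLoad): later oop loads may not float above this.
      int32_t value = guard->load(std::memory_order_acquire);
      // beq t0, t1, skip -- equal means the nmethod is disarmed for this thread.
      return value != thread_disarmed;
    }

    // BarrierSetNMethod::disarm stores the disarmed value with release
    // semantics, pairing with the acquire load above.
    static void disarm_sketch(std::atomic<int32_t>* guard, int32_t disarmed_value) {
      guard->store(disarmed_value, std::memory_order_release);
    }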
++ __ lwu(t1, Address(t0, ClassLoaderData::keep_alive_offset())); ++ __ bnez(t1, method_live); + -+void InterpreterMacroAssembler::test_mdp_data_at(Register mdp_in, -+ int offset, -+ Register value, -+ Register test_value_out, -+ Label& not_equal_continue) { -+ assert(ProfileInterpreter, "must be profiling interpreter"); -+ if (test_value_out == noreg) { -+ ld(t1, Address(mdp_in, offset)); -+ bne(value, t1, not_equal_continue); -+ } else { -+ // Put the test value into a register, so caller can use it: -+ ld(test_value_out, Address(mdp_in, offset)); -+ bne(value, test_value_out, not_equal_continue); -+ } -+} ++ // Is it a weak but alive CLD? ++ __ push_reg(RegSet::of(x28, x29), sp); + ++ __ ld(x28, Address(t0, ClassLoaderData::holder_offset())); + -+void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, -+ int offset_of_disp) { -+ assert(ProfileInterpreter, "must be profiling interpreter"); -+ ld(t1, Address(mdp_in, offset_of_disp)); -+ add(mdp_in, mdp_in, t1); -+ sd(mdp_in, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); -+} ++ // Uses x28 & x29, so we must pass new temporaries. ++ __ resolve_weak_handle(x28, x29); ++ __ mv(t0, x28); + -+void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, -+ Register reg, -+ int offset_of_disp) { -+ assert(ProfileInterpreter, "must be profiling interpreter"); -+ add(t1, mdp_in, reg); -+ ld(t1, Address(t1, offset_of_disp)); -+ add(mdp_in, mdp_in, t1); -+ sd(mdp_in, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); -+} ++ __ pop_reg(RegSet::of(x28, x29), sp); + ++ __ bnez(t0, method_live); + -+void InterpreterMacroAssembler::update_mdp_by_constant(Register mdp_in, -+ int constant) { -+ assert(ProfileInterpreter, "must be profiling interpreter"); -+ addi(mdp_in, mdp_in, constant); -+ sd(mdp_in, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); ++ __ bind(bad_call); ++ ++ __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); ++ __ bind(method_live); +} +diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp +new file mode 100644 +index 00000000000..b85f7f5582b +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp +@@ -0,0 +1,79 @@ ++/* ++ * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
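The c2i barrier completed above decides whether the method's holder is concurrently unloading by testing its ClassLoaderData twice: a non-zero keep_alive count means a strong CLD, otherwise the weak holder handle is resolved and a non-null result means the loader is still reachable. A toy model of that decision, with made-up field names standing in for the offsets used above (not HotSpot code):

    #include <cstdint>

    // Illustrative stand-in for the ClassLoaderData fields the barrier reads.
    struct CldSketch {
      int32_t     keep_alive;    // ClassLoaderData::keep_alive_offset()
      const void* weak_holder;   // cleared once the loader becomes unreachable
    };

    static bool method_holder_is_live(const CldSketch* cld) {
      if (cld->keep_alive != 0) {          // lwu + bnez method_live (strong CLD)
        return true;
      }
      return cld->weak_holder != nullptr;  // resolve_weak_handle + bnez method_live
    }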
++ * ++ */ + ++#ifndef CPU_RISCV_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP ++#define CPU_RISCV_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP + -+void InterpreterMacroAssembler::update_mdp_for_ret(Register return_bci) { -+ assert(ProfileInterpreter, "must be profiling interpreter"); ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetNMethod.hpp" ++#include "memory/allocation.hpp" ++#include "oops/access.hpp" + -+ // save/restore across call_VM -+ addi(sp, sp, -2 * wordSize); -+ sd(zr, Address(sp, 0)); -+ sd(return_bci, Address(sp, wordSize)); -+ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, InterpreterRuntime::update_mdp_for_ret), -+ return_bci); -+ ld(zr, Address(sp, 0)); -+ ld(return_bci, Address(sp, wordSize)); -+ addi(sp, sp, 2 * wordSize); -+} ++class BarrierSetAssembler: public CHeapObj { ++private: ++ void incr_allocated_bytes(MacroAssembler* masm, ++ Register var_size_in_bytes, int con_size_in_bytes, ++ Register t1 = noreg); + -+void InterpreterMacroAssembler::profile_taken_branch(Register mdp, -+ Register bumped_count) { -+ if (ProfileInterpreter) { -+ Label profile_continue; ++public: ++ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register src, Register dst, Register count, RegSet saved_regs) {} ++ virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register start, Register end, Register tmp, RegSet saved_regs) {} ++ virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread); ++ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); + -+ // If no method data exists, go to profile_continue. -+ // Otherwise, assign to mdp -+ test_method_data_pointer(mdp, profile_continue); ++ virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, ++ Register obj, Register tmp, Label& slowpath); + -+ // We are taking a branch. Increment the taken count. -+ Address data(mdp, in_bytes(JumpData::taken_offset())); -+ ld(bumped_count, data); -+ assert(DataLayout::counter_increment == 1, -+ "flow-free idiom only works with 1"); -+ addi(bumped_count, bumped_count, DataLayout::counter_increment); -+ Label L; -+ // eg: bumped_count=0x7fff ffff ffff ffff + 1 < 0. so we use <= 0; -+ blez(bumped_count, L); // skip store if counter overflow, -+ sd(bumped_count, data); -+ bind(L); -+ // The method data pointer needs to be updated to reflect the new target. 
-+ update_mdp_by_offset(mdp, in_bytes(JumpData::displacement_offset())); -+ bind(profile_continue); -+ } -+} ++ virtual void tlab_allocate(MacroAssembler* masm, ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register tmp1, // temp register ++ Register tmp2, // temp register ++ Label& slow_case, // continuation point if fast allocation fails ++ bool is_far = false ++ ); + -+void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp) { -+ if (ProfileInterpreter) { -+ Label profile_continue; ++ void eden_allocate(MacroAssembler* masm, ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register tmp1, // temp register ++ Label& slow_case, // continuation point if fast allocation fails ++ bool is_far = false ++ ); ++ virtual void barrier_stubs_init() {} + -+ // If no method data exists, go to profile_continue. -+ test_method_data_pointer(mdp, profile_continue); ++ virtual void nmethod_entry_barrier(MacroAssembler* masm); ++ virtual void c2i_entry_barrier(MacroAssembler* masm); ++ virtual ~BarrierSetAssembler() {} ++}; + -+ // We are taking a branch. Increment the not taken count. -+ increment_mdp_data_at(mdp, in_bytes(BranchData::not_taken_offset())); ++#endif // CPU_RISCV_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp +new file mode 100644 +index 00000000000..ae7ee4c5a44 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp +@@ -0,0 +1,171 @@ ++/* ++ * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ + -+ // The method data pointer needs to be updated to correspond to -+ // the next bytecode -+ update_mdp_by_constant(mdp, in_bytes(BranchData::branch_data_size())); -+ bind(profile_continue); -+ } -+} ++#include "precompiled.hpp" ++#include "code/codeCache.hpp" ++#include "code/nativeInst.hpp" ++#include "gc/shared/barrierSetNMethod.hpp" ++#include "logging/log.hpp" ++#include "memory/resourceArea.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/registerMap.hpp" ++#include "runtime/thread.hpp" ++#include "utilities/align.hpp" ++#include "utilities/debug.hpp" + -+void InterpreterMacroAssembler::profile_call(Register mdp) { -+ if (ProfileInterpreter) { -+ Label profile_continue; ++class NativeNMethodBarrier: public NativeInstruction { ++ address instruction_address() const { return addr_at(0); } + -+ // If no method data exists, go to profile_continue. -+ test_method_data_pointer(mdp, profile_continue); ++ int *guard_addr() { ++ /* auipc + lwu + fence + lwu + beq + lui + addi + slli + addi + slli + jalr + j */ ++ return reinterpret_cast(instruction_address() + 12 * 4); ++ } + -+ // We are making a call. Increment the count. -+ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++public: ++ int get_value() { ++ return Atomic::load_acquire(guard_addr()); ++ } + -+ // The method data pointer needs to be updated to reflect the new target. -+ update_mdp_by_constant(mdp, in_bytes(CounterData::counter_data_size())); -+ bind(profile_continue); ++ void set_value(int value) { ++ Atomic::release_store(guard_addr(), value); + } -+} + -+void InterpreterMacroAssembler::profile_final_call(Register mdp) { -+ if (ProfileInterpreter) { -+ Label profile_continue; ++ void verify() const; ++}; + -+ // If no method data exists, go to profile_continue. -+ test_method_data_pointer(mdp, profile_continue); ++// Store the instruction bitmask, bits and name for checking the barrier. ++struct CheckInsn { ++ uint32_t mask; ++ uint32_t bits; ++ const char *name; ++}; + -+ // We are making a call. Increment the count. -+ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++static const struct CheckInsn barrierInsn[] = { ++ { 0x00000fff, 0x00000297, "auipc t0, 0 "}, ++ { 0x000fffff, 0x0002e283, "lwu t0, 48(t0) "}, ++ { 0xffffffff, 0x0aa0000f, "fence ir, ir "}, ++ { 0x000fffff, 0x000be303, "lwu t1, 112(xthread)"}, ++ { 0x01fff07f, 0x00628063, "beq t0, t1, skip "}, ++ { 0x00000fff, 0x000002b7, "lui t0, imm0 "}, ++ { 0x000fffff, 0x00028293, "addi t0, t0, imm1 "}, ++ { 0xffffffff, 0x00b29293, "slli t0, t0, 11 "}, ++ { 0x000fffff, 0x00028293, "addi t0, t0, imm2 "}, ++ { 0xffffffff, 0x00529293, "slli t0, t0, 5 "}, ++ { 0x000fffff, 0x000280e7, "jalr ra, imm3(t0) "}, ++ { 0x00000fff, 0x0000006f, "j skip "} ++ /* guard: */ ++ /* 32bit nmethod guard value */ ++ /* skip: */ ++}; + -+ // The method data pointer needs to be updated to reflect the new target. -+ update_mdp_by_constant(mdp, -+ in_bytes(VirtualCallData:: -+ virtual_call_data_size())); -+ bind(profile_continue); ++// The encodings must match the instructions emitted by ++// BarrierSetAssembler::nmethod_entry_barrier. The matching ignores the specific ++// register numbers and immediate values in the encoding. 
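The expected-encoding table above is consumed by the verify() loop that follows. As a worked example of one mask/bits pair: `auipc t0, 0` encodes as imm[31:12] | rd[11:7] | opcode[6:0] with opcode 0x17 and rd = t0 (x5), giving 0x00000297, and the 0x00000fff mask keeps only the rd and opcode fields so any immediate the assembler chose still matches. A standalone check of that first row (encodings assumed from the RISC-V base ISA, for illustration only):

    #include <cassert>
    #include <cstdint>

    int main() {
      // auipc: imm[31:12] | rd[11:7] | opcode[6:0], opcode = 0x17, rd = x5 (t0).
      const uint32_t opcode_auipc = 0x17;
      const uint32_t rd_t0        = 5;
      const uint32_t auipc_t0_0   = (0u << 12) | (rd_t0 << 7) | opcode_auipc;   // 0x00000297

      const uint32_t mask = 0x00000fff;   // keep rd + opcode, ignore the immediate
      const uint32_t bits = 0x00000297;   // expected pattern from the table above

      // Any `auipc t0, <imm>` matches, whatever immediate was patched in.
      const uint32_t with_other_imm = (0x12345u << 12) | (rd_t0 << 7) | opcode_auipc;
      assert((auipc_t0_0     & mask) == bits);
      assert((with_other_imm & mask) == bits);
      return 0;
    }

The same layout explains the nearby constants: the guard word sits 12 * 4 = 48 bytes past the barrier start (hence guard_addr()'s + 12 * 4 and the 48(t0) in the second row), and entry_barrier_offset = -4 * 13 backs up over those 12 instructions plus the 4-byte guard from the frame-complete point.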
++void NativeNMethodBarrier::verify() const { ++ intptr_t addr = (intptr_t) instruction_address(); ++ for(unsigned int i = 0; i < sizeof(barrierInsn)/sizeof(struct CheckInsn); i++ ) { ++ uint32_t inst = *((uint32_t*) addr); ++ if ((inst & barrierInsn[i].mask) != barrierInsn[i].bits) { ++ tty->print_cr("Addr: " INTPTR_FORMAT " Code: 0x%x", addr, inst); ++ fatal("not an %s instruction.", barrierInsn[i].name); ++ } ++ addr += 4; + } +} + + -+void InterpreterMacroAssembler::profile_virtual_call(Register receiver, -+ Register mdp, -+ Register reg2, -+ bool receiver_can_be_null) { -+ if (ProfileInterpreter) { -+ Label profile_continue; ++/* We're called from an nmethod when we need to deoptimize it. We do ++ this by throwing away the nmethod's frame and jumping to the ++ ic_miss stub. This looks like there has been an IC miss at the ++ entry of the nmethod, so we resolve the call, which will fall back ++ to the interpreter if the nmethod has been unloaded. */ ++void BarrierSetNMethod::deoptimize(nmethod* nm, address* return_address_ptr) { + -+ // If no method data exists, go to profile_continue. -+ test_method_data_pointer(mdp, profile_continue); ++ typedef struct { ++ intptr_t *sp; intptr_t *fp; address ra; address pc; ++ } frame_pointers_t; + -+ Label skip_receiver_profile; -+ if (receiver_can_be_null) { -+ Label not_null; -+ // We are making a call. Increment the count for null receiver. -+ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); -+ j(skip_receiver_profile); -+ bind(not_null); -+ } ++ frame_pointers_t *new_frame = (frame_pointers_t *)(return_address_ptr - 5); + -+ // Record the receiver type. -+ record_klass_in_profile(receiver, mdp, reg2, true); -+ bind(skip_receiver_profile); ++ JavaThread *thread = JavaThread::current(); ++ RegisterMap reg_map(thread, false); ++ frame frame = thread->last_frame(); + -+ // The method data pointer needs to be updated to reflect the new target. ++ assert(frame.is_compiled_frame() || frame.is_native_frame(), "must be"); ++ assert(frame.cb() == nm, "must be"); ++ frame = frame.sender(®_map); + -+ update_mdp_by_constant(mdp, -+ in_bytes(VirtualCallData:: -+ virtual_call_data_size())); -+ bind(profile_continue); ++ LogTarget(Trace, nmethod, barrier) out; ++ if (out.is_enabled()) { ++ ResourceMark mark; ++ log_trace(nmethod, barrier)("deoptimize(nmethod: %s(%p), return_addr: %p, osr: %d, thread: %p(%s), making rsp: %p) -> %p", ++ nm->method()->name_and_sig_as_C_string(), ++ nm, *(address *) return_address_ptr, nm->is_osr_method(), thread, ++ thread->name(), frame.sp(), nm->verified_entry_point()); + } -+} -+ -+// This routine creates a state machine for updating the multi-row -+// type profile at a virtual call site (or other type-sensitive bytecode). -+// The machine visits each row (of receiver/count) until the receiver type -+// is found, or until it runs out of rows. At the same time, it remembers -+// the location of the first empty row. (An empty row records null for its -+// receiver, and can be allocated for a newly-observed receiver type.) -+// Because there are two degrees of freedom in the state, a simple linear -+// search will not work; it must be a decision tree. Hence this helper -+// function is recursive, to generate the required tree structured code. -+// It's the interpreter, so we are trading off code space for speed. -+// See below for example code. 
-+void InterpreterMacroAssembler::record_klass_in_profile_helper( -+ Register receiver, Register mdp, -+ Register reg2, -+ Label& done, bool is_virtual_call) { -+ if (TypeProfileWidth == 0) { -+ if (is_virtual_call) { -+ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); -+ } -+ -+ } else { -+ int non_profiled_offset = -1; -+ if (is_virtual_call) { -+ non_profiled_offset = in_bytes(CounterData::count_offset()); -+ } + -+ record_item_in_profile_helper(receiver, mdp, reg2, 0, done, TypeProfileWidth, -+ &VirtualCallData::receiver_offset, &VirtualCallData::receiver_count_offset, non_profiled_offset); -+ } ++ new_frame->sp = frame.sp(); ++ new_frame->fp = frame.fp(); ++ new_frame->ra = frame.pc(); ++ new_frame->pc = SharedRuntime::get_handle_wrong_method_stub(); +} + -+void InterpreterMacroAssembler::record_item_in_profile_helper( -+ Register item, Register mdp, Register reg2, int start_row, Label& done, int total_rows, -+ OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn, int non_profiled_offset) { -+ int last_row = total_rows - 1; -+ assert(start_row <= last_row, "must be work left to do"); -+ // Test this row for both the item and for null. -+ // Take any of three different outcomes: -+ // 1. found item => increment count and goto done -+ // 2. found null => keep looking for case 1, maybe allocate this cell -+ // 3. found something else => keep looking for cases 1 and 2 -+ // Case 3 is handled by a recursive call. -+ for (int row = start_row; row <= last_row; row++) { -+ Label next_test; -+ bool test_for_null_also = (row == start_row); -+ -+ // See if the item is item[n]. -+ int item_offset = in_bytes(item_offset_fn(row)); -+ test_mdp_data_at(mdp, item_offset, item, -+ (test_for_null_also ? reg2 : noreg), -+ next_test); -+ // (Reg2 now contains the item from the CallData.) -+ -+ // The item is item[n]. Increment count[n]. -+ int count_offset = in_bytes(item_count_offset_fn(row)); -+ increment_mdp_data_at(mdp, count_offset); -+ j(done); -+ bind(next_test); -+ -+ if (test_for_null_also) { -+ Label found_null; -+ // Failed the equality check on item[n]... Test for null. -+ if (start_row == last_row) { -+ // The only thing left to do is handle the null case. -+ if (non_profiled_offset >= 0) { -+ beqz(reg2, found_null); -+ // Item did not match any saved item and there is no empty row for it. -+ // Increment total counter to indicate polymorphic case. -+ increment_mdp_data_at(mdp, non_profiled_offset); -+ j(done); -+ bind(found_null); -+ } else { -+ bnez(reg2, done); -+ } -+ break; -+ } -+ // Since null is rare, make it be the branch-taken case. -+ beqz(reg2, found_null); ++// This is the offset of the entry barrier from where the frame is completed. ++// If any code changes between the end of the verified entry where the entry ++// barrier resides, and the completion of the frame, then ++// NativeNMethodCmpBarrier::verify() will immediately complain when it does ++// not find the expected native instruction at this offset, which needs updating. ++// Note that this offset is invariant of PreserveFramePointer. + -+ // Put all the "Case 3" tests here. -+ record_item_in_profile_helper(item, mdp, reg2, start_row + 1, done, total_rows, -+ item_offset_fn, item_count_offset_fn, non_profiled_offset); ++// see BarrierSetAssembler::nmethod_entry_barrier ++// auipc + lwu + fence + lwu + beq + movptr_with_offset(5 instructions) + jalr + j + int32 ++static const int entry_barrier_offset = -4 * 13; + -+ // Found a null. 
Keep searching for a matching item, -+ // but remember that this is an empty (unused) slot. -+ bind(found_null); -+ } -+ } ++static NativeNMethodBarrier* native_nmethod_barrier(nmethod* nm) { ++ address barrier_address = nm->code_begin() + nm->frame_complete_offset() + entry_barrier_offset; ++ NativeNMethodBarrier* barrier = reinterpret_cast(barrier_address); ++ debug_only(barrier->verify()); ++ return barrier; ++} + -+ // In the fall-through case, we found no matching item, but we -+ // observed the item[start_row] is NULL. -+ // Fill in the item field and increment the count. -+ int item_offset = in_bytes(item_offset_fn(start_row)); -+ set_mdp_data_at(mdp, item_offset, item); -+ int count_offset = in_bytes(item_count_offset_fn(start_row)); -+ mv(reg2, DataLayout::counter_increment); -+ set_mdp_data_at(mdp, count_offset, reg2); -+ if (start_row > 0) { -+ j(done); ++void BarrierSetNMethod::disarm(nmethod* nm) { ++ if (!supports_entry_barrier(nm)) { ++ return; + } -+} + -+// Example state machine code for three profile rows: -+// # main copy of decision tree, rooted at row[1] -+// if (row[0].rec == rec) then [ -+// row[0].incr() -+// goto done -+// ] -+// if (row[0].rec != NULL) then [ -+// # inner copy of decision tree, rooted at row[1] -+// if (row[1].rec == rec) then [ -+// row[1].incr() -+// goto done -+// ] -+// if (row[1].rec != NULL) then [ -+// # degenerate decision tree, rooted at row[2] -+// if (row[2].rec == rec) then [ -+// row[2].incr() -+// goto done -+// ] -+// if (row[2].rec != NULL) then [ -+// count.incr() -+// goto done -+// ] # overflow -+// row[2].init(rec) -+// goto done -+// ] else [ -+// # remember row[1] is empty -+// if (row[2].rec == rec) then [ -+// row[2].incr() -+// goto done -+// ] -+// row[1].init(rec) -+// goto done -+// ] -+// else [ -+// # remember row[0] is empty -+// if (row[1].rec == rec) then [ -+// row[1].incr() -+// goto done -+// ] -+// if (row[2].rec == rec) then [ -+// row[2].incr() -+// goto done -+// ] -+// row[0].init(rec) -+// goto done -+// ] -+// done: ++ // Disarms the nmethod guard emitted by BarrierSetAssembler::nmethod_entry_barrier. ++ NativeNMethodBarrier* barrier = native_nmethod_barrier(nm); + -+void InterpreterMacroAssembler::record_klass_in_profile(Register receiver, -+ Register mdp, Register reg2, -+ bool is_virtual_call) { -+ assert(ProfileInterpreter, "must be profiling"); -+ Label done; ++ barrier->set_value(disarmed_value()); ++} + -+ record_klass_in_profile_helper(receiver, mdp, reg2, done, is_virtual_call); ++bool BarrierSetNMethod::is_armed(nmethod* nm) { ++ if (!supports_entry_barrier(nm)) { ++ return false; ++ } + -+ bind(done); ++ NativeNMethodBarrier* barrier = native_nmethod_barrier(nm); ++ return barrier->get_value() != disarmed_value(); +} +diff --git a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp +new file mode 100644 +index 00000000000..a419f92b5f6 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp +@@ -0,0 +1,111 @@ ++/* ++ * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+void InterpreterMacroAssembler::profile_ret(Register return_bci, Register mdp) { -+ if (ProfileInterpreter) { -+ Label profile_continue; -+ -+ // If no method data exists, go to profile_continue. -+ test_method_data_pointer(mdp, profile_continue); ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/cardTable.hpp" ++#include "gc/shared/cardTableBarrierSet.hpp" ++#include "gc/shared/cardTableBarrierSetAssembler.hpp" ++#include "gc/shared/gc_globals.hpp" ++#include "interpreter/interp_masm.hpp" + -+ // Update the total ret count. -+ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++#define __ masm-> + -+ for (uint row = 0; row < RetData::row_limit(); row++) { -+ Label next_test; + -+ // See if return_bci is equal to bci[n]: -+ test_mdp_data_at(mdp, -+ in_bytes(RetData::bci_offset(row)), -+ return_bci, noreg, -+ next_test); ++void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register obj, Register tmp) { ++ assert_cond(masm != NULL); ++ assert_different_registers(obj, tmp); ++ BarrierSet* bs = BarrierSet::barrier_set(); ++ assert(bs->kind() == BarrierSet::CardTableBarrierSet, "Wrong barrier set kind"); + -+ // return_bci is equal to bci[n]. Increment the count. -+ increment_mdp_data_at(mdp, in_bytes(RetData::bci_count_offset(row))); ++ __ srli(obj, obj, CardTable::card_shift()); + -+ // The method data pointer needs to be updated to reflect the new target. -+ update_mdp_by_offset(mdp, -+ in_bytes(RetData::bci_displacement_offset(row))); -+ j(profile_continue); -+ bind(next_test); -+ } ++ assert(CardTable::dirty_card_val() == 0, "must be"); + -+ update_mdp_for_ret(return_bci); ++ __ load_byte_map_base(tmp); ++ __ add(tmp, obj, tmp); + -+ bind(profile_continue); ++ if (UseCondCardMark) { ++ Label L_already_dirty; ++ __ membar(MacroAssembler::StoreLoad); ++ __ lbu(t1, Address(tmp)); ++ __ beqz(t1, L_already_dirty); ++ __ sb(zr, Address(tmp)); ++ __ bind(L_already_dirty); ++ } else { ++ __ sb(zr, Address(tmp)); + } +} + -+void InterpreterMacroAssembler::profile_null_seen(Register mdp) { -+ if (ProfileInterpreter) { -+ Label profile_continue; -+ -+ // If no method data exists, go to profile_continue. -+ test_method_data_pointer(mdp, profile_continue); -+ -+ set_mdp_flag_at(mdp, BitData::null_seen_byte_constant()); ++void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register start, Register count, Register tmp, RegSet saved_regs) { ++ assert_cond(masm != NULL); ++ assert_different_registers(start, tmp); ++ assert_different_registers(count, tmp); + -+ // The method data pointer needs to be updated. 
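store_check above dirties the single card covering an updated oop field; the array post-barrier whose prologue appears here walks a whole card range in the next hunk. In C terms the per-store case is just a byte store into a biased card table. A sketch with hypothetical globals standing in for CardTable::card_shift() and the byte-map base loaded by load_byte_map_base() (values assumed, not HotSpot code):

    #include <cstdint>

    static const int      kCardShift      = 9;        // 512-byte cards, a typical default
    static uint8_t*       g_byte_map_base = nullptr;  // biased base set up at VM init
    static const uint8_t  kDirtyCard      = 0;        // CardTable::dirty_card_val()

    // Equivalent of store_check(obj): mark the card covering `field_addr` dirty.
    static void store_check_sketch(uintptr_t field_addr, bool use_cond_card_mark) {
      uint8_t* card = g_byte_map_base + (field_addr >> kCardShift);  // srli + add
      if (use_cond_card_mark) {
        // UseCondCardMark: after the StoreLoad fence above, write only if the
        // card is not already dirty, to avoid false sharing on hot cards.
        if (*card != kDirtyCard) {
          *card = kDirtyCard;                                        // sb zr, (card)
        }
      } else {
        *card = kDirtyCard;                                          // sb zr, (card)
      }
    }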
-+ int mdp_delta = in_bytes(BitData::bit_data_size()); -+ if (TypeProfileCasts) { -+ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); -+ } -+ update_mdp_by_constant(mdp, mdp_delta); ++ Label L_loop, L_done; ++ const Register end = count; + -+ bind(profile_continue); -+ } -+} ++ __ beqz(count, L_done); // zero count - nothing to do ++ // end = start + count << LogBytesPerHeapOop ++ __ shadd(end, count, start, count, LogBytesPerHeapOop); ++ __ sub(end, end, BytesPerHeapOop); // last element address to make inclusive + -+void InterpreterMacroAssembler::profile_typecheck_failed(Register mdp) { -+ if (ProfileInterpreter && TypeProfileCasts) { -+ Label profile_continue; ++ __ srli(start, start, CardTable::card_shift()); ++ __ srli(end, end, CardTable::card_shift()); ++ __ sub(count, end, start); // number of bytes to copy + -+ // If no method data exists, go to profile_continue. -+ test_method_data_pointer(mdp, profile_continue); ++ __ load_byte_map_base(tmp); ++ __ add(start, start, tmp); + -+ int count_offset = in_bytes(CounterData::count_offset()); -+ // Back up the address, since we have already bumped the mdp. -+ count_offset -= in_bytes(VirtualCallData::virtual_call_data_size()); ++ __ bind(L_loop); ++ __ add(tmp, start, count); ++ __ sb(zr, Address(tmp)); ++ __ sub(count, count, 1); ++ __ bgez(count, L_loop); ++ __ bind(L_done); ++} + -+ // *Decrement* the counter. We expect to see zero or small negatives. -+ increment_mdp_data_at(mdp, count_offset, true); ++void CardTableBarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ bool is_array = (decorators & IS_ARRAY) != 0; ++ bool on_anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0; ++ bool precise = is_array || on_anonymous; + -+ bind (profile_continue); ++ bool needs_post_barrier = val != noreg && in_heap; ++ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, noreg, noreg); ++ if (needs_post_barrier) { ++ // flatten object address if needed ++ if (!precise || dst.offset() == 0) { ++ store_check(masm, dst.base(), x13); ++ } else { ++ assert_cond(masm != NULL); ++ __ la(x13, dst); ++ store_check(masm, x13, t0); ++ } + } +} +diff --git a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp +new file mode 100644 +index 00000000000..686fe8fa478 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp +@@ -0,0 +1,42 @@ ++/* ++ * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass, Register reg2) { -+ if (ProfileInterpreter) { -+ Label profile_continue; -+ -+ // If no method data exists, go to profile_continue. -+ test_method_data_pointer(mdp, profile_continue); -+ -+ // The method data pointer needs to be updated. -+ int mdp_delta = in_bytes(BitData::bit_data_size()); -+ if (TypeProfileCasts) { -+ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); ++#ifndef CPU_RISCV_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_RISCV_HPP ++#define CPU_RISCV_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_RISCV_HPP + -+ // Record the object type. -+ record_klass_in_profile(klass, mdp, reg2, false); -+ } -+ update_mdp_by_constant(mdp, mdp_delta); ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/modRefBarrierSetAssembler.hpp" + -+ bind(profile_continue); -+ } -+} ++class CardTableBarrierSetAssembler: public ModRefBarrierSetAssembler { ++protected: ++ void store_check(MacroAssembler* masm, Register obj, Register tmp); + -+void InterpreterMacroAssembler::profile_switch_default(Register mdp) { -+ if (ProfileInterpreter) { -+ Label profile_continue; ++ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register start, Register count, Register tmp, RegSet saved_regs); ++ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++}; + -+ // If no method data exists, go to profile_continue. -+ test_method_data_pointer(mdp, profile_continue); ++#endif // #ifndef CPU_RISCV_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.cpp +new file mode 100644 +index 00000000000..7aa2015f9ec +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.cpp +@@ -0,0 +1,55 @@ ++/* ++ * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ // Update the default case count -+ increment_mdp_data_at(mdp, -+ in_bytes(MultiBranchData::default_count_offset())); ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/shared/modRefBarrierSetAssembler.hpp" + -+ // The method data pointer needs to be updated. -+ update_mdp_by_offset(mdp, -+ in_bytes(MultiBranchData:: -+ default_displacement_offset())); ++#define __ masm-> + -+ bind(profile_continue); ++void ModRefBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register src, Register dst, Register count, RegSet saved_regs) { ++ ++ if (is_oop) { ++ gen_write_ref_array_pre_barrier(masm, decorators, dst, count, saved_regs); + } +} + -+void InterpreterMacroAssembler::profile_switch_case(Register index, -+ Register mdp, -+ Register reg2) { -+ if (ProfileInterpreter) { -+ Label profile_continue; ++void ModRefBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register start, Register count, Register tmp, ++ RegSet saved_regs) { ++ if (is_oop) { ++ gen_write_ref_array_post_barrier(masm, decorators, start, count, tmp, saved_regs); ++ } ++} + -+ // If no method data exists, go to profile_continue. -+ test_method_data_pointer(mdp, profile_continue); ++void ModRefBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ if (is_reference_type(type)) { ++ oop_store_at(masm, decorators, type, dst, val, tmp1, tmp2); ++ } else { ++ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); ++ } ++} +diff --git a/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp +new file mode 100644 +index 00000000000..00419c3163c +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp +@@ -0,0 +1,55 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ + -+ // Build the base (index * per_case_size_in_bytes()) + -+ // case_array_offset_in_bytes() -+ mvw(reg2, in_bytes(MultiBranchData::per_case_size())); -+ mvw(t0, in_bytes(MultiBranchData::case_array_offset())); -+ Assembler::mul(index, index, reg2); -+ Assembler::add(index, index, t0); ++#ifndef CPU_RISCV_GC_SHARED_MODREFBARRIERSETASSEMBLER_RISCV_HPP ++#define CPU_RISCV_GC_SHARED_MODREFBARRIERSETASSEMBLER_RISCV_HPP + -+ // Update the case count -+ increment_mdp_data_at(mdp, -+ index, -+ in_bytes(MultiBranchData::relative_count_offset())); ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" + -+ // The method data pointer need to be updated. -+ update_mdp_by_offset(mdp, -+ index, -+ in_bytes(MultiBranchData:: -+ relative_displacement_offset())); ++// The ModRefBarrierSetAssembler filters away accesses on BasicTypes other ++// than T_OBJECT/T_ARRAY (oops). The oop accesses call one of the protected ++// accesses, which are overridden in the concrete BarrierSetAssembler. + -+ bind(profile_continue); -+ } -+} ++class ModRefBarrierSetAssembler: public BarrierSetAssembler { ++protected: ++ virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, RegSet saved_regs) {} ++ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register start, Register count, Register tmp, RegSet saved_regs) {} + -+void InterpreterMacroAssembler::verify_FPU(int stack_depth, TosState state) { ; } ++ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) = 0; + -+void InterpreterMacroAssembler::notify_method_entry() { -+ // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to -+ // track stack depth. If it is possible to enter interp_only_mode we add -+ // the code to check if the event should be sent. -+ if (JvmtiExport::can_post_interpreter_events()) { -+ Label L; -+ lwu(x13, Address(xthread, JavaThread::interp_only_mode_offset())); -+ beqz(x13, L); -+ call_VM(noreg, CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::post_method_entry)); -+ bind(L); -+ } ++public: ++ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register src, Register dst, Register count, RegSet saved_regs); ++ virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register start, Register count, Register tmp, RegSet saved_regs); ++ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++}; + -+ { -+ SkipIfEqual skip(this, &DTraceMethodProbes, false); -+ get_method(c_rarg1); -+ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), -+ xthread, c_rarg1); -+ } ++#endif // CPU_RISCV_GC_SHARED_MODREFBARRIERSETASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp +new file mode 100644 +index 00000000000..cd568cc723f +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp +@@ -0,0 +1,117 @@ ++/* ++ * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
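The ModRefBarrierSetAssembler declaration above describes a type filter: only oop stores are routed to the protected oop_store_at, which concrete assemblers such as the card-table one earlier in this patch override, while primitive stores fall through with no barrier. A toy model of that dispatch, using invented names rather than the real HotSpot types:

    #include <cstdio>

    enum BasicTypeSketch { T_INT_SK, T_OBJECT_SK, T_ARRAY_SK };

    static bool is_reference_type_sketch(BasicTypeSketch t) {
      return t == T_OBJECT_SK || t == T_ARRAY_SK;
    }

    // store_at() filters on the value type; oop stores go through the hook a
    // concrete barrier set overrides, mirroring ModRefBarrierSetAssembler::store_at.
    struct ModRefSketch {
      virtual ~ModRefSketch() {}
      void store_at(BasicTypeSketch type) {
        if (is_reference_type_sketch(type)) {
          oop_store_at(type);                    // concrete GC adds pre/post barriers
        } else {
          std::puts("plain sw/sd, no barrier");
        }
      }
    protected:
      virtual void oop_store_at(BasicTypeSketch type) = 0;
    };

    struct CardTableSketch : ModRefSketch {
    protected:
      void oop_store_at(BasicTypeSketch) override {
        std::puts("store oop, then dirty the covering card");
      }
    };

    int main() {
      CardTableSketch bs;
      bs.store_at(T_OBJECT_SK);   // routed through the post-barrier
      bs.store_at(T_INT_SK);      // no barrier
      return 0;
    }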
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ // RedefineClasses() tracing support for obsolete method entry -+ if (log_is_enabled(Trace, redefine, class, obsolete)) { -+ get_method(c_rarg1); -+ call_VM_leaf( -+ CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), -+ xthread, c_rarg1); -+ } -+} ++#include "precompiled.hpp" ++#include "c1/c1_LIRAssembler.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "gc/shared/gc_globals.hpp" ++#include "gc/shenandoah/shenandoahBarrierSet.hpp" ++#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" ++#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp" + ++#define __ masm->masm()-> + -+void InterpreterMacroAssembler::notify_method_exit( -+ TosState state, NotifyMethodExitMode mode) { -+ // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to -+ // track stack depth. If it is possible to enter interp_only_mode we add -+ // the code to check if the event should be sent. -+ if (mode == NotifyJVMTI && JvmtiExport::can_post_interpreter_events()) { -+ Label L; -+ // Note: frame::interpreter_frame_result has a dependency on how the -+ // method result is saved across the call to post_method_exit. If this -+ // is changed then the interpreter_frame_result implementation will -+ // need to be updated too. ++void LIR_OpShenandoahCompareAndSwap::emit_code(LIR_Assembler* masm) { ++ Register addr = _addr->as_register_lo(); ++ Register newval = _new_value->as_register(); ++ Register cmpval = _cmp_value->as_register(); ++ Register tmp1 = _tmp1->as_register(); ++ Register tmp2 = _tmp2->as_register(); ++ Register result = result_opr()->as_register(); + -+ // template interpreter will leave the result on the top of the stack. -+ push(state); -+ lwu(x13, Address(xthread, JavaThread::interp_only_mode_offset())); -+ beqz(x13, L); -+ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit)); -+ bind(L); -+ pop(state); -+ } ++ ShenandoahBarrierSet::assembler()->iu_barrier(masm->masm(), newval, t1); + -+ { -+ SkipIfEqual skip(this, &DTraceMethodProbes, false); -+ push(state); -+ get_method(c_rarg1); -+ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), -+ xthread, c_rarg1); -+ pop(state); ++ if (UseCompressedOops) { ++ __ encode_heap_oop(tmp1, cmpval); ++ cmpval = tmp1; ++ __ encode_heap_oop(tmp2, newval); ++ newval = tmp2; + } -+} -+ + -+// Jump if ((*counter_addr += increment) & mask) satisfies the condition. 
-+void InterpreterMacroAssembler::increment_mask_and_jump(Address counter_addr, -+ int increment, Address mask, -+ Register tmp1, Register tmp2, -+ bool preloaded, Label* where) { -+ Label done; -+ if (!preloaded) { -+ lwu(tmp1, counter_addr); -+ } -+ add(tmp1, tmp1, increment); -+ sw(tmp1, counter_addr); -+ lwu(tmp2, mask); -+ andr(tmp1, tmp1, tmp2); -+ bnez(tmp1, done); -+ j(*where); // offset is too large so we have to use j instead of beqz here -+ bind(done); ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm->masm(), addr, cmpval, newval, /* acquire */ Assembler::aq, ++ /* release */ Assembler::rl, /* is_cae */ false, result); +} + -+void InterpreterMacroAssembler::call_VM_leaf_base(address entry_point, -+ int number_of_arguments) { -+ // interpreter specific -+ // -+ // Note: No need to save/restore rbcp & rlocals pointer since these -+ // are callee saved registers and no blocking/ GC can happen -+ // in leaf calls. -+#ifdef ASSERT -+ { -+ Label L; -+ ld(t0, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); -+ beqz(t0, L); -+ stop("InterpreterMacroAssembler::call_VM_leaf_base:" -+ " last_sp != NULL"); -+ bind(L); -+ } -+#endif /* ASSERT */ -+ // super call -+ MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments); -+} ++#undef __ + -+void InterpreterMacroAssembler::call_VM_base(Register oop_result, -+ Register java_thread, -+ Register last_java_sp, -+ address entry_point, -+ int number_of_arguments, -+ bool check_exceptions) { -+ // interpreter specific -+ // -+ // Note: Could avoid restoring locals ptr (callee saved) - however doesn't -+ // really make a difference for these runtime calls, since they are -+ // slow anyway. Btw., bcp must be saved/restored since it may change -+ // due to GC. -+ save_bcp(); +#ifdef ASSERT -+ { -+ Label L; -+ ld(t0, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); -+ beqz(t0, L); -+ stop("InterpreterMacroAssembler::call_VM_base:" -+ " last_sp != NULL"); -+ bind(L); -+ } -+#endif /* ASSERT */ -+ // super call -+ MacroAssembler::call_VM_base(oop_result, noreg, last_java_sp, -+ entry_point, number_of_arguments, -+ check_exceptions); -+// interpreter specific -+ restore_bcp(); -+ restore_locals(); -+} -+ -+void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& mdo_addr, Register tmp) { -+ assert_different_registers(obj, tmp, t0, mdo_addr.base()); -+ Label update, next, none; ++#define __ gen->lir(__FILE__, __LINE__)-> ++#else ++#define __ gen->lir()-> ++#endif + -+ verify_oop(obj); ++LIR_Opr ShenandoahBarrierSetC1::atomic_cmpxchg_at_resolved(LIRAccess& access, LIRItem& cmp_value, LIRItem& new_value) { ++ BasicType bt = access.type(); ++ if (access.is_oop()) { ++ LIRGenerator *gen = access.gen(); ++ if (ShenandoahSATBBarrier) { ++ pre_barrier(gen, access.access_emit_info(), access.decorators(), access.resolved_addr(), ++ LIR_OprFact::illegalOpr /* pre_val */); ++ } ++ if (ShenandoahCASBarrier) { ++ cmp_value.load_item(); ++ new_value.load_item(); + -+ bnez(obj, update); -+ orptr(mdo_addr, TypeEntries::null_seen, t0, tmp); -+ j(next); ++ LIR_Opr tmp1 = gen->new_register(T_OBJECT); ++ LIR_Opr tmp2 = gen->new_register(T_OBJECT); ++ LIR_Opr addr = access.resolved_addr()->as_address_ptr()->base(); ++ LIR_Opr result = gen->new_register(T_INT); + -+ bind(update); -+ load_klass(obj, obj); ++ __ append(new LIR_OpShenandoahCompareAndSwap(addr, cmp_value.result(), new_value.result(), tmp1, tmp2, result)); ++ return result; ++ } ++ } ++ return BarrierSetC1::atomic_cmpxchg_at_resolved(access, 
cmp_value, new_value); ++} + -+ ld(t0, mdo_addr); -+ xorr(obj, obj, t0); -+ andi(t0, obj, TypeEntries::type_klass_mask); -+ beqz(t0, next); // klass seen before, nothing to -+ // do. The unknown bit may have been -+ // set already but no need to check. ++LIR_Opr ShenandoahBarrierSetC1::atomic_xchg_at_resolved(LIRAccess& access, LIRItem& value) { ++ LIRGenerator* gen = access.gen(); ++ BasicType type = access.type(); + -+ andi(t0, obj, TypeEntries::type_unknown); -+ bnez(t0, next); -+ // already unknown. Nothing to do anymore. ++ LIR_Opr result = gen->new_register(type); ++ value.load_item(); ++ LIR_Opr value_opr = value.result(); + -+ ld(t0, mdo_addr); -+ beqz(t0, none); -+ mv(tmp, (u1)TypeEntries::null_seen); -+ beq(t0, tmp, none); -+ // There is a chance that the checks above (re-reading profiling -+ // data from memory) fail if another thread has just set the -+ // profiling to this obj's klass -+ ld(t0, mdo_addr); -+ xorr(obj, obj, t0); -+ andi(t0, obj, TypeEntries::type_klass_mask); -+ beqz(t0, next); ++ if (access.is_oop()) { ++ value_opr = iu_barrier(access.gen(), value_opr, access.access_emit_info(), access.decorators()); ++ } + -+ // different than before. Cannot keep accurate profile. -+ orptr(mdo_addr, TypeEntries::type_unknown, t0, tmp); -+ j(next); ++ assert(type == T_INT || is_reference_type(type) LP64_ONLY( || type == T_LONG ), "unexpected type"); ++ LIR_Opr tmp = gen->new_register(T_INT); ++ __ xchg(access.resolved_addr(), value_opr, result, tmp); + -+ bind(none); -+ // first time here. Set profile type. -+ sd(obj, mdo_addr); ++ if (access.is_oop()) { ++ result = load_reference_barrier(access.gen(), result, LIR_OprFact::addressConst(0), access.decorators()); ++ LIR_Opr tmp_opr = gen->new_register(type); ++ __ move(result, tmp_opr); ++ result = tmp_opr; ++ if (ShenandoahSATBBarrier) { ++ pre_barrier(access.gen(), access.access_emit_info(), access.decorators(), LIR_OprFact::illegalOpr, ++ result /* pre_val */); ++ } ++ } + -+ bind(next); ++ return result; +} +diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp +new file mode 100644 +index 00000000000..d0ac6e52436 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp +@@ -0,0 +1,712 @@ ++/* ++ * Copyright (c) 2018, 2020, Red Hat, Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ + -+void InterpreterMacroAssembler::profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual) { -+ if (!ProfileInterpreter) { -+ return; -+ } ++#include "precompiled.hpp" ++#include "gc/shenandoah/shenandoahBarrierSet.hpp" ++#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" ++#include "gc/shenandoah/shenandoahForwarding.hpp" ++#include "gc/shenandoah/shenandoahHeap.inline.hpp" ++#include "gc/shenandoah/shenandoahHeapRegion.hpp" ++#include "gc/shenandoah/shenandoahRuntime.hpp" ++#include "gc/shenandoah/shenandoahThreadLocalData.hpp" ++#include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/thread.hpp" ++#ifdef COMPILER1 ++#include "c1/c1_LIRAssembler.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp" ++#endif + -+ if (MethodData::profile_arguments() || MethodData::profile_return()) { -+ Label profile_continue; ++#define __ masm-> + -+ test_method_data_pointer(mdp, profile_continue); ++void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register src, Register dst, Register count, RegSet saved_regs) { ++ if (is_oop) { ++ bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; ++ if ((ShenandoahSATBBarrier && !dest_uninitialized) || ShenandoahIUBarrier || ShenandoahLoadRefBarrier) { + -+ int off_to_start = is_virtual ? in_bytes(VirtualCallData::virtual_call_data_size()) : in_bytes(CounterData::counter_data_size()); ++ Label done; + -+ lbu(t0, Address(mdp, in_bytes(DataLayout::tag_offset()) - off_to_start)); -+ if (is_virtual) { -+ mv(tmp, (u1)DataLayout::virtual_call_type_data_tag); -+ bne(t0, tmp, profile_continue); -+ } else { -+ mv(tmp, (u1)DataLayout::call_type_data_tag); -+ bne(t0, tmp, profile_continue); -+ } ++ // Avoid calling runtime if count == 0 ++ __ beqz(count, done); + -+ // calculate slot step -+ static int stack_slot_offset0 = in_bytes(TypeEntriesAtCall::stack_slot_offset(0)); -+ static int slot_step = in_bytes(TypeEntriesAtCall::stack_slot_offset(1)) - stack_slot_offset0; -+ -+ // calculate type step -+ static int argument_type_offset0 = in_bytes(TypeEntriesAtCall::argument_type_offset(0)); -+ static int type_step = in_bytes(TypeEntriesAtCall::argument_type_offset(1)) - argument_type_offset0; -+ -+ if (MethodData::profile_arguments()) { -+ Label done, loop, loopEnd, profileArgument, profileReturnType; -+ RegSet pushed_registers; -+ pushed_registers += x15; -+ pushed_registers += x16; -+ pushed_registers += x17; -+ Register mdo_addr = x15; -+ Register index = x16; -+ Register off_to_args = x17; -+ push_reg(pushed_registers, sp); -+ -+ mv(off_to_args, in_bytes(TypeEntriesAtCall::args_data_offset())); -+ mv(t0, TypeProfileArgsLimit); -+ beqz(t0, loopEnd); -+ -+ mv(index, zr); // index < TypeProfileArgsLimit -+ bind(loop); -+ bgtz(index, profileReturnType); -+ mv(t0, (int)MethodData::profile_return()); -+ beqz(t0, profileArgument); // (index > 0 || MethodData::profile_return()) == false -+ bind(profileReturnType); -+ // If return value type is profiled we may have no argument to profile -+ ld(tmp, Address(mdp, in_bytes(TypeEntriesAtCall::cell_count_offset()))); -+ mv(t1, - TypeStackSlotEntries::per_arg_count()); -+ mul(t1, index, t1); -+ add(tmp, tmp, t1); -+ mv(t1, TypeStackSlotEntries::per_arg_count()); -+ add(t0, mdp, off_to_args); -+ blt(tmp, t1, done); -+ -+ 
bind(profileArgument); -+ -+ ld(tmp, Address(callee, Method::const_offset())); -+ load_unsigned_short(tmp, Address(tmp, ConstMethod::size_of_parameters_offset())); -+ // stack offset o (zero based) from the start of the argument -+ // list, for n arguments translates into offset n - o - 1 from -+ // the end of the argument list -+ mv(t0, stack_slot_offset0); -+ mv(t1, slot_step); -+ mul(t1, index, t1); -+ add(t0, t0, t1); -+ add(t0, mdp, t0); -+ ld(t0, Address(t0)); -+ sub(tmp, tmp, t0); -+ addi(tmp, tmp, -1); -+ Address arg_addr = argument_address(tmp); -+ ld(tmp, arg_addr); -+ -+ mv(t0, argument_type_offset0); -+ mv(t1, type_step); -+ mul(t1, index, t1); -+ add(t0, t0, t1); -+ add(mdo_addr, mdp, t0); -+ Address mdo_arg_addr(mdo_addr, 0); -+ profile_obj_type(tmp, mdo_arg_addr, t1); -+ -+ int to_add = in_bytes(TypeStackSlotEntries::per_arg_size()); -+ addi(off_to_args, off_to_args, to_add); -+ -+ // increment index by 1 -+ addi(index, index, 1); -+ mv(t1, TypeProfileArgsLimit); -+ blt(index, t1, loop); -+ bind(loopEnd); ++ // Is GC active? ++ Address gc_state(xthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); ++ assert_different_registers(src, dst, count, t0); + -+ if (MethodData::profile_return()) { -+ ld(tmp, Address(mdp, in_bytes(TypeEntriesAtCall::cell_count_offset()))); -+ addi(tmp, tmp, -TypeProfileArgsLimit*TypeStackSlotEntries::per_arg_count()); ++ __ lbu(t0, gc_state); ++ if (ShenandoahSATBBarrier && dest_uninitialized) { ++ __ andi(t0, t0, ShenandoahHeap::HAS_FORWARDED); ++ __ beqz(t0, done); ++ } else { ++ __ andi(t0, t0, ShenandoahHeap::HAS_FORWARDED | ShenandoahHeap::MARKING); ++ __ beqz(t0, done); + } + -+ add(t0, mdp, off_to_args); -+ bind(done); -+ mv(mdp, t0); -+ -+ // unspill the clobbered registers -+ pop_reg(pushed_registers, sp); -+ -+ if (MethodData::profile_return()) { -+ // We're right after the type profile for the last -+ // argument. tmp is the number of cells left in the -+ // CallTypeData/VirtualCallTypeData to reach its end. Non null -+ // if there's a return to profile. 
-+ assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type"); -+ shadd(mdp, tmp, mdp, tmp, exact_log2(DataLayout::cell_size)); ++ __ push_reg(saved_regs, sp); ++ if (UseCompressedOops) { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop_entry), ++ src, dst, count); ++ } else { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop_entry), src, dst, count); + } -+ sd(mdp, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); -+ } else { -+ assert(MethodData::profile_return(), "either profile call args or call ret"); -+ update_mdp_by_constant(mdp, in_bytes(TypeEntriesAtCall::return_only_size())); ++ __ pop_reg(saved_regs, sp); ++ __ bind(done); + } ++ } ++} + -+ // mdp points right after the end of the -+ // CallTypeData/VirtualCallTypeData, right after the cells for the -+ // return value type if there's one -+ -+ bind(profile_continue); ++void ShenandoahBarrierSetAssembler::shenandoah_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call) { ++ if (ShenandoahSATBBarrier) { ++ satb_write_barrier_pre(masm, obj, pre_val, thread, tmp, tosca_live, expand_call); + } +} + -+void InterpreterMacroAssembler::profile_return_type(Register mdp, Register ret, Register tmp) { -+ assert_different_registers(mdp, ret, tmp, xbcp, t0, t1); -+ if (ProfileInterpreter && MethodData::profile_return()) { -+ Label profile_continue, done; ++void ShenandoahBarrierSetAssembler::satb_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call) { ++ // If expand_call is true then we expand the call_VM_leaf macro ++ // directly to skip generating the check by ++ // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. ++ assert(thread == xthread, "must be"); + -+ test_method_data_pointer(mdp, profile_continue); ++ Label done; ++ Label runtime; + -+ if (MethodData::profile_return_jsr292_only()) { -+ assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2"); ++ assert_different_registers(obj, pre_val, tmp, t0); ++ assert(pre_val != noreg && tmp != noreg, "expecting a register"); + -+ // If we don't profile all invoke bytecodes we must make sure -+ // it's a bytecode we indeed profile. We can't go back to the -+ // begining of the ProfileData we intend to update to check its -+ // type because we're right after it and we don't known its -+ // length -+ Label do_profile; -+ lbu(t0, Address(xbcp, 0)); -+ mv(tmp, (u1)Bytecodes::_invokedynamic); -+ beq(t0, tmp, do_profile); -+ mv(tmp, (u1)Bytecodes::_invokehandle); -+ beq(t0, tmp, do_profile); -+ get_method(tmp); -+ lhu(t0, Address(tmp, Method::intrinsic_id_offset_in_bytes())); -+ mv(t1, vmIntrinsics::_compiledLambdaForm); -+ bne(t0, t1, profile_continue); -+ bind(do_profile); -+ } ++ Address in_progress(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_active_offset())); ++ Address index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset())); ++ Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset())); + -+ Address mdo_ret_addr(mdp, -in_bytes(ReturnTypeEntry::size())); -+ mv(tmp, ret); -+ profile_obj_type(tmp, mdo_ret_addr, t1); ++ // Is marking active? 
++ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
++ __ lwu(tmp, in_progress);
++ } else {
++ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
++ __ lbu(tmp, in_progress);
++ }
++ __ beqz(tmp, done);
+
++ // Do we need to load the previous value?
++ if (obj != noreg) {
++ __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW);
+ }
+
++ // Is the previous value null?
++ __ beqz(pre_val, done);
+
++ // Can we store original value in the thread's buffer?
++ // Is index == 0?
++ // (The index field is typed as size_t.)
++ __ ld(tmp, index); // tmp := *index_adr
++ __ beqz(tmp, runtime); // tmp == 0? If yes, goto runtime
+
++ __ sub(tmp, tmp, wordSize); // tmp := tmp - wordSize
++ __ sd(tmp, index); // *index_adr := tmp
++ __ ld(t0, buffer);
++ __ add(tmp, tmp, t0); // tmp := tmp + *buffer_adr
+
++ // Record the previous value
++ __ sd(pre_val, Address(tmp, 0));
++ __ j(done);
+
++ __ bind(runtime);
++ // save the live input values
++ RegSet saved = RegSet::of(pre_val);
++ if (tosca_live) saved += RegSet::of(x10);
++ if (obj != noreg) saved += RegSet::of(obj);
+
++ __ push_reg(saved, sp);
+
++ // Calling the runtime using the regular call_VM_leaf mechanism generates
++ // code (generated by InterpreterMacroAssembler::call_VM_leaf_base)
++ // that checks that the *(rfp+frame::interpreter_frame_last_sp) == NULL.
++ //
++ // If we are generating the pre-barrier without a frame (e.g. in the
++ // intrinsified Reference.get() routine) then ebp might be pointing to
++ // the caller frame and so this check will most likely fail at runtime.
++ //
++ // Expanding the call directly bypasses the generation of the check.
++ // So when we do not have a full interpreter frame on the stack
++ // expand_call should be passed true. 
++ if (expand_call) {
++ assert(pre_val != c_rarg1, "smashed arg");
++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread);
++ } else {
++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread);
++ }
+
++ __ pop_reg(saved, sp);
+
++ __ bind(done);
++}
+
++void ShenandoahBarrierSetAssembler::resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp) {
++ assert(ShenandoahLoadRefBarrier || ShenandoahCASBarrier, "Should be enabled");
+
++ Label is_null;
++ __ beqz(dst, is_null);
++ resolve_forward_pointer_not_null(masm, dst, tmp);
++ __ bind(is_null);
++}
+
++// IMPORTANT: This must preserve all registers, even t0 and t1, except those explicitly
++// passed in.
++void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp) {
++ assert(ShenandoahLoadRefBarrier || ShenandoahCASBarrier, "Should be enabled");
++ // The below loads the mark word, checks if the lowest two bits are
++ // set, and if so, clear the lowest two bits and copy the result
++ // to dst. Otherwise it leaves dst alone.
++ // Implementing this is surprisingly awkward. I do it here by:
++ // - Inverting the mark word
++ // - Test lowest two bits == 0
++ // - If so, set the lowest two bits
++ // - Invert the result back, and copy to dst
++ RegSet saved_regs = RegSet::of(t2);
++ bool borrow_reg = (tmp == noreg);
++ if (borrow_reg) {
++ // No free registers available. Make one useful. 
++ tmp = t0; ++ if (tmp == dst) { ++ tmp = t1; ++ } ++ saved_regs += RegSet::of(tmp); + } -+} + -+void InterpreterMacroAssembler::get_method_counters(Register method, -+ Register mcs, Label& skip) { -+ Label has_counters; -+ ld(mcs, Address(method, Method::method_counters_offset())); -+ bnez(mcs, has_counters); -+ call_VM(noreg, CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::build_method_counters), method); -+ ld(mcs, Address(method, Method::method_counters_offset())); -+ beqz(mcs, skip); // No MethodCounters allocated, OutOfMemory -+ bind(has_counters); -+} ++ assert_different_registers(tmp, dst, t2); ++ __ push_reg(saved_regs, sp); + -+#ifdef ASSERT -+void InterpreterMacroAssembler::verify_access_flags(Register access_flags, uint32_t flag_bits, -+ const char* msg, bool stop_by_hit) { -+ Label L; -+ andi(t0, access_flags, flag_bits); -+ if (stop_by_hit) { -+ beqz(t0, L); -+ } else { -+ bnez(t0, L); -+ } -+ stop(msg); -+ bind(L); -+} ++ Label done; ++ __ ld(tmp, Address(dst, oopDesc::mark_offset_in_bytes())); ++ __ xori(tmp, tmp, -1); // eon with 0 is equivalent to XOR with -1 ++ __ andi(t2, tmp, markWord::lock_mask_in_place); ++ __ bnez(t2, done); ++ __ ori(tmp, tmp, markWord::marked_value); ++ __ xori(dst, tmp, -1); // eon with 0 is equivalent to XOR with -1 ++ __ bind(done); + -+void InterpreterMacroAssembler::verify_frame_setup() { -+ Label L; -+ const Address monitor_block_top(fp, frame::interpreter_frame_monitor_block_top_offset * wordSize); -+ ld(t0, monitor_block_top); -+ beq(esp, t0, L); -+ stop("broken stack frame setup in interpreter"); -+ bind(L); ++ __ pop_reg(saved_regs, sp); +} -+#endif -diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.hpp b/src/hotspot/cpu/riscv/interp_masm_riscv.hpp -new file mode 100644 -index 000000000..042ee8280 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/interp_masm_riscv.hpp -@@ -0,0 +1,283 @@ -+/* -+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ -+ -+#ifndef CPU_RISCV_INTERP_MASM_RISCV_HPP -+#define CPU_RISCV_INTERP_MASM_RISCV_HPP + -+#include "asm/macroAssembler.hpp" -+#include "interpreter/invocationCounter.hpp" -+#include "runtime/frame.hpp" ++void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, ++ Register dst, ++ Address load_addr, ++ DecoratorSet decorators) { ++ assert(ShenandoahLoadRefBarrier, "Should be enabled"); ++ assert(dst != t1 && load_addr.base() != t1, "need t1"); ++ assert_different_registers(load_addr.base(), t0, t1); + -+// This file specializes the assember with interpreter-specific macros ++ bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators); ++ bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators); ++ bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators); ++ bool is_native = ShenandoahBarrierSet::is_native_access(decorators); ++ bool is_narrow = UseCompressedOops && !is_native; + -+typedef ByteSize (*OffsetFunction)(uint); ++ Label heap_stable, not_cset; ++ __ enter(); ++ Address gc_state(xthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); ++ __ lbu(t1, gc_state); + -+class InterpreterMacroAssembler: public MacroAssembler { -+ protected: -+ // Interpreter specific version of call_VM_base -+ using MacroAssembler::call_VM_leaf_base; ++ // Check for heap stability ++ if (is_strong) { ++ __ andi(t1, t1, ShenandoahHeap::HAS_FORWARDED); ++ __ beqz(t1, heap_stable); ++ } else { ++ Label lrb; ++ __ andi(t0, t1, ShenandoahHeap::WEAK_ROOTS); ++ __ bnez(t0, lrb); ++ __ andi(t0, t1, ShenandoahHeap::HAS_FORWARDED); ++ __ beqz(t0, heap_stable); ++ __ bind(lrb); ++ } + -+ virtual void call_VM_leaf_base(address entry_point, -+ int number_of_arguments); ++ // use x11 for load address ++ Register result_dst = dst; ++ if (dst == x11) { ++ __ mv(t1, dst); ++ dst = t1; ++ } + -+ virtual void call_VM_base(Register oop_result, -+ Register java_thread, -+ Register last_java_sp, -+ address entry_point, -+ int number_of_arguments, -+ bool check_exceptions); ++ // Save x10 and x11, unless it is an output register ++ RegSet saved_regs = RegSet::of(x10, x11) - result_dst; ++ __ push_reg(saved_regs, sp); ++ __ la(x11, load_addr); ++ __ mv(x10, dst); + -+ // base routine for all dispatches -+ void dispatch_base(TosState state, address* table, bool verifyoop = true, -+ bool generate_poll = false, Register Rs = t0); ++ // Test for in-cset ++ if (is_strong) { ++ __ li(t1, (uint64_t)ShenandoahHeap::in_cset_fast_test_addr()); ++ __ srli(t0, x10, ShenandoahHeapRegion::region_size_bytes_shift_jint()); ++ __ add(t1, t1, t0); ++ __ lbu(t1, Address(t1)); ++ __ andi(t0, t1, 1); ++ __ beqz(t0, not_cset); ++ } + -+ public: -+ InterpreterMacroAssembler(CodeBuffer* code) : MacroAssembler(code) {} -+ virtual ~InterpreterMacroAssembler() {} ++ __ push_call_clobbered_registers(); ++ if (is_strong) { ++ if (is_narrow) { ++ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong_narrow); ++ } else { ++ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong); ++ } ++ } else if (is_weak) { ++ if (is_narrow) { ++ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak_narrow); ++ } else { ++ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak); ++ } ++ } else { ++ assert(is_phantom, "only remaining strength"); ++ assert(!is_narrow, "phantom access cannot be narrow"); ++ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak); ++ } ++ __ jalr(ra); ++ __ 
mv(t0, x10); ++ __ pop_call_clobbered_registers(); ++ __ mv(x10, t0); ++ __ bind(not_cset); ++ __ mv(result_dst, x10); ++ __ pop_reg(saved_regs, sp); + -+ void load_earlyret_value(TosState state); ++ __ bind(heap_stable); ++ __ leave(); ++} + -+ void jump_to_entry(address entry); ++void ShenandoahBarrierSetAssembler::iu_barrier(MacroAssembler* masm, Register dst, Register tmp) { ++ if (ShenandoahIUBarrier) { ++ __ push_call_clobbered_registers(); + -+ virtual void check_and_handle_popframe(Register java_thread); -+ virtual void check_and_handle_earlyret(Register java_thread); ++ satb_write_barrier_pre(masm, noreg, dst, xthread, tmp, true, false); + -+ // Interpreter-specific registers -+ void save_bcp() { -+ sd(xbcp, Address(fp, frame::interpreter_frame_bcp_offset * wordSize)); ++ __ pop_call_clobbered_registers(); + } ++} + -+ void restore_bcp() { -+ ld(xbcp, Address(fp, frame::interpreter_frame_bcp_offset * wordSize)); ++// ++// Arguments: ++// ++// Inputs: ++// src: oop location to load from, might be clobbered ++// ++// Output: ++// dst: oop loaded from src location ++// ++// Kill: ++// x30 (tmp reg) ++// ++// Alias: ++// dst: x30 (might use x30 as temporary output register to avoid clobbering src) ++// ++void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, ++ DecoratorSet decorators, ++ BasicType type, ++ Register dst, ++ Address src, ++ Register tmp1, ++ Register tmp_thread) { ++ // 1: non-reference load, no additional barrier is needed ++ if (!is_reference_type(type)) { ++ BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); ++ return; + } + -+ void restore_locals() { -+ ld(xlocals, Address(fp, frame::interpreter_frame_locals_offset * wordSize)); -+ } ++ // 2: load a reference from src location and apply LRB if needed ++ if (ShenandoahBarrierSet::need_load_reference_barrier(decorators, type)) { ++ Register result_dst = dst; + -+ void restore_constant_pool_cache() { -+ ld(xcpool, Address(fp, frame::interpreter_frame_cache_offset * wordSize)); -+ } ++ // Preserve src location for LRB ++ RegSet saved_regs; ++ if (dst == src.base()) { ++ dst = (src.base() == x28) ? 
x29 : x28; ++ saved_regs = RegSet::of(dst); ++ __ push_reg(saved_regs, sp); ++ } ++ assert_different_registers(dst, src.base()); + -+ void get_dispatch(); ++ BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); + -+ // Helpers for runtime call arguments/results -+ void get_method(Register reg) { -+ ld(reg, Address(fp, frame::interpreter_frame_method_offset * wordSize)); -+ } ++ load_reference_barrier(masm, dst, src, decorators); + -+ void get_const(Register reg) { -+ get_method(reg); -+ ld(reg, Address(reg, in_bytes(Method::const_offset()))); -+ } ++ if (dst != result_dst) { ++ __ mv(result_dst, dst); ++ dst = result_dst; ++ } + -+ void get_constant_pool(Register reg) { -+ get_const(reg); -+ ld(reg, Address(reg, in_bytes(ConstMethod::constants_offset()))); ++ if (saved_regs.bits() != 0) { ++ __ pop_reg(saved_regs, sp); ++ } ++ } else { ++ BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); + } + -+ void get_constant_pool_cache(Register reg) { -+ get_constant_pool(reg); -+ ld(reg, Address(reg, ConstantPool::cache_offset_in_bytes())); ++ // 3: apply keep-alive barrier if needed ++ if (ShenandoahBarrierSet::need_keep_alive_barrier(decorators, type)) { ++ __ enter(); ++ __ push_call_clobbered_registers(); ++ satb_write_barrier_pre(masm /* masm */, ++ noreg /* obj */, ++ dst /* pre_val */, ++ xthread /* thread */, ++ tmp1 /* tmp */, ++ true /* tosca_live */, ++ true /* expand_call */); ++ __ pop_call_clobbered_registers(); ++ __ leave(); + } ++} + -+ void get_cpool_and_tags(Register cpool, Register tags) { -+ get_constant_pool(cpool); -+ ld(tags, Address(cpool, ConstantPool::tags_offset_in_bytes())); ++void ShenandoahBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ bool on_oop = is_reference_type(type); ++ if (!on_oop) { ++ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); ++ return; + } + -+ void get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset); -+ void get_cache_and_index_at_bcp(Register cache, Register index, int bcp_offset, size_t index_size = sizeof(u2)); -+ void get_cache_and_index_and_bytecode_at_bcp(Register cache, Register index, Register bytecode, int byte_no, int bcp_offset, size_t index_size = sizeof(u2)); -+ void get_cache_entry_pointer_at_bcp(Register cache, Register tmp, int bcp_offset, size_t index_size = sizeof(u2)); -+ void get_cache_index_at_bcp(Register index, int bcp_offset, size_t index_size = sizeof(u2)); -+ void get_method_counters(Register method, Register mcs, Label& skip); ++ // flatten object address if needed ++ if (dst.offset() == 0) { ++ if (dst.base() != x13) { ++ __ mv(x13, dst.base()); ++ } ++ } else { ++ __ la(x13, dst); ++ } + -+ // Load cpool->resolved_references(index). -+ void load_resolved_reference_at_index(Register result, Register index, Register tmp = x15); ++ shenandoah_write_barrier_pre(masm, ++ x13 /* obj */, ++ tmp2 /* pre_val */, ++ xthread /* thread */, ++ tmp1 /* tmp */, ++ val != noreg /* tosca_live */, ++ false /* expand_call */); + -+ // Load cpool->resolved_klass_at(index). -+ void load_resolved_klass_at_offset(Register cpool, Register index, Register klass, Register temp); ++ if (val == noreg) { ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(x13, 0), noreg, noreg, noreg); ++ } else { ++ iu_barrier(masm, val, tmp1); ++ // G1 barrier needs uncompressed oop for region cross check. 
++ Register new_val = val; ++ if (UseCompressedOops) { ++ new_val = t1; ++ __ mv(new_val, val); ++ } ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(x13, 0), val, noreg, noreg); ++ } ++} + -+ void pop_ptr(Register r = x10); -+ void pop_i(Register r = x10); -+ void pop_l(Register r = x10); -+ void pop_f(FloatRegister r = f10); -+ void pop_d(FloatRegister r = f10); -+ void push_ptr(Register r = x10); -+ void push_i(Register r = x10); -+ void push_l(Register r = x10); -+ void push_f(FloatRegister r = f10); -+ void push_d(FloatRegister r = f10); ++void ShenandoahBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, ++ Register obj, Register tmp, Label& slowpath) { ++ Label done; ++ // Resolve jobject ++ BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, obj, tmp, slowpath); + -+ void pop(TosState state); // transition vtos -> state -+ void push(TosState state); // transition state -> vtos ++ // Check for null. ++ __ beqz(obj, done); + -+ void empty_expression_stack() { -+ ld(esp, Address(fp, frame::interpreter_frame_monitor_block_top_offset * wordSize)); -+ // NULL last_sp until next java call -+ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ assert(obj != t1, "need t1"); ++ Address gc_state(jni_env, ShenandoahThreadLocalData::gc_state_offset() - JavaThread::jni_environment_offset()); ++ __ lbu(t1, gc_state); ++ ++ // Check for heap in evacuation phase ++ __ andi(t0, t1, ShenandoahHeap::EVACUATION); ++ __ bnez(t0, slowpath); ++ ++ __ bind(done); ++} ++ ++// Special Shenandoah CAS implementation that handles false negatives due ++// to concurrent evacuation. The service is more complex than a ++// traditional CAS operation because the CAS operation is intended to ++// succeed if the reference at addr exactly matches expected or if the ++// reference at addr holds a pointer to a from-space object that has ++// been relocated to the location named by expected. There are two ++// races that must be addressed: ++// a) A parallel thread may mutate the contents of addr so that it points ++// to a different object. In this case, the CAS operation should fail. ++// b) A parallel thread may heal the contents of addr, replacing a ++// from-space pointer held in addr with the to-space pointer ++// representing the new location of the object. ++// Upon entry to cmpxchg_oop, it is assured that new_val equals NULL ++// or it refers to an object that is not being evacuated out of ++// from-space, or it refers to the to-space version of an object that ++// is being evacuated out of from-space. ++// ++// By default the value held in the result register following execution ++// of the generated code sequence is 0 to indicate failure of CAS, ++// non-zero to indicate success. If is_cae, the result is the value most ++// recently fetched from addr rather than a boolean success indicator. ++// ++// Clobbers t0, t1 ++void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm, ++ Register addr, ++ Register expected, ++ Register new_val, ++ Assembler::Aqrl acquire, ++ Assembler::Aqrl release, ++ bool is_cae, ++ Register result) { ++ bool is_narrow = UseCompressedOops; ++ Assembler::operand_size size = is_narrow ? Assembler::uint32 : Assembler::int64; ++ ++ assert_different_registers(addr, expected, t0, t1); ++ assert_different_registers(addr, new_val, t0, t1); ++ ++ Label retry, success, fail, done; ++ ++ __ bind(retry); ++ ++ // Step1: Try to CAS. 
++ __ cmpxchg(addr, expected, new_val, size, acquire, release, /* result */ t1); ++ ++ // If success, then we are done. ++ __ beq(expected, t1, success); ++ ++ // Step2: CAS failed, check the forwared pointer. ++ __ mv(t0, t1); ++ ++ if (is_narrow) { ++ __ decode_heap_oop(t0, t0); + } ++ resolve_forward_pointer(masm, t0); + -+ // Helpers for swap and dup -+ void load_ptr(int n, Register val); -+ void store_ptr(int n, Register val); ++ __ encode_heap_oop(t0, t0); + -+// Load float value from 'address'. The value is loaded onto the FPU register v0. -+ void load_float(Address src); -+ void load_double(Address src); ++ // Report failure when the forwarded oop was not expected. ++ __ bne(t0, expected, fail); + -+ // Generate a subtype check: branch to ok_is_subtype if sub_klass is -+ // a subtype of super_klass. -+ void gen_subtype_check( Register sub_klass, Label &ok_is_subtype ); ++ // Step 3: CAS again using the forwarded oop. ++ __ cmpxchg(addr, t1, new_val, size, acquire, release, /* result */ t0); + -+ // Dispatching -+ void dispatch_prolog(TosState state, int step = 0); -+ void dispatch_epilog(TosState state, int step = 0); -+ // dispatch via t0 -+ void dispatch_only(TosState state, bool generate_poll = false, Register Rs = t0); -+ // dispatch normal table via t0 (assume t0 is loaded already) -+ void dispatch_only_normal(TosState state, Register Rs = t0); -+ void dispatch_only_noverify(TosState state, Register Rs = t0); -+ // load t0 from [xbcp + step] and dispatch via t0 -+ void dispatch_next(TosState state, int step = 0, bool generate_poll = false); -+ // load t0 from [xbcp] and dispatch via t0 and table -+ void dispatch_via (TosState state, address* table); ++ // Retry when failed. ++ __ bne(t0, t1, retry); + -+ // jump to an invoked target -+ void prepare_to_jump_from_interpreted(); -+ void jump_from_interpreted(Register method); ++ __ bind(success); ++ if (is_cae) { ++ __ mv(result, expected); ++ } else { ++ __ addi(result, zr, 1); ++ } ++ __ j(done); + ++ __ bind(fail); ++ if (is_cae) { ++ __ mv(result, t0); ++ } else { ++ __ mv(result, zr); ++ } + -+ // Returning from interpreted functions -+ // -+ // Removes the current activation (incl. unlocking of monitors) -+ // and sets up the return address. This code is also used for -+ // exception unwindwing. In that case, we do not want to throw -+ // IllegalMonitorStateExceptions, since that might get us into an -+ // infinite rethrow exception loop. -+ // Additionally this code is used for popFrame and earlyReturn. -+ // In popFrame case we want to skip throwing an exception, -+ // installing an exception, and notifying jvmdi. -+ // In earlyReturn case we only want to skip throwing an exception -+ // and installing an exception. -+ void remove_activation(TosState state, -+ bool throw_monitor_exception = true, -+ bool install_monitor_exception = true, -+ bool notify_jvmdi = true); ++ __ bind(done); ++} + -+ // FIXME: Give us a valid frame at a null check. -+ virtual void null_check(Register reg, int offset = -1) { -+ MacroAssembler::null_check(reg, offset); ++#undef __ ++ ++#ifdef COMPILER1 ++ ++#define __ ce->masm()-> ++ ++void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub) { ++ ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); ++ // At this point we know that marking is in progress. ++ // If do_load() is true then we have to emit the ++ // load of the previous value; otherwise it has already ++ // been loaded into _pre_val. 
++ __ bind(*stub->entry()); ++ ++ assert(stub->pre_val()->is_register(), "Precondition."); ++ ++ Register pre_val_reg = stub->pre_val()->as_register(); ++ ++ if (stub->do_load()) { ++ ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /* wide */); + } ++ __ beqz(pre_val_reg, *stub->continuation(), /* is_far */ true); ++ ce->store_parameter(stub->pre_val()->as_register(), 0); ++ __ far_call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin())); ++ __ j(*stub->continuation()); ++} + -+ // Object locking -+ void lock_object (Register lock_reg); -+ void unlock_object(Register lock_reg); ++void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assembler* ce, ++ ShenandoahLoadReferenceBarrierStub* stub) { ++ ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); ++ __ bind(*stub->entry()); + -+ // Interpreter profiling operations -+ void set_method_data_pointer_for_bcp(); -+ void test_method_data_pointer(Register mdp, Label& zero_continue); -+ void verify_method_data_pointer(); ++ DecoratorSet decorators = stub->decorators(); ++ bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators); ++ bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators); ++ bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators); ++ bool is_native = ShenandoahBarrierSet::is_native_access(decorators); + -+ void set_mdp_data_at(Register mdp_in, int constant, Register value); -+ void increment_mdp_data_at(Address data, bool decrement = false); -+ void increment_mdp_data_at(Register mdp_in, int constant, -+ bool decrement = false); -+ void increment_mdp_data_at(Register mdp_in, Register reg, int constant, -+ bool decrement = false); -+ void increment_mask_and_jump(Address counter_addr, -+ int increment, Address mask, -+ Register tmp1, Register tmp2, -+ bool preloaded, Label* where); ++ Register obj = stub->obj()->as_register(); ++ Register res = stub->result()->as_register(); ++ Register addr = stub->addr()->as_pointer_register(); ++ Register tmp1 = stub->tmp1()->as_register(); ++ Register tmp2 = stub->tmp2()->as_register(); + -+ void set_mdp_flag_at(Register mdp_in, int flag_constant); -+ void test_mdp_data_at(Register mdp_in, int offset, Register value, -+ Register test_value_out, -+ Label& not_equal_continue); ++ assert(res == x10, "result must arrive in x10"); ++ assert_different_registers(tmp1, tmp2, t0); + -+ void record_klass_in_profile(Register receiver, Register mdp, -+ Register reg2, bool is_virtual_call); -+ void record_klass_in_profile_helper(Register receiver, Register mdp, -+ Register reg2, -+ Label& done, bool is_virtual_call); -+ void record_item_in_profile_helper(Register item, Register mdp, -+ Register reg2, int start_row, Label& done, int total_rows, -+ OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn, -+ int non_profiled_offset); ++ if (res != obj) { ++ __ mv(res, obj); ++ } + -+ void update_mdp_by_offset(Register mdp_in, int offset_of_offset); -+ void update_mdp_by_offset(Register mdp_in, Register reg, int offset_of_disp); -+ void update_mdp_by_constant(Register mdp_in, int constant); -+ void update_mdp_for_ret(Register return_bci); ++ if (is_strong) { ++ // Check for object in cset. 
++ __ mv(tmp2, ShenandoahHeap::in_cset_fast_test_addr()); ++ __ srli(tmp1, res, ShenandoahHeapRegion::region_size_bytes_shift_jint()); ++ __ add(tmp2, tmp2, tmp1); ++ __ lbu(tmp2, Address(tmp2)); ++ __ beqz(tmp2, *stub->continuation(), true /* is_far */); ++ } + -+ // narrow int return value -+ void narrow(Register result); ++ ce->store_parameter(res, 0); ++ ce->store_parameter(addr, 1); + -+ void profile_taken_branch(Register mdp, Register bumped_count); -+ void profile_not_taken_branch(Register mdp); -+ void profile_call(Register mdp); -+ void profile_final_call(Register mdp); -+ void profile_virtual_call(Register receiver, Register mdp, -+ Register t1, -+ bool receiver_can_be_null = false); -+ void profile_ret(Register return_bci, Register mdp); -+ void profile_null_seen(Register mdp); -+ void profile_typecheck(Register mdp, Register klass, Register temp); -+ void profile_typecheck_failed(Register mdp); -+ void profile_switch_default(Register mdp); -+ void profile_switch_case(Register index_in_scratch, Register mdp, -+ Register temp); ++ if (is_strong) { ++ if (is_native) { ++ __ far_call(RuntimeAddress(bs->load_reference_barrier_strong_native_rt_code_blob()->code_begin())); ++ } else { ++ __ far_call(RuntimeAddress(bs->load_reference_barrier_strong_rt_code_blob()->code_begin())); ++ } ++ } else if (is_weak) { ++ __ far_call(RuntimeAddress(bs->load_reference_barrier_weak_rt_code_blob()->code_begin())); ++ } else { ++ assert(is_phantom, "only remaining strength"); ++ __ far_call(RuntimeAddress(bs->load_reference_barrier_phantom_rt_code_blob()->code_begin())); ++ } + -+ void profile_obj_type(Register obj, const Address& mdo_addr, Register tmp); -+ void profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual); -+ void profile_return_type(Register mdp, Register ret, Register tmp); -+ void profile_parameters_type(Register mdp, Register tmp1, Register tmp2, Register tmp3); ++ __ j(*stub->continuation()); ++} + -+ // Debugging -+ // only if +VerifyFPU && (state == ftos || state == dtos) -+ void verify_FPU(int stack_depth, TosState state = ftos); ++#undef __ + -+ typedef enum { NotifyJVMTI, SkipNotifyJVMTI } NotifyMethodExitMode; ++#define __ sasm-> + -+ // support for jvmti/dtrace -+ void notify_method_entry(); -+ void notify_method_exit(TosState state, NotifyMethodExitMode mode); ++void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) { ++ __ prologue("shenandoah_pre_barrier", false); + -+ virtual void _call_Unimplemented(address call_site) { -+ save_bcp(); -+ set_last_Java_frame(esp, fp, (address) pc(), t0); -+ MacroAssembler::_call_Unimplemented(call_site); ++ // arg0 : previous value of memory ++ ++ BarrierSet* bs = BarrierSet::barrier_set(); ++ ++ const Register pre_val = x10; ++ const Register thread = xthread; ++ const Register tmp = t0; ++ ++ Address queue_index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset())); ++ Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset())); ++ ++ Label done; ++ Label runtime; ++ ++ // Is marking still active? ++ Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); ++ __ lb(tmp, gc_state); ++ __ andi(tmp, tmp, ShenandoahHeap::MARKING); ++ __ beqz(tmp, done); ++ ++ // Can we store original value in the thread's buffer? 
++ __ ld(tmp, queue_index); ++ __ beqz(tmp, runtime); ++ ++ __ sub(tmp, tmp, wordSize); ++ __ sd(tmp, queue_index); ++ __ ld(t1, buffer); ++ __ add(tmp, tmp, t1); ++ __ load_parameter(0, t1); ++ __ sd(t1, Address(tmp, 0)); ++ __ j(done); ++ ++ __ bind(runtime); ++ __ push_call_clobbered_registers(); ++ __ load_parameter(0, pre_val); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread); ++ __ pop_call_clobbered_registers(); ++ __ bind(done); ++ ++ __ epilogue(); ++} ++ ++void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, ++ DecoratorSet decorators) { ++ __ prologue("shenandoah_load_reference_barrier", false); ++ // arg0 : object to be resolved ++ ++ __ push_call_clobbered_registers(); ++ __ load_parameter(0, x10); ++ __ load_parameter(1, x11); ++ ++ bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators); ++ bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators); ++ bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators); ++ bool is_native = ShenandoahBarrierSet::is_native_access(decorators); ++ if (is_strong) { ++ if (is_native) { ++ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong); ++ } else { ++ if (UseCompressedOops) { ++ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong_narrow); ++ } else { ++ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong); ++ } ++ } ++ } else if (is_weak) { ++ assert(!is_native, "weak must not be called off-heap"); ++ if (UseCompressedOops) { ++ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak_narrow); ++ } else { ++ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak); ++ } ++ } else { ++ assert(is_phantom, "only remaining strength"); ++ assert(is_native, "phantom must only be called off-heap"); ++ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_phantom); + } ++ __ jalr(ra); ++ __ mv(t0, x10); ++ __ pop_call_clobbered_registers(); ++ __ mv(x10, t0); + -+#ifdef ASSERT -+ void verify_access_flags(Register access_flags, uint32_t flag_bits, -+ const char* msg, bool stop_by_hit = true); -+ void verify_frame_setup(); -+#endif -+}; ++ __ epilogue(); ++} + -+#endif // CPU_RISCV_INTERP_MASM_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp b/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp ++#undef __ ++ ++#endif // COMPILER1 +diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp new file mode 100644 -index 000000000..777f326e3 +index 00000000000..a705f497667 --- /dev/null -+++ b/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp -@@ -0,0 +1,296 @@ ++++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp +@@ -0,0 +1,88 @@ +/* -+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * @@ -17606,455 +17721,369 @@ index 000000000..777f326e3 + * + */ + -+#include "precompiled.hpp" -+#include "asm/macroAssembler.inline.hpp" -+#include "interpreter/interp_masm.hpp" -+#include "interpreter/interpreter.hpp" -+#include "interpreter/interpreterRuntime.hpp" -+#include "memory/allocation.inline.hpp" -+#include "memory/universe.hpp" -+#include "oops/method.hpp" -+#include "oops/oop.inline.hpp" -+#include "runtime/handles.inline.hpp" -+#include "runtime/icache.hpp" -+#include "runtime/interfaceSupport.inline.hpp" -+#include "runtime/signature.hpp" -+ -+#define __ _masm-> -+ -+// Implementation of SignatureHandlerGenerator -+Register InterpreterRuntime::SignatureHandlerGenerator::from() { return xlocals; } -+Register InterpreterRuntime::SignatureHandlerGenerator::to() { return sp; } -+Register InterpreterRuntime::SignatureHandlerGenerator::temp() { return t0; } -+ -+Register InterpreterRuntime::SignatureHandlerGenerator::next_gpr() { -+ if (_num_reg_int_args < Argument::n_int_register_parameters_c - 1) { -+ return g_INTArgReg[++_num_reg_int_args]; -+ } -+ return noreg; -+} ++#ifndef CPU_RISCV_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_RISCV_HPP ++#define CPU_RISCV_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_RISCV_HPP + -+FloatRegister InterpreterRuntime::SignatureHandlerGenerator::next_fpr() { -+ if (_num_reg_fp_args < Argument::n_float_register_parameters_c) { -+ return g_FPArgReg[_num_reg_fp_args++]; -+ } -+ return fnoreg; -+} ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "gc/shenandoah/shenandoahBarrierSet.hpp" ++#ifdef COMPILER1 ++class LIR_Assembler; ++class ShenandoahPreBarrierStub; ++class ShenandoahLoadReferenceBarrierStub; ++class StubAssembler; ++#endif ++class StubCodeGenerator; + -+int InterpreterRuntime::SignatureHandlerGenerator::next_stack_offset() { -+ int ret = _stack_offset; -+ _stack_offset += wordSize; -+ return ret; -+} ++class ShenandoahBarrierSetAssembler: public BarrierSetAssembler { ++private: + -+InterpreterRuntime::SignatureHandlerGenerator::SignatureHandlerGenerator( -+ const methodHandle& method, CodeBuffer* buffer) : NativeSignatureIterator(method) { -+ _masm = new MacroAssembler(buffer); // allocate on resourse area by default -+ _num_reg_int_args = (method->is_static() ? 
1 : 0); -+ _num_reg_fp_args = 0; -+ _stack_offset = 0; -+} ++ void satb_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call); ++ void shenandoah_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call); + -+void InterpreterRuntime::SignatureHandlerGenerator::pass_int() { -+ const Address src(from(), Interpreter::local_offset_in_bytes(offset())); ++ void resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp = noreg); ++ void resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp = noreg); ++ void load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr, DecoratorSet decorators); + -+ Register reg = next_gpr(); -+ if (reg != noreg) { -+ __ lw(reg, src); -+ } else { -+ __ lw(x10, src); -+ __ sw(x10, Address(to(), next_stack_offset())); -+ } -+} ++public: + -+void InterpreterRuntime::SignatureHandlerGenerator::pass_long() { -+ const Address src(from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ void iu_barrier(MacroAssembler* masm, Register dst, Register tmp); + -+ Register reg = next_gpr(); -+ if (reg != noreg) { -+ __ ld(reg, src); -+ } else { -+ __ ld(x10, src); -+ __ sd(x10, Address(to(), next_stack_offset())); -+ } -+} ++#ifdef COMPILER1 ++ void gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub); ++ void gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub); ++ void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); ++ void generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, DecoratorSet decorators); ++#endif + -+void InterpreterRuntime::SignatureHandlerGenerator::pass_float() { -+ const Address src(from(), Interpreter::local_offset_in_bytes(offset())); ++ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register src, Register dst, Register count, RegSet saved_regs); + -+ FloatRegister reg = next_fpr(); -+ if (reg != fnoreg) { -+ __ flw(reg, src); -+ } else { -+ // a floating-point argument is passed according to the integer calling -+ // convention if no floating-point argument register available -+ pass_int(); -+ } -+} ++ virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread); ++ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); + -+void InterpreterRuntime::SignatureHandlerGenerator::pass_double() { -+ const Address src(from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, ++ Register obj, Register tmp, Label& slowpath); + -+ FloatRegister reg = next_fpr(); -+ if (reg != fnoreg) { -+ __ fld(reg, src); -+ } else { -+ // a floating-point argument is passed according to the integer calling -+ // convention if no floating-point argument register available -+ pass_long(); -+ } -+} ++ void cmpxchg_oop(MacroAssembler* masm, Register addr, Register expected, Register new_val, ++ Assembler::Aqrl acquire, Assembler::Aqrl release, bool is_cae, Register result); ++}; + -+void InterpreterRuntime::SignatureHandlerGenerator::pass_object() { -+ Register reg = next_gpr(); -+ if (reg == c_rarg1) { -+ assert(offset() == 0, 
"argument register 1 can only be (non-null) receiver"); -+ __ addi(c_rarg1, from(), Interpreter::local_offset_in_bytes(offset())); -+ } else if (reg != noreg) { -+ // c_rarg2-c_rarg7 -+ __ addi(x10, from(), Interpreter::local_offset_in_bytes(offset())); -+ __ mv(reg, zr); //_num_reg_int_args:c_rarg -> 1:c_rarg2, 2:c_rarg3... -+ __ ld(temp(), x10); -+ Label L; -+ __ beqz(temp(), L); -+ __ mv(reg, x10); -+ __ bind(L); -+ } else { -+ //to stack -+ __ addi(x10, from(), Interpreter::local_offset_in_bytes(offset())); -+ __ ld(temp(), x10); -+ Label L; -+ __ bnez(temp(), L); -+ __ mv(x10, zr); -+ __ bind(L); -+ assert(sizeof(jobject) == wordSize, ""); -+ __ sd(x10, Address(to(), next_stack_offset())); -+ } -+} ++#endif // CPU_RISCV_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad +new file mode 100644 +index 00000000000..6c855f23c2a +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad +@@ -0,0 +1,285 @@ ++// ++// Copyright (c) 2018, Red Hat, Inc. All rights reserved. ++// Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. ++// ++// + -+void InterpreterRuntime::SignatureHandlerGenerator::generate(uint64_t fingerprint) { -+ // generate code to handle arguments -+ iterate(fingerprint); ++source_hpp %{ ++#include "gc/shenandoah/shenandoahBarrierSet.hpp" ++#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" ++%} + -+ // return result handler -+ __ la(x10, ExternalAddress(Interpreter::result_handler(method()->result_type()))); -+ __ ret(); ++instruct compareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ ++ match(Set res (ShenandoahCompareAndSwapP mem (Binary oldval newval))); ++ ins_cost(10 * DEFAULT_COST); + -+ __ flush(); -+} ++ effect(TEMP tmp, KILL cr); + ++ format %{ ++ "cmpxchg_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapP_shenandoah" ++ %} + -+// Implementation of SignatureHandlerLibrary ++ ins_encode %{ ++ Register tmp = $tmp$$Register; ++ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. 
++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, ++ Assembler::relaxed /* acquire */, Assembler::rl /* release */, ++ false /* is_cae */, $res$$Register); ++ %} + -+void SignatureHandlerLibrary::pd_set_handler(address handler) {} ++ ins_pipe(pipe_slow); ++%} + ++instruct compareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ ++ match(Set res (ShenandoahCompareAndSwapN mem (Binary oldval newval))); ++ ins_cost(10 * DEFAULT_COST); + -+class SlowSignatureHandler -+ : public NativeSignatureIterator { -+ private: -+ address _from; -+ intptr_t* _to; -+ intptr_t* _int_args; -+ intptr_t* _fp_args; -+ intptr_t* _fp_identifiers; -+ unsigned int _num_reg_int_args; -+ unsigned int _num_reg_fp_args; ++ effect(TEMP tmp, KILL cr); + ++ format %{ ++ "cmpxchgw_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapN_shenandoah" ++ %} + -+ intptr_t* single_slot_addr() { -+ intptr_t* from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(0)); -+ _from -= Interpreter::stackElementSize; -+ return from_addr; -+ } ++ ins_encode %{ ++ Register tmp = $tmp$$Register; ++ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, ++ Assembler::relaxed /* acquire */, Assembler::rl /* release */, ++ false /* is_cae */, $res$$Register); ++ %} + -+ intptr_t* double_slot_addr() { -+ intptr_t* from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(1)); -+ _from -= 2 * Interpreter::stackElementSize; -+ return from_addr; -+ } ++ ins_pipe(pipe_slow); ++%} + -+ int pass_gpr(intptr_t value) { -+ if (_num_reg_int_args < Argument::n_int_register_parameters_c - 1) { -+ *_int_args++ = value; -+ return _num_reg_int_args++; -+ } -+ return -1; -+ } ++instruct compareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ ++ predicate(needs_acquiring_load_reserved(n)); ++ match(Set res (ShenandoahCompareAndSwapP mem (Binary oldval newval))); ++ ins_cost(10 * DEFAULT_COST); + -+ int pass_fpr(intptr_t value) { -+ if (_num_reg_fp_args < Argument::n_float_register_parameters_c) { -+ *_fp_args++ = value; -+ return _num_reg_fp_args++; -+ } -+ return -1; -+ } ++ effect(TEMP tmp, KILL cr); + -+ void pass_stack(intptr_t value) { -+ *_to++ = value; -+ } ++ format %{ ++ "cmpxchg_acq_shenandoah_oop $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapPAcq_shenandoah" ++ %} + ++ ins_encode %{ ++ Register tmp = $tmp$$Register; ++ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. 
++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, ++ Assembler::aq /* acquire */, Assembler::rl /* release */, ++ false /* is_cae */, $res$$Register); ++ %} + -+ virtual void pass_int() { -+ jint value = *(jint*)single_slot_addr(); -+ if (pass_gpr(value) < 0) { -+ pass_stack(value); -+ } -+ } ++ ins_pipe(pipe_slow); ++%} + ++instruct compareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ ++ predicate(needs_acquiring_load_reserved(n)); ++ match(Set res (ShenandoahCompareAndSwapN mem (Binary oldval newval))); ++ ins_cost(10 * DEFAULT_COST); + -+ virtual void pass_long() { -+ intptr_t value = *double_slot_addr(); -+ if (pass_gpr(value) < 0) { -+ pass_stack(value); -+ } -+ } ++ effect(TEMP tmp, KILL cr); + -+ virtual void pass_object() { -+ intptr_t* addr = single_slot_addr(); -+ intptr_t value = *addr == 0 ? NULL : (intptr_t)addr; -+ if (pass_gpr(value) < 0) { -+ pass_stack(value); -+ } -+ } ++ format %{ ++ "cmpxchgw_acq_shenandoah_narrow_oop $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapNAcq_shenandoah" ++ %} + -+ virtual void pass_float() { -+ jint value = *(jint*) single_slot_addr(); -+ // a floating-point argument is passed according to the integer calling -+ // convention if no floating-point argument register available -+ if (pass_fpr(value) < 0 && pass_gpr(value) < 0) { -+ pass_stack(value); -+ } -+ } ++ ins_encode %{ ++ Register tmp = $tmp$$Register; ++ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, ++ Assembler::aq /* acquire */, Assembler::rl /* release */, ++ false /* is_cae */, $res$$Register); ++ %} + -+ virtual void pass_double() { -+ intptr_t value = *double_slot_addr(); -+ int arg = pass_fpr(value); -+ if (0 <= arg) { -+ *_fp_identifiers |= (1ull << arg); // mark as double -+ } else if (pass_gpr(value) < 0) { // no need to mark if passing by integer registers or stack -+ pass_stack(value); -+ } -+ } ++ ins_pipe(pipe_slow); ++%} + -+ public: -+ SlowSignatureHandler(const methodHandle& method, address from, intptr_t* to) -+ : NativeSignatureIterator(method) -+ { -+ _from = from; -+ _to = to; ++instruct compareAndExchangeN_shenandoah(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ ++ match(Set res (ShenandoahCompareAndExchangeN mem (Binary oldval newval))); ++ ins_cost(10 * DEFAULT_COST); ++ effect(TEMP_DEF res, TEMP tmp, KILL cr); + -+ _int_args = to - (method->is_static() ? 16 : 17); -+ _fp_args = to - 8; -+ _fp_identifiers = to - 9; -+ *(int*) _fp_identifiers = 0; -+ _num_reg_int_args = (method->is_static() ? 1 : 0); -+ _num_reg_fp_args = 0; -+ } -+ ~SlowSignatureHandler() -+ { -+ _from = NULL; -+ _to = NULL; -+ _int_args = NULL; -+ _fp_args = NULL; -+ _fp_identifiers = NULL; -+ } -+}; ++ format %{ ++ "cmpxchgw_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeN_shenandoah" ++ %} + ++ ins_encode %{ ++ Register tmp = $tmp$$Register; ++ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. 
++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, ++ Assembler::relaxed /* acquire */, Assembler::rl /* release */, ++ true /* is_cae */, $res$$Register); ++ %} + -+IRT_ENTRY(address, -+ InterpreterRuntime::slow_signature_handler(JavaThread* thread, -+ Method* method, -+ intptr_t* from, -+ intptr_t* to)) -+ methodHandle m(thread, (Method*)method); -+ assert(m->is_native(), "sanity check"); ++ ins_pipe(pipe_slow); ++%} + -+ // handle arguments -+ SlowSignatureHandler ssh(m, (address)from, to); -+ ssh.iterate((uint64_t)UCONST64(-1)); ++instruct compareAndExchangeP_shenandoah(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ ++ match(Set res (ShenandoahCompareAndExchangeP mem (Binary oldval newval))); ++ ins_cost(10 * DEFAULT_COST); + -+ // return result handler -+ return Interpreter::result_handler(m->result_type()); -+IRT_END -diff --git a/src/hotspot/cpu/riscv/interpreterRT_riscv.hpp b/src/hotspot/cpu/riscv/interpreterRT_riscv.hpp -new file mode 100644 -index 000000000..06342869f ---- /dev/null -+++ b/src/hotspot/cpu/riscv/interpreterRT_riscv.hpp -@@ -0,0 +1,68 @@ -+/* -+ * Copyright (c) 1998, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++ effect(TEMP_DEF res, TEMP tmp, KILL cr); ++ format %{ ++ "cmpxchg_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndExchangeP_shenandoah" ++ %} + -+#ifndef CPU_RISCV_INTERPRETERRT_RISCV_HPP -+#define CPU_RISCV_INTERPRETERRT_RISCV_HPP ++ ins_encode %{ ++ Register tmp = $tmp$$Register; ++ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, ++ Assembler::relaxed /* acquire */, Assembler::rl /* release */, ++ true /* is_cae */, $res$$Register); ++ %} + -+// This is included in the middle of class Interpreter. -+// Do not include files here. 
++ ins_pipe(pipe_slow); ++%} + -+// native method calls ++instruct weakCompareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ ++ match(Set res (ShenandoahWeakCompareAndSwapN mem (Binary oldval newval))); ++ ins_cost(10 * DEFAULT_COST); + -+class SignatureHandlerGenerator: public NativeSignatureIterator { -+ private: -+ MacroAssembler* _masm; -+ unsigned int _num_reg_fp_args; -+ unsigned int _num_reg_int_args; -+ int _stack_offset; ++ effect(TEMP tmp, KILL cr); ++ format %{ ++ "cmpxchgw_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapN_shenandoah" ++ "mv $res, EQ\t# $res <-- (EQ ? 1 : 0)" ++ %} + -+ void pass_int(); -+ void pass_long(); -+ void pass_float(); -+ void pass_double(); -+ void pass_object(); ++ ins_encode %{ ++ Register tmp = $tmp$$Register; ++ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. ++ // Weak is not current supported by ShenandoahBarrierSet::cmpxchg_oop ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, ++ Assembler::relaxed /* acquire */, Assembler::rl /* release */, ++ false /* is_cae */, $res$$Register); ++ %} + -+ Register next_gpr(); -+ FloatRegister next_fpr(); -+ int next_stack_offset(); ++ ins_pipe(pipe_slow); ++%} + -+ public: -+ // Creation -+ SignatureHandlerGenerator(const methodHandle& method, CodeBuffer* buffer); -+ virtual ~SignatureHandlerGenerator() { -+ _masm = NULL; -+ } ++instruct compareAndExchangeNAcq_shenandoah(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ ++ predicate(needs_acquiring_load_reserved(n)); ++ match(Set res (ShenandoahCompareAndExchangeN mem (Binary oldval newval))); ++ ins_cost(10 * DEFAULT_COST); + -+ // Code generation -+ void generate(uint64_t fingerprint); ++ effect(TEMP_DEF res, TEMP tmp, KILL cr); ++ format %{ ++ "cmpxchgw_acq_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeNAcq_shenandoah" ++ %} + -+ // Code generation support -+ static Register from(); -+ static Register to(); -+ static Register temp(); -+}; ++ ins_encode %{ ++ Register tmp = $tmp$$Register; ++ __ mv(tmp, $oldval$$Register); ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, ++ Assembler::aq /* acquire */, Assembler::rl /* release */, ++ true /* is_cae */, $res$$Register); ++ %} + -+#endif // CPU_RISCV_INTERPRETERRT_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp b/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp -new file mode 100644 -index 000000000..a169b8c5f ---- /dev/null -+++ b/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp -@@ -0,0 +1,89 @@ -+/* -+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++ ins_pipe(pipe_slow); ++%} + -+#ifndef CPU_RISCV_JAVAFRAMEANCHOR_RISCV_HPP -+#define CPU_RISCV_JAVAFRAMEANCHOR_RISCV_HPP ++instruct compareAndExchangePAcq_shenandoah(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ ++ predicate(needs_acquiring_load_reserved(n)); ++ match(Set res (ShenandoahCompareAndExchangeP mem (Binary oldval newval))); ++ ins_cost(10 * DEFAULT_COST); + -+private: ++ effect(TEMP_DEF res, TEMP tmp, KILL cr); ++ format %{ ++ "cmpxchg_acq_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangePAcq_shenandoah" ++ %} + -+ // FP value associated with _last_Java_sp: -+ intptr_t* volatile _last_Java_fp; // pointer is volatile not what it points to ++ ins_encode %{ ++ Register tmp = $tmp$$Register; ++ __ mv(tmp, $oldval$$Register); ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, ++ Assembler::aq /* acquire */, Assembler::rl /* release */, ++ true /* is_cae */, $res$$Register); ++ %} + -+public: -+ // Each arch must define reset, save, restore -+ // These are used by objects that only care about: -+ // 1 - initializing a new state (thread creation, javaCalls) -+ // 2 - saving a current state (javaCalls) -+ // 3 - restoring an old state (javaCalls) ++ ins_pipe(pipe_slow); ++%} + -+ void clear(void) { -+ // clearing _last_Java_sp must be first -+ _last_Java_sp = NULL; -+ OrderAccess::release(); -+ _last_Java_fp = NULL; -+ _last_Java_pc = NULL; -+ } ++instruct weakCompareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ ++ match(Set res (ShenandoahWeakCompareAndSwapP mem (Binary oldval newval))); ++ ins_cost(10 * DEFAULT_COST); + -+ void copy(JavaFrameAnchor* src) { -+ // In order to make sure the transition state is valid for "this" -+ // We must clear _last_Java_sp before copying the rest of the new data -+ // -+ // Hack Alert: Temporary bugfix for 4717480/4721647 -+ // To act like previous version (pd_cache_state) don't NULL _last_Java_sp -+ // unless the value is changing -+ // -+ assert(src != NULL, "Src should not be NULL."); -+ if (_last_Java_sp != src->_last_Java_sp) { -+ _last_Java_sp = NULL; -+ OrderAccess::release(); -+ } -+ _last_Java_fp = src->_last_Java_fp; -+ _last_Java_pc = src->_last_Java_pc; -+ // Must be last so profiler will always see valid frame if has_last_frame() is true -+ _last_Java_sp = src->_last_Java_sp; -+ } ++ effect(TEMP tmp, KILL cr); ++ format %{ ++ "cmpxchg_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapP_shenandoah" ++ %} + -+ bool walkable(void) { return _last_Java_sp != NULL && _last_Java_pc != NULL; } -+ void make_walkable(JavaThread* thread); -+ void capture_last_Java_pc(void); ++ ins_encode %{ ++ Register tmp = $tmp$$Register; ++ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. 
++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, ++ Assembler::relaxed /* acquire */, Assembler::rl /* release */, ++ false /* is_cae */, $res$$Register); ++ %} + -+ intptr_t* last_Java_sp(void) const { return _last_Java_sp; } ++ ins_pipe(pipe_slow); ++%} + -+ const address last_Java_pc(void) { return _last_Java_pc; } ++instruct weakCompareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ ++ predicate(needs_acquiring_load_reserved(n)); ++ match(Set res (ShenandoahWeakCompareAndSwapN mem (Binary oldval newval))); ++ ins_cost(10 * DEFAULT_COST); + -+private: ++ effect(TEMP tmp, KILL cr); ++ format %{ ++ "cmpxchgw_acq_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapNAcq_shenandoah" ++ "mv $res, EQ\t# $res <-- (EQ ? 1 : 0)" ++ %} + -+ static ByteSize last_Java_fp_offset() { return byte_offset_of(JavaFrameAnchor, _last_Java_fp); } ++ ins_encode %{ ++ Register tmp = $tmp$$Register; ++ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. ++ // Weak is not current supported by ShenandoahBarrierSet::cmpxchg_oop ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, ++ Assembler::aq /* acquire */, Assembler::rl /* release */, ++ false /* is_cae */, $res$$Register); ++ %} + -+public: ++ ins_pipe(pipe_slow); ++%} + -+ void set_last_Java_sp(intptr_t* java_sp) { _last_Java_sp = java_sp; OrderAccess::release(); } ++instruct weakCompareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ ++ predicate(needs_acquiring_load_reserved(n)); ++ match(Set res (ShenandoahWeakCompareAndSwapP mem (Binary oldval newval))); ++ ins_cost(10 * DEFAULT_COST); + -+ intptr_t* last_Java_fp(void) { return _last_Java_fp; } -+ // Assert (last_Java_sp == NULL || fp == NULL) -+ void set_last_Java_fp(intptr_t* java_fp) { OrderAccess::release(); _last_Java_fp = java_fp; } ++ effect(TEMP tmp, KILL cr); ++ format %{ ++ "cmpxchg_acq_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapPAcq_shenandoah" ++ "mv $res, EQ\t# $res <-- (EQ ? 1 : 0)" ++ %} + -+#endif // CPU_RISCV_JAVAFRAMEANCHOR_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp b/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp ++ ins_encode %{ ++ Register tmp = $tmp$$Register; ++ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. ++ // Weak is not current supported by ShenandoahBarrierSet::cmpxchg_oop ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, ++ Assembler::aq /* acquire */, Assembler::rl /* release */, ++ false /* is_cae */, $res$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} +diff --git a/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp new file mode 100644 -index 000000000..9bab8e78f +index 00000000000..3d3f4d4d774 --- /dev/null -+++ b/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp -@@ -0,0 +1,193 @@ ++++ b/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp +@@ -0,0 +1,441 @@ +/* -+ * Copyright (c) 2004, 2017, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2019, 2020, Oracle and/or its affiliates. 
All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -18078,181 +18107,429 @@ index 000000000..9bab8e78f + */ + +#include "precompiled.hpp" -+#include "asm/macroAssembler.hpp" -+#include "gc/shared/barrierSet.hpp" -+#include "gc/shared/barrierSetAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/codeBlob.hpp" ++#include "code/vmreg.inline.hpp" ++#include "gc/z/zBarrier.inline.hpp" ++#include "gc/z/zBarrierSet.hpp" ++#include "gc/z/zBarrierSetAssembler.hpp" ++#include "gc/z/zBarrierSetRuntime.hpp" ++#include "gc/z/zThreadLocalData.hpp" +#include "memory/resourceArea.hpp" -+#include "prims/jniFastGetField.hpp" -+#include "prims/jvm_misc.hpp" -+#include "runtime/safepoint.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "utilities/macros.hpp" ++#ifdef COMPILER1 ++#include "c1/c1_LIRAssembler.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "gc/z/c1/zBarrierSetC1.hpp" ++#endif // COMPILER1 ++#ifdef COMPILER2 ++#include "gc/z/c2/zBarrierSetC2.hpp" ++#endif // COMPILER2 + ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#else ++#define BLOCK_COMMENT(str) __ block_comment(str) ++#endif ++ ++#undef __ +#define __ masm-> + -+#define BUFFER_SIZE 30*wordSize ++void ZBarrierSetAssembler::load_at(MacroAssembler* masm, ++ DecoratorSet decorators, ++ BasicType type, ++ Register dst, ++ Address src, ++ Register tmp1, ++ Register tmp_thread) { ++ if (!ZBarrierSet::barrier_needed(decorators, type)) { ++ // Barrier not needed ++ BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); ++ return; ++ } + -+// Instead of issuing a LoadLoad barrier we create an address -+// dependency between loads; this might be more efficient. ++ assert_different_registers(t1, src.base()); ++ assert_different_registers(t0, t1, dst); + -+// Common register usage: -+// x10/f10: result -+// c_rarg0: jni env -+// c_rarg1: obj -+// c_rarg2: jfield id ++ Label done; + -+static const Register robj = x13; -+static const Register rcounter = x14; -+static const Register roffset = x15; -+static const Register rcounter_addr = x16; -+static const Register result = x17; ++ // Load bad mask into temp register. ++ __ la(t0, src); ++ __ ld(t1, address_bad_mask_from_thread(xthread)); ++ __ ld(dst, Address(t0)); + -+address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { -+ const char *name; -+ switch (type) { -+ case T_BOOLEAN: name = "jni_fast_GetBooleanField"; break; -+ case T_BYTE: name = "jni_fast_GetByteField"; break; -+ case T_CHAR: name = "jni_fast_GetCharField"; break; -+ case T_SHORT: name = "jni_fast_GetShortField"; break; -+ case T_INT: name = "jni_fast_GetIntField"; break; -+ case T_LONG: name = "jni_fast_GetLongField"; break; -+ case T_FLOAT: name = "jni_fast_GetFloatField"; break; -+ case T_DOUBLE: name = "jni_fast_GetDoubleField"; break; -+ default: ShouldNotReachHere(); -+ name = NULL; // unreachable ++ // Test reference against bad mask. If mask bad, then we need to fix it up. 
++ __ andr(t1, dst, t1); ++ __ beqz(t1, done); ++ ++ __ enter(); ++ ++ __ push_call_clobbered_registers_except(RegSet::of(dst)); ++ ++ if (c_rarg0 != dst) { ++ __ mv(c_rarg0, dst); + } -+ ResourceMark rm; -+ BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE); -+ CodeBuffer cbuf(blob); -+ MacroAssembler* masm = new MacroAssembler(&cbuf); -+ address fast_entry = __ pc(); + -+ Label slow; -+ int32_t offset = 0; -+ __ la_patchable(rcounter_addr, SafepointSynchronize::safepoint_counter_addr(), offset); -+ __ addi(rcounter_addr, rcounter_addr, offset); ++ __ mv(c_rarg1, t0); + -+ Address safepoint_counter_addr(rcounter_addr, 0); -+ __ lwu(rcounter, safepoint_counter_addr); -+ // An even value means there are no ongoing safepoint operations -+ __ andi(t0, rcounter, 1); -+ __ bnez(t0, slow); -+ __ xorr(robj, c_rarg1, rcounter); -+ __ xorr(robj, robj, rcounter); // obj, since -+ // robj ^ rcounter ^ rcounter == robj -+ // robj is address dependent on rcounter. ++ __ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators), 2); + ++ // Make sure dst has the return value. ++ if (dst != x10) { ++ __ mv(dst, x10); ++ } + -+ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ assert_cond(bs != NULL); -+ bs->try_resolve_jobject_in_native(masm, c_rarg0, robj, t0, slow); ++ __ pop_call_clobbered_registers_except(RegSet::of(dst)); ++ __ leave(); + -+ __ srli(roffset, c_rarg2, 2); // offset ++ __ bind(done); ++} + -+ assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); -+ speculative_load_pclist[count] = __ pc(); // Used by the segfault handler -+ __ add(roffset, robj, roffset); -+ switch (type) { -+ case T_BOOLEAN: __ lbu(result, Address(roffset, 0)); break; -+ case T_BYTE: __ lb(result, Address(roffset, 0)); break; -+ case T_CHAR: __ lhu(result, Address(roffset, 0)); break; -+ case T_SHORT: __ lh(result, Address(roffset, 0)); break; -+ case T_INT: __ lw(result, Address(roffset, 0)); break; -+ case T_LONG: __ ld(result, Address(roffset, 0)); break; -+ case T_FLOAT: { -+ __ flw(f28, Address(roffset, 0)); // f28 as temporaries -+ __ fmv_x_w(result, f28); // f{31--0}-->x -+ break; -+ } -+ case T_DOUBLE: { -+ __ fld(f28, Address(roffset, 0)); // f28 as temporaries -+ __ fmv_x_d(result, f28); // d{63--0}-->x -+ break; ++#ifdef ASSERT ++ ++void ZBarrierSetAssembler::store_at(MacroAssembler* masm, ++ DecoratorSet decorators, ++ BasicType type, ++ Address dst, ++ Register val, ++ Register tmp1, ++ Register tmp2) { ++ // Verify value ++ if (is_reference_type(type)) { ++ // Note that src could be noreg, which means we ++ // are storing null and can skip verification. ++ if (val != noreg) { ++ Label done; ++ ++ // tmp1 and tmp2 are often set to noreg. ++ RegSet savedRegs = RegSet::of(t0); ++ __ push_reg(savedRegs, sp); ++ ++ __ ld(t0, address_bad_mask_from_thread(xthread)); ++ __ andr(t0, val, t0); ++ __ beqz(t0, done); ++ __ stop("Verify oop store failed"); ++ __ should_not_reach_here(); ++ __ bind(done); ++ __ pop_reg(savedRegs, sp); + } -+ default: ShouldNotReachHere(); + } + -+ // counter_addr is address dependent on result. 
-+ __ xorr(rcounter_addr, rcounter_addr, result); -+ __ xorr(rcounter_addr, rcounter_addr, result); -+ __ lw(t0, safepoint_counter_addr); -+ __ bne(rcounter, t0, slow); ++ // Store value ++ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); ++} + -+ switch (type) { -+ case T_FLOAT: __ fmv_w_x(f10, result); break; -+ case T_DOUBLE: __ fmv_d_x(f10, result); break; -+ default: __ mv(x10, result); break; -+ } -+ __ ret(); ++#endif // ASSERT + -+ slowcase_entry_pclist[count++] = __ pc(); -+ __ bind(slow); -+ address slow_case_addr; -+ switch (type) { -+ case T_BOOLEAN: slow_case_addr = jni_GetBooleanField_addr(); break; -+ case T_BYTE: slow_case_addr = jni_GetByteField_addr(); break; -+ case T_CHAR: slow_case_addr = jni_GetCharField_addr(); break; -+ case T_SHORT: slow_case_addr = jni_GetShortField_addr(); break; -+ case T_INT: slow_case_addr = jni_GetIntField_addr(); break; -+ case T_LONG: slow_case_addr = jni_GetLongField_addr(); break; -+ case T_FLOAT: slow_case_addr = jni_GetFloatField_addr(); break; -+ case T_DOUBLE: slow_case_addr = jni_GetDoubleField_addr(); break; -+ default: ShouldNotReachHere(); -+ slow_case_addr = NULL; // unreachable ++void ZBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, ++ DecoratorSet decorators, ++ bool is_oop, ++ Register src, ++ Register dst, ++ Register count, ++ RegSet saved_regs) { ++ if (!is_oop) { ++ // Barrier not needed ++ return; + } + -+ { -+ __ enter(); -+ int32_t tmp_offset = 0; -+ __ la_patchable(t0, ExternalAddress(slow_case_addr), tmp_offset); -+ __ jalr(x1, t0, tmp_offset); -+ __ leave(); -+ __ ret(); ++ BLOCK_COMMENT("ZBarrierSetAssembler::arraycopy_prologue {"); ++ ++ assert_different_registers(src, count, t0); ++ ++ __ push_reg(saved_regs, sp); ++ ++ if (count == c_rarg0 && src == c_rarg1) { ++ // exactly backwards!! 
++ __ xorr(c_rarg0, c_rarg0, c_rarg1); ++ __ xorr(c_rarg1, c_rarg0, c_rarg1); ++ __ xorr(c_rarg0, c_rarg0, c_rarg1); ++ } else { ++ __ mv(c_rarg0, src); ++ __ mv(c_rarg1, count); + } -+ __ flush(); + -+ return fast_entry; -+} ++ __ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_array_addr(), 2); + ++ __ pop_reg(saved_regs, sp); + -+address JNI_FastGetField::generate_fast_get_boolean_field() { -+ return generate_fast_get_int_field0(T_BOOLEAN); ++ BLOCK_COMMENT("} ZBarrierSetAssembler::arraycopy_prologue"); +} + -+address JNI_FastGetField::generate_fast_get_byte_field() { -+ return generate_fast_get_int_field0(T_BYTE); -+} ++void ZBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, ++ Register jni_env, ++ Register robj, ++ Register tmp, ++ Label& slowpath) { ++ BLOCK_COMMENT("ZBarrierSetAssembler::try_resolve_jobject_in_native {"); + -+address JNI_FastGetField::generate_fast_get_char_field() { -+ return generate_fast_get_int_field0(T_CHAR); ++ assert_different_registers(jni_env, robj, tmp); ++ ++ // Resolve jobject ++ BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, robj, tmp, slowpath); ++ ++ // Compute the offset of address bad mask from the field of jni_environment ++ long int bad_mask_relative_offset = (long int) (in_bytes(ZThreadLocalData::address_bad_mask_offset()) - ++ in_bytes(JavaThread::jni_environment_offset())); ++ ++ // Load the address bad mask ++ __ ld(tmp, Address(jni_env, bad_mask_relative_offset)); ++ ++ // Check address bad mask ++ __ andr(tmp, robj, tmp); ++ __ bnez(tmp, slowpath); ++ ++ BLOCK_COMMENT("} ZBarrierSetAssembler::try_resolve_jobject_in_native"); +} + -+address JNI_FastGetField::generate_fast_get_short_field() { -+ return generate_fast_get_int_field0(T_SHORT); ++#ifdef COMPILER2 ++ ++OptoReg::Name ZBarrierSetAssembler::refine_register(const Node* node, OptoReg::Name opto_reg) { ++ if (!OptoReg::is_reg(opto_reg)) { ++ return OptoReg::Bad; ++ } ++ ++ const VMReg vm_reg = OptoReg::as_VMReg(opto_reg); ++ if (vm_reg->is_FloatRegister()) { ++ return opto_reg & ~1; ++ } ++ ++ return opto_reg; +} + -+address JNI_FastGetField::generate_fast_get_int_field() { -+ return generate_fast_get_int_field0(T_INT); ++#undef __ ++#define __ _masm-> ++ ++class ZSaveLiveRegisters { ++private: ++ MacroAssembler* const _masm; ++ RegSet _gp_regs; ++ FloatRegSet _fp_regs; ++ VectorRegSet _vp_regs; ++ ++public: ++ void initialize(ZLoadBarrierStubC2* stub) { ++ // Record registers that needs to be saved/restored ++ RegMaskIterator rmi(stub->live()); ++ while (rmi.has_next()) { ++ const OptoReg::Name opto_reg = rmi.next(); ++ if (OptoReg::is_reg(opto_reg)) { ++ const VMReg vm_reg = OptoReg::as_VMReg(opto_reg); ++ if (vm_reg->is_Register()) { ++ _gp_regs += RegSet::of(vm_reg->as_Register()); ++ } else if (vm_reg->is_FloatRegister()) { ++ _fp_regs += FloatRegSet::of(vm_reg->as_FloatRegister()); ++ } else if (vm_reg->is_VectorRegister()) { ++ const VMReg vm_reg_base = OptoReg::as_VMReg(opto_reg & ~(VectorRegisterImpl::max_slots_per_register - 1)); ++ _vp_regs += VectorRegSet::of(vm_reg_base->as_VectorRegister()); ++ } else { ++ fatal("Unknown register type"); ++ } ++ } ++ } ++ ++ // Remove C-ABI SOE registers, tmp regs and _ref register that will be updated ++ _gp_regs -= RegSet::range(x18, x27) + RegSet::of(x2) + RegSet::of(x8, x9) + RegSet::of(x5, stub->ref()); ++ } ++ ++ ZSaveLiveRegisters(MacroAssembler* masm, ZLoadBarrierStubC2* stub) : ++ _masm(masm), ++ _gp_regs(), ++ _fp_regs(), ++ _vp_regs() { ++ // Figure out what registers to save/restore 
++ initialize(stub); ++ ++ // Save registers ++ __ push_reg(_gp_regs, sp); ++ __ push_fp(_fp_regs, sp); ++ __ push_vp(_vp_regs, sp); ++ } ++ ++ ~ZSaveLiveRegisters() { ++ // Restore registers ++ __ pop_vp(_vp_regs, sp); ++ __ pop_fp(_fp_regs, sp); ++ __ pop_reg(_gp_regs, sp); ++ } ++}; ++ ++class ZSetupArguments { ++private: ++ MacroAssembler* const _masm; ++ const Register _ref; ++ const Address _ref_addr; ++ ++public: ++ ZSetupArguments(MacroAssembler* masm, ZLoadBarrierStubC2* stub) : ++ _masm(masm), ++ _ref(stub->ref()), ++ _ref_addr(stub->ref_addr()) { ++ ++ // Setup arguments ++ if (_ref_addr.base() == noreg) { ++ // No self healing ++ if (_ref != c_rarg0) { ++ __ mv(c_rarg0, _ref); ++ } ++ __ mv(c_rarg1, zr); ++ } else { ++ // Self healing ++ if (_ref == c_rarg0) { ++ // _ref is already at correct place ++ __ la(c_rarg1, _ref_addr); ++ } else if (_ref != c_rarg1) { ++ // _ref is in wrong place, but not in c_rarg1, so fix it first ++ __ la(c_rarg1, _ref_addr); ++ __ mv(c_rarg0, _ref); ++ } else if (_ref_addr.base() != c_rarg0) { ++ assert(_ref == c_rarg1, "Mov ref first, vacating c_rarg0"); ++ __ mv(c_rarg0, _ref); ++ __ la(c_rarg1, _ref_addr); ++ } else { ++ assert(_ref == c_rarg1, "Need to vacate c_rarg1 and _ref_addr is using c_rarg0"); ++ if (_ref_addr.base() == c_rarg0) { ++ __ mv(t1, c_rarg1); ++ __ la(c_rarg1, _ref_addr); ++ __ mv(c_rarg0, t1); ++ } else { ++ ShouldNotReachHere(); ++ } ++ } ++ } ++ } ++ ++ ~ZSetupArguments() { ++ // Transfer result ++ if (_ref != x10) { ++ __ mv(_ref, x10); ++ } ++ } ++}; ++ ++#undef __ ++#define __ masm-> ++ ++void ZBarrierSetAssembler::generate_c2_load_barrier_stub(MacroAssembler* masm, ZLoadBarrierStubC2* stub) const { ++ BLOCK_COMMENT("ZLoadBarrierStubC2"); ++ ++ // Stub entry ++ __ bind(*stub->entry()); ++ ++ { ++ ZSaveLiveRegisters save_live_registers(masm, stub); ++ ZSetupArguments setup_arguments(masm, stub); ++ int32_t offset = 0; ++ __ la_patchable(t0, stub->slow_path(), offset); ++ __ jalr(x1, t0, offset); ++ } ++ ++ // Stub exit ++ __ j(*stub->continuation()); +} + -+address JNI_FastGetField::generate_fast_get_long_field() { -+ return generate_fast_get_int_field0(T_LONG); ++#undef __ ++ ++#endif // COMPILER2 ++ ++#ifdef COMPILER1 ++#undef __ ++#define __ ce->masm()-> ++ ++void ZBarrierSetAssembler::generate_c1_load_barrier_test(LIR_Assembler* ce, ++ LIR_Opr ref) const { ++ assert_different_registers(xthread, ref->as_register(), t1); ++ __ ld(t1, address_bad_mask_from_thread(xthread)); ++ __ andr(t1, t1, ref->as_register()); +} + -+address JNI_FastGetField::generate_fast_get_float_field() { -+ return generate_fast_get_int_field0(T_FLOAT); ++void ZBarrierSetAssembler::generate_c1_load_barrier_stub(LIR_Assembler* ce, ++ ZLoadBarrierStubC1* stub) const { ++ // Stub entry ++ __ bind(*stub->entry()); ++ ++ Register ref = stub->ref()->as_register(); ++ Register ref_addr = noreg; ++ Register tmp = noreg; ++ ++ if (stub->tmp()->is_valid()) { ++ // Load address into tmp register ++ ce->leal(stub->ref_addr(), stub->tmp()); ++ ref_addr = tmp = stub->tmp()->as_pointer_register(); ++ } else { ++ // Address already in register ++ ref_addr = stub->ref_addr()->as_address_ptr()->base()->as_pointer_register(); ++ } ++ ++ assert_different_registers(ref, ref_addr, noreg); ++ ++ // Save x10 unless it is the result or tmp register ++ // Set up SP to accomodate parameters and maybe x10. 
++ if (ref != x10 && tmp != x10) { ++ __ sub(sp, sp, 32); ++ __ sd(x10, Address(sp, 16)); ++ } else { ++ __ sub(sp, sp, 16); ++ } ++ ++ // Setup arguments and call runtime stub ++ ce->store_parameter(ref_addr, 1); ++ ce->store_parameter(ref, 0); ++ ++ __ far_call(stub->runtime_stub()); ++ ++ // Verify result ++ __ verify_oop(x10, "Bad oop"); ++ ++ ++ // Move result into place ++ if (ref != x10) { ++ __ mv(ref, x10); ++ } ++ ++ // Restore x10 unless it is the result or tmp register ++ if (ref != x10 && tmp != x10) { ++ __ ld(x10, Address(sp, 16)); ++ __ add(sp, sp, 32); ++ } else { ++ __ add(sp, sp, 16); ++ } ++ ++ // Stub exit ++ __ j(*stub->continuation()); +} + -+address JNI_FastGetField::generate_fast_get_double_field() { -+ return generate_fast_get_int_field0(T_DOUBLE); ++#undef __ ++#define __ sasm-> ++ ++void ZBarrierSetAssembler::generate_c1_load_barrier_runtime_stub(StubAssembler* sasm, ++ DecoratorSet decorators) const { ++ __ prologue("zgc_load_barrier stub", false); ++ ++ __ push_call_clobbered_registers_except(RegSet::of(x10)); ++ ++ // Setup arguments ++ __ load_parameter(0, c_rarg0); ++ __ load_parameter(1, c_rarg1); ++ ++ __ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators), 2); ++ ++ __ pop_call_clobbered_registers_except(RegSet::of(x10)); ++ ++ __ epilogue(); +} -diff --git a/src/hotspot/cpu/riscv/jniTypes_riscv.hpp b/src/hotspot/cpu/riscv/jniTypes_riscv.hpp ++ ++#undef __ ++#endif // COMPILER1 +diff --git a/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.hpp new file mode 100644 -index 000000000..96775e0db +index 00000000000..dc07ab635fe --- /dev/null -+++ b/src/hotspot/cpu/riscv/jniTypes_riscv.hpp -@@ -0,0 +1,108 @@ ++++ b/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.hpp +@@ -0,0 +1,101 @@ +/* -+ * Copyright (c) 1998, 2017, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -18276,98 +18553,91 @@ index 000000000..96775e0db + * + */ + -+#ifndef CPU_RISCV_JNITYPES_RISCV_HPP -+#define CPU_RISCV_JNITYPES_RISCV_HPP ++#ifndef CPU_RISCV_GC_Z_ZBARRIERSETASSEMBLER_RISCV_HPP ++#define CPU_RISCV_GC_Z_ZBARRIERSETASSEMBLER_RISCV_HPP + -+#include "jni.h" -+#include "memory/allocation.hpp" -+#include "oops/oop.hpp" ++#include "code/vmreg.hpp" ++#include "oops/accessDecorators.hpp" ++#ifdef COMPILER2 ++#include "opto/optoreg.hpp" ++#endif // COMPILER2 + -+// This file holds platform-dependent routines used to write primitive jni -+// types to the array of arguments passed into JavaCalls::call ++#ifdef COMPILER1 ++class LIR_Assembler; ++class LIR_Opr; ++class StubAssembler; ++class ZLoadBarrierStubC1; ++#endif // COMPILER1 + -+class JNITypes : private AllStatic { -+ // These functions write a java primitive type (in native format) -+ // to a java stack slot array to be passed as an argument to JavaCalls:calls. -+ // I.e., they are functionally 'push' operations if they have a 'pos' -+ // formal parameter. Note that jlong's and jdouble's are written -+ // _in reverse_ of the order in which they appear in the interpreter -+ // stack. This is because call stubs (see stubGenerator_sparc.cpp) -+ // reverse the argument list constructed by JavaCallArguments (see -+ // javaCalls.hpp). 
++#ifdef COMPILER2 ++class Node; ++class ZLoadBarrierStubC2; ++#endif // COMPILER2 + ++class ZBarrierSetAssembler : public ZBarrierSetAssemblerBase { +public: -+ // Ints are stored in native format in one JavaCallArgument slot at *to. -+ static inline void put_int(jint from, intptr_t *to) { *(jint *)(to + 0 ) = from; } -+ static inline void put_int(jint from, intptr_t *to, int& pos) { *(jint *)(to + pos++) = from; } -+ static inline void put_int(jint *from, intptr_t *to, int& pos) { *(jint *)(to + pos++) = *from; } ++ virtual void load_at(MacroAssembler* masm, ++ DecoratorSet decorators, ++ BasicType type, ++ Register dst, ++ Address src, ++ Register tmp1, ++ Register tmp_thread); + -+ // Longs are stored in native format in one JavaCallArgument slot at -+ // *(to+1). -+ static inline void put_long(jlong from, intptr_t *to) { -+ *(jlong*) (to + 1) = from; -+ } -+ -+ static inline void put_long(jlong from, intptr_t *to, int& pos) { -+ *(jlong*) (to + 1 + pos) = from; -+ pos += 2; -+ } ++#ifdef ASSERT ++ virtual void store_at(MacroAssembler* masm, ++ DecoratorSet decorators, ++ BasicType type, ++ Address dst, ++ Register val, ++ Register tmp1, ++ Register tmp2); ++#endif // ASSERT + -+ static inline void put_long(jlong *from, intptr_t *to, int& pos) { -+ *(jlong*) (to + 1 + pos) = *from; -+ pos += 2; -+ } ++ virtual void arraycopy_prologue(MacroAssembler* masm, ++ DecoratorSet decorators, ++ bool is_oop, ++ Register src, ++ Register dst, ++ Register count, ++ RegSet saved_regs); + -+ // Oops are stored in native format in one JavaCallArgument slot at *to. -+ static inline void put_obj(oop from, intptr_t *to) { *(oop *)(to + 0 ) = from; } -+ static inline void put_obj(oop from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = from; } -+ static inline void put_obj(oop *from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = *from; } ++ virtual void try_resolve_jobject_in_native(MacroAssembler* masm, ++ Register jni_env, ++ Register robj, ++ Register tmp, ++ Label& slowpath); + -+ // Floats are stored in native format in one JavaCallArgument slot at *to. -+ static inline void put_float(jfloat from, intptr_t *to) { *(jfloat *)(to + 0 ) = from; } -+ static inline void put_float(jfloat from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = from; } -+ static inline void put_float(jfloat *from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = *from; } ++#ifdef COMPILER1 ++ void generate_c1_load_barrier_test(LIR_Assembler* ce, ++ LIR_Opr ref) const; + -+#undef _JNI_SLOT_OFFSET -+#define _JNI_SLOT_OFFSET 1 -+ // Doubles are stored in native word format in one JavaCallArgument -+ // slot at *(to+1). -+ static inline void put_double(jdouble from, intptr_t *to) { -+ *(jdouble*) (to + 1) = from; -+ } ++ void generate_c1_load_barrier_stub(LIR_Assembler* ce, ++ ZLoadBarrierStubC1* stub) const; + -+ static inline void put_double(jdouble from, intptr_t *to, int& pos) { -+ *(jdouble*) (to + 1 + pos) = from; -+ pos += 2; -+ } ++ void generate_c1_load_barrier_runtime_stub(StubAssembler* sasm, ++ DecoratorSet decorators) const; ++#endif // COMPILER1 + -+ static inline void put_double(jdouble *from, intptr_t *to, int& pos) { -+ *(jdouble*) (to + 1 + pos) = *from; -+ pos += 2; -+ } ++#ifdef COMPILER2 ++ OptoReg::Name refine_register(const Node* node, ++ OptoReg::Name opto_reg); + -+ // The get_xxx routines, on the other hand, actually _do_ fetch -+ // java primitive types from the interpreter stack. -+ // No need to worry about alignment on Intel. 
-+ static inline jint get_int (intptr_t *from) { return *(jint *) from; } -+ static inline jlong get_long (intptr_t *from) { return *(jlong *) (from + _JNI_SLOT_OFFSET); } -+ static inline oop get_obj (intptr_t *from) { return *(oop *) from; } -+ static inline jfloat get_float (intptr_t *from) { return *(jfloat *) from; } -+ static inline jdouble get_double(intptr_t *from) { return *(jdouble *)(from + _JNI_SLOT_OFFSET); } -+#undef _JNI_SLOT_OFFSET ++ void generate_c2_load_barrier_stub(MacroAssembler* masm, ++ ZLoadBarrierStubC2* stub) const; ++#endif // COMPILER2 +}; + -+#endif // CPU_RISCV_JNITYPES_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++#endif // CPU_RISCV_GC_Z_ZBARRIERSETASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp b/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp new file mode 100644 -index 000000000..5d6078bb3 +index 00000000000..d14997790af --- /dev/null -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -0,0 +1,5861 @@ ++++ b/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp +@@ -0,0 +1,212 @@ +/* -+ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2019, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2017, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -18391,8679 +18661,7893 @@ index 000000000..5d6078bb3 + */ + +#include "precompiled.hpp" -+#include "asm/assembler.hpp" -+#include "asm/assembler.inline.hpp" -+#include "compiler/disassembler.hpp" -+#include "gc/shared/barrierSet.hpp" -+#include "gc/shared/barrierSetAssembler.hpp" -+#include "gc/shared/cardTable.hpp" -+#include "gc/shared/cardTableBarrierSet.hpp" -+#include "interpreter/interpreter.hpp" -+#include "memory/resourceArea.hpp" -+#include "nativeInst_riscv.hpp" -+#include "oops/accessDecorators.hpp" -+#include "oops/compressedOops.inline.hpp" -+#include "oops/klass.inline.hpp" -+#include "runtime/biasedLocking.hpp" -+#include "runtime/interfaceSupport.inline.hpp" -+#include "runtime/jniHandles.inline.hpp" -+#include "runtime/sharedRuntime.hpp" -+#include "runtime/thread.hpp" -+#include "utilities/macros.hpp" -+#ifdef COMPILER1 -+#include "c1/c1_LIRAssembler.hpp" -+#endif -+#ifdef COMPILER2 -+#include "oops/oop.hpp" -+#include "opto/compile.hpp" -+#include "opto/intrinsicnode.hpp" -+#include "opto/subnode.hpp" -+#endif ++#include "gc/shared/gcLogPrecious.hpp" ++#include "gc/shared/gc_globals.hpp" ++#include "gc/z/zGlobals.hpp" ++#include "runtime/globals.hpp" ++#include "runtime/os.hpp" ++#include "utilities/globalDefinitions.hpp" ++#include "utilities/powerOfTwo.hpp" + -+#ifdef PRODUCT -+#define BLOCK_COMMENT(str) /* nothing */ -+#else -+#define BLOCK_COMMENT(str) block_comment(str) -+#endif -+#define BIND(label) bind(label); __ BLOCK_COMMENT(#label ":") ++#ifdef LINUX ++#include ++#endif // LINUX + -+static void pass_arg0(MacroAssembler* masm, Register arg) { -+ if (c_rarg0 != arg) { -+ masm->mv(c_rarg0, arg); -+ } -+} ++// ++// The heap can have three different layouts, depending on the max heap size. 
++// ++// Address Space & Pointer Layout 1 ++// -------------------------------- ++// ++// +--------------------------------+ 0x00007FFFFFFFFFFF (127TB) ++// . . ++// . . ++// . . ++// +--------------------------------+ 0x0000014000000000 (20TB) ++// | Remapped View | ++// +--------------------------------+ 0x0000010000000000 (16TB) ++// . . ++// +--------------------------------+ 0x00000c0000000000 (12TB) ++// | Marked1 View | ++// +--------------------------------+ 0x0000080000000000 (8TB) ++// | Marked0 View | ++// +--------------------------------+ 0x0000040000000000 (4TB) ++// . . ++// +--------------------------------+ 0x0000000000000000 ++// ++// 6 4 4 4 4 ++// 3 6 5 2 1 0 ++// +--------------------+----+-----------------------------------------------+ ++// |00000000 00000000 00|1111|11 11111111 11111111 11111111 11111111 11111111| ++// +--------------------+----+-----------------------------------------------+ ++// | | | ++// | | * 41-0 Object Offset (42-bits, 4TB address space) ++// | | ++// | * 45-42 Metadata Bits (4-bits) 0001 = Marked0 (Address view 4-8TB) ++// | 0010 = Marked1 (Address view 8-12TB) ++// | 0100 = Remapped (Address view 16-20TB) ++// | 1000 = Finalizable (Address view N/A) ++// | ++// * 63-46 Fixed (18-bits, always zero) ++// ++// ++// Address Space & Pointer Layout 2 ++// -------------------------------- ++// ++// +--------------------------------+ 0x00007FFFFFFFFFFF (127TB) ++// . . ++// . . ++// . . ++// +--------------------------------+ 0x0000280000000000 (40TB) ++// | Remapped View | ++// +--------------------------------+ 0x0000200000000000 (32TB) ++// . . ++// +--------------------------------+ 0x0000180000000000 (24TB) ++// | Marked1 View | ++// +--------------------------------+ 0x0000100000000000 (16TB) ++// | Marked0 View | ++// +--------------------------------+ 0x0000080000000000 (8TB) ++// . . ++// +--------------------------------+ 0x0000000000000000 ++// ++// 6 4 4 4 4 ++// 3 7 6 3 2 0 ++// +------------------+-----+------------------------------------------------+ ++// |00000000 00000000 0|1111|111 11111111 11111111 11111111 11111111 11111111| ++// +-------------------+----+------------------------------------------------+ ++// | | | ++// | | * 42-0 Object Offset (43-bits, 8TB address space) ++// | | ++// | * 46-43 Metadata Bits (4-bits) 0001 = Marked0 (Address view 8-16TB) ++// | 0010 = Marked1 (Address view 16-24TB) ++// | 0100 = Remapped (Address view 32-40TB) ++// | 1000 = Finalizable (Address view N/A) ++// | ++// * 63-47 Fixed (17-bits, always zero) ++// ++// ++// Address Space & Pointer Layout 3 ++// -------------------------------- ++// ++// +--------------------------------+ 0x00007FFFFFFFFFFF (127TB) ++// . . ++// . . ++// . . ++// +--------------------------------+ 0x0000500000000000 (80TB) ++// | Remapped View | ++// +--------------------------------+ 0x0000400000000000 (64TB) ++// . . ++// +--------------------------------+ 0x0000300000000000 (48TB) ++// | Marked1 View | ++// +--------------------------------+ 0x0000200000000000 (32TB) ++// | Marked0 View | ++// +--------------------------------+ 0x0000100000000000 (16TB) ++// . . 
++// +--------------------------------+ 0x0000000000000000 ++// ++// 6 4 4 4 4 ++// 3 8 7 4 3 0 ++// +------------------+----+-------------------------------------------------+ ++// |00000000 00000000 |1111|1111 11111111 11111111 11111111 11111111 11111111| ++// +------------------+----+-------------------------------------------------+ ++// | | | ++// | | * 43-0 Object Offset (44-bits, 16TB address space) ++// | | ++// | * 47-44 Metadata Bits (4-bits) 0001 = Marked0 (Address view 16-32TB) ++// | 0010 = Marked1 (Address view 32-48TB) ++// | 0100 = Remapped (Address view 64-80TB) ++// | 1000 = Finalizable (Address view N/A) ++// | ++// * 63-48 Fixed (16-bits, always zero) ++// + -+static void pass_arg1(MacroAssembler* masm, Register arg) { -+ if (c_rarg1 != arg) { -+ masm->mv(c_rarg1, arg); ++// Default value if probing is not implemented for a certain platform: 128TB ++static const size_t DEFAULT_MAX_ADDRESS_BIT = 47; ++// Minimum value returned, if probing fails: 64GB ++static const size_t MINIMUM_MAX_ADDRESS_BIT = 36; ++ ++static size_t probe_valid_max_address_bit() { ++#ifdef LINUX ++ size_t max_address_bit = 0; ++ const size_t page_size = os::vm_page_size(); ++ for (size_t i = DEFAULT_MAX_ADDRESS_BIT; i > MINIMUM_MAX_ADDRESS_BIT; --i) { ++ const uintptr_t base_addr = ((uintptr_t) 1U) << i; ++ if (msync((void*)base_addr, page_size, MS_ASYNC) == 0) { ++ // msync suceeded, the address is valid, and maybe even already mapped. ++ max_address_bit = i; ++ break; ++ } ++ if (errno != ENOMEM) { ++ // Some error occured. This should never happen, but msync ++ // has some undefined behavior, hence ignore this bit. ++#ifdef ASSERT ++ fatal("Received '%s' while probing the address space for the highest valid bit", os::errno_name(errno)); ++#else // ASSERT ++ log_warning_p(gc)("Received '%s' while probing the address space for the highest valid bit", os::errno_name(errno)); ++#endif // ASSERT ++ continue; ++ } ++ // Since msync failed with ENOMEM, the page might not be mapped. ++ // Try to map it, to see if the address is valid. 
++ void* const result_addr = mmap((void*) base_addr, page_size, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0); ++ if (result_addr != MAP_FAILED) { ++ munmap(result_addr, page_size); ++ } ++ if ((uintptr_t) result_addr == base_addr) { ++ // address is valid ++ max_address_bit = i; ++ break; ++ } + } -+} -+ -+static void pass_arg2(MacroAssembler* masm, Register arg) { -+ if (c_rarg2 != arg) { -+ masm->mv(c_rarg2, arg); ++ if (max_address_bit == 0) { ++ // probing failed, allocate a very high page and take that bit as the maximum ++ const uintptr_t high_addr = ((uintptr_t) 1U) << DEFAULT_MAX_ADDRESS_BIT; ++ void* const result_addr = mmap((void*) high_addr, page_size, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0); ++ if (result_addr != MAP_FAILED) { ++ max_address_bit = BitsPerSize_t - count_leading_zeros((size_t) result_addr) - 1; ++ munmap(result_addr, page_size); ++ } + } ++ log_info_p(gc, init)("Probing address space for the highest valid bit: " SIZE_FORMAT, max_address_bit); ++ return MAX2(max_address_bit, MINIMUM_MAX_ADDRESS_BIT); ++#else // LINUX ++ return DEFAULT_MAX_ADDRESS_BIT; ++#endif // LINUX +} + -+static void pass_arg3(MacroAssembler* masm, Register arg) { -+ if (c_rarg3 != arg) { -+ masm->mv(c_rarg3, arg); -+ } ++size_t ZPlatformAddressOffsetBits() { ++ const static size_t valid_max_address_offset_bits = probe_valid_max_address_bit() + 1; ++ const size_t max_address_offset_bits = valid_max_address_offset_bits - 3; ++ const size_t min_address_offset_bits = max_address_offset_bits - 2; ++ const size_t address_offset = round_up_power_of_2(MaxHeapSize * ZVirtualToPhysicalRatio); ++ const size_t address_offset_bits = log2i_exact(address_offset); ++ return clamp(address_offset_bits, min_address_offset_bits, max_address_offset_bits); +} + -+void MacroAssembler::align(int modulus) { -+ while (offset() % modulus != 0) { nop(); } ++size_t ZPlatformAddressMetadataShift() { ++ return ZPlatformAddressOffsetBits(); +} +diff --git a/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp b/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp +new file mode 100644 +index 00000000000..f20ecd9b073 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp +@@ -0,0 +1,36 @@ ++/* ++ * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ + -+void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { -+ call_VM_base(oop_result, noreg, noreg, entry_point, number_of_arguments, check_exceptions); -+} ++#ifndef CPU_RISCV_GC_Z_ZGLOBALS_RISCV_HPP ++#define CPU_RISCV_GC_Z_ZGLOBALS_RISCV_HPP + -+// Implementation of call_VM versions ++const size_t ZPlatformGranuleSizeShift = 21; // 2MB ++const size_t ZPlatformHeapViews = 3; ++const size_t ZPlatformCacheLineSize = 64; + -+void MacroAssembler::call_VM(Register oop_result, -+ address entry_point, -+ bool check_exceptions) { -+ call_VM_helper(oop_result, entry_point, 0, check_exceptions); -+} ++size_t ZPlatformAddressOffsetBits(); ++size_t ZPlatformAddressMetadataShift(); + -+void MacroAssembler::call_VM(Register oop_result, -+ address entry_point, -+ Register arg_1, -+ bool check_exceptions) { -+ pass_arg1(this, arg_1); -+ call_VM_helper(oop_result, entry_point, 1, check_exceptions); -+} ++#endif // CPU_RISCV_GC_Z_ZGLOBALS_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/gc/z/z_riscv64.ad b/src/hotspot/cpu/riscv/gc/z/z_riscv64.ad +new file mode 100644 +index 00000000000..6b6f87814a5 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/z/z_riscv64.ad +@@ -0,0 +1,233 @@ ++// ++// Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. 
++// + -+void MacroAssembler::call_VM(Register oop_result, -+ address entry_point, -+ Register arg_1, -+ Register arg_2, -+ bool check_exceptions) { -+ assert(arg_1 != c_rarg2, "smashed arg"); -+ pass_arg2(this, arg_2); -+ pass_arg1(this, arg_1); -+ call_VM_helper(oop_result, entry_point, 2, check_exceptions); -+} ++source_hpp %{ + -+void MacroAssembler::call_VM(Register oop_result, -+ address entry_point, -+ Register arg_1, -+ Register arg_2, -+ Register arg_3, -+ bool check_exceptions) { -+ assert(arg_1 != c_rarg3, "smashed arg"); -+ assert(arg_2 != c_rarg3, "smashed arg"); -+ pass_arg3(this, arg_3); ++#include "gc/shared/gc_globals.hpp" ++#include "gc/z/c2/zBarrierSetC2.hpp" ++#include "gc/z/zThreadLocalData.hpp" + -+ assert(arg_1 != c_rarg2, "smashed arg"); -+ pass_arg2(this, arg_2); ++%} + -+ pass_arg1(this, arg_1); -+ call_VM_helper(oop_result, entry_point, 3, check_exceptions); -+} ++source %{ + -+void MacroAssembler::call_VM(Register oop_result, -+ Register last_java_sp, -+ address entry_point, -+ int number_of_arguments, -+ bool check_exceptions) { -+ call_VM_base(oop_result, xthread, last_java_sp, entry_point, number_of_arguments, check_exceptions); -+} -+ -+void MacroAssembler::call_VM(Register oop_result, -+ Register last_java_sp, -+ address entry_point, -+ Register arg_1, -+ bool check_exceptions) { -+ pass_arg1(this, arg_1); -+ call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); -+} -+ -+void MacroAssembler::call_VM(Register oop_result, -+ Register last_java_sp, -+ address entry_point, -+ Register arg_1, -+ Register arg_2, -+ bool check_exceptions) { -+ -+ assert(arg_1 != c_rarg2, "smashed arg"); -+ pass_arg2(this, arg_2); -+ pass_arg1(this, arg_1); -+ call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); ++static void z_load_barrier(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp, int barrier_data) { ++ if (barrier_data == ZLoadBarrierElided) { ++ return; ++ } ++ ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, barrier_data); ++ __ ld(tmp, Address(xthread, ZThreadLocalData::address_bad_mask_offset())); ++ __ andr(tmp, tmp, ref); ++ __ bnez(tmp, *stub->entry(), true /* far */); ++ __ bind(*stub->continuation()); +} + -+void MacroAssembler::call_VM(Register oop_result, -+ Register last_java_sp, -+ address entry_point, -+ Register arg_1, -+ Register arg_2, -+ Register arg_3, -+ bool check_exceptions) { -+ assert(arg_1 != c_rarg3, "smashed arg"); -+ assert(arg_2 != c_rarg3, "smashed arg"); -+ pass_arg3(this, arg_3); -+ assert(arg_1 != c_rarg2, "smashed arg"); -+ pass_arg2(this, arg_2); -+ pass_arg1(this, arg_1); -+ call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); ++static void z_load_barrier_slow_path(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp) { ++ ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, ZLoadBarrierStrong); ++ __ j(*stub->entry()); ++ __ bind(*stub->continuation()); +} + -+// these are no-ops overridden by InterpreterMacroAssembler -+void MacroAssembler::check_and_handle_earlyret(Register java_thread) {} -+void MacroAssembler::check_and_handle_popframe(Register java_thread) {} ++%} + -+// Calls to C land -+// -+// When entering C land, the fp, & esp of the last Java frame have to be recorded -+// in the (thread-local) JavaThread object. When leaving C land, the last Java fp -+// has to be reset to 0. This is required to allow proper stack traversal. 
-+void MacroAssembler::set_last_Java_frame(Register last_java_sp, -+ Register last_java_fp, -+ Register last_java_pc, -+ Register tmp) { ++// Load Pointer ++instruct zLoadP(iRegPNoSp dst, memory mem) ++%{ ++ match(Set dst (LoadP mem)); ++ predicate(UseZGC && (n->as_Load()->barrier_data() != 0)); ++ effect(TEMP dst); + -+ if (last_java_pc->is_valid()) { -+ sd(last_java_pc, Address(xthread, -+ JavaThread::frame_anchor_offset() + -+ JavaFrameAnchor::last_Java_pc_offset())); -+ } ++ ins_cost(4 * DEFAULT_COST); + -+ // determine last_java_sp register -+ if (last_java_sp == sp) { -+ mv(tmp, sp); -+ last_java_sp = tmp; -+ } else if (!last_java_sp->is_valid()) { -+ last_java_sp = esp; -+ } ++ format %{ "ld $dst, $mem, #@zLoadP" %} + -+ sd(last_java_sp, Address(xthread, JavaThread::last_Java_sp_offset())); ++ ins_encode %{ ++ const Address ref_addr (as_Register($mem$$base), $mem$$disp); ++ __ ld($dst$$Register, ref_addr); ++ z_load_barrier(_masm, this, ref_addr, $dst$$Register, t0 /* tmp */, barrier_data()); ++ %} + -+ // last_java_fp is optional -+ if (last_java_fp->is_valid()) { -+ sd(last_java_fp, Address(xthread, JavaThread::last_Java_fp_offset())); -+ } -+} ++ ins_pipe(iload_reg_mem); ++%} + -+void MacroAssembler::set_last_Java_frame(Register last_java_sp, -+ Register last_java_fp, -+ address last_java_pc, -+ Register tmp) { -+ assert(last_java_pc != NULL, "must provide a valid PC"); ++instruct zCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ ++ match(Set res (CompareAndSwapP mem (Binary oldval newval))); ++ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); ++ predicate(UseZGC && !needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); ++ effect(KILL cr, TEMP_DEF res); + -+ la(tmp, last_java_pc); -+ sd(tmp, Address(xthread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ ins_cost(2 * VOLATILE_REF_COST); + -+ set_last_Java_frame(last_java_sp, last_java_fp, noreg, tmp); -+} ++ format %{ "cmpxchg $mem, $oldval, $newval, #@zCompareAndSwapP\n\t" ++ "mv $res, $res == $oldval" %} + -+void MacroAssembler::set_last_Java_frame(Register last_java_sp, -+ Register last_java_fp, -+ Label &L, -+ Register tmp) { -+ if (L.is_bound()) { -+ set_last_Java_frame(last_java_sp, last_java_fp, target(L), tmp); -+ } else { -+ L.add_patch_at(code(), locator()); -+ set_last_Java_frame(last_java_sp, last_java_fp, pc() /* Patched later */, tmp); -+ } -+} ++ ins_encode %{ ++ Label failed; ++ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); ++ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, ++ Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register, ++ true /* result_as_bool */); ++ __ beqz($res$$Register, failed); ++ __ mv(t0, $oldval$$Register); ++ __ bind(failed); ++ if (barrier_data() != ZLoadBarrierElided) { ++ Label good; ++ __ ld(t1, Address(xthread, ZThreadLocalData::address_bad_mask_offset()), t1 /* tmp */); ++ __ andr(t1, t1, t0); ++ __ beqz(t1, good); ++ z_load_barrier_slow_path(_masm, this, Address($mem$$Register), t0 /* ref */, t1 /* tmp */); ++ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, ++ Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register, ++ true /* result_as_bool */); ++ __ bind(good); ++ } ++ %} + -+void MacroAssembler::reset_last_Java_frame(bool clear_fp) { -+ // we must set sp to zero to clear frame -+ sd(zr, Address(xthread, 
JavaThread::last_Java_sp_offset())); ++ ins_pipe(pipe_slow); ++%} + -+ // must clear fp, so that compiled frames are not confused; it is -+ // possible that we need it only for debugging -+ if (clear_fp) { -+ sd(zr, Address(xthread, JavaThread::last_Java_fp_offset())); -+ } ++instruct zCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ ++ match(Set res (CompareAndSwapP mem (Binary oldval newval))); ++ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); ++ predicate(UseZGC && needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong)); ++ effect(KILL cr, TEMP_DEF res); + -+ // Always clear the pc because it could have been set by make_walkable() -+ sd(zr, Address(xthread, JavaThread::last_Java_pc_offset())); -+} ++ ins_cost(2 * VOLATILE_REF_COST); + -+void MacroAssembler::call_VM_base(Register oop_result, -+ Register java_thread, -+ Register last_java_sp, -+ address entry_point, -+ int number_of_arguments, -+ bool check_exceptions) { -+ // determine java_thread register -+ if (!java_thread->is_valid()) { -+ java_thread = xthread; -+ } -+ // determine last_java_sp register -+ if (!last_java_sp->is_valid()) { -+ last_java_sp = esp; -+ } ++ format %{ "cmpxchg $mem, $oldval, $newval, #@zCompareAndSwapPAcq\n\t" ++ "mv $res, $res == $oldval" %} + -+ // debugging support -+ assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); -+ assert(java_thread == xthread, "unexpected register"); ++ ins_encode %{ ++ Label failed; ++ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); ++ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, ++ Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register, ++ true /* result_as_bool */); ++ __ beqz($res$$Register, failed); ++ __ mv(t0, $oldval$$Register); ++ __ bind(failed); ++ if (barrier_data() != ZLoadBarrierElided) { ++ Label good; ++ __ ld(t1, Address(xthread, ZThreadLocalData::address_bad_mask_offset()), t1 /* tmp */); ++ __ andr(t1, t1, t0); ++ __ beqz(t1, good); ++ z_load_barrier_slow_path(_masm, this, Address($mem$$Register), t0 /* ref */, t1 /* tmp */); ++ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, ++ Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register, ++ true /* result_as_bool */); ++ __ bind(good); ++ } ++ %} + -+ assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result"); -+ assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp"); ++ ins_pipe(pipe_slow); ++%} + -+ // push java thread (becomes first argument of C function) -+ mv(c_rarg0, java_thread); ++instruct zCompareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) %{ ++ match(Set res (CompareAndExchangeP mem (Binary oldval newval))); ++ predicate(UseZGC && !needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); ++ effect(TEMP_DEF res); + -+ // set last Java frame before call -+ assert(last_java_sp != fp, "can't use fp"); ++ ins_cost(2 * VOLATILE_REF_COST); + -+ Label l; -+ set_last_Java_frame(last_java_sp, fp, l, t0); ++ format %{ "cmpxchg $res = $mem, $oldval, $newval, #@zCompareAndExchangeP" %} + -+ // do the call, remove parameters -+ MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments, &l); ++ ins_encode %{ ++ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); ++ __ 
cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, ++ Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register); ++ if (barrier_data() != ZLoadBarrierElided) { ++ Label good; ++ __ ld(t0, Address(xthread, ZThreadLocalData::address_bad_mask_offset())); ++ __ andr(t0, t0, $res$$Register); ++ __ beqz(t0, good); ++ z_load_barrier_slow_path(_masm, this, Address($mem$$Register), $res$$Register /* ref */, t0 /* tmp */); ++ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, ++ Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register); ++ __ bind(good); ++ } ++ %} + -+ // reset last Java frame -+ // Only interpreter should have to clear fp -+ reset_last_Java_frame(true); ++ ins_pipe(pipe_slow); ++%} + -+ // C++ interp handles this in the interpreter -+ check_and_handle_popframe(java_thread); -+ check_and_handle_earlyret(java_thread); ++instruct zCompareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) %{ ++ match(Set res (CompareAndExchangeP mem (Binary oldval newval))); ++ predicate(UseZGC && needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); ++ effect(TEMP_DEF res); + -+ if (check_exceptions) { -+ // check for pending exceptions (java_thread is set upon return) -+ ld(t0, Address(java_thread, in_bytes(Thread::pending_exception_offset()))); -+ Label ok; -+ beqz(t0, ok); -+ int32_t offset = 0; -+ la_patchable(t0, RuntimeAddress(StubRoutines::forward_exception_entry()), offset); -+ jalr(x0, t0, offset); -+ bind(ok); -+ } ++ ins_cost(2 * VOLATILE_REF_COST); + -+ // get oop result if there is one and reset the value in the thread -+ if (oop_result->is_valid()) { -+ get_vm_result(oop_result, java_thread); -+ } -+} ++ format %{ "cmpxchg $res = $mem, $oldval, $newval, #@zCompareAndExchangePAcq" %} + -+void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) { -+ ld(oop_result, Address(java_thread, JavaThread::vm_result_offset())); -+ sd(zr, Address(java_thread, JavaThread::vm_result_offset())); -+ verify_oop(oop_result, "broken oop in call_VM_base"); -+} ++ ins_encode %{ ++ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); ++ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, ++ Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register); ++ if (barrier_data() != ZLoadBarrierElided) { ++ Label good; ++ __ ld(t0, Address(xthread, ZThreadLocalData::address_bad_mask_offset())); ++ __ andr(t0, t0, $res$$Register); ++ __ beqz(t0, good); ++ z_load_barrier_slow_path(_masm, this, Address($mem$$Register), $res$$Register /* ref */, t0 /* tmp */); ++ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, ++ Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register); ++ __ bind(good); ++ } ++ %} + -+void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) { -+ ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset())); -+ sd(zr, Address(java_thread, JavaThread::vm_result_2_offset())); -+} ++ ins_pipe(pipe_slow); ++%} + ++instruct zGetAndSetP(indirect mem, iRegP newv, iRegPNoSp prev, rFlagsReg cr) %{ ++ match(Set prev (GetAndSetP mem newv)); ++ predicate(UseZGC && !needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() != 0); ++ effect(TEMP_DEF prev, KILL cr); + -+void MacroAssembler::verify_oop(Register reg, const char* s) { -+ if (!VerifyOops) { return; } ++ 
ins_cost(2 * VOLATILE_REF_COST); + -+ // Pass register number to verify_oop_subroutine -+ const char* b = NULL; -+ { -+ ResourceMark rm; -+ stringStream ss; -+ ss.print("verify_oop: %s: %s", reg->name(), s); -+ b = code_string(ss.as_string()); -+ } -+ BLOCK_COMMENT("verify_oop {"); ++ format %{ "atomic_xchg $prev, $newv, [$mem], #@zGetAndSetP" %} + -+ push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); ++ ins_encode %{ ++ __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base)); ++ z_load_barrier(_masm, this, Address(noreg, 0), $prev$$Register, t0 /* tmp */, barrier_data()); ++ %} + -+ mv(c_rarg0, reg); // c_rarg0 : x10 -+ if(b != NULL) { -+ movptr(t0, (uintptr_t)(address)b); -+ } else { -+ ShouldNotReachHere(); -+ } ++ ins_pipe(pipe_serial); ++%} + -+ // call indirectly to solve generation ordering problem -+ int32_t offset = 0; -+ la_patchable(t1, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()), offset); -+ ld(t1, Address(t1, offset)); -+ jalr(t1); ++instruct zGetAndSetPAcq(indirect mem, iRegP newv, iRegPNoSp prev, rFlagsReg cr) %{ ++ match(Set prev (GetAndSetP mem newv)); ++ predicate(UseZGC && needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() != 0)); ++ effect(TEMP_DEF prev, KILL cr); + -+ pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); ++ ins_cost(VOLATILE_REF_COST); + -+ BLOCK_COMMENT("} verify_oop"); -+} ++ format %{ "atomic_xchg_acq $prev, $newv, [$mem], #@zGetAndSetPAcq" %} + -+void MacroAssembler::verify_oop_addr(Address addr, const char* s) { -+ if (!VerifyOops) { -+ return; -+ } ++ ins_encode %{ ++ __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base)); ++ z_load_barrier(_masm, this, Address(noreg, 0), $prev$$Register, t0 /* tmp */, barrier_data()); ++ %} ++ ins_pipe(pipe_serial); ++%} +diff --git a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp +new file mode 100644 +index 00000000000..2936837d951 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp +@@ -0,0 +1,52 @@ ++/* ++ * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ + -+ const char* b = NULL; -+ { -+ ResourceMark rm; -+ stringStream ss; -+ ss.print("verify_oop_addr: %s", s); -+ b = code_string(ss.as_string()); -+ } -+ BLOCK_COMMENT("verify_oop_addr {"); ++#ifndef CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP ++#define CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP + -+ push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); ++const int StackAlignmentInBytes = 16; + -+ if (addr.uses(sp)) { -+ la(x10, addr); -+ ld(x10, Address(x10, 4 * wordSize)); -+ } else { -+ ld(x10, addr); -+ } -+ if(b != NULL) { -+ movptr(t0, (uintptr_t)(address)b); -+ } else { -+ ShouldNotReachHere(); -+ } ++// Indicates whether the C calling conventions require that ++// 32-bit integer argument values are extended to 64 bits. ++const bool CCallingConventionRequiresIntsAsLongs = false; + -+ // call indirectly to solve generation ordering problem -+ int32_t offset = 0; -+ la_patchable(t1, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()), offset); -+ ld(t1, Address(t1, offset)); -+ jalr(t1); ++// RISCV has adopted a multicopy atomic model closely following ++// that of ARMv8. ++#define CPU_MULTI_COPY_ATOMIC + -+ pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); ++// To be safe, we deoptimize when we come across an access that needs ++// patching. This is similar to what is done on aarch64. ++#define DEOPTIMIZE_WHEN_PATCHING + -+ BLOCK_COMMENT("} verify_oop_addr"); -+} ++#define SUPPORTS_NATIVE_CX8 + -+Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, -+ int extra_slot_offset) { -+ // cf. TemplateTable::prepare_invoke(), if (load_receiver). -+ int stackElementSize = Interpreter::stackElementSize; -+ int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0); -+#ifdef ASSERT -+ int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1); -+ assert(offset1 - offset == stackElementSize, "correct arithmetic"); -+#endif -+ if (arg_slot.is_constant()) { -+ return Address(esp, arg_slot.as_constant() * stackElementSize + offset); -+ } else { -+ assert_different_registers(t0, arg_slot.as_register()); -+ shadd(t0, arg_slot.as_register(), esp, t0, exact_log2(stackElementSize)); -+ return Address(t0, offset); -+ } -+} ++#define SUPPORT_RESERVED_STACK_AREA + -+#ifndef PRODUCT -+extern "C" void findpc(intptr_t x); -+#endif ++#define COMPRESSED_CLASS_POINTERS_DEPENDS_ON_COMPRESSED_OOPS false + -+void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) -+{ -+ // In order to get locks to work, we need to fake a in_VM state -+ if (ShowMessageBoxOnError) { -+ JavaThread* thread = JavaThread::current(); -+ JavaThreadState saved_state = thread->thread_state(); -+ thread->set_thread_state(_thread_in_vm); -+#ifndef PRODUCT -+ if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { -+ ttyLocker ttyl; -+ BytecodeCounter::print(); -+ } -+#endif -+ if (os::message_box(msg, "Execution stopped, print registers?")) { -+ ttyLocker ttyl; -+ tty->print_cr(" pc = 0x%016" PRIX64, pc); -+#ifndef PRODUCT -+ tty->cr(); -+ findpc(pc); -+ tty->cr(); -+#endif -+ tty->print_cr(" x0 = 0x%016" PRIx64, regs[0]); -+ tty->print_cr(" x1 = 0x%016" PRIx64, regs[1]); -+ tty->print_cr(" x2 = 0x%016" PRIx64, regs[2]); -+ tty->print_cr(" x3 = 0x%016" PRIx64, regs[3]); -+ tty->print_cr(" x4 = 0x%016" PRIx64, regs[4]); -+ tty->print_cr(" x5 = 0x%016" PRIx64, regs[5]); -+ tty->print_cr(" x6 = 0x%016" PRIx64, regs[6]); -+ tty->print_cr(" x7 = 0x%016" PRIx64, regs[7]); -+ tty->print_cr(" x8 = 0x%016" PRIx64, regs[8]); -+ tty->print_cr(" x9 = 0x%016" PRIx64, regs[9]); -+ 
tty->print_cr("x10 = 0x%016" PRIx64, regs[10]); -+ tty->print_cr("x11 = 0x%016" PRIx64, regs[11]); -+ tty->print_cr("x12 = 0x%016" PRIx64, regs[12]); -+ tty->print_cr("x13 = 0x%016" PRIx64, regs[13]); -+ tty->print_cr("x14 = 0x%016" PRIx64, regs[14]); -+ tty->print_cr("x15 = 0x%016" PRIx64, regs[15]); -+ tty->print_cr("x16 = 0x%016" PRIx64, regs[16]); -+ tty->print_cr("x17 = 0x%016" PRIx64, regs[17]); -+ tty->print_cr("x18 = 0x%016" PRIx64, regs[18]); -+ tty->print_cr("x19 = 0x%016" PRIx64, regs[19]); -+ tty->print_cr("x20 = 0x%016" PRIx64, regs[20]); -+ tty->print_cr("x21 = 0x%016" PRIx64, regs[21]); -+ tty->print_cr("x22 = 0x%016" PRIx64, regs[22]); -+ tty->print_cr("x23 = 0x%016" PRIx64, regs[23]); -+ tty->print_cr("x24 = 0x%016" PRIx64, regs[24]); -+ tty->print_cr("x25 = 0x%016" PRIx64, regs[25]); -+ tty->print_cr("x26 = 0x%016" PRIx64, regs[26]); -+ tty->print_cr("x27 = 0x%016" PRIx64, regs[27]); -+ tty->print_cr("x28 = 0x%016" PRIx64, regs[28]); -+ tty->print_cr("x30 = 0x%016" PRIx64, regs[30]); -+ tty->print_cr("x31 = 0x%016" PRIx64, regs[31]); -+ BREAKPOINT; -+ } -+ ThreadStateTransition::transition(thread, _thread_in_vm, saved_state); -+ } else { -+ ttyLocker ttyl; -+ ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); -+ assert(false, "DEBUG MESSAGE: %s", msg); -+ } -+} ++#define USE_POINTERS_TO_REGISTER_IMPL_ARRAY + -+void MacroAssembler::resolve_jobject(Register value, Register thread, Register tmp) { -+ Label done, not_weak; -+ beqz(value, done); // Use NULL as-is. ++#endif // CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp +new file mode 100644 +index 00000000000..cbfc0583883 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/globals_riscv.hpp +@@ -0,0 +1,99 @@ ++/* ++ * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ // Test for jweak tag. -+ andi(t0, value, JNIHandles::weak_tag_mask); -+ beqz(t0, not_weak); ++#ifndef CPU_RISCV_GLOBALS_RISCV_HPP ++#define CPU_RISCV_GLOBALS_RISCV_HPP + -+ // Resolve jweak. -+ access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, value, -+ Address(value, -JNIHandles::weak_tag_value), tmp, thread); -+ verify_oop(value); -+ j(done); ++#include "utilities/globalDefinitions.hpp" ++#include "utilities/macros.hpp" + -+ bind(not_weak); -+ // Resolve (untagged) jobject. 
-+ access_load_at(T_OBJECT, IN_NATIVE, value, Address(value, 0), tmp, thread); -+ verify_oop(value); -+ bind(done); -+} ++// Sets the default values for platform dependent flags used by the runtime system. ++// (see globals.hpp) + -+void MacroAssembler::stop(const char* msg) { -+ address ip = pc(); -+ push_reg(RegSet::range(x0, x31), sp); -+ if(msg != NULL && ip != NULL) { -+ mv(c_rarg0, (uintptr_t)(address)msg); -+ mv(c_rarg1, (uintptr_t)(address)ip); -+ } else { -+ ShouldNotReachHere(); -+ } -+ mv(c_rarg2, sp); -+ mv(c_rarg3, CAST_FROM_FN_PTR(address, MacroAssembler::debug64)); -+ jalr(c_rarg3); -+ ebreak(); -+} ++define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks ++define_pd_global(bool, TrapBasedNullChecks, false); ++define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs past to check cast + -+void MacroAssembler::unimplemented(const char* what) { -+ const char* buf = NULL; -+ { -+ ResourceMark rm; -+ stringStream ss; -+ ss.print("unimplemented: %s", what); -+ buf = code_string(ss.as_string()); -+ } -+ stop(buf); -+} ++define_pd_global(uintx, CodeCacheSegmentSize, 64 COMPILER1_AND_COMPILER2_PRESENT(+64)); // Tiered compilation has large code-entry alignment. ++define_pd_global(intx, CodeEntryAlignment, 64); ++define_pd_global(intx, OptoLoopAlignment, 16); + -+void MacroAssembler::emit_static_call_stub() { -+ // CompiledDirectStaticCall::set_to_interpreted knows the -+ // exact layout of this stub. ++#define DEFAULT_STACK_YELLOW_PAGES (2) ++#define DEFAULT_STACK_RED_PAGES (1) ++// Java_java_net_SocketOutputStream_socketWrite0() uses a 64k buffer on the ++// stack if compiled for unix and LP64. To pass stack overflow tests we need ++// 20 shadow pages. ++#define DEFAULT_STACK_SHADOW_PAGES (20 DEBUG_ONLY(+5)) ++#define DEFAULT_STACK_RESERVED_PAGES (1) + -+ mov_metadata(xmethod, (Metadata*)NULL); ++#define MIN_STACK_YELLOW_PAGES DEFAULT_STACK_YELLOW_PAGES ++#define MIN_STACK_RED_PAGES DEFAULT_STACK_RED_PAGES ++#define MIN_STACK_SHADOW_PAGES DEFAULT_STACK_SHADOW_PAGES ++#define MIN_STACK_RESERVED_PAGES (0) + -+ // Jump to the entry point of the i2c stub. 
-+ int32_t offset = 0; -+ movptr_with_offset(t0, 0, offset); -+ jalr(x0, t0, offset); -+} ++define_pd_global(intx, StackYellowPages, DEFAULT_STACK_YELLOW_PAGES); ++define_pd_global(intx, StackRedPages, DEFAULT_STACK_RED_PAGES); ++define_pd_global(intx, StackShadowPages, DEFAULT_STACK_SHADOW_PAGES); ++define_pd_global(intx, StackReservedPages, DEFAULT_STACK_RESERVED_PAGES); + -+void MacroAssembler::call_VM_leaf_base(address entry_point, -+ int number_of_arguments, -+ Label *retaddr) { -+ int32_t offset = 0; -+ push_reg(RegSet::of(t0, xmethod), sp); // push << t0 & xmethod >> to sp -+ movptr_with_offset(t0, entry_point, offset); -+ jalr(x1, t0, offset); -+ if (retaddr != NULL) { -+ bind(*retaddr); -+ } -+ pop_reg(RegSet::of(t0, xmethod), sp); // pop << t0 & xmethod >> from sp -+} ++define_pd_global(bool, RewriteBytecodes, true); ++define_pd_global(bool, RewriteFrequentPairs, true); + -+void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { -+ call_VM_leaf_base(entry_point, number_of_arguments); -+} ++define_pd_global(bool, PreserveFramePointer, false); + -+void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { -+ pass_arg0(this, arg_0); -+ call_VM_leaf_base(entry_point, 1); -+} ++define_pd_global(uintx, TypeProfileLevel, 111); + -+void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { -+ pass_arg0(this, arg_0); -+ pass_arg1(this, arg_1); -+ call_VM_leaf_base(entry_point, 2); -+} ++define_pd_global(bool, CompactStrings, true); + -+void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, -+ Register arg_1, Register arg_2) { -+ pass_arg0(this, arg_0); -+ pass_arg1(this, arg_1); -+ pass_arg2(this, arg_2); -+ call_VM_leaf_base(entry_point, 3); -+} ++// Clear short arrays bigger than one word in an arch-specific way ++define_pd_global(intx, InitArrayShortSize, BytesPerLong); + -+void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) { -+ pass_arg0(this, arg_0); -+ MacroAssembler::call_VM_leaf_base(entry_point, 1); -+} ++define_pd_global(intx, InlineSmallCode, 1000); + -+void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { ++#define ARCH_FLAGS(develop, \ ++ product, \ ++ notproduct, \ ++ range, \ ++ constraint) \ ++ \ ++ product(bool, NearCpool, true, \ ++ "constant pool is close to instructions") \ ++ product(intx, BlockZeroingLowLimit, 256, \ ++ "Minimum size in bytes when block zeroing will be used") \ ++ range(1, max_jint) \ ++ product(bool, TraceTraps, false, "Trace all traps the signal handler") \ ++ /* For now we're going to be safe and add the I/O bits to userspace fences. 
*/ \ ++ product(bool, UseConservativeFence, true, \ ++ "Extend i for r and o for w in the pred/succ flags of fence;" \ ++ "Extend fence.i to fence.i + fence.") \ ++ product(bool, AvoidUnalignedAccesses, true, \ ++ "Avoid generating unaligned memory accesses") \ ++ product(bool, UseRVV, false, EXPERIMENTAL, "Use RVV instructions") \ ++ product(bool, UseRVB, false, EXPERIMENTAL, "Use RVB instructions") \ ++ product(bool, UseRVC, false, EXPERIMENTAL, "Use RVC instructions") \ ++ product(bool, UseRVVForBigIntegerShiftIntrinsics, true, \ ++ "Use RVV instructions for left/right shift of BigInteger") + -+ assert(arg_0 != c_rarg1, "smashed arg"); -+ pass_arg1(this, arg_1); -+ pass_arg0(this, arg_0); -+ MacroAssembler::call_VM_leaf_base(entry_point, 2); -+} ++#endif // CPU_RISCV_GLOBALS_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/icBuffer_riscv.cpp b/src/hotspot/cpu/riscv/icBuffer_riscv.cpp +new file mode 100644 +index 00000000000..cc93103dc55 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/icBuffer_riscv.cpp +@@ -0,0 +1,79 @@ ++/* ++ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ + -+void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { -+ assert(arg_0 != c_rarg2, "smashed arg"); -+ assert(arg_1 != c_rarg2, "smashed arg"); -+ pass_arg2(this, arg_2); -+ assert(arg_0 != c_rarg1, "smashed arg"); -+ pass_arg1(this, arg_1); -+ pass_arg0(this, arg_0); -+ MacroAssembler::call_VM_leaf_base(entry_point, 3); -+} ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/icBuffer.hpp" ++#include "gc/shared/collectedHeap.inline.hpp" ++#include "interpreter/bytecodes.hpp" ++#include "memory/resourceArea.hpp" ++#include "nativeInst_riscv.hpp" ++#include "oops/oop.inline.hpp" + -+void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) { -+ assert(arg_0 != c_rarg3, "smashed arg"); -+ assert(arg_1 != c_rarg3, "smashed arg"); -+ assert(arg_2 != c_rarg3, "smashed arg"); -+ pass_arg3(this, arg_3); -+ assert(arg_0 != c_rarg2, "smashed arg"); -+ assert(arg_1 != c_rarg2, "smashed arg"); -+ pass_arg2(this, arg_2); -+ assert(arg_0 != c_rarg1, "smashed arg"); -+ pass_arg1(this, arg_1); -+ pass_arg0(this, arg_0); -+ MacroAssembler::call_VM_leaf_base(entry_point, 4); ++int InlineCacheBuffer::ic_stub_code_size() { ++ // 6: auipc + ld + auipc + jalr + address(2 * instruction_size) ++ // 5: auipc + ld + j + address(2 * instruction_size) ++ return (MacroAssembler::far_branches() ? 6 : 5) * NativeInstruction::instruction_size; +} + -+void MacroAssembler::nop() { -+ addi(x0, x0, 0); -+} ++#define __ masm-> + -+void MacroAssembler::mv(Register Rd, Register Rs) { -+ if (Rd != Rs) { -+ addi(Rd, Rs, 0); -+ } -+} ++void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached_value, address entry_point) { ++ assert_cond(code_begin != NULL && entry_point != NULL); ++ ResourceMark rm; ++ CodeBuffer code(code_begin, ic_stub_code_size()); ++ MacroAssembler* masm = new MacroAssembler(&code); ++ // Note: even though the code contains an embedded value, we do not need reloc info ++ // because ++ // (1) the value is old (i.e., doesn't matter for scavenges) ++ // (2) these ICStubs are removed *before* a GC happens, so the roots disappear + -+void MacroAssembler::notr(Register Rd, Register Rs) { -+ xori(Rd, Rs, -1); ++ address start = __ pc(); ++ Label l; ++ __ ld(t1, l); ++ __ far_jump(ExternalAddress(entry_point)); ++ __ align(wordSize); ++ __ bind(l); ++ __ emit_int64((intptr_t)cached_value); ++ // Only need to invalidate the 1st two instructions - not the whole ic stub ++ ICache::invalidate_range(code_begin, InlineCacheBuffer::ic_stub_code_size()); ++ assert(__ pc() - start == ic_stub_code_size(), "must be"); +} + -+void MacroAssembler::neg(Register Rd, Register Rs) { -+ sub(Rd, x0, Rs); ++address InlineCacheBuffer::ic_buffer_entry_point(address code_begin) { ++ NativeMovConstReg* move = nativeMovConstReg_at(code_begin); // creation also verifies the object ++ NativeJump* jump = nativeJump_at(move->next_instruction_address()); ++ return jump->jump_destination(); +} + -+void MacroAssembler::negw(Register Rd, Register Rs) { -+ subw(Rd, x0, Rs); -+} + -+void MacroAssembler::sext_w(Register Rd, Register Rs) { -+ addiw(Rd, Rs, 0); ++void* InlineCacheBuffer::ic_buffer_cached_value(address code_begin) { ++ // The word containing the cached value is at the end of this IC buffer ++ uintptr_t *p = (uintptr_t *)(code_begin + ic_stub_code_size() - wordSize); ++ void* o = (void*)*p; ++ return o; +} +diff 
--git a/src/hotspot/cpu/riscv/icache_riscv.cpp b/src/hotspot/cpu/riscv/icache_riscv.cpp +new file mode 100644 +index 00000000000..922a80f9f3e +--- /dev/null ++++ b/src/hotspot/cpu/riscv/icache_riscv.cpp +@@ -0,0 +1,51 @@ ++/* ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+void MacroAssembler::zext_b(Register Rd, Register Rs) { -+ andi(Rd, Rs, 0xFF); -+} ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "runtime/icache.hpp" + -+void MacroAssembler::seqz(Register Rd, Register Rs) { -+ sltiu(Rd, Rs, 1); -+} ++#define __ _masm-> + -+void MacroAssembler::snez(Register Rd, Register Rs) { -+ sltu(Rd, x0, Rs); ++static int icache_flush(address addr, int lines, int magic) { ++ os::icache_flush((long int) addr, (long int) (addr + (lines << ICache::log2_line_size))); ++ return magic; +} + -+void MacroAssembler::sltz(Register Rd, Register Rs) { -+ slt(Rd, Rs, x0); -+} ++void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) { ++ address start = (address)icache_flush; ++ *flush_icache_stub = (ICache::flush_icache_stub_t)start; + -+void MacroAssembler::sgtz(Register Rd, Register Rs) { -+ slt(Rd, x0, Rs); -+} ++ // ICache::invalidate_range() contains explicit condition that the first ++ // call is invoked on the generated icache flush stub code range. ++ ICache::invalidate_range(start, 0); + -+void MacroAssembler::fmv_s(FloatRegister Rd, FloatRegister Rs) { -+ if (Rd != Rs) { -+ fsgnj_s(Rd, Rs, Rs); ++ { ++ StubCodeMark mark(this, "ICache", "fake_stub_for_inlined_icache_flush"); ++ __ ret(); + } +} + -+void MacroAssembler::fabs_s(FloatRegister Rd, FloatRegister Rs) { -+ fsgnjx_s(Rd, Rs, Rs); -+} ++#undef __ +diff --git a/src/hotspot/cpu/riscv/icache_riscv.hpp b/src/hotspot/cpu/riscv/icache_riscv.hpp +new file mode 100644 +index 00000000000..5bf40ca8204 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/icache_riscv.hpp +@@ -0,0 +1,42 @@ ++/* ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+void MacroAssembler::fneg_s(FloatRegister Rd, FloatRegister Rs) { -+ fsgnjn_s(Rd, Rs, Rs); -+} ++#ifndef CPU_RISCV_ICACHE_RISCV_HPP ++#define CPU_RISCV_ICACHE_RISCV_HPP + -+void MacroAssembler::fmv_d(FloatRegister Rd, FloatRegister Rs) { -+ if (Rd != Rs) { -+ fsgnj_d(Rd, Rs, Rs); -+ } -+} ++// Interface for updating the instruction cache. Whenever the VM ++// modifies code, part of the processor instruction cache potentially ++// has to be flushed. + -+void MacroAssembler::fabs_d(FloatRegister Rd, FloatRegister Rs) { -+ fsgnjx_d(Rd, Rs, Rs); -+} ++class ICache : public AbstractICache { ++public: ++ enum { ++ stub_size = 16, // Size of the icache flush stub in bytes ++ line_size = BytesPerWord, // conservative ++ log2_line_size = LogBytesPerWord // log2(line_size) ++ }; ++}; + -+void MacroAssembler::fneg_d(FloatRegister Rd, FloatRegister Rs) { -+ fsgnjn_d(Rd, Rs, Rs); -+} ++#endif // CPU_RISCV_ICACHE_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +new file mode 100644 +index 00000000000..d12dcb2af19 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +@@ -0,0 +1,1940 @@ ++/* ++ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ + -+void MacroAssembler::vmnot_m(VectorRegister vd, VectorRegister vs) { -+ vmnand_mm(vd, vs, vs); -+} ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "interp_masm_riscv.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "logging/log.hpp" ++#include "oops/arrayOop.hpp" ++#include "oops/markWord.hpp" ++#include "oops/method.hpp" ++#include "oops/methodData.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/jvmtiThreadState.hpp" ++#include "runtime/basicLock.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/safepointMechanism.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/thread.inline.hpp" ++#include "utilities/powerOfTwo.hpp" + -+void MacroAssembler::vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm) { -+ vnsrl_wx(vd, vs, x0, vm); ++void InterpreterMacroAssembler::narrow(Register result) { ++ // Get method->_constMethod->_result_type ++ ld(t0, Address(fp, frame::interpreter_frame_method_offset * wordSize)); ++ ld(t0, Address(t0, Method::const_offset())); ++ lbu(t0, Address(t0, ConstMethod::result_type_offset())); ++ ++ Label done, notBool, notByte, notChar; ++ ++ // common case first ++ mv(t1, T_INT); ++ beq(t0, t1, done); ++ ++ // mask integer result to narrower return type. ++ mv(t1, T_BOOLEAN); ++ bne(t0, t1, notBool); ++ ++ andi(result, result, 0x1); ++ j(done); ++ ++ bind(notBool); ++ mv(t1, T_BYTE); ++ bne(t0, t1, notByte); ++ sign_extend(result, result, 8); ++ j(done); ++ ++ bind(notByte); ++ mv(t1, T_CHAR); ++ bne(t0, t1, notChar); ++ zero_extend(result, result, 16); ++ j(done); ++ ++ bind(notChar); ++ sign_extend(result, result, 16); ++ ++ // Nothing to do for T_INT ++ bind(done); ++ addw(result, result, zr); +} + -+void MacroAssembler::vfneg_v(VectorRegister vd, VectorRegister vs) { -+ vfsgnjn_vv(vd, vs, vs); ++void InterpreterMacroAssembler::jump_to_entry(address entry) { ++ assert(entry != NULL, "Entry must have been generated by now"); ++ j(entry); +} + -+void MacroAssembler::la(Register Rd, const address &dest) { -+ int64_t offset = dest - pc(); -+ if (is_offset_in_range(offset, 32)) { -+ auipc(Rd, (int32_t)offset + 0x800); //0x800, Note:the 11th sign bit -+ addi(Rd, Rd, ((int64_t)offset << 52) >> 52); -+ } else { -+ movptr(Rd, dest); ++void InterpreterMacroAssembler::check_and_handle_popframe(Register java_thread) { ++ if (JvmtiExport::can_pop_frame()) { ++ Label L; ++ // Initiate popframe handling only if it is not already being ++ // processed. If the flag has the popframe_processing bit set, ++ // it means that this code is called *during* popframe handling - we ++ // don't want to reenter. ++ // This method is only called just after the call into the vm in ++ // call_VM_base, so the arg registers are available. ++ lwu(t1, Address(xthread, JavaThread::popframe_condition_offset())); ++ andi(t0, t1, JavaThread::popframe_pending_bit); ++ beqz(t0, L); ++ andi(t0, t1, JavaThread::popframe_processing_bit); ++ bnez(t0, L); ++ // Call Interpreter::remove_activation_preserving_args_entry() to get the ++ // address of the same-named entrypoint in the generated interpreter code. 
++ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_preserving_args_entry)); ++ jr(x10); ++ bind(L); + } +} + -+void MacroAssembler::la(Register Rd, const Address &adr) { -+ code_section()->relocate(pc(), adr.rspec()); -+ relocInfo::relocType rtype = adr.rspec().reloc()->type(); + -+ switch(adr.getMode()) { -+ case Address::literal: { -+ if (rtype == relocInfo::none) { -+ mv(Rd, (intptr_t)(adr.target())); -+ } else { -+ movptr(Rd, adr.target()); -+ } ++void InterpreterMacroAssembler::load_earlyret_value(TosState state) { ++ ld(x12, Address(xthread, JavaThread::jvmti_thread_state_offset())); ++ const Address tos_addr(x12, JvmtiThreadState::earlyret_tos_offset()); ++ const Address oop_addr(x12, JvmtiThreadState::earlyret_oop_offset()); ++ const Address val_addr(x12, JvmtiThreadState::earlyret_value_offset()); ++ switch (state) { ++ case atos: ++ ld(x10, oop_addr); ++ sd(zr, oop_addr); ++ verify_oop(x10); + break; -+ } -+ case Address::base_plus_offset:{ -+ Register base = adr.base(); -+ int64_t offset = adr.offset(); -+ if (offset == 0 && Rd != base) { -+ mv(Rd, base); -+ } else if (offset != 0 && Rd != base) { -+ add(Rd, base, offset, Rd); -+ } else if (offset != 0 && Rd == base) { -+ Register tmp = (Rd == t0) ? t1 : t0; -+ add(base, base, offset, tmp); -+ } ++ case ltos: ++ ld(x10, val_addr); ++ break; ++ case btos: // fall through ++ case ztos: // fall through ++ case ctos: // fall through ++ case stos: // fall through ++ case itos: ++ lwu(x10, val_addr); ++ break; ++ case ftos: ++ flw(f10, val_addr); ++ break; ++ case dtos: ++ fld(f10, val_addr); ++ break; ++ case vtos: ++ /* nothing to do */ + break; -+ } + default: + ShouldNotReachHere(); + } ++ // Clean up tos value in the thread object ++ mvw(t0, (int) ilgl); ++ sw(t0, tos_addr); ++ sw(zr, val_addr); +} + -+void MacroAssembler::la(Register Rd, Label &label) { -+ la(Rd, target(label)); -+} -+ -+#define INSN(NAME) \ -+ void MacroAssembler::NAME##z(Register Rs, const address &dest) { \ -+ NAME(Rs, zr, dest); \ -+ } \ -+ void MacroAssembler::NAME##z(Register Rs, Label &l, bool is_far) { \ -+ NAME(Rs, zr, l, is_far); \ -+ } \ -+ -+ INSN(beq); -+ INSN(bne); -+ INSN(blt); -+ INSN(ble); -+ INSN(bge); -+ INSN(bgt); -+ -+#undef INSN -+ -+// Float compare branch instructions -+ -+#define INSN(NAME, FLOATCMP, BRANCH) \ -+ void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) { \ -+ FLOATCMP##_s(t0, Rs1, Rs2); \ -+ BRANCH(t0, l, is_far); \ -+ } \ -+ void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) { \ -+ FLOATCMP##_d(t0, Rs1, Rs2); \ -+ BRANCH(t0, l, is_far); \ -+ } + -+ INSN(beq, feq, bnez); -+ INSN(bne, feq, beqz); -+#undef INSN ++void InterpreterMacroAssembler::check_and_handle_earlyret(Register java_thread) { ++ if (JvmtiExport::can_force_early_return()) { ++ Label L; ++ ld(t0, Address(xthread, JavaThread::jvmti_thread_state_offset())); ++ beqz(t0, L); // if [thread->jvmti_thread_state() == NULL] then exit + ++ // Initiate earlyret handling only if it is not already being processed. ++ // If the flag has the earlyret_processing bit set, it means that this code ++ // is called *during* earlyret handling - we don't want to reenter. 
++ lwu(t0, Address(t0, JvmtiThreadState::earlyret_state_offset())); ++ mv(t1, JvmtiThreadState::earlyret_pending); ++ bne(t0, t1, L); + -+#define INSN(NAME, FLOATCMP1, FLOATCMP2) \ -+ void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ -+ bool is_far, bool is_unordered) { \ -+ if(is_unordered) { \ -+ FLOATCMP2##_s(t0, Rs2, Rs1); \ -+ beqz(t0, l, is_far); \ -+ } else { \ -+ FLOATCMP1##_s(t0, Rs1, Rs2); \ -+ bnez(t0, l, is_far); \ -+ } \ -+ } \ -+ void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ -+ bool is_far, bool is_unordered) { \ -+ if(is_unordered) { \ -+ FLOATCMP2##_d(t0, Rs2, Rs1); \ -+ beqz(t0, l, is_far); \ -+ } else { \ -+ FLOATCMP1##_d(t0, Rs1, Rs2); \ -+ bnez(t0, l, is_far); \ -+ } \ ++ // Call Interpreter::remove_activation_early_entry() to get the address of the ++ // same-named entrypoint in the generated interpreter code. ++ ld(t0, Address(xthread, JavaThread::jvmti_thread_state_offset())); ++ lwu(t0, Address(t0, JvmtiThreadState::earlyret_tos_offset())); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_early_entry), t0); ++ jr(x10); ++ bind(L); + } ++} + -+ INSN(ble, fle, flt); -+ INSN(blt, flt, fle); ++void InterpreterMacroAssembler::get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset) { ++ assert(bcp_offset >= 0, "bcp is still pointing to start of bytecode"); ++ lhu(reg, Address(xbcp, bcp_offset)); ++ revb_h(reg, reg); ++} + -+#undef INSN ++void InterpreterMacroAssembler::get_dispatch() { ++ int32_t offset = 0; ++ la_patchable(xdispatch, ExternalAddress((address)Interpreter::dispatch_table()), offset); ++ addi(xdispatch, xdispatch, offset); ++} + -+#define INSN(NAME, CMP) \ -+ void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ -+ bool is_far, bool is_unordered) { \ -+ float_##CMP(Rs2, Rs1, l, is_far, is_unordered); \ -+ } \ -+ void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ -+ bool is_far, bool is_unordered) { \ -+ double_##CMP(Rs2, Rs1, l, is_far, is_unordered); \ ++void InterpreterMacroAssembler::get_cache_index_at_bcp(Register index, ++ int bcp_offset, ++ size_t index_size) { ++ assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); ++ if (index_size == sizeof(u2)) { ++ load_unsigned_short(index, Address(xbcp, bcp_offset)); ++ } else if (index_size == sizeof(u4)) { ++ lwu(index, Address(xbcp, bcp_offset)); ++ // Check if the secondary index definition is still ~x, otherwise ++ // we have to change the following assembler code to calculate the ++ // plain index. ++ assert(ConstantPool::decode_invokedynamic_index(~123) == 123, "else change next line"); ++ xori(index, index, -1); ++ addw(index, index, zr); ++ } else if (index_size == sizeof(u1)) { ++ load_unsigned_byte(index, Address(xbcp, bcp_offset)); ++ } else { ++ ShouldNotReachHere(); + } ++} + -+ INSN(bgt, blt); -+ INSN(bge, ble); -+ -+#undef INSN -+ -+ -+#define INSN(NAME, CSR) \ -+ void MacroAssembler::NAME(Register Rd) { \ -+ csrr(Rd, CSR); \ -+ } -+ -+ INSN(rdinstret, CSR_INSTERT); -+ INSN(rdcycle, CSR_CYCLE); -+ INSN(rdtime, CSR_TIME); -+ INSN(frcsr, CSR_FCSR); -+ INSN(frrm, CSR_FRM); -+ INSN(frflags, CSR_FFLAGS); ++// Return ++// Rindex: index into constant pool ++// Rcache: address of cache entry - ConstantPoolCache::base_offset() ++// ++// A caller must add ConstantPoolCache::base_offset() to Rcache to get ++// the true address of the cache entry. 
++// ++void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache, ++ Register index, ++ int bcp_offset, ++ size_t index_size) { ++ assert_different_registers(cache, index); ++ assert_different_registers(cache, xcpool); ++ get_cache_index_at_bcp(index, bcp_offset, index_size); ++ assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); ++ // Convert from field index to ConstantPoolCacheEntry ++ // riscv already has the cache in xcpool so there is no need to ++ // install it in cache. Instead we pre-add the indexed offset to ++ // xcpool and return it in cache. All clients of this method need to ++ // be modified accordingly. ++ shadd(cache, index, xcpool, cache, 5); ++} + -+#undef INSN + -+void MacroAssembler::csrr(Register Rd, unsigned csr) { -+ csrrs(Rd, csr, x0); ++void InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register cache, ++ Register index, ++ Register bytecode, ++ int byte_no, ++ int bcp_offset, ++ size_t index_size) { ++ get_cache_and_index_at_bcp(cache, index, bcp_offset, index_size); ++ // We use a 32-bit load here since the layout of 64-bit words on ++ // little-endian machines allow us that. ++ // n.b. unlike x86 cache already includes the index offset ++ la(bytecode, Address(cache, ++ ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::indices_offset())); ++ membar(MacroAssembler::AnyAny); ++ lwu(bytecode, bytecode); ++ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); ++ const int shift_count = (1 + byte_no) * BitsPerByte; ++ slli(bytecode, bytecode, XLEN - (shift_count + BitsPerByte)); ++ srli(bytecode, bytecode, XLEN - BitsPerByte); +} + -+#define INSN(NAME, OPFUN) \ -+ void MacroAssembler::NAME(unsigned csr, Register Rs) { \ -+ OPFUN(x0, csr, Rs); \ -+ } -+ -+ INSN(csrw, csrrw); -+ INSN(csrs, csrrs); -+ INSN(csrc, csrrc); ++void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache, ++ Register tmp, ++ int bcp_offset, ++ size_t index_size) { ++ assert(cache != tmp, "must use different register"); ++ get_cache_index_at_bcp(tmp, bcp_offset, index_size); ++ assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); ++ // Convert from field index to ConstantPoolCacheEntry index ++ // and from word offset to byte offset ++ assert(exact_log2(in_bytes(ConstantPoolCacheEntry::size_in_bytes())) == 2 + LogBytesPerWord, ++ "else change next line"); ++ ld(cache, Address(fp, frame::interpreter_frame_cache_offset * wordSize)); ++ // skip past the header ++ add(cache, cache, in_bytes(ConstantPoolCache::base_offset())); ++ // construct pointer to cache entry ++ shadd(cache, tmp, cache, tmp, 2 + LogBytesPerWord); ++} + -+#undef INSN ++// Load object from cpool->resolved_references(index) ++void InterpreterMacroAssembler::load_resolved_reference_at_index( ++ Register result, Register index, Register tmp) { ++ assert_different_registers(result, index); + -+#define INSN(NAME, OPFUN) \ -+ void MacroAssembler::NAME(unsigned csr, unsigned imm) { \ -+ OPFUN(x0, csr, imm); \ -+ } ++ get_constant_pool(result); ++ // Load pointer for resolved_references[] objArray ++ ld(result, Address(result, ConstantPool::cache_offset_in_bytes())); ++ ld(result, Address(result, ConstantPoolCache::resolved_references_offset_in_bytes())); ++ resolve_oop_handle(result, tmp); ++ // Add in the index ++ addi(index, index, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); ++ shadd(result, index, result, index, LogBytesPerHeapOop); ++ load_heap_oop(result, Address(result, 0)); ++} + 
-+ INSN(csrwi, csrrwi); -+ INSN(csrsi, csrrsi); -+ INSN(csrci, csrrci); ++void InterpreterMacroAssembler::load_resolved_klass_at_offset( ++ Register cpool, Register index, Register klass, Register temp) { ++ shadd(temp, index, cpool, temp, LogBytesPerWord); ++ lhu(temp, Address(temp, sizeof(ConstantPool))); // temp = resolved_klass_index ++ ld(klass, Address(cpool, ConstantPool::resolved_klasses_offset_in_bytes())); // klass = cpool->_resolved_klasses ++ shadd(klass, temp, klass, temp, LogBytesPerWord); ++ ld(klass, Address(klass, Array::base_offset_in_bytes())); ++} + -+#undef INSN ++void InterpreterMacroAssembler::load_resolved_method_at_index(int byte_no, ++ Register method, ++ Register cache) { ++ const int method_offset = in_bytes( ++ ConstantPoolCache::base_offset() + ++ ((byte_no == TemplateTable::f2_byte) ++ ? ConstantPoolCacheEntry::f2_offset() ++ : ConstantPoolCacheEntry::f1_offset())); + -+#define INSN(NAME, CSR) \ -+ void MacroAssembler::NAME(Register Rd, Register Rs) { \ -+ csrrw(Rd, CSR, Rs); \ -+ } ++ ld(method, Address(cache, method_offset)); // get f1 Method* ++} + -+ INSN(fscsr, CSR_FCSR); -+ INSN(fsrm, CSR_FRM); -+ INSN(fsflags, CSR_FFLAGS); ++// Generate a subtype check: branch to ok_is_subtype if sub_klass is a ++// subtype of super_klass. ++// ++// Args: ++// x10: superklass ++// Rsub_klass: subklass ++// ++// Kills: ++// x12, x15 ++void InterpreterMacroAssembler::gen_subtype_check(Register Rsub_klass, ++ Label& ok_is_subtype) { ++ assert(Rsub_klass != x10, "x10 holds superklass"); ++ assert(Rsub_klass != x12, "x12 holds 2ndary super array length"); ++ assert(Rsub_klass != x15, "x15 holds 2ndary super array scan ptr"); + -+#undef INSN ++ // Profile the not-null value's klass. ++ profile_typecheck(x12, Rsub_klass, x15); // blows x12, reloads x15 + -+#define INSN(NAME) \ -+ void MacroAssembler::NAME(Register Rs) { \ -+ NAME(x0, Rs); \ -+ } ++ // Do the check. ++ check_klass_subtype(Rsub_klass, x10, x12, ok_is_subtype); // blows x12 + -+ INSN(fscsr); -+ INSN(fsrm); -+ INSN(fsflags); ++ // Profile the failure of the check. 
++ profile_typecheck_failed(x12); // blows x12 ++} + -+#undef INSN ++// Java Expression Stack + -+void MacroAssembler::fsrmi(Register Rd, unsigned imm) { -+ guarantee(imm < 5, "Rounding Mode is invalid in Rounding Mode register"); -+ csrrwi(Rd, CSR_FRM, imm); ++void InterpreterMacroAssembler::pop_ptr(Register r) { ++ ld(r, Address(esp, 0)); ++ addi(esp, esp, wordSize); +} + -+void MacroAssembler::fsflagsi(Register Rd, unsigned imm) { -+ csrrwi(Rd, CSR_FFLAGS, imm); ++void InterpreterMacroAssembler::pop_i(Register r) { ++ lw(r, Address(esp, 0)); // lw do signed extended ++ addi(esp, esp, wordSize); +} + -+#define INSN(NAME) \ -+ void MacroAssembler::NAME(unsigned imm) { \ -+ NAME(x0, imm); \ -+ } -+ -+ INSN(fsrmi); -+ INSN(fsflagsi); -+ -+#undef INSN -+ -+#ifdef COMPILER2 ++void InterpreterMacroAssembler::pop_l(Register r) { ++ ld(r, Address(esp, 0)); ++ addi(esp, esp, 2 * Interpreter::stackElementSize); ++} + -+typedef void (Assembler::*conditional_branch_insn)(Register op1, Register op2, Label& label, bool is_far); -+typedef void (MacroAssembler::*float_conditional_branch_insn)(FloatRegister op1, FloatRegister op2, Label& label, -+ bool is_far, bool is_unordered); ++void InterpreterMacroAssembler::push_ptr(Register r) { ++ addi(esp, esp, -wordSize); ++ sd(r, Address(esp, 0)); ++} + -+static conditional_branch_insn conditional_branches[] = -+{ -+ /* SHORT branches */ -+ (conditional_branch_insn)&Assembler::beq, -+ (conditional_branch_insn)&Assembler::bgt, -+ NULL, // BoolTest::overflow -+ (conditional_branch_insn)&Assembler::blt, -+ (conditional_branch_insn)&Assembler::bne, -+ (conditional_branch_insn)&Assembler::ble, -+ NULL, // BoolTest::no_overflow -+ (conditional_branch_insn)&Assembler::bge, ++void InterpreterMacroAssembler::push_i(Register r) { ++ addi(esp, esp, -wordSize); ++ addw(r, r, zr); // signed extended ++ sd(r, Address(esp, 0)); ++} + -+ /* UNSIGNED branches */ -+ (conditional_branch_insn)&Assembler::beq, -+ (conditional_branch_insn)&Assembler::bgtu, -+ NULL, -+ (conditional_branch_insn)&Assembler::bltu, -+ (conditional_branch_insn)&Assembler::bne, -+ (conditional_branch_insn)&Assembler::bleu, -+ NULL, -+ (conditional_branch_insn)&Assembler::bgeu -+}; ++void InterpreterMacroAssembler::push_l(Register r) { ++ addi(esp, esp, -2 * wordSize); ++ sd(zr, Address(esp, wordSize)); ++ sd(r, Address(esp)); ++} + -+static float_conditional_branch_insn float_conditional_branches[] = -+{ -+ /* FLOAT SHORT branches */ -+ (float_conditional_branch_insn)&MacroAssembler::float_beq, -+ (float_conditional_branch_insn)&MacroAssembler::float_bgt, -+ NULL, // BoolTest::overflow -+ (float_conditional_branch_insn)&MacroAssembler::float_blt, -+ (float_conditional_branch_insn)&MacroAssembler::float_bne, -+ (float_conditional_branch_insn)&MacroAssembler::float_ble, -+ NULL, // BoolTest::no_overflow -+ (float_conditional_branch_insn)&MacroAssembler::float_bge, ++void InterpreterMacroAssembler::pop_f(FloatRegister r) { ++ flw(r, esp, 0); ++ addi(esp, esp, wordSize); ++} + -+ /* DOUBLE SHORT branches */ -+ (float_conditional_branch_insn)&MacroAssembler::double_beq, -+ (float_conditional_branch_insn)&MacroAssembler::double_bgt, -+ NULL, -+ (float_conditional_branch_insn)&MacroAssembler::double_blt, -+ (float_conditional_branch_insn)&MacroAssembler::double_bne, -+ (float_conditional_branch_insn)&MacroAssembler::double_ble, -+ NULL, -+ (float_conditional_branch_insn)&MacroAssembler::double_bge -+}; ++void InterpreterMacroAssembler::pop_d(FloatRegister r) { ++ fld(r, esp, 0); ++ addi(esp, esp, 2 * 
Interpreter::stackElementSize); ++} + -+void MacroAssembler::cmp_branch(int cmpFlag, Register op1, Register op2, Label& label, bool is_far) { -+ assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(conditional_branches) / sizeof(conditional_branches[0])), -+ "invalid conditional branch index"); -+ (this->*conditional_branches[cmpFlag])(op1, op2, label, is_far); ++void InterpreterMacroAssembler::push_f(FloatRegister r) { ++ addi(esp, esp, -wordSize); ++ fsw(r, Address(esp, 0)); +} + -+// This is a function should only be used by C2. Flip the unordered when unordered-greater, C2 would use -+// unordered-lesser instead of unordered-greater. Finally, commute the result bits at function do_one_bytecode(). -+void MacroAssembler::float_cmp_branch(int cmpFlag, FloatRegister op1, FloatRegister op2, Label& label, bool is_far) { -+ assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(float_conditional_branches) / sizeof(float_conditional_branches[0])), -+ "invalid float conditional branch index"); -+ int booltest_flag = cmpFlag & ~(MacroAssembler::double_branch_mask); -+ (this->*float_conditional_branches[cmpFlag])(op1, op2, label, is_far, -+ (booltest_flag == (BoolTest::ge) || booltest_flag == (BoolTest::gt)) ? false : true); ++void InterpreterMacroAssembler::push_d(FloatRegister r) { ++ addi(esp, esp, -2 * wordSize); ++ fsd(r, Address(esp, 0)); +} + -+void MacroAssembler::enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { -+ switch (cmpFlag) { -+ case BoolTest::eq: -+ case BoolTest::le: -+ beqz(op1, L, is_far); ++void InterpreterMacroAssembler::pop(TosState state) { ++ switch (state) { ++ case atos: ++ pop_ptr(); ++ verify_oop(x10); + break; -+ case BoolTest::ne: -+ case BoolTest::gt: -+ bnez(op1, L, is_far); ++ case btos: // fall through ++ case ztos: // fall through ++ case ctos: // fall through ++ case stos: // fall through ++ case itos: ++ pop_i(); ++ break; ++ case ltos: ++ pop_l(); ++ break; ++ case ftos: ++ pop_f(); ++ break; ++ case dtos: ++ pop_d(); ++ break; ++ case vtos: ++ /* nothing to do */ + break; + default: + ShouldNotReachHere(); + } +} + -+void MacroAssembler::enc_cmpEqNe_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { -+ switch (cmpFlag) { -+ case BoolTest::eq: -+ beqz(op1, L, is_far); ++void InterpreterMacroAssembler::push(TosState state) { ++ switch (state) { ++ case atos: ++ verify_oop(x10); ++ push_ptr(); + break; -+ case BoolTest::ne: -+ bnez(op1, L, is_far); ++ case btos: // fall through ++ case ztos: // fall through ++ case ctos: // fall through ++ case stos: // fall through ++ case itos: ++ push_i(); ++ break; ++ case ltos: ++ push_l(); ++ break; ++ case ftos: ++ push_f(); ++ break; ++ case dtos: ++ push_d(); ++ break; ++ case vtos: ++ /* nothing to do */ + break; + default: + ShouldNotReachHere(); + } +} + -+void MacroAssembler::enc_cmove(int cmpFlag, Register op1, Register op2, Register dst, Register src) { -+ Label L; -+ cmp_branch(cmpFlag ^ (1 << neg_cond_bits), op1, op2, L); -+ mv(dst, src); -+ bind(L); ++// Helpers for swap and dup ++void InterpreterMacroAssembler::load_ptr(int n, Register val) { ++ ld(val, Address(esp, Interpreter::expr_offset_in_bytes(n))); +} -+#endif + -+void MacroAssembler::push_reg(Register Rs) -+{ -+ addi(esp, esp, 0 - wordSize); -+ sd(Rs, Address(esp, 0)); ++void InterpreterMacroAssembler::store_ptr(int n, Register val) { ++ sd(val, Address(esp, Interpreter::expr_offset_in_bytes(n))); +} + -+void MacroAssembler::pop_reg(Register Rd) -+{ -+ ld(Rd, esp, 0); -+ addi(esp, esp, wordSize); ++void 
InterpreterMacroAssembler::load_float(Address src) { ++ flw(f10, src); +} + -+int MacroAssembler::bitset_to_regs(unsigned int bitset, unsigned char* regs) { -+ int count = 0; -+ // Scan bitset to accumulate register pairs -+ for (int reg = 31; reg >= 0; reg --) { -+ if ((1U << 31) & bitset) { -+ regs[count++] = reg; -+ } -+ bitset <<= 1; -+ } -+ return count; ++void InterpreterMacroAssembler::load_double(Address src) { ++ fld(f10, src); +} + -+// Push lots of registers in the bit set supplied. Don't push sp. -+// Return the number of words pushed -+int MacroAssembler::push_reg(unsigned int bitset, Register stack) { -+ DEBUG_ONLY(int words_pushed = 0;) -+ -+ unsigned char regs[32]; -+ int count = bitset_to_regs(bitset, regs); -+ // reserve one slot to align for odd count -+ int offset = is_even(count) ? 0 : wordSize; -+ -+ if (count) { -+ addi(stack, stack, - count * wordSize - offset); -+ } -+ for (int i = count - 1; i >= 0; i--) { -+ sd(as_Register(regs[i]), Address(stack, (count - 1 - i) * wordSize + offset)); -+ DEBUG_ONLY(words_pushed ++;) -+ } -+ -+ assert(words_pushed == count, "oops, pushed != count"); -+ -+ return count; ++void InterpreterMacroAssembler::prepare_to_jump_from_interpreted() { ++ // set sender sp ++ mv(x30, sp); ++ // record last_sp ++ sd(esp, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); +} + -+int MacroAssembler::pop_reg(unsigned int bitset, Register stack) { -+ DEBUG_ONLY(int words_popped = 0;) -+ -+ unsigned char regs[32]; -+ int count = bitset_to_regs(bitset, regs); -+ // reserve one slot to align for odd count -+ int offset = is_even(count) ? 0 : wordSize; -+ -+ for (int i = count - 1; i >= 0; i--) { -+ ld(as_Register(regs[i]), Address(stack, (count - 1 - i) * wordSize + offset)); -+ DEBUG_ONLY(words_popped ++;) -+ } -+ -+ if (count) { -+ addi(stack, stack, count * wordSize + offset); ++// Jump to from_interpreted entry of a call unless single stepping is possible ++// in this thread in which case we must call the i2i entry ++void InterpreterMacroAssembler::jump_from_interpreted(Register method) { ++ prepare_to_jump_from_interpreted(); ++ if (JvmtiExport::can_post_interpreter_events()) { ++ Label run_compiled_code; ++ // JVMTI events, such as single-stepping, are implemented partly by avoiding running ++ // compiled code in threads for which the event is enabled. Check here for ++ // interp_only_mode if these events CAN be enabled. ++ lwu(t0, Address(xthread, JavaThread::interp_only_mode_offset())); ++ beqz(t0, run_compiled_code); ++ ld(t0, Address(method, Method::interpreter_entry_offset())); ++ jr(t0); ++ bind(run_compiled_code); + } -+ assert(words_popped == count, "oops, popped != count"); + -+ return count; ++ ld(t0, Address(method, Method::from_interpreted_offset())); ++ jr(t0); +} + -+RegSet MacroAssembler::call_clobbered_registers() { -+ // Push integer registers x7, x10-x17, x28-x31. -+ return RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31); ++// The following two routines provide a hook so that an implementation ++// can schedule the dispatch in two parts. amd64 does not do this. ++void InterpreterMacroAssembler::dispatch_prolog(TosState state, int step) { +} + -+void MacroAssembler::push_call_clobbered_registers() { -+ push_reg(call_clobbered_registers(), sp); -+ -+ // Push float registers f0-f7, f10-f17, f28-f31. 
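-+  // That is 8 + 8 + 4 = 20 caller-saved FP registers, which matches the 20-word stack adjustment below.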
-+ addi(sp, sp, - wordSize * 20); -+ int offset = 0; -+ for (int i = 0; i < 32; i++) { -+ if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) { -+ fsd(as_FloatRegister(i), Address(sp, wordSize * (offset ++))); -+ } -+ } ++void InterpreterMacroAssembler::dispatch_epilog(TosState state, int step) { ++ dispatch_next(state, step); +} + -+void MacroAssembler::pop_call_clobbered_registers() { -+ int offset = 0; -+ for (int i = 0; i < 32; i++) { -+ if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) { -+ fld(as_FloatRegister(i), Address(sp, wordSize * (offset ++))); -+ } ++void InterpreterMacroAssembler::dispatch_base(TosState state, ++ address* table, ++ bool verifyoop, ++ bool generate_poll, ++ Register Rs) { ++ // Pay attention to the argument Rs, which is acquiesce in t0. ++ if (VerifyActivationFrameSize) { ++ Unimplemented(); + } -+ addi(sp, sp, wordSize * 20); -+ -+ pop_reg(call_clobbered_registers(), sp); -+} -+ -+void MacroAssembler::push_CPU_state(bool save_vectors, int vector_size_in_bytes) { -+ // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4) -+ push_reg(RegSet::range(x5, x31), sp); -+ -+ // float registers -+ addi(sp, sp, - 32 * wordSize); -+ for (int i = 0; i < 32; i++) { -+ fsd(as_FloatRegister(i), Address(sp, i * wordSize)); ++ if (verifyoop && state == atos) { ++ verify_oop(x10); + } + -+ // vector registers -+ if (save_vectors) { -+ sub(sp, sp, vector_size_in_bytes * VectorRegisterImpl::number_of_registers); -+ vsetvli(t0, x0, Assembler::e64, Assembler::m8); -+ for (int i = 0; i < VectorRegisterImpl::number_of_registers; i += 8) { -+ add(t0, sp, vector_size_in_bytes * i); -+ vse64_v(as_VectorRegister(i), t0); -+ } -+ } -+} ++ Label safepoint; ++ address* const safepoint_table = Interpreter::safept_table(state); ++ bool needs_thread_local_poll = generate_poll && table != safepoint_table; + -+void MacroAssembler::pop_CPU_state(bool restore_vectors, int vector_size_in_bytes) { -+ // vector registers -+ if (restore_vectors) { -+ vsetvli(t0, x0, Assembler::e64, Assembler::m8); -+ for (int i = 0; i < VectorRegisterImpl::number_of_registers; i += 8) { -+ vle64_v(as_VectorRegister(i), sp); -+ add(sp, sp, vector_size_in_bytes * 8); -+ } ++ if (needs_thread_local_poll) { ++ NOT_PRODUCT(block_comment("Thread-local Safepoint poll")); ++ ld(t1, Address(xthread, JavaThread::polling_word_offset())); ++ andi(t1, t1, SafepointMechanism::poll_bit()); ++ bnez(t1, safepoint); ++ } ++ if (table == Interpreter::dispatch_table(state)) { ++ li(t1, Interpreter::distance_from_dispatch_table(state)); ++ add(t1, Rs, t1); ++ shadd(t1, t1, xdispatch, t1, 3); ++ } else { ++ mv(t1, (address)table); ++ shadd(t1, Rs, t1, Rs, 3); + } ++ ld(t1, Address(t1)); ++ jr(t1); + -+ // float registers -+ for (int i = 0; i < 32; i++) { -+ fld(as_FloatRegister(i), Address(sp, i * wordSize)); ++ if (needs_thread_local_poll) { ++ bind(safepoint); ++ la(t1, ExternalAddress((address)safepoint_table)); ++ shadd(t1, Rs, t1, Rs, 3); ++ ld(t1, Address(t1)); ++ jr(t1); + } -+ addi(sp, sp, 32 * wordSize); ++} + -+ // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4) -+ pop_reg(RegSet::range(x5, x31), sp); ++void InterpreterMacroAssembler::dispatch_only(TosState state, bool generate_poll, Register Rs) { ++ dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll, Rs); +} + -+static int patch_offset_in_jal(address branch, int64_t offset) { -+ assert(is_imm_in_range(offset, 20, 1), 
"offset is too large to be patched in one jal insrusction!\n"); -+ Assembler::patch(branch, 31, 31, (offset >> 20) & 0x1); // offset[20] ==> branch[31] -+ Assembler::patch(branch, 30, 21, (offset >> 1) & 0x3ff); // offset[10:1] ==> branch[30:21] -+ Assembler::patch(branch, 20, 20, (offset >> 11) & 0x1); // offset[11] ==> branch[20] -+ Assembler::patch(branch, 19, 12, (offset >> 12) & 0xff); // offset[19:12] ==> branch[19:12] -+ return NativeInstruction::instruction_size; // only one instruction ++void InterpreterMacroAssembler::dispatch_only_normal(TosState state, Register Rs) { ++ dispatch_base(state, Interpreter::normal_table(state), Rs); +} + -+static int patch_offset_in_conditional_branch(address branch, int64_t offset) { -+ assert(is_imm_in_range(offset, 12, 1), "offset is too large to be patched in one beq/bge/bgeu/blt/bltu/bne insrusction!\n"); -+ Assembler::patch(branch, 31, 31, (offset >> 12) & 0x1); // offset[12] ==> branch[31] -+ Assembler::patch(branch, 30, 25, (offset >> 5) & 0x3f); // offset[10:5] ==> branch[30:25] -+ Assembler::patch(branch, 7, 7, (offset >> 11) & 0x1); // offset[11] ==> branch[7] -+ Assembler::patch(branch, 11, 8, (offset >> 1) & 0xf); // offset[4:1] ==> branch[11:8] -+ return NativeInstruction::instruction_size; // only one instruction ++void InterpreterMacroAssembler::dispatch_only_noverify(TosState state, Register Rs) { ++ dispatch_base(state, Interpreter::normal_table(state), false, Rs); +} + -+static int patch_offset_in_pc_relative(address branch, int64_t offset) { -+ const int PC_RELATIVE_INSTRUCTION_NUM = 2; // auipc, addi/jalr/load -+ Assembler::patch(branch, 31, 12, ((offset + 0x800) >> 12) & 0xfffff); // Auipc. offset[31:12] ==> branch[31:12] -+ Assembler::patch(branch + 4, 31, 20, offset & 0xfff); // Addi/Jalr/Load. offset[11:0] ==> branch[31:20] -+ return PC_RELATIVE_INSTRUCTION_NUM * NativeInstruction::instruction_size; ++void InterpreterMacroAssembler::dispatch_next(TosState state, int step, bool generate_poll) { ++ // load next bytecode ++ load_unsigned_byte(t0, Address(xbcp, step)); ++ add(xbcp, xbcp, step); ++ dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll); +} + -+static int patch_addr_in_movptr(address branch, address target) { -+ const int MOVPTR_INSTRUCTIONS_NUM = 6; // lui + addi + slli + addi + slli + addi/jalr/load -+ int32_t lower = ((intptr_t)target << 35) >> 35; -+ int64_t upper = ((intptr_t)target - lower) >> 29; -+ Assembler::patch(branch + 0, 31, 12, upper & 0xfffff); // Lui. target[48:29] + target[28] ==> branch[31:12] -+ Assembler::patch(branch + 4, 31, 20, (lower >> 17) & 0xfff); // Addi. target[28:17] ==> branch[31:20] -+ Assembler::patch(branch + 12, 31, 20, (lower >> 6) & 0x7ff); // Addi. target[16: 6] ==> branch[31:20] -+ Assembler::patch(branch + 20, 31, 20, lower & 0x3f); // Addi/Jalr/Load. 
target[ 5: 0] ==> branch[31:20] -+ return MOVPTR_INSTRUCTIONS_NUM * NativeInstruction::instruction_size; ++void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) { ++ // load current bytecode ++ lbu(t0, Address(xbcp, 0)); ++ dispatch_base(state, table); +} + -+static int patch_imm_in_li64(address branch, address target) { -+ const int LI64_INSTRUCTIONS_NUM = 8; // lui + addi + slli + addi + slli + addi + slli + addi -+ int64_t lower = (intptr_t)target & 0xffffffff; -+ lower = lower - ((lower << 44) >> 44); -+ int64_t tmp_imm = ((uint64_t)((intptr_t)target & 0xffffffff00000000)) + (uint64_t)lower; -+ int32_t upper = (tmp_imm - (int32_t)lower) >> 32; -+ int64_t tmp_upper = upper, tmp_lower = upper; -+ tmp_lower = (tmp_lower << 52) >> 52; -+ tmp_upper -= tmp_lower; -+ tmp_upper >>= 12; -+ // Load upper 32 bits. Upper = target[63:32], but if target[31] = 1 or (target[31:28] == 0x7ff && target[19] == 1), -+ // upper = target[63:32] + 1. -+ Assembler::patch(branch + 0, 31, 12, tmp_upper & 0xfffff); // Lui. -+ Assembler::patch(branch + 4, 31, 20, tmp_lower & 0xfff); // Addi. -+ // Load the rest 32 bits. -+ Assembler::patch(branch + 12, 31, 20, ((int32_t)lower >> 20) & 0xfff); // Addi. -+ Assembler::patch(branch + 20, 31, 20, (((intptr_t)target << 44) >> 52) & 0xfff); // Addi. -+ Assembler::patch(branch + 28, 31, 20, (intptr_t)target & 0xff); // Addi. -+ return LI64_INSTRUCTIONS_NUM * NativeInstruction::instruction_size; -+} ++// remove activation ++// ++// Apply stack watermark barrier. ++// Unlock the receiver if this is a synchronized method. ++// Unlock any Java monitors from syncronized blocks. ++// Remove the activation from the stack. ++// ++// If there are locked Java monitors ++// If throw_monitor_exception ++// throws IllegalMonitorStateException ++// Else if install_monitor_exception ++// installs IllegalMonitorStateException ++// Else ++// no error processing ++void InterpreterMacroAssembler::remove_activation( ++ TosState state, ++ bool throw_monitor_exception, ++ bool install_monitor_exception, ++ bool notify_jvmdi) { ++ // Note: Registers x13 may be in use for the ++ // result check if synchronized method ++ Label unlocked, unlock, no_unlock; + -+static int patch_imm_in_li32(address branch, int32_t target) { -+ const int LI32_INSTRUCTIONS_NUM = 2; // lui + addiw -+ int64_t upper = (intptr_t)target; -+ int32_t lower = (((int32_t)target) << 20) >> 20; -+ upper -= lower; -+ upper = (int32_t)upper; -+ Assembler::patch(branch + 0, 31, 12, (upper >> 12) & 0xfffff); // Lui. -+ Assembler::patch(branch + 4, 31, 20, lower & 0xfff); // Addiw. -+ return LI32_INSTRUCTIONS_NUM * NativeInstruction::instruction_size; -+} ++ // The below poll is for the stack watermark barrier. It allows fixing up frames lazily, ++ // that would normally not be safe to use. Such bad returns into unsafe territory of ++ // the stack, will call InterpreterRuntime::at_unwind. 
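++  // If the poll word is armed we take slow_path below: save the TOS state, publish the
++  // last Java frame, call InterpreterRuntime::at_unwind, then rejoin fast_path.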
++ Label slow_path; ++ Label fast_path; ++ safepoint_poll(slow_path, true /* at_return */, false /* acquire */, false /* in_nmethod */); ++ j(fast_path); ++ ++ bind(slow_path); ++ push(state); ++ set_last_Java_frame(esp, fp, (address)pc(), t0); ++ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::at_unwind), xthread); ++ reset_last_Java_frame(true); ++ pop(state); + -+static long get_offset_of_jal(address insn_addr) { -+ assert_cond(insn_addr != NULL); -+ long offset = 0; -+ unsigned insn = *(unsigned*)insn_addr; -+ long val = (long)Assembler::sextract(insn, 31, 12); -+ offset |= ((val >> 19) & 0x1) << 20; -+ offset |= (val & 0xff) << 12; -+ offset |= ((val >> 8) & 0x1) << 11; -+ offset |= ((val >> 9) & 0x3ff) << 1; -+ offset = (offset << 43) >> 43; -+ return offset; -+} ++ bind(fast_path); + -+static long get_offset_of_conditional_branch(address insn_addr) { -+ long offset = 0; -+ assert_cond(insn_addr != NULL); -+ unsigned insn = *(unsigned*)insn_addr; -+ offset = (long)Assembler::sextract(insn, 31, 31); -+ offset = (offset << 12) | (((long)(Assembler::sextract(insn, 7, 7) & 0x1)) << 11); -+ offset = offset | (((long)(Assembler::sextract(insn, 30, 25) & 0x3f)) << 5); -+ offset = offset | (((long)(Assembler::sextract(insn, 11, 8) & 0xf)) << 1); -+ offset = (offset << 41) >> 41; -+ return offset; -+} ++ // get the value of _do_not_unlock_if_synchronized into x13 ++ const Address do_not_unlock_if_synchronized(xthread, ++ in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ lbu(x13, do_not_unlock_if_synchronized); ++ sb(zr, do_not_unlock_if_synchronized); // reset the flag + -+static long get_offset_of_pc_relative(address insn_addr) { -+ long offset = 0; -+ assert_cond(insn_addr != NULL); -+ offset = ((long)(Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12))) << 12; // Auipc. -+ offset += ((long)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)); // Addi/Jalr/Load. -+ offset = (offset << 32) >> 32; -+ return offset; -+} ++ // get method access flags ++ ld(x11, Address(fp, frame::interpreter_frame_method_offset * wordSize)); ++ ld(x12, Address(x11, Method::access_flags_offset())); ++ andi(t0, x12, JVM_ACC_SYNCHRONIZED); ++ beqz(t0, unlocked); + -+static address get_target_of_movptr(address insn_addr) { -+ assert_cond(insn_addr != NULL); -+ intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 29; // Lui. -+ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)) << 17; // Addi. -+ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[3], 31, 20)) << 6; // Addi. -+ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[5], 31, 20)); // Addi/Jalr/Load. -+ return (address) target_address; -+} ++ // Don't unlock anything if the _do_not_unlock_if_synchronized flag ++ // is set. ++ bnez(x13, no_unlock); + -+static address get_target_of_li64(address insn_addr) { -+ assert_cond(insn_addr != NULL); -+ intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 44; // Lui. -+ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)) << 32; // Addi. -+ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[3], 31, 20)) << 20; // Addi. -+ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[5], 31, 20)) << 8; // Addi. -+ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[7], 31, 20)); // Addi. 
-+ return (address)target_address; -+} ++ // unlock monitor ++ push(state); // save result + -+static address get_target_of_li32(address insn_addr) { -+ assert_cond(insn_addr != NULL); -+ intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 12; // Lui. -+ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)); // Addiw. -+ return (address)target_address; -+} ++ // BasicObjectLock will be first in list, since this is a ++ // synchronized method. However, need to check that the object has ++ // not been unlocked by an explicit monitorexit bytecode. ++ const Address monitor(fp, frame::interpreter_frame_initial_sp_offset * ++ wordSize - (int) sizeof(BasicObjectLock)); ++ // We use c_rarg1 so that if we go slow path it will be the correct ++ // register for unlock_object to pass to VM directly ++ la(c_rarg1, monitor); // address of first monitor + -+// Patch any kind of instruction; there may be several instructions. -+// Return the total length (in bytes) of the instructions. -+int MacroAssembler::pd_patch_instruction_size(address branch, address target) { -+ assert_cond(branch != NULL); -+ int64_t offset = target - branch; -+ if (NativeInstruction::is_jal_at(branch)) { // jal -+ return patch_offset_in_jal(branch, offset); -+ } else if (NativeInstruction::is_branch_at(branch)) { // beq/bge/bgeu/blt/bltu/bne -+ return patch_offset_in_conditional_branch(branch, offset); -+ } else if (NativeInstruction::is_pc_relative_at(branch)) { // auipc, addi/jalr/load -+ return patch_offset_in_pc_relative(branch, offset); -+ } else if (NativeInstruction::is_movptr_at(branch)) { // movptr -+ return patch_addr_in_movptr(branch, target); -+ } else if (NativeInstruction::is_li64_at(branch)) { // li64 -+ return patch_imm_in_li64(branch, target); -+ } else if (NativeInstruction::is_li32_at(branch)) { // li32 -+ int64_t imm = (intptr_t)target; -+ return patch_imm_in_li32(branch, (int32_t)imm); -+ } else { -+ tty->print_cr("pd_patch_instruction_size: instruction 0x%x could not be patched!\n", *(unsigned*)branch); -+ ShouldNotReachHere(); -+ } -+ return -1; -+} ++ ld(x10, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes())); ++ bnez(x10, unlock); + -+address MacroAssembler::target_addr_for_insn(address insn_addr) { -+ long offset = 0; -+ assert_cond(insn_addr != NULL); -+ if (NativeInstruction::is_jal_at(insn_addr)) { // jal -+ offset = get_offset_of_jal(insn_addr); -+ } else if (NativeInstruction::is_branch_at(insn_addr)) { // beq/bge/bgeu/blt/bltu/bne -+ offset = get_offset_of_conditional_branch(insn_addr); -+ } else if (NativeInstruction::is_pc_relative_at(insn_addr)) { // auipc, addi/jalr/load -+ offset = get_offset_of_pc_relative(insn_addr); -+ } else if (NativeInstruction::is_movptr_at(insn_addr)) { // movptr -+ return get_target_of_movptr(insn_addr); -+ } else if (NativeInstruction::is_li64_at(insn_addr)) { // li64 -+ return get_target_of_li64(insn_addr); -+ } else if (NativeInstruction::is_li32_at(insn_addr)) { // li32 -+ return get_target_of_li32(insn_addr); ++ pop(state); ++ if (throw_monitor_exception) { ++ // Entry already unlocked, need to throw exception ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ should_not_reach_here(); + } else { -+ ShouldNotReachHere(); ++ // Monitor already unlocked during a stack unroll. If requested, ++ // install an illegal_monitor_state_exception. Continue with ++ // stack unrolling. 
++ if (install_monitor_exception) { ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::new_illegal_monitor_state_exception)); ++ } ++ j(unlocked); + } -+ return address(((uintptr_t)insn_addr + offset)); -+} + -+int MacroAssembler::patch_oop(address insn_addr, address o) { -+ // OOPs are either narrow (32 bits) or wide (48 bits). We encode -+ // narrow OOPs by setting the upper 16 bits in the first -+ // instruction. -+ if (NativeInstruction::is_li32_at(insn_addr)) { -+ // Move narrow OOP -+ narrowOop n = CompressedOops::encode((oop)o); -+ return patch_imm_in_li32(insn_addr, (int32_t)n); -+ } else if (NativeInstruction::is_movptr_at(insn_addr)) { -+ // Move wide OOP -+ return patch_addr_in_movptr(insn_addr, o); -+ } -+ ShouldNotReachHere(); -+ return -1; -+} ++ bind(unlock); ++ unlock_object(c_rarg1); ++ pop(state); + -+void MacroAssembler::reinit_heapbase() { -+ if (UseCompressedOops) { -+ if (Universe::is_fully_initialized()) { -+ mv(xheapbase, Universe::narrow_ptrs_base()); -+ } else { -+ int32_t offset = 0; -+ la_patchable(xheapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr()), offset); -+ ld(xheapbase, Address(xheapbase, offset)); -+ } -+ } -+} ++ // Check that for block-structured locking (i.e., that all locked ++ // objects has been unlocked) ++ bind(unlocked); + -+void MacroAssembler::mv(Register Rd, Address dest) { -+ assert(dest.getMode() == Address::literal, "Address mode should be Address::literal"); -+ code_section()->relocate(pc(), dest.rspec()); -+ movptr(Rd, dest.target()); -+} -+void MacroAssembler::mv(Register Rd, RegisterOrConstant src) { -+ if (src.is_register()) { -+ mv(Rd, src.as_register()); -+ } else { -+ mv(Rd, src.as_constant()); -+ } -+} ++ // x10: Might contain return value + -+void MacroAssembler::andrw(Register Rd, Register Rs1, Register Rs2) { -+ andr(Rd, Rs1, Rs2); -+ // addw: The result is clipped to 32 bits, then the sign bit is extended, -+ // and the result is stored in Rd -+ addw(Rd, Rd, zr); -+} ++ // Check that all monitors are unlocked ++ { ++ Label loop, exception, entry, restart; ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ const Address monitor_block_top( ++ fp, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ const Address monitor_block_bot( ++ fp, frame::interpreter_frame_initial_sp_offset * wordSize); + -+void MacroAssembler::orrw(Register Rd, Register Rs1, Register Rs2) { -+ orr(Rd, Rs1, Rs2); -+ // addw: The result is clipped to 32 bits, then the sign bit is extended, -+ // and the result is stored in Rd -+ addw(Rd, Rd, zr); -+} ++ bind(restart); ++ // We use c_rarg1 so that if we go slow path it will be the correct ++ // register for unlock_object to pass to VM directly ++ ld(c_rarg1, monitor_block_top); // points to current entry, starting ++ // with top-most entry ++ la(x9, monitor_block_bot); // points to word before bottom of ++ // monitor block + -+void MacroAssembler::xorrw(Register Rd, Register Rs1, Register Rs2) { -+ xorr(Rd, Rs1, Rs2); -+ // addw: The result is clipped to 32 bits, then the sign bit is extended, -+ // and the result is stored in Rd -+ addw(Rd, Rd, zr); -+} ++ j(entry); + -+// Note: load_unsigned_short used to be called load_unsigned_word. 
-+int MacroAssembler::load_unsigned_short(Register dst, Address src) { -+ int off = offset(); -+ lhu(dst, src); -+ return off; -+} ++ // Entry already locked, need to throw exception ++ bind(exception); + -+int MacroAssembler::load_unsigned_byte(Register dst, Address src) { -+ int off = offset(); -+ lbu(dst, src); -+ return off; -+} ++ if (throw_monitor_exception) { ++ // Throw exception ++ MacroAssembler::call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime:: ++ throw_illegal_monitor_state_exception)); + -+int MacroAssembler::load_signed_short(Register dst, Address src) { -+ int off = offset(); -+ lh(dst, src); -+ return off; -+} ++ should_not_reach_here(); ++ } else { ++ // Stack unrolling. Unlock object and install illegal_monitor_exception. ++ // Unlock does not block, so don't have to worry about the frame. ++ // We don't have to preserve c_rarg1 since we are going to throw an exception. + -+int MacroAssembler::load_signed_byte(Register dst, Address src) { -+ int off = offset(); -+ lb(dst, src); -+ return off; -+} ++ push(state); ++ unlock_object(c_rarg1); ++ pop(state); + -+void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) { -+ switch (size_in_bytes) { -+ case 8: ld(dst, src); break; -+ case 4: is_signed ? lw(dst, src) : lwu(dst, src); break; -+ case 2: is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break; -+ case 1: is_signed ? load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break; -+ default: ShouldNotReachHere(); -+ } -+} ++ if (install_monitor_exception) { ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime:: ++ new_illegal_monitor_state_exception)); ++ } + -+void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) { -+ switch (size_in_bytes) { -+ case 8: sd(src, dst); break; -+ case 4: sw(src, dst); break; -+ case 2: sh(src, dst); break; -+ case 1: sb(src, dst); break; -+ default: ShouldNotReachHere(); -+ } -+} ++ j(restart); ++ } + -+// rotate right with imm bits -+void MacroAssembler::ror_imm(Register dst, Register src, uint32_t shift, Register tmp) -+{ -+ if (UseZbb) { -+ rori(dst, src, shift); -+ return; ++ bind(loop); ++ // check if current entry is used ++ add(t0, c_rarg1, BasicObjectLock::obj_offset_in_bytes()); ++ ld(t0, Address(t0, 0)); ++ bnez(t0, exception); ++ ++ add(c_rarg1, c_rarg1, entry_size); // otherwise advance to next entry ++ bind(entry); ++ bne(c_rarg1, x9, loop); // check if bottom reached if not at bottom then check this entry + } + -+ assert_different_registers(dst, tmp); -+ assert_different_registers(src, tmp); -+ assert(shift < 64, "shift amount must be < 64"); -+ slli(tmp, src, 64 - shift); -+ srli(dst, src, shift); -+ orr(dst, dst, tmp); -+} ++ bind(no_unlock); + -+// reverse bytes in halfword in lower 16 bits and sign-extend -+// Rd[15:0] = Rs[7:0] Rs[15:8] (sign-extend to 64 bits) -+void MacroAssembler::revb_h_h(Register Rd, Register Rs, Register tmp) { -+ if (UseZbb) { -+ rev8(Rd, Rs); -+ srai(Rd, Rd, 48); -+ return; -+ } -+ assert_different_registers(Rs, tmp); -+ assert_different_registers(Rd, tmp); -+ srli(tmp, Rs, 8); -+ andi(tmp, tmp, 0xFF); -+ slli(Rd, Rs, 56); -+ srai(Rd, Rd, 48); // sign-extend -+ orr(Rd, Rd, tmp); -+} ++ // jvmti support ++ if (notify_jvmdi) { ++ notify_method_exit(state, NotifyJVMTI); // preserve TOSCA + -+// reverse bytes in lower word and sign-extend -+// Rd[31:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] (sign-extend to 64 bits) -+void 
MacroAssembler::revb_w_w(Register Rd, Register Rs, Register tmp1, Register tmp2) { -+ if (UseZbb) { -+ rev8(Rd, Rs); -+ srai(Rd, Rd, 32); -+ return; ++ } else { ++ notify_method_exit(state, SkipNotifyJVMTI); // preserve TOSCA + } -+ assert_different_registers(Rs, tmp1, tmp2); -+ assert_different_registers(Rd, tmp1, tmp2); -+ revb_h_w_u(Rd, Rs, tmp1, tmp2); -+ slli(tmp2, Rd, 48); -+ srai(tmp2, tmp2, 32); // sign-extend -+ srli(Rd, Rd, 16); -+ orr(Rd, Rd, tmp2); -+} + -+// reverse bytes in halfword in lower 16 bits and zero-extend -+// Rd[15:0] = Rs[7:0] Rs[15:8] (zero-extend to 64 bits) -+void MacroAssembler::revb_h_h_u(Register Rd, Register Rs, Register tmp) { -+ if (UseZbb) { -+ rev8(Rd, Rs); -+ srli(Rd, Rd, 48); -+ return; -+ } -+ assert_different_registers(Rs, tmp); -+ assert_different_registers(Rd, tmp); -+ srli(tmp, Rs, 8); -+ andi(tmp, tmp, 0xFF); -+ andi(Rd, Rs, 0xFF); -+ slli(Rd, Rd, 8); -+ orr(Rd, Rd, tmp); -+} ++ // remove activation ++ // get sender esp ++ ld(t1, ++ Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); ++ if (StackReservedPages > 0) { ++ // testing if reserved zone needs to be re-enabled ++ Label no_reserved_zone_enabling; + -+// reverse bytes in halfwords in lower 32 bits and zero-extend -+// Rd[31:0] = Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] (zero-extend to 64 bits) -+void MacroAssembler::revb_h_w_u(Register Rd, Register Rs, Register tmp1, Register tmp2) { -+ if (UseZbb) { -+ rev8(Rd, Rs); -+ rori(Rd, Rd, 32); -+ roriw(Rd, Rd, 16); -+ zero_extend(Rd, Rd, 32); -+ return; -+ } -+ assert_different_registers(Rs, tmp1, tmp2); -+ assert_different_registers(Rd, tmp1, tmp2); -+ srli(tmp2, Rs, 16); -+ revb_h_h_u(tmp2, tmp2, tmp1); -+ revb_h_h_u(Rd, Rs, tmp1); -+ slli(tmp2, tmp2, 16); -+ orr(Rd, Rd, tmp2); -+} ++ ld(t0, Address(xthread, JavaThread::reserved_stack_activation_offset())); ++ ble(t1, t0, no_reserved_zone_enabling); + -+// This method is only used for revb_h -+// Rd = Rs[47:0] Rs[55:48] Rs[63:56] -+void MacroAssembler::revb_h_helper(Register Rd, Register Rs, Register tmp1, Register tmp2) { -+ assert_different_registers(Rs, tmp1, tmp2); -+ assert_different_registers(Rd, tmp1); -+ srli(tmp1, Rs, 48); -+ andi(tmp2, tmp1, 0xFF); -+ slli(tmp2, tmp2, 8); -+ srli(tmp1, tmp1, 8); -+ orr(tmp1, tmp1, tmp2); -+ slli(Rd, Rs, 16); -+ orr(Rd, Rd, tmp1); -+} -+// reverse bytes in each halfword -+// Rd[63:0] = Rs[55:48] Rs[63:56] Rs[39:32] Rs[47:40] Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] -+void MacroAssembler::revb_h(Register Rd, Register Rs, Register tmp1, Register tmp2) { -+ if (UseZbb) { -+ assert_different_registers(Rs, tmp1); -+ assert_different_registers(Rd, tmp1); -+ rev8(Rd, Rs); -+ zero_extend(tmp1, Rd, 32); -+ roriw(tmp1, tmp1, 16); -+ slli(tmp1, tmp1, 32); -+ srli(Rd, Rd, 32); -+ roriw(Rd, Rd, 16); -+ zero_extend(Rd, Rd, 32); -+ orr(Rd, Rd, tmp1); -+ return; -+ } -+ assert_different_registers(Rs, tmp1, tmp2); -+ assert_different_registers(Rd, tmp1, tmp2); -+ revb_h_helper(Rd, Rs, tmp1, tmp2); -+ for (int i = 0; i < 3; ++i) { -+ revb_h_helper(Rd, Rd, tmp1, tmp2); -+ } -+} ++ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), xthread); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_delayed_StackOverflowError)); ++ should_not_reach_here(); + -+// reverse bytes in each word -+// Rd[63:0] = Rs[39:32] Rs[47:40] Rs[55:48] Rs[63:56] Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] -+void MacroAssembler::revb_w(Register Rd, Register Rs, Register tmp1, Register tmp2) { -+ if (UseZbb) { -+ rev8(Rd, Rs); -+ rori(Rd, 
Rd, 32); -+ return; ++ bind(no_reserved_zone_enabling); + } -+ assert_different_registers(Rs, tmp1, tmp2); -+ assert_different_registers(Rd, tmp1, tmp2); -+ revb(Rd, Rs, tmp1, tmp2); -+ ror_imm(Rd, Rd, 32); ++ ++ // restore sender esp ++ mv(esp, t1); ++ ++ // remove frame anchor ++ leave(); ++ // If we're returning to interpreted code we will shortly be ++ // adjusting SP to allow some space for ESP. If we're returning to ++ // compiled code the saved sender SP was saved in sender_sp, so this ++ // restores it. ++ andi(sp, esp, -16); +} + -+// reverse bytes in doubleword -+// Rd[63:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] Rs[39:32] Rs[47,40] Rs[55,48] Rs[63:56] -+void MacroAssembler::revb(Register Rd, Register Rs, Register tmp1, Register tmp2) { -+ if (UseZbb) { -+ rev8(Rd, Rs); -+ return; -+ } -+ assert_different_registers(Rs, tmp1, tmp2); -+ assert_different_registers(Rd, tmp1, tmp2); -+ andi(tmp1, Rs, 0xFF); -+ slli(tmp1, tmp1, 8); -+ for (int step = 8; step < 56; step += 8) { -+ srli(tmp2, Rs, step); -+ andi(tmp2, tmp2, 0xFF); -+ orr(tmp1, tmp1, tmp2); -+ slli(tmp1, tmp1, 8); -+ } -+ srli(Rd, Rs, 56); -+ andi(Rd, Rd, 0xFF); -+ orr(Rd, tmp1, Rd); -+} -+ -+void MacroAssembler::andi(Register Rd, Register Rn, int64_t imm, Register tmp) { -+ if (is_imm_in_range(imm, 12, 0)) { -+ and_imm12(Rd, Rn, imm); ++// Lock object ++// ++// Args: ++// c_rarg1: BasicObjectLock to be used for locking ++// ++// Kills: ++// x10 ++// c_rarg0, c_rarg1, c_rarg2, c_rarg3, .. (param regs) ++// t0, t1 (temp regs) ++void InterpreterMacroAssembler::lock_object(Register lock_reg) ++{ ++ assert(lock_reg == c_rarg1, "The argument is only for looks. It must be c_rarg1"); ++ if (UseHeavyMonitors) { ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), ++ lock_reg); + } else { -+ assert_different_registers(Rn, tmp); -+ mv(tmp, imm); -+ andr(Rd, Rn, tmp); -+ } -+} ++ Label done; + -+void MacroAssembler::orptr(Address adr, RegisterOrConstant src, Register tmp1, Register tmp2) { -+ ld(tmp1, adr); -+ if (src.is_register()) { -+ orr(tmp1, tmp1, src.as_register()); -+ } else { -+ if(is_imm_in_range(src.as_constant(), 12, 0)) { -+ ori(tmp1, tmp1, src.as_constant()); -+ } else { -+ assert_different_registers(tmp1, tmp2); -+ mv(tmp2, src.as_constant()); -+ orr(tmp1, tmp1, tmp2); -+ } -+ } -+ sd(tmp1, adr); -+} ++ const Register swap_reg = x10; ++ const Register tmp = c_rarg2; ++ const Register obj_reg = c_rarg3; // Will contain the oop + -+void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp, Label &L) { -+ if (UseCompressedClassPointers) { -+ lwu(tmp, Address(oop, oopDesc::klass_offset_in_bytes())); -+ if (Universe::narrow_klass_base() == NULL) { -+ slli(tmp, tmp, Universe::narrow_klass_shift()); -+ beq(trial_klass, tmp, L); -+ return; -+ } -+ decode_klass_not_null(tmp); -+ } else { -+ ld(tmp, Address(oop, oopDesc::klass_offset_in_bytes())); -+ } -+ beq(trial_klass, tmp, L); -+} ++ const int obj_offset = BasicObjectLock::obj_offset_in_bytes(); ++ const int lock_offset = BasicObjectLock::lock_offset_in_bytes (); ++ const int mark_offset = lock_offset + ++ BasicLock::displaced_header_offset_in_bytes(); + -+// Move an oop into a register. immediate is true if we want -+// immediate instrcutions, i.e. we are not going to patch this -+// instruction while the code is being executed by another thread. In -+// that case we can use move immediates rather than the constant pool. 
-+void MacroAssembler::movoop(Register dst, jobject obj, bool immediate) { -+ int oop_index; -+ if (obj == NULL) { -+ oop_index = oop_recorder()->allocate_oop_index(obj); -+ } else { -+#ifdef ASSERT -+ { -+ ThreadInVMfromUnknown tiv; -+ assert(Universe::heap()->is_in_reserved(JNIHandles::resolve(obj)), "should be real oop"); -+ } -+#endif -+ oop_index = oop_recorder()->find_index(obj); -+ } -+ RelocationHolder rspec = oop_Relocation::spec(oop_index); -+ if (!immediate) { -+ address dummy = address(uintptr_t(pc()) & -wordSize); // A nearby aligned address -+ ld_constant(dst, Address(dummy, rspec)); -+ } else -+ mv(dst, Address((address)obj, rspec)); -+} ++ Label slow_case; + -+// Move a metadata address into a register. -+void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { -+ int oop_index; -+ if (obj == NULL) { -+ oop_index = oop_recorder()->allocate_metadata_index(obj); -+ } else { -+ oop_index = oop_recorder()->find_index(obj); -+ } -+ RelocationHolder rspec = metadata_Relocation::spec(oop_index); -+ mv(dst, Address((address)obj, rspec)); -+} ++ // Load object pointer into obj_reg c_rarg3 ++ ld(obj_reg, Address(lock_reg, obj_offset)); + -+// Writes to stack successive pages until offset reached to check for -+// stack overflow + shadow pages. This clobbers tmp. -+void MacroAssembler::bang_stack_size(Register size, Register tmp) { -+ assert_different_registers(tmp, size, t0); -+ // Bang stack for total size given plus shadow page size. -+ // Bang one page at a time because large size can bang beyond yellow and -+ // red zones. -+ mv(t0, os::vm_page_size()); -+ Label loop; -+ bind(loop); -+ sub(tmp, sp, t0); -+ subw(size, size, t0); -+ sd(size, Address(tmp)); -+ bgtz(size, loop); ++ if (DiagnoseSyncOnValueBasedClasses != 0) { ++ load_klass(tmp, obj_reg); ++ lwu(tmp, Address(tmp, Klass::access_flags_offset())); ++ andi(tmp, tmp, JVM_ACC_IS_VALUE_BASED_CLASS); ++ bnez(tmp, slow_case); ++ } + -+ // Bang down shadow pages too. -+ // At this point, (tmp-0) is the last address touched, so don't -+ // touch it again. (It was touched as (tmp-pagesize) but then tmp -+ // was post-decremented.) Skip this address by starting at i=1, and -+ // touch a few more pages below. N.B. It is important to touch all -+ // the way down to and including i=StackShadowPages. -+ for (int i = 0; i < (int)(JavaThread::stack_shadow_zone_size() / os::vm_page_size()) - 1; i++) { -+ // this could be any sized move but this is can be a debugging crumb -+ // so the bigger the better. 
-+ sub(tmp, tmp, os::vm_page_size()); -+ sd(size, Address(tmp, 0)); -+ } -+} ++ // Load (object->mark() | 1) into swap_reg ++ ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes())); ++ ori(swap_reg, t0, 1); + -+SkipIfEqual::SkipIfEqual(MacroAssembler* masm, const bool* flag_addr, bool value) { -+ int32_t offset = 0; -+ _masm = masm; -+ _masm->la_patchable(t0, ExternalAddress((address)flag_addr), offset); -+ _masm->lbu(t0, Address(t0, offset)); -+ _masm->beqz(t0, _label); -+} ++ // Save (object->mark() | 1) into BasicLock's displaced header ++ sd(swap_reg, Address(lock_reg, mark_offset)); + -+SkipIfEqual::~SkipIfEqual() { -+ _masm->bind(_label); -+ _masm = NULL; -+} ++ assert(lock_offset == 0, ++ "displached header must be first word in BasicObjectLock"); + -+void MacroAssembler::load_mirror(Register dst, Register method, Register tmp) { -+ const int mirror_offset = in_bytes(Klass::java_mirror_offset()); -+ ld(dst, Address(xmethod, Method::const_offset())); -+ ld(dst, Address(dst, ConstMethod::constants_offset())); -+ ld(dst, Address(dst, ConstantPool::pool_holder_offset_in_bytes())); -+ ld(dst, Address(dst, mirror_offset)); -+ resolve_oop_handle(dst, tmp); -+} ++ cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, t0, done, /*fallthrough*/NULL); + -+void MacroAssembler::resolve_oop_handle(Register result, Register tmp) { -+ // OopHandle::resolve is an indirection. -+ assert_different_registers(result, tmp); -+ access_load_at(T_OBJECT, IN_NATIVE, result, Address(result, 0), tmp, noreg); -+} ++ // Test if the oopMark is an obvious stack pointer, i.e., ++ // 1) (mark & 7) == 0, and ++ // 2) sp <= mark < mark + os::pagesize() ++ // ++ // These 3 tests can be done by evaluating the following ++ // expression: ((mark - sp) & (7 - os::vm_page_size())), ++ // assuming both stack pointer and pagesize have their ++ // least significant 3 bits clear. 
++ // NOTE: the oopMark is in swap_reg x10 as the result of cmpxchg ++ sub(swap_reg, swap_reg, sp); ++ li(t0, (int64_t)(7 - os::vm_page_size())); ++ andr(swap_reg, swap_reg, t0); + -+void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, -+ Register dst, Address src, -+ Register tmp1, Register thread_tmp) { -+ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ decorators = AccessInternal::decorator_fixup(decorators); -+ bool as_raw = (decorators & AS_RAW) != 0; -+ if (as_raw) { -+ bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, thread_tmp); -+ } else { -+ bs->load_at(this, decorators, type, dst, src, tmp1, thread_tmp); -+ } -+} ++ // Save the test result, for recursive case, the result is zero ++ sd(swap_reg, Address(lock_reg, mark_offset)); ++ beqz(swap_reg, done); + -+void MacroAssembler::null_check(Register reg, int offset) { -+ if (needs_explicit_null_check(offset)) { -+ // provoke OS NULL exception if reg = NULL by -+ // accessing M[reg] w/o changing any registers -+ // NOTE: this is plenty to provoke a segv -+ ld(zr, Address(reg, 0)); -+ } else { -+ // nothing to do, (later) access of M[reg + offset] -+ // will provoke OS NULL exception if reg = NULL -+ } -+} ++ bind(slow_case); + -+void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, -+ Address dst, Register src, -+ Register tmp1, Register tmp2, Register tmp3) { -+ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ decorators = AccessInternal::decorator_fixup(decorators); -+ bool as_raw = (decorators & AS_RAW) != 0; -+ if (as_raw) { -+ bs->BarrierSetAssembler::store_at(this, decorators, type, dst, src, tmp1, tmp2, tmp3); -+ } else { -+ bs->store_at(this, decorators, type, dst, src, tmp1, tmp2, tmp3); -+ } -+} ++ // Call the runtime routine for slow case ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), ++ lock_reg); + -+// Algorithm must match CompressedOops::encode. -+void MacroAssembler::encode_heap_oop(Register d, Register s) { -+ verify_oop(s, "broken oop in encode_heap_oop"); -+ if (Universe::narrow_oop_base() == NULL) { -+ if (Universe::narrow_oop_shift() != 0) { -+ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); -+ srli(d, s, LogMinObjAlignmentInBytes); -+ } else { -+ mv(d, s); -+ } -+ } else { -+ Label notNull; -+ sub(d, s, xheapbase); -+ bgez(d, notNull); -+ mv(d, zr); -+ bind(notNull); -+ if (Universe::narrow_oop_shift() != 0) { -+ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); -+ srli(d, d, Universe::narrow_oop_shift()); -+ } ++ bind(done); + } +} + -+void MacroAssembler::load_klass(Register dst, Register src) { -+ if (UseCompressedClassPointers) { -+ lwu(dst, Address(src, oopDesc::klass_offset_in_bytes())); -+ decode_klass_not_null(dst); -+ } else { -+ ld(dst, Address(src, oopDesc::klass_offset_in_bytes())); -+ } -+} + -+void MacroAssembler::store_klass(Register dst, Register src) { -+ // FIXME: Should this be a store release? concurrent gcs assumes -+ // klass length is valid if klass field is not null. -+ if (UseCompressedClassPointers) { -+ encode_klass_not_null(src); -+ sw(src, Address(dst, oopDesc::klass_offset_in_bytes())); ++// Unlocks an object. Used in monitorexit bytecode and ++// remove_activation. Throws an IllegalMonitorException if object is ++// not locked by current thread. 
++// ++// Args: ++// c_rarg1: BasicObjectLock for lock ++// ++// Kills: ++// x10 ++// c_rarg0, c_rarg1, c_rarg2, c_rarg3, ... (param regs) ++// t0, t1 (temp regs) ++void InterpreterMacroAssembler::unlock_object(Register lock_reg) ++{ ++ assert(lock_reg == c_rarg1, "The argument is only for looks. It must be rarg1"); ++ ++ if (UseHeavyMonitors) { ++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); + } else { -+ sd(src, Address(dst, oopDesc::klass_offset_in_bytes())); -+ } -+} ++ Label done; + -+void MacroAssembler::store_klass_gap(Register dst, Register src) { -+ if (UseCompressedClassPointers) { -+ // Store to klass gap in destination -+ sw(src, Address(dst, oopDesc::klass_gap_offset_in_bytes())); -+ } -+} ++ const Register swap_reg = x10; ++ const Register header_reg = c_rarg2; // Will contain the old oopMark ++ const Register obj_reg = c_rarg3; // Will contain the oop + -+void MacroAssembler::decode_klass_not_null(Register r) { -+ decode_klass_not_null(r, r); -+} ++ save_bcp(); // Save in case of exception + -+void MacroAssembler::decode_klass_not_null(Register dst, Register src, Register tmp) { -+ assert(UseCompressedClassPointers, "should only be used for compressed headers"); ++ // Convert from BasicObjectLock structure to object and BasicLock ++ // structure Store the BasicLock address into x10 ++ la(swap_reg, Address(lock_reg, BasicObjectLock::lock_offset_in_bytes())); + -+ if (Universe::narrow_klass_base() == NULL) { -+ if (Universe::narrow_klass_shift() != 0) { -+ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); -+ slli(dst, src, LogKlassAlignmentInBytes); -+ } else { -+ mv(dst, src); -+ } -+ return; -+ } ++ // Load oop into obj_reg(c_rarg3) ++ ld(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); + -+ Register xbase = dst; -+ if (dst == src) { -+ xbase = tmp; -+ } ++ // Free entry ++ sd(zr, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); + -+ assert_different_registers(src, xbase); -+ mv(xbase, (uintptr_t)Universe::narrow_klass_base()); -+ if (Universe::narrow_klass_shift() != 0) { -+ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); -+ assert_different_registers(t0, xbase); -+ shadd(dst, src, xbase, t0, LogKlassAlignmentInBytes); -+ } else { -+ add(dst, xbase, src); -+ } -+ if (xbase == xheapbase) { reinit_heapbase(); } ++ // Load the old header from BasicLock structure ++ ld(header_reg, Address(swap_reg, ++ BasicLock::displaced_header_offset_in_bytes())); + -+} ++ // Test for recursion ++ beqz(header_reg, done); + -+void MacroAssembler::encode_klass_not_null(Register r) { -+ encode_klass_not_null(r, r); -+} ++ // Atomic swap back the old header ++ cmpxchg_obj_header(swap_reg, header_reg, obj_reg, t0, done, /*fallthrough*/NULL); + -+void MacroAssembler::encode_klass_not_null(Register dst, Register src, Register tmp) { -+ assert(UseCompressedClassPointers, "should only be used for compressed headers"); ++ // Call the runtime routine for slow case. 
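++  // We only fall through to here if the compare-and-swap above failed, i.e. the
++  // displaced header could not be restored atomically.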
++ sd(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); // restore obj ++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); + -+ if (Universe::narrow_klass_base() == NULL) { -+ if (Universe::narrow_klass_shift() != 0) { -+ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); -+ srli(dst, src, LogKlassAlignmentInBytes); -+ } else { -+ mv(dst, src); -+ } -+ return; -+ } ++ bind(done); + -+ if (((uint64_t)(uintptr_t)Universe::narrow_klass_base() & 0xffffffff) == 0 && -+ Universe::narrow_klass_shift() == 0) { -+ zero_extend(dst, src, 32); -+ return; ++ restore_bcp(); + } ++} + -+ Register xbase = dst; -+ if (dst == src) { -+ xbase = tmp; -+ } + -+ assert_different_registers(src, xbase); -+ mv(xbase, (intptr_t)Universe::narrow_klass_base()); -+ sub(dst, src, xbase); -+ if (Universe::narrow_klass_shift() != 0) { -+ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); -+ srli(dst, dst, LogKlassAlignmentInBytes); -+ } -+ if (xbase == xheapbase) { -+ reinit_heapbase(); -+ } ++void InterpreterMacroAssembler::test_method_data_pointer(Register mdp, ++ Label& zero_continue) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ ld(mdp, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); ++ beqz(mdp, zero_continue); +} + -+void MacroAssembler::decode_heap_oop_not_null(Register r) { -+ decode_heap_oop_not_null(r, r); -+} ++// Set the method data pointer for the current bcp. ++void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ Label set_mdp; ++ push_reg(0xc00, sp); // save x10, x11 + -+void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { -+ assert(UseCompressedOops, "should only be used for compressed headers"); -+ assert(Universe::heap() != NULL, "java heap should be initialized"); -+ // Cannot assert, unverified entry point counts instructions (see .ad file) -+ // vtableStubs also counts instructions in pd_code_size_limit. -+ // Also do not verify_oop as this is called by verify_oop. -+ if (Universe::narrow_oop_shift() != 0) { -+ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); -+ slli(dst, src, LogMinObjAlignmentInBytes); -+ if (Universe::narrow_oop_base() != NULL) { -+ add(dst, xheapbase, dst); -+ } -+ } else { -+ assert(Universe::narrow_oop_base() == NULL, "sanity"); -+ mv(dst, src); -+ } ++ // Test MDO to avoid the call if it is NULL. ++ ld(x10, Address(xmethod, in_bytes(Method::method_data_offset()))); ++ beqz(x10, set_mdp); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::bcp_to_di), xmethod, xbcp); ++ // x10: mdi ++ // mdo is guaranteed to be non-zero here, we checked for it before the call. 
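++  // Compute the absolute mdp from the returned data index:
++  //   mdp = MDO + MethodData::data_offset() + mdi
++  // and record it in the interpreter frame's mdp slot.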
++ ld(x11, Address(xmethod, in_bytes(Method::method_data_offset()))); ++ la(x11, Address(x11, in_bytes(MethodData::data_offset()))); ++ add(x10, x11, x10); ++ sd(x10, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); ++ bind(set_mdp); ++ pop_reg(0xc00, sp); +} + -+void MacroAssembler::decode_heap_oop(Register d, Register s) { -+ if (Universe::narrow_oop_base() == NULL) { -+ if (Universe::narrow_oop_shift() != 0 || d != s) { -+ slli(d, s, Universe::narrow_oop_shift()); -+ } -+ } else { -+ Label done; -+ mv(d, s); -+ beqz(s, done); -+ shadd(d, s, xheapbase, d, LogMinObjAlignmentInBytes); -+ bind(done); -+ } -+ verify_oop(d, "broken oop in decode_heap_oop"); -+} ++void InterpreterMacroAssembler::verify_method_data_pointer() { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++#ifdef ASSERT ++ Label verify_continue; ++ add(sp, sp, -4 * wordSize); ++ sd(x10, Address(sp, 0)); ++ sd(x11, Address(sp, wordSize)); ++ sd(x12, Address(sp, 2 * wordSize)); ++ sd(x13, Address(sp, 3 * wordSize)); ++ test_method_data_pointer(x13, verify_continue); // If mdp is zero, continue ++ get_method(x11); + -+void MacroAssembler::store_heap_oop(Address dst, Register src, Register tmp1, -+ Register tmp2, Register tmp3, DecoratorSet decorators) { -+ access_store_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, tmp2, tmp3); ++ // If the mdp is valid, it will point to a DataLayout header which is ++ // consistent with the bcp. The converse is highly probable also. ++ lh(x12, Address(x13, in_bytes(DataLayout::bci_offset()))); ++ ld(t0, Address(x11, Method::const_offset())); ++ add(x12, x12, t0); ++ la(x12, Address(x12, ConstMethod::codes_offset())); ++ beq(x12, xbcp, verify_continue); ++ // x10: method ++ // xbcp: bcp // xbcp == 22 ++ // x13: mdp ++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::verify_mdp), ++ x11, xbcp, x13); ++ bind(verify_continue); ++ ld(x10, Address(sp, 0)); ++ ld(x11, Address(sp, wordSize)); ++ ld(x12, Address(sp, 2 * wordSize)); ++ ld(x13, Address(sp, 3 * wordSize)); ++ add(sp, sp, 4 * wordSize); ++#endif // ASSERT +} + -+void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1, -+ Register thread_tmp, DecoratorSet decorators) { -+ access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp); -+} + -+void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1, -+ Register thread_tmp, DecoratorSet decorators) { -+ access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL, dst, src, tmp1, thread_tmp); ++void InterpreterMacroAssembler::set_mdp_data_at(Register mdp_in, ++ int constant, ++ Register value) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ Address data(mdp_in, constant); ++ sd(value, data); +} + -+// Used for storing NULLs. -+void MacroAssembler::store_heap_oop_null(Address dst) { -+ access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg, noreg); ++ ++void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, ++ int constant, ++ bool decrement) { ++ increment_mdp_data_at(mdp_in, noreg, constant, decrement); +} + -+int MacroAssembler::corrected_idivl(Register result, Register rs1, Register rs2, -+ bool want_remainder) -+{ -+ // Full implementation of Java idiv and irem. The function -+ // returns the (pc) offset of the div instruction - may be needed -+ // for implicit exceptions. 
-+ // -+ // input : rs1: dividend -+ // rs2: divisor -+ // -+ // result: either -+ // quotient (= rs1 idiv rs2) -+ // remainder (= rs1 irem rs2) ++void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, ++ Register reg, ++ int constant, ++ bool decrement) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ // %%% this does 64bit counters at best it is wasting space ++ // at worst it is a rare bug when counters overflow + ++ assert_different_registers(t1, t0, mdp_in, reg); + -+ int idivl_offset = offset(); -+ if (!want_remainder) { -+ divw(result, rs1, rs2); -+ } else { -+ remw(result, rs1, rs2); // result = rs1 % rs2; ++ Address addr1(mdp_in, constant); ++ Address addr2(t1, 0); ++ Address &addr = addr1; ++ if (reg != noreg) { ++ la(t1, addr1); ++ add(t1, t1, reg); ++ addr = addr2; + } -+ return idivl_offset; -+} + -+int MacroAssembler::corrected_idivq(Register result, Register rs1, Register rs2, -+ bool want_remainder) -+{ -+ // Full implementation of Java ldiv and lrem. The function -+ // returns the (pc) offset of the div instruction - may be needed -+ // for implicit exceptions. -+ // -+ // input : rs1: dividend -+ // rs2: divisor -+ // -+ // result: either -+ // quotient (= rs1 idiv rs2) -+ // remainder (= rs1 irem rs2) -+ -+ int idivq_offset = offset(); -+ if (!want_remainder) { -+ div(result, rs1, rs2); ++ if (decrement) { ++ ld(t0, addr); ++ addi(t0, t0, -DataLayout::counter_increment); ++ Label L; ++ bltz(t0, L); // skip store if counter underflow ++ sd(t0, addr); ++ bind(L); + } else { -+ rem(result, rs1, rs2); // result = rs1 % rs2; ++ assert(DataLayout::counter_increment == 1, ++ "flow-free idiom only works with 1"); ++ ld(t0, addr); ++ addi(t0, t0, DataLayout::counter_increment); ++ Label L; ++ blez(t0, L); // skip store if counter overflow ++ sd(t0, addr); ++ bind(L); + } -+ return idivq_offset; +} + -+// Look up the method for a megamorpic invkkeinterface call. -+// The target method is determined by . -+// The receiver klass is in recv_klass. -+// On success, the result will be in method_result, and execution falls through. -+// On failure, execution transfers to the given label. -+void MacroAssembler::lookup_interface_method(Register recv_klass, -+ Register intf_klass, -+ RegisterOrConstant itable_index, -+ Register method_result, -+ Register scan_tmp, -+ Label& L_no_such_interface, -+ bool return_method) { -+ assert_different_registers(recv_klass, intf_klass, scan_tmp); -+ assert_different_registers(method_result, intf_klass, scan_tmp); -+ assert(recv_klass != method_result || !return_method, -+ "recv_klass can be destroyed when mehtid isn't needed"); -+ assert(itable_index.is_constant() || itable_index.as_register() == method_result, -+ "caller must be same register for non-constant itable index as for method"); -+ -+ // Compute start of first itableOffsetEntry (which is at the end of the vtable). -+ int vtable_base = in_bytes(Klass::vtable_start_offset()); -+ int itentry_off = itableMethodEntry::method_offset_in_bytes(); -+ int scan_step = itableOffsetEntry::size() * wordSize; -+ int vte_size = vtableEntry::size_in_bytes(); -+ assert(vte_size == wordSize, "else adjust times_vte_scale"); -+ -+ lwu(scan_tmp, Address(recv_klass, Klass::vtable_length_offset())); -+ -+ // %%% Could store the aligned, prescaled offset in the klassoop. -+ shadd(scan_tmp, scan_tmp, recv_klass, scan_tmp, 3); -+ add(scan_tmp, scan_tmp, vtable_base); -+ -+ if (return_method) { -+ // Adjust recv_klass by scaled itable_index, so we can free itable_index. 
-+ assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); -+ if (itable_index.is_register()) { -+ slli(t0, itable_index.as_register(), 3); -+ } else { -+ mv(t0, itable_index.as_constant() << 3); -+ } -+ add(recv_klass, recv_klass, t0); -+ if (itentry_off) { -+ add(recv_klass, recv_klass, itentry_off); -+ } -+ } -+ -+ Label search, found_method; -+ -+ ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset_in_bytes())); -+ beq(intf_klass, method_result, found_method); -+ bind(search); -+ // Check that the previous entry is non-null. A null entry means that -+ // the receiver class doens't implement the interface, and wasn't the -+ // same as when the caller was compiled. -+ beqz(method_result, L_no_such_interface, /* is_far */ true); -+ addi(scan_tmp, scan_tmp, scan_step); -+ ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset_in_bytes())); -+ bne(intf_klass, method_result, search); -+ -+ bind(found_method); -+ -+ // Got a hit. -+ if (return_method) { -+ lwu(scan_tmp, Address(scan_tmp, itableOffsetEntry::offset_offset_in_bytes())); -+ add(method_result, recv_klass, scan_tmp); -+ ld(method_result, Address(method_result)); -+ } ++void InterpreterMacroAssembler::set_mdp_flag_at(Register mdp_in, ++ int flag_byte_constant) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ int flags_offset = in_bytes(DataLayout::flags_offset()); ++ // Set the flag ++ lbu(t1, Address(mdp_in, flags_offset)); ++ ori(t1, t1, flag_byte_constant); ++ sb(t1, Address(mdp_in, flags_offset)); +} + -+// virtual method calling -+void MacroAssembler::lookup_virtual_method(Register recv_klass, -+ RegisterOrConstant vtable_index, -+ Register method_result) { -+ const int base = in_bytes(Klass::vtable_start_offset()); -+ assert(vtableEntry::size() * wordSize == 8, -+ "adjust the scaling in the code below"); -+ int vtable_offset_in_bytes = base + vtableEntry::method_offset_in_bytes(); + -+ if (vtable_index.is_register()) { -+ shadd(method_result, vtable_index.as_register(), recv_klass, method_result, LogBytesPerWord); -+ ld(method_result, Address(method_result, vtable_offset_in_bytes)); ++void InterpreterMacroAssembler::test_mdp_data_at(Register mdp_in, ++ int offset, ++ Register value, ++ Register test_value_out, ++ Label& not_equal_continue) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ if (test_value_out == noreg) { ++ ld(t1, Address(mdp_in, offset)); ++ bne(value, t1, not_equal_continue); + } else { -+ vtable_offset_in_bytes += vtable_index.as_constant() * wordSize; -+ Address addr = form_address(recv_klass, /* base */ -+ vtable_offset_in_bytes, /* offset */ -+ 12, /* expect offset bits */ -+ method_result); /* temp reg */ -+ ld(method_result, addr); ++ // Put the test value into a register, so caller can use it: ++ ld(test_value_out, Address(mdp_in, offset)); ++ bne(value, test_value_out, not_equal_continue); + } +} + -+void MacroAssembler::membar(uint32_t order_constraint) { -+ if (!os::is_MP()) { return; } -+ -+ address prev = pc() - NativeMembar::instruction_size; -+ address last = code()->last_insn(); -+ -+ if (last != NULL && nativeInstruction_at(last)->is_membar() && prev == last) { -+ NativeMembar *bar = NativeMembar_at(prev); -+ // We are merging two memory barrier instructions. On RISCV we -+ // can do this simply by ORing them together. 
-+ bar->set_kind(bar->get_kind() | order_constraint); -+ BLOCK_COMMENT("merged membar"); -+ } else { -+ code()->set_last_insn(pc()); -+ -+ uint32_t predecessor = 0; -+ uint32_t successor = 0; + -+ membar_mask_to_pred_succ(order_constraint, predecessor, successor); -+ fence(predecessor, successor); -+ } ++void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, ++ int offset_of_disp) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ ld(t1, Address(mdp_in, offset_of_disp)); ++ add(mdp_in, mdp_in, t1); ++ sd(mdp_in, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); +} + -+void MacroAssembler::check_klass_subtype(Register sub_klass, -+ Register super_klass, -+ Register tmp_reg, -+ Label& L_success) { -+ Label L_failure; -+ check_klass_subtype_fast_path(sub_klass, super_klass, tmp_reg, &L_success, &L_failure, NULL); -+ check_klass_subtype_slow_path(sub_klass, super_klass, tmp_reg, noreg, &L_success, NULL); -+ bind(L_failure); ++void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, ++ Register reg, ++ int offset_of_disp) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ add(t1, mdp_in, reg); ++ ld(t1, Address(t1, offset_of_disp)); ++ add(mdp_in, mdp_in, t1); ++ sd(mdp_in, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); +} + -+// Write serialization page so VM thread can do a pseudo remote membar. -+// We use the current thread pointer to calculate a thread specific -+// offset to write to within the page. This minimizes bus traffic -+// due to cache line collision. -+void MacroAssembler::serialize_memory(Register thread, Register tmp1, Register tmp2) { -+ srli(tmp2, thread, os::get_serialize_page_shift_count()); -+ -+ int mask = os::vm_page_size() - sizeof(int); -+ andi(tmp2, tmp2, mask, tmp1); + -+ add(tmp1, tmp2, (intptr_t)os::get_memory_serialize_page()); -+ membar(MacroAssembler::AnyAny); -+ sw(zr, Address(tmp1)); ++void InterpreterMacroAssembler::update_mdp_by_constant(Register mdp_in, ++ int constant) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ addi(mdp_in, mdp_in, (unsigned)constant); ++ sd(mdp_in, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); +} + -+void MacroAssembler::safepoint_poll(Label& slow_path) { -+ if (SafepointMechanism::uses_thread_local_poll()) { -+ ld(t1, Address(xthread, Thread::polling_page_offset())); -+ andi(t0, t1, SafepointMechanism::poll_bit()); -+ bnez(t0, slow_path); -+ } else { -+ int32_t offset = 0; -+ la_patchable(t0, ExternalAddress(SafepointSynchronize::address_of_state()), offset); -+ lwu(t0, Address(t0, offset)); -+ assert(SafepointSynchronize::_not_synchronized == 0, "rewrite this code"); -+ bnez(t0, slow_path); -+ } -+} + -+// Just like safepoint_poll, but use an acquiring load for thread- -+// local polling. -+// -+// We need an acquire here to ensure that any subsequent load of the -+// global SafepointSynchronize::_state flag is ordered after this load -+// of the local Thread::_polling page. We don't want this poll to -+// return false (i.e. not safepointing) and a later poll of the global -+// SafepointSynchronize::_state spuriously to return true. -+// -+// This is to avoid a race when we're in a native->Java transition -+// racing the code which wakes up from a safepoint. 
-+// -+void MacroAssembler::safepoint_poll_acquire(Label& slow_path) { -+ if (SafepointMechanism::uses_thread_local_poll()) { -+ membar(MacroAssembler::AnyAny); -+ ld(t1, Address(xthread, Thread::polling_page_offset())); -+ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); -+ andi(t0, t1, SafepointMechanism::poll_bit()); -+ bnez(t0, slow_path); -+ } else { -+ safepoint_poll(slow_path); -+ } -+} ++void InterpreterMacroAssembler::update_mdp_for_ret(Register return_bci) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); + -+void MacroAssembler::cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, -+ Label &succeed, Label *fail) { -+ // oldv holds comparison value -+ // newv holds value to write in exchange -+ // addr identifies memory word to compare against/update -+ Label retry_load, nope; -+ bind(retry_load); -+ // flush and load exclusive from the memory location -+ // and fail if it is not what we expect -+ lr_d(tmp, addr, Assembler::aqrl); -+ bne(tmp, oldv, nope); -+ // if we store+flush with no intervening write tmp wil be zero -+ sc_d(tmp, newv, addr, Assembler::rl); -+ beqz(tmp, succeed); -+ // retry so we only ever return after a load fails to compare -+ // ensures we don't return a stale value after a failed write. -+ j(retry_load); -+ // if the memory word differs we return it in oldv and signal a fail -+ bind(nope); -+ membar(AnyAny); -+ mv(oldv, tmp); -+ if (fail != NULL) { -+ j(*fail); -+ } ++ // save/restore across call_VM ++ addi(sp, sp, -2 * wordSize); ++ sd(zr, Address(sp, 0)); ++ sd(return_bci, Address(sp, wordSize)); ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::update_mdp_for_ret), ++ return_bci); ++ ld(zr, Address(sp, 0)); ++ ld(return_bci, Address(sp, wordSize)); ++ addi(sp, sp, 2 * wordSize); +} + -+void MacroAssembler::cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp, -+ Label &succeed, Label *fail) { -+ assert(oopDesc::mark_offset_in_bytes() == 0, "assumption"); -+ cmpxchgptr(oldv, newv, obj, tmp, succeed, fail); -+} ++void InterpreterMacroAssembler::profile_taken_branch(Register mdp, ++ Register bumped_count) { ++ if (ProfileInterpreter) { ++ Label profile_continue; + -+void MacroAssembler::load_reserved(Register addr, -+ enum operand_size size, -+ Assembler::Aqrl acquire) { -+ switch (size) { -+ case int64: -+ lr_d(t0, addr, acquire); -+ break; -+ case int32: -+ lr_w(t0, addr, acquire); -+ break; -+ case uint32: -+ lr_w(t0, addr, acquire); -+ zero_extend(t0, t0, 32); -+ break; -+ default: -+ ShouldNotReachHere(); -+ } -+} ++ // If no method data exists, go to profile_continue. ++ // Otherwise, assign to mdp ++ test_method_data_pointer(mdp, profile_continue); + -+void MacroAssembler::store_conditional(Register addr, -+ Register new_val, -+ enum operand_size size, -+ Assembler::Aqrl release) { -+ switch (size) { -+ case int64: -+ sc_d(t0, new_val, addr, release); -+ break; -+ case int32: -+ case uint32: -+ sc_w(t0, new_val, addr, release); -+ break; -+ default: -+ ShouldNotReachHere(); ++ // We are taking a branch. Increment the taken count. ++ Address data(mdp, in_bytes(JumpData::taken_offset())); ++ ld(bumped_count, data); ++ assert(DataLayout::counter_increment == 1, ++ "flow-free idiom only works with 1"); ++ addi(bumped_count, bumped_count, DataLayout::counter_increment); ++ Label L; ++ // eg: bumped_count=0x7fff ffff ffff ffff + 1 < 0. 
so we use <= 0;
++ blez(bumped_count, L); // skip store if counter overflow,
++ sd(bumped_count, data);
++ bind(L);
++ // The method data pointer needs to be updated to reflect the new target.
++ update_mdp_by_offset(mdp, in_bytes(JumpData::displacement_offset()));
++ bind(profile_continue);
+ }
+}
+
++void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp) {
++ if (ProfileInterpreter) {
++ Label profile_continue;
+
++ // If no method data exists, go to profile_continue.
++ test_method_data_pointer(mdp, profile_continue);
+
++ // We are not taking a branch. Increment the not taken count.
++ increment_mdp_data_at(mdp, in_bytes(BranchData::not_taken_offset()));
+
++ // The method data pointer needs to be updated to correspond to
++ // the next bytecode
++ update_mdp_by_constant(mdp, in_bytes(BranchData::branch_data_size()));
++ bind(profile_continue);
+ }
+}
+
++void InterpreterMacroAssembler::profile_call(Register mdp) {
++ if (ProfileInterpreter) {
++ Label profile_continue;
+
++ // If no method data exists, go to profile_continue.
++ test_method_data_pointer(mdp, profile_continue);
+
++ // We are making a call. Increment the count.
++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
+
++ // The method data pointer needs to be updated to reflect the new target.
++ update_mdp_by_constant(mdp, in_bytes(CounterData::counter_data_size()));
++ bind(profile_continue);
++ }
++}
+
++void InterpreterMacroAssembler::profile_final_call(Register mdp) {
++ if (ProfileInterpreter) {
++ Label profile_continue;
+
++ // If no method data exists, go to profile_continue.
++ test_method_data_pointer(mdp, profile_continue);
+
++ // We are making a call. Increment the count.
++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
+
++ // The method data pointer needs to be updated to reflect the new target.
++ update_mdp_by_constant(mdp,
++ in_bytes(VirtualCallData::
++ virtual_call_data_size()));
++ bind(profile_continue);
+ }
+}
+
+
++void InterpreterMacroAssembler::profile_virtual_call(Register receiver,
++ Register mdp,
++ Register reg2,
++ bool receiver_can_be_null) {
++ if (ProfileInterpreter) {
++ Label profile_continue;
+
++ // If no method data exists, go to profile_continue.
++ test_method_data_pointer(mdp, profile_continue);
+
++ Label skip_receiver_profile;
++ if (receiver_can_be_null) {
++ Label not_null;
++ bnez(receiver, not_null); // a non-null receiver skips the null-receiver bookkeeping below
++ // We are making a call. Increment the count for null receiver.
++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
++ j(skip_receiver_profile);
++ bind(not_null);
++ }
+
++ // Record the receiver type.
++ record_klass_in_profile(receiver, mdp, reg2, true);
++ bind(skip_receiver_profile);
+
++ // The method data pointer needs to be updated to reflect the new target.
+
++ update_mdp_by_constant(mdp,
++ in_bytes(VirtualCallData::
++ virtual_call_data_size()));
++ bind(profile_continue);
++ }
+}
+
++// This routine creates a state machine for updating the multi-row
++// type profile at a virtual call site (or other type-sensitive bytecode).
++// The machine visits each row (of receiver/count) until the receiver type
++// is found, or until it runs out of rows. At the same time, it remembers
++// the location of the first empty row. (An empty row records null for its
++// receiver, and can be allocated for a newly-observed receiver type.)
++// Because there are two degrees of freedom in the state, a simple linear ++// search will not work; it must be a decision tree. Hence this helper ++// function is recursive, to generate the required tree structured code. ++// It's the interpreter, so we are trading off code space for speed. ++// See below for example code. ++void InterpreterMacroAssembler::record_klass_in_profile_helper( ++ Register receiver, Register mdp, ++ Register reg2, ++ Label& done, bool is_virtual_call) { ++ if (TypeProfileWidth == 0) { ++ if (is_virtual_call) { ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ } + -+ // equal, succeed -+ if (result_as_bool) { -+ mv(result, 1); + } else { -+ mv(result, expected); -+ } -+ j(done); ++ int non_profiled_offset = -1; ++ if (is_virtual_call) { ++ non_profiled_offset = in_bytes(CounterData::count_offset()); ++ } + -+ // not equal, failed -+ bind(ne_done); -+ if (result_as_bool) { -+ mv(result, zr); -+ } else { -+ mv(result, t0); ++ record_item_in_profile_helper(receiver, mdp, reg2, 0, done, TypeProfileWidth, ++ &VirtualCallData::receiver_offset, &VirtualCallData::receiver_count_offset, non_profiled_offset); + } -+ -+ bind(done); +} + -+void MacroAssembler::cmpxchg_weak(Register addr, Register expected, -+ Register new_val, -+ enum operand_size size, -+ Assembler::Aqrl acquire, Assembler::Aqrl release, -+ Register result) { -+ assert(size != int8 && size != int16, "unsupported operand size"); -+ -+ Label fail, done; -+ load_reserved(addr, size, acquire); -+ bne(t0, expected, fail); -+ store_conditional(addr, new_val, size, release); -+ bnez(t0, fail); -+ -+ // Success -+ mv(result, 1); -+ j(done); ++void InterpreterMacroAssembler::record_item_in_profile_helper( ++ Register item, Register mdp, Register reg2, int start_row, Label& done, int total_rows, ++ OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn, int non_profiled_offset) { ++ int last_row = total_rows - 1; ++ assert(start_row <= last_row, "must be work left to do"); ++ // Test this row for both the item and for null. ++ // Take any of three different outcomes: ++ // 1. found item => increment count and goto done ++ // 2. found null => keep looking for case 1, maybe allocate this cell ++ // 3. found something else => keep looking for cases 1 and 2 ++ // Case 3 is handled by a recursive call. ++ for (int row = start_row; row <= last_row; row++) { ++ Label next_test; ++ bool test_for_null_also = (row == start_row); + -+ // Fail -+ bind(fail); -+ mv(result, zr); ++ // See if the item is item[n]. ++ int item_offset = in_bytes(item_offset_fn(row)); ++ test_mdp_data_at(mdp, item_offset, item, ++ (test_for_null_also ? reg2 : noreg), ++ next_test); ++ // (Reg2 now contains the item from the CallData.) + -+ bind(done); -+} ++ // The item is item[n]. Increment count[n]. ++ int count_offset = in_bytes(item_count_offset_fn(row)); ++ increment_mdp_data_at(mdp, count_offset); ++ j(done); ++ bind(next_test); + -+#define ATOMIC_OP(NAME, AOP, ACQUIRE, RELEASE) \ -+void MacroAssembler::atomic_##NAME(Register prev, RegisterOrConstant incr, Register addr) { \ -+ prev = prev->is_valid() ? prev : zr; \ -+ if (incr.is_register()) { \ -+ AOP(prev, addr, incr.as_register(), (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ -+ } else { \ -+ mv(t0, incr.as_constant()); \ -+ AOP(prev, addr, t0, (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ -+ } \ -+ return; \ -+} ++ if (test_for_null_also) { ++ Label found_null; ++ // Failed the equality check on item[n]... Test for null. 
++ if (start_row == last_row) { ++ // The only thing left to do is handle the null case. ++ if (non_profiled_offset >= 0) { ++ beqz(reg2, found_null); ++ // Item did not match any saved item and there is no empty row for it. ++ // Increment total counter to indicate polymorphic case. ++ increment_mdp_data_at(mdp, non_profiled_offset); ++ j(done); ++ bind(found_null); ++ } else { ++ bnez(reg2, done); ++ } ++ break; ++ } ++ // Since null is rare, make it be the branch-taken case. ++ beqz(reg2, found_null); + -+ATOMIC_OP(add, amoadd_d, Assembler::relaxed, Assembler::relaxed) -+ATOMIC_OP(addw, amoadd_w, Assembler::relaxed, Assembler::relaxed) -+ATOMIC_OP(addal, amoadd_d, Assembler::aq, Assembler::rl) -+ATOMIC_OP(addalw, amoadd_w, Assembler::aq, Assembler::rl) ++ // Put all the "Case 3" tests here. ++ record_item_in_profile_helper(item, mdp, reg2, start_row + 1, done, total_rows, ++ item_offset_fn, item_count_offset_fn, non_profiled_offset); + -+#undef ATOMIC_OP ++ // Found a null. Keep searching for a matching item, ++ // but remember that this is an empty (unused) slot. ++ bind(found_null); ++ } ++ } + -+#define ATOMIC_XCHG(OP, AOP, ACQUIRE, RELEASE) \ -+void MacroAssembler::atomic_##OP(Register prev, Register newv, Register addr) { \ -+ prev = prev->is_valid() ? prev : zr; \ -+ AOP(prev, addr, newv, (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ -+ return; \ ++ // In the fall-through case, we found no matching item, but we ++ // observed the item[start_row] is NULL. ++ // Fill in the item field and increment the count. ++ int item_offset = in_bytes(item_offset_fn(start_row)); ++ set_mdp_data_at(mdp, item_offset, item); ++ int count_offset = in_bytes(item_count_offset_fn(start_row)); ++ mv(reg2, DataLayout::counter_increment); ++ set_mdp_data_at(mdp, count_offset, reg2); ++ if (start_row > 0) { ++ j(done); ++ } +} + -+ATOMIC_XCHG(xchg, amoswap_d, Assembler::relaxed, Assembler::relaxed) -+ATOMIC_XCHG(xchgw, amoswap_w, Assembler::relaxed, Assembler::relaxed) -+ATOMIC_XCHG(xchgal, amoswap_d, Assembler::aq, Assembler::rl) -+ATOMIC_XCHG(xchgalw, amoswap_w, Assembler::aq, Assembler::rl) ++// Example state machine code for three profile rows: ++// # main copy of decision tree, rooted at row[1] ++// if (row[0].rec == rec) then [ ++// row[0].incr() ++// goto done ++// ] ++// if (row[0].rec != NULL) then [ ++// # inner copy of decision tree, rooted at row[1] ++// if (row[1].rec == rec) then [ ++// row[1].incr() ++// goto done ++// ] ++// if (row[1].rec != NULL) then [ ++// # degenerate decision tree, rooted at row[2] ++// if (row[2].rec == rec) then [ ++// row[2].incr() ++// goto done ++// ] ++// if (row[2].rec != NULL) then [ ++// count.incr() ++// goto done ++// ] # overflow ++// row[2].init(rec) ++// goto done ++// ] else [ ++// # remember row[1] is empty ++// if (row[2].rec == rec) then [ ++// row[2].incr() ++// goto done ++// ] ++// row[1].init(rec) ++// goto done ++// ] ++// else [ ++// # remember row[0] is empty ++// if (row[1].rec == rec) then [ ++// row[1].incr() ++// goto done ++// ] ++// if (row[2].rec == rec) then [ ++// row[2].incr() ++// goto done ++// ] ++// row[0].init(rec) ++// goto done ++// ] ++// done: + -+#undef ATOMIC_XCHG ++void InterpreterMacroAssembler::record_klass_in_profile(Register receiver, ++ Register mdp, Register reg2, ++ bool is_virtual_call) { ++ assert(ProfileInterpreter, "must be profiling"); ++ Label done; + -+#define ATOMIC_XCHGU(OP1, OP2) \ -+void MacroAssembler::atomic_##OP1(Register prev, Register newv, Register addr) { \ -+ atomic_##OP2(prev, newv, addr); \ -+ 
zero_extend(prev, prev, 32); \ -+ return; \ -+} ++ record_klass_in_profile_helper(receiver, mdp, reg2, done, is_virtual_call); + -+ATOMIC_XCHGU(xchgwu, xchgw) -+ATOMIC_XCHGU(xchgalwu, xchgalw) ++ bind(done); ++} + -+#undef ATOMIC_XCHGU ++void InterpreterMacroAssembler::profile_ret(Register return_bci, Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; + -+void MacroAssembler::biased_locking_exit(Register obj_reg, Register tmp_reg, Label& done, Register flag) { -+ assert(UseBiasedLocking, "why call this otherwise?"); ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); + -+ // Check for biased locking unlock case, which is a no-op -+ // Note: we do not have to check the thread ID for two reasons. -+ // First, the interpreter checks for IllegalMonitorStateException at -+ // a higher level. Second, if the bias was revoked while we held the -+ // lock, the object could not be rebiased toward another thread, so -+ // the bias bit would be clear. -+ ld(tmp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); -+ andi(tmp_reg, tmp_reg, markOopDesc::biased_lock_mask_in_place); // 1 << 3 -+ sub(tmp_reg, tmp_reg, markOopDesc::biased_lock_pattern); -+ if (flag->is_valid()) { mv(flag, tmp_reg); } -+ beqz(tmp_reg, done); -+} ++ // Update the total ret count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + -+void MacroAssembler::load_prototype_header(Register dst, Register src) { -+ load_klass(dst, src); -+ ld(dst, Address(dst, Klass::prototype_header_offset())); -+} ++ for (uint row = 0; row < RetData::row_limit(); row++) { ++ Label next_test; + -+int MacroAssembler::biased_locking_enter(Register lock_reg, -+ Register obj_reg, -+ Register swap_reg, -+ Register tmp_reg, -+ bool swap_reg_contains_mark, -+ Label& done, -+ Label* slow_case, -+ BiasedLockingCounters* counters, -+ Register flag) { -+ assert(UseBiasedLocking, "why call this otherwise?"); -+ assert_different_registers(lock_reg, obj_reg, swap_reg); ++ // See if return_bci is equal to bci[n]: ++ test_mdp_data_at(mdp, ++ in_bytes(RetData::bci_offset(row)), ++ return_bci, noreg, ++ next_test); + -+ if (PrintBiasedLockingStatistics && counters == NULL) { -+ counters = BiasedLocking::counters(); -+ } ++ // return_bci is equal to bci[n]. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(RetData::bci_count_offset(row))); + -+ assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, t0, flag); -+ assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); -+ Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); ++ // The method data pointer needs to be updated to reflect the new target. 
++ update_mdp_by_offset(mdp, ++ in_bytes(RetData::bci_displacement_offset(row))); ++ j(profile_continue); ++ bind(next_test); ++ } + -+ // Biased locking -+ // See whether the lock is currently biased toward our thread and -+ // whether the epoch is still valid -+ // Note that the runtime guarantees sufficient alignment of JavaThread -+ // pointers to allow age to be placed into low bits -+ // First check to see whether biasing is even enabled for this object -+ Label cas_label; -+ int null_check_offset = -1; -+ if (!swap_reg_contains_mark) { -+ null_check_offset = offset(); -+ ld(swap_reg, mark_addr); -+ } -+ andi(tmp_reg, swap_reg, markOopDesc::biased_lock_mask_in_place); -+ xori(t0, tmp_reg, markOopDesc::biased_lock_pattern); -+ bnez(t0, cas_label); // don't care flag unless jumping to done -+ // The bias pattern is present in the object's header. Need to check -+ // whether the bias owner and the epoch are both still current. -+ load_prototype_header(tmp_reg, obj_reg); -+ orr(tmp_reg, tmp_reg, xthread); -+ xorr(tmp_reg, swap_reg, tmp_reg); -+ andi(tmp_reg, tmp_reg, ~((int) markOopDesc::age_mask_in_place)); -+ if (flag->is_valid()) { -+ mv(flag, tmp_reg); -+ } ++ update_mdp_for_ret(return_bci); + -+ if (counters != NULL) { -+ Label around; -+ bnez(tmp_reg, around); -+ atomic_incw(Address((address)counters->biased_lock_entry_count_addr()), tmp_reg, t0); -+ j(done); -+ bind(around); -+ } else { -+ beqz(tmp_reg, done); ++ bind(profile_continue); + } ++} + -+ Label try_revoke_bias; -+ Label try_rebias; -+ -+ // At this point we know that the header has the bias pattern and -+ // that we are not the bias owner in the current epoch. We need to -+ // figure out more details about the state of the header in order to -+ // know what operations can be legally performed on the object's -+ // header. -+ -+ // If the low three bits in the xor result aren't clear, that means -+ // the prototype header is no longer biased and we have to revoke -+ // the bias on this object. -+ andi(t0, tmp_reg, markOopDesc::biased_lock_mask_in_place); -+ bnez(t0, try_revoke_bias); ++void InterpreterMacroAssembler::profile_null_seen(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; + -+ // Biasing is still enabled for this data type. See whether the -+ // epoch of the current bias is still valid, meaning that the epoch -+ // bits of the mark word are equal to the epoch bits of the -+ // prototype header. (Note that the prototype header's epoch bits -+ // only change at a safepoint.) If not, attempt to rebias the object -+ // toward the current thread. Note that we must be absolutely sure -+ // that the current epoch is invalid in order to do this because -+ // otherwise the manipulations it performs on the mark word are -+ // illegal. -+ andi(t0, tmp_reg, markOopDesc::epoch_mask_in_place); -+ bnez(t0, try_rebias); ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); + -+ // The epoch of the current bias is still valid but we know nothing -+ // about the owner; it might be set or it might be clear. Try to -+ // acquire the bias of the object using an atomic operation. If this -+ // fails we will go in to the runtime to revoke the object's bias. -+ // Note that we first construct the presumed unbiased header so we -+ // don't accidentally blow away another thread's valid bias. 
-+ { -+ Label cas_success; -+ Label counter; -+ mv(t0, (int64_t)(markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place)); -+ andr(swap_reg, swap_reg, t0); -+ orr(tmp_reg, swap_reg, xthread); -+ cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, slow_case); -+ // cas failed here if slow_cass == NULL -+ if (flag->is_valid()) { -+ mv(flag, 1); -+ j(counter); -+ } ++ set_mdp_flag_at(mdp, BitData::null_seen_byte_constant()); + -+ // If the biasing toward our thread failed, this means that -+ // another thread succeeded in biasing it toward itself and we -+ // need to revoke that bias. The revocation will occur in the -+ // interpreter runtime in the slow case. -+ bind(cas_success); -+ if (flag->is_valid()) { -+ mv(flag, 0); -+ bind(counter); ++ // The method data pointer needs to be updated. ++ int mdp_delta = in_bytes(BitData::bit_data_size()); ++ if (TypeProfileCasts) { ++ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); + } ++ update_mdp_by_constant(mdp, mdp_delta); + -+ if (counters != NULL) { -+ atomic_incw(Address((address)counters->anonymously_biased_lock_entry_count_addr()), -+ tmp_reg, t0); -+ } ++ bind(profile_continue); + } -+ j(done); ++} + -+ bind(try_rebias); -+ // At this point we know the epoch has expired, meaning that the -+ // current "bias owner", if any, is actually invalid. Under these -+ // circumstances _only_, we are allowed to use the current header's -+ // value as the comparison value when doing the cas to acquire the -+ // bias in the current epoch. In other words, we allow transfer of -+ // the bias from one thread to another directly in this situation. -+ // -+ // FIXME: due to a lack of registers we currently blow away the age -+ // bits in this situation. Should attempt to preserve them. -+ { -+ Label cas_success; -+ Label counter; -+ load_prototype_header(tmp_reg, obj_reg); -+ orr(tmp_reg, xthread, tmp_reg); -+ cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, slow_case); -+ // cas failed here if slow_cass == NULL -+ if (flag->is_valid()) { -+ mv(flag, 1); -+ j(counter); -+ } ++void InterpreterMacroAssembler::profile_typecheck_failed(Register mdp) { ++ if (ProfileInterpreter && TypeProfileCasts) { ++ Label profile_continue; + -+ // If the biasing toward our thread failed, then another thread -+ // succeeded in biasing it toward itself and we need to revoke that -+ // bias. The revocation will occur in the runtime in the slow case. -+ bind(cas_success); -+ if (flag->is_valid()) { -+ mv(flag, 0); -+ bind(counter); -+ } ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); + -+ if (counters != NULL) { -+ atomic_incw(Address((address)counters->rebiased_lock_entry_count_addr()), -+ tmp_reg, t0); -+ } -+ } -+ j(done); ++ int count_offset = in_bytes(CounterData::count_offset()); ++ // Back up the address, since we have already bumped the mdp. ++ count_offset -= in_bytes(VirtualCallData::virtual_call_data_size()); + -+ // don't care flag unless jumping to done -+ bind(try_revoke_bias); -+ // The prototype mark in the klass doesn't have the bias bit set any -+ // more, indicating that objects of this data type are not supposed -+ // to be biased any more. We are going to try to reset the mark of -+ // this object to the prototype value and fall through to the -+ // CAS-based locking scheme. 
Note that if our CAS fails, it means -+ // that another thread raced us for the privilege of revoking the -+ // bias of this particular object, so it's okay to continue in the -+ // normal locking code. -+ // -+ // FIXME: due to a lack of registers we currently blow away the age -+ // bits in this situation. Should attempt to preserve them. -+ { -+ Label cas_success, nope; -+ load_prototype_header(tmp_reg, obj_reg); -+ cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, &nope); -+ bind(cas_success); ++ // *Decrement* the counter. We expect to see zero or small negatives. ++ increment_mdp_data_at(mdp, count_offset, true); + -+ // Fall through to the normal CAS-based lock, because no matter what -+ // the result of the above CAS, some thread must have succeeded in -+ // removing the bias bit from the object's header. -+ if (counters != NULL) { -+ atomic_incw(Address((address)counters->revoked_lock_entry_count_addr()), tmp_reg, -+ t0); -+ } -+ bind(nope); ++ bind (profile_continue); + } ++} + -+ bind(cas_label); ++void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass, Register reg2) { ++ if (ProfileInterpreter) { ++ Label profile_continue; + -+ return null_check_offset; -+} ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); + -+void MacroAssembler::atomic_incw(Register counter_addr, Register tmp) { -+ Label retry_load; -+ bind(retry_load); -+ // flush and load exclusive from the memory location -+ lr_w(tmp, counter_addr); -+ addw(tmp, tmp, 1); -+ // if we store+flush with no intervening write tmp wil be zero -+ sc_w(tmp, tmp, counter_addr); -+ bnez(tmp, retry_load); -+} ++ // The method data pointer needs to be updated. ++ int mdp_delta = in_bytes(BitData::bit_data_size()); ++ if (TypeProfileCasts) { ++ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); + -+void MacroAssembler::far_jump(Address entry, Register tmp) { -+ assert(ReservedCodeCacheSize < 4*G, "branch out of range"); -+ assert(CodeCache::find_blob(entry.target()) != NULL, -+ "destination of far call not found in code cache"); -+ int32_t offset = 0; -+ if (far_branches()) { -+ // We can use auipc + jalr here because we know that the total size of -+ // the code cache cannot exceed 2Gb. -+ la_patchable(tmp, entry, offset); -+ jalr(x0, tmp, offset); -+ } else { -+ j(entry); ++ // Record the object type. ++ record_klass_in_profile(klass, mdp, reg2, false); ++ } ++ update_mdp_by_constant(mdp, mdp_delta); ++ ++ bind(profile_continue); + } +} + -+void MacroAssembler::far_call(Address entry, Register tmp) { -+ assert(ReservedCodeCacheSize < 4*G, "branch out of range"); -+ assert(CodeCache::find_blob(entry.target()) != NULL, -+ "destination of far call not found in code cache"); -+ int32_t offset = 0; -+ if (far_branches()) { -+ // We can use auipc + jalr here because we know that the total size of -+ // the code cache cannot exceed 2Gb. -+ la_patchable(tmp, entry, offset); -+ jalr(x1, tmp, offset); // link -+ } else { -+ jal(entry); // link ++void InterpreterMacroAssembler::profile_switch_default(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Update the default case count ++ increment_mdp_data_at(mdp, ++ in_bytes(MultiBranchData::default_count_offset())); ++ ++ // The method data pointer needs to be updated. 
++ update_mdp_by_offset(mdp, ++ in_bytes(MultiBranchData:: ++ default_displacement_offset())); ++ ++ bind(profile_continue); + } +} + -+void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, -+ Register super_klass, -+ Register tmp_reg, -+ Label* L_success, -+ Label* L_failure, -+ Label* L_slow_path, -+ Register super_check_offset) { -+ assert_different_registers(sub_klass, super_klass, tmp_reg); -+ bool must_load_sco = (super_check_offset == noreg); -+ if (must_load_sco) { -+ assert(tmp_reg != noreg, "supply either a tmp or a register offset"); -+ } else { -+ assert_different_registers(sub_klass, super_klass, super_check_offset); -+ } ++void InterpreterMacroAssembler::profile_switch_case(Register index, ++ Register mdp, ++ Register reg2) { ++ if (ProfileInterpreter) { ++ Label profile_continue; + -+ Label L_fallthrough; -+ int label_nulls = 0; -+ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } -+ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } -+ if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } -+ assert(label_nulls <= 1, "at most one NULL in batch"); ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); + -+ int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); -+ int sco_offset = in_bytes(Klass::super_check_offset_offset()); -+ Address super_check_offset_addr(super_klass, sco_offset); ++ // Build the base (index * per_case_size_in_bytes()) + ++ // case_array_offset_in_bytes() ++ mvw(reg2, in_bytes(MultiBranchData::per_case_size())); ++ mvw(t0, in_bytes(MultiBranchData::case_array_offset())); ++ Assembler::mul(index, index, reg2); ++ Assembler::add(index, index, t0); + -+ // Hacked jmp, which may only be used just before L_fallthrough. -+#define final_jmp(label) \ -+ if (&(label) == &L_fallthrough) { /*do nothing*/ } \ -+ else j(label) /*omit semi*/ ++ // Update the case count ++ increment_mdp_data_at(mdp, ++ index, ++ in_bytes(MultiBranchData::relative_count_offset())); + -+ // If the pointers are equal, we are done (e.g., String[] elements). -+ // This self-check enables sharing of secondary supertype arrays among -+ // non-primary types such as array-of-interface. Otherwise, each such -+ // type would need its own customized SSA. -+ // We move this check to the front fo the fast path because many -+ // type checks are in fact trivially successful in this manner, -+ // so we get a nicely predicted branch right at the start of the check. -+ beq(sub_klass, super_klass, *L_success); ++ // The method data pointer need to be updated. ++ update_mdp_by_offset(mdp, ++ index, ++ in_bytes(MultiBranchData:: ++ relative_displacement_offset())); + -+ // Check the supertype display: -+ if (must_load_sco) { -+ lwu(tmp_reg, super_check_offset_addr); -+ super_check_offset = tmp_reg; ++ bind(profile_continue); + } -+ add(t0, sub_klass, super_check_offset); -+ Address super_check_addr(t0); -+ ld(t0, super_check_addr); // load displayed supertype ++} + -+ // Ths check has worked decisively for primary supers. -+ // Secondary supers are sought in the super_cache ('super_cache_addr'). -+ // (Secondary supers are interfaces and very deeply nested subtypes.) -+ // This works in the same check above because of a tricky aliasing -+ // between the super_Cache and the primary super dispaly elements. -+ // (The 'super_check_addr' can address either, as the case requires.) 
-+ // Note that the cache is updated below if it does not help us find -+ // what we need immediately. -+ // So if it was a primary super, we can just fail immediately. -+ // Otherwise, it's the slow path for us (no success at this point). ++void InterpreterMacroAssembler::verify_FPU(int stack_depth, TosState state) { ; } + -+ beq(super_klass, t0, *L_success); -+ mv(t1, sc_offset); -+ if (L_failure == &L_fallthrough) { -+ beq(super_check_offset, t1, *L_slow_path); -+ } else { -+ bne(super_check_offset, t1, *L_failure, /* is_far */ true); -+ final_jmp(*L_slow_path); ++void InterpreterMacroAssembler::notify_method_entry() { ++ // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to ++ // track stack depth. If it is possible to enter interp_only_mode we add ++ // the code to check if the event should be sent. ++ if (JvmtiExport::can_post_interpreter_events()) { ++ Label L; ++ lwu(x13, Address(xthread, JavaThread::interp_only_mode_offset())); ++ beqz(x13, L); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_method_entry)); ++ bind(L); + } + -+ bind(L_fallthrough); -+ -+#undef final_jmp -+} -+ -+// Scans count pointer sized words at [addr] for occurence of value, -+// generic -+void MacroAssembler::repne_scan(Register addr, Register value, Register count, -+ Register tmp) { -+ Label Lloop, Lexit; -+ beqz(count, Lexit); -+ bind(Lloop); -+ ld(tmp, addr); -+ beq(value, tmp, Lexit); -+ add(addr, addr, wordSize); -+ sub(count, count, 1); -+ bnez(count, Lloop); -+ bind(Lexit); -+} -+ -+void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, -+ Register super_klass, -+ Register tmp_reg, -+ Register tmp2_reg, -+ Label* L_success, -+ Label* L_failure) { -+ assert_different_registers(sub_klass, super_klass, tmp_reg); -+ if (tmp2_reg != noreg) { -+ assert_different_registers(sub_klass, super_klass, tmp_reg, tmp2_reg, t0); ++ { ++ SkipIfEqual skip(this, &DTraceMethodProbes, false); ++ get_method(c_rarg1); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), ++ xthread, c_rarg1); + } -+#define IS_A_TEMP(reg) ((reg) == tmp_reg || (reg) == tmp2_reg) + -+ Label L_fallthrough; -+ int label_nulls = 0; -+ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } -+ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } ++ // RedefineClasses() tracing support for obsolete method entry ++ if (log_is_enabled(Trace, redefine, class, obsolete)) { ++ get_method(c_rarg1); ++ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), ++ xthread, c_rarg1); ++ } ++} + -+ assert(label_nulls <= 1, "at most one NULL in the batch"); + -+ // A couple of useful fields in sub_klass: -+ int ss_offset = in_bytes(Klass::secondary_supers_offset()); -+ int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); -+ Address secondary_supers_addr(sub_klass, ss_offset); -+ Address super_cache_addr( sub_klass, sc_offset); ++void InterpreterMacroAssembler::notify_method_exit( ++ TosState state, NotifyMethodExitMode mode) { ++ // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to ++ // track stack depth. If it is possible to enter interp_only_mode we add ++ // the code to check if the event should be sent. ++ if (mode == NotifyJVMTI && JvmtiExport::can_post_interpreter_events()) { ++ Label L; ++ // Note: frame::interpreter_frame_result has a dependency on how the ++ // method result is saved across the call to post_method_exit. 
If this ++ // is changed then the interpreter_frame_result implementation will ++ // need to be updated too. + -+ BLOCK_COMMENT("check_klass_subtype_slow_path"); ++ // template interpreter will leave the result on the top of the stack. ++ push(state); ++ lwu(x13, Address(xthread, JavaThread::interp_only_mode_offset())); ++ beqz(x13, L); ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit)); ++ bind(L); ++ pop(state); ++ } + -+ // Do a linear scan of the secondary super-klass chain. -+ // This code is rarely used, so simplicity is a virtue here. -+ // The repne_scan instruction uses fixed registers, which we must spill. -+ // Don't worry too much about pre-existing connecitons with the input regs. ++ { ++ SkipIfEqual skip(this, &DTraceMethodProbes, false); ++ push(state); ++ get_method(c_rarg1); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), ++ xthread, c_rarg1); ++ pop(state); ++ } ++} + -+ assert(sub_klass != x10, "killed reg"); // killed by mv(x10, super) -+ assert(sub_klass != x12, "killed reg"); // killed by la(x12, &pst_counter) + -+ RegSet pushed_registers; -+ if (!IS_A_TEMP(x12)) { -+ pushed_registers += x12; ++// Jump if ((*counter_addr += increment) & mask) satisfies the condition. ++void InterpreterMacroAssembler::increment_mask_and_jump(Address counter_addr, ++ int increment, Address mask, ++ Register tmp1, Register tmp2, ++ bool preloaded, Label* where) { ++ Label done; ++ if (!preloaded) { ++ lwu(tmp1, counter_addr); + } -+ if (!IS_A_TEMP(x15)) { -+ pushed_registers += x15; ++ add(tmp1, tmp1, increment); ++ sw(tmp1, counter_addr); ++ lwu(tmp2, mask); ++ andr(tmp1, tmp1, tmp2); ++ bnez(tmp1, done); ++ j(*where); // offset is too large so we have to use j instead of beqz here ++ bind(done); ++} ++ ++void InterpreterMacroAssembler::call_VM_leaf_base(address entry_point, ++ int number_of_arguments) { ++ // interpreter specific ++ // ++ // Note: No need to save/restore rbcp & rlocals pointer since these ++ // are callee saved registers and no blocking/ GC can happen ++ // in leaf calls. ++#ifdef ASSERT ++ { ++ Label L; ++ ld(t0, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ beqz(t0, L); ++ stop("InterpreterMacroAssembler::call_VM_leaf_base:" ++ " last_sp != NULL"); ++ bind(L); + } ++#endif /* ASSERT */ ++ // super call ++ MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments); ++} + -+ if (super_klass != x10 || UseCompressedOops) { -+ if (!IS_A_TEMP(x10)) { -+ pushed_registers += x10; -+ } ++void InterpreterMacroAssembler::call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ // interpreter specific ++ // ++ // Note: Could avoid restoring locals ptr (callee saved) - however doesn't ++ // really make a difference for these runtime calls, since they are ++ // slow anyway. Btw., bcp must be saved/restored since it may change ++ // due to GC. 
++ save_bcp(); ++#ifdef ASSERT ++ { ++ Label L; ++ ld(t0, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ beqz(t0, L); ++ stop("InterpreterMacroAssembler::call_VM_base:" ++ " last_sp != NULL"); ++ bind(L); + } ++#endif /* ASSERT */ ++ // super call ++ MacroAssembler::call_VM_base(oop_result, noreg, last_java_sp, ++ entry_point, number_of_arguments, ++ check_exceptions); ++// interpreter specific ++ restore_bcp(); ++ restore_locals(); ++} + -+ push_reg(pushed_registers, sp); ++void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& mdo_addr, Register tmp) { ++ assert_different_registers(obj, tmp, t0, mdo_addr.base()); ++ Label update, next, none; + -+ // Get super_klass value into x10 (even if it was in x15 or x12) -+ mv(x10, super_klass); ++ verify_oop(obj); + -+#ifndef PRODUCT -+ mv(t1, (address)&SharedRuntime::_partial_subtype_ctr); -+ Address pst_counter_addr(t1); -+ ld(t0, pst_counter_addr); -+ add(t0, t0, 1); -+ sd(t0, pst_counter_addr); -+#endif // PRODUCT ++ bnez(obj, update); ++ orptr(mdo_addr, TypeEntries::null_seen, t0, tmp); ++ j(next); + -+ // We will consult the secondary-super array. -+ ld(x15, secondary_supers_addr); -+ // Load the array length. -+ lwu(x12, Address(x15, Array::length_offset_in_bytes())); -+ // Skip to start of data. -+ add(x15, x15, Array::base_offset_in_bytes()); ++ bind(update); ++ load_klass(obj, obj); + -+ // Set t0 to an obvious invalid value, falling through by default -+ mv(t0, -1); -+ // Scan X12 words at [X15] for an occurrence of X10. -+ repne_scan(x15, x10, x12, t0); ++ ld(t0, mdo_addr); ++ xorr(obj, obj, t0); ++ andi(t0, obj, TypeEntries::type_klass_mask); ++ beqz(t0, next); // klass seen before, nothing to ++ // do. The unknown bit may have been ++ // set already but no need to check. + -+ // pop will restore x10, so we should use a temp register to keep its value -+ mv(t1, x10); ++ andi(t0, obj, TypeEntries::type_unknown); ++ bnez(t0, next); ++ // already unknown. Nothing to do anymore. + -+ // Unspill the temp. registers: -+ pop_reg(pushed_registers, sp); ++ ld(t0, mdo_addr); ++ beqz(t0, none); ++ li(tmp, (u1)TypeEntries::null_seen); ++ beq(t0, tmp, none); ++ // There is a chance that the checks above (re-reading profiling ++ // data from memory) fail if another thread has just set the ++ // profiling to this obj's klass ++ ld(t0, mdo_addr); ++ xorr(obj, obj, t0); ++ andi(t0, obj, TypeEntries::type_klass_mask); ++ beqz(t0, next); + -+ bne(t1, t0, *L_failure); ++ // different than before. Cannot keep accurate profile. ++ orptr(mdo_addr, TypeEntries::type_unknown, t0, tmp); ++ j(next); + -+ // Success. Cache the super we found an proceed in triumph. -+ sd(super_klass, super_cache_addr); ++ bind(none); ++ // first time here. Set profile type. ++ sd(obj, mdo_addr); + -+ if (L_success != &L_fallthrough) { -+ j(*L_success); ++ bind(next); ++} ++ ++void InterpreterMacroAssembler::profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual) { ++ if (!ProfileInterpreter) { ++ return; + } + -+#undef IS_A_TEMP ++ if (MethodData::profile_arguments() || MethodData::profile_return()) { ++ Label profile_continue; + -+ bind(L_fallthrough); -+} ++ test_method_data_pointer(mdp, profile_continue); + -+// Defines obj, preserves var_size_in_bytes, okay for tmp2 == var_size_in_bytes. 
-+void MacroAssembler::tlab_allocate(Register obj, -+ Register var_size_in_bytes, -+ int con_size_in_bytes, -+ Register tmp1, -+ Register tmp2, -+ Label& slow_case, -+ bool is_far) { -+ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ bs->tlab_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, tmp1, tmp2, slow_case, is_far); -+} ++ int off_to_start = is_virtual ? in_bytes(VirtualCallData::virtual_call_data_size()) : in_bytes(CounterData::counter_data_size()); + -+// Defines obj, preserves var_size_in_bytes -+void MacroAssembler::eden_allocate(Register obj, -+ Register var_size_in_bytes, -+ int con_size_in_bytes, -+ Register tmp1, -+ Label& slow_case, -+ bool is_far) { -+ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ bs->eden_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, tmp1, slow_case, is_far); -+} ++ lbu(t0, Address(mdp, in_bytes(DataLayout::tag_offset()) - off_to_start)); ++ if (is_virtual) { ++ li(tmp, (u1)DataLayout::virtual_call_type_data_tag); ++ bne(t0, tmp, profile_continue); ++ } else { ++ li(tmp, (u1)DataLayout::call_type_data_tag); ++ bne(t0, tmp, profile_continue); ++ } + ++ // calculate slot step ++ static int stack_slot_offset0 = in_bytes(TypeEntriesAtCall::stack_slot_offset(0)); ++ static int slot_step = in_bytes(TypeEntriesAtCall::stack_slot_offset(1)) - stack_slot_offset0; + -+// get_thread() can be called anywhere inside generated code so we -+// need to save whatever non-callee save context might get clobbered -+// by the call to Thread::current() or, indeed, the call setup code -+void MacroAssembler::get_thread(Register thread) { -+ // save all call-clobbered regs except thread -+ RegSet saved_regs = RegSet::of(x10) + ra - thread; -+ push_reg(saved_regs, sp); ++ // calculate type step ++ static int argument_type_offset0 = in_bytes(TypeEntriesAtCall::argument_type_offset(0)); ++ static int type_step = in_bytes(TypeEntriesAtCall::argument_type_offset(1)) - argument_type_offset0; + -+ mv(ra, CAST_FROM_FN_PTR(address, Thread::current)); -+ jalr(ra); -+ if (thread != c_rarg0) { -+ mv(thread, c_rarg0); -+ } ++ if (MethodData::profile_arguments()) { ++ Label done, loop, loopEnd, profileArgument, profileReturnType; ++ RegSet pushed_registers; ++ pushed_registers += x15; ++ pushed_registers += x16; ++ pushed_registers += x17; ++ Register mdo_addr = x15; ++ Register index = x16; ++ Register off_to_args = x17; ++ push_reg(pushed_registers, sp); + -+ // restore pushed registers -+ pop_reg(saved_regs, sp); -+} ++ mv(off_to_args, in_bytes(TypeEntriesAtCall::args_data_offset())); ++ mv(t0, TypeProfileArgsLimit); ++ beqz(t0, loopEnd); + -+void MacroAssembler::load_byte_map_base(Register reg) { -+ jbyte *byte_map_base = ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base(); -+ mv(reg, (uint64_t)byte_map_base); -+} ++ mv(index, zr); // index < TypeProfileArgsLimit ++ bind(loop); ++ bgtz(index, profileReturnType); ++ li(t0, (int)MethodData::profile_return()); ++ beqz(t0, profileArgument); // (index > 0 || MethodData::profile_return()) == false ++ bind(profileReturnType); ++ // If return value type is profiled we may have no argument to profile ++ ld(tmp, Address(mdp, in_bytes(TypeEntriesAtCall::cell_count_offset()))); ++ mv(t1, - TypeStackSlotEntries::per_arg_count()); ++ mul(t1, index, t1); ++ add(tmp, tmp, t1); ++ li(t1, TypeStackSlotEntries::per_arg_count()); ++ add(t0, mdp, off_to_args); ++ blt(tmp, t1, done); + -+void MacroAssembler::la_patchable(Register reg1, const 
Address &dest, int32_t &offset) { -+ relocInfo::relocType rtype = dest.rspec().reloc()->type(); -+ unsigned long low_address = (uintptr_t)CodeCache::low_bound(); -+ unsigned long high_address = (uintptr_t)CodeCache::high_bound(); -+ unsigned long dest_address = (uintptr_t)dest.target(); -+ long offset_low = dest_address - low_address; -+ long offset_high = dest_address - high_address; ++ bind(profileArgument); + -+ assert(is_valid_riscv64_address(dest.target()), "bad address"); -+ assert(dest.getMode() == Address::literal, "la_patchable must be applied to a literal address"); ++ ld(tmp, Address(callee, Method::const_offset())); ++ load_unsigned_short(tmp, Address(tmp, ConstMethod::size_of_parameters_offset())); ++ // stack offset o (zero based) from the start of the argument ++ // list, for n arguments translates into offset n - o - 1 from ++ // the end of the argument list ++ li(t0, stack_slot_offset0); ++ li(t1, slot_step); ++ mul(t1, index, t1); ++ add(t0, t0, t1); ++ add(t0, mdp, t0); ++ ld(t0, Address(t0)); ++ sub(tmp, tmp, t0); ++ addi(tmp, tmp, -1); ++ Address arg_addr = argument_address(tmp); ++ ld(tmp, arg_addr); + -+ code_section()->relocate(pc(), dest.rspec()); -+ // RISC-V doesn't compute a page-aligned address, in order to partially -+ // compensate for the use of *signed* offsets in its base+disp12 -+ // addressing mode (RISC-V's PC-relative reach remains asymmetric -+ // [-(2G + 2K), 2G - 2K)). -+ if (offset_high >= -((1L << 31) + (1L << 11)) && offset_low < (1L << 31) - (1L << 11)) { -+ int64_t distance = dest.target() - pc(); -+ auipc(reg1, (int32_t)distance + 0x800); -+ offset = ((int32_t)distance << 20) >> 20; -+ } else { -+ movptr_with_offset(reg1, dest.target(), offset); -+ } -+} ++ li(t0, argument_type_offset0); ++ li(t1, type_step); ++ mul(t1, index, t1); ++ add(t0, t0, t1); ++ add(mdo_addr, mdp, t0); ++ Address mdo_arg_addr(mdo_addr, 0); ++ profile_obj_type(tmp, mdo_arg_addr, t1); + -+void MacroAssembler::build_frame(int framesize) { -+ assert(framesize > 0, "framesize must be > 0"); -+ sub(sp, sp, framesize); -+ sd(fp, Address(sp, framesize - 2 * wordSize)); -+ sd(ra, Address(sp, framesize - wordSize)); -+ if (PreserveFramePointer) { add(fp, sp, framesize); } -+} ++ int to_add = in_bytes(TypeStackSlotEntries::per_arg_size()); ++ addi(off_to_args, off_to_args, to_add); + -+void MacroAssembler::remove_frame(int framesize) { -+ assert(framesize > 0, "framesize must be > 0"); -+ ld(fp, Address(sp, framesize - 2 * wordSize)); -+ ld(ra, Address(sp, framesize - wordSize)); -+ add(sp, sp, framesize); -+} ++ // increment index by 1 ++ addi(index, index, 1); ++ li(t1, TypeProfileArgsLimit); ++ blt(index, t1, loop); ++ bind(loopEnd); + -+void MacroAssembler::reserved_stack_check() { -+ // testing if reserved zone needs to be enabled -+ Label no_reserved_zone_enabling; ++ if (MethodData::profile_return()) { ++ ld(tmp, Address(mdp, in_bytes(TypeEntriesAtCall::cell_count_offset()))); ++ addi(tmp, tmp, -TypeProfileArgsLimit*TypeStackSlotEntries::per_arg_count()); ++ } + -+ ld(t0, Address(xthread, JavaThread::reserved_stack_activation_offset())); -+ bltu(sp, t0, no_reserved_zone_enabling); ++ add(t0, mdp, off_to_args); ++ bind(done); ++ mv(mdp, t0); + -+ enter(); // RA and FP are live. 
-+ mv(c_rarg0, xthread); -+ int32_t offset = 0; -+ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone)), offset); -+ jalr(x1, t0, offset); -+ leave(); ++ // unspill the clobbered registers ++ pop_reg(pushed_registers, sp); + -+ // We have already removed our own frame. -+ // throw_delayed_StackOverflowError will think that it's been -+ // called by our caller. -+ offset = 0; -+ la_patchable(t0, RuntimeAddress(StubRoutines::throw_delayed_StackOverflowError_entry()), offset); -+ jalr(x0, t0, offset); -+ should_not_reach_here(); ++ if (MethodData::profile_return()) { ++ // We're right after the type profile for the last ++ // argument. tmp is the number of cells left in the ++ // CallTypeData/VirtualCallTypeData to reach its end. Non null ++ // if there's a return to profile. ++ assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type"); ++ shadd(mdp, tmp, mdp, tmp, exact_log2(DataLayout::cell_size)); ++ } ++ sd(mdp, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); ++ } else { ++ assert(MethodData::profile_return(), "either profile call args or call ret"); ++ update_mdp_by_constant(mdp, in_bytes(TypeEntriesAtCall::return_only_size())); ++ } + -+ bind(no_reserved_zone_enabling); -+} ++ // mdp points right after the end of the ++ // CallTypeData/VirtualCallTypeData, right after the cells for the ++ // return value type if there's one + -+// Move the address of the polling page into dest. -+void MacroAssembler::get_polling_page(Register dest, address page, int32_t &offset, relocInfo::relocType rtype) { -+ if (SafepointMechanism::uses_thread_local_poll()) { -+ ld(dest, Address(xthread, Thread::polling_page_offset())); -+ } else { -+ uint64_t align = (uint64_t)page & 0xfff; -+ assert(align == 0, "polling page must be page aligned"); -+ la_patchable(dest, Address(page, rtype), offset); ++ bind(profile_continue); + } +} + -+// Move the address of the polling page into dest. -+void MacroAssembler::read_polling_page(Register dest, address page, relocInfo::relocType rtype) { -+ int32_t offset = 0; -+ get_polling_page(dest, page, offset, rtype); -+ read_polling_page(dest, offset, rtype); -+} ++void InterpreterMacroAssembler::profile_return_type(Register mdp, Register ret, Register tmp) { ++ assert_different_registers(mdp, ret, tmp, xbcp, t0, t1); ++ if (ProfileInterpreter && MethodData::profile_return()) { ++ Label profile_continue, done; + -+// Read the polling page. The address of the polling page must -+// already be in r. -+void MacroAssembler::read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype) { -+ code_section()->relocate(pc(), rtype); -+ lwu(zr, Address(r, offset)); -+} ++ test_method_data_pointer(mdp, profile_continue); + -+void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { -+#ifdef ASSERT -+ { -+ ThreadInVMfromUnknown tiv; -+ assert (UseCompressedOops, "should only be used for compressed oops"); -+ assert (Universe::heap() != NULL, "java heap should be initialized"); -+ assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); -+ assert(Universe::heap()->is_in_reserved(JNIHandles::resolve(obj)), "should be real oop"); ++ if (MethodData::profile_return_jsr292_only()) { ++ assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2"); ++ ++ // If we don't profile all invoke bytecodes we must make sure ++ // it's a bytecode we indeed profile. 
We can't go back to the ++ // begining of the ProfileData we intend to update to check its ++ // type because we're right after it and we don't known its ++ // length ++ Label do_profile; ++ lbu(t0, Address(xbcp, 0)); ++ li(tmp, (u1)Bytecodes::_invokedynamic); ++ beq(t0, tmp, do_profile); ++ li(tmp, (u1)Bytecodes::_invokehandle); ++ beq(t0, tmp, do_profile); ++ get_method(tmp); ++ lhu(t0, Address(tmp, Method::intrinsic_id_offset_in_bytes())); ++ li(t1, static_cast(vmIntrinsics::_compiledLambdaForm)); ++ bne(t0, t1, profile_continue); ++ bind(do_profile); ++ } ++ ++ Address mdo_ret_addr(mdp, -in_bytes(ReturnTypeEntry::size())); ++ mv(tmp, ret); ++ profile_obj_type(tmp, mdo_ret_addr, t1); ++ ++ bind(profile_continue); + } -+#endif -+ int oop_index = oop_recorder()->find_index(obj); -+ RelocationHolder rspec = oop_Relocation::spec(oop_index); -+ code_section()->relocate(pc(), rspec); -+ li32(dst, 0xDEADBEEF); -+ zero_extend(dst, dst, 32); +} + -+void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { -+ assert (UseCompressedClassPointers, "should only be used for compressed headers"); -+ assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); -+ int index = oop_recorder()->find_index(k); -+ assert(!Universe::heap()->is_in_reserved(k), "should not be an oop"); ++void InterpreterMacroAssembler::profile_parameters_type(Register mdp, Register tmp1, Register tmp2, Register tmp3) { ++ assert_different_registers(t0, t1, mdp, tmp1, tmp2, tmp3); ++ if (ProfileInterpreter && MethodData::profile_parameters()) { ++ Label profile_continue, done; + -+ RelocationHolder rspec = metadata_Relocation::spec(index); -+ code_section()->relocate(pc(), rspec); -+ narrowKlass nk = Klass::encode_klass(k); -+ li32(dst, nk); -+ zero_extend(dst, dst, 32); -+} ++ test_method_data_pointer(mdp, profile_continue); + -+// Maybe emit a call via a trampoline. If the code cache is small -+// trampolines won't be emitted. -+address MacroAssembler::trampoline_call(Address entry) { -+ assert(JavaThread::current()->is_Compiler_thread(), "just checking"); -+ assert(entry.rspec().type() == relocInfo::runtime_call_type || -+ entry.rspec().type() == relocInfo::opt_virtual_call_type || -+ entry.rspec().type() == relocInfo::static_call_type || -+ entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type"); ++ // Load the offset of the area within the MDO used for ++ // parameters. If it's negative we're not profiling any parameters ++ lwu(tmp1, Address(mdp, in_bytes(MethodData::parameters_type_data_di_offset()) - in_bytes(MethodData::data_offset()))); ++ srli(tmp2, tmp1, 31); ++ bnez(tmp2, profile_continue); // i.e. sign bit set + -+ // We need a trampoline if branches are far. -+ if (far_branches()) { -+ bool in_scratch_emit_size = false; -+#ifdef COMPILER2 -+ // We don't want to emit a trampoline if C2 is generating dummy -+ // code during its branch shortening phase. 
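The parameters check just above loads the 32-bit di offset with lwu (zero-extended) and then isolates its sign bit with srli by 31, so "offset is negative" and "bit 31 is set" are the same test. A one-line sketch of that equivalence; parameters_not_profiled is an illustrative name:

    #include <cstdint>

    // A negative parameters_type_data_di offset means "no parameters profiled";
    // after a zero-extending 32-bit load that is exactly the top bit of the word.
    static bool parameters_not_profiled(uint32_t di_offset) {
      return (di_offset >> 31) != 0;   // same as (int32_t)di_offset < 0
    }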
-+ CompileTask* task = ciEnv::current()->task(); -+ in_scratch_emit_size = -+ (task != NULL && is_c2_compile(task->comp_level()) && -+ Compile::current()->in_scratch_emit_size()); -+#endif -+ if (!in_scratch_emit_size) { -+ address stub = emit_trampoline_stub(offset(), entry.target()); -+ if (stub == NULL) { -+ postcond(pc() == badAddress); -+ return NULL; // CodeCache is full -+ } -+ } -+ } -+ -+ address call_pc = pc(); -+ relocate(entry.rspec()); -+ if (!far_branches()) { -+ jal(entry.target()); -+ } else { -+ jal(pc()); -+ } -+ -+ postcond(pc() != badAddress); -+ return call_pc; -+} -+ -+address MacroAssembler::ic_call(address entry, jint method_index) { -+ RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index); -+ movptr(t1, (address)Universe::non_oop_word()); -+ assert_cond(entry != NULL); -+ return trampoline_call(Address(entry, rh)); -+} -+ -+// Emit a trampoline stub for a call to a target which is too far away. -+// -+// code sequences: -+// -+// call-site: -+// branch-and-link to or -+// -+// Related trampoline stub for this call site in the stub section: -+// load the call target from the constant pool -+// branch (RA still points to the call site above) -+ -+address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset, -+ address dest) { -+ // Max stub size: alignment nop, TrampolineStub. -+ address stub = start_a_stub(NativeInstruction::instruction_size + NativeCallTrampolineStub::instruction_size); -+ if (stub == NULL) { -+ return NULL; // CodeBuffer::expand failed -+ } -+ -+ // Create a trampoline stub relocation which relates this trampoline stub -+ // with the call instruction at insts_call_instruction_offset in the -+ // instructions code-section. ++ // Compute a pointer to the area for parameters from the offset ++ // and move the pointer to the slot for the last ++ // parameters. Collect profiling from last parameter down. 
++ // mdo start + parameters offset + array length - 1 ++ add(mdp, mdp, tmp1); ++ ld(tmp1, Address(mdp, ArrayData::array_len_offset())); ++ add(tmp1, tmp1, - TypeStackSlotEntries::per_arg_count()); + -+ // make sure 4 byte aligned here, so that the destination address would be -+ // 8 byte aligned after 3 intructions -+ while (offset() % wordSize == 0) { nop(); } ++ Label loop; ++ bind(loop); + -+ relocate(trampoline_stub_Relocation::spec(code()->insts()->start() + -+ insts_call_instruction_offset)); -+ const int stub_start_offset = offset(); ++ int off_base = in_bytes(ParametersTypeData::stack_slot_offset(0)); ++ int type_base = in_bytes(ParametersTypeData::type_offset(0)); ++ int per_arg_scale = exact_log2(DataLayout::cell_size); ++ add(t0, mdp, off_base); ++ add(t1, mdp, type_base); + -+ // Now, create the trampoline stub's code: -+ // - load the call -+ // - call -+ Label target; -+ ld(t0, target); // auipc + ld -+ jr(t0); // jalr -+ bind(target); -+ assert(offset() - stub_start_offset == NativeCallTrampolineStub::data_offset, -+ "should be"); -+ emit_int64((intptr_t)dest); ++ shadd(tmp2, tmp1, t0, tmp2, per_arg_scale); ++ // load offset on the stack from the slot for this parameter ++ ld(tmp2, Address(tmp2, 0)); ++ neg(tmp2, tmp2); + -+ const address stub_start_addr = addr_at(stub_start_offset); ++ // read the parameter from the local area ++ shadd(tmp2, tmp2, xlocals, tmp2, Interpreter::logStackElementSize); ++ ld(tmp2, Address(tmp2, 0)); + -+ assert(is_NativeCallTrampolineStub_at(stub_start_addr), "doesn't look like a trampoline"); ++ // profile the parameter ++ shadd(t1, tmp1, t1, t0, per_arg_scale); ++ Address arg_type(t1, 0); ++ profile_obj_type(tmp2, arg_type, tmp3); + -+ end_a_stub(); -+ return stub_start_addr; -+} ++ // go to next parameter ++ add(tmp1, tmp1, - TypeStackSlotEntries::per_arg_count()); ++ bgez(tmp1, loop); + -+Address MacroAssembler::add_memory_helper(const Address dst) { -+ switch (dst.getMode()) { -+ case Address::base_plus_offset: -+ // This is the expected mode, although we allow all the other -+ // forms below. 
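The loop above walks the parameter cells from the last entry down to index 0, stepping by per_arg_count and forming each cell address with shadd-style scaled addressing, base + (index << log2(DataLayout::cell_size)). A plain C++ sketch of the same walk; walk_parameters_down, stack_slot_base and type_base are illustrative stand-ins for the real offsets:

    #include <cstddef>
    #include <cstdint>

    // Index arithmetic of the walk-down loop; the profiling itself is elided.
    static void walk_parameters_down(const uint8_t* mdp, ptrdiff_t stack_slot_base,
                                     ptrdiff_t type_base, int array_len,
                                     int per_arg_count, int cell_size_log2) {
      for (int idx = array_len - per_arg_count; idx >= 0; idx -= per_arg_count) {
        const uint8_t* slot_cell = mdp + stack_slot_base + ((ptrdiff_t)idx << cell_size_log2);
        const uint8_t* type_cell = mdp + type_base       + ((ptrdiff_t)idx << cell_size_log2);
        (void)slot_cell;   // the real code reads the stack slot, loads the local,
        (void)type_cell;   // and calls profile_obj_type on the type cell
      }
    }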
-+ return form_address(dst.base(), dst.offset(), 12, t1); -+ default: -+ la(t1, dst); -+ return Address(t1); ++ bind(profile_continue); + } +} + -+void MacroAssembler::increment(const Address dst, int64_t value) { -+ assert(((dst.getMode() == Address::base_plus_offset && -+ is_offset_in_range(dst.offset(), 12)) || is_imm_in_range(value, 12, 0)), -+ "invalid value and address mode combination"); -+ Address adr = add_memory_helper(dst); -+ assert(!adr.uses(t0), "invalid dst for address increment"); -+ ld(t0, adr); -+ add(t0, t0, value, t1); -+ sd(t0, adr); ++void InterpreterMacroAssembler::get_method_counters(Register method, ++ Register mcs, Label& skip) { ++ Label has_counters; ++ ld(mcs, Address(method, Method::method_counters_offset())); ++ bnez(mcs, has_counters); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::build_method_counters), method); ++ ld(mcs, Address(method, Method::method_counters_offset())); ++ beqz(mcs, skip); // No MethodCounters allocated, OutOfMemory ++ bind(has_counters); +} + -+void MacroAssembler::incrementw(const Address dst, int32_t value) { -+ assert(((dst.getMode() == Address::base_plus_offset && -+ is_offset_in_range(dst.offset(), 12)) || is_imm_in_range(value, 12, 0)), -+ "invalid value and address mode combination"); -+ Address adr = add_memory_helper(dst); -+ assert(!adr.uses(t0), "invalid dst for address increment"); -+ lwu(t0, adr); -+ addw(t0, t0, value, t1); -+ sw(t0, adr); ++#ifdef ASSERT ++void InterpreterMacroAssembler::verify_access_flags(Register access_flags, uint32_t flag_bits, ++ const char* msg, bool stop_by_hit) { ++ Label L; ++ andi(t0, access_flags, flag_bits); ++ if (stop_by_hit) { ++ beqz(t0, L); ++ } else { ++ bnez(t0, L); ++ } ++ stop(msg); ++ bind(L); +} + -+void MacroAssembler::decrement(const Address dst, int64_t value) { -+ assert(((dst.getMode() == Address::base_plus_offset && -+ is_offset_in_range(dst.offset(), 12)) || is_imm_in_range(value, 12, 0)), -+ "invalid value and address mode combination"); -+ Address adr = add_memory_helper(dst); -+ assert(!adr.uses(t0), "invalid dst for address decrement"); -+ ld(t0, adr); -+ sub(t0, t0, value, t1); -+ sd(t0, adr); ++void InterpreterMacroAssembler::verify_frame_setup() { ++ Label L; ++ const Address monitor_block_top(fp, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ ld(t0, monitor_block_top); ++ beq(esp, t0, L); ++ stop("broken stack frame setup in interpreter"); ++ bind(L); +} ++#endif +diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.hpp b/src/hotspot/cpu/riscv/interp_masm_riscv.hpp +new file mode 100644 +index 00000000000..4d8cb086f82 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/interp_masm_riscv.hpp +@@ -0,0 +1,285 @@ ++/* ++ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+void MacroAssembler::decrementw(const Address dst, int32_t value) { -+ assert(((dst.getMode() == Address::base_plus_offset && -+ is_offset_in_range(dst.offset(), 12)) || is_imm_in_range(value, 12, 0)), -+ "invalid value and address mode combination"); -+ Address adr = add_memory_helper(dst); -+ assert(!adr.uses(t0), "invalid dst for address decrement"); -+ lwu(t0, adr); -+ subw(t0, t0, value, t1); -+ sw(t0, adr); -+} ++#ifndef CPU_RISCV_INTERP_MASM_RISCV_HPP ++#define CPU_RISCV_INTERP_MASM_RISCV_HPP + -+void MacroAssembler::cmpptr(Register src1, Address src2, Label& equal) { -+ assert_different_registers(src1, t0); -+ int32_t offset; -+ la_patchable(t0, src2, offset); -+ ld(t0, Address(t0, offset)); -+ beq(src1, t0, equal); -+} ++#include "asm/macroAssembler.hpp" ++#include "interpreter/invocationCounter.hpp" ++#include "runtime/frame.hpp" + -+void MacroAssembler::oop_equal(Register obj1, Register obj2, Label& equal, bool is_far) { -+ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ bs->obj_equals(this, obj1, obj2, equal, is_far); -+} ++// This file specializes the assember with interpreter-specific macros + -+void MacroAssembler::oop_nequal(Register obj1, Register obj2, Label& nequal, bool is_far) { -+ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ bs->obj_nequals(this, obj1, obj2, nequal, is_far); -+} ++typedef ByteSize (*OffsetFunction)(uint); + -+#ifdef COMPILER2 -+// Set dst NaN if either source is NaN. -+void MacroAssembler::minmax_FD(FloatRegister dst, FloatRegister src1, FloatRegister src2, -+ bool is_double, bool is_min) { -+ assert_different_registers(dst, src1, src2); -+ Label Ldone; -+ fsflags(zr); -+ if (is_double) { -+ if (is_min) { -+ fmin_d(dst, src1, src2); -+ } else { -+ fmax_d(dst, src1, src2); -+ } -+ // flt is just used for set fflag NV -+ flt_d(zr, src1, src2); -+ } else { -+ if (is_min) { -+ fmin_s(dst, src1, src2); -+ } else { -+ fmax_s(dst, src1, src2); -+ } -+ // flt is just used for set fflag NV -+ flt_s(zr, src1, src2); -+ } -+ frflags(t0); -+ beqz(t0, Ldone); ++class InterpreterMacroAssembler: public MacroAssembler { ++ protected: ++ // Interpreter specific version of call_VM_base ++ using MacroAssembler::call_VM_leaf_base; + -+ // Src1 or src2 must be NaN here. Set dst NaN. -+ if (is_double) { -+ fadd_d(dst, src1, src2); -+ } else { -+ fadd_s(dst, src1, src2); -+ } -+ bind(Ldone); -+} ++ virtual void call_VM_leaf_base(address entry_point, ++ int number_of_arguments); + -+address MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3, -+ Register tmp4, Register tmp5, Register tmp6, Register result, -+ Register cnt1, int elem_size) { -+ Label DONE, SAME, NEXT_DWORD, SHORT, TAIL, TAIL2, IS_TMP5_ZR; -+ Register tmp1 = t0; -+ Register tmp2 = t1; -+ Register cnt2 = tmp2; // cnt2 only used in array length compare -+ Register elem_per_word = tmp6; -+ int log_elem_size = exact_log2(elem_size); -+ int length_offset = arrayOopDesc::length_offset_in_bytes(); -+ int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? 
T_CHAR : T_BYTE); ++ virtual void call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions); + -+ assert(elem_size == 1 || elem_size == 2, "must be char or byte"); -+ assert_different_registers(a1, a2, result, cnt1, t0, t1, tmp3, tmp4, tmp5, tmp6); -+ mv(elem_per_word, wordSize / elem_size); ++ // base routine for all dispatches ++ void dispatch_base(TosState state, address* table, bool verifyoop = true, ++ bool generate_poll = false, Register Rs = t0); + -+ BLOCK_COMMENT("arrays_equals {"); ++ public: ++ InterpreterMacroAssembler(CodeBuffer* code) : MacroAssembler(code) {} ++ virtual ~InterpreterMacroAssembler() {} + -+ // if (a1 == a2), return true -+ oop_equal(a1, a2, SAME); ++ void load_earlyret_value(TosState state); + -+ mv(result, false); -+ beqz(a1, DONE); -+ beqz(a2, DONE); -+ lwu(cnt1, Address(a1, length_offset)); -+ lwu(cnt2, Address(a2, length_offset)); -+ bne(cnt2, cnt1, DONE); -+ beqz(cnt1, SAME); ++ void jump_to_entry(address entry); + -+ slli(tmp5, cnt1, 3 + log_elem_size); -+ sub(tmp5, zr, tmp5); -+ add(a1, a1, base_offset); -+ add(a2, a2, base_offset); -+ ld(tmp3, Address(a1, 0)); -+ ld(tmp4, Address(a2, 0)); -+ ble(cnt1, elem_per_word, SHORT); // short or same ++ virtual void check_and_handle_popframe(Register java_thread); ++ virtual void check_and_handle_earlyret(Register java_thread); + -+ // Main 16 byte comparison loop with 2 exits -+ bind(NEXT_DWORD); { -+ ld(tmp1, Address(a1, wordSize)); -+ ld(tmp2, Address(a2, wordSize)); -+ sub(cnt1, cnt1, 2 * wordSize / elem_size); -+ blez(cnt1, TAIL); -+ bne(tmp3, tmp4, DONE); -+ ld(tmp3, Address(a1, 2 * wordSize)); -+ ld(tmp4, Address(a2, 2 * wordSize)); -+ add(a1, a1, 2 * wordSize); -+ add(a2, a2, 2 * wordSize); -+ ble(cnt1, elem_per_word, TAIL2); -+ } beq(tmp1, tmp2, NEXT_DWORD); -+ j(DONE); ++ // Interpreter-specific registers ++ void save_bcp() { ++ sd(xbcp, Address(fp, frame::interpreter_frame_bcp_offset * wordSize)); ++ } + -+ bind(TAIL); -+ xorr(tmp4, tmp3, tmp4); -+ xorr(tmp2, tmp1, tmp2); -+ sll(tmp2, tmp2, tmp5); -+ orr(tmp5, tmp4, tmp2); -+ j(IS_TMP5_ZR); ++ void restore_bcp() { ++ ld(xbcp, Address(fp, frame::interpreter_frame_bcp_offset * wordSize)); ++ } + -+ bind(TAIL2); -+ bne(tmp1, tmp2, DONE); ++ void restore_locals() { ++ ld(xlocals, Address(fp, frame::interpreter_frame_locals_offset * wordSize)); ++ } + -+ bind(SHORT); -+ xorr(tmp4, tmp3, tmp4); -+ sll(tmp5, tmp4, tmp5); ++ void restore_constant_pool_cache() { ++ ld(xcpool, Address(fp, frame::interpreter_frame_cache_offset * wordSize)); ++ } + -+ bind(IS_TMP5_ZR); -+ bnez(tmp5, DONE); ++ void get_dispatch(); + -+ bind(SAME); -+ mv(result, true); -+ // That's it. -+ bind(DONE); ++ // Helpers for runtime call arguments/results ++ void get_method(Register reg) { ++ ld(reg, Address(fp, frame::interpreter_frame_method_offset * wordSize)); ++ } + -+ BLOCK_COMMENT("} array_equals"); -+ postcond(pc() != badAddress); -+ return pc(); -+} ++ void get_const(Register reg) { ++ get_method(reg); ++ ld(reg, Address(reg, in_bytes(Method::const_offset()))); ++ } + -+// Compare Strings ++ void get_constant_pool(Register reg) { ++ get_const(reg); ++ ld(reg, Address(reg, in_bytes(ConstMethod::constants_offset()))); ++ } + -+// For Strings we're passed the address of the first characters in a1 -+// and a2 and the length in cnt1. -+// elem_size is the element size in bytes: either 1 or 2. -+// There are two implementations. 
For arrays >= 8 bytes, all -+// comparisons (including the final one, which may overlap) are -+// performed 8 bytes at a time. For strings < 8 bytes, we compare a -+// halfword, then a short, and then a byte. ++ void get_constant_pool_cache(Register reg) { ++ get_constant_pool(reg); ++ ld(reg, Address(reg, ConstantPool::cache_offset_in_bytes())); ++ } + -+void MacroAssembler::string_equals(Register a1, Register a2, -+ Register result, Register cnt1, int elem_size) -+{ -+ Label SAME, DONE, SHORT, NEXT_WORD; -+ Register tmp1 = t0; -+ Register tmp2 = t1; ++ void get_cpool_and_tags(Register cpool, Register tags) { ++ get_constant_pool(cpool); ++ ld(tags, Address(cpool, ConstantPool::tags_offset_in_bytes())); ++ } + -+ assert(elem_size == 1 || elem_size == 2, "must be 2 or 1 byte"); -+ assert_different_registers(a1, a2, result, cnt1, t0, t1); ++ void get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset); ++ void get_cache_and_index_at_bcp(Register cache, Register index, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_and_index_and_bytecode_at_bcp(Register cache, Register index, Register bytecode, int byte_no, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_entry_pointer_at_bcp(Register cache, Register tmp, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_index_at_bcp(Register index, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_method_counters(Register method, Register mcs, Label& skip); + -+ BLOCK_COMMENT("string_equals {"); ++ // Load cpool->resolved_references(index). ++ void load_resolved_reference_at_index(Register result, Register index, Register tmp = x15); + -+ beqz(cnt1, SAME); -+ mv(result, false); ++ // Load cpool->resolved_klass_at(index). ++ void load_resolved_klass_at_offset(Register cpool, Register index, Register klass, Register temp); + -+ // Check for short strings, i.e. smaller than wordSize. -+ sub(cnt1, cnt1, wordSize); -+ blez(cnt1, SHORT); ++ void load_resolved_method_at_index(int byte_no, Register method, Register cache); + -+ // Main 8 byte comparison loop. -+ bind(NEXT_WORD); { -+ ld(tmp1, Address(a1, 0)); -+ add(a1, a1, wordSize); -+ ld(tmp2, Address(a2, 0)); -+ add(a2, a2, wordSize); -+ sub(cnt1, cnt1, wordSize); -+ bne(tmp1, tmp2, DONE); -+ } bgtz(cnt1, NEXT_WORD); ++ void pop_ptr(Register r = x10); ++ void pop_i(Register r = x10); ++ void pop_l(Register r = x10); ++ void pop_f(FloatRegister r = f10); ++ void pop_d(FloatRegister r = f10); ++ void push_ptr(Register r = x10); ++ void push_i(Register r = x10); ++ void push_l(Register r = x10); ++ void push_f(FloatRegister r = f10); ++ void push_d(FloatRegister r = f10); + -+ if (!AvoidUnalignedAccesses) { -+ // Last longword. In the case where length == 4 we compare the -+ // same longword twice, but that's still faster than another -+ // conditional branch. -+ // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when -+ // length == 4. 
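One way to picture the unaligned-access shortcut used above: once at least eight bytes are known to exist, the remainder can be handled by re-reading the eight bytes that end exactly at the last element, even though they may overlap bytes already compared; for length == 4 chars that re-reads the same word, which is still cheaper than another branch. A hedged C++ sketch of the idea; bytes_equal_8wise is illustrative, not the HotSpot routine:

    #include <cstdint>
    #include <cstring>

    // Compare 8 bytes at a time; for a 1..7 byte remainder, reload the final
    // 8 bytes ending at the last byte (possibly overlapping checked bytes).
    // Assumes len >= 8, as the surrounding code guarantees on this path.
    static bool bytes_equal_8wise(const uint8_t* a, const uint8_t* b, size_t len) {
      uint64_t x, y;
      size_t i = 0;
      for (; i + 8 <= len; i += 8) {
        std::memcpy(&x, a + i, 8);
        std::memcpy(&y, b + i, 8);
        if (x != y) return false;
      }
      if (i < len) {
        std::memcpy(&x, a + len - 8, 8);
        std::memcpy(&y, b + len - 8, 8);
        if (x != y) return false;
      }
      return true;
    }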
-+ add(tmp1, a1, cnt1); -+ ld(tmp1, Address(tmp1, 0)); -+ add(tmp2, a2, cnt1); -+ ld(tmp2, Address(tmp2, 0)); -+ bne(tmp1, tmp2, DONE); -+ j(SAME); -+ } ++ void pop(TosState state); // transition vtos -> state ++ void push(TosState state); // transition state -> vtos + -+ bind(SHORT); -+ ld(tmp1, Address(a1)); -+ ld(tmp2, Address(a2)); -+ xorr(tmp1, tmp1, tmp2); -+ neg(cnt1, cnt1); -+ slli(cnt1, cnt1, LogBitsPerByte); -+ sll(tmp1, tmp1, cnt1); -+ bnez(tmp1, DONE); ++ void empty_expression_stack() { ++ ld(esp, Address(fp, frame::interpreter_frame_monitor_block_top_offset * wordSize)); ++ // NULL last_sp until next java call ++ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ } + -+ // Arrays are equal. -+ bind(SAME); -+ mv(result, true); ++ // Helpers for swap and dup ++ void load_ptr(int n, Register val); ++ void store_ptr(int n, Register val); + -+ // That's it. -+ bind(DONE); -+ BLOCK_COMMENT("} string_equals"); -+} ++ // Load float value from 'address'. The value is loaded onto the FPU register v0. ++ void load_float(Address src); ++ void load_double(Address src); + -+typedef void (MacroAssembler::*load_chr_insn)(Register Rd, const Address &adr, Register temp); ++ // Generate a subtype check: branch to ok_is_subtype if sub_klass is ++ // a subtype of super_klass. ++ void gen_subtype_check( Register sub_klass, Label &ok_is_subtype ); + -+// Compare strings. -+void MacroAssembler::string_compare(Register str1, Register str2, -+ Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2, -+ Register tmp3, int ae) -+{ -+ Label DONE, SHORT_LOOP, SHORT_STRING, SHORT_LAST, TAIL, STUB, -+ DIFFERENCE, NEXT_WORD, SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT, -+ SHORT_LOOP_START, TAIL_CHECK, L; ++ // Dispatching ++ void dispatch_prolog(TosState state, int step = 0); ++ void dispatch_epilog(TosState state, int step = 0); ++ // dispatch via t0 ++ void dispatch_only(TosState state, bool generate_poll = false, Register Rs = t0); ++ // dispatch normal table via t0 (assume t0 is loaded already) ++ void dispatch_only_normal(TosState state, Register Rs = t0); ++ void dispatch_only_noverify(TosState state, Register Rs = t0); ++ // load t0 from [xbcp + step] and dispatch via t0 ++ void dispatch_next(TosState state, int step = 0, bool generate_poll = false); ++ // load t0 from [xbcp] and dispatch via t0 and table ++ void dispatch_via (TosState state, address* table); + -+ const int STUB_THRESHOLD = 64 + 8; -+ bool isLL = ae == StrIntrinsicNode::LL; -+ bool isLU = ae == StrIntrinsicNode::LU; -+ bool isUL = ae == StrIntrinsicNode::UL; ++ // jump to an invoked target ++ void prepare_to_jump_from_interpreted(); ++ void jump_from_interpreted(Register method); + -+ bool str1_isL = isLL || isLU; -+ bool str2_isL = isLL || isUL; + -+ // for L strings, 1 byte for 1 character -+ // for U strings, 2 bytes for 1 character -+ int str1_chr_size = str1_isL ? 1 : 2; -+ int str2_chr_size = str2_isL ? 1 : 2; -+ int minCharsInWord = isLL ? wordSize : wordSize / 2; ++ // Returning from interpreted functions ++ // ++ // Removes the current activation (incl. unlocking of monitors) ++ // and sets up the return address. This code is also used for ++ // exception unwindwing. In that case, we do not want to throw ++ // IllegalMonitorStateExceptions, since that might get us into an ++ // infinite rethrow exception loop. ++ // Additionally this code is used for popFrame and earlyReturn. 
++ // In popFrame case we want to skip throwing an exception, ++ // installing an exception, and notifying jvmdi. ++ // In earlyReturn case we only want to skip throwing an exception ++ // and installing an exception. ++ void remove_activation(TosState state, ++ bool throw_monitor_exception = true, ++ bool install_monitor_exception = true, ++ bool notify_jvmdi = true); + -+ load_chr_insn str1_load_chr = str1_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; -+ load_chr_insn str2_load_chr = str2_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; ++ // FIXME: Give us a valid frame at a null check. ++ virtual void null_check(Register reg, int offset = -1) { ++ MacroAssembler::null_check(reg, offset); ++ } + -+ BLOCK_COMMENT("string_compare {"); ++ // Object locking ++ void lock_object (Register lock_reg); ++ void unlock_object(Register lock_reg); + -+ // Bizzarely, the counts are passed in bytes, regardless of whether they -+ // are L or U strings, however the result is always in characters. -+ if (!str1_isL) { -+ sraiw(cnt1, cnt1, 1); -+ } -+ if (!str2_isL) { -+ sraiw(cnt2, cnt2, 1); -+ } ++ // Interpreter profiling operations ++ void set_method_data_pointer_for_bcp(); ++ void test_method_data_pointer(Register mdp, Label& zero_continue); ++ void verify_method_data_pointer(); + -+ // Compute the minimum of the string lengths and save the difference in result. -+ sub(result, cnt1, cnt2); -+ bgt(cnt1, cnt2, L); -+ mv(cnt2, cnt1); -+ bind(L); ++ void set_mdp_data_at(Register mdp_in, int constant, Register value); ++ void increment_mdp_data_at(Address data, bool decrement = false); ++ void increment_mdp_data_at(Register mdp_in, int constant, ++ bool decrement = false); ++ void increment_mdp_data_at(Register mdp_in, Register reg, int constant, ++ bool decrement = false); ++ void increment_mask_and_jump(Address counter_addr, ++ int increment, Address mask, ++ Register tmp1, Register tmp2, ++ bool preloaded, Label* where); + -+ // A very short string -+ mv(t0, minCharsInWord); -+ ble(cnt2, t0, SHORT_STRING); ++ void set_mdp_flag_at(Register mdp_in, int flag_constant); ++ void test_mdp_data_at(Register mdp_in, int offset, Register value, ++ Register test_value_out, ++ Label& not_equal_continue); + -+ // Compare longwords -+ // load first parts of strings and finish initialization while loading -+ { -+ if (str1_isL == str2_isL) { // LL or UU -+ // check if str1 and str2 are same string -+ beq(str1, str2, DONE); -+ // load 8 bytes once to compare -+ ld(tmp1, Address(str1)); -+ ld(tmp2, Address(str2)); -+ mv(t0, STUB_THRESHOLD); -+ bge(cnt2, t0, STUB); -+ sub(cnt2, cnt2, minCharsInWord); -+ beqz(cnt2, TAIL_CHECK); -+ // convert cnt2 from characters to bytes -+ if(!str1_isL) { -+ slli(cnt2, cnt2, 1); -+ } -+ add(str2, str2, cnt2); -+ add(str1, str1, cnt2); -+ sub(cnt2, zr, cnt2); -+ } else if (isLU) { // LU case -+ lwu(tmp1, Address(str1)); -+ ld(tmp2, Address(str2)); -+ mv(t0, STUB_THRESHOLD); -+ bge(cnt2, t0, STUB); -+ addi(cnt2, cnt2, -4); -+ add(str1, str1, cnt2); -+ sub(cnt1, zr, cnt2); -+ slli(cnt2, cnt2, 1); -+ add(str2, str2, cnt2); -+ inflate_lo32(tmp3, tmp1); -+ mv(tmp1, tmp3); -+ sub(cnt2, zr, cnt2); -+ addi(cnt1, cnt1, 4); -+ } else { // UL case -+ ld(tmp1, Address(str1)); -+ lwu(tmp2, Address(str2)); -+ mv(t0, STUB_THRESHOLD); -+ bge(cnt2, t0, STUB); -+ addi(cnt2, cnt2, -4); -+ slli(t0, cnt2, 1); -+ sub(cnt1, zr, t0); -+ add(str1, str1, t0); -+ add(str2, str2, cnt2); -+ inflate_lo32(tmp3, tmp2); -+ mv(tmp2, tmp3); -+ sub(cnt2, zr, 
cnt2); -+ addi(cnt1, cnt1, 8); -+ } -+ addi(cnt2, cnt2, isUL ? 4 : 8); -+ bgez(cnt2, TAIL); -+ xorr(tmp3, tmp1, tmp2); -+ bnez(tmp3, DIFFERENCE); ++ void record_klass_in_profile(Register receiver, Register mdp, ++ Register reg2, bool is_virtual_call); ++ void record_klass_in_profile_helper(Register receiver, Register mdp, ++ Register reg2, ++ Label& done, bool is_virtual_call); ++ void record_item_in_profile_helper(Register item, Register mdp, ++ Register reg2, int start_row, Label& done, int total_rows, ++ OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn, ++ int non_profiled_offset); + -+ // main loop -+ bind(NEXT_WORD); -+ if (str1_isL == str2_isL) { // LL or UU -+ add(t0, str1, cnt2); -+ ld(tmp1, Address(t0)); -+ add(t0, str2, cnt2); -+ ld(tmp2, Address(t0)); -+ addi(cnt2, cnt2, 8); -+ } else if (isLU) { // LU case -+ add(t0, str1, cnt1); -+ lwu(tmp1, Address(t0)); -+ add(t0, str2, cnt2); -+ ld(tmp2, Address(t0)); -+ addi(cnt1, cnt1, 4); -+ inflate_lo32(tmp3, tmp1); -+ mv(tmp1, tmp3); -+ addi(cnt2, cnt2, 8); -+ } else { // UL case -+ add(t0, str2, cnt2); -+ lwu(tmp2, Address(t0)); -+ add(t0, str1, cnt1); -+ ld(tmp1, Address(t0)); -+ inflate_lo32(tmp3, tmp2); -+ mv(tmp2, tmp3); -+ addi(cnt1, cnt1, 8); -+ addi(cnt2, cnt2, 4); -+ } -+ bgez(cnt2, TAIL); ++ void update_mdp_by_offset(Register mdp_in, int offset_of_offset); ++ void update_mdp_by_offset(Register mdp_in, Register reg, int offset_of_disp); ++ void update_mdp_by_constant(Register mdp_in, int constant); ++ void update_mdp_for_ret(Register return_bci); + -+ xorr(tmp3, tmp1, tmp2); -+ beqz(tmp3, NEXT_WORD); -+ j(DIFFERENCE); -+ bind(TAIL); -+ xorr(tmp3, tmp1, tmp2); -+ bnez(tmp3, DIFFERENCE); -+ // Last longword. -+ if (AvoidUnalignedAccesses) { -+ // Aligned access. Load bytes from byte-aligned address, -+ // which may contain invalid bytes when remaining bytes is -+ // less than 4(UL/LU) or 8 (LL/UU). -+ // Invalid bytes should be removed before comparison. -+ if (str1_isL == str2_isL) { // LL or UU -+ add(t0, str1, cnt2); -+ ld(tmp1, Address(t0)); -+ add(t0, str2, cnt2); -+ ld(tmp2, Address(t0)); -+ } else if (isLU) { // LU -+ add(t0, str1, cnt1); -+ lwu(tmp1, Address(t0)); -+ add(t0, str2, cnt2); -+ ld(tmp2, Address(t0)); -+ inflate_lo32(tmp3, tmp1); -+ mv(tmp1, tmp3); -+ } else { // UL -+ add(t0, str1, cnt1); -+ ld(tmp1, Address(t0)); -+ add(t0, str2, cnt2); -+ lwu(tmp2, Address(t0)); -+ inflate_lo32(tmp3, tmp2); -+ mv(tmp2, tmp3); -+ slli(cnt2, cnt2, 1); // UL case should convert cnt2 to bytes -+ } -+ // remove invalid bytes -+ slli(t0, cnt2, LogBitsPerByte); -+ sll(tmp1, tmp1, t0); -+ sll(tmp2, tmp2, t0); -+ } else { -+ // Last longword. In the case where length == 4 we compare the -+ // same longword twice, but that's still faster than another -+ // conditional branch. -+ if (str1_isL == str2_isL) { // LL or UU -+ ld(tmp1, Address(str1)); -+ ld(tmp2, Address(str2)); -+ } else if (isLU) { // LU case -+ lwu(tmp1, Address(str1)); -+ ld(tmp2, Address(str2)); -+ inflate_lo32(tmp3, tmp1); -+ mv(tmp1, tmp3); -+ } else { // UL case -+ ld(tmp1, Address(str1)); -+ lwu(tmp2, Address(str2)); -+ inflate_lo32(tmp3, tmp2); -+ mv(tmp2, tmp3); -+ } -+ } -+ bind(TAIL_CHECK); -+ xorr(tmp3, tmp1, tmp2); -+ beqz(tmp3, DONE); ++ // narrow int return value ++ void narrow(Register result); + -+ // Find the first different characters in the longwords and -+ // compute their difference. 
-+ bind(DIFFERENCE); -+ ctzc_bit(result, tmp3, isLL); // count zero from lsb to msb -+ srl(tmp1, tmp1, result); -+ srl(tmp2, tmp2, result); -+ if (isLL) { -+ andi(tmp1, tmp1, 0xFF); -+ andi(tmp2, tmp2, 0xFF); -+ } else { -+ andi(tmp1, tmp1, 0xFFFF); -+ andi(tmp2, tmp2, 0xFFFF); -+ } -+ sub(result, tmp1, tmp2); -+ j(DONE); -+ } ++ void profile_taken_branch(Register mdp, Register bumped_count); ++ void profile_not_taken_branch(Register mdp); ++ void profile_call(Register mdp); ++ void profile_final_call(Register mdp); ++ void profile_virtual_call(Register receiver, Register mdp, ++ Register t1, ++ bool receiver_can_be_null = false); ++ void profile_ret(Register return_bci, Register mdp); ++ void profile_null_seen(Register mdp); ++ void profile_typecheck(Register mdp, Register klass, Register temp); ++ void profile_typecheck_failed(Register mdp); ++ void profile_switch_default(Register mdp); ++ void profile_switch_case(Register index_in_scratch, Register mdp, ++ Register temp); + -+ bind(STUB); -+ RuntimeAddress stub = NULL; -+ switch (ae) { -+ case StrIntrinsicNode::LL: -+ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LL()); -+ break; -+ case StrIntrinsicNode::UU: -+ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UU()); -+ break; -+ case StrIntrinsicNode::LU: -+ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LU()); -+ break; -+ case StrIntrinsicNode::UL: -+ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UL()); -+ break; -+ default: -+ ShouldNotReachHere(); -+ } -+ assert(stub.target() != NULL, "compare_long_string stub has not been generated"); -+ trampoline_call(stub); -+ j(DONE); ++ void profile_obj_type(Register obj, const Address& mdo_addr, Register tmp); ++ void profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual); ++ void profile_return_type(Register mdp, Register ret, Register tmp); ++ void profile_parameters_type(Register mdp, Register tmp1, Register tmp2, Register tmp3); + -+ bind(SHORT_STRING); -+ // Is the minimum length zero? 
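The DIFFERENCE block above xors the two mismatching longwords, finds the lowest differing position, shifts both words down to it and subtracts one masked element from the other. A C++ sketch of the same computation, assuming, as the code appears to, that ctzc_bit rounds the bit position down to an element boundary; first_diff is an illustrative name and __builtin_ctzll is the GCC/Clang builtin:

    #include <cstdint>

    // Return the signed difference of the lowest differing element of two
    // 8-byte chunks (8-bit elements for Latin-1, 16-bit for UTF-16).
    static int first_diff(uint64_t w1, uint64_t w2, bool latin1) {
      uint64_t x = w1 ^ w2;                  // callers ensure the words differ
      int bit = __builtin_ctzll(x);          // first differing bit
      int elem_bits = latin1 ? 8 : 16;
      bit -= bit % elem_bits;                // align to the element start
      uint64_t mask = latin1 ? 0xFFu : 0xFFFFu;
      return (int)((w1 >> bit) & mask) - (int)((w2 >> bit) & mask);
    }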
-+ beqz(cnt2, DONE); -+ // arrange code to do most branches while loading and loading next characters -+ // while comparing previous -+ (this->*str1_load_chr)(tmp1, Address(str1), t0); -+ addi(str1, str1, str1_chr_size); -+ addi(cnt2, cnt2, -1); -+ beqz(cnt2, SHORT_LAST_INIT); -+ (this->*str2_load_chr)(cnt1, Address(str2), t0); -+ addi(str2, str2, str2_chr_size); -+ j(SHORT_LOOP_START); -+ bind(SHORT_LOOP); -+ addi(cnt2, cnt2, -1); -+ beqz(cnt2, SHORT_LAST); -+ bind(SHORT_LOOP_START); -+ (this->*str1_load_chr)(tmp2, Address(str1), t0); -+ addi(str1, str1, str1_chr_size); -+ (this->*str2_load_chr)(t0, Address(str2), t0); -+ addi(str2, str2, str2_chr_size); -+ bne(tmp1, cnt1, SHORT_LOOP_TAIL); -+ addi(cnt2, cnt2, -1); -+ beqz(cnt2, SHORT_LAST2); -+ (this->*str1_load_chr)(tmp1, Address(str1), t0); -+ addi(str1, str1, str1_chr_size); -+ (this->*str2_load_chr)(cnt1, Address(str2), t0); -+ addi(str2, str2, str2_chr_size); -+ beq(tmp2, t0, SHORT_LOOP); -+ sub(result, tmp2, t0); -+ j(DONE); -+ bind(SHORT_LOOP_TAIL); -+ sub(result, tmp1, cnt1); -+ j(DONE); -+ bind(SHORT_LAST2); -+ beq(tmp2, t0, DONE); -+ sub(result, tmp2, t0); ++ // Debugging ++ // only if +VerifyFPU && (state == ftos || state == dtos) ++ void verify_FPU(int stack_depth, TosState state = ftos); + -+ j(DONE); -+ bind(SHORT_LAST_INIT); -+ (this->*str2_load_chr)(cnt1, Address(str2), t0); -+ addi(str2, str2, str2_chr_size); -+ bind(SHORT_LAST); -+ beq(tmp1, cnt1, DONE); -+ sub(result, tmp1, cnt1); ++ typedef enum { NotifyJVMTI, SkipNotifyJVMTI } NotifyMethodExitMode; + -+ bind(DONE); ++ // support for jvmti/dtrace ++ void notify_method_entry(); ++ void notify_method_exit(TosState state, NotifyMethodExitMode mode); + -+ BLOCK_COMMENT("} string_compare"); -+} ++ virtual void _call_Unimplemented(address call_site) { ++ save_bcp(); ++ set_last_Java_frame(esp, fp, (address) pc(), t0); ++ MacroAssembler::_call_Unimplemented(call_site); ++ } + -+// short string -+// StringUTF16.indexOfChar -+// StringLatin1.indexOfChar -+void MacroAssembler::string_indexof_char_short(Register str1, Register cnt1, -+ Register ch, Register result, -+ bool isL) -+{ -+ Register ch1 = t0; -+ Register index = t1; ++#ifdef ASSERT ++ void verify_access_flags(Register access_flags, uint32_t flag_bits, ++ const char* msg, bool stop_by_hit = true); ++ void verify_frame_setup(); ++#endif ++}; + -+ BLOCK_COMMENT("string_indexof_char_short {"); ++#endif // CPU_RISCV_INTERP_MASM_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp b/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp +new file mode 100644 +index 00000000000..d93530d8564 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp +@@ -0,0 +1,295 @@ ++/* ++ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ Label LOOP, LOOP1, LOOP4, LOOP8; -+ Label MATCH, MATCH1, MATCH2, MATCH3, -+ MATCH4, MATCH5, MATCH6, MATCH7, NOMATCH; ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "memory/allocation.inline.hpp" ++#include "memory/universe.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/icache.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/signature.hpp" + -+ mv(result, -1); -+ mv(index, zr); ++#define __ _masm-> + -+ bind(LOOP); -+ addi(t0, index, 8); -+ ble(t0, cnt1, LOOP8); -+ addi(t0, index, 4); -+ ble(t0, cnt1, LOOP4); -+ j(LOOP1); ++// Implementation of SignatureHandlerGenerator ++Register InterpreterRuntime::SignatureHandlerGenerator::from() { return xlocals; } ++Register InterpreterRuntime::SignatureHandlerGenerator::to() { return sp; } ++Register InterpreterRuntime::SignatureHandlerGenerator::temp() { return t0; } + -+ bind(LOOP8); -+ isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); -+ beq(ch, ch1, MATCH); -+ isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); -+ beq(ch, ch1, MATCH1); -+ isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); -+ beq(ch, ch1, MATCH2); -+ isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); -+ beq(ch, ch1, MATCH3); -+ isL ? lbu(ch1, Address(str1, 4)) : lhu(ch1, Address(str1, 8)); -+ beq(ch, ch1, MATCH4); -+ isL ? lbu(ch1, Address(str1, 5)) : lhu(ch1, Address(str1, 10)); -+ beq(ch, ch1, MATCH5); -+ isL ? lbu(ch1, Address(str1, 6)) : lhu(ch1, Address(str1, 12)); -+ beq(ch, ch1, MATCH6); -+ isL ? lbu(ch1, Address(str1, 7)) : lhu(ch1, Address(str1, 14)); -+ beq(ch, ch1, MATCH7); -+ addi(index, index, 8); -+ addi(str1, str1, isL ? 8 : 16); -+ blt(index, cnt1, LOOP); -+ j(NOMATCH); ++Register InterpreterRuntime::SignatureHandlerGenerator::next_gpr() { ++ if (_num_reg_int_args < Argument::n_int_register_parameters_c - 1) { ++ return g_INTArgReg[++_num_reg_int_args]; ++ } ++ return noreg; ++} + -+ bind(LOOP4); -+ isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); -+ beq(ch, ch1, MATCH); -+ isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); -+ beq(ch, ch1, MATCH1); -+ isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); -+ beq(ch, ch1, MATCH2); -+ isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); -+ beq(ch, ch1, MATCH3); -+ addi(index, index, 4); -+ addi(str1, str1, isL ? 4 : 8); -+ bge(index, cnt1, NOMATCH); ++FloatRegister InterpreterRuntime::SignatureHandlerGenerator::next_fpr() { ++ if (_num_reg_fp_args < Argument::n_float_register_parameters_c) { ++ return g_FPArgReg[_num_reg_fp_args++]; ++ } else { ++ return fnoreg; ++ } ++} + -+ bind(LOOP1); -+ isL ? lbu(ch1, Address(str1)) : lhu(ch1, Address(str1)); -+ beq(ch, ch1, MATCH); -+ addi(index, index, 1); -+ addi(str1, str1, isL ? 
1 : 2); -+ blt(index, cnt1, LOOP1); -+ j(NOMATCH); ++int InterpreterRuntime::SignatureHandlerGenerator::next_stack_offset() { ++ int ret = _stack_offset; ++ _stack_offset += wordSize; ++ return ret; ++} + -+ bind(MATCH1); -+ addi(index, index, 1); -+ j(MATCH); ++InterpreterRuntime::SignatureHandlerGenerator::SignatureHandlerGenerator( ++ const methodHandle& method, CodeBuffer* buffer) : NativeSignatureIterator(method) { ++ _masm = new MacroAssembler(buffer); // allocate on resourse area by default ++ _num_reg_int_args = (method->is_static() ? 1 : 0); ++ _num_reg_fp_args = 0; ++ _stack_offset = 0; ++} + -+ bind(MATCH2); -+ addi(index, index, 2); -+ j(MATCH); ++void InterpreterRuntime::SignatureHandlerGenerator::pass_int() { ++ const Address src(from(), Interpreter::local_offset_in_bytes(offset())); + -+ bind(MATCH3); -+ addi(index, index, 3); -+ j(MATCH); ++ Register reg = next_gpr(); ++ if (reg != noreg) { ++ __ lw(reg, src); ++ } else { ++ __ lw(x10, src); ++ __ sw(x10, Address(to(), next_stack_offset())); ++ } ++} + -+ bind(MATCH4); -+ addi(index, index, 4); -+ j(MATCH); ++void InterpreterRuntime::SignatureHandlerGenerator::pass_long() { ++ const Address src(from(), Interpreter::local_offset_in_bytes(offset() + 1)); + -+ bind(MATCH5); -+ addi(index, index, 5); -+ j(MATCH); ++ Register reg = next_gpr(); ++ if (reg != noreg) { ++ __ ld(reg, src); ++ } else { ++ __ ld(x10, src); ++ __ sd(x10, Address(to(), next_stack_offset())); ++ } ++} + -+ bind(MATCH6); -+ addi(index, index, 6); -+ j(MATCH); ++void InterpreterRuntime::SignatureHandlerGenerator::pass_float() { ++ const Address src(from(), Interpreter::local_offset_in_bytes(offset())); + -+ bind(MATCH7); -+ addi(index, index, 7); ++ FloatRegister reg = next_fpr(); ++ if (reg != fnoreg) { ++ __ flw(reg, src); ++ } else { ++ // a floating-point argument is passed according to the integer calling ++ // convention if no floating-point argument register available ++ pass_int(); ++ } ++} + -+ bind(MATCH); -+ mv(result, index); -+ bind(NOMATCH); -+ BLOCK_COMMENT("} string_indexof_char_short"); ++void InterpreterRuntime::SignatureHandlerGenerator::pass_double() { ++ const Address src(from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ ++ FloatRegister reg = next_fpr(); ++ if (reg != fnoreg) { ++ __ fld(reg, src); ++ } else { ++ // a floating-point argument is passed according to the integer calling ++ // convention if no floating-point argument register available ++ pass_long(); ++ } +} + -+// StringUTF16.indexOfChar -+// StringLatin1.indexOfChar -+void MacroAssembler::string_indexof_char(Register str1, Register cnt1, -+ Register ch, Register result, -+ Register tmp1, Register tmp2, -+ Register tmp3, Register tmp4, -+ bool isL) -+{ -+ Label CH1_LOOP, HIT, NOMATCH, DONE, DO_LONG; -+ Register ch1 = t0; -+ Register orig_cnt = t1; -+ Register mask1 = tmp3; -+ Register mask2 = tmp2; -+ Register match_mask = tmp1; -+ Register trailing_char = tmp4; -+ Register unaligned_elems = tmp4; ++void InterpreterRuntime::SignatureHandlerGenerator::pass_object() { ++ Register reg = next_gpr(); ++ if (reg == c_rarg1) { ++ assert(offset() == 0, "argument register 1 can only be (non-null) receiver"); ++ __ addi(c_rarg1, from(), Interpreter::local_offset_in_bytes(offset())); ++ } else if (reg != noreg) { ++ // c_rarg2-c_rarg7 ++ __ addi(x10, from(), Interpreter::local_offset_in_bytes(offset())); ++ __ mv(reg, zr); //_num_reg_int_args:c_rarg -> 1:c_rarg2, 2:c_rarg3... 
++ __ ld(temp(), x10); ++ Label L; ++ __ beqz(temp(), L); ++ __ mv(reg, x10); ++ __ bind(L); ++ } else { ++ //to stack ++ __ addi(x10, from(), Interpreter::local_offset_in_bytes(offset())); ++ __ ld(temp(), x10); ++ Label L; ++ __ bnez(temp(), L); ++ __ mv(x10, zr); ++ __ bind(L); ++ assert(sizeof(jobject) == wordSize, ""); ++ __ sd(x10, Address(to(), next_stack_offset())); ++ } ++} + -+ BLOCK_COMMENT("string_indexof_char {"); -+ beqz(cnt1, NOMATCH); ++void InterpreterRuntime::SignatureHandlerGenerator::generate(uint64_t fingerprint) { ++ // generate code to handle arguments ++ iterate(fingerprint); + -+ addi(t0, cnt1, isL ? -32 : -16); -+ bgtz(t0, DO_LONG); -+ string_indexof_char_short(str1, cnt1, ch, result, isL); -+ j(DONE); ++ // return result handler ++ __ la(x10, ExternalAddress(Interpreter::result_handler(method()->result_type()))); ++ __ ret(); + -+ bind(DO_LONG); -+ mv(orig_cnt, cnt1); -+ if (AvoidUnalignedAccesses) { -+ Label ALIGNED; -+ andi(unaligned_elems, str1, 0x7); -+ beqz(unaligned_elems, ALIGNED); -+ sub(unaligned_elems, unaligned_elems, 8); -+ neg(unaligned_elems, unaligned_elems); -+ if (!isL) { -+ srli(unaligned_elems, unaligned_elems, 1); -+ } -+ // do unaligned part per element -+ string_indexof_char_short(str1, unaligned_elems, ch, result, isL); -+ bgez(result, DONE); -+ mv(orig_cnt, cnt1); -+ sub(cnt1, cnt1, unaligned_elems); -+ bind(ALIGNED); -+ } ++ __ flush(); ++} + -+ // duplicate ch -+ if (isL) { -+ slli(ch1, ch, 8); -+ orr(ch, ch1, ch); -+ } -+ slli(ch1, ch, 16); -+ orr(ch, ch1, ch); -+ slli(ch1, ch, 32); -+ orr(ch, ch1, ch); + -+ if (!isL) { -+ slli(cnt1, cnt1, 1); -+ } ++// Implementation of SignatureHandlerLibrary + -+ mv(mask1, isL ? 0x0101010101010101 : 0x0001000100010001); -+ mv(mask2, isL ? 0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff); ++void SignatureHandlerLibrary::pd_set_handler(address handler) {} + -+ bind(CH1_LOOP); -+ ld(ch1, Address(str1)); -+ addi(str1, str1, 8); -+ addi(cnt1, cnt1, -8); -+ compute_match_mask(ch1, ch, match_mask, mask1, mask2); -+ bnez(match_mask, HIT); -+ bgtz(cnt1, CH1_LOOP); -+ j(NOMATCH); + -+ bind(HIT); -+ ctzc_bit(trailing_char, match_mask, isL, ch1, result); -+ srli(trailing_char, trailing_char, 3); -+ addi(cnt1, cnt1, 8); -+ ble(cnt1, trailing_char, NOMATCH); -+ // match case -+ if (!isL) { -+ srli(cnt1, cnt1, 1); -+ srli(trailing_char, trailing_char, 1); ++class SlowSignatureHandler ++ : public NativeSignatureIterator { ++ private: ++ address _from; ++ intptr_t* _to; ++ intptr_t* _int_args; ++ intptr_t* _fp_args; ++ intptr_t* _fp_identifiers; ++ unsigned int _num_reg_int_args; ++ unsigned int _num_reg_fp_args; ++ ++ intptr_t* single_slot_addr() { ++ intptr_t* from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(0)); ++ _from -= Interpreter::stackElementSize; ++ return from_addr; + } + -+ sub(result, orig_cnt, cnt1); -+ add(result, result, trailing_char); -+ j(DONE); ++ intptr_t* double_slot_addr() { ++ intptr_t* from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(1)); ++ _from -= 2 * Interpreter::stackElementSize; ++ return from_addr; ++ } + -+ bind(NOMATCH); -+ mv(result, -1); ++ int pass_gpr(intptr_t value) { ++ if (_num_reg_int_args < Argument::n_int_register_parameters_c - 1) { ++ *_int_args++ = value; ++ return _num_reg_int_args++; ++ } ++ return -1; ++ } + -+ bind(DONE); -+ BLOCK_COMMENT("} string_indexof_char"); -+} ++ int pass_fpr(intptr_t value) { ++ if (_num_reg_fp_args < Argument::n_float_register_parameters_c) { ++ *_fp_args++ = value; ++ return _num_reg_fp_args++; ++ } ++ return 
-1; ++ } + -+// Search for needle in haystack and return index or -1 -+// x10: result -+// x11: haystack -+// x12: haystack_len -+// x13: needle -+// x14: needle_len -+void MacroAssembler::string_indexof(Register haystack, Register needle, -+ Register haystack_len, Register needle_len, -+ Register tmp1, Register tmp2, -+ Register tmp3, Register tmp4, -+ Register tmp5, Register tmp6, -+ Register result, int ae) -+{ -+ assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); ++ void pass_stack(intptr_t value) { ++ *_to++ = value; ++ } + -+ Label LINEARSEARCH, LINEARSTUB, DONE, NOMATCH; ++ virtual void pass_int() { ++ jint value = *(jint*)single_slot_addr(); ++ if (pass_gpr(value) < 0) { ++ pass_stack(value); ++ } ++ } + -+ Register ch1 = t0; -+ Register ch2 = t1; -+ Register nlen_tmp = tmp1; // needle len tmp -+ Register hlen_tmp = tmp2; // haystack len tmp -+ Register result_tmp = tmp4; ++ virtual void pass_long() { ++ intptr_t value = *double_slot_addr(); ++ if (pass_gpr(value) < 0) { ++ pass_stack(value); ++ } ++ } + -+ bool isLL = ae == StrIntrinsicNode::LL; ++ virtual void pass_object() { ++ intptr_t* addr = single_slot_addr(); ++ intptr_t value = *addr == 0 ? NULL : (intptr_t)addr; ++ if (pass_gpr(value) < 0) { ++ pass_stack(value); ++ } ++ } + -+ bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; -+ bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; -+ int needle_chr_shift = needle_isL ? 0 : 1; -+ int haystack_chr_shift = haystack_isL ? 0 : 1; -+ int needle_chr_size = needle_isL ? 1 : 2; -+ int haystack_chr_size = haystack_isL ? 1 : 2; -+ load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu : -+ (load_chr_insn)&MacroAssembler::lhu; -+ load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu : -+ (load_chr_insn)&MacroAssembler::lhu; ++ virtual void pass_float() { ++ jint value = *(jint*) single_slot_addr(); ++ // a floating-point argument is passed according to the integer calling ++ // convention if no floating-point argument register available ++ if (pass_fpr(value) < 0 && pass_gpr(value) < 0) { ++ pass_stack(value); ++ } ++ } + -+ BLOCK_COMMENT("string_indexof {"); ++ virtual void pass_double() { ++ intptr_t value = *double_slot_addr(); ++ int arg = pass_fpr(value); ++ if (0 <= arg) { ++ *_fp_identifiers |= (1ull << arg); // mark as double ++ } else if (pass_gpr(value) < 0) { // no need to mark if passing by integer registers or stack ++ pass_stack(value); ++ } ++ } + -+ // Note, inline_string_indexOf() generates checks: -+ // if (pattern.count > src.count) return -1; -+ // if (pattern.count == 0) return 0; ++ public: ++ SlowSignatureHandler(const methodHandle& method, address from, intptr_t* to) ++ : NativeSignatureIterator(method) ++ { ++ _from = from; ++ _to = to; + -+ // We have two strings, a source string in haystack, haystack_len and a pattern string -+ // in needle, needle_len. Find the first occurence of pattern in source or return -1. ++ _int_args = to - (method->is_static() ? 16 : 17); ++ _fp_args = to - 8; ++ _fp_identifiers = to - 9; ++ *(int*) _fp_identifiers = 0; ++ _num_reg_int_args = (method->is_static() ? 1 : 0); ++ _num_reg_fp_args = 0; ++ } + -+ // For larger pattern and source we use a simplified Boyer Moore algorithm. -+ // With a small pattern and source we use linear scan. 
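The _fp_identifiers word above is a small per-call bitmap: pass_fpr hands back the index of the floating-point argument register it used, and pass_double sets that bit so whatever consumes the argument block can tell doubles from floats; floats leave the bit clear. A minimal sketch of the bookkeeping; FpArgMask is an illustrative name:

    #include <cstdint>

    // One bit per FP argument register: set when the slot carries a double.
    struct FpArgMask {
      uint64_t bits = 0;
      void mark_double(int fp_reg_index)      { bits |= (uint64_t)1 << fp_reg_index; }
      bool is_double(int fp_reg_index) const  { return (bits >> fp_reg_index) & 1; }
    };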
++ ~SlowSignatureHandler() ++ { ++ _from = NULL; ++ _to = NULL; ++ _int_args = NULL; ++ _fp_args = NULL; ++ _fp_identifiers = NULL; ++ } ++}; + -+ // needle_len >=8 && needle_len < 256 && needle_len < haystack_len/4, use bmh algorithm. -+ sub(result_tmp, haystack_len, needle_len); -+ // needle_len < 8, use linear scan -+ sub(t0, needle_len, 8); -+ bltz(t0, LINEARSEARCH); -+ // needle_len >= 256, use linear scan -+ sub(t0, needle_len, 256); -+ bgez(t0, LINEARSTUB); -+ // needle_len >= haystack_len/4, use linear scan -+ srli(t0, haystack_len, 2); -+ bge(needle_len, t0, LINEARSTUB); + -+ // Boyer-Moore-Horspool introduction: -+ // The Boyer Moore alogorithm is based on the description here:- -+ // -+ // http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm -+ // -+ // This describes and algorithm with 2 shift rules. The 'Bad Character' rule -+ // and the 'Good Suffix' rule. -+ // -+ // These rules are essentially heuristics for how far we can shift the -+ // pattern along the search string. -+ // -+ // The implementation here uses the 'Bad Character' rule only because of the -+ // complexity of initialisation for the 'Good Suffix' rule. -+ // -+ // This is also known as the Boyer-Moore-Horspool algorithm: -+ // -+ // http://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm -+ // -+ // #define ASIZE 256 -+ // -+ // int bm(unsigned char *pattern, int m, unsigned char *src, int n) { -+ // int i, j; -+ // unsigned c; -+ // unsigned char bc[ASIZE]; -+ // -+ // /* Preprocessing */ -+ // for (i = 0; i < ASIZE; ++i) -+ // bc[i] = m; -+ // for (i = 0; i < m - 1; ) { -+ // c = pattern[i]; -+ // ++i; -+ // // c < 256 for Latin1 string, so, no need for branch -+ // #ifdef PATTERN_STRING_IS_LATIN1 -+ // bc[c] = m - i; -+ // #else -+ // if (c < ASIZE) bc[c] = m - i; -+ // #endif -+ // } -+ // -+ // /* Searching */ -+ // j = 0; -+ // while (j <= n - m) { -+ // c = src[i+j]; -+ // if (pattern[m-1] == c) -+ // int k; -+ // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); -+ // if (k < 0) return j; -+ // // c < 256 for Latin1 string, so, no need for branch -+ // #ifdef SOURCE_STRING_IS_LATIN1_AND_PATTERN_STRING_IS_LATIN1 -+ // // LL case: (c< 256) always true. Remove branch -+ // j += bc[pattern[j+m-1]]; -+ // #endif -+ // #ifdef SOURCE_STRING_IS_UTF_AND_PATTERN_STRING_IS_UTF -+ // // UU case: need if (c if not. -+ // if (c < ASIZE) -+ // j += bc[pattern[j+m-1]]; -+ // else -+ // j += m -+ // #endif -+ // } -+ // return -1; -+ // } ++JRT_ENTRY(address, ++ InterpreterRuntime::slow_signature_handler(JavaThread* current, ++ Method* method, ++ intptr_t* from, ++ intptr_t* to)) ++ methodHandle m(current, (Method*)method); ++ assert(m->is_native(), "sanity check"); + -+ // temp register:t0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, result -+ Label BCLOOP, BCSKIP, BMLOOPSTR2, BMLOOPSTR1, BMSKIP, BMADV, BMMATCH, -+ BMLOOPSTR1_LASTCMP, BMLOOPSTR1_CMP, BMLOOPSTR1_AFTER_LOAD, BM_INIT_LOOP; ++ // handle arguments ++ SlowSignatureHandler ssh(m, (address)from, to); ++ ssh.iterate(UCONST64(-1)); + -+ Register haystack_end = haystack_len; -+ Register skipch = tmp2; ++ // return result handler ++ return Interpreter::result_handler(m->result_type()); ++JRT_END +diff --git a/src/hotspot/cpu/riscv/interpreterRT_riscv.hpp b/src/hotspot/cpu/riscv/interpreterRT_riscv.hpp +new file mode 100644 +index 00000000000..05df63ba2ae +--- /dev/null ++++ b/src/hotspot/cpu/riscv/interpreterRT_riscv.hpp +@@ -0,0 +1,68 @@ ++/* ++ * Copyright (c) 1998, 2019, Oracle and/or its affiliates. All rights reserved. 
++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ // pattern length is >=8, so, we can read at least 1 register for cases when -+ // UTF->Latin1 conversion is not needed(8 LL or 4UU) and half register for -+ // UL case. We'll re-read last character in inner pre-loop code to have -+ // single outer pre-loop load -+ const int firstStep = isLL ? 7 : 3; ++#ifndef CPU_RISCV_INTERPRETERRT_RISCV_HPP ++#define CPU_RISCV_INTERPRETERRT_RISCV_HPP + -+ const int ASIZE = 256; -+ const int STORE_BYTES = 8; // 8 bytes stored per instruction(sd) ++// This is included in the middle of class Interpreter. ++// Do not include files here. + -+ sub(sp, sp, ASIZE); ++// native method calls + -+ // init BC offset table with default value: needle_len -+ slli(t0, needle_len, 8); -+ orr(t0, t0, needle_len); // [63...16][needle_len][needle_len] -+ slli(tmp1, t0, 16); -+ orr(t0, tmp1, t0); // [63...32][needle_len][needle_len][needle_len][needle_len] -+ slli(tmp1, t0, 32); -+ orr(tmp5, tmp1, t0); // tmp5: 8 elements [needle_len] ++class SignatureHandlerGenerator: public NativeSignatureIterator { ++ private: ++ MacroAssembler* _masm; ++ unsigned int _num_reg_fp_args; ++ unsigned int _num_reg_int_args; ++ int _stack_offset; + -+ mv(ch1, sp); // ch1 is t0 -+ mv(tmp6, ASIZE / STORE_BYTES); // loop iterations ++ void pass_int(); ++ void pass_long(); ++ void pass_float(); ++ void pass_double(); ++ void pass_object(); + -+ bind(BM_INIT_LOOP); -+ // for (i = 0; i < ASIZE; ++i) -+ // bc[i] = m; -+ for (int i = 0; i < 4; i++) { -+ sd(tmp5, Address(ch1, i * wordSize)); ++ Register next_gpr(); ++ FloatRegister next_fpr(); ++ int next_stack_offset(); ++ ++ public: ++ // Creation ++ SignatureHandlerGenerator(const methodHandle& method, CodeBuffer* buffer); ++ virtual ~SignatureHandlerGenerator() { ++ _masm = NULL; + } -+ add(ch1, ch1, 32); -+ sub(tmp6, tmp6, 4); -+ bgtz(tmp6, BM_INIT_LOOP); + -+ sub(nlen_tmp, needle_len, 1); // m - 1, index of the last element in pattern -+ Register orig_haystack = tmp5; -+ mv(orig_haystack, haystack); -+ // result_tmp = tmp4 -+ shadd(haystack_end, result_tmp, haystack, haystack_end, haystack_chr_shift); -+ sub(ch2, needle_len, 1); // bc offset init value, ch2 is t1 -+ mv(tmp3, needle); ++ // Code generation ++ void generate(uint64_t fingerprint); + -+ // for (i = 0; i < m - 1; ) { -+ // c = pattern[i]; -+ // ++i; -+ // // c < 256 for Latin1 string, so, no need for branch -+ // #ifdef 
PATTERN_STRING_IS_LATIN1 -+ // bc[c] = m - i; -+ // #else -+ // if (c < ASIZE) bc[c] = m - i; -+ // #endif -+ // } -+ bind(BCLOOP); -+ (this->*needle_load_1chr)(ch1, Address(tmp3), noreg); -+ add(tmp3, tmp3, needle_chr_size); -+ if (!needle_isL) { -+ // ae == StrIntrinsicNode::UU -+ mv(tmp6, ASIZE); -+ bgeu(ch1, tmp6, BCSKIP); -+ } -+ add(tmp4, sp, ch1); -+ sb(ch2, Address(tmp4)); // store skip offset to BC offset table ++ // Code generation support ++ static Register from(); ++ static Register to(); ++ static Register temp(); ++}; + -+ bind(BCSKIP); -+ sub(ch2, ch2, 1); // for next pattern element, skip distance -1 -+ bgtz(ch2, BCLOOP); ++#endif // CPU_RISCV_INTERPRETERRT_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp b/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp +new file mode 100644 +index 00000000000..9a6084afa1d +--- /dev/null ++++ b/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp +@@ -0,0 +1,86 @@ ++/* ++ * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ // tmp6: pattern end, address after needle -+ shadd(tmp6, needle_len, needle, tmp6, needle_chr_shift); -+ if (needle_isL == haystack_isL) { -+ // load last 8 bytes (8LL/4UU symbols) -+ ld(tmp6, Address(tmp6, -wordSize)); -+ } else { -+ // UL: from UTF-16(source) search Latin1(pattern) -+ lwu(tmp6, Address(tmp6, -wordSize / 2)); // load last 4 bytes(4 symbols) -+ // convert Latin1 to UTF. 
eg: 0x0000abcd -> 0x0a0b0c0d -+ // We'll have to wait until load completed, but it's still faster than per-character loads+checks -+ srli(tmp3, tmp6, BitsPerByte * (wordSize / 2 - needle_chr_size)); // pattern[m-1], eg:0x0000000a -+ slli(ch2, tmp6, XLEN - 24); -+ srli(ch2, ch2, XLEN - 8); // pattern[m-2], 0x0000000b -+ slli(ch1, tmp6, XLEN - 16); -+ srli(ch1, ch1, XLEN - 8); // pattern[m-3], 0x0000000c -+ andi(tmp6, tmp6, 0xff); // pattern[m-4], 0x0000000d -+ slli(ch2, ch2, 16); -+ orr(ch2, ch2, ch1); // 0x00000b0c -+ slli(result, tmp3, 48); // use result as temp register -+ orr(tmp6, tmp6, result); // 0x0a00000d -+ slli(result, ch2, 16); -+ orr(tmp6, tmp6, result); // UTF-16:0x0a0b0c0d -+ } ++#ifndef CPU_RISCV_JAVAFRAMEANCHOR_RISCV_HPP ++#define CPU_RISCV_JAVAFRAMEANCHOR_RISCV_HPP + -+ // i = m - 1; -+ // skipch = j + i; -+ // if (skipch == pattern[m - 1] -+ // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); -+ // else -+ // move j with bad char offset table -+ bind(BMLOOPSTR2); -+ // compare pattern to source string backward -+ shadd(result, nlen_tmp, haystack, result, haystack_chr_shift); -+ (this->*haystack_load_1chr)(skipch, Address(result), noreg); -+ sub(nlen_tmp, nlen_tmp, firstStep); // nlen_tmp is positive here, because needle_len >= 8 -+ if (needle_isL == haystack_isL) { -+ // re-init tmp3. It's for free because it's executed in parallel with -+ // load above. Alternative is to initialize it before loop, but it'll -+ // affect performance on in-order systems with 2 or more ld/st pipelines -+ srli(tmp3, tmp6, BitsPerByte * (wordSize - needle_chr_size)); // UU/LL: pattern[m-1] -+ } -+ if (!isLL) { // UU/UL case -+ slli(ch2, nlen_tmp, 1); // offsets in bytes -+ } -+ bne(tmp3, skipch, BMSKIP); // if not equal, skipch is bad char -+ add(result, haystack, isLL ? nlen_tmp : ch2); -+ ld(ch2, Address(result)); // load 8 bytes from source string -+ mv(ch1, tmp6); -+ if (isLL) { -+ j(BMLOOPSTR1_AFTER_LOAD); -+ } else { -+ sub(nlen_tmp, nlen_tmp, 1); // no need to branch for UU/UL case. 
cnt1 >= 8 -+ j(BMLOOPSTR1_CMP); -+ } ++private: + -+ bind(BMLOOPSTR1); -+ shadd(ch1, nlen_tmp, needle, ch1, needle_chr_shift); -+ shadd(ch2, nlen_tmp, haystack, ch2, haystack_chr_shift); -+ (this->*needle_load_1chr)(ch1, Address(ch1), noreg); -+ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); ++ // FP value associated with _last_Java_sp: ++ intptr_t* volatile _last_Java_fp; // pointer is volatile not what it points to + -+ bind(BMLOOPSTR1_AFTER_LOAD); -+ sub(nlen_tmp, nlen_tmp, 1); -+ bltz(nlen_tmp, BMLOOPSTR1_LASTCMP); ++public: ++ // Each arch must define reset, save, restore ++ // These are used by objects that only care about: ++ // 1 - initializing a new state (thread creation, javaCalls) ++ // 2 - saving a current state (javaCalls) ++ // 3 - restoring an old state (javaCalls) + -+ bind(BMLOOPSTR1_CMP); -+ beq(ch1, ch2, BMLOOPSTR1); ++ void clear(void) { ++ // clearing _last_Java_sp must be first ++ _last_Java_sp = NULL; ++ OrderAccess::release(); ++ _last_Java_fp = NULL; ++ _last_Java_pc = NULL; ++ } + -+ bind(BMSKIP); -+ if (!isLL) { -+ // if we've met UTF symbol while searching Latin1 pattern, then we can -+ // skip needle_len symbols -+ if (needle_isL != haystack_isL) { -+ mv(result_tmp, needle_len); -+ } else { -+ mv(result_tmp, 1); ++ void copy(JavaFrameAnchor* src) { ++ // In order to make sure the transition state is valid for "this" ++ // We must clear _last_Java_sp before copying the rest of the new data ++ // ++ // Hack Alert: Temporary bugfix for 4717480/4721647 ++ // To act like previous version (pd_cache_state) don't NULL _last_Java_sp ++ // unless the value is changing ++ // ++ assert(src != NULL, "Src should not be NULL."); ++ if (_last_Java_sp != src->_last_Java_sp) { ++ _last_Java_sp = NULL; ++ OrderAccess::release(); + } -+ mv(t0, ASIZE); -+ bgeu(skipch, t0, BMADV); ++ _last_Java_fp = src->_last_Java_fp; ++ _last_Java_pc = src->_last_Java_pc; ++ // Must be last so profiler will always see valid frame if has_last_frame() is true ++ _last_Java_sp = src->_last_Java_sp; + } -+ add(result_tmp, sp, skipch); -+ lbu(result_tmp, Address(result_tmp)); // load skip offset + -+ bind(BMADV); -+ sub(nlen_tmp, needle_len, 1); -+ // move haystack after bad char skip offset -+ shadd(haystack, result_tmp, haystack, result, haystack_chr_shift); -+ ble(haystack, haystack_end, BMLOOPSTR2); -+ add(sp, sp, ASIZE); -+ j(NOMATCH); ++ bool walkable(void) { return _last_Java_sp != NULL && _last_Java_pc != NULL; } ++ void make_walkable(JavaThread* thread); ++ void capture_last_Java_pc(void); + -+ bind(BMLOOPSTR1_LASTCMP); -+ bne(ch1, ch2, BMSKIP); ++ intptr_t* last_Java_sp(void) const { return _last_Java_sp; } + -+ bind(BMMATCH); -+ sub(result, haystack, orig_haystack); -+ if (!haystack_isL) { -+ srli(result, result, 1); -+ } -+ add(sp, sp, ASIZE); -+ j(DONE); ++ const address last_Java_pc(void) { return _last_Java_pc; } + -+ bind(LINEARSTUB); -+ sub(t0, needle_len, 16); // small patterns still should be handled by simple algorithm -+ bltz(t0, LINEARSEARCH); -+ mv(result, zr); -+ RuntimeAddress stub = NULL; -+ if (isLL) { -+ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ll()); -+ assert(stub.target() != NULL, "string_indexof_linear_ll stub has not been generated"); -+ } else if (needle_isL) { -+ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ul()); -+ assert(stub.target() != NULL, "string_indexof_linear_ul stub has not been generated"); -+ } else { -+ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_uu()); -+ assert(stub.target() != 
NULL, "string_indexof_linear_uu stub has not been generated"); -+ } -+ trampoline_call(stub); -+ j(DONE); -+ -+ bind(NOMATCH); -+ mv(result, -1); -+ j(DONE); -+ -+ bind(LINEARSEARCH); -+ string_indexof_linearscan(haystack, needle, haystack_len, needle_len, tmp1, tmp2, tmp3, tmp4, -1, result, ae); -+ -+ bind(DONE); -+ BLOCK_COMMENT("} string_indexof"); -+} -+ -+// string_indexof -+// result: x10 -+// src: x11 -+// src_count: x12 -+// pattern: x13 -+// pattern_count: x14 or 1/2/3/4 -+void MacroAssembler::string_indexof_linearscan(Register haystack, Register needle, -+ Register haystack_len, Register needle_len, -+ Register tmp1, Register tmp2, -+ Register tmp3, Register tmp4, -+ int needle_con_cnt, Register result, int ae) -+{ -+ // Note: -+ // needle_con_cnt > 0 means needle_len register is invalid, needle length is constant -+ // for UU/LL: needle_con_cnt[1, 4], UL: needle_con_cnt = 1 -+ assert(needle_con_cnt <= 4, "Invalid needle constant count"); -+ assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); -+ -+ Register ch1 = t0; -+ Register ch2 = t1; -+ Register hlen_neg = haystack_len, nlen_neg = needle_len; -+ Register nlen_tmp = tmp1, hlen_tmp = tmp2, result_tmp = tmp4; -+ -+ bool isLL = ae == StrIntrinsicNode::LL; ++private: + -+ bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; -+ bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; -+ int needle_chr_shift = needle_isL ? 0 : 1; -+ int haystack_chr_shift = haystack_isL ? 0 : 1; -+ int needle_chr_size = needle_isL ? 1 : 2; -+ int haystack_chr_size = haystack_isL ? 1 : 2; ++ static ByteSize last_Java_fp_offset() { return byte_offset_of(JavaFrameAnchor, _last_Java_fp); } + -+ load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu : -+ (load_chr_insn)&MacroAssembler::lhu; -+ load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu : -+ (load_chr_insn)&MacroAssembler::lhu; -+ load_chr_insn load_2chr = isLL ? (load_chr_insn)&MacroAssembler::lhu : (load_chr_insn)&MacroAssembler::lwu; -+ load_chr_insn load_4chr = isLL ? (load_chr_insn)&MacroAssembler::lwu : (load_chr_insn)&MacroAssembler::ld; ++public: + -+ Label DO1, DO2, DO3, MATCH, NOMATCH, DONE; ++ void set_last_Java_sp(intptr_t* java_sp) { _last_Java_sp = java_sp; OrderAccess::release(); } + -+ Register first = tmp3; ++ intptr_t* last_Java_fp(void) { return _last_Java_fp; } + -+ if (needle_con_cnt == -1) { -+ Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT; ++#endif // CPU_RISCV_JAVAFRAMEANCHOR_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp b/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp +new file mode 100644 +index 00000000000..814ed23e471 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp +@@ -0,0 +1,214 @@ ++/* ++ * Copyright (c) 2004, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ sub(t0, needle_len, needle_isL == haystack_isL ? 4 : 2); -+ bltz(t0, DOSHORT); ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "memory/resourceArea.hpp" ++#include "prims/jniFastGetField.hpp" ++#include "prims/jvm_misc.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "runtime/safepoint.hpp" + -+ (this->*needle_load_1chr)(first, Address(needle), noreg); -+ slli(t0, needle_len, needle_chr_shift); -+ add(needle, needle, t0); -+ neg(nlen_neg, t0); -+ slli(t0, result_tmp, haystack_chr_shift); -+ add(haystack, haystack, t0); -+ neg(hlen_neg, t0); ++#define __ masm-> + -+ bind(FIRST_LOOP); -+ add(t0, haystack, hlen_neg); -+ (this->*haystack_load_1chr)(ch2, Address(t0), noreg); -+ beq(first, ch2, STR1_LOOP); ++#define BUFFER_SIZE 30*wordSize + -+ bind(STR2_NEXT); -+ add(hlen_neg, hlen_neg, haystack_chr_size); -+ blez(hlen_neg, FIRST_LOOP); -+ j(NOMATCH); ++// Instead of issuing a LoadLoad barrier we create an address ++// dependency between loads; this might be more efficient. + -+ bind(STR1_LOOP); -+ add(nlen_tmp, nlen_neg, needle_chr_size); -+ add(hlen_tmp, hlen_neg, haystack_chr_size); -+ bgez(nlen_tmp, MATCH); ++// Common register usage: ++// x10/f10: result ++// c_rarg0: jni env ++// c_rarg1: obj ++// c_rarg2: jfield id + -+ bind(STR1_NEXT); -+ add(ch1, needle, nlen_tmp); -+ (this->*needle_load_1chr)(ch1, Address(ch1), noreg); -+ add(ch2, haystack, hlen_tmp); -+ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); -+ bne(ch1, ch2, STR2_NEXT); -+ add(nlen_tmp, nlen_tmp, needle_chr_size); -+ add(hlen_tmp, hlen_tmp, haystack_chr_size); -+ bltz(nlen_tmp, STR1_NEXT); -+ j(MATCH); ++static const Register robj = x13; ++static const Register rcounter = x14; ++static const Register roffset = x15; ++static const Register rcounter_addr = x16; ++static const Register result = x17; + -+ bind(DOSHORT); -+ if (needle_isL == haystack_isL) { -+ sub(t0, needle_len, 2); -+ bltz(t0, DO1); -+ bgtz(t0, DO3); -+ } ++address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { ++ const char *name; ++ switch (type) { ++ case T_BOOLEAN: name = "jni_fast_GetBooleanField"; break; ++ case T_BYTE: name = "jni_fast_GetByteField"; break; ++ case T_CHAR: name = "jni_fast_GetCharField"; break; ++ case T_SHORT: name = "jni_fast_GetShortField"; break; ++ case T_INT: name = "jni_fast_GetIntField"; break; ++ case T_LONG: name = "jni_fast_GetLongField"; break; ++ case T_FLOAT: name = "jni_fast_GetFloatField"; break; ++ case T_DOUBLE: name = "jni_fast_GetDoubleField"; break; ++ default: ShouldNotReachHere(); ++ name = NULL; // unreachable + } ++ ResourceMark rm; ++ BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE); ++ CodeBuffer cbuf(blob); ++ MacroAssembler* masm = new MacroAssembler(&cbuf); ++ address fast_entry = __ pc(); + -+ if (needle_con_cnt == 4) { -+ Label CH1_LOOP; -+ (this->*load_4chr)(ch1, Address(needle), noreg); -+ sub(result_tmp, 
haystack_len, 4); -+ slli(tmp3, result_tmp, haystack_chr_shift); // result as tmp -+ add(haystack, haystack, tmp3); -+ neg(hlen_neg, tmp3); ++ Label slow; ++ int32_t offset = 0; ++ __ la_patchable(rcounter_addr, SafepointSynchronize::safepoint_counter_addr(), offset); ++ __ addi(rcounter_addr, rcounter_addr, offset); + -+ bind(CH1_LOOP); -+ add(ch2, haystack, hlen_neg); -+ (this->*load_4chr)(ch2, Address(ch2), noreg); -+ beq(ch1, ch2, MATCH); -+ add(hlen_neg, hlen_neg, haystack_chr_size); -+ blez(hlen_neg, CH1_LOOP); -+ j(NOMATCH); -+ } ++ Address safepoint_counter_addr(rcounter_addr, 0); ++ __ lwu(rcounter, safepoint_counter_addr); ++ // An even value means there are no ongoing safepoint operations ++ __ andi(t0, rcounter, 1); ++ __ bnez(t0, slow); + -+ if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 2) { -+ Label CH1_LOOP; -+ BLOCK_COMMENT("string_indexof DO2 {"); -+ bind(DO2); -+ (this->*load_2chr)(ch1, Address(needle), noreg); -+ if (needle_con_cnt == 2) { -+ sub(result_tmp, haystack_len, 2); -+ } -+ slli(tmp3, result_tmp, haystack_chr_shift); -+ add(haystack, haystack, tmp3); -+ neg(hlen_neg, tmp3); ++ if (JvmtiExport::can_post_field_access()) { ++ // Using barrier to order wrt. JVMTI check and load of result. ++ __ membar(MacroAssembler::LoadLoad); + -+ bind(CH1_LOOP); -+ add(tmp3, haystack, hlen_neg); -+ (this->*load_2chr)(ch2, Address(tmp3), noreg); -+ beq(ch1, ch2, MATCH); -+ add(hlen_neg, hlen_neg, haystack_chr_size); -+ blez(hlen_neg, CH1_LOOP); -+ j(NOMATCH); -+ BLOCK_COMMENT("} string_indexof DO2"); ++ // Check to see if a field access watch has been set before we ++ // take the fast path. ++ int32_t offset2; ++ __ la_patchable(result, ++ ExternalAddress((address) JvmtiExport::get_field_access_count_addr()), ++ offset2); ++ __ lwu(result, Address(result, offset2)); ++ __ bnez(result, slow); ++ ++ __ mv(robj, c_rarg1); ++ } else { ++ // Using address dependency to order wrt. load of result. ++ __ xorr(robj, c_rarg1, rcounter); ++ __ xorr(robj, robj, rcounter); // obj, since ++ // robj ^ rcounter ^ rcounter == robj ++ // robj is address dependent on rcounter. + } + -+ if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 3) { -+ Label FIRST_LOOP, STR2_NEXT, STR1_LOOP; -+ BLOCK_COMMENT("string_indexof DO3 {"); -+ -+ bind(DO3); -+ (this->*load_2chr)(first, Address(needle), noreg); -+ (this->*needle_load_1chr)(ch1, Address(needle, 2 * needle_chr_size), noreg); -+ if (needle_con_cnt == 3) { -+ sub(result_tmp, haystack_len, 3); -+ } -+ slli(hlen_tmp, result_tmp, haystack_chr_shift); -+ add(haystack, haystack, hlen_tmp); -+ neg(hlen_neg, hlen_tmp); -+ -+ bind(FIRST_LOOP); -+ add(ch2, haystack, hlen_neg); -+ (this->*load_2chr)(ch2, Address(ch2), noreg); -+ beq(first, ch2, STR1_LOOP); -+ -+ bind(STR2_NEXT); -+ add(hlen_neg, hlen_neg, haystack_chr_size); -+ blez(hlen_neg, FIRST_LOOP); -+ j(NOMATCH); ++ // Both robj and t0 are clobbered by try_resolve_jobject_in_native. 
++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ assert_cond(bs != NULL); ++ bs->try_resolve_jobject_in_native(masm, c_rarg0, robj, t0, slow); + -+ bind(STR1_LOOP); -+ add(hlen_tmp, hlen_neg, 2 * haystack_chr_size); -+ add(ch2, haystack, hlen_tmp); -+ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); -+ bne(ch1, ch2, STR2_NEXT); -+ j(MATCH); -+ BLOCK_COMMENT("} string_indexof DO3"); -+ } ++ __ srli(roffset, c_rarg2, 2); // offset + -+ if (needle_con_cnt == -1 || needle_con_cnt == 1) { -+ Label DO1_LOOP; ++ assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); ++ speculative_load_pclist[count] = __ pc(); // Used by the segfault handler ++ __ add(roffset, robj, roffset); + -+ BLOCK_COMMENT("string_indexof DO1 {"); -+ bind(DO1); -+ (this->*needle_load_1chr)(ch1, Address(needle), noreg); -+ sub(result_tmp, haystack_len, 1); -+ mv(tmp3, result_tmp); -+ if (haystack_chr_shift) { -+ slli(tmp3, result_tmp, haystack_chr_shift); ++ switch (type) { ++ case T_BOOLEAN: __ lbu(result, Address(roffset, 0)); break; ++ case T_BYTE: __ lb(result, Address(roffset, 0)); break; ++ case T_CHAR: __ lhu(result, Address(roffset, 0)); break; ++ case T_SHORT: __ lh(result, Address(roffset, 0)); break; ++ case T_INT: __ lw(result, Address(roffset, 0)); break; ++ case T_LONG: __ ld(result, Address(roffset, 0)); break; ++ case T_FLOAT: { ++ __ flw(f28, Address(roffset, 0)); // f28 as temporaries ++ __ fmv_x_w(result, f28); // f{31--0}-->x ++ break; + } -+ add(haystack, haystack, tmp3); -+ neg(hlen_neg, tmp3); -+ -+ bind(DO1_LOOP); -+ add(tmp3, haystack, hlen_neg); -+ (this->*haystack_load_1chr)(ch2, Address(tmp3), noreg); -+ beq(ch1, ch2, MATCH); -+ add(hlen_neg, hlen_neg, haystack_chr_size); -+ blez(hlen_neg, DO1_LOOP); -+ BLOCK_COMMENT("} string_indexof DO1"); ++ case T_DOUBLE: { ++ __ fld(f28, Address(roffset, 0)); // f28 as temporaries ++ __ fmv_x_d(result, f28); // d{63--0}-->x ++ break; ++ } ++ default: ShouldNotReachHere(); + } + -+ bind(NOMATCH); -+ mv(result, -1); -+ j(DONE); ++ // Using acquire: Order JVMTI check and load of result wrt. succeeding check ++ // (LoadStore for volatile field). ++ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); + -+ bind(MATCH); -+ srai(t0, hlen_neg, haystack_chr_shift); -+ add(result, result_tmp, t0); ++ __ lw(t0, safepoint_counter_addr); ++ __ bne(rcounter, t0, slow); + -+ bind(DONE); -+} ++ switch (type) { ++ case T_FLOAT: __ fmv_w_x(f10, result); break; ++ case T_DOUBLE: __ fmv_d_x(f10, result); break; ++ default: __ mv(x10, result); break; ++ } ++ __ ret(); + -+void MacroAssembler::element_compare(Register a1, Register a2, Register result, Register cnt, Register tmp1, Register tmp2, -+ VectorRegister vr1, VectorRegister vr2, VectorRegister vrs, bool islatin, Label &DONE) { -+ Label loop; -+ Assembler::SEW sew = islatin ? 
Assembler::e8 : Assembler::e16; ++ slowcase_entry_pclist[count++] = __ pc(); ++ __ bind(slow); ++ address slow_case_addr; ++ switch (type) { ++ case T_BOOLEAN: slow_case_addr = jni_GetBooleanField_addr(); break; ++ case T_BYTE: slow_case_addr = jni_GetByteField_addr(); break; ++ case T_CHAR: slow_case_addr = jni_GetCharField_addr(); break; ++ case T_SHORT: slow_case_addr = jni_GetShortField_addr(); break; ++ case T_INT: slow_case_addr = jni_GetIntField_addr(); break; ++ case T_LONG: slow_case_addr = jni_GetLongField_addr(); break; ++ case T_FLOAT: slow_case_addr = jni_GetFloatField_addr(); break; ++ case T_DOUBLE: slow_case_addr = jni_GetDoubleField_addr(); break; ++ default: ShouldNotReachHere(); ++ slow_case_addr = NULL; // unreachable ++ } + -+ bind(loop); -+ vsetvli(tmp1, cnt, sew, Assembler::m2); -+ vlex_v(vr1, a1, sew); -+ vlex_v(vr2, a2, sew); -+ vmsne_vv(vrs, vr1, vr2); -+ vfirst_m(tmp2, vrs); -+ bgez(tmp2, DONE); -+ sub(cnt, cnt, tmp1); -+ if (!islatin) { -+ slli(tmp1, tmp1, 1); // get byte counts ++ { ++ __ enter(); ++ int32_t tmp_offset = 0; ++ __ la_patchable(t0, ExternalAddress(slow_case_addr), tmp_offset); ++ __ jalr(x1, t0, tmp_offset); ++ __ leave(); ++ __ ret(); + } -+ add(a1, a1, tmp1); -+ add(a2, a2, tmp1); -+ bnez(cnt, loop); ++ __ flush(); + -+ mv(result, true); ++ return fast_entry; +} + -+void MacroAssembler::string_equals_v(Register a1, Register a2, Register result, Register cnt, int elem_size) { -+ Label DONE; -+ Register tmp1 = t0; -+ Register tmp2 = t1; -+ -+ BLOCK_COMMENT("string_equals_v {"); + -+ mv(result, false); ++address JNI_FastGetField::generate_fast_get_boolean_field() { ++ return generate_fast_get_int_field0(T_BOOLEAN); ++} + -+ if (elem_size == 2) { -+ srli(cnt, cnt, 1); -+ } ++address JNI_FastGetField::generate_fast_get_byte_field() { ++ return generate_fast_get_int_field0(T_BYTE); ++} + -+ element_compare(a1, a2, result, cnt, tmp1, tmp2, v0, v2, v0, elem_size == 1, DONE); ++address JNI_FastGetField::generate_fast_get_char_field() { ++ return generate_fast_get_int_field0(T_CHAR); ++} + -+ bind(DONE); -+ BLOCK_COMMENT("} string_equals_v"); ++address JNI_FastGetField::generate_fast_get_short_field() { ++ return generate_fast_get_int_field0(T_SHORT); +} + -+// used by C2 ClearArray patterns. -+// base: Address of a buffer to be zeroed -+// cnt: Count in HeapWords -+// -+// base, cnt, v0, v1 and t0 are clobbered. -+void MacroAssembler::clear_array_v(Register base, Register cnt) { -+ Label loop; ++address JNI_FastGetField::generate_fast_get_int_field() { ++ return generate_fast_get_int_field0(T_INT); ++} + -+ // making zero words -+ vsetvli(t0, cnt, Assembler::e64, Assembler::m4); -+ vxor_vv(v0, v0, v0); ++address JNI_FastGetField::generate_fast_get_long_field() { ++ return generate_fast_get_int_field0(T_LONG); ++} + -+ bind(loop); -+ vsetvli(t0, cnt, Assembler::e64, Assembler::m4); -+ vse64_v(v0, base); -+ sub(cnt, cnt, t0); -+ shadd(base, t0, base, t0, 3); -+ bnez(cnt, loop); ++address JNI_FastGetField::generate_fast_get_float_field() { ++ return generate_fast_get_int_field0(T_FLOAT); +} + -+void MacroAssembler::arrays_equals_v(Register a1, Register a2, Register result, -+ Register cnt1, int elem_size) { -+ Label DONE; -+ Register tmp1 = t0; -+ Register tmp2 = t1; -+ Register cnt2 = tmp2; -+ int length_offset = arrayOopDesc::length_offset_in_bytes(); -+ int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? 
T_CHAR : T_BYTE); ++address JNI_FastGetField::generate_fast_get_double_field() { ++ return generate_fast_get_int_field0(T_DOUBLE); ++} +diff --git a/src/hotspot/cpu/riscv/jniTypes_riscv.hpp b/src/hotspot/cpu/riscv/jniTypes_riscv.hpp +new file mode 100644 +index 00000000000..83ffcc55d83 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/jniTypes_riscv.hpp +@@ -0,0 +1,106 @@ ++/* ++ * Copyright (c) 1998, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ BLOCK_COMMENT("arrays_equals_v {"); ++#ifndef CPU_RISCV_JNITYPES_RISCV_HPP ++#define CPU_RISCV_JNITYPES_RISCV_HPP + -+ // if (a1 == a2), return true -+ mv(result, true); -+ oop_equal(a1, a2, DONE); ++#include "jni.h" ++#include "memory/allStatic.hpp" ++#include "oops/oop.hpp" + -+ mv(result, false); -+ // if a1 == null or a2 == null, return false -+ beqz(a1, DONE); -+ beqz(a2, DONE); -+ // if (a1.length != a2.length), return false -+ lwu(cnt1, Address(a1, length_offset)); -+ lwu(cnt2, Address(a2, length_offset)); -+ bne(cnt1, cnt2, DONE); ++// This file holds platform-dependent routines used to write primitive jni ++// types to the array of arguments passed into JavaCalls::call + -+ la(a1, Address(a1, base_offset)); -+ la(a2, Address(a2, base_offset)); ++class JNITypes : private AllStatic { ++ // These functions write a java primitive type (in native format) ++ // to a java stack slot array to be passed as an argument to JavaCalls:calls. ++ // I.e., they are functionally 'push' operations if they have a 'pos' ++ // formal parameter. Note that jlong's and jdouble's are written ++ // _in reverse_ of the order in which they appear in the interpreter ++ // stack. This is because call stubs (see stubGenerator_sparc.cpp) ++ // reverse the argument list constructed by JavaCallArguments (see ++ // javaCalls.hpp). + -+ element_compare(a1, a2, result, cnt1, tmp1, tmp2, v0, v2, v0, elem_size == 1, DONE); ++public: ++ // Ints are stored in native format in one JavaCallArgument slot at *to. ++ static inline void put_int(jint from, intptr_t *to) { *(jint *)(to + 0 ) = from; } ++ static inline void put_int(jint from, intptr_t *to, int& pos) { *(jint *)(to + pos++) = from; } ++ static inline void put_int(jint *from, intptr_t *to, int& pos) { *(jint *)(to + pos++) = *from; } + -+ bind(DONE); ++ // Longs are stored in native format in one JavaCallArgument slot at ++ // *(to+1). 
++ static inline void put_long(jlong from, intptr_t *to) { ++ *(jlong*) (to + 1) = from; ++ } + -+ BLOCK_COMMENT("} arrays_equals_v"); -+} ++ static inline void put_long(jlong from, intptr_t *to, int& pos) { ++ *(jlong*) (to + 1 + pos) = from; ++ pos += 2; ++ } + -+void MacroAssembler::string_compare_v(Register str1, Register str2, Register cnt1, Register cnt2, -+ Register result, Register tmp1, Register tmp2, int encForm) { -+ Label DIFFERENCE, DONE, L, loop; -+ bool encLL = encForm == StrIntrinsicNode::LL; -+ bool encLU = encForm == StrIntrinsicNode::LU; -+ bool encUL = encForm == StrIntrinsicNode::UL; ++ static inline void put_long(jlong *from, intptr_t *to, int& pos) { ++ *(jlong*) (to + 1 + pos) = *from; ++ pos += 2; ++ } + -+ bool str1_isL = encLL || encLU; -+ bool str2_isL = encLL || encUL; ++ // Oops are stored in native format in one JavaCallArgument slot at *to. ++ static inline void put_obj(const Handle& from_handle, intptr_t *to, int& pos) { *(to + pos++) = (intptr_t)from_handle.raw_value(); } ++ static inline void put_obj(jobject from_handle, intptr_t *to, int& pos) { *(to + pos++) = (intptr_t)from_handle; } + -+ int minCharsInWord = encLL ? wordSize : wordSize / 2; ++ // Floats are stored in native format in one JavaCallArgument slot at *to. ++ static inline void put_float(jfloat from, intptr_t *to) { *(jfloat *)(to + 0 ) = from; } ++ static inline void put_float(jfloat from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = from; } ++ static inline void put_float(jfloat *from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = *from; } + -+ BLOCK_COMMENT("string_compare {"); ++#undef _JNI_SLOT_OFFSET ++#define _JNI_SLOT_OFFSET 1 ++ // Doubles are stored in native word format in one JavaCallArgument ++ // slot at *(to+1). ++ static inline void put_double(jdouble from, intptr_t *to) { ++ *(jdouble*) (to + 1) = from; ++ } + -+ // for Lating strings, 1 byte for 1 character -+ // for UTF16 strings, 2 bytes for 1 character -+ if (!str1_isL) -+ sraiw(cnt1, cnt1, 1); -+ if (!str2_isL) -+ sraiw(cnt2, cnt2, 1); ++ static inline void put_double(jdouble from, intptr_t *to, int& pos) { ++ *(jdouble*) (to + 1 + pos) = from; ++ pos += 2; ++ } + -+ // if str1 == str2, return the difference -+ // save the minimum of the string lengths in cnt2. -+ sub(result, cnt1, cnt2); -+ bgt(cnt1, cnt2, L); -+ mv(cnt2, cnt1); -+ bind(L); ++ static inline void put_double(jdouble *from, intptr_t *to, int& pos) { ++ *(jdouble*) (to + 1 + pos) = *from; ++ pos += 2; ++ } + -+ if (str1_isL == str2_isL) { // LL or UU -+ element_compare(str1, str2, zr, cnt2, tmp1, tmp2, v2, v4, v1, encLL, DIFFERENCE); -+ j(DONE); -+ } else { // LU or UL -+ Register strL = encLU ? str1 : str2; -+ Register strU = encLU ? str2 : str1; -+ VectorRegister vstr1 = encLU ? v4 : v0; -+ VectorRegister vstr2 = encLU ? v0 : v4; ++ // The get_xxx routines, on the other hand, actually _do_ fetch ++ // java primitive types from the interpreter stack. ++ // No need to worry about alignment on Intel. 
++ static inline jint get_int (intptr_t *from) { return *(jint *) from; } ++ static inline jlong get_long (intptr_t *from) { return *(jlong *) (from + _JNI_SLOT_OFFSET); } ++ static inline oop get_obj (intptr_t *from) { return *(oop *) from; } ++ static inline jfloat get_float (intptr_t *from) { return *(jfloat *) from; } ++ static inline jdouble get_double(intptr_t *from) { return *(jdouble *)(from + _JNI_SLOT_OFFSET); } ++#undef _JNI_SLOT_OFFSET ++}; + -+ bind(loop); -+ vsetvli(tmp1, cnt2, Assembler::e8, Assembler::m2); -+ vle8_v(vstr1, strL); -+ vsetvli(tmp1, cnt2, Assembler::e16, Assembler::m4); -+ vzext_vf2(vstr2, vstr1); -+ vle16_v(vstr1, strU); -+ vmsne_vv(v0, vstr2, vstr1); -+ vfirst_m(tmp2, v0); -+ bgez(tmp2, DIFFERENCE); -+ sub(cnt2, cnt2, tmp1); -+ add(strL, strL, tmp1); -+ shadd(strU, tmp1, strU, tmp1, 1); -+ bnez(cnt2, loop); -+ j(DONE); -+ } -+ bind(DIFFERENCE); -+ slli(tmp1, tmp2, 1); -+ add(str1, str1, str1_isL ? tmp2 : tmp1); -+ add(str2, str2, str2_isL ? tmp2 : tmp1); -+ str1_isL ? lbu(tmp1, Address(str1, 0)) : lhu(tmp1, Address(str1, 0)); -+ str2_isL ? lbu(tmp2, Address(str2, 0)) : lhu(tmp2, Address(str2, 0)); -+ sub(result, tmp1, tmp2); ++#endif // CPU_RISCV_JNITYPES_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +new file mode 100644 +index 00000000000..86710295444 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -0,0 +1,4016 @@ ++/* ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ + -+ bind(DONE); -+} ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/assembler.inline.hpp" ++#include "compiler/disassembler.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "gc/shared/cardTable.hpp" ++#include "gc/shared/cardTableBarrierSet.hpp" ++#include "interpreter/bytecodeHistogram.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/resourceArea.hpp" ++#include "memory/universe.hpp" ++#include "nativeInst_riscv.hpp" ++#include "oops/accessDecorators.hpp" ++#include "oops/compressedOops.inline.hpp" ++#include "oops/klass.inline.hpp" ++#include "oops/oop.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/jniHandles.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.hpp" ++#include "utilities/powerOfTwo.hpp" ++#ifdef COMPILER2 ++#include "opto/compile.hpp" ++#include "opto/node.hpp" ++#include "opto/output.hpp" ++#endif + -+address MacroAssembler::byte_array_inflate_v(Register src, Register dst, Register len, Register tmp) { -+ Label loop; -+ assert_different_registers(src, dst, len, tmp, t0); ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#else ++#define BLOCK_COMMENT(str) block_comment(str) ++#endif ++#define BIND(label) bind(label); __ BLOCK_COMMENT(#label ":") + -+ BLOCK_COMMENT("byte_array_inflate_v {"); -+ bind(loop); -+ vsetvli(tmp, len, Assembler::e8, Assembler::m2); -+ vle8_v(v2, src); -+ vsetvli(t0, len, Assembler::e16, Assembler::m4); -+ vzext_vf2(v0, v2); -+ vse16_v(v0, dst); -+ sub(len, len, tmp); -+ add(src, src, tmp); -+ shadd(dst, tmp, dst, tmp, 1); -+ bnez(len, loop); -+ BLOCK_COMMENT("} byte_array_inflate_v"); -+ postcond(pc() != badAddress); -+ return pc(); ++static void pass_arg0(MacroAssembler* masm, Register arg) { ++ if (c_rarg0 != arg) { ++ assert_cond(masm != NULL); ++ masm->mv(c_rarg0, arg); ++ } +} + -+// Compress char[] array to byte[]. -+// result: the array length if every element in array can be encoded; 0, otherwise. -+void MacroAssembler::char_array_compress_v(Register src, Register dst, Register len, Register result, Register tmp) { -+ Label done; -+ encode_iso_array_v(src, dst, len, result, tmp); -+ beqz(len, done); -+ mv(result, zr); -+ bind(done); ++static void pass_arg1(MacroAssembler* masm, Register arg) { ++ if (c_rarg1 != arg) { ++ assert_cond(masm != NULL); ++ masm->mv(c_rarg1, arg); ++ } +} + -+// result: the number of elements had been encoded. 
-+void MacroAssembler::encode_iso_array_v(Register src, Register dst, Register len, Register result, Register tmp) { -+ Label loop, DIFFERENCE, DONE; ++static void pass_arg2(MacroAssembler* masm, Register arg) { ++ if (c_rarg2 != arg) { ++ assert_cond(masm != NULL); ++ masm->mv(c_rarg2, arg); ++ } ++} + -+ BLOCK_COMMENT("encode_iso_array_v {"); -+ mv(result, 0); ++static void pass_arg3(MacroAssembler* masm, Register arg) { ++ if (c_rarg3 != arg) { ++ assert_cond(masm != NULL); ++ masm->mv(c_rarg3, arg); ++ } ++} + -+ bind(loop); -+ mv(tmp, 0xff); -+ vsetvli(t0, len, Assembler::e16, Assembler::m2); -+ vle16_v(v2, src); -+ // if element > 0xff, stop -+ vmsgtu_vx(v1, v2, tmp); -+ vfirst_m(tmp, v1); -+ vmsbf_m(v0, v1); -+ // compress char to byte -+ vsetvli(t0, len, Assembler::e8); -+ vncvt_x_x_w(v1, v2, Assembler::v0_t); -+ vse8_v(v1, dst, Assembler::v0_t); ++void MacroAssembler::align(int modulus, int extra_offset) { ++ CompressibleRegion cr(this); ++ while ((offset() + extra_offset) % modulus != 0) { nop(); } ++} + -+ bgez(tmp, DIFFERENCE); -+ add(result, result, t0); -+ add(dst, dst, t0); -+ sub(len, len, t0); -+ shadd(src, t0, src, t0, 1); -+ bnez(len, loop); -+ j(DONE); ++void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { ++ call_VM_base(oop_result, noreg, noreg, entry_point, number_of_arguments, check_exceptions); ++} + -+ bind(DIFFERENCE); -+ add(result, result, tmp); ++// Implementation of call_VM versions + -+ bind(DONE); -+ BLOCK_COMMENT("} encode_iso_array_v"); ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ bool check_exceptions) { ++ call_VM_helper(oop_result, entry_point, 0, check_exceptions); +} + -+address MacroAssembler::has_negatives_v(Register ary, Register len, Register result, Register tmp) { -+ Label loop, DONE; ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions) { ++ pass_arg1(this, arg_1); ++ call_VM_helper(oop_result, entry_point, 1, check_exceptions); ++} + -+ mv(result, true); ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ bool check_exceptions) { ++ assert(arg_1 != c_rarg2, "smashed arg"); ++ pass_arg2(this, arg_2); ++ pass_arg1(this, arg_1); ++ call_VM_helper(oop_result, entry_point, 2, check_exceptions); ++} + -+ bind(loop); -+ vsetvli(t0, len, Assembler::e8, Assembler::m4); -+ vle8_v(v0, ary); -+ // if element highest bit is set, return true -+ vmslt_vx(v0, v0, zr); -+ vfirst_m(tmp, v0); -+ bgez(tmp, DONE); ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ Register arg_3, ++ bool check_exceptions) { ++ assert(arg_1 != c_rarg3, "smashed arg"); ++ assert(arg_2 != c_rarg3, "smashed arg"); ++ pass_arg3(this, arg_3); + -+ sub(len, len, t0); -+ add(ary, ary, t0); -+ bnez(len, loop); -+ mv(result, false); ++ assert(arg_1 != c_rarg2, "smashed arg"); ++ pass_arg2(this, arg_2); + -+ bind(DONE); -+ postcond(pc() != badAddress); -+ return pc(); ++ pass_arg1(this, arg_1); ++ call_VM_helper(oop_result, entry_point, 3, check_exceptions); +} + -+// string indexof -+// compute index by trailing zeros -+void MacroAssembler::compute_index(Register haystack, Register trailing_zero, -+ Register match_mask, Register result, -+ Register ch2, Register tmp, -+ bool haystack_isL) -+{ -+ int haystack_chr_shift = haystack_isL ? 
0 : 1; -+ srl(match_mask, match_mask, trailing_zero); -+ srli(match_mask, match_mask, 1); -+ srli(tmp, trailing_zero, LogBitsPerByte); -+ if (!haystack_isL) andi(tmp, tmp, 0xE); -+ add(haystack, haystack, tmp); -+ ld(ch2, Address(haystack)); -+ if (!haystack_isL) srli(tmp, tmp, haystack_chr_shift); -+ add(result, result, tmp); ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ call_VM_base(oop_result, xthread, last_java_sp, entry_point, number_of_arguments, check_exceptions); +} + -+// string indexof -+// Find pattern element in src, compute match mask, -+// only the first occurrence of 0x80/0x8000 at low bits is the valid match index -+// match mask patterns would be like: -+// - 0x8080808080808080 (Latin1) -+// - 0x8000800080008000 (UTF16) -+void MacroAssembler::compute_match_mask(Register src, Register pattern, Register match_mask, -+ Register mask1, Register mask2) -+{ -+ xorr(src, pattern, src); -+ sub(match_mask, src, mask1); -+ orr(src, src, mask2); -+ notr(src, src); -+ andr(match_mask, match_mask, src); ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions) { ++ pass_arg1(this, arg_1); ++ call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); +} + -+// add two unsigned input and output carry -+void MacroAssembler::cad(Register dst, Register src1, Register src2, Register carry) -+{ -+ assert_different_registers(dst, carry); -+ assert_different_registers(dst, src2); -+ add(dst, src1, src2); -+ sltu(carry, dst, src2); -+} ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ bool check_exceptions) { + -+// add two input with carry -+void MacroAssembler::adc(Register dst, Register src1, Register src2, Register carry) -+{ -+ assert_different_registers(dst, carry); -+ add(dst, src1, src2); -+ add(dst, dst, carry); ++ assert(arg_1 != c_rarg2, "smashed arg"); ++ pass_arg2(this, arg_2); ++ pass_arg1(this, arg_1); ++ call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); +} + -+// add two unsigned input with carry and output carry -+void MacroAssembler::cadc(Register dst, Register src1, Register src2, Register carry) -+{ -+ assert_different_registers(dst, src2); -+ adc(dst, src1, src2, carry); -+ sltu(carry, dst, src2); ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ Register arg_3, ++ bool check_exceptions) { ++ assert(arg_1 != c_rarg3, "smashed arg"); ++ assert(arg_2 != c_rarg3, "smashed arg"); ++ pass_arg3(this, arg_3); ++ assert(arg_1 != c_rarg2, "smashed arg"); ++ pass_arg2(this, arg_2); ++ pass_arg1(this, arg_1); ++ call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); +} + -+void MacroAssembler::add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo, -+ Register src1, Register src2, Register carry) -+{ -+ cad(dest_lo, dest_lo, src1, carry); -+ add(dest_hi, dest_hi, carry); -+ cad(dest_lo, dest_lo, src2, carry); -+ add(final_dest_hi, dest_hi, carry); -+} ++// these are no-ops overridden by InterpreterMacroAssembler ++void MacroAssembler::check_and_handle_earlyret(Register java_thread) {} ++void MacroAssembler::check_and_handle_popframe(Register java_thread) {} + -+// Code for BigInteger::mulAdd instrinsic -+// out = x10 -+// in = x11 -+// 
offset = x12 (already out.length-offset) -+// len = x13 -+// k = x14 -+void MacroAssembler::mul_add(Register out, Register in, Register offset, -+ Register len, Register k, Register tmp1, Register tmp2) { -+ Label L_loop_1, L_loop_2, L_end, L_not_zero; -+ bnez(len, L_not_zero); -+ mv(out, zr); -+ j(L_end); -+ bind(L_not_zero); -+ zero_extend(k, k, 32); -+ shadd(offset, offset, out, t0, LogBytesPerInt); -+ shadd(in, len, in, t0, LogBytesPerInt); -+ mv(out, zr); ++// Calls to C land ++// ++// When entering C land, the fp, & esp of the last Java frame have to be recorded ++// in the (thread-local) JavaThread object. When leaving C land, the last Java fp ++// has to be reset to 0. This is required to allow proper stack traversal. ++void MacroAssembler::set_last_Java_frame(Register last_java_sp, ++ Register last_java_fp, ++ Register last_java_pc, ++ Register tmp) { + -+ if (AvoidUnalignedAccesses) { -+ // if in and offset are both 8 bytes aligned. -+ orr(t0, in, offset); -+ andi(t0, t0, 0x7); -+ beqz(t0, L_loop_2); -+ } else { -+ j(L_loop_2); ++ if (last_java_pc->is_valid()) { ++ sd(last_java_pc, Address(xthread, ++ JavaThread::frame_anchor_offset() + ++ JavaFrameAnchor::last_Java_pc_offset())); + } + -+ bind(L_loop_1); -+ sub(in, in, 4); -+ lwu(t0, Address(in, 0)); -+ mul(t1, t0, k); -+ add(t0, t1, out); -+ sub(offset, offset, 4); -+ lwu(t1, Address(offset, 0)); -+ add(t0, t0, t1); -+ sw(t0, Address(offset)); -+ srli(out, t0, 32); -+ sub(len, len, 1); -+ beqz(len, L_end); -+ j(L_loop_1); -+ -+ -+ bind(L_loop_2); -+ Label L_one; -+ sub(len, len, 1); -+ bltz(len, L_end); -+ sub(len, len, 1); -+ bltz(len, L_one); -+ -+ sub(in, in, 8); -+ ld(tmp1, Address(in, 0)); -+ ror_imm(tmp1, tmp1, 32); // convert to little-endian -+ -+ const Register carry = out; -+ const Register src1_hi = t0; -+ const Register src1_lo = tmp2; -+ const Register src2 = t1; -+ -+ mulhu(src1_hi, k, tmp1); -+ mul(src1_lo, k, tmp1); -+ sub(offset, offset, 8); -+ ld(src2, Address(offset, 0)); -+ ror_imm(src2, src2, 32, tmp1); -+ add2_with_carry(carry, src1_hi, src1_lo, carry, src2, tmp1); -+ ror_imm(src1_lo, src1_lo, 32, tmp1); // back to big-endian -+ sd(src1_lo, Address(offset, 0)); -+ j(L_loop_2); -+ -+ bind(L_one); -+ sub(in, in, 4); -+ lwu(t0, Address(in, 0)); -+ mul(t1, t0, k); -+ add(t0, t1, out); -+ sub(offset, offset, 4); -+ lwu(t1, Address(offset, 0)); -+ add(t0, t0, t1); -+ sw(t0, Address(offset)); -+ srli(out, t0, 32); -+ -+ bind(L_end); -+} -+ -+/** -+ * Multiply 32 bit by 32 bit first loop. 
-+ */ -+void MacroAssembler::multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart, -+ Register y, Register y_idx, Register z, -+ Register carry, Register product, -+ Register idx, Register kdx) -+{ -+ // long carry = 0; -+ // for (int j=ystart, k=ystart+1+xstart; j >= 0; j--, k--) { -+ // long product = (y[j] & LONG_MASK) * -+ // (x[xstart] & LONG_MASK) + carry; -+ // z[k] = (int)product; -+ // carry = product >>> 32; -+ // } -+ // z[xstart] = (int)carry; ++ // determine last_java_sp register ++ if (last_java_sp == sp) { ++ mv(tmp, sp); ++ last_java_sp = tmp; ++ } else if (!last_java_sp->is_valid()) { ++ last_java_sp = esp; ++ } + -+ Label L_first_loop, L_first_loop_exit; ++ sd(last_java_sp, Address(xthread, JavaThread::last_Java_sp_offset())); + -+ shadd(t0, xstart, x, t0, LogBytesPerInt); -+ lwu(x_xstart, Address(t0, 0)); ++ // last_java_fp is optional ++ if (last_java_fp->is_valid()) { ++ sd(last_java_fp, Address(xthread, JavaThread::last_Java_fp_offset())); ++ } ++} + -+ bind(L_first_loop); -+ sub(idx, idx, 1); -+ bltz(idx, L_first_loop_exit); ++void MacroAssembler::set_last_Java_frame(Register last_java_sp, ++ Register last_java_fp, ++ address last_java_pc, ++ Register tmp) { ++ assert(last_java_pc != NULL, "must provide a valid PC"); + -+ shadd(t0, idx, y, t0, LogBytesPerInt); -+ lwu(y_idx, Address(t0, 0)); -+ mul(product, x_xstart, y_idx); -+ add(product, product, carry); -+ srli(carry, product, 32); -+ sub(kdx, kdx, 1); -+ shadd(t0, kdx, z, t0, LogBytesPerInt); -+ sw(product, Address(t0, 0)); -+ j(L_first_loop); ++ la(tmp, last_java_pc); ++ sd(tmp, Address(xthread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); + -+ bind(L_first_loop_exit); ++ set_last_Java_frame(last_java_sp, last_java_fp, noreg, tmp); +} + -+/** -+ * Multiply 64 bit by 64 bit first loop. 
-+ */ -+void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart, -+ Register y, Register y_idx, Register z, -+ Register carry, Register product, -+ Register idx, Register kdx) -+{ -+ // -+ // jlong carry, x[], y[], z[]; -+ // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) { -+ // huge_128 product = y[idx] * x[xstart] + carry; -+ // z[kdx] = (jlong)product; -+ // carry = (jlong)(product >>> 64); -+ // } -+ // z[xstart] = carry; -+ // ++void MacroAssembler::set_last_Java_frame(Register last_java_sp, ++ Register last_java_fp, ++ Label &L, ++ Register tmp) { ++ if (L.is_bound()) { ++ set_last_Java_frame(last_java_sp, last_java_fp, target(L), tmp); ++ } else { ++ InstructionMark im(this); ++ L.add_patch_at(code(), locator()); ++ set_last_Java_frame(last_java_sp, last_java_fp, pc() /* Patched later */, tmp); ++ } ++} + -+ Label L_first_loop, L_first_loop_exit; -+ Label L_one_x, L_one_y, L_multiply; ++void MacroAssembler::reset_last_Java_frame(bool clear_fp) { ++ // we must set sp to zero to clear frame ++ sd(zr, Address(xthread, JavaThread::last_Java_sp_offset())); + -+ sub(xstart, xstart, 1); -+ bltz(xstart, L_one_x); ++ // must clear fp, so that compiled frames are not confused; it is ++ // possible that we need it only for debugging ++ if (clear_fp) { ++ sd(zr, Address(xthread, JavaThread::last_Java_fp_offset())); ++ } + -+ shadd(t0, xstart, x, t0, LogBytesPerInt); -+ ld(x_xstart, Address(t0, 0)); -+ ror_imm(x_xstart, x_xstart, 32); // convert big-endian to little-endian ++ // Always clear the pc because it could have been set by make_walkable() ++ sd(zr, Address(xthread, JavaThread::last_Java_pc_offset())); ++} + -+ bind(L_first_loop); -+ sub(idx, idx, 1); -+ bltz(idx, L_first_loop_exit); -+ sub(idx, idx, 1); -+ bltz(idx, L_one_y); ++void MacroAssembler::call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ // determine java_thread register ++ if (!java_thread->is_valid()) { ++ java_thread = xthread; ++ } ++ // determine last_java_sp register ++ if (!last_java_sp->is_valid()) { ++ last_java_sp = esp; ++ } + -+ shadd(t0, idx, y, t0, LogBytesPerInt); -+ ld(y_idx, Address(t0, 0)); -+ ror_imm(y_idx, y_idx, 32); // convert big-endian to little-endian -+ bind(L_multiply); ++ // debugging support ++ assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); ++ assert(java_thread == xthread, "unexpected register"); + -+ mulhu(t0, x_xstart, y_idx); -+ mul(product, x_xstart, y_idx); -+ cad(product, product, carry, t1); -+ adc(carry, t0, zr, t1); ++ assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result"); ++ assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp"); + -+ sub(kdx, kdx, 2); -+ ror_imm(product, product, 32); // back to big-endian -+ shadd(t0, kdx, z, t0, LogBytesPerInt); -+ sd(product, Address(t0, 0)); ++ // push java thread (becomes first argument of C function) ++ mv(c_rarg0, java_thread); + -+ j(L_first_loop); ++ // set last Java frame before call ++ assert(last_java_sp != fp, "can't use fp"); + -+ bind(L_one_y); -+ lwu(y_idx, Address(y, 0)); -+ j(L_multiply); ++ Label l; ++ set_last_Java_frame(last_java_sp, fp, l, t0); + -+ bind(L_one_x); -+ lwu(x_xstart, Address(x, 0)); -+ j(L_first_loop); ++ // do the call, remove parameters ++ MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments, &l); + -+ 
bind(L_first_loop_exit); -+} ++ // reset last Java frame ++ // Only interpreter should have to clear fp ++ reset_last_Java_frame(true); + -+/** -+ * Multiply 128 bit by 128. Unrolled inner loop. -+ * -+ */ -+void MacroAssembler::multiply_128_x_128_loop(Register y, Register z, -+ Register carry, Register carry2, -+ Register idx, Register jdx, -+ Register yz_idx1, Register yz_idx2, -+ Register tmp, Register tmp3, Register tmp4, -+ Register tmp6, Register product_hi) -+{ -+ // jlong carry, x[], y[], z[]; -+ // int kdx = xstart+1; -+ // for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop -+ // huge_128 tmp3 = (y[idx+1] * product_hi) + z[kdx+idx+1] + carry; -+ // jlong carry2 = (jlong)(tmp3 >>> 64); -+ // huge_128 tmp4 = (y[idx] * product_hi) + z[kdx+idx] + carry2; -+ // carry = (jlong)(tmp4 >>> 64); -+ // z[kdx+idx+1] = (jlong)tmp3; -+ // z[kdx+idx] = (jlong)tmp4; -+ // } -+ // idx += 2; -+ // if (idx > 0) { -+ // yz_idx1 = (y[idx] * product_hi) + z[kdx+idx] + carry; -+ // z[kdx+idx] = (jlong)yz_idx1; -+ // carry = (jlong)(yz_idx1 >>> 64); -+ // } -+ // -+ -+ Label L_third_loop, L_third_loop_exit, L_post_third_loop_done; -+ -+ srli(jdx, idx, 2); -+ -+ bind(L_third_loop); ++ // C++ interp handles this in the interpreter ++ check_and_handle_popframe(java_thread); ++ check_and_handle_earlyret(java_thread); + -+ sub(jdx, jdx, 1); -+ bltz(jdx, L_third_loop_exit); -+ sub(idx, idx, 4); ++ if (check_exceptions) { ++ // check for pending exceptions (java_thread is set upon return) ++ ld(t0, Address(java_thread, in_bytes(Thread::pending_exception_offset()))); ++ Label ok; ++ beqz(t0, ok); ++ int32_t offset = 0; ++ la_patchable(t0, RuntimeAddress(StubRoutines::forward_exception_entry()), offset); ++ jalr(x0, t0, offset); ++ bind(ok); ++ } + -+ shadd(t0, idx, y, t0, LogBytesPerInt); -+ ld(yz_idx2, Address(t0, 0)); -+ ld(yz_idx1, Address(t0, wordSize)); ++ // get oop result if there is one and reset the value in the thread ++ if (oop_result->is_valid()) { ++ get_vm_result(oop_result, java_thread); ++ } ++} + -+ shadd(tmp6, idx, z, t0, LogBytesPerInt); ++void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) { ++ ld(oop_result, Address(java_thread, JavaThread::vm_result_offset())); ++ sd(zr, Address(java_thread, JavaThread::vm_result_offset())); ++ verify_oop(oop_result, "broken oop in call_VM_base"); ++} + -+ ror_imm(yz_idx1, yz_idx1, 32); // convert big-endian to little-endian -+ ror_imm(yz_idx2, yz_idx2, 32); ++void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) { ++ ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset())); ++ sd(zr, Address(java_thread, JavaThread::vm_result_2_offset())); ++} + -+ ld(t1, Address(tmp6, 0)); -+ ld(t0, Address(tmp6, wordSize)); ++void MacroAssembler::clinit_barrier(Register klass, Register tmp, Label* L_fast_path, Label* L_slow_path) { ++ assert(L_fast_path != NULL || L_slow_path != NULL, "at least one is required"); ++ assert_different_registers(klass, xthread, tmp); + -+ mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3 -+ mulhu(tmp4, product_hi, yz_idx1); ++ Label L_fallthrough, L_tmp; ++ if (L_fast_path == NULL) { ++ L_fast_path = &L_fallthrough; ++ } else if (L_slow_path == NULL) { ++ L_slow_path = &L_fallthrough; ++ } + -+ ror_imm(t0, t0, 32, tmp); // convert big-endian to little-endian -+ ror_imm(t1, t1, 32, tmp); ++ // Fast path check: class is fully initialized ++ lbu(tmp, Address(klass, InstanceKlass::init_state_offset())); ++ sub(tmp, tmp, 
InstanceKlass::fully_initialized); ++ beqz(tmp, *L_fast_path); + -+ mul(tmp, product_hi, yz_idx2); // yz_idx2 * product_hi -> carry2:tmp -+ mulhu(carry2, product_hi, yz_idx2); ++ // Fast path check: current thread is initializer thread ++ ld(tmp, Address(klass, InstanceKlass::init_thread_offset())); + -+ cad(tmp3, tmp3, carry, carry); -+ adc(tmp4, tmp4, zr, carry); -+ cad(tmp3, tmp3, t0, t0); -+ cadc(tmp4, tmp4, tmp, t0); -+ adc(carry, carry2, zr, t0); -+ cad(tmp4, tmp4, t1, carry2); -+ adc(carry, carry, zr, carry2); ++ if (L_slow_path == &L_fallthrough) { ++ beq(xthread, tmp, *L_fast_path); ++ bind(*L_slow_path); ++ } else if (L_fast_path == &L_fallthrough) { ++ bne(xthread, tmp, *L_slow_path); ++ bind(*L_fast_path); ++ } else { ++ Unimplemented(); ++ } ++} + -+ ror_imm(tmp3, tmp3, 32); // convert little-endian to big-endian -+ ror_imm(tmp4, tmp4, 32); -+ sd(tmp4, Address(tmp6, 0)); -+ sd(tmp3, Address(tmp6, wordSize)); ++void MacroAssembler::verify_oop(Register reg, const char* s) { ++ if (!VerifyOops) { return; } + -+ j(L_third_loop); ++ // Pass register number to verify_oop_subroutine ++ const char* b = NULL; ++ { ++ ResourceMark rm; ++ stringStream ss; ++ ss.print("verify_oop: %s: %s", reg->name(), s); ++ b = code_string(ss.as_string()); ++ } ++ BLOCK_COMMENT("verify_oop {"); + -+ bind(L_third_loop_exit); ++ push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); + -+ andi(idx, idx, 0x3); -+ beqz(idx, L_post_third_loop_done); ++ mv(c_rarg0, reg); // c_rarg0 : x10 ++ li(t0, (uintptr_t)(address)b); + -+ Label L_check_1; -+ sub(idx, idx, 2); -+ bltz(idx, L_check_1); ++ // call indirectly to solve generation ordering problem ++ int32_t offset = 0; ++ la_patchable(t1, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()), offset); ++ ld(t1, Address(t1, offset)); ++ jalr(t1); + -+ shadd(t0, idx, y, t0, LogBytesPerInt); -+ ld(yz_idx1, Address(t0, 0)); -+ ror_imm(yz_idx1, yz_idx1, 32); ++ pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); + -+ mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3 -+ mulhu(tmp4, product_hi, yz_idx1); ++ BLOCK_COMMENT("} verify_oop"); ++} + -+ shadd(t0, idx, z, t0, LogBytesPerInt); -+ ld(yz_idx2, Address(t0, 0)); -+ ror_imm(yz_idx2, yz_idx2, 32, tmp); ++void MacroAssembler::verify_oop_addr(Address addr, const char* s) { ++ if (!VerifyOops) { ++ return; ++ } + -+ add2_with_carry(carry, tmp4, tmp3, carry, yz_idx2, tmp); ++ const char* b = NULL; ++ { ++ ResourceMark rm; ++ stringStream ss; ++ ss.print("verify_oop_addr: %s", s); ++ b = code_string(ss.as_string()); ++ } ++ BLOCK_COMMENT("verify_oop_addr {"); + -+ ror_imm(tmp3, tmp3, 32, tmp); -+ sd(tmp3, Address(t0, 0)); ++ push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); + -+ bind(L_check_1); ++ if (addr.uses(sp)) { ++ la(x10, addr); ++ ld(x10, Address(x10, 4 * wordSize)); ++ } else { ++ ld(x10, addr); ++ } + -+ andi(idx, idx, 0x1); -+ sub(idx, idx, 1); -+ bltz(idx, L_post_third_loop_done); -+ shadd(t0, idx, y, t0, LogBytesPerInt); -+ lwu(tmp4, Address(t0, 0)); -+ mul(tmp3, tmp4, product_hi); // tmp4 * product_hi -> carry2:tmp3 -+ mulhu(carry2, tmp4, product_hi); ++ li(t0, (uintptr_t)(address)b); + -+ shadd(t0, idx, z, t0, LogBytesPerInt); -+ lwu(tmp4, Address(t0, 0)); ++ // call indirectly to solve generation ordering problem ++ int32_t offset = 0; ++ la_patchable(t1, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()), offset); ++ ld(t1, Address(t1, offset)); ++ jalr(t1); + -+ add2_with_carry(carry2, carry2, tmp3, tmp4, carry); ++ pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); + -+ 
shadd(t0, idx, z, t0, LogBytesPerInt); -+ sw(tmp3, Address(t0, 0)); -+ slli(t0, carry2, 32); -+ srli(carry, tmp3, 32); -+ orr(carry, carry, t0); ++ BLOCK_COMMENT("} verify_oop_addr"); ++} + -+ bind(L_post_third_loop_done); ++Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, ++ int extra_slot_offset) { ++ // cf. TemplateTable::prepare_invoke(), if (load_receiver). ++ int stackElementSize = Interpreter::stackElementSize; ++ int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0); ++#ifdef ASSERT ++ int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1); ++ assert(offset1 - offset == stackElementSize, "correct arithmetic"); ++#endif ++ if (arg_slot.is_constant()) { ++ return Address(esp, arg_slot.as_constant() * stackElementSize + offset); ++ } else { ++ assert_different_registers(t0, arg_slot.as_register()); ++ shadd(t0, arg_slot.as_register(), esp, t0, exact_log2(stackElementSize)); ++ return Address(t0, offset); ++ } +} + -+/** -+ * Code for BigInteger::multiplyToLen() instrinsic. -+ * -+ * x10: x -+ * x11: xlen -+ * x12: y -+ * x13: ylen -+ * x14: z -+ * x15: zlen -+ * x16: tmp1 -+ * x17: tmp2 -+ * x7: tmp3 -+ * x28: tmp4 -+ * x29: tmp5 -+ * x30: tmp6 -+ * x31: tmp7 -+ */ -+void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Register ylen, -+ Register z, Register zlen, -+ Register tmp1, Register tmp2, Register tmp3, Register tmp4, -+ Register tmp5, Register tmp6, Register product_hi) -+{ -+ assert_different_registers(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6); ++#ifndef PRODUCT ++extern "C" void findpc(intptr_t x); ++#endif + -+ const Register idx = tmp1; -+ const Register kdx = tmp2; -+ const Register xstart = tmp3; ++void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) ++{ ++ // In order to get locks to work, we need to fake a in_VM state ++ if (ShowMessageBoxOnError) { ++ JavaThread* thread = JavaThread::current(); ++ JavaThreadState saved_state = thread->thread_state(); ++ thread->set_thread_state(_thread_in_vm); ++#ifndef PRODUCT ++ if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { ++ ttyLocker ttyl; ++ BytecodeCounter::print(); ++ } ++#endif ++ if (os::message_box(msg, "Execution stopped, print registers?")) { ++ ttyLocker ttyl; ++ tty->print_cr(" pc = 0x%016lx", pc); ++#ifndef PRODUCT ++ tty->cr(); ++ findpc(pc); ++ tty->cr(); ++#endif ++ tty->print_cr(" x0 = 0x%016lx", regs[0]); ++ tty->print_cr(" x1 = 0x%016lx", regs[1]); ++ tty->print_cr(" x2 = 0x%016lx", regs[2]); ++ tty->print_cr(" x3 = 0x%016lx", regs[3]); ++ tty->print_cr(" x4 = 0x%016lx", regs[4]); ++ tty->print_cr(" x5 = 0x%016lx", regs[5]); ++ tty->print_cr(" x6 = 0x%016lx", regs[6]); ++ tty->print_cr(" x7 = 0x%016lx", regs[7]); ++ tty->print_cr(" x8 = 0x%016lx", regs[8]); ++ tty->print_cr(" x9 = 0x%016lx", regs[9]); ++ tty->print_cr("x10 = 0x%016lx", regs[10]); ++ tty->print_cr("x11 = 0x%016lx", regs[11]); ++ tty->print_cr("x12 = 0x%016lx", regs[12]); ++ tty->print_cr("x13 = 0x%016lx", regs[13]); ++ tty->print_cr("x14 = 0x%016lx", regs[14]); ++ tty->print_cr("x15 = 0x%016lx", regs[15]); ++ tty->print_cr("x16 = 0x%016lx", regs[16]); ++ tty->print_cr("x17 = 0x%016lx", regs[17]); ++ tty->print_cr("x18 = 0x%016lx", regs[18]); ++ tty->print_cr("x19 = 0x%016lx", regs[19]); ++ tty->print_cr("x20 = 0x%016lx", regs[20]); ++ tty->print_cr("x21 = 0x%016lx", regs[21]); ++ tty->print_cr("x22 = 0x%016lx", regs[22]); ++ tty->print_cr("x23 = 0x%016lx", regs[23]); ++ tty->print_cr("x24 = 0x%016lx", regs[24]); ++ 
tty->print_cr("x25 = 0x%016lx", regs[25]); ++ tty->print_cr("x26 = 0x%016lx", regs[26]); ++ tty->print_cr("x27 = 0x%016lx", regs[27]); ++ tty->print_cr("x28 = 0x%016lx", regs[28]); ++ tty->print_cr("x30 = 0x%016lx", regs[30]); ++ tty->print_cr("x31 = 0x%016lx", regs[31]); ++ BREAKPOINT; ++ } ++ } ++ fatal("DEBUG MESSAGE: %s", msg); ++} + -+ const Register y_idx = tmp4; -+ const Register carry = tmp5; -+ const Register product = xlen; -+ const Register x_xstart = zlen; // reuse register ++void MacroAssembler::resolve_jobject(Register value, Register thread, Register tmp) { ++ Label done, not_weak; ++ beqz(value, done); // Use NULL as-is. + -+ mv(idx, ylen); // idx = ylen; -+ mv(kdx, zlen); // kdx = xlen+ylen; -+ mv(carry, zr); // carry = 0; ++ // Test for jweak tag. ++ andi(t0, value, JNIHandles::weak_tag_mask); ++ beqz(t0, not_weak); + -+ Label L_multiply_64_or_128, L_done; ++ // Resolve jweak. ++ access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, value, ++ Address(value, -JNIHandles::weak_tag_value), tmp, thread); ++ verify_oop(value); ++ j(done); + -+ sub(xstart, xlen, 1); -+ bltz(xstart, L_done); ++ bind(not_weak); ++ // Resolve (untagged) jobject. ++ access_load_at(T_OBJECT, IN_NATIVE, value, Address(value, 0), tmp, thread); ++ verify_oop(value); ++ bind(done); ++} + -+ const Register jdx = tmp1; ++void MacroAssembler::stop(const char* msg) { ++ address ip = pc(); ++ pusha(); ++ li(c_rarg0, (uintptr_t)(address)msg); ++ li(c_rarg1, (uintptr_t)(address)ip); ++ mv(c_rarg2, sp); ++ mv(c_rarg3, CAST_FROM_FN_PTR(address, MacroAssembler::debug64)); ++ jalr(c_rarg3); ++ ebreak(); ++} + -+ if (AvoidUnalignedAccesses) { -+ // if x and y are both 8 bytes aligend. -+ orr(t0, xlen, ylen); -+ andi(t0, t0, 0x1); -+ beqz(t0, L_multiply_64_or_128); -+ } else { -+ j(L_multiply_64_or_128); ++void MacroAssembler::unimplemented(const char* what) { ++ const char* buf = NULL; ++ { ++ ResourceMark rm; ++ stringStream ss; ++ ss.print("unimplemented: %s", what); ++ buf = code_string(ss.as_string()); + } ++ stop(buf); ++} + -+ multiply_32_x_32_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); -+ shadd(t0, xstart, z, t0, LogBytesPerInt); -+ sw(carry, Address(t0, 0)); ++void MacroAssembler::emit_static_call_stub() { ++ // CompiledDirectStaticCall::set_to_interpreted knows the ++ // exact layout of this stub. + -+ Label L_second_loop_1; -+ bind(L_second_loop_1); -+ mv(carry, zr); -+ mv(jdx, ylen); -+ sub(xstart, xstart, 1); -+ bltz(xstart, L_done); -+ sub(sp, sp, 2 * wordSize); -+ sd(z, Address(sp, 0)); -+ sd(zr, Address(sp, wordSize)); -+ shadd(t0, xstart, z, t0, LogBytesPerInt); -+ addi(z, t0, 4); -+ shadd(t0, xstart, x, t0, LogBytesPerInt); -+ lwu(product, Address(t0, 0)); -+ Label L_third_loop, L_third_loop_exit; ++ ifence(); ++ mov_metadata(xmethod, (Metadata*)NULL); + -+ bind(L_third_loop); -+ sub(jdx, jdx, 1); -+ bltz(jdx, L_third_loop_exit); ++ // Jump to the entry point of the i2c stub. 
++ int32_t offset = 0; ++ movptr_with_offset(t0, 0, offset); ++ jalr(x0, t0, offset); ++} + -+ shadd(t0, jdx, y, t0, LogBytesPerInt); -+ lwu(t0, Address(t0, 0)); -+ mul(t1, t0, product); -+ add(t0, t1, carry); -+ shadd(tmp6, jdx, z, t1, LogBytesPerInt); -+ lwu(t1, Address(tmp6, 0)); -+ add(t0, t0, t1); -+ sw(t0, Address(tmp6, 0)); -+ srli(carry, t0, 32); -+ j(L_third_loop); ++void MacroAssembler::call_VM_leaf_base(address entry_point, ++ int number_of_arguments, ++ Label *retaddr) { ++ call_native_base(entry_point, retaddr); ++} + -+ bind(L_third_loop_exit); -+ ld(z, Address(sp, 0)); -+ addi(sp, sp, 2 * wordSize); -+ shadd(t0, xstart, z, t0, LogBytesPerInt); -+ sw(carry, Address(t0, 0)); ++void MacroAssembler::call_native(address entry_point, Register arg_0) { ++ pass_arg0(this, arg_0); ++ call_native_base(entry_point); ++} + -+ j(L_second_loop_1); ++void MacroAssembler::call_native_base(address entry_point, Label *retaddr) { ++ Label E, L; ++ int32_t offset = 0; ++ push_reg(0x80000040, sp); // push << t0 & xmethod >> to sp ++ movptr_with_offset(t0, entry_point, offset); ++ jalr(x1, t0, offset); ++ if (retaddr != NULL) { ++ bind(*retaddr); ++ } ++ pop_reg(0x80000040, sp); // pop << t0 & xmethod >> from sp ++} + -+ bind(L_multiply_64_or_128); -+ multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); ++void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { ++ call_VM_leaf_base(entry_point, number_of_arguments); ++} + -+ Label L_second_loop_2; -+ beqz(kdx, L_second_loop_2); ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { ++ pass_arg0(this, arg_0); ++ call_VM_leaf_base(entry_point, 1); ++} + -+ Label L_carry; -+ sub(kdx, kdx, 1); -+ beqz(kdx, L_carry); ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { ++ pass_arg0(this, arg_0); ++ pass_arg1(this, arg_1); ++ call_VM_leaf_base(entry_point, 2); ++} + -+ shadd(t0, kdx, z, t0, LogBytesPerInt); -+ sw(carry, Address(t0, 0)); -+ srli(carry, carry, 32); -+ sub(kdx, kdx, 1); ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, ++ Register arg_1, Register arg_2) { ++ pass_arg0(this, arg_0); ++ pass_arg1(this, arg_1); ++ pass_arg2(this, arg_2); ++ call_VM_leaf_base(entry_point, 3); ++} + -+ bind(L_carry); -+ shadd(t0, kdx, z, t0, LogBytesPerInt); -+ sw(carry, Address(t0, 0)); ++void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) { ++ pass_arg0(this, arg_0); ++ MacroAssembler::call_VM_leaf_base(entry_point, 1); ++} + -+ // Second and third (nested) loops. 
-+ // -+ // for (int i = xstart-1; i >= 0; i--) { // Second loop -+ // carry = 0; -+ // for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop -+ // long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) + -+ // (z[k] & LONG_MASK) + carry; -+ // z[k] = (int)product; -+ // carry = product >>> 32; -+ // } -+ // z[i] = (int)carry; -+ // } -+ // -+ // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = product_hi ++void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { + -+ bind(L_second_loop_2); -+ mv(carry, zr); // carry = 0; -+ mv(jdx, ylen); // j = ystart+1 ++ assert(arg_0 != c_rarg1, "smashed arg"); ++ pass_arg1(this, arg_1); ++ pass_arg0(this, arg_0); ++ MacroAssembler::call_VM_leaf_base(entry_point, 2); ++} + -+ sub(xstart, xstart, 1); // i = xstart-1; -+ bltz(xstart, L_done); ++void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { ++ assert(arg_0 != c_rarg2, "smashed arg"); ++ assert(arg_1 != c_rarg2, "smashed arg"); ++ pass_arg2(this, arg_2); ++ assert(arg_0 != c_rarg1, "smashed arg"); ++ pass_arg1(this, arg_1); ++ pass_arg0(this, arg_0); ++ MacroAssembler::call_VM_leaf_base(entry_point, 3); ++} + -+ sub(sp, sp, 4 * wordSize); -+ sd(z, Address(sp, 0)); ++void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) { ++ assert(arg_0 != c_rarg3, "smashed arg"); ++ assert(arg_1 != c_rarg3, "smashed arg"); ++ assert(arg_2 != c_rarg3, "smashed arg"); ++ pass_arg3(this, arg_3); ++ assert(arg_0 != c_rarg2, "smashed arg"); ++ assert(arg_1 != c_rarg2, "smashed arg"); ++ pass_arg2(this, arg_2); ++ assert(arg_0 != c_rarg1, "smashed arg"); ++ pass_arg1(this, arg_1); ++ pass_arg0(this, arg_0); ++ MacroAssembler::call_VM_leaf_base(entry_point, 4); ++} + -+ Label L_last_x; -+ shadd(t0, xstart, z, t0, LogBytesPerInt); -+ addi(z, t0, 4); -+ sub(xstart, xstart, 1); // i = xstart-1; -+ bltz(xstart, L_last_x); ++void MacroAssembler::nop() { ++ addi(x0, x0, 0); ++} + -+ shadd(t0, xstart, x, t0, LogBytesPerInt); -+ ld(product_hi, Address(t0, 0)); -+ ror_imm(product_hi, product_hi, 32); // convert big-endian to little-endian ++void MacroAssembler::mv(Register Rd, Register Rs) { ++ if (Rd != Rs) { ++ addi(Rd, Rs, 0); ++ } ++} + -+ Label L_third_loop_prologue; -+ bind(L_third_loop_prologue); ++void MacroAssembler::notr(Register Rd, Register Rs) { ++ xori(Rd, Rs, -1); ++} + -+ sd(ylen, Address(sp, wordSize)); -+ sd(x, Address(sp, 2 * wordSize)); -+ sd(xstart, Address(sp, 3 * wordSize)); -+ multiply_128_x_128_loop(y, z, carry, x, jdx, ylen, product, -+ tmp2, x_xstart, tmp3, tmp4, tmp6, product_hi); -+ ld(z, Address(sp, 0)); -+ ld(ylen, Address(sp, wordSize)); -+ ld(x, Address(sp, 2 * wordSize)); -+ ld(xlen, Address(sp, 3 * wordSize)); // copy old xstart -> xlen -+ addi(sp, sp, 4 * wordSize); ++void MacroAssembler::neg(Register Rd, Register Rs) { ++ sub(Rd, x0, Rs); ++} + -+ addi(tmp3, xlen, 1); -+ shadd(t0, tmp3, z, t0, LogBytesPerInt); -+ sw(carry, Address(t0, 0)); ++void MacroAssembler::negw(Register Rd, Register Rs) { ++ subw(Rd, x0, Rs); ++} + -+ sub(tmp3, tmp3, 1); -+ bltz(tmp3, L_done); ++void MacroAssembler::sext_w(Register Rd, Register Rs) { ++ addiw(Rd, Rs, 0); ++} + -+ // z[i] = (int) carry; -+ srli(carry, carry, 32); -+ shadd(t0, tmp3, z, t0, LogBytesPerInt); -+ sw(carry, Address(t0, 0)); -+ j(L_second_loop_2); ++void MacroAssembler::zext_b(Register Rd, Register Rs) { ++ andi(Rd, Rs, 0xFF); ++} + -+ // Next infrequent code 
is moved outside loops. -+ bind(L_last_x); -+ lwu(product_hi, Address(x, 0)); -+ j(L_third_loop_prologue); ++void MacroAssembler::seqz(Register Rd, Register Rs) { ++ sltiu(Rd, Rs, 1); ++} + -+ bind(L_done); ++void MacroAssembler::snez(Register Rd, Register Rs) { ++ sltu(Rd, x0, Rs); +} -+#endif // COMPILER2 + -+// Count bits of trailing zero chars from lsb to msb until first non-zero element. -+// For LL case, one byte for one element, so shift 8 bits once, and for other case, -+// shift 16 bits once. -+void MacroAssembler::ctzc_bit(Register Rd, Register Rs, bool isLL, Register tmp1, Register tmp2) -+{ -+ if (UseZbb) { -+ assert_different_registers(Rd, Rs, tmp1); -+ int step = isLL ? 8 : 16; -+ ctz(Rd, Rs); -+ andi(tmp1, Rd, step - 1); -+ sub(Rd, Rd, tmp1); -+ return; -+ } -+ assert_different_registers(Rd, Rs, tmp1, tmp2); -+ Label Loop; -+ int step = isLL ? 8 : 16; -+ mv(Rd, -step); -+ mv(tmp2, Rs); ++void MacroAssembler::sltz(Register Rd, Register Rs) { ++ slt(Rd, Rs, x0); ++} + -+ bind(Loop); -+ addi(Rd, Rd, step); -+ andi(tmp1, tmp2, ((1 << step) - 1)); -+ srli(tmp2, tmp2, step); -+ beqz(tmp1, Loop); ++void MacroAssembler::sgtz(Register Rd, Register Rs) { ++ slt(Rd, x0, Rs); +} + -+// This instruction reads adjacent 4 bytes from the lower half of source register, -+// inflate into a register, for example: -+// Rs: A7A6A5A4A3A2A1A0 -+// Rd: 00A300A200A100A0 -+void MacroAssembler::inflate_lo32(Register Rd, Register Rs, Register tmp1, Register tmp2) -+{ -+ assert_different_registers(Rd, Rs, tmp1, tmp2); -+ mv(tmp1, 0xFF000000); // first byte mask at lower word -+ andr(Rd, Rs, tmp1); -+ for (int i = 0; i < 2; i++) { -+ slli(Rd, Rd, wordSize); -+ srli(tmp1, tmp1, wordSize); -+ andr(tmp2, Rs, tmp1); -+ orr(Rd, Rd, tmp2); ++void MacroAssembler::fmv_s(FloatRegister Rd, FloatRegister Rs) { ++ if (Rd != Rs) { ++ fsgnj_s(Rd, Rs, Rs); + } -+ slli(Rd, Rd, wordSize); -+ andi(tmp2, Rs, 0xFF); // last byte mask at lower word -+ orr(Rd, Rd, tmp2); +} + -+// This instruction reads adjacent 4 bytes from the upper half of source register, -+// inflate into a register, for example: -+// Rs: A7A6A5A4A3A2A1A0 -+// Rd: 00A700A600A500A4 -+void MacroAssembler::inflate_hi32(Register Rd, Register Rs, Register tmp1, Register tmp2) -+{ -+ assert_different_registers(Rd, Rs, tmp1, tmp2); -+ srli(Rs, Rs, 32); // only upper 32 bits are needed -+ inflate_lo32(Rd, Rs, tmp1, tmp2); ++void MacroAssembler::fabs_s(FloatRegister Rd, FloatRegister Rs) { ++ fsgnjx_s(Rd, Rs, Rs); +} + -+// The size of the blocks erased by the zero_blocks stub. We must -+// handle anything smaller than this ourselves in zero_words(). -+const int MacroAssembler::zero_words_block_size = 8; -+ -+// zero_words() is used by C2 ClearArray patterns. It is as small as -+// possible, handling small word counts locally and delegating -+// anything larger to the zero_blocks stub. It is expanded many times -+// in compiled code, so it is important to keep it short. -+ -+// ptr: Address of a buffer to be zeroed. -+// cnt: Count in HeapWords. -+// -+// ptr, cnt, and t0 are clobbered. 
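Editor's aside, not part of the patch: the one-instruction aliases added above (notr, neg, seqz, snez, zext_b, and the float fneg/fabs forms) are conventional RISC-V idioms. A minimal host-side C++ check of the integer identities they rely on, included only as an illustration:

    #include <cassert>
    #include <cstdint>

    int main() {
      int64_t x = -12345;
      assert((x ^ -1) == ~x);                            // notr:   xori  rd, rs, -1
      assert((int64_t)0 - x == -x);                      // neg:    sub   rd, x0, rs
      assert(((uint64_t)x < 1u) == (x == 0));            // seqz:   sltiu rd, rs, 1
      assert(((uint64_t)0 < (uint64_t)x) == (x != 0));   // snez:   sltu  rd, x0, rs
      assert((x & 0xFF) == (int64_t)(uint8_t)x);         // zext_b: andi  rd, rs, 0xFF
      return 0;
    }

The float variants use the same idea through sign injection: fsgnjn/fsgnjx with both source operands equal yield negation and absolute value without touching exponent or mantissa.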
-+address MacroAssembler::zero_words(Register ptr, Register cnt) -+{ -+ assert(is_power_of_2(zero_words_block_size), "adjust this"); -+ assert(ptr == x28 && cnt == x29, "mismatch in register usage"); -+ assert_different_registers(cnt, t0); ++void MacroAssembler::fneg_s(FloatRegister Rd, FloatRegister Rs) { ++ fsgnjn_s(Rd, Rs, Rs); ++} + -+ BLOCK_COMMENT("zero_words {"); -+ mv(t0, zero_words_block_size); -+ Label around, done, done16; -+ bltu(cnt, t0, around); -+ { -+ RuntimeAddress zero_blocks = RuntimeAddress(StubRoutines::riscv::zero_blocks()); -+ assert(zero_blocks.target() != NULL, "zero_blocks stub has not been generated"); -+ if (StubRoutines::riscv::complete()) { -+ address tpc = trampoline_call(zero_blocks); -+ if (tpc == NULL) { -+ DEBUG_ONLY(reset_labels1(around)); -+ postcond(pc() == badAddress); -+ return NULL; -+ } -+ } else { -+ jal(zero_blocks); -+ } -+ } -+ bind(around); -+ for (int i = zero_words_block_size >> 1; i > 1; i >>= 1) { -+ Label l; -+ andi(t0, cnt, i); -+ beqz(t0, l); -+ for (int j = 0; j < i; j++) { -+ sd(zr, Address(ptr, 0)); -+ addi(ptr, ptr, 8); -+ } -+ bind(l); -+ } -+ { -+ Label l; -+ andi(t0, cnt, 1); -+ beqz(t0, l); -+ sd(zr, Address(ptr, 0)); -+ bind(l); ++void MacroAssembler::fmv_d(FloatRegister Rd, FloatRegister Rs) { ++ if (Rd != Rs) { ++ fsgnj_d(Rd, Rs, Rs); + } -+ BLOCK_COMMENT("} zero_words"); -+ postcond(pc() != badAddress); -+ return pc(); +} + -+// base: Address of a buffer to be zeroed, 8 bytes aligned. -+// cnt: Immediate count in HeapWords. -+#define SmallArraySize (18 * BytesPerLong) -+void MacroAssembler::zero_words(Register base, uint64_t cnt) -+{ -+ assert_different_registers(base, t0, t1); ++void MacroAssembler::fabs_d(FloatRegister Rd, FloatRegister Rs) { ++ fsgnjx_d(Rd, Rs, Rs); ++} + -+ BLOCK_COMMENT("zero_words {"); ++void MacroAssembler::fneg_d(FloatRegister Rd, FloatRegister Rs) { ++ fsgnjn_d(Rd, Rs, Rs); ++} + -+ if (cnt <= SmallArraySize / BytesPerLong) { -+ for (int i = 0; i < (int)cnt; i++) { -+ sd(zr, Address(base, i * wordSize)); -+ } ++void MacroAssembler::vmnot_m(VectorRegister vd, VectorRegister vs) { ++ vmnand_mm(vd, vs, vs); ++} ++ ++void MacroAssembler::vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm) { ++ vnsrl_wx(vd, vs, x0, vm); ++} ++ ++void MacroAssembler::vfneg_v(VectorRegister vd, VectorRegister vs) { ++ vfsgnjn_vv(vd, vs, vs); ++} ++ ++void MacroAssembler::la(Register Rd, const address &dest) { ++ int64_t offset = dest - pc(); ++ if (is_offset_in_range(offset, 32)) { ++ auipc(Rd, (int32_t)offset + 0x800); //0x800, Note:the 11th sign bit ++ addi(Rd, Rd, ((int64_t)offset << 52) >> 52); + } else { -+ const int unroll = 8; // Number of sd(zr, adr), instructions we'll unroll -+ int remainder = cnt % unroll; -+ for (int i = 0; i < remainder; i++) { -+ sd(zr, Address(base, i * wordSize)); -+ } ++ movptr(Rd, dest); ++ } ++} + -+ Label loop; -+ Register cnt_reg = t0; -+ Register loop_base = t1; -+ cnt = cnt - remainder; -+ mv(cnt_reg, cnt); -+ add(loop_base, base, remainder * wordSize); -+ bind(loop); -+ sub(cnt_reg, cnt_reg, unroll); -+ for (int i = 0; i < unroll; i++) { -+ sd(zr, Address(loop_base, i * wordSize)); ++void MacroAssembler::la(Register Rd, const Address &adr) { ++ InstructionMark im(this); ++ code_section()->relocate(inst_mark(), adr.rspec()); ++ relocInfo::relocType rtype = adr.rspec().reloc()->type(); ++ ++ switch (adr.getMode()) { ++ case Address::literal: { ++ if (rtype == relocInfo::none) { ++ li(Rd, (intptr_t)(adr.target())); ++ } else { ++ movptr(Rd, adr.target()); ++ } ++ break; + 
} -+ add(loop_base, loop_base, unroll * wordSize); -+ bnez(cnt_reg, loop); ++ case Address::base_plus_offset: { ++ int32_t offset = 0; ++ baseOffset(Rd, adr, offset); ++ addi(Rd, Rd, offset); ++ break; ++ } ++ default: ++ ShouldNotReachHere(); + } -+ BLOCK_COMMENT("} zero_words"); +} + -+// base: Address of a buffer to be filled, 8 bytes aligned. -+// cnt: Count in 8-byte unit. -+// value: Value to be filled with. -+// base will point to the end of the buffer after filling. -+void MacroAssembler::fill_words(Register base, Register cnt, Register value) -+{ -+// Algorithm: -+// -+// t0 = cnt & 7 -+// cnt -= t0 -+// p += t0 -+// switch (t0): -+// switch start: -+// do while cnt -+// cnt -= 8 -+// p[-8] = value -+// case 7: -+// p[-7] = value -+// case 6: -+// p[-6] = value -+// // ... -+// case 1: -+// p[-1] = value -+// case 0: -+// p += 8 -+// do-while end -+// switch end ++void MacroAssembler::la(Register Rd, Label &label) { ++ la(Rd, target(label)); ++} + -+ assert_different_registers(base, cnt, value, t0, t1); ++#define INSN(NAME) \ ++ void MacroAssembler::NAME##z(Register Rs, const address &dest) { \ ++ NAME(Rs, zr, dest); \ ++ } \ ++ void MacroAssembler::NAME##z(Register Rs, Label &l, bool is_far) { \ ++ NAME(Rs, zr, l, is_far); \ ++ } \ + -+ Label fini, skip, entry, loop; -+ const int unroll = 8; // Number of sd instructions we'll unroll ++ INSN(beq); ++ INSN(bne); ++ INSN(blt); ++ INSN(ble); ++ INSN(bge); ++ INSN(bgt); + -+ beqz(cnt, fini); ++#undef INSN + -+ andi(t0, cnt, unroll - 1); -+ sub(cnt, cnt, t0); -+ // align 8, so first sd n % 8 = mod, next loop sd 8 * n. -+ shadd(base, t0, base, t1, 3); -+ la(t1, entry); -+ slli(t0, t0, 2); // sd_inst_nums * 4; t0 is cnt % 8, so t1 = t1 - sd_inst_nums * 4, 4 is sizeof(inst) -+ sub(t1, t1, t0); -+ jr(t1); ++// Float compare branch instructions + -+ bind(loop); -+ add(base, base, unroll * 8); -+ for (int i = -unroll; i < 0; i++) { -+ sd(value, Address(base, i * 8)); ++#define INSN(NAME, FLOATCMP, BRANCH) \ ++ void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) { \ ++ FLOATCMP##_s(t0, Rs1, Rs2); \ ++ BRANCH(t0, l, is_far); \ ++ } \ ++ void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) { \ ++ FLOATCMP##_d(t0, Rs1, Rs2); \ ++ BRANCH(t0, l, is_far); \ + } -+ bind(entry); -+ sub(cnt, cnt, unroll); -+ bgez(cnt, loop); + -+ bind(fini); -+} ++ INSN(beq, feq, bnez); ++ INSN(bne, feq, beqz); + -+#define FCVT_SAFE(FLOATCVT, FLOATEQ) \ -+void MacroAssembler:: FLOATCVT##_safe(Register dst, FloatRegister src, Register tmp) { \ -+ Label L_Okay; \ -+ fscsr(zr); \ -+ FLOATCVT(dst, src); \ -+ frcsr(tmp); \ -+ andi(tmp, tmp, 0x1E); \ -+ beqz(tmp, L_Okay); \ -+ FLOATEQ(tmp, src, src); \ -+ bnez(tmp, L_Okay); \ -+ mv(dst, zr); \ -+ bind(L_Okay); \ -+} ++#undef INSN + -+FCVT_SAFE(fcvt_w_s, feq_s) -+FCVT_SAFE(fcvt_l_s, feq_s) -+FCVT_SAFE(fcvt_w_d, feq_d) -+FCVT_SAFE(fcvt_l_d, feq_d) + -+#undef FCVT_SAFE ++#define INSN(NAME, FLOATCMP1, FLOATCMP2) \ ++ void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ ++ bool is_far, bool is_unordered) { \ ++ if (is_unordered) { \ ++ /* jump if either source is NaN or condition is expected */ \ ++ FLOATCMP2##_s(t0, Rs2, Rs1); \ ++ beqz(t0, l, is_far); \ ++ } else { \ ++ /* jump if no NaN in source and condition is expected */ \ ++ FLOATCMP1##_s(t0, Rs1, Rs2); \ ++ bnez(t0, l, is_far); \ ++ } \ ++ } \ ++ void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, 
Label &l, \ ++ bool is_far, bool is_unordered) { \ ++ if (is_unordered) { \ ++ /* jump if either source is NaN or condition is expected */ \ ++ FLOATCMP2##_d(t0, Rs2, Rs1); \ ++ beqz(t0, l, is_far); \ ++ } else { \ ++ /* jump if no NaN in source and condition is expected */ \ ++ FLOATCMP1##_d(t0, Rs1, Rs2); \ ++ bnez(t0, l, is_far); \ ++ } \ ++ } + -+#define FCMP(FLOATTYPE, FLOATSIG) \ -+void MacroAssembler::FLOATTYPE##_compare(Register result, FloatRegister Rs1, \ -+ FloatRegister Rs2, int unordered_result) { \ -+ Label Ldone; \ -+ if (unordered_result < 0) { \ -+ /* we want -1 for unordered or less than, 0 for equal and 1 for greater than. */ \ -+ /* installs 1 if gt else 0 */ \ -+ flt_##FLOATSIG(result, Rs2, Rs1); \ -+ /* Rs1 > Rs2, install 1 */ \ -+ bgtz(result, Ldone); \ -+ feq_##FLOATSIG(result, Rs1, Rs2); \ -+ addi(result, result, -1); \ -+ /* Rs1 = Rs2, install 0 */ \ -+ /* NaN or Rs1 < Rs2, install -1 */ \ -+ bind(Ldone); \ -+ } else { \ -+ /* we want -1 for less than, 0 for equal and 1 for unordered or greater than. */ \ -+ /* installs 1 if gt or unordered else 0 */ \ -+ flt_##FLOATSIG(result, Rs1, Rs2); \ -+ /* Rs1 < Rs2, install -1 */ \ -+ bgtz(result, Ldone); \ -+ feq_##FLOATSIG(result, Rs1, Rs2); \ -+ addi(result, result, -1); \ -+ /* Rs1 = Rs2, install 0 */ \ -+ /* NaN or Rs1 > Rs2, install 1 */ \ -+ bind(Ldone); \ -+ neg(result, result); \ -+ } \ -+} ++ INSN(ble, fle, flt); ++ INSN(blt, flt, fle); + -+FCMP(float, s); -+FCMP(double, d); ++#undef INSN + -+#undef FCMP ++#define INSN(NAME, CMP) \ ++ void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ ++ bool is_far, bool is_unordered) { \ ++ float_##CMP(Rs2, Rs1, l, is_far, is_unordered); \ ++ } \ ++ void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ ++ bool is_far, bool is_unordered) { \ ++ double_##CMP(Rs2, Rs1, l, is_far, is_unordered); \ ++ } + -+// Zero words; len is in bytes -+// Destroys all registers except addr -+// len must be a nonzero multiple of wordSize -+void MacroAssembler::zero_memory(Register addr, Register len, Register tmp1) { -+ assert_different_registers(addr, len, tmp1, t0, t1); ++ INSN(bgt, blt); ++ INSN(bge, ble); + -+#ifdef ASSERT -+ { -+ Label L; -+ andi(t0, len, BytesPerWord - 1); -+ beqz(t0, L); -+ stop("len is not a multiple of BytesPerWord"); -+ bind(L); -+ } -+#endif // ASSERT ++#undef INSN + -+#ifndef PRODUCT -+ block_comment("zero memory"); -+#endif // PRODUCT + -+ Label loop; -+ Label entry; ++#define INSN(NAME, CSR) \ ++ void MacroAssembler::NAME(Register Rd) { \ ++ csrr(Rd, CSR); \ ++ } + -+ // Algorithm: -+ // -+ // t0 = cnt & 7 -+ // cnt -= t0 -+ // p += t0 -+ // switch (t0) { -+ // do { -+ // cnt -= 8 -+ // p[-8] = 0 -+ // case 7: -+ // p[-7] = 0 -+ // case 6: -+ // p[-6] = 0 -+ // ... 
-+ // case 1: -+ // p[-1] = 0 -+ // case 0: -+ // p += 8 -+ // } while (cnt) -+ // } ++ INSN(rdinstret, CSR_INSTERT); ++ INSN(rdcycle, CSR_CYCLE); ++ INSN(rdtime, CSR_TIME); ++ INSN(frcsr, CSR_FCSR); ++ INSN(frrm, CSR_FRM); ++ INSN(frflags, CSR_FFLAGS); + -+ const int unroll = 8; // Number of sd(zr) instructions we'll unroll ++#undef INSN + -+ srli(len, len, LogBytesPerWord); -+ andi(t0, len, unroll - 1); // t0 = cnt % unroll -+ sub(len, len, t0); // cnt -= unroll -+ // tmp1 always points to the end of the region we're about to zero -+ shadd(tmp1, t0, addr, t1, LogBytesPerWord); -+ la(t1, entry); -+ slli(t0, t0, 2); -+ sub(t1, t1, t0); -+ jr(t1); -+ bind(loop); -+ sub(len, len, unroll); -+ for (int i = -unroll; i < 0; i++) { -+ Assembler::sd(zr, Address(tmp1, i * wordSize)); -+ } -+ bind(entry); -+ add(tmp1, tmp1, unroll * wordSize); -+ bnez(len, loop); ++void MacroAssembler::csrr(Register Rd, unsigned csr) { ++ csrrs(Rd, csr, x0); +} + -+// shift left by shamt and add -+// Rd = (Rs1 << shamt) + Rs2 -+void MacroAssembler::shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt) { -+ if (UseZba) { -+ if (shamt == 1) { -+ sh1add(Rd, Rs1, Rs2); -+ return; -+ } else if (shamt == 2) { -+ sh2add(Rd, Rs1, Rs2); -+ return; -+ } else if (shamt == 3) { -+ sh3add(Rd, Rs1, Rs2); -+ return; -+ } ++#define INSN(NAME, OPFUN) \ ++ void MacroAssembler::NAME(unsigned csr, Register Rs) { \ ++ OPFUN(x0, csr, Rs); \ + } + -+ if (shamt != 0) { -+ slli(tmp, Rs1, shamt); -+ add(Rd, Rs2, tmp); -+ } else { -+ add(Rd, Rs1, Rs2); -+ } -+} ++ INSN(csrw, csrrw); ++ INSN(csrs, csrrs); ++ INSN(csrc, csrrc); + -+void MacroAssembler::zero_extend(Register dst, Register src, int bits) { -+ if (UseZba && bits == 32) { -+ zext_w(dst, src); -+ return; -+ } ++#undef INSN + -+ if (UseZbb && bits == 16) { -+ zext_h(dst, src); -+ return; ++#define INSN(NAME, OPFUN) \ ++ void MacroAssembler::NAME(unsigned csr, unsigned imm) { \ ++ OPFUN(x0, csr, imm); \ + } + -+ if (bits == 8) { -+ zext_b(dst, src); -+ } else { -+ slli(dst, src, XLEN - bits); -+ srli(dst, dst, XLEN - bits); -+ } -+} ++ INSN(csrwi, csrrwi); ++ INSN(csrsi, csrrsi); ++ INSN(csrci, csrrci); + -+void MacroAssembler::sign_extend(Register dst, Register src, int bits) { -+ if (UseZbb) { -+ if (bits == 8) { -+ sext_b(dst, src); -+ return; -+ } else if (bits == 16) { -+ sext_h(dst, src); -+ return; -+ } -+ } ++#undef INSN + -+ if (bits == 32) { -+ sext_w(dst, src); -+ } else { -+ slli(dst, src, XLEN - bits); -+ srai(dst, dst, XLEN - bits); ++#define INSN(NAME, CSR) \ ++ void MacroAssembler::NAME(Register Rd, Register Rs) { \ ++ csrrw(Rd, CSR, Rs); \ + } -+} + -+void MacroAssembler::cmp_l2i(Register dst, Register src1, Register src2, Register tmp) -+{ -+ if (src1 == src2) { -+ mv(dst, zr); -+ return; -+ } -+ Label done; -+ Register left = src1; -+ Register right = src2; -+ if (dst == src1) { -+ assert_different_registers(dst, src2, tmp); -+ mv(tmp, src1); -+ left = tmp; -+ } else if (dst == src2) { -+ assert_different_registers(dst, src1, tmp); -+ mv(tmp, src2); -+ right = tmp; ++ INSN(fscsr, CSR_FCSR); ++ INSN(fsrm, CSR_FRM); ++ INSN(fsflags, CSR_FFLAGS); ++ ++#undef INSN ++ ++#define INSN(NAME) \ ++ void MacroAssembler::NAME(Register Rs) { \ ++ NAME(x0, Rs); \ + } + -+ // installs 1 if gt else 0 -+ slt(dst, right, left); -+ bnez(dst, done); -+ slt(dst, left, right); -+ // dst = -1 if lt; else if eq , dst = 0 -+ neg(dst, dst); -+ bind(done); -+} ++ INSN(fscsr); ++ INSN(fsrm); ++ INSN(fsflags); + -+void MacroAssembler::load_constant_pool_cache(Register cpool, 
Register method) -+{ -+ ld(cpool, Address(method, Method::const_offset())); -+ ld(cpool, Address(cpool, ConstMethod::constants_offset())); -+ ld(cpool, Address(cpool, ConstantPool::cache_offset_in_bytes())); ++#undef INSN ++ ++void MacroAssembler::fsrmi(Register Rd, unsigned imm) { ++ guarantee(imm < 5, "Rounding Mode is invalid in Rounding Mode register"); ++ csrrwi(Rd, CSR_FRM, imm); +} + -+void MacroAssembler::load_max_stack(Register dst, Register method) -+{ -+ ld(dst, Address(xmethod, Method::const_offset())); -+ lhu(dst, Address(dst, ConstMethod::max_stack_offset())); ++void MacroAssembler::fsflagsi(Register Rd, unsigned imm) { ++ csrrwi(Rd, CSR_FFLAGS, imm); +} + -+// The java_calling_convention describes stack locations as ideal slots on -+// a frame with no abi restrictions. Since we must observe abi restrictions -+// (like the placement of the register window) the slots must be biased by -+// the following value. -+static int reg2offset_in(VMReg r) { -+ // Account for saved fp and ra -+ // This should really be in_preserve_stack_slots -+ return r->reg2stack() * VMRegImpl::stack_slot_size; ++#define INSN(NAME) \ ++ void MacroAssembler::NAME(unsigned imm) { \ ++ NAME(x0, imm); \ ++ } ++ ++ INSN(fsrmi); ++ INSN(fsflagsi); ++ ++#undef INSN ++ ++void MacroAssembler::push_reg(Register Rs) ++{ ++ addi(esp, esp, 0 - wordSize); ++ sd(Rs, Address(esp, 0)); +} + -+static int reg2offset_out(VMReg r) { -+ return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; ++void MacroAssembler::pop_reg(Register Rd) ++{ ++ ld(Rd, esp, 0); ++ addi(esp, esp, wordSize); +} + -+// On 64 bit we will store integer like items to the stack as -+// 64 bits items (riscv64 abi) even though java would only store -+// 32bits for a parameter. On 32bit it will simply be 32 bits -+// So this routine will do 32->32 on 32bit and 32->64 on 64bit -+void MacroAssembler::move32_64(VMRegPair src, VMRegPair dst, Register tmp) { -+ if (src.first()->is_stack()) { -+ if (dst.first()->is_stack()) { -+ // stack to stack -+ ld(tmp, Address(fp, reg2offset_in(src.first()))); -+ sd(tmp, Address(sp, reg2offset_out(dst.first()))); -+ } else { -+ // stack to reg -+ lw(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); -+ } -+ } else if (dst.first()->is_stack()) { -+ // reg to stack -+ sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first()))); -+ } else { -+ if (dst.first() != src.first()) { -+ // 32bits extend sign -+ addw(dst.first()->as_Register(), src.first()->as_Register(), zr); ++int MacroAssembler::bitset_to_regs(unsigned int bitset, unsigned char* regs) { ++ int count = 0; ++ // Scan bitset to accumulate register pairs ++ for (int reg = 31; reg >= 0; reg--) { ++ if ((1U << 31) & bitset) { ++ regs[count++] = reg; + } ++ bitset <<= 1; + } ++ return count; +} + -+// An oop arg. Must pass a handle not the oop itself -+void MacroAssembler::object_move(OopMap* map, -+ int oop_handle_offset, -+ int framesize_in_slots, -+ VMRegPair src, -+ VMRegPair dst, -+ bool is_receiver, -+ int* receiver_offset) { -+ assert_cond(map != NULL && receiver_offset != NULL); -+ // must pass a handle. First figure out the location we use as a handle -+ Register rHandle = dst.first()->is_stack() ? t1 : dst.first()->as_Register(); -+ -+ // See if oop is NULL if it is we need no handle ++// Push lots of registers in the bit set supplied. Don't push sp. 
++// Return the number of words pushed ++int MacroAssembler::push_reg(unsigned int bitset, Register stack) { ++ DEBUG_ONLY(int words_pushed = 0;) ++ CompressibleRegion cr(this); + -+ if (src.first()->is_stack()) { -+ // Oop is already on the stack as an argument -+ int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots(); -+ map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); -+ if (is_receiver) { -+ *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size; -+ } ++ unsigned char regs[32]; ++ int count = bitset_to_regs(bitset, regs); ++ // reserve one slot to align for odd count ++ int offset = is_even(count) ? 0 : wordSize; + -+ ld(t0, Address(fp, reg2offset_in(src.first()))); -+ la(rHandle, Address(fp, reg2offset_in(src.first()))); -+ // conditionally move a NULL -+ Label notZero1; -+ bnez(t0, notZero1); -+ mv(rHandle, zr); -+ bind(notZero1); -+ } else { ++ if (count) { ++ addi(stack, stack, - count * wordSize - offset); ++ } ++ for (int i = count - 1; i >= 0; i--) { ++ sd(as_Register(regs[i]), Address(stack, (count - 1 - i) * wordSize + offset)); ++ DEBUG_ONLY(words_pushed ++;) ++ } + -+ // Oop is in a register we must store it to the space we reserve -+ // on the stack for oop_handles and pass a handle if oop is non-NULL ++ assert(words_pushed == count, "oops, pushed != count"); + -+ const Register rOop = src.first()->as_Register(); -+ int oop_slot = -1; -+ if (rOop == j_rarg0) { -+ oop_slot = 0; -+ } else if (rOop == j_rarg1) { -+ oop_slot = 1; -+ } else if (rOop == j_rarg2) { -+ oop_slot = 2; -+ } else if (rOop == j_rarg3) { -+ oop_slot = 3; -+ } else if (rOop == j_rarg4) { -+ oop_slot = 4; -+ } else if (rOop == j_rarg5) { -+ oop_slot = 5; -+ } else if (rOop == j_rarg6) { -+ oop_slot = 6; -+ } else { -+ assert(rOop == j_rarg7, "wrong register"); -+ oop_slot = 7; -+ } ++ return count; ++} + -+ oop_slot = oop_slot * VMRegImpl::slots_per_word + oop_handle_offset; -+ int offset = oop_slot * VMRegImpl::stack_slot_size; ++int MacroAssembler::pop_reg(unsigned int bitset, Register stack) { ++ DEBUG_ONLY(int words_popped = 0;) ++ CompressibleRegion cr(this); + -+ map->set_oop(VMRegImpl::stack2reg(oop_slot)); -+ // Store oop in handle area, may be NULL -+ sd(rOop, Address(sp, offset)); -+ if (is_receiver) { -+ *receiver_offset = offset; -+ } ++ unsigned char regs[32]; ++ int count = bitset_to_regs(bitset, regs); ++ // reserve one slot to align for odd count ++ int offset = is_even(count) ? 0 : wordSize; + -+ //rOop maybe the same as rHandle -+ if (rOop == rHandle) { -+ Label isZero; -+ beqz(rOop, isZero); -+ la(rHandle, Address(sp, offset)); -+ bind(isZero); -+ } else { -+ Label notZero2; -+ la(rHandle, Address(sp, offset)); -+ bnez(rOop, notZero2); -+ mv(rHandle, zr); -+ bind(notZero2); -+ } ++ for (int i = count - 1; i >= 0; i--) { ++ ld(as_Register(regs[i]), Address(stack, (count - 1 - i) * wordSize + offset)); ++ DEBUG_ONLY(words_popped ++;) + } + -+ // If arg is on the stack then place it otherwise it is already in correct reg. 
-+ if (dst.first()->is_stack()) { -+ sd(rHandle, Address(sp, reg2offset_out(dst.first()))); ++ if (count) { ++ addi(stack, stack, count * wordSize + offset); + } ++ assert(words_popped == count, "oops, popped != count"); ++ ++ return count; +} + -+// A float arg may have to do float reg int reg conversion -+void MacroAssembler::float_move(VMRegPair src, VMRegPair dst, Register tmp) { -+ assert(src.first()->is_stack() && dst.first()->is_stack() || -+ src.first()->is_reg() && dst.first()->is_reg() || -+ src.first()->is_stack() && dst.first()->is_reg(), "Unexpected error"); -+ if (src.first()->is_stack()) { -+ if (dst.first()->is_stack()) { -+ lwu(tmp, Address(fp, reg2offset_in(src.first()))); -+ sw(tmp, Address(sp, reg2offset_out(dst.first()))); -+ } else if (dst.first()->is_Register()) { -+ lwu(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); -+ } else { -+ ShouldNotReachHere(); -+ } -+ } else if (src.first() != dst.first()) { -+ if (src.is_single_phys_reg() && dst.is_single_phys_reg()) { -+ fmv_s(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); -+ } else { -+ ShouldNotReachHere(); -+ } ++// Push float registers in the bitset, except sp. ++// Return the number of heapwords pushed. ++int MacroAssembler::push_fp(unsigned int bitset, Register stack) { ++ CompressibleRegion cr(this); ++ int words_pushed = 0; ++ unsigned char regs[32]; ++ int count = bitset_to_regs(bitset, regs); ++ int push_slots = count + (count & 1); ++ ++ if (count) { ++ addi(stack, stack, -push_slots * wordSize); + } -+} + -+// A long move -+void MacroAssembler::long_move(VMRegPair src, VMRegPair dst, Register tmp) { -+ if (src.first()->is_stack()) { -+ if (dst.first()->is_stack()) { -+ // stack to stack -+ ld(tmp, Address(fp, reg2offset_in(src.first()))); -+ sd(tmp, Address(sp, reg2offset_out(dst.first()))); -+ } else { -+ // stack to reg -+ ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); -+ } -+ } else if (dst.first()->is_stack()) { -+ // reg to stack -+ sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first()))); -+ } else { -+ if (dst.first() != src.first()) { -+ mv(dst.first()->as_Register(), src.first()->as_Register()); -+ } ++ for (int i = count - 1; i >= 0; i--) { ++ fsd(as_FloatRegister(regs[i]), Address(stack, (push_slots - 1 - i) * wordSize)); ++ words_pushed++; + } ++ ++ assert(words_pushed == count, "oops, pushed(%d) != count(%d)", words_pushed, count); ++ return count; +} + -+// A double move -+void MacroAssembler::double_move(VMRegPair src, VMRegPair dst, Register tmp) { -+ assert(src.first()->is_stack() && dst.first()->is_stack() || -+ src.first()->is_reg() && dst.first()->is_reg() || -+ src.first()->is_stack() && dst.first()->is_reg(), "Unexpected error"); -+ if (src.first()->is_stack()) { -+ if (dst.first()->is_stack()) { -+ ld(tmp, Address(fp, reg2offset_in(src.first()))); -+ sd(tmp, Address(sp, reg2offset_out(dst.first()))); -+ } else if (dst.first()-> is_Register()) { -+ ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); -+ } else { -+ ShouldNotReachHere(); -+ } -+ } else if (src.first() != dst.first()) { -+ if (src.is_single_phys_reg() && dst.is_single_phys_reg()) { -+ fmv_d(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); -+ } else { -+ ShouldNotReachHere(); -+ } ++int MacroAssembler::pop_fp(unsigned int bitset, Register stack) { ++ CompressibleRegion cr(this); ++ int words_popped = 0; ++ unsigned char regs[32]; ++ int count = bitset_to_regs(bitset, regs); ++ int pop_slots = count + 
(count & 1); ++ ++ for (int i = count - 1; i >= 0; i--) { ++ fld(as_FloatRegister(regs[i]), Address(stack, (pop_slots - 1 - i) * wordSize)); ++ words_popped++; + } -+} + -+void MacroAssembler::rt_call(address dest, Register tmp) { -+ CodeBlob *cb = CodeCache::find_blob(dest); -+ if (cb) { -+ far_call(RuntimeAddress(dest)); -+ } else { -+ int32_t offset = 0; -+ la_patchable(tmp, RuntimeAddress(dest), offset); -+ jalr(x1, tmp, offset); ++ if (count) { ++ addi(stack, stack, pop_slots * wordSize); + } -+} -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -new file mode 100644 -index 000000000..a4d5ce0e0 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -@@ -0,0 +1,975 @@ -+/* -+ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ + -+#ifndef CPU_RISCV_MACROASSEMBLER_RISCV_HPP -+#define CPU_RISCV_MACROASSEMBLER_RISCV_HPP ++ assert(words_popped == count, "oops, popped(%d) != count(%d)", words_popped, count); ++ return count; ++} + -+#include "asm/assembler.inline.hpp" -+#include "code/vmreg.hpp" -+// MacroAssembler extends Assembler by frequently used macros. -+// -+// Instructions for which a 'better' code sequence exists depending -+// on arguments should also go in here. 
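Editor's aside, not part of the patch: a small host-side sketch of how the bitset_to_regs()/push_reg() pair above walks a 32-bit register mask and sizes the spill area. The mask below is only an example, and wordSize is assumed to be 8 as on riscv64:

    #include <cstdio>

    // Same scan as bitset_to_regs() above: test bit 31 and shift left, so set
    // bits are reported from the highest-numbered register downwards.
    static int bitset_to_regs(unsigned int bitset, unsigned char* regs) {
      int count = 0;
      for (int reg = 31; reg >= 0; reg--) {
        if ((1U << 31) & bitset) {
          regs[count++] = reg;
        }
        bitset <<= 1;
      }
      return count;
    }

    int main() {
      unsigned char regs[32];
      unsigned int mask = (1u << 7) | (0xffu << 10) | (0xfu << 28);  // example set: x7, x10-x17, x28-x31
      int count = bitset_to_regs(mask, regs);
      // push_reg() pads an odd count by one extra word, keeping sp 16-byte aligned.
      int frame_bytes = (count + (count & 1)) * 8;
      printf("%d registers, %d bytes reserved, highest is x%d\n", count, frame_bytes, regs[0]);
      return 0;
    }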
++#ifdef COMPILER2 ++int MacroAssembler::push_vp(unsigned int bitset, Register stack) { ++ CompressibleRegion cr(this); ++ int vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); + -+class MacroAssembler: public Assembler { ++ // Scan bitset to accumulate register pairs ++ unsigned char regs[32]; ++ int count = 0; ++ for (int reg = 31; reg >= 0; reg--) { ++ if ((1U << 31) & bitset) { ++ regs[count++] = reg; ++ } ++ bitset <<= 1; ++ } + -+ public: -+ MacroAssembler(CodeBuffer* code) : Assembler(code) { ++ for (int i = 0; i < count; i++) { ++ sub(stack, stack, vector_size_in_bytes); ++ vs1r_v(as_VectorRegister(regs[i]), stack); + } -+ virtual ~MacroAssembler() {} + -+ void safepoint_poll(Label& slow_path); -+ void safepoint_poll_acquire(Label& slow_path); ++ return count * vector_size_in_bytes / wordSize; ++} + -+ // Alignment -+ void align(int modulus); ++int MacroAssembler::pop_vp(unsigned int bitset, Register stack) { ++ CompressibleRegion cr(this); ++ int vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); + -+ // Stack frame creation/removal -+ // Note that SP must be updated to the right place before saving/restoring RA and FP -+ // because signal based thread suspend/resume could happend asychronously -+ void enter() { -+ addi(sp, sp, - 2 * wordSize); -+ sd(ra, Address(sp, wordSize)); -+ sd(fp, Address(sp)); -+ addi(fp, sp, 2 * wordSize); ++ // Scan bitset to accumulate register pairs ++ unsigned char regs[32]; ++ int count = 0; ++ for (int reg = 31; reg >= 0; reg--) { ++ if ((1U << 31) & bitset) { ++ regs[count++] = reg; ++ } ++ bitset <<= 1; + } + -+ void leave() { -+ addi(sp, fp, - 2 * wordSize); -+ ld(fp, Address(sp)); -+ ld(ra, Address(sp, wordSize)); -+ addi(sp, sp, 2 * wordSize); ++ for (int i = count - 1; i >= 0; i--) { ++ vl1r_v(as_VectorRegister(regs[i]), stack); ++ add(stack, stack, vector_size_in_bytes); + } + ++ return count * vector_size_in_bytes / wordSize; ++} ++#endif // COMPILER2 + -+ // Support for getting the JavaThread pointer (i.e.; a reference to thread-local information) -+ // The pointer will be loaded into the thread register. -+ void get_thread(Register thread); -+ -+ // Support for VM calls -+ // -+ // It is imperative that all calls into the VM are handled via the call_VM macros. -+ // They make sure that the stack linkage is setup correctly. call_VM's correspond -+ // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points. ++void MacroAssembler::push_call_clobbered_registers_except(RegSet exclude) { ++ CompressibleRegion cr(this); ++ // Push integer registers x7, x10-x17, x28-x31. ++ push_reg(RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31) - exclude, sp); + -+ void call_VM(Register oop_result, -+ address entry_point, -+ bool check_exceptions = true); -+ void call_VM(Register oop_result, -+ address entry_point, -+ Register arg_1, -+ bool check_exceptions = true); -+ void call_VM(Register oop_result, -+ address entry_point, -+ Register arg_1, Register arg_2, -+ bool check_exceptions = true); -+ void call_VM(Register oop_result, -+ address entry_point, -+ Register arg_1, Register arg_2, Register arg_3, -+ bool check_exceptions = true); ++ // Push float registers f0-f7, f10-f17, f28-f31. 
++ addi(sp, sp, - wordSize * 20); ++ int offset = 0; ++ for (int i = 0; i < 32; i++) { ++ if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) { ++ fsd(as_FloatRegister(i), Address(sp, wordSize * (offset ++))); ++ } ++ } ++} + -+ // Overloadings with last_Java_sp -+ void call_VM(Register oop_result, -+ Register last_java_sp, -+ address entry_point, -+ int number_of_arguments = 0, -+ bool check_exceptions = true); -+ void call_VM(Register oop_result, -+ Register last_java_sp, -+ address entry_point, -+ Register arg_1, -+ bool check_exceptions = true); -+ void call_VM(Register oop_result, -+ Register last_java_sp, -+ address entry_point, -+ Register arg_1, Register arg_2, -+ bool check_exceptions = true); -+ void call_VM(Register oop_result, -+ Register last_java_sp, -+ address entry_point, -+ Register arg_1, Register arg_2, Register arg_3, -+ bool check_exceptions = true); ++void MacroAssembler::pop_call_clobbered_registers_except(RegSet exclude) { ++ CompressibleRegion cr(this); ++ int offset = 0; ++ for (int i = 0; i < 32; i++) { ++ if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) { ++ fld(as_FloatRegister(i), Address(sp, wordSize * (offset ++))); ++ } ++ } ++ addi(sp, sp, wordSize * 20); + -+ void get_vm_result(Register oop_result, Register java_thread); -+ void get_vm_result_2(Register metadata_result, Register java_thread); ++ pop_reg(RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31) - exclude, sp); ++} + -+ // These always tightly bind to MacroAssembler::call_VM_leaf_base -+ // bypassing the virtual implementation -+ void call_VM_leaf(address entry_point, -+ int number_of_arguments = 0); -+ void call_VM_leaf(address entry_point, -+ Register arg_0); -+ void call_VM_leaf(address entry_point, -+ Register arg_0, Register arg_1); -+ void call_VM_leaf(address entry_point, -+ Register arg_0, Register arg_1, Register arg_2); ++// Push all the integer registers, except zr(x0) & sp(x2) & gp(x3) & tp(x4). ++void MacroAssembler::pusha() { ++ CompressibleRegion cr(this); ++ push_reg(0xffffffe2, sp); ++} + -+ // These always tightly bind to MacroAssembler::call_VM_base -+ // bypassing the virtual implementation -+ void super_call_VM_leaf(address entry_point, Register arg_0); -+ void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1); -+ void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2); -+ void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3); ++// Pop all the integer registers, except zr(x0) & sp(x2) & gp(x3) & tp(x4). 
++void MacroAssembler::popa() { ++ CompressibleRegion cr(this); ++ pop_reg(0xffffffe2, sp); ++} + -+ // last Java Frame (fills frame anchor) -+ void set_last_Java_frame(Register last_java_sp, Register last_java_fp, address last_java_pc, Register tmp); -+ void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Label &last_java_pc, Register tmp); -+ void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Register last_java_pc, Register tmp); ++void MacroAssembler::push_CPU_state(bool save_vectors, int vector_size_in_bytes) { ++ CompressibleRegion cr(this); ++ // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4) ++ push_reg(0xffffffe0, sp); + -+ // thread in the default location (xthread) -+ void reset_last_Java_frame(bool clear_fp); ++ // float registers ++ addi(sp, sp, - 32 * wordSize); ++ for (int i = 0; i < 32; i++) { ++ fsd(as_FloatRegister(i), Address(sp, i * wordSize)); ++ } + -+ virtual void call_VM_leaf_base( -+ address entry_point, // the entry point -+ int number_of_arguments, // the number of arguments to pop after the call -+ Label* retaddr = NULL -+ ); ++ // vector registers ++ if (save_vectors) { ++ sub(sp, sp, vector_size_in_bytes * VectorRegisterImpl::number_of_registers); ++ vsetvli(t0, x0, Assembler::e64, Assembler::m8); ++ for (int i = 0; i < VectorRegisterImpl::number_of_registers; i += 8) { ++ add(t0, sp, vector_size_in_bytes * i); ++ vse64_v(as_VectorRegister(i), t0); ++ } ++ } ++} + -+ virtual void call_VM_leaf_base( -+ address entry_point, // the entry point -+ int number_of_arguments, // the number of arguments to pop after the call -+ Label& retaddr) { -+ call_VM_leaf_base(entry_point, number_of_arguments, &retaddr); ++void MacroAssembler::pop_CPU_state(bool restore_vectors, int vector_size_in_bytes) { ++ CompressibleRegion cr(this); ++ // vector registers ++ if (restore_vectors) { ++ vsetvli(t0, x0, Assembler::e64, Assembler::m8); ++ for (int i = 0; i < VectorRegisterImpl::number_of_registers; i += 8) { ++ vle64_v(as_VectorRegister(i), sp); ++ add(sp, sp, vector_size_in_bytes * 8); ++ } + } + -+ virtual void call_VM_base( // returns the register containing the thread upon return -+ Register oop_result, // where an oop-result ends up if any; use noreg otherwise -+ Register java_thread, // the thread if computed before ; use noreg otherwise -+ Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise -+ address entry_point, // the entry point -+ int number_of_arguments, // the number of arguments (w/o thread) to pop after the call -+ bool check_exceptions // whether to check for pending exceptions after return -+ ); ++ // float registers ++ for (int i = 0; i < 32; i++) { ++ fld(as_FloatRegister(i), Address(sp, i * wordSize)); ++ } ++ addi(sp, sp, 32 * wordSize); + -+ void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions); ++ // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4) ++ pop_reg(0xffffffe0, sp); ++} + -+ virtual void check_and_handle_earlyret(Register java_thread); -+ virtual void check_and_handle_popframe(Register java_thread); ++static int patch_offset_in_jal(address branch, int64_t offset) { ++ assert(is_imm_in_range(offset, 20, 1), "offset is too large to be patched in one jal insrusction!\n"); ++ Assembler::patch(branch, 31, 31, (offset >> 20) & 0x1); // offset[20] ==> branch[31] ++ Assembler::patch(branch, 30, 21, (offset >> 1) & 0x3ff); // offset[10:1] ==> branch[30:21] ++ Assembler::patch(branch, 20, 20, 
(offset >> 11) & 0x1); // offset[11] ==> branch[20] ++ Assembler::patch(branch, 19, 12, (offset >> 12) & 0xff); // offset[19:12] ==> branch[19:12] ++ return NativeInstruction::instruction_size; // only one instruction ++} + -+ void resolve_oop_handle(Register result, Register tmp = x15); -+ void resolve_jobject(Register value, Register thread, Register tmp); ++static int patch_offset_in_conditional_branch(address branch, int64_t offset) { ++ assert(is_imm_in_range(offset, 12, 1), "offset is too large to be patched in one beq/bge/bgeu/blt/bltu/bne insrusction!\n"); ++ Assembler::patch(branch, 31, 31, (offset >> 12) & 0x1); // offset[12] ==> branch[31] ++ Assembler::patch(branch, 30, 25, (offset >> 5) & 0x3f); // offset[10:5] ==> branch[30:25] ++ Assembler::patch(branch, 7, 7, (offset >> 11) & 0x1); // offset[11] ==> branch[7] ++ Assembler::patch(branch, 11, 8, (offset >> 1) & 0xf); // offset[4:1] ==> branch[11:8] ++ return NativeInstruction::instruction_size; // only one instruction ++} + -+ void movoop(Register dst, jobject obj, bool immediate = false); -+ void mov_metadata(Register dst, Metadata* obj); -+ void bang_stack_size(Register size, Register tmp); -+ void set_narrow_oop(Register dst, jobject obj); -+ void set_narrow_klass(Register dst, Klass* k); ++static int patch_offset_in_pc_relative(address branch, int64_t offset) { ++ const int PC_RELATIVE_INSTRUCTION_NUM = 2; // auipc, addi/jalr/load ++ Assembler::patch(branch, 31, 12, ((offset + 0x800) >> 12) & 0xfffff); // Auipc. offset[31:12] ==> branch[31:12] ++ Assembler::patch(branch + 4, 31, 20, offset & 0xfff); // Addi/Jalr/Load. offset[11:0] ==> branch[31:20] ++ return PC_RELATIVE_INSTRUCTION_NUM * NativeInstruction::instruction_size; ++} + -+ void load_mirror(Register dst, Register method, Register tmp = x15); -+ void access_load_at(BasicType type, DecoratorSet decorators, Register dst, -+ Address src, Register tmp1, Register thread_tmp); -+ void access_store_at(BasicType type, DecoratorSet decorators, Address dst, -+ Register src, Register tmp1, Register tmp2, Register tmp3); -+ void load_klass(Register dst, Register src); -+ void store_klass(Register dst, Register src); -+ void cmp_klass(Register oop, Register trial_klass, Register tmp, Label &L); ++static int patch_addr_in_movptr(address branch, address target) { ++ const int MOVPTR_INSTRUCTIONS_NUM = 6; // lui + addi + slli + addi + slli + addi/jalr/load ++ int32_t lower = ((intptr_t)target << 36) >> 36; ++ int64_t upper = ((intptr_t)target - lower) >> 28; ++ Assembler::patch(branch + 0, 31, 12, upper & 0xfffff); // Lui. target[47:28] + target[27] ==> branch[31:12] ++ Assembler::patch(branch + 4, 31, 20, (lower >> 16) & 0xfff); // Addi. target[27:16] ==> branch[31:20] ++ Assembler::patch(branch + 12, 31, 20, (lower >> 5) & 0x7ff); // Addi. target[15: 5] ==> branch[31:20] ++ Assembler::patch(branch + 20, 31, 20, lower & 0x1f); // Addi/Jalr/Load. 
target[ 4: 0] ==> branch[31:20] ++ return MOVPTR_INSTRUCTIONS_NUM * NativeInstruction::instruction_size; ++} + -+ void encode_klass_not_null(Register r); -+ void decode_klass_not_null(Register r); -+ void encode_klass_not_null(Register dst, Register src, Register tmp = xheapbase); -+ void decode_klass_not_null(Register dst, Register src, Register tmp = xheapbase); -+ void decode_heap_oop_not_null(Register r); -+ void decode_heap_oop_not_null(Register dst, Register src); -+ void decode_heap_oop(Register d, Register s); -+ void decode_heap_oop(Register r) { decode_heap_oop(r, r); } -+ void encode_heap_oop(Register d, Register s); -+ void encode_heap_oop(Register r) { encode_heap_oop(r, r); }; -+ void load_heap_oop(Register dst, Address src, Register tmp1 = noreg, -+ Register thread_tmp = noreg, DecoratorSet decorators = 0); -+ void load_heap_oop_not_null(Register dst, Address src, Register tmp1 = noreg, -+ Register thread_tmp = noreg, DecoratorSet decorators = 0); -+ void store_heap_oop(Address dst, Register src, Register tmp1 = noreg, -+ Register tmp2 = noreg, Register tmp3 = noreg, DecoratorSet decorators = 0); ++static int patch_imm_in_li64(address branch, address target) { ++ const int LI64_INSTRUCTIONS_NUM = 8; // lui + addi + slli + addi + slli + addi + slli + addi ++ int64_t lower = (intptr_t)target & 0xffffffff; ++ lower = lower - ((lower << 44) >> 44); ++ int64_t tmp_imm = ((uint64_t)((intptr_t)target & 0xffffffff00000000)) + (uint64_t)lower; ++ int32_t upper = (tmp_imm - (int32_t)lower) >> 32; ++ int64_t tmp_upper = upper, tmp_lower = upper; ++ tmp_lower = (tmp_lower << 52) >> 52; ++ tmp_upper -= tmp_lower; ++ tmp_upper >>= 12; ++ // Load upper 32 bits. Upper = target[63:32], but if target[31] = 1 or (target[31:28] == 0x7ff && target[19] == 1), ++ // upper = target[63:32] + 1. ++ Assembler::patch(branch + 0, 31, 12, tmp_upper & 0xfffff); // Lui. ++ Assembler::patch(branch + 4, 31, 20, tmp_lower & 0xfff); // Addi. ++ // Load the rest 32 bits. ++ Assembler::patch(branch + 12, 31, 20, ((int32_t)lower >> 20) & 0xfff); // Addi. ++ Assembler::patch(branch + 20, 31, 20, (((intptr_t)target << 44) >> 52) & 0xfff); // Addi. ++ Assembler::patch(branch + 28, 31, 20, (intptr_t)target & 0xff); // Addi. ++ return LI64_INSTRUCTIONS_NUM * NativeInstruction::instruction_size; ++} + -+ void store_klass_gap(Register dst, Register src); ++static int patch_imm_in_li32(address branch, int32_t target) { ++ const int LI32_INSTRUCTIONS_NUM = 2; // lui + addiw ++ int64_t upper = (intptr_t)target; ++ int32_t lower = (((int32_t)target) << 20) >> 20; ++ upper -= lower; ++ upper = (int32_t)upper; ++ Assembler::patch(branch + 0, 31, 12, (upper >> 12) & 0xfffff); // Lui. ++ Assembler::patch(branch + 4, 31, 20, lower & 0xfff); // Addiw. ++ return LI32_INSTRUCTIONS_NUM * NativeInstruction::instruction_size; ++} + -+ // currently unimplemented -+ // Used for storing NULL. All other oop constants should be -+ // stored using routines that take a jobject. 
-+ void store_heap_oop_null(Address dst); ++static long get_offset_of_jal(address insn_addr) { ++ assert_cond(insn_addr != NULL); ++ long offset = 0; ++ unsigned insn = *(unsigned*)insn_addr; ++ long val = (long)Assembler::sextract(insn, 31, 12); ++ offset |= ((val >> 19) & 0x1) << 20; ++ offset |= (val & 0xff) << 12; ++ offset |= ((val >> 8) & 0x1) << 11; ++ offset |= ((val >> 9) & 0x3ff) << 1; ++ offset = (offset << 43) >> 43; ++ return offset; ++} + -+ // This dummy is to prevent a call to store_heap_oop from -+ // converting a zero (linke NULL) into a Register by giving -+ // the compiler two choices it can't resolve ++static long get_offset_of_conditional_branch(address insn_addr) { ++ long offset = 0; ++ assert_cond(insn_addr != NULL); ++ unsigned insn = *(unsigned*)insn_addr; ++ offset = (long)Assembler::sextract(insn, 31, 31); ++ offset = (offset << 12) | (((long)(Assembler::sextract(insn, 7, 7) & 0x1)) << 11); ++ offset = offset | (((long)(Assembler::sextract(insn, 30, 25) & 0x3f)) << 5); ++ offset = offset | (((long)(Assembler::sextract(insn, 11, 8) & 0xf)) << 1); ++ offset = (offset << 41) >> 41; ++ return offset; ++} + -+ void store_heap_oop(Address dst, void* dummy); ++static long get_offset_of_pc_relative(address insn_addr) { ++ long offset = 0; ++ assert_cond(insn_addr != NULL); ++ offset = ((long)(Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12))) << 12; // Auipc. ++ offset += ((long)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)); // Addi/Jalr/Load. ++ offset = (offset << 32) >> 32; ++ return offset; ++} + -+ // Support for NULL-checks -+ // -+ // Generates code that causes a NULL OS exception if the content of reg is NULL. -+ // If the accessed location is M[reg + offset] and the offset is known, provide the -+ // offset. No explicit code generateion is needed if the offset is within a certain -+ // range (0 <= offset <= page_size). ++static address get_target_of_movptr(address insn_addr) { ++ assert_cond(insn_addr != NULL); ++ intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 28; // Lui. ++ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)) << 16; // Addi. ++ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[3], 31, 20)) << 5; // Addi. ++ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[5], 31, 20)); // Addi/Jalr/Load. ++ return (address) target_address; ++} + -+ virtual void null_check(Register reg, int offset = -1); -+ static bool needs_explicit_null_check(intptr_t offset); ++static address get_target_of_li64(address insn_addr) { ++ assert_cond(insn_addr != NULL); ++ intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 44; // Lui. ++ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)) << 32; // Addi. ++ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[3], 31, 20)) << 20; // Addi. ++ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[5], 31, 20)) << 8; // Addi. ++ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[7], 31, 20)); // Addi. 
++ return (address)target_address; ++} + -+ // idiv variant which deals with MINLONG as dividend and -1 as divisor -+ int corrected_idivl(Register result, Register rs1, Register rs2, -+ bool want_remainder); -+ int corrected_idivq(Register result, Register rs1, Register rs2, -+ bool want_remainder); ++static address get_target_of_li32(address insn_addr) { ++ assert_cond(insn_addr != NULL); ++ intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 12; // Lui. ++ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)); // Addiw. ++ return (address)target_address; ++} + -+ // interface method calling -+ void lookup_interface_method(Register recv_klass, -+ Register intf_klass, -+ RegisterOrConstant itable_index, -+ Register method_result, -+ Register scan_tmp, -+ Label& no_such_interface, -+ bool return_method = true); ++// Patch any kind of instruction; there may be several instructions. ++// Return the total length (in bytes) of the instructions. ++int MacroAssembler::pd_patch_instruction_size(address branch, address target) { ++ assert_cond(branch != NULL); ++ int64_t offset = target - branch; ++ if (NativeInstruction::is_jal_at(branch)) { // jal ++ return patch_offset_in_jal(branch, offset); ++ } else if (NativeInstruction::is_branch_at(branch)) { // beq/bge/bgeu/blt/bltu/bne ++ return patch_offset_in_conditional_branch(branch, offset); ++ } else if (NativeInstruction::is_pc_relative_at(branch)) { // auipc, addi/jalr/load ++ return patch_offset_in_pc_relative(branch, offset); ++ } else if (NativeInstruction::is_movptr_at(branch)) { // movptr ++ return patch_addr_in_movptr(branch, target); ++ } else if (NativeInstruction::is_li64_at(branch)) { // li64 ++ return patch_imm_in_li64(branch, target); ++ } else if (NativeInstruction::is_li32_at(branch)) { // li32 ++ int64_t imm = (intptr_t)target; ++ return patch_imm_in_li32(branch, (int32_t)imm); ++ } else { ++#ifdef ASSERT ++ tty->print_cr("pd_patch_instruction_size: instruction 0x%x at " INTPTR_FORMAT " could not be patched!\n", ++ *(unsigned*)branch, p2i(branch)); ++ Disassembler::decode(branch - 16, branch + 16); ++#endif ++ ShouldNotReachHere(); ++ return -1; ++ } ++} + -+ // virtual method calling -+ // n.n. 
x86 allows RegisterOrConstant for vtable_index -+ void lookup_virtual_method(Register recv_klass, -+ RegisterOrConstant vtable_index, -+ Register method_result); ++address MacroAssembler::target_addr_for_insn(address insn_addr) { ++ long offset = 0; ++ assert_cond(insn_addr != NULL); ++ if (NativeInstruction::is_jal_at(insn_addr)) { // jal ++ offset = get_offset_of_jal(insn_addr); ++ } else if (NativeInstruction::is_branch_at(insn_addr)) { // beq/bge/bgeu/blt/bltu/bne ++ offset = get_offset_of_conditional_branch(insn_addr); ++ } else if (NativeInstruction::is_pc_relative_at(insn_addr)) { // auipc, addi/jalr/load ++ offset = get_offset_of_pc_relative(insn_addr); ++ } else if (NativeInstruction::is_movptr_at(insn_addr)) { // movptr ++ return get_target_of_movptr(insn_addr); ++ } else if (NativeInstruction::is_li64_at(insn_addr)) { // li64 ++ return get_target_of_li64(insn_addr); ++ } else if (NativeInstruction::is_li32_at(insn_addr)) { // li32 ++ return get_target_of_li32(insn_addr); ++ } else { ++ ShouldNotReachHere(); ++ } ++ return address(((uintptr_t)insn_addr + offset)); ++} + -+ // allocation -+ void eden_allocate( -+ Register obj, // result: pointer to object after successful allocation -+ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise -+ int con_size_in_bytes, // object size in bytes if known at compile time -+ Register tmp1, // temp register -+ Label& slow_case, // continuation point if fast allocation fails -+ bool is_far = false -+ ); -+ void tlab_allocate( -+ Register obj, // result: pointer to object after successful allocation -+ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise -+ int con_size_in_bytes, // object size in bytes if known at compile time -+ Register tmp1, // temp register -+ Register tmp2, // temp register -+ Label& slow_case, // continuation point of fast allocation fails -+ bool is_far = false -+ ); ++int MacroAssembler::patch_oop(address insn_addr, address o) { ++ // OOPs are either narrow (32 bits) or wide (48 bits). We encode ++ // narrow OOPs by setting the upper 16 bits in the first ++ // instruction. ++ if (NativeInstruction::is_li32_at(insn_addr)) { ++ // Move narrow OOP ++ uint32_t n = CompressedOops::narrow_oop_value(cast_to_oop(o)); ++ return patch_imm_in_li32(insn_addr, (int32_t)n); ++ } else if (NativeInstruction::is_movptr_at(insn_addr)) { ++ // Move wide OOP ++ return patch_addr_in_movptr(insn_addr, o); ++ } ++ ShouldNotReachHere(); ++ return -1; ++} + -+ // Test sub_klass against super_klass, with fast and slow paths. ++void MacroAssembler::reinit_heapbase() { ++ if (UseCompressedOops) { ++ if (Universe::is_fully_initialized()) { ++ mv(xheapbase, CompressedOops::ptrs_base()); ++ } else { ++ int32_t offset = 0; ++ la_patchable(xheapbase, ExternalAddress((address)CompressedOops::ptrs_base_addr()), offset); ++ ld(xheapbase, Address(xheapbase, offset)); ++ } ++ } ++} + -+ // The fast path produces a tri-state answer: yes / no / maybe-slow. -+ // One of the three labels can be NULL, meaning take the fall-through. -+ // If super_check_offset is -1, the value is loaded up from super_klass. 
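// --- Editorial sketch (not part of the patch) --------------------------------
// target_addr_for_insn above recovers addresses from a movptr sequence, whose
// six instructions carry a 48-bit address as lui(20 bits) + addi(12 bits) +
// addi(11 bits) + addi(5 bits), exactly as patch_addr_in_movptr scatters it and
// get_target_of_movptr reassembles it. A self-contained, hypothetical check
// that the split and rejoin are lossless for addresses below 2^47 (illustrative
// names only, not HotSpot APIs):

#include <cassert>
#include <cstdint>

namespace movptr_sketch {
  inline int64_t sext(uint64_t v, unsigned bits) {
    const uint64_t m = uint64_t(1) << (bits - 1);
    return int64_t((v & (2 * m - 1)) ^ m) - int64_t(m);
  }
  struct Fields { uint32_t lui, addi1, addi2, addi3; };

  inline Fields split(uint64_t target) {                    // cf. patch_addr_in_movptr
    const int64_t lower = sext(target, 28);                 // low 28 bits, sign-extended
    const int64_t upper = (int64_t(target) - lower) >> 28;  // absorbs the carry when bit 27 is set
    return { uint32_t(upper) & 0xfffff,
             uint32_t(uint64_t(lower) >> 16) & 0xfff,
             uint32_t(uint64_t(lower) >> 5)  & 0x7ff,
             uint32_t(lower) & 0x1f };
  }
  inline uint64_t rejoin(const Fields& f) {                 // cf. get_target_of_movptr
    return uint64_t(sext(f.lui, 20)   * (int64_t(1) << 28) +
                    sext(f.addi1, 12) * (int64_t(1) << 16) +
                    sext(f.addi2, 12) * (int64_t(1) << 5)  +
                    sext(f.addi3, 12));
  }
  inline void selfcheck() {
    for (uint64_t t : { uint64_t(0),
                        uint64_t(0x08765432),                // bit 27 set: exercises the carry
                        uint64_t(0x00007f1234567890ull) }) { // a typical 47-bit address
      assert(rejoin(split(t)) == t);
    }
  }
}
// -----------------------------------------------------------------------------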
-+ // No registers are killed, except tmp_reg -+ void check_klass_subtype_fast_path(Register sub_klass, -+ Register super_klass, -+ Register tmp_reg, -+ Label* L_success, -+ Label* L_failure, -+ Label* L_slow_path, -+ Register super_check_offset = noreg); ++void MacroAssembler::mv(Register Rd, Address dest) { ++ assert(dest.getMode() == Address::literal, "Address mode should be Address::literal"); ++ code_section()->relocate(pc(), dest.rspec()); ++ movptr(Rd, dest.target()); ++} + -+ // The reset of the type cehck; must be wired to a corresponding fast path. -+ // It does not repeat the fast path logic, so don't use it standalone. -+ // The tmp_reg and tmp2_reg can be noreg, if no tmps are avaliable. -+ // Updates the sub's secondary super cache as necessary. -+ void check_klass_subtype_slow_path(Register sub_klass, -+ Register super_klass, -+ Register tmp_reg, -+ Register tmp2_reg, -+ Label* L_success, -+ Label* L_failure); ++void MacroAssembler::mv(Register Rd, address addr) { ++ // Here in case of use with relocation, use fix length instruciton ++ // movptr instead of li ++ movptr(Rd, addr); ++} + -+ void check_klass_subtype(Register sub_klass, -+ Register super_klass, -+ Register tmp_reg, -+ Label& L_success); ++void MacroAssembler::mv(Register Rd, RegisterOrConstant src) { ++ if (src.is_register()) { ++ mv(Rd, src.as_register()); ++ } else { ++ mv(Rd, src.as_constant()); ++ } ++} + -+ Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0); ++void MacroAssembler::andrw(Register Rd, Register Rs1, Register Rs2) { ++ andr(Rd, Rs1, Rs2); ++ // addw: The result is clipped to 32 bits, then the sign bit is extended, ++ // and the result is stored in Rd ++ addw(Rd, Rd, zr); ++} + -+ // only if +VerifyOops -+ void verify_oop(Register reg, const char* s = "broken oop"); -+ void verify_oop_addr(Address addr, const char* s = "broken oop addr"); ++void MacroAssembler::orrw(Register Rd, Register Rs1, Register Rs2) { ++ orr(Rd, Rs1, Rs2); ++ // addw: The result is clipped to 32 bits, then the sign bit is extended, ++ // and the result is stored in Rd ++ addw(Rd, Rd, zr); ++} + -+ void _verify_method_ptr(Register reg, const char* msg, const char* file, int line) {} -+ void _verify_klass_ptr(Register reg, const char* msg, const char* file, int line) {} ++void MacroAssembler::xorrw(Register Rd, Register Rs1, Register Rs2) { ++ xorr(Rd, Rs1, Rs2); ++ // addw: The result is clipped to 32 bits, then the sign bit is extended, ++ // and the result is stored in Rd ++ addw(Rd, Rd, zr); ++} + -+#define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__) -+#define verify_klass_ptr(reg) _verify_method_ptr(reg, "broken klass " #reg, __FILE__, __LINE__) ++// Note: load_unsigned_short used to be called load_unsigned_word. ++int MacroAssembler::load_unsigned_short(Register dst, Address src) { ++ int off = offset(); ++ lhu(dst, src); ++ return off; ++} + -+ // A more convenient access to fence for our purposes -+ // We used four bit to indicate the read and write bits in the predecessors and successors, -+ // and extended i for r, o for w if UseConservativeFence enabled. 
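// --- Editorial sketch (not part of the patch) --------------------------------
// The comment above, and the Membar_mask_bits encoding it describes, pack a
// barrier constraint into four bits: the upper two are the predecessor's
// (read, write) bits, the lower two the successor's, and UseConservativeFence
// widens r -> ir and w -> ow so device I/O is ordered as well (RISC-V fence
// sets are i/o/r/w, i.e. i = 8, o = 4, r = 2, w = 1). A tiny, hypothetical
// model of that split (illustrative names only):

#include <cassert>
#include <cstdint>

namespace membar_sketch {
  enum : uint32_t { kStoreStore = 0b0101, kLoadStore = 0b1001, kStoreLoad = 0b0110, kLoadLoad = 0b1010 };

  inline void to_pred_succ(uint32_t constraint, bool conservative, uint32_t& pred, uint32_t& succ) {
    pred = (constraint >> 2) & 0x3;     // cf. membar_mask_to_pred_succ
    succ = constraint & 0x3;
    if (conservative) {                 // r -> ir, w -> ow, rw -> iorw
      pred |= pred << 2;
      succ |= succ << 2;
    }
  }
  inline void selfcheck() {
    uint32_t pred = 0, succ = 0;
    to_pred_succ(kLoadStore, /*conservative=*/true, pred, succ);
    assert(pred == 0b1010 && succ == 0b0101);   // would emit "fence ir, ow"
  }
}
// -----------------------------------------------------------------------------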
-+ enum Membar_mask_bits { -+ StoreStore = 0b0101, // (pred = ow + succ = ow) -+ LoadStore = 0b1001, // (pred = ir + succ = ow) -+ StoreLoad = 0b0110, // (pred = ow + succ = ir) -+ LoadLoad = 0b1010, // (pred = ir + succ = ir) -+ AnyAny = LoadStore | StoreLoad // (pred = iorw + succ = iorw) -+ }; ++int MacroAssembler::load_unsigned_byte(Register dst, Address src) { ++ int off = offset(); ++ lbu(dst, src); ++ return off; ++} + -+ void membar(uint32_t order_constraint); ++int MacroAssembler::load_signed_short(Register dst, Address src) { ++ int off = offset(); ++ lh(dst, src); ++ return off; ++} + -+ static void membar_mask_to_pred_succ(uint32_t order_constraint, uint32_t& predecessor, uint32_t& successor) { -+ predecessor = (order_constraint >> 2) & 0x3; -+ successor = order_constraint & 0x3; ++int MacroAssembler::load_signed_byte(Register dst, Address src) { ++ int off = offset(); ++ lb(dst, src); ++ return off; ++} + -+ // extend rw -> iorw: -+ // 01(w) -> 0101(ow) -+ // 10(r) -> 1010(ir) -+ // 11(rw)-> 1111(iorw) -+ if (UseConservativeFence) { -+ predecessor |= predecessor << 2; -+ successor |= successor << 2; -+ } ++void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) { ++ switch (size_in_bytes) { ++ case 8: ld(dst, src); break; ++ case 4: is_signed ? lw(dst, src) : lwu(dst, src); break; ++ case 2: is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break; ++ case 1: is_signed ? load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break; ++ default: ShouldNotReachHere(); + } ++} + -+ static int pred_succ_to_membar_mask(uint32_t predecessor, uint32_t successor) { -+ return ((predecessor & 0x3) << 2) | (successor & 0x3); ++void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) { ++ switch (size_in_bytes) { ++ case 8: sd(src, dst); break; ++ case 4: sw(src, dst); break; ++ case 2: sh(src, dst); break; ++ case 1: sb(src, dst); break; ++ default: ShouldNotReachHere(); + } ++} + -+ // prints msg, dumps registers and stops execution -+ void stop(const char* msg); -+ -+ static void debug64(char* msg, int64_t pc, int64_t regs[]); ++// reverse bytes in halfword in lower 16 bits and sign-extend ++// Rd[15:0] = Rs[7:0] Rs[15:8] (sign-extend to 64 bits) ++void MacroAssembler::revb_h_h(Register Rd, Register Rs, Register tmp) { ++ if (UseRVB) { ++ rev8(Rd, Rs); ++ srai(Rd, Rd, 48); ++ return; ++ } ++ assert_different_registers(Rs, tmp); ++ assert_different_registers(Rd, tmp); ++ srli(tmp, Rs, 8); ++ andi(tmp, tmp, 0xFF); ++ slli(Rd, Rs, 56); ++ srai(Rd, Rd, 48); // sign-extend ++ orr(Rd, Rd, tmp); ++} + -+ void unimplemented(const char* what = ""); ++// reverse bytes in lower word and sign-extend ++// Rd[31:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] (sign-extend to 64 bits) ++void MacroAssembler::revb_w_w(Register Rd, Register Rs, Register tmp1, Register tmp2) { ++ if (UseRVB) { ++ rev8(Rd, Rs); ++ srai(Rd, Rd, 32); ++ return; ++ } ++ assert_different_registers(Rs, tmp1, tmp2); ++ assert_different_registers(Rd, tmp1, tmp2); ++ revb_h_w_u(Rd, Rs, tmp1, tmp2); ++ slli(tmp2, Rd, 48); ++ srai(tmp2, tmp2, 32); // sign-extend ++ srli(Rd, Rd, 16); ++ orr(Rd, Rd, tmp2); ++} + -+ void should_not_reach_here() { stop("should not reach here"); } ++// reverse bytes in halfword in lower 16 bits and zero-extend ++// Rd[15:0] = Rs[7:0] Rs[15:8] (zero-extend to 64 bits) ++void MacroAssembler::revb_h_h_u(Register Rd, Register Rs, Register tmp) { ++ if (UseRVB) { ++ rev8(Rd, Rs); 
++ srli(Rd, Rd, 48); ++ return; ++ } ++ assert_different_registers(Rs, tmp); ++ assert_different_registers(Rd, tmp); ++ srli(tmp, Rs, 8); ++ andi(tmp, tmp, 0xFF); ++ andi(Rd, Rs, 0xFF); ++ slli(Rd, Rd, 8); ++ orr(Rd, Rd, tmp); ++} + -+ virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, -+ Register tmp, -+ int offset) { -+ return RegisterOrConstant(tmp); ++// reverse bytes in halfwords in lower 32 bits and zero-extend ++// Rd[31:0] = Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] (zero-extend to 64 bits) ++void MacroAssembler::revb_h_w_u(Register Rd, Register Rs, Register tmp1, Register tmp2) { ++ if (UseRVB) { ++ rev8(Rd, Rs); ++ rori(Rd, Rd, 32); ++ roriw(Rd, Rd, 16); ++ zext_w(Rd, Rd); ++ return; + } ++ assert_different_registers(Rs, tmp1, tmp2); ++ assert_different_registers(Rd, tmp1, tmp2); ++ srli(tmp2, Rs, 16); ++ revb_h_h_u(tmp2, tmp2, tmp1); ++ revb_h_h_u(Rd, Rs, tmp1); ++ slli(tmp2, tmp2, 16); ++ orr(Rd, Rd, tmp2); ++} + -+ static address target_addr_for_insn(address insn_addr); ++// This method is only used for revb_h ++// Rd = Rs[47:0] Rs[55:48] Rs[63:56] ++void MacroAssembler::revb_h_helper(Register Rd, Register Rs, Register tmp1, Register tmp2) { ++ assert_different_registers(Rs, tmp1, tmp2); ++ assert_different_registers(Rd, tmp1); ++ srli(tmp1, Rs, 48); ++ andi(tmp2, tmp1, 0xFF); ++ slli(tmp2, tmp2, 8); ++ srli(tmp1, tmp1, 8); ++ orr(tmp1, tmp1, tmp2); ++ slli(Rd, Rs, 16); ++ orr(Rd, Rd, tmp1); ++} + -+ // Required platform-specific helpers for Label::patch_instructions. -+ // They _shadow_ the declarations in AbstractAssembler, which are undefined. -+ static int pd_patch_instruction_size(address branch, address target) ; -+ void pd_patch_instruction(address branch, address target) { -+ pd_patch_instruction_size(branch, target); ++// reverse bytes in each halfword ++// Rd[63:0] = Rs[55:48] Rs[63:56] Rs[39:32] Rs[47:40] Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] ++void MacroAssembler::revb_h(Register Rd, Register Rs, Register tmp1, Register tmp2) { ++ if (UseRVB) { ++ assert_different_registers(Rs, tmp1); ++ assert_different_registers(Rd, tmp1); ++ rev8(Rd, Rs); ++ zext_w(tmp1, Rd); ++ roriw(tmp1, tmp1, 16); ++ slli(tmp1, tmp1, 32); ++ srli(Rd, Rd, 32); ++ roriw(Rd, Rd, 16); ++ zext_w(Rd, Rd); ++ orr(Rd, Rd, tmp1); ++ return; + } -+ static address pd_call_destination(address branch) { -+ return target_addr_for_insn(branch); ++ assert_different_registers(Rs, tmp1, tmp2); ++ assert_different_registers(Rd, tmp1, tmp2); ++ revb_h_helper(Rd, Rs, tmp1, tmp2); ++ for (int i = 0; i < 3; ++i) { ++ revb_h_helper(Rd, Rd, tmp1, tmp2); + } ++} + -+ static int patch_oop(address insn_addr, address o); -+ address emit_trampoline_stub(int insts_call_instruction_offset, address target); -+ void emit_static_call_stub(); ++// reverse bytes in each word ++// Rd[63:0] = Rs[39:32] Rs[47:40] Rs[55:48] Rs[63:56] Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] ++void MacroAssembler::revb_w(Register Rd, Register Rs, Register tmp1, Register tmp2) { ++ if (UseRVB) { ++ rev8(Rd, Rs); ++ rori(Rd, Rd, 32); ++ return; ++ } ++ assert_different_registers(Rs, tmp1, tmp2); ++ assert_different_registers(Rd, tmp1, tmp2); ++ revb(Rd, Rs, tmp1, tmp2); ++ ror_imm(Rd, Rd, 32); ++} + -+ // The following 4 methods return the offset of the appropriate move instruction ++// reverse bytes in doubleword ++// Rd[63:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] Rs[39:32] Rs[47,40] Rs[55,48] Rs[63:56] ++void MacroAssembler::revb(Register Rd, Register Rs, Register tmp1, Register tmp2) { ++ if (UseRVB) { ++ rev8(Rd, Rs); ++ return; ++ 
} ++ assert_different_registers(Rs, tmp1, tmp2); ++ assert_different_registers(Rd, tmp1, tmp2); ++ andi(tmp1, Rs, 0xFF); ++ slli(tmp1, tmp1, 8); ++ for (int step = 8; step < 56; step += 8) { ++ srli(tmp2, Rs, step); ++ andi(tmp2, tmp2, 0xFF); ++ orr(tmp1, tmp1, tmp2); ++ slli(tmp1, tmp1, 8); ++ } ++ srli(Rd, Rs, 56); ++ andi(Rd, Rd, 0xFF); ++ orr(Rd, tmp1, Rd); ++} + -+ // Support for fast byte/short loading with zero extension (depending on particular CPU) -+ int load_unsigned_byte(Register dst, Address src); -+ int load_unsigned_short(Register dst, Address src); ++// rotate right with shift bits ++void MacroAssembler::ror_imm(Register dst, Register src, uint32_t shift, Register tmp) ++{ ++ if (UseRVB) { ++ rori(dst, src, shift); ++ return; ++ } + -+ // Support for fast byte/short loading with sign extension (depending on particular CPU) -+ int load_signed_byte(Register dst, Address src); -+ int load_signed_short(Register dst, Address src); ++ assert_different_registers(dst, tmp); ++ assert_different_registers(src, tmp); ++ assert(shift < 64, "shift amount must be < 64"); ++ slli(tmp, src, 64 - shift); ++ srli(dst, src, shift); ++ orr(dst, dst, tmp); ++} + -+ // Load and store values by size and signed-ness -+ void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg); -+ void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg); ++void MacroAssembler::andi(Register Rd, Register Rn, int64_t imm, Register tmp) { ++ if (is_imm_in_range(imm, 12, 0)) { ++ and_imm12(Rd, Rn, imm); ++ } else { ++ assert_different_registers(Rn, tmp); ++ li(tmp, imm); ++ andr(Rd, Rn, tmp); ++ } ++} + -+ public: -+ // enum used for riscv--x86 linkage to define return type of x86 function -+ enum ret_type { ret_type_void, ret_type_integral, ret_type_float, ret_type_double}; ++void MacroAssembler::orptr(Address adr, RegisterOrConstant src, Register tmp1, Register tmp2) { ++ ld(tmp1, adr); ++ if (src.is_register()) { ++ orr(tmp1, tmp1, src.as_register()); ++ } else { ++ if (is_imm_in_range(src.as_constant(), 12, 0)) { ++ ori(tmp1, tmp1, src.as_constant()); ++ } else { ++ assert_different_registers(tmp1, tmp2); ++ li(tmp2, src.as_constant()); ++ orr(tmp1, tmp1, tmp2); ++ } ++ } ++ sd(tmp1, adr); ++} + -+ // Standard pseudoinstruction -+ void nop(); -+ void mv(Register Rd, Register Rs) ; -+ void notr(Register Rd, Register Rs); -+ void neg(Register Rd, Register Rs); -+ void negw(Register Rd, Register Rs); -+ void sext_w(Register Rd, Register Rs); -+ void zext_b(Register Rd, Register Rs); -+ void seqz(Register Rd, Register Rs); // set if = zero -+ void snez(Register Rd, Register Rs); // set if != zero -+ void sltz(Register Rd, Register Rs); // set if < zero -+ void sgtz(Register Rd, Register Rs); // set if > zero ++void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp, Label &L) { ++ if (UseCompressedClassPointers) { ++ lwu(tmp, Address(oop, oopDesc::klass_offset_in_bytes())); ++ if (CompressedKlassPointers::base() == NULL) { ++ slli(tmp, tmp, CompressedKlassPointers::shift()); ++ beq(trial_klass, tmp, L); ++ return; ++ } ++ decode_klass_not_null(tmp); ++ } else { ++ ld(tmp, Address(oop, oopDesc::klass_offset_in_bytes())); ++ } ++ beq(trial_klass, tmp, L); ++} + -+ // Float pseudoinstruction -+ void fmv_s(FloatRegister Rd, FloatRegister Rs); -+ void fabs_s(FloatRegister Rd, FloatRegister Rs); // single-precision absolute value -+ void fneg_s(FloatRegister Rd, FloatRegister Rs); ++// Move an oop into a 
register. immediate is true if we want ++// immediate instructions and nmethod entry barriers are not enabled. ++// i.e. we are not going to patch this instruction while the code is being ++// executed by another thread. ++void MacroAssembler::movoop(Register dst, jobject obj, bool immediate) { ++ int oop_index; ++ if (obj == NULL) { ++ oop_index = oop_recorder()->allocate_oop_index(obj); ++ } else { ++#ifdef ASSERT ++ { ++ ThreadInVMfromUnknown tiv; ++ assert(Universe::heap()->is_in(JNIHandles::resolve(obj)), "should be real oop"); ++ } ++#endif ++ oop_index = oop_recorder()->find_index(obj); ++ } ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); + -+ // Double pseudoinstruction -+ void fmv_d(FloatRegister Rd, FloatRegister Rs); -+ void fabs_d(FloatRegister Rd, FloatRegister Rs); -+ void fneg_d(FloatRegister Rd, FloatRegister Rs); ++ // nmethod entry barrier necessitate using the constant pool. They have to be ++ // ordered with respected to oop access. ++ // Using immediate literals would necessitate fence.i. ++ if (BarrierSet::barrier_set()->barrier_set_nmethod() != NULL || !immediate) { ++ address dummy = address(uintptr_t(pc()) & -wordSize); // A nearby aligned address ++ ld_constant(dst, Address(dummy, rspec)); ++ } else ++ mv(dst, Address((address)obj, rspec)); ++} + -+ // Pseudoinstruction for control and status register -+ void rdinstret(Register Rd); // read instruction-retired counter -+ void rdcycle(Register Rd); // read cycle counter -+ void rdtime(Register Rd); // read time -+ void csrr(Register Rd, unsigned csr); // read csr -+ void csrw(unsigned csr, Register Rs); // write csr -+ void csrs(unsigned csr, Register Rs); // set bits in csr -+ void csrc(unsigned csr, Register Rs); // clear bits in csr -+ void csrwi(unsigned csr, unsigned imm); -+ void csrsi(unsigned csr, unsigned imm); -+ void csrci(unsigned csr, unsigned imm); -+ void frcsr(Register Rd); // read float-point csr -+ void fscsr(Register Rd, Register Rs); // swap float-point csr -+ void fscsr(Register Rs); // write float-point csr -+ void frrm(Register Rd); // read float-point rounding mode -+ void fsrm(Register Rd, Register Rs); // swap float-point rounding mode -+ void fsrm(Register Rs); // write float-point rounding mode -+ void fsrmi(Register Rd, unsigned imm); -+ void fsrmi(unsigned imm); -+ void frflags(Register Rd); // read float-point exception flags -+ void fsflags(Register Rd, Register Rs); // swap float-point exception flags -+ void fsflags(Register Rs); // write float-point exception flags -+ void fsflagsi(Register Rd, unsigned imm); -+ void fsflagsi(unsigned imm); ++// Move a metadata address into a register. 
++void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { ++ int oop_index; ++ if (obj == NULL) { ++ oop_index = oop_recorder()->allocate_metadata_index(obj); ++ } else { ++ oop_index = oop_recorder()->find_index(obj); ++ } ++ RelocationHolder rspec = metadata_Relocation::spec(oop_index); ++ mv(dst, Address((address)obj, rspec)); ++} + -+ void beqz(Register Rs, const address &dest); -+ void blez(Register Rs, const address &dest); -+ void bgez(Register Rs, const address &dest); -+ void bltz(Register Rs, const address &dest); -+ void bgtz(Register Rs, const address &dest); -+ void bnez(Register Rs, const address &dest); -+ void la(Register Rd, Label &label); -+ void la(Register Rd, const address &dest); -+ void la(Register Rd, const Address &adr); -+ //label -+ void beqz(Register Rs, Label &l, bool is_far = false); -+ void bnez(Register Rs, Label &l, bool is_far = false); -+ void blez(Register Rs, Label &l, bool is_far = false); -+ void bgez(Register Rs, Label &l, bool is_far = false); -+ void bltz(Register Rs, Label &l, bool is_far = false); -+ void bgtz(Register Rs, Label &l, bool is_far = false); -+ void float_beq(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); -+ void float_bne(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); -+ void float_ble(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); -+ void float_bge(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); -+ void float_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); -+ void float_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); -+ void double_beq(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); -+ void double_bne(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); -+ void double_ble(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); -+ void double_bge(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); -+ void double_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); -+ void double_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); -+ -+ void push_reg(RegSet regs, Register stack) { if (regs.bits()) { push_reg(regs.bits(), stack); } } -+ void pop_reg(RegSet regs, Register stack) { if (regs.bits()) { pop_reg(regs.bits(), stack); } } -+ void push_reg(Register Rs); -+ void pop_reg(Register Rd); -+ int push_reg(unsigned int bitset, Register stack); -+ int pop_reg(unsigned int bitset, Register stack); -+ static RegSet call_clobbered_registers(); -+ void push_call_clobbered_registers(); -+ void pop_call_clobbered_registers(); -+ void push_CPU_state(bool save_vectors = false, int vector_size_in_bytes = 0); -+ void pop_CPU_state(bool restore_vectors = false, int vector_size_in_bytes = 0); -+ -+ // if heap base register is used - reinit it with the correct value -+ void reinit_heapbase(); ++// Writes to stack successive pages until offset reached to check for ++// stack overflow + shadow pages. This clobbers tmp. 
++void MacroAssembler::bang_stack_size(Register size, Register tmp) { ++ assert_different_registers(tmp, size, t0); ++ // Bang stack for total size given plus shadow page size. ++ // Bang one page at a time because large size can bang beyond yellow and ++ // red zones. ++ mv(t0, os::vm_page_size()); ++ Label loop; ++ bind(loop); ++ sub(tmp, sp, t0); ++ subw(size, size, t0); ++ sd(size, Address(tmp)); ++ bgtz(size, loop); + -+ void bind(Label& L) { -+ Assembler::bind(L); -+ // fences across basic blocks should not be merged -+ code()->clear_last_insn(); ++ // Bang down shadow pages too. ++ // At this point, (tmp-0) is the last address touched, so don't ++ // touch it again. (It was touched as (tmp-pagesize) but then tmp ++ // was post-decremented.) Skip this address by starting at i=1, and ++ // touch a few more pages below. N.B. It is important to touch all ++ // the way down to and including i=StackShadowPages. ++ for (int i = 0; i < (int)(StackOverflow::stack_shadow_zone_size() / os::vm_page_size()) - 1; i++) { ++ // this could be any sized move but this is can be a debugging crumb ++ // so the bigger the better. ++ sub(tmp, tmp, os::vm_page_size()); ++ sd(size, Address(tmp, 0)); + } ++} + -+ // mv -+ void mv(Register Rd, address addr) { li(Rd, (int64_t)addr); } -+ -+ inline void mv(Register Rd, int imm64) { li(Rd, (int64_t)imm64); } -+ inline void mv(Register Rd, long imm64) { li(Rd, (int64_t)imm64); } -+ inline void mv(Register Rd, long long imm64) { li(Rd, (int64_t)imm64); } -+ inline void mv(Register Rd, unsigned int imm64) { li(Rd, (int64_t)imm64); } -+ inline void mv(Register Rd, unsigned long imm64) { li(Rd, (int64_t)imm64); } -+ inline void mv(Register Rd, unsigned long long imm64) { li(Rd, (int64_t)imm64); } -+ -+ inline void mvw(Register Rd, int32_t imm32) { mv(Rd, imm32); } ++SkipIfEqual::SkipIfEqual(MacroAssembler* masm, const bool* flag_addr, bool value) { ++ assert_cond(masm != NULL); ++ int32_t offset = 0; ++ _masm = masm; ++ _masm->la_patchable(t0, ExternalAddress((address)flag_addr), offset); ++ _masm->lbu(t0, Address(t0, offset)); ++ _masm->beqz(t0, _label); ++} + -+ void mv(Register Rd, Address dest); -+ void mv(Register Rd, RegisterOrConstant src); ++SkipIfEqual::~SkipIfEqual() { ++ assert_cond(_masm != NULL); ++ _masm->bind(_label); ++ _masm = NULL; ++} + -+ // logic -+ void andrw(Register Rd, Register Rs1, Register Rs2); -+ void orrw(Register Rd, Register Rs1, Register Rs2); -+ void xorrw(Register Rd, Register Rs1, Register Rs2); ++void MacroAssembler::load_mirror(Register dst, Register method, Register tmp) { ++ const int mirror_offset = in_bytes(Klass::java_mirror_offset()); ++ ld(dst, Address(xmethod, Method::const_offset())); ++ ld(dst, Address(dst, ConstMethod::constants_offset())); ++ ld(dst, Address(dst, ConstantPool::pool_holder_offset_in_bytes())); ++ ld(dst, Address(dst, mirror_offset)); ++ resolve_oop_handle(dst, tmp); ++} + -+ // vext -+ void vmnot_m(VectorRegister vd, VectorRegister vs); -+ void vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked); -+ void vfneg_v(VectorRegister vd, VectorRegister vs); ++void MacroAssembler::resolve_oop_handle(Register result, Register tmp) { ++ // OopHandle::resolve is an indirection. 
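// --- Editorial sketch (not part of the patch) --------------------------------
// "An indirection" concretely: an OopHandle is the address of a root slot that
// holds the oop, so resolving it is a single dependent load. The real load is
// issued through access_load_at so the GC's IN_NATIVE barriers apply. A
// hypothetical two-line model (illustrative names only):
//
//   struct Oop;                              // stand-in for a Java heap object
//   using OopHandleSlot = Oop* const*;       // handle = address of the slot holding the oop
//   inline Oop* resolve(OopHandleSlot h) { return *h; }
// -----------------------------------------------------------------------------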
++ assert_different_registers(result, tmp); ++ access_load_at(T_OBJECT, IN_NATIVE, result, Address(result, 0), tmp, noreg); ++} + -+ // support for argument shuffling -+ void move32_64(VMRegPair src, VMRegPair dst, Register tmp = t0); -+ void float_move(VMRegPair src, VMRegPair dst, Register tmp = t0); -+ void long_move(VMRegPair src, VMRegPair dst, Register tmp = t0); -+ void double_move(VMRegPair src, VMRegPair dst, Register tmp = t0); -+ void object_move(OopMap* map, -+ int oop_handle_offset, -+ int framesize_in_slots, -+ VMRegPair src, -+ VMRegPair dst, -+ bool is_receiver, -+ int* receiver_offset); -+ -+ void rt_call(address dest, Register tmp = t0); ++// ((WeakHandle)result).resolve() ++void MacroAssembler::resolve_weak_handle(Register result, Register tmp) { ++ assert_different_registers(result, tmp); ++ Label resolved; + -+ // revb -+ void revb_h_h(Register Rd, Register Rs, Register tmp = t0); // reverse bytes in halfword in lower 16 bits, sign-extend -+ void revb_w_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in lower word, sign-extend -+ void revb_h_h_u(Register Rd, Register Rs, Register tmp = t0); // reverse bytes in halfword in lower 16 bits, zero-extend -+ void revb_h_w_u(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in halfwords in lower 32 bits, zero-extend -+ void revb_h_helper(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in upper 16 bits (48:63) and move to lower -+ void revb_h(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in each halfword -+ void revb_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in each word -+ void revb(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in doubleword ++ // A null weak handle resolves to null. ++ beqz(result, resolved); + -+ void andi(Register Rd, Register Rn, int64_t increment, Register tmp = t0); -+ void orptr(Address adr, RegisterOrConstant src, Register tmp1 = t0, Register tmp2 = t1); ++ // Only 64 bit platforms support GCs that require a tmp register ++ // Only IN_HEAP loads require a thread_tmp register ++ // WeakHandle::resolve is an indirection like jweak. 
++ access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, ++ result, Address(result), tmp, noreg /* tmp_thread */); ++ bind(resolved); ++} + -+ // Support for serializing memory accesses between threads -+ void serialize_memory(Register thread, Register tmp1, Register tmp2); ++void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, ++ Register dst, Address src, ++ Register tmp1, Register thread_tmp) { ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ decorators = AccessInternal::decorator_fixup(decorators); ++ bool as_raw = (decorators & AS_RAW) != 0; ++ if (as_raw) { ++ bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, thread_tmp); ++ } else { ++ bs->load_at(this, decorators, type, dst, src, tmp1, thread_tmp); ++ } ++} + -+ void cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp, Label &succeed, Label *fail); -+ void cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, Label &succeed, Label *fail) ; -+ void cmpxchg(Register addr, Register expected, -+ Register new_val, -+ enum operand_size size, -+ Assembler::Aqrl acquire, Assembler::Aqrl release, -+ Register result, bool result_as_bool = false); -+ void cmpxchg_weak(Register addr, Register expected, -+ Register new_val, -+ enum operand_size size, -+ Assembler::Aqrl acquire, Assembler::Aqrl release, -+ Register result); -+ void cmpxchg_narrow_value_helper(Register addr, Register expected, -+ Register new_val, -+ enum operand_size size, -+ Register tmp1, Register tmp2, Register tmp3); -+ void cmpxchg_narrow_value(Register addr, Register expected, -+ Register new_val, -+ enum operand_size size, -+ Assembler::Aqrl acquire, Assembler::Aqrl release, -+ Register result, bool result_as_bool, -+ Register tmp1, Register tmp2, Register tmp3); -+ void weak_cmpxchg_narrow_value(Register addr, Register expected, -+ Register new_val, -+ enum operand_size size, -+ Assembler::Aqrl acquire, Assembler::Aqrl release, -+ Register result, -+ Register tmp1, Register tmp2, Register tmp3); ++void MacroAssembler::null_check(Register reg, int offset) { ++ if (needs_explicit_null_check(offset)) { ++ // provoke OS NULL exception if reg = NULL by ++ // accessing M[reg] w/o changing any registers ++ // NOTE: this is plenty to provoke a segv ++ ld(zr, Address(reg, 0)); ++ } else { ++ // nothing to do, (later) access of M[reg + offset] ++ // will provoke OS NULL exception if reg = NULL ++ } ++} + -+ void atomic_add(Register prev, RegisterOrConstant incr, Register addr); -+ void atomic_addw(Register prev, RegisterOrConstant incr, Register addr); -+ void atomic_addal(Register prev, RegisterOrConstant incr, Register addr); -+ void atomic_addalw(Register prev, RegisterOrConstant incr, Register addr); ++void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, ++ Address dst, Register src, ++ Register tmp1, Register thread_tmp) { ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ decorators = AccessInternal::decorator_fixup(decorators); ++ bool as_raw = (decorators & AS_RAW) != 0; ++ if (as_raw) { ++ bs->BarrierSetAssembler::store_at(this, decorators, type, dst, src, tmp1, thread_tmp); ++ } else { ++ bs->store_at(this, decorators, type, dst, src, tmp1, thread_tmp); ++ } ++} + -+ void atomic_xchg(Register prev, Register newv, Register addr); -+ void atomic_xchgw(Register prev, Register newv, Register addr); -+ void atomic_xchgal(Register prev, Register newv, Register addr); -+ void 
atomic_xchgalw(Register prev, Register newv, Register addr); -+ void atomic_xchgwu(Register prev, Register newv, Register addr); -+ void atomic_xchgalwu(Register prev, Register newv, Register addr); ++// Algorithm must match CompressedOops::encode. ++void MacroAssembler::encode_heap_oop(Register d, Register s) { ++ verify_oop(s, "broken oop in encode_heap_oop"); ++ if (CompressedOops::base() == NULL) { ++ if (CompressedOops::shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ srli(d, s, LogMinObjAlignmentInBytes); ++ } else { ++ mv(d, s); ++ } ++ } else { ++ Label notNull; ++ sub(d, s, xheapbase); ++ bgez(d, notNull); ++ mv(d, zr); ++ bind(notNull); ++ if (CompressedOops::shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ srli(d, d, CompressedOops::shift()); ++ } ++ } ++} + -+ // Biased locking support -+ // lock_reg and obj_reg must be loaded up with the appropriate values. -+ // swap_reg is killed. -+ // tmp_reg must be supplied and must not be t0 or t1 -+ // Optional slow case is for implementations (interpreter and C1) which branch to -+ // slow case directly. Leaves condition codes set for C2's Fast_Lock node. -+ // Returns offset of first potentially-faulting instruction for null -+ // check info (currently consumed only by C1). If -+ // swap_reg_contains_mark is true then returns -1 as it is assumed -+ // the calling code has already passed any potential faults. -+ int biased_locking_enter(Register lock_reg, Register obj_reg, -+ Register swap_reg, Register tmp_reg, -+ bool swap_reg_contains_mark, -+ Label& done, Label* slow_case = NULL, -+ BiasedLockingCounters* counters = NULL, -+ Register flag = noreg); -+ void biased_locking_exit(Register obj_reg, Register tmp_reg, Label& done, Register flag = noreg); ++void MacroAssembler::load_klass(Register dst, Register src) { ++ if (UseCompressedClassPointers) { ++ lwu(dst, Address(src, oopDesc::klass_offset_in_bytes())); ++ decode_klass_not_null(dst); ++ } else { ++ ld(dst, Address(src, oopDesc::klass_offset_in_bytes())); ++ } ++} + -+ static bool far_branches() { -+ return ReservedCodeCacheSize > branch_range; ++void MacroAssembler::store_klass(Register dst, Register src) { ++ // FIXME: Should this be a store release? concurrent gcs assumes ++ // klass length is valid if klass field is not null. ++ if (UseCompressedClassPointers) { ++ encode_klass_not_null(src); ++ sw(src, Address(dst, oopDesc::klass_offset_in_bytes())); ++ } else { ++ sd(src, Address(dst, oopDesc::klass_offset_in_bytes())); + } ++} + -+ //atomic -+ void atomic_incw(Register counter_addr, Register tmp1); -+ void atomic_incw(Address counter_addr, Register tmp1, Register tmp2) { -+ la(tmp1, counter_addr); -+ atomic_incw(tmp1, tmp2); ++void MacroAssembler::store_klass_gap(Register dst, Register src) { ++ if (UseCompressedClassPointers) { ++ // Store to klass gap in destination ++ sw(src, Address(dst, oopDesc::klass_gap_offset_in_bytes())); + } ++} + -+ // Jumps that can reach anywhere in the code cache. -+ // Trashes tmp. 
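// --- Editorial sketch (not part of the patch) --------------------------------
// encode_heap_oop above is the usual compressed-oops arithmetic: subtract the
// heap base and shift right by the alignment (the assembly folds the NULL case
// by clamping a negative difference to zero); the decode helpers further down
// reverse it with a shift-and-add off xheapbase. A minimal model with assumed,
// illustrative parameters (the real base and shift are chosen by the VM at
// startup, not fixed as below):

#include <cassert>
#include <cstdint>

namespace coops_sketch {
  constexpr uint64_t kHeapBase = 0x0000000800000000ull;   // assumed: heap mapped at 32 GB
  constexpr unsigned kShift    = 3;                       // assumed: 8-byte object alignment

  inline uint32_t encode(uint64_t oop) {                  // cf. encode_heap_oop
    if (oop < kHeapBase) return 0;                        // NULL (and anything below base) -> 0
    return uint32_t((oop - kHeapBase) >> kShift);
  }
  inline uint64_t decode_not_null(uint32_t narrow) {      // cf. decode_heap_oop_not_null
    return kHeapBase + (uint64_t(narrow) << kShift);
  }
  inline void selfcheck() {
    const uint64_t oop = kHeapBase + (uint64_t(0x12345678) << kShift);
    assert(decode_not_null(encode(oop)) == oop);
    assert(encode(0) == 0);
  }
}
// -----------------------------------------------------------------------------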
-+ void far_call(Address entry, Register tmp = t0); -+ void far_jump(Address entry, Register tmp = t0); ++void MacroAssembler::decode_klass_not_null(Register r) { ++ decode_klass_not_null(r, r); ++} + -+ static int far_branch_size() { -+ if (far_branches()) { -+ return 2 * 4; // auipc + jalr, see far_call() & far_jump() ++void MacroAssembler::decode_klass_not_null(Register dst, Register src, Register tmp) { ++ assert(UseCompressedClassPointers, "should only be used for compressed headers"); ++ ++ if (CompressedKlassPointers::base() == NULL) { ++ if (CompressedKlassPointers::shift() != 0) { ++ assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); ++ slli(dst, src, LogKlassAlignmentInBytes); + } else { -+ return 4; ++ mv(dst, src); + } ++ return; + } + -+ void load_byte_map_base(Register reg); -+ -+ void bang_stack_with_offset(int offset) { -+ // stack grows down, caller passes positive offset -+ assert(offset > 0, "must bang with negative offset"); -+ sub(t1, sp, offset); -+ sd(zr, Address(t1)); ++ Register xbase = dst; ++ if (dst == src) { ++ xbase = tmp; + } + -+ void la_patchable(Register reg1, const Address &dest, int32_t &offset); ++ assert_different_registers(src, xbase); ++ li(xbase, (uintptr_t)CompressedKlassPointers::base()); + -+ virtual void _call_Unimplemented(address call_site) { -+ mv(t1, call_site); ++ if (CompressedKlassPointers::shift() != 0) { ++ assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); ++ assert_different_registers(t0, xbase); ++ shadd(dst, src, xbase, t0, LogKlassAlignmentInBytes); ++ } else { ++ add(dst, xbase, src); + } -+ #define call_Unimplemented() _call_Unimplemented((address)__PRETTY_FUNCTION__) + -+#ifdef COMPILER2 -+ void spill(Register Rx, bool is64, int offset) { -+ is64 ? sd(Rx, Address(sp, offset)) -+ : sw(Rx, Address(sp, offset)); -+ } ++ if (xbase == xheapbase) { reinit_heapbase(); } ++} + -+ void spill(FloatRegister Rx, bool is64, int offset) { -+ is64 ? fsd(Rx, Address(sp, offset)) -+ : fsw(Rx, Address(sp, offset)); -+ } ++void MacroAssembler::encode_klass_not_null(Register r) { ++ encode_klass_not_null(r, r); ++} + -+ void spill(VectorRegister Vx, int offset) { -+ add(t0, sp, offset); -+ vs1r_v(Vx, t0); ++void MacroAssembler::encode_klass_not_null(Register dst, Register src, Register tmp) { ++ assert(UseCompressedClassPointers, "should only be used for compressed headers"); ++ ++ if (CompressedKlassPointers::base() == NULL) { ++ if (CompressedKlassPointers::shift() != 0) { ++ assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); ++ srli(dst, src, LogKlassAlignmentInBytes); ++ } else { ++ mv(dst, src); ++ } ++ return; + } + -+ void unspill(Register Rx, bool is64, int offset) { -+ is64 ? ld(Rx, Address(sp, offset)) -+ : lw(Rx, Address(sp, offset)); ++ if (((uint64_t)(uintptr_t)CompressedKlassPointers::base() & 0xffffffff) == 0 && ++ CompressedKlassPointers::shift() == 0) { ++ zero_extend(dst, src, 32); ++ return; + } + -+ void unspillu(Register Rx, bool is64, int offset) { -+ is64 ? ld(Rx, Address(sp, offset)) -+ : lwu(Rx, Address(sp, offset)); ++ Register xbase = dst; ++ if (dst == src) { ++ xbase = tmp; + } + -+ void unspill(FloatRegister Rx, bool is64, int offset) { -+ is64 ? 
fld(Rx, Address(sp, offset)) -+ : flw(Rx, Address(sp, offset)); ++ assert_different_registers(src, xbase); ++ li(xbase, (intptr_t)CompressedKlassPointers::base()); ++ sub(dst, src, xbase); ++ if (CompressedKlassPointers::shift() != 0) { ++ assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); ++ srli(dst, dst, LogKlassAlignmentInBytes); ++ } ++ if (xbase == xheapbase) { ++ reinit_heapbase(); + } ++} + -+ void unspill(VectorRegister Vx, int offset) { -+ add(t0, sp, offset); -+ vl1r_v(Vx, t0); ++void MacroAssembler::decode_heap_oop_not_null(Register r) { ++ decode_heap_oop_not_null(r, r); ++} ++ ++void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { ++ assert(UseCompressedOops, "should only be used for compressed headers"); ++ assert(Universe::heap() != NULL, "java heap should be initialized"); ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. ++ if (CompressedOops::shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ slli(dst, src, LogMinObjAlignmentInBytes); ++ if (CompressedOops::base() != NULL) { ++ add(dst, xheapbase, dst); ++ } ++ } else { ++ assert(CompressedOops::base() == NULL, "sanity"); ++ mv(dst, src); + } ++} + -+ void spill_copy_vector_stack_to_stack(int src_offset, int dst_offset, -+ int vec_reg_size_in_bytes) { -+ assert(vec_reg_size_in_bytes % 16 == 0, "unexpected vector reg size"); -+ unspill(v0, src_offset); -+ spill(v0, dst_offset); ++void MacroAssembler::decode_heap_oop(Register d, Register s) { ++ if (CompressedOops::base() == NULL) { ++ if (CompressedOops::shift() != 0 || d != s) { ++ slli(d, s, CompressedOops::shift()); ++ } ++ } else { ++ Label done; ++ mv(d, s); ++ beqz(s, done); ++ shadd(d, s, xheapbase, d, LogMinObjAlignmentInBytes); ++ bind(done); + } ++ verify_oop(d, "broken oop in decode_heap_oop"); ++} + -+#endif // COMPILER2 ++void MacroAssembler::store_heap_oop(Address dst, Register src, Register tmp1, ++ Register thread_tmp, DecoratorSet decorators) { ++ access_store_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp); ++} + -+ // Frame creation and destruction shared between JITs. -+ void build_frame(int framesize); -+ void remove_frame(int framesize); ++void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1, ++ Register thread_tmp, DecoratorSet decorators) { ++ access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp); ++} + -+ void reserved_stack_check(); -+ void get_polling_page(Register dest, address page, int32_t &offset, relocInfo::relocType rtype); -+ void read_polling_page(Register r, address page, relocInfo::relocType rtype); -+ void read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype); -+ // Return: the call PC -+ address trampoline_call(Address entry); -+ address ic_call(address entry, jint method_index = 0); -+ // Support for memory inc/dec -+ // n.b. increment/decrement calls with an Address destination will -+ // need to use a scratch register to load the value to be -+ // incremented. increment/decrement calls which add or subtract a -+ // constant value other than sign-extended 12-bit immediate will need -+ // to use a 2nd scratch register to hold the constant. so, an address -+ // increment/decrement may trash both t0 and t1. 
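// --- Editorial sketch (not part of the patch) --------------------------------
// The corrected_idivl / corrected_idivq implementations later in this file need
// no fix-up branches: RISC-V's div/rem already return the Java-specified result
// for the MIN_VALUE / -1 corner case (the quotient wraps back to MIN_VALUE and
// the remainder is 0), and integer division never traps. A reference model of
// those semantics, computed in wider arithmetic so the C++ avoids
// signed-overflow UB (illustrative names; the zero-divisor case is handled
// separately by the VM):

#include <cassert>
#include <cstdint>

namespace idiv_sketch {
  inline int32_t java_idiv(int32_t a, int32_t b) {        // requires b != 0
    return int32_t(int64_t(a) / int64_t(b));              // truncating division; two's-complement narrowing
  }
  inline int32_t java_irem(int32_t a, int32_t b) {        // requires b != 0
    return int32_t(int64_t(a) % int64_t(b));
  }
  inline void selfcheck() {
    assert(java_idiv(INT32_MIN, -1) == INT32_MIN);        // wraps, does not trap
    assert(java_irem(INT32_MIN, -1) == 0);
    assert(java_idiv(-7, 2) == -3 && java_irem(-7, 2) == -1);  // truncation toward zero
  }
}
// -----------------------------------------------------------------------------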
-+ -+ void increment(const Address dst, int64_t value = 1); -+ void incrementw(const Address dst, int32_t value = 1); -+ -+ void decrement(const Address dst, int64_t value = 1); -+ void decrementw(const Address dst, int32_t value = 1); -+ void cmpptr(Register src1, Address src2, Label& equal); -+ void oop_equal(Register obj1, Register obj2, Label& equal, bool is_far = false); // cmpoop -+ void oop_nequal(Register obj1, Register obj2, Label& nequal, bool is_far = false); -+ void ror_imm(Register dst, Register src, uint32_t shift, Register tmp = t0); -+#ifdef COMPILER2 -+ void minmax_FD(FloatRegister dst, FloatRegister src1, FloatRegister src2, bool is_double, bool is_min); ++void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1, ++ Register thread_tmp, DecoratorSet decorators) { ++ access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL, dst, src, tmp1, thread_tmp); ++} + -+ address arrays_equals(Register a1, Register a2, Register tmp3, Register tmp4, -+ Register tmp5, Register tmp6, Register result, Register cnt1, int elem_size); ++// Used for storing NULLs. ++void MacroAssembler::store_heap_oop_null(Address dst) { ++ access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg); ++} + -+ void string_equals(Register a1, Register a2, Register result, Register cnt1, -+ int elem_size); -+ void string_compare(Register str1, Register str2, -+ Register cnt1, Register cnt2, Register result, -+ Register tmp1, Register tmp2, Register tmp3, int ae); -+ void string_indexof_char_short(Register str1, Register cnt1, -+ Register ch, Register result, -+ bool isL); -+ void string_indexof_char(Register str1, Register cnt1, -+ Register ch, Register result, -+ Register tmp1, Register tmp2, -+ Register tmp3, Register tmp4, -+ bool isL); -+ void string_indexof(Register str1, Register str2, -+ Register cnt1, Register cnt2, -+ Register tmp1, Register tmp2, -+ Register tmp3, Register tmp4, -+ Register tmp5, Register tmp6, -+ Register result, int ae); -+ void string_indexof_linearscan(Register haystack, Register needle, -+ Register haystack_len, Register needle_len, -+ Register tmp1, Register tmp2, -+ Register tmp3, Register tmp4, -+ int needle_con_cnt, Register result, int ae); -+ void compute_index(Register str1, Register trailing_zero, Register match_mask, -+ Register result, Register char_tmp, Register tmp, -+ bool haystack_isL); -+ void compute_match_mask(Register src, Register pattern, Register match_mask, -+ Register mask1, Register mask2); -+ void cad(Register dst, Register src1, Register src2, Register carry); -+ void cadc(Register dst, Register src1, Register src2, Register carry); -+ void adc(Register dst, Register src1, Register src2, Register carry); -+ void add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo, -+ Register src1, Register src2, Register carry = t0); -+ void mul_add(Register out, Register in, Register offset, -+ Register len, Register k, Register tmp1, Register tmp2); -+ void multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart, -+ Register y, Register y_idx, Register z, -+ Register carry, Register product, -+ Register idx, Register kdx); -+ void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart, -+ Register y, Register y_idx, Register z, -+ Register carry, Register product, -+ Register idx, Register kdx); -+ void multiply_128_x_128_loop(Register y, Register z, -+ Register carry, Register carry2, -+ Register idx, Register jdx, -+ Register yz_idx1, Register yz_idx2, -+ Register tmp, Register tmp3, Register tmp4, 
-+ Register tmp6, Register product_hi); -+ void multiply_to_len(Register x, Register xlen, Register y, Register ylen, -+ Register z, Register zlen, -+ Register tmp1, Register tmp2, Register tmp3, Register tmp4, -+ Register tmp5, Register tmp6, Register product_hi); -+#endif // COMPILER2 -+ void inflate_lo32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); -+ void inflate_hi32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); -+ -+ void ctzc_bit(Register Rd, Register Rs, bool isLL = false, Register tmp1 = t0, Register tmp2 = t1); -+ void zero_words(Register base, uint64_t cnt); -+ address zero_words(Register ptr, Register cnt); -+ void fill_words(Register base, Register cnt, Register value); -+ void zero_memory(Register addr, Register len, Register tmp1); ++int MacroAssembler::corrected_idivl(Register result, Register rs1, Register rs2, ++ bool want_remainder) ++{ ++ // Full implementation of Java idiv and irem. The function ++ // returns the (pc) offset of the div instruction - may be needed ++ // for implicit exceptions. ++ // ++ // input : rs1: dividend ++ // rs2: divisor ++ // ++ // result: either ++ // quotient (= rs1 idiv rs2) ++ // remainder (= rs1 irem rs2) + -+ // shift left by shamt and add -+ void shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt); + -+#ifdef COMPILER2 -+ // refer to conditional_branches and float_conditional_branches -+ static const int bool_test_bits = 3; -+ static const int neg_cond_bits = 2; -+ static const int unsigned_branch_mask = 1 << bool_test_bits; -+ static const int double_branch_mask = 1 << bool_test_bits; ++ int idivl_offset = offset(); ++ if (!want_remainder) { ++ divw(result, rs1, rs2); ++ } else { ++ remw(result, rs1, rs2); // result = rs1 % rs2; ++ } ++ return idivl_offset; ++} + -+ void enc_cmove(int cmpFlag, Register op1, Register op2, Register dst, Register src); ++int MacroAssembler::corrected_idivq(Register result, Register rs1, Register rs2, ++ bool want_remainder) ++{ ++ // Full implementation of Java ldiv and lrem. The function ++ // returns the (pc) offset of the div instruction - may be needed ++ // for implicit exceptions. ++ // ++ // input : rs1: dividend ++ // rs2: divisor ++ // ++ // result: either ++ // quotient (= rs1 idiv rs2) ++ // remainder (= rs1 irem rs2) + -+ // cmp -+ void cmp_branch(int cmpFlag, Register op1, Register op2, Label& label, bool is_far = false); -+ void float_cmp_branch(int cmpFlag, FloatRegister op1, FloatRegister op2, Label& label, bool is_far = false); ++ int idivq_offset = offset(); ++ if (!want_remainder) { ++ div(result, rs1, rs2); ++ } else { ++ rem(result, rs1, rs2); // result = rs1 % rs2; ++ } ++ return idivq_offset; ++} + -+ void enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far = false); -+ void enc_cmpEqNe_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far = false); ++// Look up the method for a megamorpic invkkeinterface call. ++// The target method is determined by . ++// The receiver klass is in recv_klass. ++// On success, the result will be in method_result, and execution falls through. ++// On failure, execution transfers to the given label. 
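// --- Editorial sketch (not part of the patch) --------------------------------
// Before the assembly below, a grossly simplified model of the walk it
// performs: after the embedded vtable, a klass carries a null-terminated array
// of itable entries, each naming an interface and (in the real layout) a byte
// offset to that interface's method array within the same klass. The scan
// stops on a match (return the itable_index-th method) or on a null entry
// (interface not implemented). Names here are illustrative, not HotSpot types:

#include <cstddef>

namespace itable_sketch {
  struct Method;
  struct Interface;
  struct ItableEntry { const Interface* interface; const Method* const* methods; };

  inline const Method* lookup(const ItableEntry* scan, const Interface* iface, std::size_t itable_index) {
    for (; scan->interface != nullptr; ++scan) {
      if (scan->interface == iface) {
        return scan->methods[itable_index];    // "got a hit"
      }
    }
    return nullptr;                            // corresponds to branching to L_no_such_interface
  }
}
// -----------------------------------------------------------------------------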
++void MacroAssembler::lookup_interface_method(Register recv_klass, ++ Register intf_klass, ++ RegisterOrConstant itable_index, ++ Register method_result, ++ Register scan_tmp, ++ Label& L_no_such_interface, ++ bool return_method) { ++ assert_different_registers(recv_klass, intf_klass, scan_tmp); ++ assert_different_registers(method_result, intf_klass, scan_tmp); ++ assert(recv_klass != method_result || !return_method, ++ "recv_klass can be destroyed when mehtid isn't needed"); ++ assert(itable_index.is_constant() || itable_index.as_register() == method_result, ++ "caller must be same register for non-constant itable index as for method"); + -+ // intrinsic methods implemented by vector instructions -+ void string_equals_v(Register a1, Register a2, Register result, Register cnt1, int elem_size); -+ void arrays_equals_v(Register a1, Register a2, Register result, Register cnt1, int elem_size); -+ void string_compare_v(Register str1, Register str2, Register cnt1, Register cnt2, -+ Register result, Register tmp1, Register tmp2, int encForm); ++ // Compute start of first itableOffsetEntry (which is at the end of the vtable). ++ int vtable_base = in_bytes(Klass::vtable_start_offset()); ++ int itentry_off = itableMethodEntry::method_offset_in_bytes(); ++ int scan_step = itableOffsetEntry::size() * wordSize; ++ int vte_size = vtableEntry::size_in_bytes(); ++ assert(vte_size == wordSize, "else adjust times_vte_scale"); + -+ void clear_array_v(Register base, Register cnt); -+ address byte_array_inflate_v(Register src, Register dst, Register len, Register tmp); -+ void char_array_compress_v(Register src, Register dst, Register len, Register result, Register tmp); -+ void encode_iso_array_v(Register src, Register dst, Register len, Register result, Register tmp); ++ lwu(scan_tmp, Address(recv_klass, Klass::vtable_length_offset())); + -+ address has_negatives_v(Register ary, Register len, Register result, Register tmp); -+#endif ++ // %%% Could store the aligned, prescaled offset in the klassoop. ++ shadd(scan_tmp, scan_tmp, recv_klass, scan_tmp, 3); ++ add(scan_tmp, scan_tmp, vtable_base); + -+ // Here the float instructions with safe deal with some exceptions. -+ // e.g. convert from NaN, +Inf, -Inf to int, float, double -+ // will trigger exception, we need to deal with these situations -+ // to get correct results. -+ void fcvt_w_s_safe(Register dst, FloatRegister src, Register tmp = t0); -+ void fcvt_l_s_safe(Register dst, FloatRegister src, Register tmp = t0); -+ void fcvt_w_d_safe(Register dst, FloatRegister src, Register tmp = t0); -+ void fcvt_l_d_safe(Register dst, FloatRegister src, Register tmp = t0); ++ if (return_method) { ++ // Adjust recv_klass by scaled itable_index, so we can free itable_index. 
++ assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); ++ if (itable_index.is_register()) { ++ slli(t0, itable_index.as_register(), 3); ++ } else { ++ li(t0, itable_index.as_constant() << 3); ++ } ++ add(recv_klass, recv_klass, t0); ++ if (itentry_off) { ++ add(recv_klass, recv_klass, itentry_off); ++ } ++ } + -+ // vector load/store unit-stride instructions -+ void vlex_v(VectorRegister vd, Register base, Assembler::SEW sew, VectorMask vm = unmasked) { -+ switch (sew) { -+ case Assembler::e64: -+ vle64_v(vd, base, vm); -+ break; -+ case Assembler::e32: -+ vle32_v(vd, base, vm); -+ break; -+ case Assembler::e16: -+ vle16_v(vd, base, vm); -+ break; -+ case Assembler::e8: // fall through -+ default: -+ vle8_v(vd, base, vm); -+ break; -+ } ++ Label search, found_method; ++ ++ ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset_in_bytes())); ++ beq(intf_klass, method_result, found_method); ++ bind(search); ++ // Check that the previous entry is non-null. A null entry means that ++ // the receiver class doens't implement the interface, and wasn't the ++ // same as when the caller was compiled. ++ beqz(method_result, L_no_such_interface, /* is_far */ true); ++ addi(scan_tmp, scan_tmp, scan_step); ++ ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset_in_bytes())); ++ bne(intf_klass, method_result, search); ++ ++ bind(found_method); ++ ++ // Got a hit. ++ if (return_method) { ++ lwu(scan_tmp, Address(scan_tmp, itableOffsetEntry::offset_offset_in_bytes())); ++ add(method_result, recv_klass, scan_tmp); ++ ld(method_result, Address(method_result)); + } ++} + -+ void vsex_v(VectorRegister store_data, Register base, Assembler::SEW sew, VectorMask vm = unmasked) { -+ switch (sew) { -+ case Assembler::e64: -+ vse64_v(store_data, base, vm); -+ break; -+ case Assembler::e32: -+ vse32_v(store_data, base, vm); -+ break; -+ case Assembler::e16: -+ vse16_v(store_data, base, vm); -+ break; -+ case Assembler::e8: // fall through -+ default: -+ vse8_v(store_data, base, vm); -+ break; -+ } ++// virtual method calling ++void MacroAssembler::lookup_virtual_method(Register recv_klass, ++ RegisterOrConstant vtable_index, ++ Register method_result) { ++ const int base = in_bytes(Klass::vtable_start_offset()); ++ assert(vtableEntry::size() * wordSize == 8, ++ "adjust the scaling in the code below"); ++ int vtable_offset_in_bytes = base + vtableEntry::method_offset_in_bytes(); ++ ++ if (vtable_index.is_register()) { ++ shadd(method_result, vtable_index.as_register(), recv_klass, method_result, LogBytesPerWord); ++ ld(method_result, Address(method_result, vtable_offset_in_bytes)); ++ } else { ++ vtable_offset_in_bytes += vtable_index.as_constant() * wordSize; ++ ld(method_result, form_address(method_result, recv_klass, vtable_offset_in_bytes)); + } ++} + -+ static const int zero_words_block_size; ++void MacroAssembler::membar(uint32_t order_constraint) { ++ address prev = pc() - NativeMembar::instruction_size; ++ address last = code()->last_insn(); + -+ void cast_primitive_type(BasicType type, Register Rt) { -+ switch (type) { -+ case T_BOOLEAN: -+ sltu(Rt, zr, Rt); -+ break; -+ case T_CHAR : -+ zero_extend(Rt, Rt, 16); -+ break; -+ case T_BYTE : -+ sign_extend(Rt, Rt, 8); -+ break; -+ case T_SHORT : -+ sign_extend(Rt, Rt, 16); -+ break; -+ case T_INT : -+ addw(Rt, Rt, zr); -+ break; -+ case T_LONG : /* nothing to do */ break; -+ case T_VOID : /* nothing to do */ break; -+ case T_FLOAT : /* nothing to do */ break; -+ case T_DOUBLE : /* 
nothing to do */ break; -+ default: ShouldNotReachHere(); -+ } ++ if (last != NULL && nativeInstruction_at(last)->is_membar() && prev == last) { ++ NativeMembar *bar = NativeMembar_at(prev); ++ // We are merging two memory barrier instructions. On RISCV we ++ // can do this simply by ORing them together. ++ bar->set_kind(bar->get_kind() | order_constraint); ++ BLOCK_COMMENT("merged membar"); ++ } else { ++ code()->set_last_insn(pc()); ++ ++ uint32_t predecessor = 0; ++ uint32_t successor = 0; ++ ++ membar_mask_to_pred_succ(order_constraint, predecessor, successor); ++ fence(predecessor, successor); + } ++} + -+ // float cmp with unordered_result -+ void float_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result); -+ void double_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result); ++// Form an addres from base + offset in Rd. Rd my or may not ++// actually be used: you must use the Address that is returned. It ++// is up to you to ensure that the shift provided mathces the size ++// of your data. ++Address MacroAssembler::form_address(Register Rd, Register base, long byte_offset) { ++ if (is_offset_in_range(byte_offset, 12)) { // 12: imm in range 2^12 ++ return Address(base, byte_offset); ++ } + -+ // Zero/Sign-extend -+ void zero_extend(Register dst, Register src, int bits); -+ void sign_extend(Register dst, Register src, int bits); ++ // Do it the hard way ++ mv(Rd, byte_offset); ++ add(Rd, base, Rd); ++ return Address(Rd); ++} + -+ // compare src1 and src2 and get -1/0/1 in dst. -+ // if [src1 > src2], dst = 1; -+ // if [src1 == src2], dst = 0; -+ // if [src1 < src2], dst = -1; -+ void cmp_l2i(Register dst, Register src1, Register src2, Register tmp = t0); ++void MacroAssembler::check_klass_subtype(Register sub_klass, ++ Register super_klass, ++ Register tmp_reg, ++ Label& L_success) { ++ Label L_failure; ++ check_klass_subtype_fast_path(sub_klass, super_klass, tmp_reg, &L_success, &L_failure, NULL); ++ check_klass_subtype_slow_path(sub_klass, super_klass, tmp_reg, noreg, &L_success, NULL); ++ bind(L_failure); ++} + -+ void load_constant_pool_cache(Register cpool, Register method); ++void MacroAssembler::safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod) { ++ ld(t0, Address(xthread, JavaThread::polling_word_offset())); ++ if (acquire) { ++ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); ++ } ++ if (at_return) { ++ bgtu(in_nmethod ? 
sp : fp, t0, slow_path, true /* is_far */); ++ } else { ++ andi(t0, t0, SafepointMechanism::poll_bit()); ++ bnez(t0, slow_path, true /* is_far */); ++ } ++} + -+ void load_max_stack(Register dst, Register method); ++void MacroAssembler::cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, ++ Label &succeed, Label *fail) { ++ // oldv holds comparison value ++ // newv holds value to write in exchange ++ // addr identifies memory word to compare against/update ++ Label retry_load, nope; ++ bind(retry_load); ++ // Load reserved from the memory location ++ lr_d(tmp, addr, Assembler::aqrl); ++ // Fail and exit if it is not what we expect ++ bne(tmp, oldv, nope); ++ // If the store conditional succeeds, tmp will be zero ++ sc_d(tmp, newv, addr, Assembler::rl); ++ beqz(tmp, succeed); ++ // Retry only when the store conditional failed ++ j(retry_load); + -+private: -+ void load_prototype_header(Register dst, Register src); -+ void repne_scan(Register addr, Register value, Register count, Register tmp); ++ bind(nope); ++ membar(AnyAny); ++ mv(oldv, tmp); ++ if (fail != NULL) { ++ j(*fail); ++ } ++} + -+#ifdef ASSERT -+ // Macro short-hand support to clean-up after a failed call to trampoline -+ // call generation (see trampoline_call() below), when a set of Labels must -+ // be reset (before returning). -+#define reset_labels1(L1) L1.reset() -+#define reset_labels2(L1, L2) L1.reset(); L2.reset() -+#define reset_labels3(L1, L2, L3) L1.reset(); reset_labels2(L2, L3) -+#define reset_labels5(L1, L2, L3, L4, L5) reset_labels2(L1, L2); reset_labels3(L3, L4, L5) -+#endif ++void MacroAssembler::cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp, ++ Label &succeed, Label *fail) { ++ assert(oopDesc::mark_offset_in_bytes() == 0, "assumption"); ++ cmpxchgptr(oldv, newv, obj, tmp, succeed, fail); ++} + -+ // Return true if an address is within the 48-bit RISCV64 address space. 
-+ bool is_valid_riscv64_address(address addr) { -+ // sv48: must have bits 63-48 all equal to bit 47 -+ return ((uintptr_t)addr >> 47) == 0; ++void MacroAssembler::load_reserved(Register addr, ++ enum operand_size size, ++ Assembler::Aqrl acquire) { ++ switch (size) { ++ case int64: ++ lr_d(t0, addr, acquire); ++ break; ++ case int32: ++ lr_w(t0, addr, acquire); ++ break; ++ case uint32: ++ lr_w(t0, addr, acquire); ++ zero_extend(t0, t0, 32); ++ break; ++ default: ++ ShouldNotReachHere(); + } ++} + -+ void ld_constant(Register dest, const Address &const_addr) { -+ if (NearCpool) { -+ ld(dest, const_addr); -+ } else { -+ int32_t offset = 0; -+ la_patchable(dest, InternalAddress(const_addr.target()), offset); -+ ld(dest, Address(dest, offset)); -+ } ++void MacroAssembler::store_conditional(Register addr, ++ Register new_val, ++ enum operand_size size, ++ Assembler::Aqrl release) { ++ switch (size) { ++ case int64: ++ sc_d(t0, new_val, addr, release); ++ break; ++ case int32: ++ case uint32: ++ sc_w(t0, new_val, addr, release); ++ break; ++ default: ++ ShouldNotReachHere(); + } ++} + -+ int bitset_to_regs(unsigned int bitset, unsigned char* regs); -+ Address add_memory_helper(const Address dst); + -+ void load_reserved(Register addr, enum operand_size size, Assembler::Aqrl acquire); -+ void store_conditional(Register addr, Register new_val, enum operand_size size, Assembler::Aqrl release); ++void MacroAssembler::cmpxchg_narrow_value_helper(Register addr, Register expected, ++ Register new_val, ++ enum operand_size size, ++ Register tmp1, Register tmp2, Register tmp3) { ++ assert(size == int8 || size == int16, "unsupported operand size"); + -+#ifdef COMPILER2 -+ void element_compare(Register a1, Register a2, Register result, Register cnt, Register tmp1, Register tmp2, -+ VectorRegister vr1, VectorRegister vr2, VectorRegister vrs, bool islatin, Label &DONE); -+#endif // COMPILER2 -+}; ++ Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3; + -+#ifdef ASSERT -+inline bool AbstractAssembler::pd_check_instruction_mark() { return false; } -+#endif ++ andi(shift, addr, 3); ++ slli(shift, shift, 3); + -+/** -+ * class SkipIfEqual: -+ * -+ * Instantiating this class will result in assembly code being output that will -+ * jump around any code emitted between the creation of the instance and it's -+ * automatic destruction at the end of a scope block, depending on the value of -+ * the flag passed to the constructor, which will be checked at run-time. -+ */ -+class SkipIfEqual { -+ private: -+ MacroAssembler* _masm; -+ Label _label; ++ andi(aligned_addr, addr, ~3); + -+ public: -+ SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value); -+ ~SkipIfEqual(); -+}; -+#endif // CPU_RISCV_MACROASSEMBLER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp -new file mode 100644 -index 000000000..fc2b191c0 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp -@@ -0,0 +1,30 @@ -+/* -+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. 
-+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++ if (size == int8) { ++ addi(mask, zr, 0xff); ++ } else { ++ // size == int16 case ++ addi(mask, zr, -1); ++ zero_extend(mask, mask, 16); ++ } ++ sll(mask, mask, shift); + -+#ifndef CPU_RISCV_MACROASSEMBLER_RISCV_INLINE_HPP -+#define CPU_RISCV_MACROASSEMBLER_RISCV_INLINE_HPP ++ xori(not_mask, mask, -1); + -+#endif // CPU_RISCV_MACROASSEMBLER_RISCV_INLINE_HPP -diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp -new file mode 100644 -index 000000000..d049193d4 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp -@@ -0,0 +1,440 @@ -+/* -+ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++ sll(expected, expected, shift); ++ andr(expected, expected, mask); + -+#include "precompiled.hpp" -+#include "asm/macroAssembler.hpp" -+#include "classfile/javaClasses.inline.hpp" -+#include "interpreter/interpreter.hpp" -+#include "interpreter/interpreterRuntime.hpp" -+#include "memory/allocation.inline.hpp" -+#include "prims/methodHandles.hpp" -+#include "runtime/flags/flagSetting.hpp" -+#include "runtime/frame.inline.hpp" ++ sll(new_val, new_val, shift); ++ andr(new_val, new_val, mask); ++} + -+#define __ _masm-> ++// cmpxchg_narrow_value will kill t0, t1, expected, new_val and tmps. ++// It's designed to implement compare and swap byte/boolean/char/short by lr.w/sc.w, ++// which are forced to work with 4-byte aligned address. 
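Editorial sketch (not part of the patch): the routine below implements a byte/short compare-and-swap on top of word-sized lr.w/sc.w by shifting and masking within the naturally aligned 32-bit word. The same idea in portable C++, where word_cas32 is a hypothetical stand-in for the lr.w/sc.w attempt:

  #include <cstdint>

  // Hypothetical 32-bit CAS (one lr.w/sc.w attempt, or a full CAS); returns true on success.
  bool word_cas32(uint32_t* p, uint32_t expected, uint32_t desired);

  // CAS a single byte by CAS-ing the aligned 32-bit word that contains it.
  bool cas_byte(uint8_t* addr, uint8_t expected, uint8_t desired) {
    uint32_t* aligned = (uint32_t*)((uintptr_t)addr & ~(uintptr_t)3);
    int shift = ((uintptr_t)addr & 3) * 8;               // byte lane; RISC-V is little-endian
    uint32_t mask = 0xffu << shift;
    for (;;) {
      uint32_t old_word = *aligned;
      if ((uint8_t)((old_word & mask) >> shift) != expected) {
        return false;                                    // like the 'fail' path below
      }
      uint32_t new_word = (old_word & ~mask) | ((uint32_t)desired << shift);
      if (word_cas32(aligned, old_word, new_word)) {
        return true;
      }
      // A neighbouring byte changed underneath us: retry, as the lr.w/sc.w loop does.
    }
  }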
++void MacroAssembler::cmpxchg_narrow_value(Register addr, Register expected, ++ Register new_val, ++ enum operand_size size, ++ Assembler::Aqrl acquire, Assembler::Aqrl release, ++ Register result, bool result_as_bool, ++ Register tmp1, Register tmp2, Register tmp3) { ++ Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3, old = result, tmp = t0; ++ assert_different_registers(addr, old, mask, not_mask, new_val, expected, shift, tmp); ++ cmpxchg_narrow_value_helper(addr, expected, new_val, size, tmp1, tmp2, tmp3); + -+#ifdef PRODUCT -+#define BLOCK_COMMENT(str) /* nothing */ -+#else -+#define BLOCK_COMMENT(str) __ block_comment(str) -+#endif ++ Label retry, fail, done; + -+#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++ bind(retry); ++ lr_w(old, aligned_addr, acquire); ++ andr(tmp, old, mask); ++ bne(tmp, expected, fail); + -+void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg) { -+ if (VerifyMethodHandles) { -+ verify_klass(_masm, klass_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_Class), -+ "MH argument is a Class"); -+ } -+ __ ld(klass_reg, Address(klass_reg, java_lang_Class::klass_offset_in_bytes())); -+} ++ andr(tmp, old, not_mask); ++ orr(tmp, tmp, new_val); ++ sc_w(tmp, tmp, aligned_addr, release); ++ bnez(tmp, retry); + -+#ifdef ASSERT -+static int check_nonzero(const char* xname, int x) { -+ assert(x != 0, "%s should be nonzero", xname); -+ return x; -+} -+#define NONZERO(x) check_nonzero(#x, x) -+#else //ASSERT -+#define NONZERO(x) (x) -+#endif //PRODUCT ++ if (result_as_bool) { ++ addi(result, zr, 1); ++ j(done); + -+#ifdef ASSERT -+void MethodHandles::verify_klass(MacroAssembler* _masm, -+ Register obj, SystemDictionary::WKID klass_id, -+ const char* error_message) { -+ InstanceKlass** klass_addr = SystemDictionary::well_known_klass_addr(klass_id); -+ Klass* klass = SystemDictionary::well_known_klass(klass_id); -+ Register temp = t1; -+ Register temp2 = t0; // used by MacroAssembler::cmpptr -+ Label L_ok, L_bad; -+ BLOCK_COMMENT("verify_klass {"); -+ __ verify_oop(obj); -+ __ beqz(obj, L_bad); -+ __ push_reg(RegSet::of(temp, temp2), sp); -+ __ load_klass(temp, obj); -+ __ cmpptr(temp, ExternalAddress((address) klass_addr), L_ok); -+ intptr_t super_check_offset = klass->super_check_offset(); -+ __ ld(temp, Address(temp, super_check_offset)); -+ __ cmpptr(temp, ExternalAddress((address) klass_addr), L_ok); -+ __ pop_reg(RegSet::of(temp, temp2), sp); -+ __ bind(L_bad); -+ __ stop(error_message); -+ __ BIND(L_ok); -+ __ pop_reg(RegSet::of(temp, temp2), sp); -+ BLOCK_COMMENT("} verify_klass"); ++ bind(fail); ++ mv(result, zr); ++ ++ bind(done); ++ } else { ++ andr(tmp, old, mask); ++ ++ bind(fail); ++ srl(result, tmp, shift); ++ ++ if (size == int8) { ++ sign_extend(result, result, 8); ++ } else { ++ // size == int16 case ++ sign_extend(result, result, 16); ++ } ++ } +} + -+void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) { } ++// weak_cmpxchg_narrow_value is a weak version of cmpxchg_narrow_value, to implement ++// the weak CAS stuff. The major difference is that it just failed when store conditional ++// failed. 
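Editorial note (not part of the patch): "weak" has the usual LL/SC meaning here, namely that a single lr.w/sc.w attempt may fail spuriously even though the expected value was seen, so the caller must provide its own retry loop. The behaviour is analogous to std::atomic's compare_exchange_weak (standard C++, not this patch's API):

  #include <atomic>

  // Typical caller-side retry loop around a weak CAS.
  void increment(std::atomic<int>& counter) {
    int observed = counter.load(std::memory_order_relaxed);
    // compare_exchange_weak may fail spuriously; on failure it refreshes 'observed'
    // with the current value, so the loop simply tries again.
    while (!counter.compare_exchange_weak(observed, observed + 1)) {
      // retry with the refreshed 'observed'
    }
  }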
++void MacroAssembler::weak_cmpxchg_narrow_value(Register addr, Register expected, ++ Register new_val, ++ enum operand_size size, ++ Assembler::Aqrl acquire, Assembler::Aqrl release, ++ Register result, ++ Register tmp1, Register tmp2, Register tmp3) { ++ Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3, old = result, tmp = t0; ++ assert_different_registers(addr, old, mask, not_mask, new_val, expected, shift, tmp); ++ cmpxchg_narrow_value_helper(addr, expected, new_val, size, tmp1, tmp2, tmp3); + -+#endif //ASSERT ++ Label succ, fail, done; + -+void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, -+ bool for_compiler_entry) { -+ assert(method == xmethod, "interpreter calling convention"); -+ Label L_no_such_method; -+ __ beqz(xmethod, L_no_such_method); -+ __ verify_method_ptr(method); ++ lr_w(old, aligned_addr, acquire); ++ andr(tmp, old, mask); ++ bne(tmp, expected, fail); + -+ if (!for_compiler_entry && JvmtiExport::can_post_interpreter_events()) { -+ Label run_compiled_code; -+ // JVMTI events, such as single-stepping, are implemented partly by avoiding running -+ // compiled code in threads for which the event is enabled. Check here for -+ // interp_only_mode if these events CAN be enabled. ++ andr(tmp, old, not_mask); ++ orr(tmp, tmp, new_val); ++ sc_w(tmp, tmp, aligned_addr, release); ++ beqz(tmp, succ); + -+ __ lwu(t0, Address(xthread, JavaThread::interp_only_mode_offset())); -+ __ beqz(t0, run_compiled_code); -+ __ ld(t0, Address(method, Method::interpreter_entry_offset())); -+ __ jr(t0); -+ __ BIND(run_compiled_code); -+ } ++ bind(fail); ++ addi(result, zr, 1); ++ j(done); + -+ const ByteSize entry_offset = for_compiler_entry ? Method::from_compiled_offset() : -+ Method::from_interpreted_offset(); -+ __ ld(t0,Address(method, entry_offset)); -+ __ jr(t0); -+ __ bind(L_no_such_method); -+ __ far_jump(RuntimeAddress(StubRoutines::throw_AbstractMethodError_entry())); ++ bind(succ); ++ mv(result, zr); ++ ++ bind(done); +} + -+void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm, -+ Register recv, Register method_temp, -+ Register temp2, -+ bool for_compiler_entry) { -+ BLOCK_COMMENT("jump_to_lambda_form {"); -+ // This is the initial entry point of a lazy method handle. -+ // After type checking, it picks up the invoker from the LambdaForm. 
-+ assert_different_registers(recv, method_temp, temp2); -+ assert(recv != noreg, "required register"); -+ assert(method_temp == xmethod, "required register for loading method"); ++void MacroAssembler::cmpxchg(Register addr, Register expected, ++ Register new_val, ++ enum operand_size size, ++ Assembler::Aqrl acquire, Assembler::Aqrl release, ++ Register result, bool result_as_bool) { ++ assert(size != int8 && size != int16, "unsupported operand size"); + -+ // Load the invoker, as MH -> MH.form -> LF.vmentry -+ __ verify_oop(recv); -+ __ load_heap_oop(method_temp, Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset_in_bytes())), temp2); -+ __ verify_oop(method_temp); -+ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset_in_bytes())), temp2); -+ __ verify_oop(method_temp); -+ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::method_offset_in_bytes())), temp2); -+ __ verify_oop(method_temp); -+ __ access_load_at(T_ADDRESS, IN_HEAP, method_temp, Address(method_temp, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset_in_bytes())), noreg, noreg); ++ Label retry_load, done, ne_done; ++ bind(retry_load); ++ load_reserved(addr, size, acquire); ++ bne(t0, expected, ne_done); ++ store_conditional(addr, new_val, size, release); ++ bnez(t0, retry_load); + -+ if (VerifyMethodHandles && !for_compiler_entry) { -+ // make sure recv is already on stack -+ __ ld(temp2, Address(method_temp, Method::const_offset())); -+ __ load_sized_value(temp2, -+ Address(temp2, ConstMethod::size_of_parameters_offset()), -+ sizeof(u2), /*is_signed*/ false); -+ Label L; -+ __ ld(t0, __ argument_address(temp2, -1)); -+ __ oop_equal(recv, t0, L); -+ __ ld(x10, __ argument_address(temp2, -1)); -+ __ ebreak(); -+ __ BIND(L); ++ // equal, succeed ++ if (result_as_bool) { ++ li(result, 1); ++ } else { ++ mv(result, expected); + } ++ j(done); + -+ jump_from_method_handle(_masm, method_temp, temp2, for_compiler_entry); -+ BLOCK_COMMENT("} jump_to_lambda_form"); ++ // not equal, failed ++ bind(ne_done); ++ if (result_as_bool) { ++ mv(result, zr); ++ } else { ++ mv(result, t0); ++ } ++ ++ bind(done); +} + -+// Code generation -+address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm, -+ vmIntrinsics::ID iid) { -+ const bool not_for_compiler_entry = false; // this is the interpreter entry -+ assert(is_signature_polymorphic(iid), "expected invoke iid"); -+ if (iid == vmIntrinsics::_invokeGeneric || -+ iid == vmIntrinsics::_compiledLambdaForm) { -+ // Perhaps surprisingly, the symbolic references visible to Java are not directly used. -+ // They are linked to Java-generated adapters via MethodHandleNatives.linkMethod. -+ // They all allow an appendix argument. 
-+ __ ebreak(); // empty stubs make SG sick -+ return NULL; -+ } -+ -+ // x30: sender SP (must preserve; see prepare_to_jump_from_interpreted) -+ // xmethod: Method* -+ // x13: argument locator (parameter slot count, added to sp) -+ // x11: used as temp to hold mh or receiver -+ Register argp = x13; // argument list ptr, live on error paths -+ Register mh = x11; // MH receiver; dies quickly and is recycled ++void MacroAssembler::cmpxchg_weak(Register addr, Register expected, ++ Register new_val, ++ enum operand_size size, ++ Assembler::Aqrl acquire, Assembler::Aqrl release, ++ Register result) { ++ Label fail, done, sc_done; ++ load_reserved(addr, size, acquire); ++ bne(t0, expected, fail); ++ store_conditional(addr, new_val, size, release); ++ beqz(t0, sc_done); + -+ // here's where control starts out: -+ __ align(CodeEntryAlignment); -+ address entry_point = __ pc(); ++ // fail ++ bind(fail); ++ li(result, 1); ++ j(done); + -+ if (VerifyMethodHandles) { -+ assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2"); ++ // sc_done ++ bind(sc_done); ++ mv(result, 0); ++ bind(done); ++} + -+ Label L; -+ BLOCK_COMMENT("verify_intrinsic_id {"); -+ __ lhu(t0, Address(xmethod, Method::intrinsic_id_offset_in_bytes())); -+ __ mv(t1, (int) iid); -+ __ beq(t0, t1, L); -+ if (iid == vmIntrinsics::_linkToVirtual || -+ iid == vmIntrinsics::_linkToSpecial) { -+ // could do this for all kinds, but would explode assembly code size -+ trace_method_handle(_masm, "bad Method*::intrinsic_id"); -+ } -+ __ ebreak(); -+ __ bind(L); -+ BLOCK_COMMENT("} verify_intrinsic_id"); -+ } ++#define ATOMIC_OP(NAME, AOP, ACQUIRE, RELEASE) \ ++void MacroAssembler::atomic_##NAME(Register prev, RegisterOrConstant incr, Register addr) { \ ++ prev = prev->is_valid() ? prev : zr; \ ++ if (incr.is_register()) { \ ++ AOP(prev, addr, incr.as_register(), (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ ++ } else { \ ++ mv(t0, incr.as_constant()); \ ++ AOP(prev, addr, t0, (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ ++ } \ ++ return; \ ++} + -+ // First task: Find out how big the argument list is. -+ Address x13_first_arg_addr; -+ int ref_kind = signature_polymorphic_intrinsic_ref_kind(iid); -+ assert(ref_kind != 0 || iid == vmIntrinsics::_invokeBasic, "must be _invokeBasic or a linkTo intrinsic"); -+ if (ref_kind == 0 || MethodHandles::ref_kind_has_receiver(ref_kind)) { -+ __ ld(argp, Address(xmethod, Method::const_offset())); -+ __ load_sized_value(argp, -+ Address(argp, ConstMethod::size_of_parameters_offset()), -+ sizeof(u2), /*is_signed*/ false); -+ x13_first_arg_addr = __ argument_address(argp, -1); -+ } else { -+ DEBUG_ONLY(argp = noreg); -+ } ++ATOMIC_OP(add, amoadd_d, Assembler::relaxed, Assembler::relaxed) ++ATOMIC_OP(addw, amoadd_w, Assembler::relaxed, Assembler::relaxed) ++ATOMIC_OP(addal, amoadd_d, Assembler::aq, Assembler::rl) ++ATOMIC_OP(addalw, amoadd_w, Assembler::aq, Assembler::rl) + -+ if (!is_signature_polymorphic_static(iid)) { -+ __ ld(mh, x13_first_arg_addr); -+ DEBUG_ONLY(argp = noreg); -+ } ++#undef ATOMIC_OP + -+ // x13_first_arg_addr is live! ++#define ATOMIC_XCHG(OP, AOP, ACQUIRE, RELEASE) \ ++void MacroAssembler::atomic_##OP(Register prev, Register newv, Register addr) { \ ++ prev = prev->is_valid() ? 
prev : zr; \ ++ AOP(prev, addr, newv, (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ ++ return; \ ++} + -+ trace_method_handle_interpreter_entry(_masm, iid); -+ if (iid == vmIntrinsics::_invokeBasic) { -+ generate_method_handle_dispatch(_masm, iid, mh, noreg, not_for_compiler_entry); ++ATOMIC_XCHG(xchg, amoswap_d, Assembler::relaxed, Assembler::relaxed) ++ATOMIC_XCHG(xchgw, amoswap_w, Assembler::relaxed, Assembler::relaxed) ++ATOMIC_XCHG(xchgal, amoswap_d, Assembler::aq, Assembler::rl) ++ATOMIC_XCHG(xchgalw, amoswap_w, Assembler::aq, Assembler::rl) + -+ } else { -+ // Adjust argument list by popping the trailing MemberName argument. -+ Register recv = noreg; -+ if (MethodHandles::ref_kind_has_receiver(ref_kind)) { -+ // Load the receiver (not the MH; the actual MemberName's receiver) up from the interpreter stack. -+ __ ld(recv = x12, x13_first_arg_addr); -+ } -+ DEBUG_ONLY(argp = noreg); -+ Register xmember = xmethod; // MemberName ptr; incoming method ptr is dead now -+ __ pop_reg(xmember); // extract last argument -+ generate_method_handle_dispatch(_masm, iid, recv, xmember, not_for_compiler_entry); -+ } ++#undef ATOMIC_XCHG + -+ return entry_point; ++#define ATOMIC_XCHGU(OP1, OP2) \ ++void MacroAssembler::atomic_##OP1(Register prev, Register newv, Register addr) { \ ++ atomic_##OP2(prev, newv, addr); \ ++ zero_extend(prev, prev, 32); \ ++ return; \ +} + ++ATOMIC_XCHGU(xchgwu, xchgw) ++ATOMIC_XCHGU(xchgalwu, xchgalw) + -+void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, -+ vmIntrinsics::ID iid, -+ Register receiver_reg, -+ Register member_reg, -+ bool for_compiler_entry) { -+ assert(is_signature_polymorphic(iid), "expected invoke iid"); -+ // temps used in this code are not used in *either* compiled or interpreted calling sequences -+ Register temp1 = x7; -+ Register temp2 = x28; -+ Register temp3 = x29; // x30 is live by this point: it contains the sender SP -+ if (for_compiler_entry) { -+ assert(receiver_reg == (iid == vmIntrinsics::_linkToStatic ? noreg : j_rarg0), "only valid assignment"); -+ assert_different_registers(temp1, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7); -+ assert_different_registers(temp2, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7); -+ assert_different_registers(temp3, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7); ++#undef ATOMIC_XCHGU ++ ++void MacroAssembler::far_jump(Address entry, CodeBuffer *cbuf, Register tmp) { ++ assert(ReservedCodeCacheSize < 4*G, "branch out of range"); ++ assert(CodeCache::find_blob(entry.target()) != NULL, ++ "destination of far call not found in code cache"); ++ int32_t offset = 0; ++ if (far_branches()) { ++ // We can use auipc + jalr here because we know that the total size of ++ // the code cache cannot exceed 2Gb. 
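++ // (Editorial note: auipc reaches roughly pc +/- 2 GiB via its signed 20-bit
++ // upper immediate and the paired jalr supplies the low signed 12 bits, so a
++ // sub-2Gb code cache keeps every in-cache target within range of this pair.)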
++ la_patchable(tmp, entry, offset); ++ if (cbuf != NULL) { cbuf->set_insts_mark(); } ++ jalr(x0, tmp, offset); ++ } else { ++ if (cbuf != NULL) { cbuf->set_insts_mark(); } ++ j(entry); + } ++} + -+ assert_different_registers(temp1, temp2, temp3, receiver_reg); -+ assert_different_registers(temp1, temp2, temp3, member_reg); ++void MacroAssembler::far_call(Address entry, CodeBuffer *cbuf, Register tmp) { ++ assert(ReservedCodeCacheSize < 4*G, "branch out of range"); ++ assert(CodeCache::find_blob(entry.target()) != NULL, ++ "destination of far call not found in code cache"); ++ int32_t offset = 0; ++ if (far_branches()) { ++ // We can use auipc + jalr here because we know that the total size of ++ // the code cache cannot exceed 2Gb. ++ la_patchable(tmp, entry, offset); ++ if (cbuf != NULL) { cbuf->set_insts_mark(); } ++ jalr(x1, tmp, offset); // link ++ } else { ++ if (cbuf != NULL) { cbuf->set_insts_mark(); } ++ jal(entry); // link ++ } ++} + -+ if (iid == vmIntrinsics::_invokeBasic) { -+ // indirect through MH.form.vmentry.vmtarget -+ jump_to_lambda_form(_masm, receiver_reg, xmethod, temp1, for_compiler_entry); ++void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, ++ Register super_klass, ++ Register tmp_reg, ++ Label* L_success, ++ Label* L_failure, ++ Label* L_slow_path, ++ Register super_check_offset) { ++ assert_different_registers(sub_klass, super_klass, tmp_reg); ++ bool must_load_sco = (super_check_offset == noreg); ++ if (must_load_sco) { ++ assert(tmp_reg != noreg, "supply either a temp or a register offset"); + } else { -+ // The method is a member invoker used by direct method handles. -+ if (VerifyMethodHandles) { -+ // make sure the trailing argument really is a MemberName (caller responsibility) -+ verify_klass(_masm, member_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MemberName), -+ "MemberName required for invokeVirtual etc."); -+ } ++ assert_different_registers(sub_klass, super_klass, super_check_offset); ++ } + -+ Address member_clazz( member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset_in_bytes())); -+ Address member_vmindex( member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset_in_bytes())); -+ Address member_vmtarget( member_reg, NONZERO(java_lang_invoke_MemberName::method_offset_in_bytes())); -+ Address vmtarget_method( xmethod, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset_in_bytes())); ++ Label L_fallthrough; ++ int label_nulls = 0; ++ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } ++ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } ++ if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } ++ assert(label_nulls <= 1, "at most one NULL in batch"); + -+ Register temp1_recv_klass = temp1; -+ if (iid != vmIntrinsics::_linkToStatic) { -+ __ verify_oop(receiver_reg); -+ if (iid == vmIntrinsics::_linkToSpecial) { -+ // Don't actually load the klass; just null-check the receiver. -+ __ null_check(receiver_reg); -+ } else { -+ // load receiver klass itself -+ __ null_check(receiver_reg, oopDesc::klass_offset_in_bytes()); -+ __ load_klass(temp1_recv_klass, receiver_reg); -+ __ verify_klass_ptr(temp1_recv_klass); -+ } -+ BLOCK_COMMENT("check_receiver {"); -+ // The receiver for the MemberName must be in receiver_reg. -+ // Check the receiver against the MemberName.clazz -+ if (VerifyMethodHandles && iid == vmIntrinsics::_linkToSpecial) { -+ // Did not load it above... 
-+ __ load_klass(temp1_recv_klass, receiver_reg); -+ __ verify_klass_ptr(temp1_recv_klass); -+ } -+ if (VerifyMethodHandles && iid != vmIntrinsics::_linkToInterface) { -+ Label L_ok; -+ Register temp2_defc = temp2; -+ __ load_heap_oop(temp2_defc, member_clazz, temp3); -+ load_klass_from_Class(_masm, temp2_defc); -+ __ verify_klass_ptr(temp2_defc); -+ __ check_klass_subtype(temp1_recv_klass, temp2_defc, temp3, L_ok); -+ // If we get here, the type check failed! -+ __ ebreak(); -+ __ bind(L_ok); -+ } -+ BLOCK_COMMENT("} check_receiver"); -+ } -+ if (iid == vmIntrinsics::_linkToSpecial || -+ iid == vmIntrinsics::_linkToStatic) { -+ DEBUG_ONLY(temp1_recv_klass = noreg); // these guys didn't load the recv_klass -+ } ++ int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); ++ int sco_offset = in_bytes(Klass::super_check_offset_offset()); ++ Address super_check_offset_addr(super_klass, sco_offset); + -+ // Live registers at this point: -+ // member_reg - MemberName that was the trailing argument -+ // temp1_recv_klass - klass of stacked receiver, if needed -+ // x30 - interpreter linkage (if interpreted) -+ // x11 ... x10 - compiler arguments (if compiled) ++ // Hacked jmp, which may only be used just before L_fallthrough. ++#define final_jmp(label) \ ++ if (&(label) == &L_fallthrough) { /*do nothing*/ } \ ++ else j(label) /*omit semi*/ + -+ Label L_incompatible_class_change_error; -+ switch (iid) { -+ case vmIntrinsics::_linkToSpecial: -+ if (VerifyMethodHandles) { -+ verify_ref_kind(_masm, JVM_REF_invokeSpecial, member_reg, temp3); -+ } -+ __ load_heap_oop(xmethod, member_vmtarget); -+ __ access_load_at(T_ADDRESS, IN_HEAP, xmethod, vmtarget_method, noreg, noreg); -+ break; ++ // If the pointers are equal, we are done (e.g., String[] elements). ++ // This self-check enables sharing of secondary supertype arrays among ++ // non-primary types such as array-of-interface. Otherwise, each such ++ // type would need its own customized SSA. ++ // We move this check to the front fo the fast path because many ++ // type checks are in fact trivially successful in this manner, ++ // so we get a nicely predicted branch right at the start of the check. ++ beq(sub_klass, super_klass, *L_success); + -+ case vmIntrinsics::_linkToStatic: -+ if (VerifyMethodHandles) { -+ verify_ref_kind(_masm, JVM_REF_invokeStatic, member_reg, temp3); -+ } -+ __ load_heap_oop(xmethod, member_vmtarget); -+ __ access_load_at(T_ADDRESS, IN_HEAP, xmethod, vmtarget_method, noreg, noreg); -+ break; ++ // Check the supertype display: ++ if (must_load_sco) { ++ lwu(tmp_reg, super_check_offset_addr); ++ super_check_offset = tmp_reg; ++ } ++ add(t0, sub_klass, super_check_offset); ++ Address super_check_addr(t0); ++ ld(t0, super_check_addr); // load displayed supertype + -+ case vmIntrinsics::_linkToVirtual: { -+ // same as TemplateTable::invokevirtual, -+ // minus the CP setup and profiling: ++ // Ths check has worked decisively for primary supers. ++ // Secondary supers are sought in the super_cache ('super_cache_addr'). ++ // (Secondary supers are interfaces and very deeply nested subtypes.) ++ // This works in the same check above because of a tricky aliasing ++ // between the super_Cache and the primary super dispaly elements. ++ // (The 'super_check_addr' can address either, as the case requires.) ++ // Note that the cache is updated below if it does not help us find ++ // what we need immediately. ++ // So if it was a primary super, we can just fail immediately. 
++ // Otherwise, it's the slow path for us (no success at this point). + -+ if (VerifyMethodHandles) { -+ verify_ref_kind(_masm, JVM_REF_invokeVirtual, member_reg, temp3); -+ } ++ beq(super_klass, t0, *L_success); ++ mv(t1, sc_offset); ++ if (L_failure == &L_fallthrough) { ++ beq(super_check_offset, t1, *L_slow_path); ++ } else { ++ bne(super_check_offset, t1, *L_failure, /* is_far */ true); ++ final_jmp(*L_slow_path); ++ } + -+ // pick out the vtable index from the MemberName, and then we can discard it: -+ Register temp2_index = temp2; -+ __ access_load_at(T_ADDRESS, IN_HEAP, temp2_index, member_vmindex, noreg, noreg); ++ bind(L_fallthrough); + -+ if (VerifyMethodHandles) { -+ Label L_index_ok; -+ __ bgez(temp2_index, L_index_ok); -+ __ ebreak(); -+ __ BIND(L_index_ok); -+ } ++#undef final_jmp ++} + -+ // Note: The verifier invariants allow us to ignore MemberName.clazz and vmtarget -+ // at this point. And VerifyMethodHandles has already checked clazz, if needed. ++// Scans count pointer sized words at [addr] for occurence of value, ++// generic ++void MacroAssembler::repne_scan(Register addr, Register value, Register count, ++ Register tmp) { ++ Label Lloop, Lexit; ++ beqz(count, Lexit); ++ bind(Lloop); ++ ld(tmp, addr); ++ beq(value, tmp, Lexit); ++ add(addr, addr, wordSize); ++ sub(count, count, 1); ++ bnez(count, Lloop); ++ bind(Lexit); ++} + -+ // get target Method* & entry point -+ __ lookup_virtual_method(temp1_recv_klass, temp2_index, xmethod); -+ break; -+ } ++void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, ++ Register super_klass, ++ Register tmp1_reg, ++ Register tmp2_reg, ++ Label* L_success, ++ Label* L_failure) { ++ assert_different_registers(sub_klass, super_klass, tmp1_reg); ++ if (tmp2_reg != noreg) { ++ assert_different_registers(sub_klass, super_klass, tmp1_reg, tmp2_reg, t0); ++ } ++#define IS_A_TEMP(reg) ((reg) == tmp1_reg || (reg) == tmp2_reg) + -+ case vmIntrinsics::_linkToInterface: { -+ // same as TemplateTable::invokeinterface -+ // (minus the CP setup and profiling, with different argument motion) -+ if (VerifyMethodHandles) { -+ verify_ref_kind(_masm, JVM_REF_invokeInterface, member_reg, temp3); -+ } ++ Label L_fallthrough; ++ int label_nulls = 0; ++ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } ++ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } + -+ Register temp3_intf = temp3; -+ __ load_heap_oop(temp3_intf, member_clazz); -+ load_klass_from_Class(_masm, temp3_intf); -+ __ verify_klass_ptr(temp3_intf); ++ assert(label_nulls <= 1, "at most one NULL in the batch"); + -+ Register rindex = xmethod; -+ __ access_load_at(T_ADDRESS, IN_HEAP, rindex, member_vmindex, noreg, noreg); -+ if (VerifyMethodHandles) { -+ Label L; -+ __ bgez(rindex, L); -+ __ ebreak(); -+ __ bind(L); -+ } ++ // A couple of usefule fields in sub_klass: ++ int ss_offset = in_bytes(Klass::secondary_supers_offset()); ++ int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); ++ Address secondary_supers_addr(sub_klass, ss_offset); ++ Address super_cache_addr( sub_klass, sc_offset); + -+ // given intf, index, and recv klass, dispatch to the implementation method -+ __ lookup_interface_method(temp1_recv_klass, temp3_intf, -+ // note: next two args must be the same: -+ rindex, xmethod, -+ temp2, -+ L_incompatible_class_change_error); -+ break; -+ } ++ BLOCK_COMMENT("check_klass_subtype_slow_path"); + -+ default: -+ fatal("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid)); -+ break; -+ } ++ // Do a linear scan 
of the secondary super-klass chain. ++ // This code is rarely used, so simplicity is a virtue here. ++ // The repne_scan instruction uses fixed registers, which we must spill. ++ // Don't worry too much about pre-existing connecitons with the input regs. + -+ // live at this point: xmethod, x30 (if interpreted) ++ assert(sub_klass != x10, "killed reg"); // killed by mv(x10, super) ++ assert(sub_klass != x12, "killed reg"); // killed by la(x12, &pst_counter) + -+ // After figuring out which concrete method to call, jump into it. -+ // Note that this works in the interpreter with no data motion. -+ // But the compiled version will require that r2_recv be shifted out. -+ __ verify_method_ptr(xmethod); -+ jump_from_method_handle(_masm, xmethod, temp1, for_compiler_entry); -+ if (iid == vmIntrinsics::_linkToInterface) { -+ __ bind(L_incompatible_class_change_error); -+ __ far_jump(RuntimeAddress(StubRoutines::throw_IncompatibleClassChangeError_entry())); ++ RegSet pushed_registers; ++ if (!IS_A_TEMP(x12)) { ++ pushed_registers += x12; ++ } ++ if (!IS_A_TEMP(x15)) { ++ pushed_registers += x15; ++ } ++ ++ if (super_klass != x10 || UseCompressedOops) { ++ if (!IS_A_TEMP(x10)) { ++ pushed_registers += x10; + } + } + -+} ++ push_reg(pushed_registers, sp); + -+#ifndef PRODUCT -+void trace_method_handle_stub(const char* adaptername, -+ oop mh, -+ intptr_t* saved_regs, -+ intptr_t* entry_sp) { } ++ // Get super_klass value into x10 (even if it was in x15 or x12) ++ mv(x10, super_klass); + -+// The stub wraps the arguments in a struct on the stack to avoid -+// dealing with the different calling conventions for passing 6 -+// arguments. -+struct MethodHandleStubArguments { -+ const char* adaptername; -+ oopDesc* mh; -+ intptr_t* saved_regs; -+ intptr_t* entry_sp; -+}; -+void trace_method_handle_stub_wrapper(MethodHandleStubArguments* args) { } ++#ifndef PRODUCT ++ mv(t1, (address)&SharedRuntime::_partial_subtype_ctr); ++ Address pst_counter_addr(t1); ++ ld(t0, pst_counter_addr); ++ add(t0, t0, 1); ++ sd(t0, pst_counter_addr); ++#endif // PRODUCT + -+void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) { } -+#endif //PRODUCT -diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.hpp b/src/hotspot/cpu/riscv/methodHandles_riscv.hpp -new file mode 100644 -index 000000000..8ed69efe8 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/methodHandles_riscv.hpp -@@ -0,0 +1,58 @@ -+/* -+ * Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
-+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++ // We will consult the secondary-super array. ++ ld(x15, secondary_supers_addr); ++ // Load the array length. ++ lwu(x12, Address(x15, Array::length_offset_in_bytes())); ++ // Skip to start of data. ++ add(x15, x15, Array::base_offset_in_bytes()); + -+// Platform-specific definitions for method handles. -+// These definitions are inlined into class MethodHandles. ++ // Set t0 to an obvious invalid value, falling through by default ++ li(t0, -1); ++ // Scan X12 words at [X15] for an occurrence of X10. ++ repne_scan(x15, x10, x12, t0); + -+// Adapters -+enum /* platform_dependent_constants */ { -+ adapter_code_size = 32000 DEBUG_ONLY(+ 120000) -+}; ++ // pop will restore x10, so we should use a temp register to keep its value ++ mv(t1, x10); + -+public: ++ // Unspill the temp registers: ++ pop_reg(pushed_registers, sp); + -+ static void load_klass_from_Class(MacroAssembler* _masm, Register klass_reg); ++ bne(t1, t0, *L_failure); + -+ static void verify_klass(MacroAssembler* _masm, -+ Register obj, SystemDictionary::WKID klass_id, -+ const char* error_message = "wrong klass") NOT_DEBUG_RETURN; ++ // Success. Cache the super we found an proceed in triumph. ++ sd(super_klass, super_cache_addr); + -+ static void verify_method_handle(MacroAssembler* _masm, Register mh_reg) { -+ verify_klass(_masm, mh_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MethodHandle), -+ "reference is a MH"); ++ if (L_success != &L_fallthrough) { ++ j(*L_success); + } + -+ static void verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) NOT_DEBUG_RETURN; ++#undef IS_A_TEMP + -+ // Similar to InterpreterMacroAssembler::jump_from_interpreted. -+ // Takes care of special dispatch from single stepping too. -+ static void jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, -+ bool for_compiler_entry); ++ bind(L_fallthrough); ++} + -+ static void jump_to_lambda_form(MacroAssembler* _masm, -+ Register recv, Register method_temp, -+ Register temp2, -+ bool for_compiler_entry); -diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.cpp b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp -new file mode 100644 -index 000000000..4b1573130 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp -@@ -0,0 +1,404 @@ -+/* -+ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2018, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
-+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ -+ -+#include "precompiled.hpp" -+#include "asm/macroAssembler.hpp" -+#include "memory/resourceArea.hpp" -+#include "nativeInst_riscv.hpp" -+#include "oops/oop.inline.hpp" -+#include "runtime/handles.hpp" -+#include "runtime/sharedRuntime.hpp" -+#include "runtime/stubRoutines.hpp" -+#include "utilities/ostream.hpp" -+#ifdef COMPILER1 -+#include "c1/c1_Runtime1.hpp" -+#endif -+ -+Register NativeInstruction::extract_rs1(address instr) { -+ assert_cond(instr != NULL); -+ return as_Register(Assembler::extract(((unsigned*)instr)[0], 19, 15)); ++// Defines obj, preserves var_size_in_bytes, okay for tmp2 == var_size_in_bytes. ++void MacroAssembler::tlab_allocate(Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register tmp1, ++ Register tmp2, ++ Label& slow_case, ++ bool is_far) { ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->tlab_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, tmp1, tmp2, slow_case, is_far); +} + -+Register NativeInstruction::extract_rs2(address instr) { -+ assert_cond(instr != NULL); -+ return as_Register(Assembler::extract(((unsigned*)instr)[0], 24, 20)); ++// Defines obj, preserves var_size_in_bytes ++void MacroAssembler::eden_allocate(Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register tmp, ++ Label& slow_case, ++ bool is_far) { ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->eden_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, tmp, slow_case, is_far); +} + -+Register NativeInstruction::extract_rd(address instr) { -+ assert_cond(instr != NULL); -+ return as_Register(Assembler::extract(((unsigned*)instr)[0], 11, 7)); -+} + -+uint32_t NativeInstruction::extract_opcode(address instr) { -+ assert_cond(instr != NULL); -+ return Assembler::extract(((unsigned*)instr)[0], 6, 0); -+} ++// get_thread() can be called anywhere inside generated code so we ++// need to save whatever non-callee save context might get clobbered ++// by the call to Thread::current() or, indeed, the call setup code. 
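Editorial note (not part of the patch): the set pushed below is the integer caller-saved ("call-clobbered") set of the RISC-V psABI, minus the register that will receive the result; the register names are standard, nothing here is specific to this patch.

  // RISC-V integer caller-saved registers (psABI):
  //   ra       = x1
  //   t0..t2   = x5..x7
  //   a0..a7   = x10..x17
  //   t3..t6   = x28..x31
  // Callee-saved s0..s11 (x8, x9, x18..x27) and sp/gp/tp are left untouched, so
  // only the result needs the extra mv from x10 when 'thread' is not x10.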
++void MacroAssembler::get_thread(Register thread) { ++ // save all call-clobbered regs except thread ++ RegSet saved_regs = RegSet::range(x5, x7) + RegSet::range(x10, x17) + ++ RegSet::range(x28, x31) + ra - thread; ++ push_reg(saved_regs, sp); + -+uint32_t NativeInstruction::extract_funct3(address instr) { -+ assert_cond(instr != NULL); -+ return Assembler::extract(((unsigned*)instr)[0], 14, 12); -+} ++ int32_t offset = 0; ++ movptr_with_offset(ra, CAST_FROM_FN_PTR(address, Thread::current), offset); ++ jalr(ra, ra, offset); ++ if (thread != x10) { ++ mv(thread, x10); ++ } + -+bool NativeInstruction::is_pc_relative_at(address instr) { -+ // auipc + jalr -+ // auipc + addi -+ // auipc + load -+ // auipc + fload_load -+ return (is_auipc_at(instr)) && -+ (is_addi_at(instr + instruction_size) || -+ is_jalr_at(instr + instruction_size) || -+ is_load_at(instr + instruction_size) || -+ is_float_load_at(instr + instruction_size)) && -+ check_pc_relative_data_dependency(instr); ++ // restore pushed registers ++ pop_reg(saved_regs, sp); +} + -+// ie:ld(Rd, Label) -+bool NativeInstruction::is_load_pc_relative_at(address instr) { -+ return is_auipc_at(instr) && // auipc -+ is_ld_at(instr + instruction_size) && // ld -+ check_load_pc_relative_data_dependency(instr); ++void MacroAssembler::load_byte_map_base(Register reg) { ++ CardTable::CardValue* byte_map_base = ++ ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base(); ++ li(reg, (uint64_t)byte_map_base); +} + -+bool NativeInstruction::is_movptr_at(address instr) { -+ return is_lui_at(instr) && // Lui -+ is_addi_at(instr + instruction_size) && // Addi -+ is_slli_shift_at(instr + instruction_size * 2, 11) && // Slli Rd, Rs, 11 -+ is_addi_at(instr + instruction_size * 3) && // Addi -+ is_slli_shift_at(instr + instruction_size * 4, 6) && // Slli Rd, Rs, 6 -+ (is_addi_at(instr + instruction_size * 5) || -+ is_jalr_at(instr + instruction_size * 5) || -+ is_load_at(instr + instruction_size * 5)) && // Addi/Jalr/Load -+ check_movptr_data_dependency(instr); -+} ++void MacroAssembler::la_patchable(Register reg1, const Address &dest, int32_t &offset) { ++ relocInfo::relocType rtype = dest.rspec().reloc()->type(); ++ unsigned long low_address = (uintptr_t)CodeCache::low_bound(); ++ unsigned long high_address = (uintptr_t)CodeCache::high_bound(); ++ unsigned long dest_address = (uintptr_t)dest.target(); ++ long offset_low = dest_address - low_address; ++ long offset_high = dest_address - high_address; + -+bool NativeInstruction::is_li32_at(address instr) { -+ return is_lui_at(instr) && // lui -+ is_addiw_at(instr + instruction_size) && // addiw -+ check_li32_data_dependency(instr); -+} ++ assert(is_valid_riscv64_address(dest.target()), "bad address"); ++ assert(dest.getMode() == Address::literal, "la_patchable must be applied to a literal address"); + -+bool NativeInstruction::is_li64_at(address instr) { -+ return is_lui_at(instr) && // lui -+ is_addi_at(instr + instruction_size) && // addi -+ is_slli_shift_at(instr + instruction_size * 2, 12) && // Slli Rd, Rs, 12 -+ is_addi_at(instr + instruction_size * 3) && // addi -+ is_slli_shift_at(instr + instruction_size * 4, 12) && // Slli Rd, Rs, 12 -+ is_addi_at(instr + instruction_size * 5) && // addi -+ is_slli_shift_at(instr + instruction_size * 6, 8) && // Slli Rd, Rs, 8 -+ is_addi_at(instr + instruction_size * 7) && // addi -+ check_li64_data_dependency(instr); ++ InstructionMark im(this); ++ code_section()->relocate(inst_mark(), dest.rspec()); ++ // RISC-V doesn't compute a 
page-aligned address, in order to partially ++ // compensate for the use of *signed* offsets in its base+disp12 ++ // addressing mode (RISC-V's PC-relative reach remains asymmetric ++ // [-(2G + 2K), 2G - 2k). ++ if (offset_high >= -((1L << 31) + (1L << 11)) && offset_low < (1L << 31) - (1L << 11)) { ++ int64_t distance = dest.target() - pc(); ++ auipc(reg1, (int32_t)distance + 0x800); ++ offset = ((int32_t)distance << 20) >> 20; ++ } else { ++ movptr_with_offset(reg1, dest.target(), offset); ++ } +} + -+void NativeCall::verify() { -+ assert(NativeCall::is_call_at((address)this), "unexpected code at call site"); ++void MacroAssembler::build_frame(int framesize) { ++ assert(framesize >= 2, "framesize must include space for FP/RA"); ++ assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment"); ++ sub(sp, sp, framesize); ++ sd(fp, Address(sp, framesize - 2 * wordSize)); ++ sd(ra, Address(sp, framesize - wordSize)); ++ if (PreserveFramePointer) { add(fp, sp, framesize); } ++ verify_cross_modify_fence_not_required(); +} + -+address NativeCall::destination() const { -+ address addr = (address)this; -+ assert(NativeInstruction::is_jal_at(instruction_address()), "inst must be jal."); -+ address destination = MacroAssembler::target_addr_for_insn(instruction_address()); -+ -+ // Do we use a trampoline stub for this call? -+ CodeBlob* cb = CodeCache::find_blob_unsafe(addr); // Else we get assertion if nmethod is zombie. -+ assert(cb && cb->is_nmethod(), "sanity"); -+ nmethod *nm = (nmethod *)cb; -+ if (nm != NULL && nm->stub_contains(destination) && is_NativeCallTrampolineStub_at(destination)) { -+ // Yes we do, so get the destination from the trampoline stub. -+ const address trampoline_stub_addr = destination; -+ destination = nativeCallTrampolineStub_at(trampoline_stub_addr)->destination(); -+ } -+ -+ return destination; ++void MacroAssembler::remove_frame(int framesize) { ++ assert(framesize >= 2, "framesize must include space for FP/RA"); ++ assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment"); ++ ld(fp, Address(sp, framesize - 2 * wordSize)); ++ ld(ra, Address(sp, framesize - wordSize)); ++ add(sp, sp, framesize); +} + -+// Similar to replace_mt_safe, but just changes the destination. The -+// important thing is that free-running threads are able to execute this -+// call instruction at all times. -+// -+// Used in the runtime linkage of calls; see class CompiledIC. -+// -+// Add parameter assert_lock to switch off assertion -+// during code generation, where no patching lock is needed. -+void NativeCall::set_destination_mt_safe(address dest, bool assert_lock) { -+ assert(!assert_lock || -+ (Patching_lock->is_locked() || SafepointSynchronize::is_at_safepoint()), -+ "concurrent code patching"); ++void MacroAssembler::reserved_stack_check() { ++ // testing if reserved zone needs to be enabled ++ Label no_reserved_zone_enabling; + -+ ResourceMark rm; -+ address addr_call = addr_at(0); -+ assert(NativeCall::is_call_at(addr_call), "unexpected code at call site"); ++ ld(t0, Address(xthread, JavaThread::reserved_stack_activation_offset())); ++ bltu(sp, t0, no_reserved_zone_enabling); + -+ // Patch the constant in the call's trampoline stub. -+ address trampoline_stub_addr = get_trampoline(); -+ if (trampoline_stub_addr != NULL) { -+ assert (!is_NativeCallTrampolineStub_at(dest), "chained trampolines"); -+ nativeCallTrampolineStub_at(trampoline_stub_addr)->set_destination(dest); -+ } ++ enter(); // RA and FP are live. 
++ mv(c_rarg0, xthread); ++ int32_t offset = 0; ++ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone)), offset); ++ jalr(x1, t0, offset); ++ leave(); + -+ // Patch the call. -+ if (Assembler::reachable_from_branch_at(addr_call, dest)) { -+ set_destination(dest); -+ } else { -+ assert (trampoline_stub_addr != NULL, "we need a trampoline"); -+ set_destination(trampoline_stub_addr); -+ } ++ // We have already removed our own frame. ++ // throw_delayed_StackOverflowError will think that it's been ++ // called by our caller. ++ offset = 0; ++ la_patchable(t0, RuntimeAddress(StubRoutines::throw_delayed_StackOverflowError_entry()), offset); ++ jalr(x0, t0, offset); ++ should_not_reach_here(); + -+ ICache::invalidate_range(addr_call, instruction_size); ++ bind(no_reserved_zone_enabling); +} + -+address NativeCall::get_trampoline() { -+ address call_addr = addr_at(0); -+ -+ CodeBlob *code = CodeCache::find_blob(call_addr); -+ assert(code != NULL, "Could not find the containing code blob"); -+ -+ address jal_destination = MacroAssembler::pd_call_destination(call_addr); -+ if (code != NULL && code->contains(jal_destination) && is_NativeCallTrampolineStub_at(jal_destination)) { -+ return jal_destination; -+ } ++// Move the address of the polling page into dest. ++void MacroAssembler::get_polling_page(Register dest, relocInfo::relocType rtype) { ++ ld(dest, Address(xthread, JavaThread::polling_page_offset())); ++} + -+ if (code != NULL && code->is_nmethod()) { -+ return trampoline_stub_Relocation::get_trampoline_for(call_addr, (nmethod*)code); ++// Read the polling page. The address of the polling page must ++// already be in r. ++address MacroAssembler::read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype) { ++ address mark; ++ { ++ InstructionMark im(this); ++ code_section()->relocate(inst_mark(), rtype); ++ lwu(zr, Address(r, offset)); ++ mark = inst_mark(); + } -+ -+ return NULL; ++ verify_cross_modify_fence_not_required(); ++ return mark; +} + -+// Inserts a native call instruction at a given pc -+void NativeCall::insert(address code_pos, address entry) { Unimplemented(); } -+ -+//------------------------------------------------------------------- -+ -+void NativeMovConstReg::verify() { -+ if (!(nativeInstruction_at(instruction_address())->is_movptr() || -+ is_auipc_at(instruction_address()))) { -+ fatal("should be MOVPTR or AUIPC"); ++void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { ++#ifdef ASSERT ++ { ++ ThreadInVMfromUnknown tiv; ++ assert (UseCompressedOops, "should only be used for compressed oops"); ++ assert (Universe::heap() != NULL, "java heap should be initialized"); ++ assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); ++ assert(Universe::heap()->is_in(JNIHandles::resolve(obj)), "should be real oop"); + } ++#endif ++ int oop_index = oop_recorder()->find_index(obj); ++ InstructionMark im(this); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ code_section()->relocate(inst_mark(), rspec); ++ li32(dst, 0xDEADBEEF); ++ zero_extend(dst, dst, 32); +} + -+intptr_t NativeMovConstReg::data() const { -+ address addr = MacroAssembler::target_addr_for_insn(instruction_address()); -+ if (maybe_cpool_ref(instruction_address())) { -+ return *(intptr_t*)addr; -+ } else { -+ return (intptr_t)addr; -+ } ++void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { ++ assert (UseCompressedClassPointers, "should only be used for compressed headers"); ++ assert (oop_recorder() != 
NULL, "this assembler needs an OopRecorder"); ++ int index = oop_recorder()->find_index(k); ++ assert(!Universe::heap()->is_in(k), "should not be an oop"); ++ ++ InstructionMark im(this); ++ RelocationHolder rspec = metadata_Relocation::spec(index); ++ code_section()->relocate(inst_mark(), rspec); ++ narrowKlass nk = CompressedKlassPointers::encode(k); ++ li32(dst, nk); ++ zero_extend(dst, dst, 32); +} + -+void NativeMovConstReg::set_data(intptr_t x) { -+ if (maybe_cpool_ref(instruction_address())) { -+ address addr = MacroAssembler::target_addr_for_insn(instruction_address()); -+ *(intptr_t*)addr = x; -+ } else { -+ // Store x into the instruction stream. -+ MacroAssembler::pd_patch_instruction_size(instruction_address(), (address)x); -+ ICache::invalidate_range(instruction_address(), movptr_instruction_size); -+ } ++// Maybe emit a call via a trampoline. If the code cache is small ++// trampolines won't be emitted. ++address MacroAssembler::trampoline_call(Address entry, CodeBuffer* cbuf) { ++ assert(JavaThread::current()->is_Compiler_thread(), "just checking"); ++ assert(entry.rspec().type() == relocInfo::runtime_call_type || ++ entry.rspec().type() == relocInfo::opt_virtual_call_type || ++ entry.rspec().type() == relocInfo::static_call_type || ++ entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type"); + -+ // Find and replace the oop/metadata corresponding to this -+ // instruction in oops section. -+ CodeBlob* cb = CodeCache::find_blob(instruction_address()); -+ if(cb != NULL) { -+ nmethod* nm = cb->as_nmethod_or_null(); -+ if (nm != NULL) { -+ RelocIterator iter(nm, instruction_address(), next_instruction_address()); -+ while (iter.next()) { -+ if (iter.type() == relocInfo::oop_type) { -+ oop* oop_addr = iter.oop_reloc()->oop_addr(); -+ *oop_addr = cast_to_oop(x); -+ break; -+ } else if (iter.type() == relocInfo::metadata_type) { -+ Metadata** metadata_addr = iter.metadata_reloc()->metadata_addr(); -+ *metadata_addr = (Metadata*)x; -+ break; -+ } ++ // We need a trampoline if branches are far. ++ if (far_branches()) { ++ bool in_scratch_emit_size = false; ++#ifdef COMPILER2 ++ // We don't want to emit a trampoline if C2 is generating dummy ++ // code during its branch shortening phase. ++ CompileTask* task = ciEnv::current()->task(); ++ in_scratch_emit_size = ++ (task != NULL && is_c2_compile(task->comp_level()) && ++ Compile::current()->output()->in_scratch_emit_size()); ++#endif ++ if (!in_scratch_emit_size) { ++ address stub = emit_trampoline_stub(offset(), entry.target()); ++ if (stub == NULL) { ++ postcond(pc() == badAddress); ++ return NULL; // CodeCache is full + } + } ++ } ++ ++ if (cbuf != NULL) { cbuf->set_insts_mark(); } ++ relocate(entry.rspec()); ++ if (!far_branches()) { ++ jal(entry.target()); + } else { -+ ShouldNotReachHere(); ++ jal(pc()); + } ++ // just need to return a non-null address ++ postcond(pc() != badAddress); ++ return pc(); +} + -+void NativeMovConstReg::print() { -+ tty->print_cr(PTR_FORMAT ": mov reg, " INTPTR_FORMAT, -+ p2i(instruction_address()), data()); ++address MacroAssembler::ic_call(address entry, jint method_index) { ++ RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index); ++ movptr(t1, (address)Universe::non_oop_word()); ++ assert_cond(entry != NULL); ++ return trampoline_call(Address(entry, rh)); +} + -+//------------------------------------------------------------------- ++// Emit a trampoline stub for a call to a target which is too far away. 
++// ++// code sequences: ++// ++// call-site: ++// branch-and-link to or ++// ++// Related trampoline stub for this call site in the stub section: ++// load the call target from the constant pool ++// branch (RA still points to the call site above) + -+int NativeMovRegMem::offset() const { -+ Unimplemented(); -+ return 0; -+} ++address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset, ++ address dest) { ++ address stub = start_a_stub(NativeInstruction::instruction_size ++ + NativeCallTrampolineStub::instruction_size); ++ if (stub == NULL) { ++ return NULL; // CodeBuffer::expand failed ++ } + -+void NativeMovRegMem::set_offset(int x) { Unimplemented(); } ++ // Create a trampoline stub relocation which relates this trampoline stub ++ // with the call instruction at insts_call_instruction_offset in the ++ // instructions code-section. + -+void NativeMovRegMem::verify() { -+ Unimplemented(); -+} ++ // make sure 4 byte aligned here, so that the destination address would be ++ // 8 byte aligned after 3 intructions ++ // when we reach here we may get a 2-byte alignment so need to align it ++ align(wordSize, NativeCallTrampolineStub::data_offset); + -+//-------------------------------------------------------------------------------- ++ relocate(trampoline_stub_Relocation::spec(code()->insts()->start() + ++ insts_call_instruction_offset)); ++ const int stub_start_offset = offset(); + -+void NativeJump::verify() { } ++ // Now, create the trampoline stub's code: ++ // - load the call ++ // - call ++ Label target; ++ ld(t0, target); // auipc + ld ++ jr(t0); // jalr ++ bind(target); ++ assert(offset() - stub_start_offset == NativeCallTrampolineStub::data_offset, ++ "should be"); ++ assert(offset() % wordSize == 0, "bad alignment"); ++ emit_int64((intptr_t)dest); + ++ const address stub_start_addr = addr_at(stub_start_offset); + -+void NativeJump::check_verified_entry_alignment(address entry, address verified_entry) { ++ assert(is_NativeCallTrampolineStub_at(stub_start_addr), "doesn't look like a trampoline"); ++ ++ end_a_stub(); ++ return stub_start_addr; +} + ++Address MacroAssembler::add_memory_helper(const Address dst) { ++ switch (dst.getMode()) { ++ case Address::base_plus_offset: ++ // This is the expected mode, although we allow all the other ++ // forms below. ++ return form_address(t1, dst.base(), dst.offset()); ++ default: ++ la(t1, dst); ++ return Address(t1); ++ } ++} + -+address NativeJump::jump_destination() const { -+ address dest = MacroAssembler::target_addr_for_insn(instruction_address()); ++void MacroAssembler::add_memory_int64(const Address dst, int64_t imm) { ++ Address adr = add_memory_helper(dst); ++ assert_different_registers(adr.base(), t0); ++ ld(t0, adr); ++ addi(t0, t0, imm); ++ sd(t0, adr); ++} + -+ // We use jump to self as the unresolved address which the inline -+ // cache code (and relocs) know about ++void MacroAssembler::add_memory_int32(const Address dst, int32_t imm) { ++ Address adr = add_memory_helper(dst); ++ assert_different_registers(adr.base(), t0); ++ lwu(t0, adr); ++ addiw(t0, t0, imm); ++ sw(t0, adr); ++} + -+ // return -1 if jump to self -+ dest = (dest == (address) this) ? 
(address) -1 : dest; -+ return dest; -+}; ++void MacroAssembler::cmpptr(Register src1, Address src2, Label& equal) { ++ assert_different_registers(src1, t0); ++ int32_t offset; ++ la_patchable(t0, src2, offset); ++ ld(t0, Address(t0, offset)); ++ beq(src1, t0, equal); ++} + -+//------------------------------------------------------------------- ++void MacroAssembler::load_method_holder_cld(Register result, Register method) { ++ load_method_holder(result, method); ++ ld(result, Address(result, InstanceKlass::class_loader_data_offset())); ++} + -+address NativeGeneralJump::jump_destination() const { -+ NativeMovConstReg* move = nativeMovConstReg_at(instruction_address()); -+ address dest = (address) move->data(); ++void MacroAssembler::load_method_holder(Register holder, Register method) { ++ ld(holder, Address(method, Method::const_offset())); // ConstMethod* ++ ld(holder, Address(holder, ConstMethod::constants_offset())); // ConstantPool* ++ ld(holder, Address(holder, ConstantPool::pool_holder_offset_in_bytes())); // InstanceKlass* ++} + -+ // We use jump to self as the unresolved address which the inline -+ // cache code (and relocs) know about ++// string indexof ++// compute index by trailing zeros ++void MacroAssembler::compute_index(Register haystack, Register trailing_zeros, ++ Register match_mask, Register result, ++ Register ch2, Register tmp, ++ bool haystack_isL) ++{ ++ int haystack_chr_shift = haystack_isL ? 0 : 1; ++ srl(match_mask, match_mask, trailing_zeros); ++ srli(match_mask, match_mask, 1); ++ srli(tmp, trailing_zeros, LogBitsPerByte); ++ if (!haystack_isL) andi(tmp, tmp, 0xE); ++ add(haystack, haystack, tmp); ++ ld(ch2, Address(haystack)); ++ if (!haystack_isL) srli(tmp, tmp, haystack_chr_shift); ++ add(result, result, tmp); ++} + -+ // return -1 if jump to self -+ dest = (dest == (address) this) ? 
(address) -1 : dest; -+ return dest; ++// string indexof ++// Find pattern element in src, compute match mask, ++// only the first occurrence of 0x80/0x8000 at low bits is the valid match index ++// match mask patterns and corresponding indices would be like: ++// - 0x8080808080808080 (Latin1) ++// - 7 6 5 4 3 2 1 0 (match index) ++// - 0x8000800080008000 (UTF16) ++// - 3 2 1 0 (match index) ++void MacroAssembler::compute_match_mask(Register src, Register pattern, Register match_mask, ++ Register mask1, Register mask2) ++{ ++ xorr(src, pattern, src); ++ sub(match_mask, src, mask1); ++ orr(src, src, mask2); ++ notr(src, src); ++ andr(match_mask, match_mask, src); +} + -+//------------------------------------------------------------------- ++#ifdef COMPILER2 ++// Code for BigInteger::mulAdd instrinsic ++// out = x10 ++// in = x11 ++// offset = x12 (already out.length-offset) ++// len = x13 ++// k = x14 ++// tmp = x28 ++// ++// pseudo code from java implementation: ++// long kLong = k & LONG_MASK; ++// carry = 0; ++// offset = out.length-offset - 1; ++// for (int j = len - 1; j >= 0; j--) { ++// product = (in[j] & LONG_MASK) * kLong + (out[offset] & LONG_MASK) + carry; ++// out[offset--] = (int)product; ++// carry = product >>> 32; ++// } ++// return (int)carry; ++void MacroAssembler::mul_add(Register out, Register in, Register offset, ++ Register len, Register k, Register tmp) { ++ Label L_tail_loop, L_unroll, L_end; ++ mv(tmp, out); ++ mv(out, zr); ++ blez(len, L_end); ++ zero_extend(k, k, 32); ++ slliw(t0, offset, LogBytesPerInt); ++ add(offset, tmp, t0); ++ slliw(t0, len, LogBytesPerInt); ++ add(in, in, t0); ++ ++ const int unroll = 8; ++ li(tmp, unroll); ++ blt(len, tmp, L_tail_loop); ++ bind(L_unroll); ++ for (int i = 0; i < unroll; i++) { ++ sub(in, in, BytesPerInt); ++ lwu(t0, Address(in, 0)); ++ mul(t1, t0, k); ++ add(t0, t1, out); ++ sub(offset, offset, BytesPerInt); ++ lwu(t1, Address(offset, 0)); ++ add(t0, t0, t1); ++ sw(t0, Address(offset, 0)); ++ srli(out, t0, 32); ++ } ++ subw(len, len, tmp); ++ bge(len, tmp, L_unroll); ++ ++ bind(L_tail_loop); ++ blez(len, L_end); ++ sub(in, in, BytesPerInt); ++ lwu(t0, Address(in, 0)); ++ mul(t1, t0, k); ++ add(t0, t1, out); ++ sub(offset, offset, BytesPerInt); ++ lwu(t1, Address(offset, 0)); ++ add(t0, t0, t1); ++ sw(t0, Address(offset, 0)); ++ srli(out, t0, 32); ++ subw(len, len, 1); ++ j(L_tail_loop); + -+bool NativeInstruction::is_safepoint_poll() { -+ return is_lwu_to_zr(address(this)); ++ bind(L_end); +} + -+bool NativeInstruction::is_lwu_to_zr(address instr) { -+ return (extract_opcode(instr) == 0b0000011 && -+ extract_funct3(instr) == 0b110 && -+ extract_rd(instr) == zr); // zr ++// add two unsigned input and output carry ++void MacroAssembler::cad(Register dst, Register src1, Register src2, Register carry) ++{ ++ assert_different_registers(dst, carry); ++ assert_different_registers(dst, src2); ++ add(dst, src1, src2); ++ sltu(carry, dst, src2); +} + -+// A 16-bit instruction with all bits ones is permanently reserved as an illegal instruction. 
-+bool NativeInstruction::is_sigill_zombie_not_entrant() { -+ // jvmci -+ return uint_at(0) == 0xffffffff; ++// add two input with carry ++void MacroAssembler::adc(Register dst, Register src1, Register src2, Register carry) ++{ ++ assert_different_registers(dst, carry); ++ add(dst, src1, src2); ++ add(dst, dst, carry); +} + -+void NativeIllegalInstruction::insert(address code_pos) { -+ assert_cond(code_pos != NULL); -+ *(juint*)code_pos = 0xffffffff; // all bits ones is permanently reserved as an illegal instruction ++// add two unsigned input with carry and output carry ++void MacroAssembler::cadc(Register dst, Register src1, Register src2, Register carry) ++{ ++ assert_different_registers(dst, src2); ++ adc(dst, src1, src2, carry); ++ sltu(carry, dst, src2); +} + -+//------------------------------------------------------------------- -+ -+// MT-safe inserting of a jump over a jump or a nop (used by -+// nmethod::make_not_entrant_or_zombie) -+ -+void NativeJump::patch_verified_entry(address entry, address verified_entry, address dest) { -+ -+ assert(dest == SharedRuntime::get_handle_wrong_method_stub(), "expected fixed destination of patch"); ++void MacroAssembler::add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo, ++ Register src1, Register src2, Register carry) ++{ ++ cad(dest_lo, dest_lo, src1, carry); ++ add(dest_hi, dest_hi, carry); ++ cad(dest_lo, dest_lo, src2, carry); ++ add(final_dest_hi, dest_hi, carry); ++} + -+ assert(nativeInstruction_at(verified_entry)->is_jump_or_nop() || -+ nativeInstruction_at(verified_entry)->is_sigill_zombie_not_entrant(), -+ "riscv cannot replace non-jump with jump"); -+ -+ // Patch this nmethod atomically. -+ if (Assembler::reachable_from_branch_at(verified_entry, dest)) { -+ ptrdiff_t offset = dest - verified_entry; -+ guarantee(is_imm_in_range(offset, 20, 1), "offset is too large to be patched in one jal insrusction."); // 1M -+ -+ uint32_t insn = 0; -+ address pInsn = (address)&insn; -+ Assembler::patch(pInsn, 31, 31, (offset >> 20) & 0x1); -+ Assembler::patch(pInsn, 30, 21, (offset >> 1) & 0x3ff); -+ Assembler::patch(pInsn, 20, 20, (offset >> 11) & 0x1); -+ Assembler::patch(pInsn, 19, 12, (offset >> 12) & 0xff); -+ Assembler::patch(pInsn, 11, 7, 0); // zero, no link jump -+ Assembler::patch(pInsn, 6, 0, 0b1101111); // j, (jal x0 offset) -+ *(unsigned int*)verified_entry = insn; -+ } else { -+ // We use an illegal instruction for marking a method as -+ // not_entrant or zombie. -+ NativeIllegalInstruction::insert(verified_entry); -+ } ++/** ++ * Multiply 32 bit by 32 bit first loop. 
++ */ ++void MacroAssembler::multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart, ++ Register y, Register y_idx, Register z, ++ Register carry, Register product, ++ Register idx, Register kdx) ++{ ++ // jlong carry, x[], y[], z[]; ++ // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) { ++ // long product = y[idx] * x[xstart] + carry; ++ // z[kdx] = (int)product; ++ // carry = product >>> 32; ++ // } ++ // z[xstart] = (int)carry; + -+ ICache::invalidate_range(verified_entry, instruction_size); -+} ++ Label L_first_loop, L_first_loop_exit; ++ blez(idx, L_first_loop_exit); + -+void NativeGeneralJump::insert_unconditional(address code_pos, address entry) { -+ CodeBuffer cb(code_pos, instruction_size); -+ MacroAssembler a(&cb); ++ shadd(t0, xstart, x, t0, LogBytesPerInt); ++ lwu(x_xstart, Address(t0, 0)); + -+ int32_t offset = 0; -+ a.movptr_with_offset(t0, entry, offset); // lui, addi, slli, addi, slli -+ a.jalr(x0, t0, offset); // jalr ++ bind(L_first_loop); ++ subw(idx, idx, 1); ++ shadd(t0, idx, y, t0, LogBytesPerInt); ++ lwu(y_idx, Address(t0, 0)); ++ mul(product, x_xstart, y_idx); ++ add(product, product, carry); ++ srli(carry, product, 32); ++ subw(kdx, kdx, 1); ++ shadd(t0, kdx, z, t0, LogBytesPerInt); ++ sw(product, Address(t0, 0)); ++ bgtz(idx, L_first_loop); + -+ ICache::invalidate_range(code_pos, instruction_size); ++ bind(L_first_loop_exit); +} + -+// MT-safe patching of a long jump instruction. -+void NativeGeneralJump::replace_mt_safe(address instr_addr, address code_buffer) { -+ ShouldNotCallThis(); -+} ++/** ++ * Multiply 64 bit by 64 bit first loop. ++ */ ++void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart, ++ Register y, Register y_idx, Register z, ++ Register carry, Register product, ++ Register idx, Register kdx) ++{ ++ // ++ // jlong carry, x[], y[], z[]; ++ // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) { ++ // huge_128 product = y[idx] * x[xstart] + carry; ++ // z[kdx] = (jlong)product; ++ // carry = (jlong)(product >>> 64); ++ // } ++ // z[xstart] = carry; ++ // + ++ Label L_first_loop, L_first_loop_exit; ++ Label L_one_x, L_one_y, L_multiply; + -+address NativeCallTrampolineStub::destination(nmethod *nm) const { -+ return ptr_at(data_offset); -+} ++ subw(xstart, xstart, 1); ++ bltz(xstart, L_one_x); + -+void NativeCallTrampolineStub::set_destination(address new_destination) { -+ set_ptr_at(data_offset, new_destination); -+ OrderAccess::release(); -+} ++ shadd(t0, xstart, x, t0, LogBytesPerInt); ++ ld(x_xstart, Address(t0, 0)); ++ ror_imm(x_xstart, x_xstart, 32); // convert big-endian to little-endian + -+uint32_t NativeMembar::get_kind() { -+ uint32_t insn = uint_at(0); ++ bind(L_first_loop); ++ subw(idx, idx, 1); ++ bltz(idx, L_first_loop_exit); ++ subw(idx, idx, 1); ++ bltz(idx, L_one_y); + -+ uint32_t predecessor = Assembler::extract(insn, 27, 24); -+ uint32_t successor = Assembler::extract(insn, 23, 20); ++ shadd(t0, idx, y, t0, LogBytesPerInt); ++ ld(y_idx, Address(t0, 0)); ++ ror_imm(y_idx, y_idx, 32); // convert big-endian to little-endian ++ bind(L_multiply); + -+ return MacroAssembler::pred_succ_to_membar_mask(predecessor, successor); -+} ++ mulhu(t0, x_xstart, y_idx); ++ mul(product, x_xstart, y_idx); ++ cad(product, product, carry, t1); ++ adc(carry, t0, zr, t1); + -+void NativeMembar::set_kind(uint32_t order_kind) { -+ uint32_t predecessor = 0; -+ uint32_t successor = 0; ++ subw(kdx, kdx, 2); ++ ror_imm(product, product, 32); // back to big-endian ++ shadd(t0, 
kdx, z, t0, LogBytesPerInt); ++ sd(product, Address(t0, 0)); + -+ MacroAssembler::membar_mask_to_pred_succ(order_kind, predecessor, successor); ++ j(L_first_loop); + -+ uint32_t insn = uint_at(0); -+ address pInsn = (address) &insn; -+ Assembler::patch(pInsn, 27, 24, predecessor); -+ Assembler::patch(pInsn, 23, 20, successor); ++ bind(L_one_y); ++ lwu(y_idx, Address(y, 0)); ++ j(L_multiply); + -+ address membar = addr_at(0); -+ *(unsigned int*) membar = insn; ++ bind(L_one_x); ++ lwu(x_xstart, Address(x, 0)); ++ j(L_first_loop); ++ ++ bind(L_first_loop_exit); +} -diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.hpp b/src/hotspot/cpu/riscv/nativeInst_riscv.hpp -new file mode 100644 -index 000000000..e8a4e0a46 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/nativeInst_riscv.hpp -@@ -0,0 +1,561 @@ -+/* -+ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2018, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. ++ ++/** ++ * Multiply 128 bit by 128 bit. Unrolled inner loop. 
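++ * Each iteration of the inner loop consumes two 64-bit words of y and
++ * updates two 64-bit words of z.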
+ * + */ ++void MacroAssembler::multiply_128_x_128_loop(Register y, Register z, ++ Register carry, Register carry2, ++ Register idx, Register jdx, ++ Register yz_idx1, Register yz_idx2, ++ Register tmp, Register tmp3, Register tmp4, ++ Register tmp6, Register product_hi) ++{ ++ // jlong carry, x[], y[], z[]; ++ // int kdx = xstart+1; ++ // for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop ++ // huge_128 tmp3 = (y[idx+1] * product_hi) + z[kdx+idx+1] + carry; ++ // jlong carry2 = (jlong)(tmp3 >>> 64); ++ // huge_128 tmp4 = (y[idx] * product_hi) + z[kdx+idx] + carry2; ++ // carry = (jlong)(tmp4 >>> 64); ++ // z[kdx+idx+1] = (jlong)tmp3; ++ // z[kdx+idx] = (jlong)tmp4; ++ // } ++ // idx += 2; ++ // if (idx > 0) { ++ // yz_idx1 = (y[idx] * product_hi) + z[kdx+idx] + carry; ++ // z[kdx+idx] = (jlong)yz_idx1; ++ // carry = (jlong)(yz_idx1 >>> 64); ++ // } ++ // + -+#ifndef CPU_RISCV_NATIVEINST_RISCV_HPP -+#define CPU_RISCV_NATIVEINST_RISCV_HPP -+ -+#include "asm/assembler.hpp" -+#include "runtime/icache.hpp" -+#include "runtime/os.hpp" -+ -+// We have interfaces for the following instructions: -+// - NativeInstruction -+// - - NativeCall -+// - - NativeMovConstReg -+// - - NativeMovRegMem -+// - - NativeJump -+// - - NativeGeneralJump -+// - - NativeIllegalInstruction -+// - - NativeCallTrampolineStub -+// - - NativeMembar ++ Label L_third_loop, L_third_loop_exit, L_post_third_loop_done; + -+// The base class for different kinds of native instruction abstractions. -+// Provides the primitive operations to manipulate code relative to this. ++ srliw(jdx, idx, 2); + -+class NativeInstruction { -+ friend class Relocation; -+ friend bool is_NativeCallTrampolineStub_at(address); -+ public: -+ enum { -+ instruction_size = 4 -+ }; ++ bind(L_third_loop); + -+ juint encoding() const { -+ return uint_at(0); -+ } ++ subw(jdx, jdx, 1); ++ bltz(jdx, L_third_loop_exit); ++ subw(idx, idx, 4); + -+ bool is_jal() const { return is_jal_at(addr_at(0)); } -+ bool is_movptr() const { return is_movptr_at(addr_at(0)); } -+ bool is_call() const { return is_call_at(addr_at(0)); } -+ bool is_jump() const { return is_jump_at(addr_at(0)); } ++ shadd(t0, idx, y, t0, LogBytesPerInt); ++ ld(yz_idx2, Address(t0, 0)); ++ ld(yz_idx1, Address(t0, wordSize)); + -+ static bool is_jal_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b1101111; } -+ static bool is_jalr_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b1100111 && extract_funct3(instr) == 0b000; } -+ static bool is_branch_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b1100011; } -+ static bool is_ld_at(address instr) { assert_cond(instr != NULL); return is_load_at(instr) && extract_funct3(instr) == 0b011; } -+ static bool is_load_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0000011; } -+ static bool is_float_load_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0000111; } -+ static bool is_auipc_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0010111; } -+ static bool is_jump_at(address instr) { assert_cond(instr != NULL); return is_branch_at(instr) || is_jal_at(instr) || is_jalr_at(instr); } -+ static bool is_addi_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0010011 && extract_funct3(instr) == 0b000; } -+ static bool is_addiw_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0011011 && 
extract_funct3(instr) == 0b000; } -+ static bool is_lui_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0110111; } -+ static bool is_slli_shift_at(address instr, uint32_t shift) { -+ assert_cond(instr != NULL); -+ return (extract_opcode(instr) == 0b0010011 && // opcode field -+ extract_funct3(instr) == 0b001 && // funct3 field, select the type of operation -+ Assembler::extract(((unsigned*)instr)[0], 25, 20) == shift); // shamt field -+ } ++ shadd(tmp6, idx, z, t0, LogBytesPerInt); + -+ static Register extract_rs1(address instr); -+ static Register extract_rs2(address instr); -+ static Register extract_rd(address instr); -+ static uint32_t extract_opcode(address instr); -+ static uint32_t extract_funct3(address instr); ++ ror_imm(yz_idx1, yz_idx1, 32); // convert big-endian to little-endian ++ ror_imm(yz_idx2, yz_idx2, 32); + -+ // the instruction sequence of movptr is as below: -+ // lui -+ // addi -+ // slli -+ // addi -+ // slli -+ // addi/jalr/load -+ static bool check_movptr_data_dependency(address instr) { -+ address lui = instr; -+ address addi1 = lui + instruction_size; -+ address slli1 = addi1 + instruction_size; -+ address addi2 = slli1 + instruction_size; -+ address slli2 = addi2 + instruction_size; -+ address last_instr = slli2 + instruction_size; -+ return extract_rs1(addi1) == extract_rd(lui) && -+ extract_rs1(addi1) == extract_rd(addi1) && -+ extract_rs1(slli1) == extract_rd(addi1) && -+ extract_rs1(slli1) == extract_rd(slli1) && -+ extract_rs1(addi2) == extract_rd(slli1) && -+ extract_rs1(addi2) == extract_rd(addi2) && -+ extract_rs1(slli2) == extract_rd(addi2) && -+ extract_rs1(slli2) == extract_rd(slli2) && -+ extract_rs1(last_instr) == extract_rd(slli2); -+ } ++ ld(t1, Address(tmp6, 0)); ++ ld(t0, Address(tmp6, wordSize)); + -+ // the instruction sequence of li64 is as below: -+ // lui -+ // addi -+ // slli -+ // addi -+ // slli -+ // addi -+ // slli -+ // addi -+ static bool check_li64_data_dependency(address instr) { -+ address lui = instr; -+ address addi1 = lui + instruction_size; -+ address slli1 = addi1 + instruction_size; -+ address addi2 = slli1 + instruction_size; -+ address slli2 = addi2 + instruction_size; -+ address addi3 = slli2 + instruction_size; -+ address slli3 = addi3 + instruction_size; -+ address addi4 = slli3 + instruction_size; -+ return extract_rs1(addi1) == extract_rd(lui) && -+ extract_rs1(addi1) == extract_rd(addi1) && -+ extract_rs1(slli1) == extract_rd(addi1) && -+ extract_rs1(slli1) == extract_rd(slli1) && -+ extract_rs1(addi2) == extract_rd(slli1) && -+ extract_rs1(addi2) == extract_rd(addi2) && -+ extract_rs1(slli2) == extract_rd(addi2) && -+ extract_rs1(slli2) == extract_rd(slli2) && -+ extract_rs1(addi3) == extract_rd(slli2) && -+ extract_rs1(addi3) == extract_rd(addi3) && -+ extract_rs1(slli3) == extract_rd(addi3) && -+ extract_rs1(slli3) == extract_rd(slli3) && -+ extract_rs1(addi4) == extract_rd(slli3) && -+ extract_rs1(addi4) == extract_rd(addi4); -+ } ++ mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3 ++ mulhu(tmp4, product_hi, yz_idx1); + -+ // the instruction sequence of li32 is as below: -+ // lui -+ // addiw -+ static bool check_li32_data_dependency(address instr) { -+ address lui = instr; -+ address addiw = lui + instruction_size; ++ ror_imm(t0, t0, 32, tmp); // convert big-endian to little-endian ++ ror_imm(t1, t1, 32, tmp); + -+ return extract_rs1(addiw) == extract_rd(lui) && -+ extract_rs1(addiw) == extract_rd(addiw); -+ } ++ mul(tmp, product_hi, yz_idx2); // yz_idx2 * 
product_hi -> carry2:tmp ++ mulhu(carry2, product_hi, yz_idx2); + -+ // the instruction sequence of pc-relative is as below: -+ // auipc -+ // jalr/addi/load/float_load -+ static bool check_pc_relative_data_dependency(address instr) { -+ address auipc = instr; -+ address last_instr = auipc + instruction_size; ++ cad(tmp3, tmp3, carry, carry); ++ adc(tmp4, tmp4, zr, carry); ++ cad(tmp3, tmp3, t0, t0); ++ cadc(tmp4, tmp4, tmp, t0); ++ adc(carry, carry2, zr, t0); ++ cad(tmp4, tmp4, t1, carry2); ++ adc(carry, carry, zr, carry2); + -+ return extract_rs1(last_instr) == extract_rd(auipc); -+ } ++ ror_imm(tmp3, tmp3, 32); // convert little-endian to big-endian ++ ror_imm(tmp4, tmp4, 32); ++ sd(tmp4, Address(tmp6, 0)); ++ sd(tmp3, Address(tmp6, wordSize)); + -+ // the instruction sequence of load_label is as below: -+ // auipc -+ // load -+ static bool check_load_pc_relative_data_dependency(address instr) { -+ address auipc = instr; -+ address last_instr = auipc + instruction_size; ++ j(L_third_loop); + -+ return extract_rs1(last_instr) == extract_rd(auipc); -+ } ++ bind(L_third_loop_exit); + -+ static bool is_movptr_at(address instr); -+ static bool is_li32_at(address instr); -+ static bool is_li64_at(address instr); -+ static bool is_pc_relative_at(address branch); -+ static bool is_load_pc_relative_at(address branch); ++ andi(idx, idx, 0x3); ++ beqz(idx, L_post_third_loop_done); + -+ static bool is_call_at(address instr) { -+ if (is_jal_at(instr) || is_jalr_at(instr)) { -+ return true; -+ } -+ return false; -+ } -+ static bool is_lwu_to_zr(address instr); ++ Label L_check_1; ++ subw(idx, idx, 2); ++ bltz(idx, L_check_1); + -+ inline bool is_nop(); -+ inline bool is_jump_or_nop(); -+ bool is_safepoint_poll(); -+ bool is_sigill_zombie_not_entrant(); ++ shadd(t0, idx, y, t0, LogBytesPerInt); ++ ld(yz_idx1, Address(t0, 0)); ++ ror_imm(yz_idx1, yz_idx1, 32); + -+ protected: -+ address addr_at(int offset) const { return address(this) + offset; } ++ mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3 ++ mulhu(tmp4, product_hi, yz_idx1); + -+ jint int_at(int offset) const { return *(jint*) addr_at(offset); } -+ juint uint_at(int offset) const { return *(juint*) addr_at(offset); } ++ shadd(t0, idx, z, t0, LogBytesPerInt); ++ ld(yz_idx2, Address(t0, 0)); ++ ror_imm(yz_idx2, yz_idx2, 32, tmp); + -+ address ptr_at(int offset) const { return *(address*) addr_at(offset); } ++ add2_with_carry(carry, tmp4, tmp3, carry, yz_idx2, tmp); + -+ oop oop_at (int offset) const { return *(oop*) addr_at(offset); } ++ ror_imm(tmp3, tmp3, 32, tmp); ++ sd(tmp3, Address(t0, 0)); + ++ bind(L_check_1); + -+ void set_int_at(int offset, jint i) { *(jint*)addr_at(offset) = i; } -+ void set_uint_at(int offset, jint i) { *(juint*)addr_at(offset) = i; } -+ void set_ptr_at (int offset, address ptr) { *(address*) addr_at(offset) = ptr; } -+ void set_oop_at (int offset, oop o) { *(oop*) addr_at(offset) = o; } ++ andi(idx, idx, 0x1); ++ subw(idx, idx, 1); ++ bltz(idx, L_post_third_loop_done); ++ shadd(t0, idx, y, t0, LogBytesPerInt); ++ lwu(tmp4, Address(t0, 0)); ++ mul(tmp3, tmp4, product_hi); // tmp4 * product_hi -> carry2:tmp3 ++ mulhu(carry2, tmp4, product_hi); + -+ public: ++ shadd(t0, idx, z, t0, LogBytesPerInt); ++ lwu(tmp4, Address(t0, 0)); + -+ inline friend NativeInstruction* nativeInstruction_at(address addr); ++ add2_with_carry(carry2, carry2, tmp3, tmp4, carry, t0); + -+ static bool maybe_cpool_ref(address instr) { -+ return is_auipc_at(instr); -+ } ++ shadd(t0, idx, z, t0, LogBytesPerInt); ++ sw(tmp3, 
Address(t0, 0)); + -+ bool is_membar() { -+ return (uint_at(0) & 0x7f) == 0b1111 && extract_funct3(addr_at(0)) == 0; -+ } -+}; ++ slli(t0, carry2, 32); ++ srli(carry, tmp3, 32); ++ orr(carry, carry, t0); + -+inline NativeInstruction* nativeInstruction_at(address addr) { -+ return (NativeInstruction*)addr; ++ bind(L_post_third_loop_done); +} + -+// The natural type of an RISCV instruction is uint32_t -+inline NativeInstruction* nativeInstruction_at(uint32_t *addr) { -+ return (NativeInstruction*)addr; -+} ++/** ++ * Code for BigInteger::multiplyToLen() intrinsic. ++ * ++ * x10: x ++ * x11: xlen ++ * x12: y ++ * x13: ylen ++ * x14: z ++ * x15: zlen ++ * x16: tmp1 ++ * x17: tmp2 ++ * x7: tmp3 ++ * x28: tmp4 ++ * x29: tmp5 ++ * x30: tmp6 ++ * x31: tmp7 ++ */ ++void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Register ylen, ++ Register z, Register zlen, ++ Register tmp1, Register tmp2, Register tmp3, Register tmp4, ++ Register tmp5, Register tmp6, Register product_hi) ++{ ++ assert_different_registers(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6); + -+inline NativeCall* nativeCall_at(address addr); -+// The NativeCall is an abstraction for accessing/manipulating native -+// call instructions (used to manipulate inline caches, primitive & -+// DSO calls, etc.). ++ const Register idx = tmp1; ++ const Register kdx = tmp2; ++ const Register xstart = tmp3; + -+class NativeCall: public NativeInstruction { -+ public: -+ enum RISCV_specific_constants { -+ instruction_size = 4, -+ instruction_offset = 0, -+ displacement_offset = 0, -+ return_address_offset = 4 -+ }; ++ const Register y_idx = tmp4; ++ const Register carry = tmp5; ++ const Register product = xlen; ++ const Register x_xstart = zlen; // reuse register + -+ address instruction_address() const { return addr_at(instruction_offset); } -+ address next_instruction_address() const { return addr_at(return_address_offset); } -+ address return_address() const { return addr_at(return_address_offset); } -+ address destination() const; ++ mv(idx, ylen); // idx = ylen; ++ mv(kdx, zlen); // kdx = xlen+ylen; ++ mv(carry, zr); // carry = 0; + -+ void set_destination(address dest) { -+ if (is_jal()) { -+ intptr_t offset = (intptr_t)(dest - instruction_address()); -+ assert((offset & 0x1) == 0, "should be aligned"); -+ assert(is_imm_in_range(offset, 20, 1), "set_destination, offset is too large to be patched in one jal insrusction\n"); -+ unsigned int insn = 0b1101111; // jal -+ address pInsn = (address)(&insn); -+ Assembler::patch(pInsn, 31, 31, (offset >> 20) & 0x1); -+ Assembler::patch(pInsn, 30, 21, (offset >> 1) & 0x3ff); -+ Assembler::patch(pInsn, 20, 20, (offset >> 11) & 0x1); -+ Assembler::patch(pInsn, 19, 12, (offset >> 12) & 0xff); -+ Assembler::patch(pInsn, 11, 7, ra->encoding()); // Rd must be x1, need ra -+ set_int_at(displacement_offset, insn); -+ return; -+ } -+ ShouldNotReachHere(); -+ } ++ Label L_multiply_64_x_64_loop, L_done; + -+ void verify_alignment() { ; } -+ void verify(); -+ void print(); ++ subw(xstart, xlen, 1); ++ bltz(xstart, L_done); + -+ // Creation -+ inline friend NativeCall* nativeCall_at(address addr); -+ inline friend NativeCall* nativeCall_before(address return_address); ++ const Register jdx = tmp1; + -+ static bool is_call_before(address return_address) { -+ return is_call_at(return_address - NativeCall::return_address_offset); -+ } ++ if (AvoidUnalignedAccesses) { ++ // Check if x and y are both 8-byte aligned. 
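++    // With an odd number of ints the 64-bit accesses below would only be
++    // 4-byte aligned, so take the 32-bit path unless both xlen and ylen are even.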
++ orr(t0, xlen, ylen); ++ andi(t0, t0, 0x1); ++ beqz(t0, L_multiply_64_x_64_loop); ++ ++ multiply_32_x_32_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); ++ shadd(t0, xstart, z, t0, LogBytesPerInt); ++ sw(carry, Address(t0, 0)); ++ ++ Label L_second_loop_unaligned; ++ bind(L_second_loop_unaligned); ++ mv(carry, zr); ++ mv(jdx, ylen); ++ subw(xstart, xstart, 1); ++ bltz(xstart, L_done); ++ sub(sp, sp, 2 * wordSize); ++ sd(z, Address(sp, 0)); ++ sd(zr, Address(sp, wordSize)); ++ shadd(t0, xstart, z, t0, LogBytesPerInt); ++ addi(z, t0, 4); ++ shadd(t0, xstart, x, t0, LogBytesPerInt); ++ lwu(product, Address(t0, 0)); ++ Label L_third_loop, L_third_loop_exit; ++ ++ blez(jdx, L_third_loop_exit); ++ ++ bind(L_third_loop); ++ subw(jdx, jdx, 1); ++ shadd(t0, jdx, y, t0, LogBytesPerInt); ++ lwu(t0, Address(t0, 0)); ++ mul(t1, t0, product); ++ add(t0, t1, carry); ++ shadd(tmp6, jdx, z, t1, LogBytesPerInt); ++ lwu(t1, Address(tmp6, 0)); ++ add(t0, t0, t1); ++ sw(t0, Address(tmp6, 0)); ++ srli(carry, t0, 32); ++ bgtz(jdx, L_third_loop); ++ ++ bind(L_third_loop_exit); ++ ld(z, Address(sp, 0)); ++ addi(sp, sp, 2 * wordSize); ++ shadd(t0, xstart, z, t0, LogBytesPerInt); ++ sw(carry, Address(t0, 0)); + -+ // MT-safe patching of a call instruction. -+ static void insert(address code_pos, address entry); ++ j(L_second_loop_unaligned); ++ } + -+ static void replace_mt_safe(address instr_addr, address code_buffer); ++ bind(L_multiply_64_x_64_loop); ++ multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); + -+ // Similar to replace_mt_safe, but just changes the destination. The -+ // important thing is that free-running threads are able to execute -+ // this call instruction at all times. If the call is an immediate BL -+ // instruction we can simply rely on atomicity of 32-bit writes to -+ // make sure other threads will see no intermediate states. ++ Label L_second_loop_aligned; ++ beqz(kdx, L_second_loop_aligned); + -+ // We cannot rely on locks here, since the free-running threads must run at -+ // full speed. -+ // -+ // Used in the runtime linkage of calls; see class CompiledIC. -+ // (Cf. 4506997 and 4479829, where threads witnessed garbage displacements.) ++ Label L_carry; ++ subw(kdx, kdx, 1); ++ beqz(kdx, L_carry); + -+ // The parameter assert_lock disables the assertion during code generation. -+ void set_destination_mt_safe(address dest, bool assert_lock = true); ++ shadd(t0, kdx, z, t0, LogBytesPerInt); ++ sw(carry, Address(t0, 0)); ++ srli(carry, carry, 32); ++ subw(kdx, kdx, 1); + -+ address get_trampoline(); -+}; ++ bind(L_carry); ++ shadd(t0, kdx, z, t0, LogBytesPerInt); ++ sw(carry, Address(t0, 0)); + -+inline NativeCall* nativeCall_at(address addr) { -+ assert_cond(addr != NULL); -+ NativeCall* call = (NativeCall*)(addr - NativeCall::instruction_offset); -+#ifdef ASSERT -+ call->verify(); -+#endif -+ return call; -+} ++ // Second and third (nested) loops. 
++ // ++ // for (int i = xstart-1; i >= 0; i--) { // Second loop ++ // carry = 0; ++ // for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop ++ // long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) + ++ // (z[k] & LONG_MASK) + carry; ++ // z[k] = (int)product; ++ // carry = product >>> 32; ++ // } ++ // z[i] = (int)carry; ++ // } ++ // ++ // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = product_hi + -+inline NativeCall* nativeCall_before(address return_address) { -+ assert_cond(return_address != NULL); -+ NativeCall* call = (NativeCall*)(return_address - NativeCall::return_address_offset); -+#ifdef ASSERT -+ call->verify(); -+#endif -+ return call; -+} ++ bind(L_second_loop_aligned); ++ mv(carry, zr); // carry = 0; ++ mv(jdx, ylen); // j = ystart+1 + -+// An interface for accessing/manipulating native mov reg, imm instructions. -+// (used to manipulate inlined 64-bit data calls, etc.) -+class NativeMovConstReg: public NativeInstruction { -+ public: -+ enum RISCV_specific_constants { -+ movptr_instruction_size = 6 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli, addi. See movptr(). -+ movptr_with_offset_instruction_size = 5 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli. See movptr_with_offset(). -+ load_pc_relative_instruction_size = 2 * NativeInstruction::instruction_size, // auipc, ld -+ instruction_offset = 0, -+ displacement_offset = 0 -+ }; ++ subw(xstart, xstart, 1); // i = xstart-1; ++ bltz(xstart, L_done); + -+ address instruction_address() const { return addr_at(instruction_offset); } -+ address next_instruction_address() const { -+ // if the instruction at 5 * instruction_size is addi, -+ // it means a lui + addi + slli + addi + slli + addi instruction sequence, -+ // and the next instruction address should be addr_at(6 * instruction_size). 
-+ // However, when the instruction at 5 * instruction_size isn't addi, -+ // the next instruction address should be addr_at(5 * instruction_size) -+ if (nativeInstruction_at(instruction_address())->is_movptr()) { -+ if (is_addi_at(addr_at(movptr_with_offset_instruction_size))) { -+ // Assume: lui, addi, slli, addi, slli, addi -+ return addr_at(movptr_instruction_size); -+ } else { -+ // Assume: lui, addi, slli, addi, slli -+ return addr_at(movptr_with_offset_instruction_size); -+ } -+ } else if (is_load_pc_relative_at(instruction_address())) { -+ // Assume: auipc, ld -+ return addr_at(load_pc_relative_instruction_size); -+ } -+ guarantee(false, "Unknown instruction in NativeMovConstReg"); -+ return NULL; -+ } ++ sub(sp, sp, 4 * wordSize); ++ sd(z, Address(sp, 0)); + -+ intptr_t data() const; -+ void set_data(intptr_t x); ++ Label L_last_x; ++ shadd(t0, xstart, z, t0, LogBytesPerInt); ++ addi(z, t0, 4); ++ subw(xstart, xstart, 1); // i = xstart-1; ++ bltz(xstart, L_last_x); + -+ void flush() { -+ if (!maybe_cpool_ref(instruction_address())) { -+ ICache::invalidate_range(instruction_address(), movptr_instruction_size); -+ } -+ } ++ shadd(t0, xstart, x, t0, LogBytesPerInt); ++ ld(product_hi, Address(t0, 0)); ++ ror_imm(product_hi, product_hi, 32); // convert big-endian to little-endian + -+ void verify(); -+ void print(); ++ Label L_third_loop_prologue; ++ bind(L_third_loop_prologue); + -+ // Creation -+ inline friend NativeMovConstReg* nativeMovConstReg_at(address addr); -+ inline friend NativeMovConstReg* nativeMovConstReg_before(address addr); -+}; ++ sd(ylen, Address(sp, wordSize)); ++ sd(x, Address(sp, 2 * wordSize)); ++ sd(xstart, Address(sp, 3 * wordSize)); ++ multiply_128_x_128_loop(y, z, carry, x, jdx, ylen, product, ++ tmp2, x_xstart, tmp3, tmp4, tmp6, product_hi); ++ ld(z, Address(sp, 0)); ++ ld(ylen, Address(sp, wordSize)); ++ ld(x, Address(sp, 2 * wordSize)); ++ ld(xlen, Address(sp, 3 * wordSize)); // copy old xstart -> xlen ++ addi(sp, sp, 4 * wordSize); + -+inline NativeMovConstReg* nativeMovConstReg_at(address addr) { -+ assert_cond(addr != NULL); -+ NativeMovConstReg* test = (NativeMovConstReg*)(addr - NativeMovConstReg::instruction_offset); -+#ifdef ASSERT -+ test->verify(); -+#endif -+ return test; -+} ++ addiw(tmp3, xlen, 1); ++ shadd(t0, tmp3, z, t0, LogBytesPerInt); ++ sw(carry, Address(t0, 0)); + -+inline NativeMovConstReg* nativeMovConstReg_before(address addr) { -+ assert_cond(addr != NULL); -+ NativeMovConstReg* test = (NativeMovConstReg*)(addr - NativeMovConstReg::instruction_size - NativeMovConstReg::instruction_offset); -+#ifdef ASSERT -+ test->verify(); -+#endif -+ return test; ++ subw(tmp3, tmp3, 1); ++ bltz(tmp3, L_done); ++ ++ srli(carry, carry, 32); ++ shadd(t0, tmp3, z, t0, LogBytesPerInt); ++ sw(carry, Address(t0, 0)); ++ j(L_second_loop_aligned); ++ ++ // Next infrequent code is moved outside loops. ++ bind(L_last_x); ++ lwu(product_hi, Address(x, 0)); ++ j(L_third_loop_prologue); ++ ++ bind(L_done); +} ++#endif + -+// RISCV should not use C1 runtime patching, so just leave NativeMovRegMem Unimplemented. -+class NativeMovRegMem: public NativeInstruction { -+ public: -+ int instruction_start() const { -+ Unimplemented(); -+ return 0; ++// Count bits of trailing zero chars from lsb to msb until first non-zero element. ++// For LL case, one byte for one element, so shift 8 bits once, and for other case, ++// shift 16 bits once. 
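++// E.g. (LL): a match mask of 0x0000_8000_0000_0000 gives ctz = 47, which is
++// rounded down to 40, i.e. the matching byte is element 5.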
++void MacroAssembler::ctzc_bit(Register Rd, Register Rs, bool isLL, Register tmp1, Register tmp2) ++{ ++ if (UseRVB) { ++ assert_different_registers(Rd, Rs, tmp1); ++ int step = isLL ? 8 : 16; ++ ctz(Rd, Rs); ++ andi(tmp1, Rd, step - 1); ++ sub(Rd, Rd, tmp1); ++ return; + } ++ assert_different_registers(Rd, Rs, tmp1, tmp2); ++ Label Loop; ++ int step = isLL ? 8 : 16; ++ li(Rd, -step); ++ mv(tmp2, Rs); + -+ address instruction_address() const { -+ Unimplemented(); -+ return NULL; -+ } ++ bind(Loop); ++ addi(Rd, Rd, step); ++ andi(tmp1, tmp2, ((1 << step) - 1)); ++ srli(tmp2, tmp2, step); ++ beqz(tmp1, Loop); ++} + -+ int num_bytes_to_end_of_patch() const { -+ Unimplemented(); -+ return 0; ++// This instruction reads adjacent 4 bytes from the lower half of source register, ++// inflate into a register, for example: ++// Rs: A7A6A5A4A3A2A1A0 ++// Rd: 00A300A200A100A0 ++void MacroAssembler::inflate_lo32(Register Rd, Register Rs, Register tmp1, Register tmp2) ++{ ++ assert_different_registers(Rd, Rs, tmp1, tmp2); ++ li(tmp1, 0xFF); ++ mv(Rd, zr); ++ for (int i = 0; i <= 3; i++) ++ { ++ andr(tmp2, Rs, tmp1); ++ if (i) { ++ slli(tmp2, tmp2, i * 8); ++ } ++ orr(Rd, Rd, tmp2); ++ if (i != 3) { ++ slli(tmp1, tmp1, 8); ++ } + } ++} + -+ int offset() const; -+ -+ void set_offset(int x); ++// This instruction reads adjacent 4 bytes from the upper half of source register, ++// inflate into a register, for example: ++// Rs: A7A6A5A4A3A2A1A0 ++// Rd: 00A700A600A500A4 ++void MacroAssembler::inflate_hi32(Register Rd, Register Rs, Register tmp1, Register tmp2) ++{ ++ assert_different_registers(Rd, Rs, tmp1, tmp2); ++ li(tmp1, 0xFF00000000); ++ mv(Rd, zr); ++ for (int i = 0; i <= 3; i++) ++ { ++ andr(tmp2, Rs, tmp1); ++ orr(Rd, Rd, tmp2); ++ srli(Rd, Rd, 8); ++ if (i != 3) { ++ slli(tmp1, tmp1, 8); ++ } ++ } ++} + -+ void add_offset_in_bytes(int add_offset) { Unimplemented(); } ++// The size of the blocks erased by the zero_blocks stub. We must ++// handle anything smaller than this ourselves in zero_words(). ++const int MacroAssembler::zero_words_block_size = 8; + -+ void verify(); -+ void print(); ++// zero_words() is used by C2 ClearArray patterns. It is as small as ++// possible, handling small word counts locally and delegating ++// anything larger to the zero_blocks stub. It is expanded many times ++// in compiled code, so it is important to keep it short. + -+ private: -+ inline friend NativeMovRegMem* nativeMovRegMem_at (address addr); -+}; ++// ptr: Address of a buffer to be zeroed. ++// cnt: Count in HeapWords. ++// ++// ptr, cnt, and t0 are clobbered. 
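++// ptr and cnt must be x28 and x29 respectively, matching the calling
++// convention of the zero_blocks stub (see the assert below).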
++address MacroAssembler::zero_words(Register ptr, Register cnt) ++{ ++ assert(is_power_of_2(zero_words_block_size), "adjust this"); ++ assert(ptr == x28 && cnt == x29, "mismatch in register usage"); ++ assert_different_registers(cnt, t0); + -+inline NativeMovRegMem* nativeMovRegMem_at (address addr) { -+ Unimplemented(); -+ return NULL; ++ BLOCK_COMMENT("zero_words {"); ++ mv(t0, zero_words_block_size); ++ Label around, done, done16; ++ bltu(cnt, t0, around); ++ { ++ RuntimeAddress zero_blocks = RuntimeAddress(StubRoutines::riscv::zero_blocks()); ++ assert(zero_blocks.target() != NULL, "zero_blocks stub has not been generated"); ++ if (StubRoutines::riscv::complete()) { ++ address tpc = trampoline_call(zero_blocks); ++ if (tpc == NULL) { ++ DEBUG_ONLY(reset_labels(around)); ++ postcond(pc() == badAddress); ++ return NULL; ++ } ++ } else { ++ jal(zero_blocks); ++ } ++ } ++ bind(around); ++ for (int i = zero_words_block_size >> 1; i > 1; i >>= 1) { ++ Label l; ++ andi(t0, cnt, i); ++ beqz(t0, l); ++ for (int j = 0; j < i; j++) { ++ sd(zr, Address(ptr, 0)); ++ addi(ptr, ptr, 8); ++ } ++ bind(l); ++ } ++ { ++ Label l; ++ andi(t0, cnt, 1); ++ beqz(t0, l); ++ sd(zr, Address(ptr, 0)); ++ bind(l); ++ } ++ BLOCK_COMMENT("} zero_words"); ++ postcond(pc() != badAddress); ++ return pc(); +} + -+class NativeJump: public NativeInstruction { -+ public: -+ enum RISCV_specific_constants { -+ instruction_size = NativeInstruction::instruction_size, -+ instruction_offset = 0, -+ data_offset = 0, -+ next_instruction_offset = NativeInstruction::instruction_size -+ }; -+ -+ address instruction_address() const { return addr_at(instruction_offset); } -+ address next_instruction_address() const { return addr_at(instruction_size); } -+ address jump_destination() const; ++#define SmallArraySize (18 * BytesPerLong) + -+ // Creation -+ inline friend NativeJump* nativeJump_at(address address); ++// base: Address of a buffer to be zeroed, 8 bytes aligned. ++// cnt: Immediate count in HeapWords. 
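++// Small counts are fully unrolled; larger counts peel cnt % 8 stores and
++// then loop eight stores at a time, clobbering t0 and t1.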
++void MacroAssembler::zero_words(Register base, u_int64_t cnt) ++{ ++ assert_different_registers(base, t0, t1); + -+ void verify(); ++ BLOCK_COMMENT("zero_words {"); + -+ // Unit testing stuff -+ static void test() {} ++ if (cnt <= SmallArraySize / BytesPerLong) { ++ for (int i = 0; i < (int)cnt; i++) { ++ sd(zr, Address(base, i * wordSize)); ++ } ++ } else { ++ const int unroll = 8; // Number of sd(zr, adr), instructions we'll unroll ++ int remainder = cnt % unroll; ++ for (int i = 0; i < remainder; i++) { ++ sd(zr, Address(base, i * wordSize)); ++ } + -+ // Insertion of native jump instruction -+ static void insert(address code_pos, address entry); -+ // MT-safe insertion of native jump at verified method entry -+ static void check_verified_entry_alignment(address entry, address verified_entry); -+ static void patch_verified_entry(address entry, address verified_entry, address dest); -+}; ++ Label loop; ++ Register cnt_reg = t0; ++ Register loop_base = t1; ++ cnt = cnt - remainder; ++ li(cnt_reg, cnt); ++ add(loop_base, base, remainder * wordSize); ++ bind(loop); ++ sub(cnt_reg, cnt_reg, unroll); ++ for (int i = 0; i < unroll; i++) { ++ sd(zr, Address(loop_base, i * wordSize)); ++ } ++ add(loop_base, loop_base, unroll * wordSize); ++ bnez(cnt_reg, loop); ++ } + -+inline NativeJump* nativeJump_at(address addr) { -+ NativeJump* jump = (NativeJump*)(addr - NativeJump::instruction_offset); -+#ifdef ASSERT -+ jump->verify(); -+#endif -+ return jump; ++ BLOCK_COMMENT("} zero_words"); +} + -+class NativeGeneralJump: public NativeJump { -+public: -+ enum RISCV_specific_constants { -+ instruction_size = 6 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli, jalr -+ instruction_offset = 0, -+ data_offset = 0, -+ next_instruction_offset = 6 * NativeInstruction::instruction_size // lui, addi, slli, addi, slli, jalr -+ }; ++// base: Address of a buffer to be filled, 8 bytes aligned. ++// cnt: Count in 8-byte unit. ++// value: Value to be filled with. ++// base will point to the end of the buffer after filling. ++void MacroAssembler::fill_words(Register base, Register cnt, Register value) ++{ ++// Algorithm: ++// ++// t0 = cnt & 7 ++// cnt -= t0 ++// p += t0 ++// switch (t0): ++// switch start: ++// do while cnt ++// cnt -= 8 ++// p[-8] = value ++// case 7: ++// p[-7] = value ++// case 6: ++// p[-6] = value ++// // ... ++// case 1: ++// p[-1] = value ++// case 0: ++// p += 8 ++// do-while end ++// switch end + -+ address jump_destination() const; ++ assert_different_registers(base, cnt, value, t0, t1); + -+ static void insert_unconditional(address code_pos, address entry); -+ static void replace_mt_safe(address instr_addr, address code_buffer); -+}; ++ Label fini, skip, entry, loop; ++ const int unroll = 8; // Number of sd instructions we'll unroll + -+inline NativeGeneralJump* nativeGeneralJump_at(address addr) { -+ assert_cond(addr != NULL); -+ NativeGeneralJump* jump = (NativeGeneralJump*)(addr); -+ debug_only(jump->verify();) -+ return jump; -+} ++ beqz(cnt, fini); + -+class NativeIllegalInstruction: public NativeInstruction { -+ public: -+ // Insert illegal opcode as specific address -+ static void insert(address code_pos); -+}; ++ andi(t0, cnt, unroll - 1); ++ sub(cnt, cnt, t0); ++ // align 8, so first sd n % 8 = mod, next loop sd 8 * n. 
++ shadd(base, t0, base, t1, 3); ++ la(t1, entry); ++ slli(t0, t0, 2); // sd_inst_nums * 4; t0 is cnt % 8, so t1 = t1 - sd_inst_nums * 4, 4 is sizeof(inst) ++ sub(t1, t1, t0); ++ jr(t1); + -+inline bool NativeInstruction::is_nop() { -+ uint32_t insn = *(uint32_t*)addr_at(0); -+ return insn == 0x13; ++ bind(loop); ++ add(base, base, unroll * 8); ++ for (int i = -unroll; i < 0; i++) { ++ sd(value, Address(base, i * 8)); ++ } ++ bind(entry); ++ sub(cnt, cnt, unroll); ++ bgez(cnt, loop); ++ ++ bind(fini); +} + -+inline bool NativeInstruction::is_jump_or_nop() { -+ return is_nop() || is_jump(); ++#define FCVT_SAFE(FLOATCVT, FLOATEQ) \ ++void MacroAssembler:: FLOATCVT##_safe(Register dst, FloatRegister src, Register tmp) { \ ++ Label L_Okay; \ ++ fscsr(zr); \ ++ FLOATCVT(dst, src); \ ++ frcsr(tmp); \ ++ andi(tmp, tmp, 0x1E); \ ++ beqz(tmp, L_Okay); \ ++ FLOATEQ(tmp, src, src); \ ++ bnez(tmp, L_Okay); \ ++ mv(dst, zr); \ ++ bind(L_Okay); \ +} + -+// Call trampoline stubs. -+class NativeCallTrampolineStub : public NativeInstruction { -+ public: ++FCVT_SAFE(fcvt_w_s, feq_s) ++FCVT_SAFE(fcvt_l_s, feq_s) ++FCVT_SAFE(fcvt_w_d, feq_d) ++FCVT_SAFE(fcvt_l_d, feq_d) + -+ enum RISCV_specific_constants { -+ // Refer to function emit_trampoline_stub. -+ instruction_size = 3 * NativeInstruction::instruction_size + wordSize, // auipc + ld + jr + target address -+ data_offset = 3 * NativeInstruction::instruction_size, // auipc + ld + jr -+ }; ++#undef FCVT_SAFE + -+ address destination(nmethod *nm = NULL) const; -+ void set_destination(address new_destination); -+ ptrdiff_t destination_offset() const; -+}; ++#define FCMP(FLOATTYPE, FLOATSIG) \ ++void MacroAssembler::FLOATTYPE##_compare(Register result, FloatRegister Rs1, \ ++ FloatRegister Rs2, int unordered_result) { \ ++ Label Ldone; \ ++ if (unordered_result < 0) { \ ++ /* we want -1 for unordered or less than, 0 for equal and 1 for greater than. */ \ ++ /* installs 1 if gt else 0 */ \ ++ flt_##FLOATSIG(result, Rs2, Rs1); \ ++ /* Rs1 > Rs2, install 1 */ \ ++ bgtz(result, Ldone); \ ++ feq_##FLOATSIG(result, Rs1, Rs2); \ ++ addi(result, result, -1); \ ++ /* Rs1 = Rs2, install 0 */ \ ++ /* NaN or Rs1 < Rs2, install -1 */ \ ++ bind(Ldone); \ ++ } else { \ ++ /* we want -1 for less than, 0 for equal and 1 for unordered or greater than. */ \ ++ /* installs 1 if gt or unordered else 0 */ \ ++ flt_##FLOATSIG(result, Rs1, Rs2); \ ++ /* Rs1 < Rs2, install -1 */ \ ++ bgtz(result, Ldone); \ ++ feq_##FLOATSIG(result, Rs1, Rs2); \ ++ addi(result, result, -1); \ ++ /* Rs1 = Rs2, install 0 */ \ ++ /* NaN or Rs1 > Rs2, install 1 */ \ ++ bind(Ldone); \ ++ neg(result, result); \ ++ } \ ++} + -+inline bool is_NativeCallTrampolineStub_at(address addr) { -+ // Ensure that the stub is exactly -+ // ld t0, L--->auipc + ld -+ // jr t0 -+ // L: ++FCMP(float, s); ++FCMP(double, d); + -+ // judge inst + register + imm -+ // 1). check the instructions: auipc + ld + jalr -+ // 2). check if auipc[11:7] == t0 and ld[11:7] == t0 and ld[19:15] == t0 && jr[19:15] == t0 -+ // 3). 
check if the offset in ld[31:20] equals the data_offset -+ assert_cond(addr != NULL); -+ const int instr_size = NativeInstruction::instruction_size; -+ if (NativeInstruction::is_auipc_at(addr) && NativeInstruction::is_ld_at(addr + instr_size) && NativeInstruction::is_jalr_at(addr + 2 * instr_size) && -+ (NativeInstruction::extract_rd(addr) == x5) && -+ (NativeInstruction::extract_rd(addr + instr_size) == x5) && -+ (NativeInstruction::extract_rs1(addr + instr_size) == x5) && -+ (NativeInstruction::extract_rs1(addr + 2 * instr_size) == x5) && -+ (Assembler::extract(((unsigned*)addr)[1], 31, 20) == NativeCallTrampolineStub::data_offset)) { -+ return true; -+ } -+ return false; -+} ++#undef FCMP + -+inline NativeCallTrampolineStub* nativeCallTrampolineStub_at(address addr) { -+ assert_cond(addr != NULL); -+ assert(is_NativeCallTrampolineStub_at(addr), "no call trampoline found"); -+ return (NativeCallTrampolineStub*)addr; -+} ++// Zero words; len is in bytes ++// Destroys all registers except addr ++// len must be a nonzero multiple of wordSize ++void MacroAssembler::zero_memory(Register addr, Register len, Register tmp) { ++ assert_different_registers(addr, len, tmp, t0, t1); + -+class NativeMembar : public NativeInstruction { -+public: -+ uint32_t get_kind(); -+ void set_kind(uint32_t order_kind); -+}; ++#ifdef ASSERT ++ { ++ Label L; ++ andi(t0, len, BytesPerWord - 1); ++ beqz(t0, L); ++ stop("len is not a multiple of BytesPerWord"); ++ bind(L); ++ } ++#endif // ASSERT + -+inline NativeMembar *NativeMembar_at(address addr) { -+ assert_cond(addr != NULL); -+ assert(nativeInstruction_at(addr)->is_membar(), "no membar found"); -+ return (NativeMembar*)addr; -+} ++#ifndef PRODUCT ++ block_comment("zero memory"); ++#endif // PRODUCT + -+#endif // CPU_RISCV_NATIVEINST_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/registerMap_riscv.hpp b/src/hotspot/cpu/riscv/registerMap_riscv.hpp -new file mode 100644 -index 000000000..04a36c1c7 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/registerMap_riscv.hpp -@@ -0,0 +1,46 @@ -+/* -+ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ -+ -+#ifndef CPU_RISCV_REGISTERMAP_RISCV_HPP -+#define CPU_RISCV_REGISTERMAP_RISCV_HPP -+ -+// machine-dependent implemention for register maps -+ friend class frame; -+ -+ private: -+ // This is the hook for finding a register in an "well-known" location, -+ // such as a register block of a predetermined format. -+ // Since there is none, we just return NULL. -+ // See registerMap_riscv.hpp for an example of grabbing registers -+ // from register save areas of a standard layout. -+ address pd_location(VMReg reg) const {return NULL;} -+ -+ // no PD state to clear or copy: -+ void pd_clear() {} -+ void pd_initialize() {} -+ void pd_initialize_from(const RegisterMap* map) {} -+ -+#endif // CPU_RISCV_REGISTERMAP_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/register_definitions_riscv.cpp b/src/hotspot/cpu/riscv/register_definitions_riscv.cpp -new file mode 100644 -index 000000000..b30c1b107 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/register_definitions_riscv.cpp -@@ -0,0 +1,193 @@ -+/* -+ * Copyright (c) 2002, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ -+ -+#include "precompiled.hpp" -+#include "asm/assembler.hpp" -+#include "asm/macroAssembler.inline.hpp" -+#include "asm/register.hpp" -+#include "interp_masm_riscv.hpp" -+#include "register_riscv.hpp" -+ -+REGISTER_DEFINITION(Register, noreg); -+ -+REGISTER_DEFINITION(Register, x0); -+REGISTER_DEFINITION(Register, x1); -+REGISTER_DEFINITION(Register, x2); -+REGISTER_DEFINITION(Register, x3); -+REGISTER_DEFINITION(Register, x4); -+REGISTER_DEFINITION(Register, x5); -+REGISTER_DEFINITION(Register, x6); -+REGISTER_DEFINITION(Register, x7); -+REGISTER_DEFINITION(Register, x8); -+REGISTER_DEFINITION(Register, x9); -+REGISTER_DEFINITION(Register, x10); -+REGISTER_DEFINITION(Register, x11); -+REGISTER_DEFINITION(Register, x12); -+REGISTER_DEFINITION(Register, x13); -+REGISTER_DEFINITION(Register, x14); -+REGISTER_DEFINITION(Register, x15); -+REGISTER_DEFINITION(Register, x16); -+REGISTER_DEFINITION(Register, x17); -+REGISTER_DEFINITION(Register, x18); -+REGISTER_DEFINITION(Register, x19); -+REGISTER_DEFINITION(Register, x20); -+REGISTER_DEFINITION(Register, x21); -+REGISTER_DEFINITION(Register, x22); -+REGISTER_DEFINITION(Register, x23); -+REGISTER_DEFINITION(Register, x24); -+REGISTER_DEFINITION(Register, x25); -+REGISTER_DEFINITION(Register, x26); -+REGISTER_DEFINITION(Register, x27); -+REGISTER_DEFINITION(Register, x28); -+REGISTER_DEFINITION(Register, x29); -+REGISTER_DEFINITION(Register, x30); -+REGISTER_DEFINITION(Register, x31); -+ -+REGISTER_DEFINITION(FloatRegister, fnoreg); -+ -+REGISTER_DEFINITION(FloatRegister, f0); -+REGISTER_DEFINITION(FloatRegister, f1); -+REGISTER_DEFINITION(FloatRegister, f2); -+REGISTER_DEFINITION(FloatRegister, f3); -+REGISTER_DEFINITION(FloatRegister, f4); -+REGISTER_DEFINITION(FloatRegister, f5); -+REGISTER_DEFINITION(FloatRegister, f6); -+REGISTER_DEFINITION(FloatRegister, f7); -+REGISTER_DEFINITION(FloatRegister, f8); -+REGISTER_DEFINITION(FloatRegister, f9); -+REGISTER_DEFINITION(FloatRegister, f10); -+REGISTER_DEFINITION(FloatRegister, f11); -+REGISTER_DEFINITION(FloatRegister, f12); -+REGISTER_DEFINITION(FloatRegister, f13); -+REGISTER_DEFINITION(FloatRegister, f14); -+REGISTER_DEFINITION(FloatRegister, f15); -+REGISTER_DEFINITION(FloatRegister, f16); -+REGISTER_DEFINITION(FloatRegister, f17); -+REGISTER_DEFINITION(FloatRegister, f18); -+REGISTER_DEFINITION(FloatRegister, f19); -+REGISTER_DEFINITION(FloatRegister, f20); -+REGISTER_DEFINITION(FloatRegister, f21); -+REGISTER_DEFINITION(FloatRegister, f22); -+REGISTER_DEFINITION(FloatRegister, f23); -+REGISTER_DEFINITION(FloatRegister, f24); -+REGISTER_DEFINITION(FloatRegister, f25); -+REGISTER_DEFINITION(FloatRegister, f26); -+REGISTER_DEFINITION(FloatRegister, f27); -+REGISTER_DEFINITION(FloatRegister, f28); -+REGISTER_DEFINITION(FloatRegister, f29); -+REGISTER_DEFINITION(FloatRegister, f30); -+REGISTER_DEFINITION(FloatRegister, f31); -+ -+REGISTER_DEFINITION(VectorRegister, vnoreg); -+ -+REGISTER_DEFINITION(VectorRegister, v0); -+REGISTER_DEFINITION(VectorRegister, v1); -+REGISTER_DEFINITION(VectorRegister, v2); -+REGISTER_DEFINITION(VectorRegister, v3); -+REGISTER_DEFINITION(VectorRegister, v4); -+REGISTER_DEFINITION(VectorRegister, v5); -+REGISTER_DEFINITION(VectorRegister, v6); -+REGISTER_DEFINITION(VectorRegister, v7); -+REGISTER_DEFINITION(VectorRegister, v8); -+REGISTER_DEFINITION(VectorRegister, v9); -+REGISTER_DEFINITION(VectorRegister, v10); -+REGISTER_DEFINITION(VectorRegister, v11); -+REGISTER_DEFINITION(VectorRegister, v12); -+REGISTER_DEFINITION(VectorRegister, v13); 
-+REGISTER_DEFINITION(VectorRegister, v14); -+REGISTER_DEFINITION(VectorRegister, v15); -+REGISTER_DEFINITION(VectorRegister, v16); -+REGISTER_DEFINITION(VectorRegister, v17); -+REGISTER_DEFINITION(VectorRegister, v18); -+REGISTER_DEFINITION(VectorRegister, v19); -+REGISTER_DEFINITION(VectorRegister, v20); -+REGISTER_DEFINITION(VectorRegister, v21); -+REGISTER_DEFINITION(VectorRegister, v22); -+REGISTER_DEFINITION(VectorRegister, v23); -+REGISTER_DEFINITION(VectorRegister, v24); -+REGISTER_DEFINITION(VectorRegister, v25); -+REGISTER_DEFINITION(VectorRegister, v26); -+REGISTER_DEFINITION(VectorRegister, v27); -+REGISTER_DEFINITION(VectorRegister, v28); -+REGISTER_DEFINITION(VectorRegister, v29); -+REGISTER_DEFINITION(VectorRegister, v30); -+REGISTER_DEFINITION(VectorRegister, v31); ++ Label loop; ++ Label entry; + -+REGISTER_DEFINITION(Register, c_rarg0); -+REGISTER_DEFINITION(Register, c_rarg1); -+REGISTER_DEFINITION(Register, c_rarg2); -+REGISTER_DEFINITION(Register, c_rarg3); -+REGISTER_DEFINITION(Register, c_rarg4); -+REGISTER_DEFINITION(Register, c_rarg5); -+REGISTER_DEFINITION(Register, c_rarg6); -+REGISTER_DEFINITION(Register, c_rarg7); ++ // Algorithm: ++ // ++ // t0 = cnt & 7 ++ // cnt -= t0 ++ // p += t0 ++ // switch (t0) { ++ // do { ++ // cnt -= 8 ++ // p[-8] = 0 ++ // case 7: ++ // p[-7] = 0 ++ // case 6: ++ // p[-6] = 0 ++ // ... ++ // case 1: ++ // p[-1] = 0 ++ // case 0: ++ // p += 8 ++ // } while (cnt) ++ // } + -+REGISTER_DEFINITION(FloatRegister, c_farg0); -+REGISTER_DEFINITION(FloatRegister, c_farg1); -+REGISTER_DEFINITION(FloatRegister, c_farg2); -+REGISTER_DEFINITION(FloatRegister, c_farg3); -+REGISTER_DEFINITION(FloatRegister, c_farg4); -+REGISTER_DEFINITION(FloatRegister, c_farg5); -+REGISTER_DEFINITION(FloatRegister, c_farg6); -+REGISTER_DEFINITION(FloatRegister, c_farg7); ++ const int unroll = 8; // Number of sd(zr) instructions we'll unroll + -+REGISTER_DEFINITION(Register, j_rarg0); -+REGISTER_DEFINITION(Register, j_rarg1); -+REGISTER_DEFINITION(Register, j_rarg2); -+REGISTER_DEFINITION(Register, j_rarg3); -+REGISTER_DEFINITION(Register, j_rarg4); -+REGISTER_DEFINITION(Register, j_rarg5); -+REGISTER_DEFINITION(Register, j_rarg6); -+REGISTER_DEFINITION(Register, j_rarg7); ++ srli(len, len, LogBytesPerWord); ++ andi(t0, len, unroll - 1); // t0 = cnt % unroll ++ sub(len, len, t0); // cnt -= unroll ++ // tmp always points to the end of the region we're about to zero ++ shadd(tmp, t0, addr, t1, LogBytesPerWord); ++ la(t1, entry); ++ slli(t0, t0, 2); ++ sub(t1, t1, t0); ++ jr(t1); ++ bind(loop); ++ sub(len, len, unroll); ++ for (int i = -unroll; i < 0; i++) { ++ Assembler::sd(zr, Address(tmp, i * wordSize)); ++ } ++ bind(entry); ++ add(tmp, tmp, unroll * wordSize); ++ bnez(len, loop); ++} + -+REGISTER_DEFINITION(FloatRegister, j_farg0); -+REGISTER_DEFINITION(FloatRegister, j_farg1); -+REGISTER_DEFINITION(FloatRegister, j_farg2); -+REGISTER_DEFINITION(FloatRegister, j_farg3); -+REGISTER_DEFINITION(FloatRegister, j_farg4); -+REGISTER_DEFINITION(FloatRegister, j_farg5); -+REGISTER_DEFINITION(FloatRegister, j_farg6); -+REGISTER_DEFINITION(FloatRegister, j_farg7); ++// shift left by shamt and add ++// Rd = (Rs1 << shamt) + Rs2 ++void MacroAssembler::shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt) { ++ if (UseRVB) { ++ if (shamt == 1) { ++ sh1add(Rd, Rs1, Rs2); ++ return; ++ } else if (shamt == 2) { ++ sh2add(Rd, Rs1, Rs2); ++ return; ++ } else if (shamt == 3) { ++ sh3add(Rd, Rs1, Rs2); ++ return; ++ } ++ } + -+REGISTER_DEFINITION(Register, zr); 
-+REGISTER_DEFINITION(Register, gp); -+REGISTER_DEFINITION(Register, tp); -+REGISTER_DEFINITION(Register, xmethod); -+REGISTER_DEFINITION(Register, ra); -+REGISTER_DEFINITION(Register, sp); -+REGISTER_DEFINITION(Register, fp); -+REGISTER_DEFINITION(Register, xheapbase); -+REGISTER_DEFINITION(Register, xcpool); -+REGISTER_DEFINITION(Register, xmonitors); -+REGISTER_DEFINITION(Register, xlocals); -+REGISTER_DEFINITION(Register, xthread); -+REGISTER_DEFINITION(Register, xbcp); -+REGISTER_DEFINITION(Register, xdispatch); -+REGISTER_DEFINITION(Register, esp); ++ if (shamt != 0) { ++ slli(tmp, Rs1, shamt); ++ add(Rd, Rs2, tmp); ++ } else { ++ add(Rd, Rs1, Rs2); ++ } ++} + -+REGISTER_DEFINITION(Register, t0); -+REGISTER_DEFINITION(Register, t1); -+REGISTER_DEFINITION(Register, t2); -diff --git a/src/hotspot/cpu/riscv/register_riscv.cpp b/src/hotspot/cpu/riscv/register_riscv.cpp -new file mode 100644 -index 000000000..76215ef2a ---- /dev/null -+++ b/src/hotspot/cpu/riscv/register_riscv.cpp -@@ -0,0 +1,69 @@ -+/* -+ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ ++void MacroAssembler::zero_extend(Register dst, Register src, int bits) { ++ if (UseRVB) { ++ if (bits == 16) { ++ zext_h(dst, src); ++ return; ++ } else if (bits == 32) { ++ zext_w(dst, src); ++ return; ++ } ++ } + -+#include "precompiled.hpp" -+#include "register_riscv.hpp" ++ if (bits == 8) { ++ zext_b(dst, src); ++ } else { ++ slli(dst, src, XLEN - bits); ++ srli(dst, dst, XLEN - bits); ++ } ++} + -+const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers * -+ RegisterImpl::max_slots_per_register; -+const int ConcreteRegisterImpl::max_fpr = -+ ConcreteRegisterImpl::max_gpr + -+ FloatRegisterImpl::number_of_registers * FloatRegisterImpl::max_slots_per_register; ++void MacroAssembler::sign_extend(Register dst, Register src, int bits) { ++ if (UseRVB) { ++ if (bits == 8) { ++ sext_b(dst, src); ++ return; ++ } else if (bits == 16) { ++ sext_h(dst, src); ++ return; ++ } ++ } + -+const int ConcreteRegisterImpl::max_vpr = -+ ConcreteRegisterImpl::max_fpr + -+ VectorRegisterImpl::number_of_registers * VectorRegisterImpl::max_slots_per_register; ++ if (bits == 32) { ++ sext_w(dst, src); ++ } else { ++ slli(dst, src, XLEN - bits); ++ srai(dst, dst, XLEN - bits); ++ } ++} + ++void MacroAssembler::cmp_l2i(Register dst, Register src1, Register src2, Register tmp) ++{ ++ if (src1 == src2) { ++ mv(dst, zr); ++ return; ++ } ++ Label done; ++ Register left = src1; ++ Register right = src2; ++ if (dst == src1) { ++ assert_different_registers(dst, src2, tmp); ++ mv(tmp, src1); ++ left = tmp; ++ } else if (dst == src2) { ++ assert_different_registers(dst, src1, tmp); ++ mv(tmp, src2); ++ right = tmp; ++ } + -+const char* RegisterImpl::name() const { -+ const char* names[number_of_registers] = { -+ "zr", "ra", "sp", "gp", "tp", "x5", "x6", "x7", "fp", "x9", -+ "c_rarg0", "c_rarg1", "c_rarg2", "c_rarg3", "c_rarg4", "c_rarg5", "c_rarg6", "c_rarg7", -+ "x18", "x19", "esp", "xdispatch", "xbcp", "xthread", "xlocals", -+ "xmonitors", "xcpool", "xheapbase", "x28", "x29", "x30", "xmethod" -+ }; -+ return is_valid() ? names[encoding()] : "noreg"; ++ // installs 1 if gt else 0 ++ slt(dst, right, left); ++ bnez(dst, done); ++ slt(dst, left, right); ++ // dst = -1 if lt; else if eq , dst = 0 ++ neg(dst, dst); ++ bind(done); +} + -+const char* FloatRegisterImpl::name() const { -+ const char* names[number_of_registers] = { -+ "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", -+ "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", -+ "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", -+ "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31" -+ }; -+ return is_valid() ? names[encoding()] : "noreg"; ++void MacroAssembler::safepoint_ifence() { ++ ifence(); ++#ifndef PRODUCT ++ if (VerifyCrossModifyFence) { ++ // Clear the thread state. ++ sb(zr, Address(xthread, in_bytes(JavaThread::requires_cross_modify_fence_offset()))); ++ } ++#endif +} + -+const char* VectorRegisterImpl::name() const { -+ const char* names[number_of_registers] = { -+ "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", -+ "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", -+ "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", -+ "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31" -+ }; -+ return is_valid() ? names[encoding()] : "noreg"; ++#ifndef PRODUCT ++void MacroAssembler::verify_cross_modify_fence_not_required() { ++ if (VerifyCrossModifyFence) { ++ // Check if thread needs a cross modify fence. 
++ lbu(t0, Address(xthread, in_bytes(JavaThread::requires_cross_modify_fence_offset()))); ++ Label fence_not_required; ++ beqz(t0, fence_not_required); ++ // If it does then fail. ++ la(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::verify_cross_modify_fence_failure))); ++ mv(c_rarg0, xthread); ++ jalr(t0); ++ bind(fence_not_required); ++ } +} -diff --git a/src/hotspot/cpu/riscv/register_riscv.hpp b/src/hotspot/cpu/riscv/register_riscv.hpp ++#endif +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp new file mode 100644 -index 000000000..8beba6776 +index 00000000000..23e09475be1 --- /dev/null -+++ b/src/hotspot/cpu/riscv/register_riscv.hpp -@@ -0,0 +1,337 @@ ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +@@ -0,0 +1,858 @@ +/* -+ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -27087,22119 +26571,23371 @@ index 000000000..8beba6776 + * + */ + -+#ifndef CPU_RISCV_REGISTER_RISCV_HPP -+#define CPU_RISCV_REGISTER_RISCV_HPP -+ -+#include "asm/register.hpp" -+ -+#define CSR_FFLAGS 0x001 // Floating-Point Accrued Exceptions. -+#define CSR_FRM 0x002 // Floating-Point Dynamic Rounding Mode. -+#define CSR_FCSR 0x003 // Floating-Point Control and Status Register (frm + fflags). -+#define CSR_VSTART 0x008 // Vector start position -+#define CSR_VXSAT 0x009 // Fixed-Point Saturate Flag -+#define CSR_VXRM 0x00A // Fixed-Point Rounding Mode -+#define CSR_VCSR 0x00F // Vector control and status register -+#define CSR_VL 0xC20 // Vector length -+#define CSR_VTYPE 0xC21 // Vector data type register -+#define CSR_VLENB 0xC22 // VLEN/8 (vector register length in bytes) -+#define CSR_CYCLE 0xc00 // Cycle counter for RDCYCLE instruction. -+#define CSR_TIME 0xc01 // Timer for RDTIME instruction. -+#define CSR_INSTERT 0xc02 // Instructions-retired counter for RDINSTRET instruction. ++#ifndef CPU_RISCV_MACROASSEMBLER_RISCV_HPP ++#define CPU_RISCV_MACROASSEMBLER_RISCV_HPP + -+class VMRegImpl; -+typedef VMRegImpl* VMReg; ++#include "asm/assembler.hpp" ++#include "metaprogramming/enableIf.hpp" ++#include "oops/compressedOops.hpp" ++#include "utilities/powerOfTwo.hpp" + -+// Use Register as shortcut -+class RegisterImpl; -+typedef RegisterImpl* Register; ++// MacroAssembler extends Assembler by frequently used macros. ++// ++// Instructions for which a 'better' code sequence exists depending ++// on arguments should also go in here. 
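++// (For example, the mv(Rd, imm) overloads declared below forward to li(), which
++// is expected to pick the shortest instruction sequence for the given constant
++// rather than a fixed-length load; that is the kind of 'better' sequence meant here.)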
+ -+inline Register as_Register(int encoding) { -+ return (Register)(intptr_t) encoding; -+} ++class MacroAssembler: public Assembler { + -+class RegisterImpl: public AbstractRegisterImpl { + public: -+ enum { -+ number_of_registers = 32, -+ number_of_byte_registers = 32, -+ max_slots_per_register = 2 -+ }; -+ -+ // derived registers, offsets, and addresses -+ Register successor() const { return as_Register(encoding() + 1); } ++ MacroAssembler(CodeBuffer* code) : Assembler(code) { ++ } ++ virtual ~MacroAssembler() {} + -+ // construction -+ inline friend Register as_Register(int encoding); ++ void safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod); + -+ VMReg as_VMReg(); ++ // Place a fence.i after code may have been modified due to a safepoint. ++ void safepoint_ifence(); + -+ // accessors -+ int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } -+ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } -+ bool has_byte_register() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_byte_registers; } -+ const char* name() const; -+ int encoding_nocheck() const { return (intptr_t)this; } ++ // Alignment ++ void align(int modulus, int extra_offset = 0); + -+ // Return the bit which represents this register. This is intended -+ // to be ORed into a bitmask: for usage see class RegSet below. -+ unsigned long bit(bool should_set = true) const { return should_set ? 1 << encoding() : 0; } -+}; ++ // Stack frame creation/removal ++ // Note that SP must be updated to the right place before saving/restoring RA and FP ++ // because signal based thread suspend/resume could happen asynchronously. ++ void enter() { ++ addi(sp, sp, - 2 * wordSize); ++ sd(ra, Address(sp, wordSize)); ++ sd(fp, Address(sp)); ++ addi(fp, sp, 2 * wordSize); ++ } + -+// The integer registers of the riscv architecture ++ void leave() { ++ addi(sp, fp, - 2 * wordSize); ++ ld(fp, Address(sp)); ++ ld(ra, Address(sp, wordSize)); ++ addi(sp, sp, 2 * wordSize); ++ } + -+CONSTANT_REGISTER_DECLARATION(Register, noreg, (-1)); + -+CONSTANT_REGISTER_DECLARATION(Register, x0, (0)); -+CONSTANT_REGISTER_DECLARATION(Register, x1, (1)); -+CONSTANT_REGISTER_DECLARATION(Register, x2, (2)); -+CONSTANT_REGISTER_DECLARATION(Register, x3, (3)); -+CONSTANT_REGISTER_DECLARATION(Register, x4, (4)); -+CONSTANT_REGISTER_DECLARATION(Register, x5, (5)); -+CONSTANT_REGISTER_DECLARATION(Register, x6, (6)); -+CONSTANT_REGISTER_DECLARATION(Register, x7, (7)); -+CONSTANT_REGISTER_DECLARATION(Register, x8, (8)); -+CONSTANT_REGISTER_DECLARATION(Register, x9, (9)); -+CONSTANT_REGISTER_DECLARATION(Register, x10, (10)); -+CONSTANT_REGISTER_DECLARATION(Register, x11, (11)); -+CONSTANT_REGISTER_DECLARATION(Register, x12, (12)); -+CONSTANT_REGISTER_DECLARATION(Register, x13, (13)); -+CONSTANT_REGISTER_DECLARATION(Register, x14, (14)); -+CONSTANT_REGISTER_DECLARATION(Register, x15, (15)); -+CONSTANT_REGISTER_DECLARATION(Register, x16, (16)); -+CONSTANT_REGISTER_DECLARATION(Register, x17, (17)); -+CONSTANT_REGISTER_DECLARATION(Register, x18, (18)); -+CONSTANT_REGISTER_DECLARATION(Register, x19, (19)); -+CONSTANT_REGISTER_DECLARATION(Register, x20, (20)); -+CONSTANT_REGISTER_DECLARATION(Register, x21, (21)); -+CONSTANT_REGISTER_DECLARATION(Register, x22, (22)); -+CONSTANT_REGISTER_DECLARATION(Register, x23, (23)); -+CONSTANT_REGISTER_DECLARATION(Register, x24, (24)); -+CONSTANT_REGISTER_DECLARATION(Register, x25, (25)); 
-+CONSTANT_REGISTER_DECLARATION(Register, x26, (26)); -+CONSTANT_REGISTER_DECLARATION(Register, x27, (27)); -+CONSTANT_REGISTER_DECLARATION(Register, x28, (28)); -+CONSTANT_REGISTER_DECLARATION(Register, x29, (29)); -+CONSTANT_REGISTER_DECLARATION(Register, x30, (30)); -+CONSTANT_REGISTER_DECLARATION(Register, x31, (31)); ++ // Support for getting the JavaThread pointer (i.e.; a reference to thread-local information) ++ // The pointer will be loaded into the thread register. ++ void get_thread(Register thread); + -+// Use FloatRegister as shortcut -+class FloatRegisterImpl; -+typedef FloatRegisterImpl* FloatRegister; ++ // Support for VM calls ++ // ++ // It is imperative that all calls into the VM are handled via the call_VM macros. ++ // They make sure that the stack linkage is setup correctly. call_VM's correspond ++ // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points. + -+inline FloatRegister as_FloatRegister(int encoding) { -+ return (FloatRegister)(intptr_t) encoding; -+} ++ void call_VM(Register oop_result, ++ address entry_point, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, Register arg_2, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, Register arg_2, Register arg_3, ++ bool check_exceptions = true); + -+// The implementation of floating point registers for the architecture -+class FloatRegisterImpl: public AbstractRegisterImpl { -+ public: -+ enum { -+ number_of_registers = 32, -+ max_slots_per_register = 2 -+ }; ++ // Overloadings with last_Java_sp ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments = 0, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, Register arg_2, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, Register arg_2, Register arg_3, ++ bool check_exceptions = true); + -+ // construction -+ inline friend FloatRegister as_FloatRegister(int encoding); ++ void get_vm_result(Register oop_result, Register java_thread); ++ void get_vm_result_2(Register metadata_result, Register java_thread); + -+ VMReg as_VMReg(); ++ // These always tightly bind to MacroAssembler::call_VM_leaf_base ++ // bypassing the virtual implementation ++ void call_VM_leaf(address entry_point, ++ int number_of_arguments = 0); ++ void call_VM_leaf(address entry_point, ++ Register arg_0); ++ void call_VM_leaf(address entry_point, ++ Register arg_0, Register arg_1); ++ void call_VM_leaf(address entry_point, ++ Register arg_0, Register arg_1, Register arg_2); + -+ // derived registers, offsets, and addresses -+ FloatRegister successor() const { return as_FloatRegister(encoding() + 1); } ++ // These always tightly bind to MacroAssembler::call_VM_base ++ // bypassing the virtual implementation ++ void super_call_VM_leaf(address entry_point, Register arg_0); ++ void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1); ++ void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2); ++ void 
super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3); + -+ // accessors -+ int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } -+ int encoding_nocheck() const { return (intptr_t)this; } -+ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } -+ const char* name() const; ++ // last Java Frame (fills frame anchor) ++ void set_last_Java_frame(Register last_java_sp, Register last_java_fp, address last_java_pc, Register tmp); ++ void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Label &last_java_pc, Register tmp); ++ void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Register last_java_pc, Register tmp); + -+}; ++ // thread in the default location (xthread) ++ void reset_last_Java_frame(bool clear_fp); + -+// The float registers of the RISCV architecture ++ void call_native(address entry_point, ++ Register arg_0); ++ void call_native_base( ++ address entry_point, // the entry point ++ Label* retaddr = NULL ++ ); + -+CONSTANT_REGISTER_DECLARATION(FloatRegister, fnoreg , (-1)); ++ virtual void call_VM_leaf_base( ++ address entry_point, // the entry point ++ int number_of_arguments, // the number of arguments to pop after the call ++ Label* retaddr = NULL ++ ); + -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f0 , ( 0)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f1 , ( 1)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f2 , ( 2)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f3 , ( 3)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f4 , ( 4)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f5 , ( 5)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f6 , ( 6)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f7 , ( 7)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f8 , ( 8)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f9 , ( 9)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f10 , (10)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f11 , (11)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f12 , (12)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f13 , (13)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f14 , (14)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f15 , (15)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f16 , (16)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f17 , (17)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f18 , (18)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f19 , (19)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f20 , (20)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f21 , (21)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f22 , (22)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f23 , (23)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f24 , (24)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f25 , (25)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f26 , (26)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f27 , (27)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f28 , (28)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f29 , (29)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f30 , (30)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f31 , (31)); ++ virtual void call_VM_leaf_base( ++ address entry_point, // the entry point ++ int number_of_arguments, // the number of arguments to pop after the call ++ Label& retaddr) { ++ call_VM_leaf_base(entry_point, number_of_arguments, &retaddr); ++ } + -+// Use VectorRegister as shortcut -+class 
VectorRegisterImpl; -+typedef VectorRegisterImpl* VectorRegister; ++ virtual void call_VM_base( // returns the register containing the thread upon return ++ Register oop_result, // where an oop-result ends up if any; use noreg otherwise ++ Register java_thread, // the thread if computed before ; use noreg otherwise ++ Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise ++ address entry_point, // the entry point ++ int number_of_arguments, // the number of arguments (w/o thread) to pop after the call ++ bool check_exceptions // whether to check for pending exceptions after return ++ ); + -+inline VectorRegister as_VectorRegister(int encoding) { -+ return (VectorRegister)(intptr_t) encoding; -+} ++ void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions); + -+// The implementation of vector registers for riscv-v -+class VectorRegisterImpl: public AbstractRegisterImpl { -+ public: -+ enum { -+ number_of_registers = 32, -+ max_slots_per_register = 4 -+ }; ++ virtual void check_and_handle_earlyret(Register java_thread); ++ virtual void check_and_handle_popframe(Register java_thread); + -+ // construction -+ inline friend VectorRegister as_VectorRegister(int encoding); ++ void resolve_weak_handle(Register result, Register tmp); ++ void resolve_oop_handle(Register result, Register tmp = x15); ++ void resolve_jobject(Register value, Register thread, Register tmp); + -+ VMReg as_VMReg(); ++ void movoop(Register dst, jobject obj, bool immediate = false); ++ void mov_metadata(Register dst, Metadata* obj); ++ void bang_stack_size(Register size, Register tmp); ++ void set_narrow_oop(Register dst, jobject obj); ++ void set_narrow_klass(Register dst, Klass* k); + -+ // derived registers, offsets, and addresses -+ VectorRegister successor() const { return as_VectorRegister(encoding() + 1); } ++ void load_mirror(Register dst, Register method, Register tmp = x15); ++ void access_load_at(BasicType type, DecoratorSet decorators, Register dst, ++ Address src, Register tmp1, Register thread_tmp); ++ void access_store_at(BasicType type, DecoratorSet decorators, Address dst, ++ Register src, Register tmp1, Register thread_tmp); ++ void load_klass(Register dst, Register src); ++ void store_klass(Register dst, Register src); ++ void cmp_klass(Register oop, Register trial_klass, Register tmp, Label &L); + -+ // accessors -+ int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } -+ int encoding_nocheck() const { return (intptr_t)this; } -+ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } -+ const char* name() const; ++ void encode_klass_not_null(Register r); ++ void decode_klass_not_null(Register r); ++ void encode_klass_not_null(Register dst, Register src, Register tmp = xheapbase); ++ void decode_klass_not_null(Register dst, Register src, Register tmp = xheapbase); ++ void decode_heap_oop_not_null(Register r); ++ void decode_heap_oop_not_null(Register dst, Register src); ++ void decode_heap_oop(Register d, Register s); ++ void decode_heap_oop(Register r) { decode_heap_oop(r, r); } ++ void encode_heap_oop(Register d, Register s); ++ void encode_heap_oop(Register r) { encode_heap_oop(r, r); }; ++ void load_heap_oop(Register dst, Address src, Register tmp1 = noreg, ++ Register thread_tmp = noreg, DecoratorSet decorators = 0); ++ void load_heap_oop_not_null(Register dst, Address src, Register tmp1 = noreg, ++ Register thread_tmp = noreg, DecoratorSet decorators 
= 0);
++ void store_heap_oop(Address dst, Register src, Register tmp1 = noreg,
++ Register thread_tmp = noreg, DecoratorSet decorators = 0);
+
++ void store_klass_gap(Register dst, Register src);
+
++ // currently unimplemented
++ // Used for storing NULL. All other oop constants should be
++ // stored using routines that take a jobject.
++ void store_heap_oop_null(Address dst);
+
++ // This dummy is to prevent a call to store_heap_oop from
++ // converting a zero (like NULL) into a Register by giving
++ // the compiler two choices it can't resolve
+
++ void store_heap_oop(Address dst, void* dummy);
+
++ // Support for NULL-checks
++ //
++ // Generates code that causes a NULL OS exception if the content of reg is NULL.
++ // If the accessed location is M[reg + offset] and the offset is known, provide the
++ // offset. No explicit code generation is needed if the offset is within a certain
++ // range (0 <= offset <= page_size).
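++ //
++ // For example, a field access at a small, statically known offset can rely on
++ // the hardware trap taken when reg is NULL (see uses_implicit_null_check below),
++ // while an access whose offset may lie beyond the protected page has to be
++ // preceded by an explicit compare-and-branch of reg against zero
++ // (see needs_explicit_null_check below).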
+
++ virtual void null_check(Register reg, int offset = -1);
++ static bool needs_explicit_null_check(intptr_t offset);
++ static bool uses_implicit_null_check(void* address);
+
++ // idiv variant which deals with MINLONG as dividend and -1 as divisor
++ int corrected_idivl(Register result, Register rs1, Register rs2,
++ bool want_remainder);
++ int corrected_idivq(Register result, Register rs1, Register rs2,
++ bool want_remainder);
+
++ // interface method calling
++ void lookup_interface_method(Register recv_klass,
++ Register intf_klass,
++ RegisterOrConstant itable_index,
++ Register method_result,
++ Register scan_tmp,
++ Label& no_such_interface,
++ bool return_method = true);
+
++ // virtual method calling
++ // n.b. x86 allows RegisterOrConstant for vtable_index
++ void lookup_virtual_method(Register recv_klass,
++ RegisterOrConstant vtable_index,
++ Register method_result);
+
++ // Form an address from base + offset in Rd. Rd may or may not
++ // actually be used: you must use the Address that is returned. It
++ // is up to you to ensure that the shift provided matches the size
++ // of your data.
++ Address form_address(Register Rd, Register base, long byte_offset);
+
++ // allocation
++ void tlab_allocate(
++ Register obj, // result: pointer to object after successful allocation
++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
++ int con_size_in_bytes, // object size in bytes if known at compile time
++ Register tmp1, // temp register
++ Register tmp2, // temp register
++ Label& slow_case, // continuation point if fast allocation fails
++ bool is_far = false
++ );
+
++ void eden_allocate(
++ Register obj, // result: pointer to object after successful allocation
++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
++ int con_size_in_bytes, // object size in bytes if known at compile time
++ Register tmp, // temp register
++ Label& slow_case, // continuation point if fast allocation fails
++ bool is_far = false
++ );
+
++ // Test sub_klass against super_klass, with fast and slow paths.
+
++ // The fast path produces a tri-state answer: yes / no / maybe-slow.
++ // One of the three labels can be NULL, meaning take the fall-through.
++ // If super_check_offset is -1, the value is loaded up from super_klass.
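++ // (check_klass_subtype below is the usual way to combine the two paths: it is
++ // expected to run the fast path with L_slow_path left as NULL so that the
++ // "maybe" outcome falls through into check_klass_subtype_slow_path.)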
++ // No registers are killed, except tmp_reg ++ void check_klass_subtype_fast_path(Register sub_klass, ++ Register super_klass, ++ Register tmp_reg, ++ Label* L_success, ++ Label* L_failure, ++ Label* L_slow_path, ++ Register super_check_offset = noreg); + -+ RegSet &operator+=(const RegSet aSet) { -+ *this = *this + aSet; -+ return *this; -+ } ++ // The reset of the type cehck; must be wired to a corresponding fast path. ++ // It does not repeat the fast path logic, so don't use it standalone. ++ // The tmp1_reg and tmp2_reg can be noreg, if no temps are avaliable. ++ // Updates the sub's secondary super cache as necessary. ++ void check_klass_subtype_slow_path(Register sub_klass, ++ Register super_klass, ++ Register tmp1_reg, ++ Register tmp2_reg, ++ Label* L_success, ++ Label* L_failure); + -+ static RegSet of(Register r1) { -+ return RegSet(r1); -+ } ++ void check_klass_subtype(Register sub_klass, ++ Register super_klass, ++ Register tmp_reg, ++ Label& L_success); + -+ static RegSet of(Register r1, Register r2) { -+ return of(r1) + r2; -+ } ++ Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0); + -+ static RegSet of(Register r1, Register r2, Register r3) { -+ return of(r1, r2) + r3; -+ } ++ // only if +VerifyOops ++ void verify_oop(Register reg, const char* s = "broken oop"); ++ void verify_oop_addr(Address addr, const char* s = "broken oop addr"); + -+ static RegSet of(Register r1, Register r2, Register r3, Register r4) { -+ return of(r1, r2, r3) + r4; -+ } ++ void _verify_method_ptr(Register reg, const char* msg, const char* file, int line) {} ++ void _verify_klass_ptr(Register reg, const char* msg, const char* file, int line) {} + -+ static RegSet range(Register start, Register end) { -+ uint32_t bits = ~0; -+ bits <<= start->encoding(); -+ bits <<= (31 - end->encoding()); -+ bits >>= (31 - end->encoding()); ++#define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__) ++#define verify_klass_ptr(reg) _verify_method_ptr(reg, "broken klass " #reg, __FILE__, __LINE__) + -+ return RegSet(bits); -+ } ++ // A more convenient access to fence for our purposes ++ // We used four bit to indicate the read and write bits in the predecessors and successors, ++ // and extended i for r, o for w if UseConservativeFence enabled. ++ enum Membar_mask_bits { ++ StoreStore = 0b0101, // (pred = ow + succ = ow) ++ LoadStore = 0b1001, // (pred = ir + succ = ow) ++ StoreLoad = 0b0110, // (pred = ow + succ = ir) ++ LoadLoad = 0b1010, // (pred = ir + succ = ir) ++ AnyAny = LoadStore | StoreLoad // (pred = iorw + succ = iorw) ++ }; + -+ uint32_t bits() const { return _bitset; } -+}; ++ void membar(uint32_t order_constraint); + -+#endif // CPU_RISCV_REGISTER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/relocInfo_riscv.cpp b/src/hotspot/cpu/riscv/relocInfo_riscv.cpp -new file mode 100644 -index 000000000..f49fd6439 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/relocInfo_riscv.cpp -@@ -0,0 +1,113 @@ -+/* -+ * Copyright (c) 1998, 2011, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. 
-+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++ static void membar_mask_to_pred_succ(uint32_t order_constraint, uint32_t& predecessor, uint32_t& successor) { ++ predecessor = (order_constraint >> 2) & 0x3; ++ successor = order_constraint & 0x3; + -+#include "precompiled.hpp" -+#include "asm/macroAssembler.hpp" -+#include "code/relocInfo.hpp" -+#include "nativeInst_riscv.hpp" -+#include "oops/oop.inline.hpp" -+#include "runtime/safepoint.hpp" ++ // extend rw -> iorw: ++ // 01(w) -> 0101(ow) ++ // 10(r) -> 1010(ir) ++ // 11(rw)-> 1111(iorw) ++ if (UseConservativeFence) { ++ predecessor |= predecessor << 2; ++ successor |= successor << 2; ++ } ++ } + -+void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) { -+ if (verify_only) { -+ return; ++ static int pred_succ_to_membar_mask(uint32_t predecessor, uint32_t successor) { ++ return ((predecessor & 0x3) << 2) | (successor & 0x3); + } + -+ int bytes; ++ // prints msg, dumps registers and stops execution ++ void stop(const char* msg); + -+ switch(type()) { -+ case relocInfo::oop_type: { -+ oop_Relocation *reloc = (oop_Relocation *)this; -+ // in movoop when immediate == false -+ if (NativeInstruction::is_load_pc_relative_at(addr())) { -+ address constptr = (address)code()->oop_addr_at(reloc->oop_index()); -+ bytes = MacroAssembler::pd_patch_instruction_size(addr(), constptr); -+ assert(*(address*)constptr == x, "error in oop relocation"); -+ } else { -+ bytes = MacroAssembler::patch_oop(addr(), x); -+ } -+ break; -+ } -+ default: -+ bytes = MacroAssembler::pd_patch_instruction_size(addr(), x); -+ break; -+ } -+ ICache::invalidate_range(addr(), bytes); -+} ++ static void debug64(char* msg, int64_t pc, int64_t regs[]); + -+address Relocation::pd_call_destination(address orig_addr) { -+ assert(is_call(), "should be an address instruction here"); -+ if (NativeCall::is_call_at(addr())) { -+ address trampoline = nativeCall_at(addr())->get_trampoline(); -+ if (trampoline != NULL) { -+ return nativeCallTrampolineStub_at(trampoline)->destination(); -+ } ++ void unimplemented(const char* what = ""); ++ ++ void should_not_reach_here() { stop("should not reach here"); } ++ ++ static address target_addr_for_insn(address insn_addr); ++ ++ // Required platform-specific helpers for Label::patch_instructions. ++ // They _shadow_ the declarations in AbstractAssembler, which are undefined. 
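++ // As an illustration of the contract: pd_patch_instruction_size is expected to
++ // rewrite the instruction sequence at 'branch' so that it reaches 'target' and
++ // to return the number of bytes it modified, which callers use to invalidate
++ // the ICache over exactly the patched range.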
++ static int pd_patch_instruction_size(address branch, address target); ++ static void pd_patch_instruction(address branch, address target, const char* file = NULL, int line = 0) { ++ pd_patch_instruction_size(branch, target); + } -+ if (orig_addr != NULL) { -+ // the extracted address from the instructions in address orig_addr -+ address new_addr = MacroAssembler::pd_call_destination(orig_addr); -+ // If call is branch to self, don't try to relocate it, just leave it -+ // as branch to self. This happens during code generation if the code -+ // buffer expands. It will be relocated to the trampoline above once -+ // code generation is complete. -+ new_addr = (new_addr == orig_addr) ? addr() : new_addr; -+ return new_addr; ++ static address pd_call_destination(address branch) { ++ return target_addr_for_insn(branch); + } -+ return MacroAssembler::pd_call_destination(addr()); -+} + -+void Relocation::pd_set_call_destination(address x) { -+ assert(is_call(), "should be an address instruction here"); -+ if (NativeCall::is_call_at(addr())) { -+ address trampoline = nativeCall_at(addr())->get_trampoline(); -+ if (trampoline != NULL) { -+ nativeCall_at(addr())->set_destination_mt_safe(x, /* assert_lock */false); -+ return; -+ } -+ } -+ MacroAssembler::pd_patch_instruction_size(addr(), x); -+ address pd_call = pd_call_destination(addr()); -+ assert(pd_call == x, "fail in reloc"); -+} ++ static int patch_oop(address insn_addr, address o); ++ address emit_trampoline_stub(int insts_call_instruction_offset, address target); ++ void emit_static_call_stub(); + -+address* Relocation::pd_address_in_code() { -+ assert(NativeCall::is_load_pc_relative_at(addr()), "Not the expected instruction sequence!"); -+ return (address*)(MacroAssembler::target_addr_for_insn(addr())); -+} ++ // The following 4 methods return the offset of the appropriate move instruction + -+address Relocation::pd_get_address_from_code() { -+ return MacroAssembler::pd_call_destination(addr()); -+} ++ // Support for fast byte/short loading with zero extension (depending on particular CPU) ++ int load_unsigned_byte(Register dst, Address src); ++ int load_unsigned_short(Register dst, Address src); + -+void poll_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { -+ if (NativeInstruction::maybe_cpool_ref(addr())) { -+ address old_addr = old_addr_for(addr(), src, dest); -+ MacroAssembler::pd_patch_instruction_size(addr(), MacroAssembler::target_addr_for_insn(old_addr)); -+ } -+} ++ // Support for fast byte/short loading with sign extension (depending on particular CPU) ++ int load_signed_byte(Register dst, Address src); ++ int load_signed_short(Register dst, Address src); + -+void metadata_Relocation::pd_fix_value(address x) { -+} -diff --git a/src/hotspot/cpu/riscv/relocInfo_riscv.hpp b/src/hotspot/cpu/riscv/relocInfo_riscv.hpp -new file mode 100644 -index 000000000..c30150e0a ---- /dev/null -+++ b/src/hotspot/cpu/riscv/relocInfo_riscv.hpp -@@ -0,0 +1,45 @@ -+/* -+ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. 
-+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ -+ -+#ifndef CPU_RISCV_RELOCINFO_RISCV_HPP -+#define CPU_RISCV_RELOCINFO_RISCV_HPP -+ -+ // machine-dependent parts of class relocInfo -+ private: -+ enum { -+ // Relocations are byte-aligned. -+ offset_unit = 1, -+ // We don't use format(). -+ format_width = 0 -+ }; ++ // Load and store values by size and signed-ness ++ void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg); ++ void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg); + + public: ++ // Standard pseudoinstruction ++ void nop(); ++ void mv(Register Rd, Register Rs); ++ void notr(Register Rd, Register Rs); ++ void neg(Register Rd, Register Rs); ++ void negw(Register Rd, Register Rs); ++ void sext_w(Register Rd, Register Rs); ++ void zext_b(Register Rd, Register Rs); ++ void seqz(Register Rd, Register Rs); // set if = zero ++ void snez(Register Rd, Register Rs); // set if != zero ++ void sltz(Register Rd, Register Rs); // set if < zero ++ void sgtz(Register Rd, Register Rs); // set if > zero + -+ // This platform has no oops in the code that are not also -+ // listed in the oop section. -+ static bool mustIterateImmediateOopsInCode() { return false; } ++ // Float pseudoinstruction ++ void fmv_s(FloatRegister Rd, FloatRegister Rs); ++ void fabs_s(FloatRegister Rd, FloatRegister Rs); // single-precision absolute value ++ void fneg_s(FloatRegister Rd, FloatRegister Rs); + -+#endif // CPU_RISCV_RELOCINFO_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -new file mode 100644 -index 000000000..137e9b7c7 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -0,0 +1,10685 @@ -+// -+// Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. -+// Copyright (c) 2014, 2019, Red Hat Inc. All rights reserved. -+// Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. -+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+// -+// This code is free software; you can redistribute it and/or modify it -+// under the terms of the GNU General Public License version 2 only, as -+// published by the Free Software Foundation. -+// -+// This code is distributed in the hope that it will be useful, but WITHOUT -+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+// version 2 for more details (a copy is included in the LICENSE file that -+// accompanied this code). -+// -+// You should have received a copy of the GNU General Public License version -+// 2 along with this work; if not, write to the Free Software Foundation, -+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
-+// -+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+// or visit www.oracle.com if you need additional information or have any -+// questions. -+// -+// ++ // Double pseudoinstruction ++ void fmv_d(FloatRegister Rd, FloatRegister Rs); ++ void fabs_d(FloatRegister Rd, FloatRegister Rs); ++ void fneg_d(FloatRegister Rd, FloatRegister Rs); + -+// RISCV Architecture Description File ++ // Pseudoinstruction for control and status register ++ void rdinstret(Register Rd); // read instruction-retired counter ++ void rdcycle(Register Rd); // read cycle counter ++ void rdtime(Register Rd); // read time ++ void csrr(Register Rd, unsigned csr); // read csr ++ void csrw(unsigned csr, Register Rs); // write csr ++ void csrs(unsigned csr, Register Rs); // set bits in csr ++ void csrc(unsigned csr, Register Rs); // clear bits in csr ++ void csrwi(unsigned csr, unsigned imm); ++ void csrsi(unsigned csr, unsigned imm); ++ void csrci(unsigned csr, unsigned imm); ++ void frcsr(Register Rd); // read float-point csr ++ void fscsr(Register Rd, Register Rs); // swap float-point csr ++ void fscsr(Register Rs); // write float-point csr ++ void frrm(Register Rd); // read float-point rounding mode ++ void fsrm(Register Rd, Register Rs); // swap float-point rounding mode ++ void fsrm(Register Rs); // write float-point rounding mode ++ void fsrmi(Register Rd, unsigned imm); ++ void fsrmi(unsigned imm); ++ void frflags(Register Rd); // read float-point exception flags ++ void fsflags(Register Rd, Register Rs); // swap float-point exception flags ++ void fsflags(Register Rs); // write float-point exception flags ++ void fsflagsi(Register Rd, unsigned imm); ++ void fsflagsi(unsigned imm); + -+//----------REGISTER DEFINITION BLOCK------------------------------------------ -+// This information is used by the matcher and the register allocator to -+// describe individual registers and classes of registers within the target -+// archtecture. 
++ void beqz(Register Rs, const address &dest); ++ void bnez(Register Rs, const address &dest); ++ void blez(Register Rs, const address &dest); ++ void bgez(Register Rs, const address &dest); ++ void bltz(Register Rs, const address &dest); ++ void bgtz(Register Rs, const address &dest); ++ void la(Register Rd, Label &label); ++ void la(Register Rd, const address &dest); ++ void la(Register Rd, const Address &adr); ++ //label ++ void beqz(Register Rs, Label &l, bool is_far = false); ++ void bnez(Register Rs, Label &l, bool is_far = false); ++ void blez(Register Rs, Label &l, bool is_far = false); ++ void bgez(Register Rs, Label &l, bool is_far = false); ++ void bltz(Register Rs, Label &l, bool is_far = false); ++ void bgtz(Register Rs, Label &l, bool is_far = false); ++ void float_beq(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void float_bne(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void float_ble(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void float_bge(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void float_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void float_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void double_beq(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void double_bne(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void double_ble(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void double_bge(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void double_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void double_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); + -+register %{ -+//----------Architecture Description Register Definitions---------------------- -+// General Registers -+// "reg_def" name ( register save type, C convention save type, -+// ideal register type, encoding ); -+// Register Save Types: -+// -+// NS = No-Save: The register allocator assumes that these registers -+// can be used without saving upon entry to the method, & -+// that they do not need to be saved at call sites. -+// -+// SOC = Save-On-Call: The register allocator assumes that these registers -+// can be used without saving upon entry to the method, -+// but that they must be saved at call sites. -+// -+// SOE = Save-On-Entry: The register allocator assumes that these registers -+// must be saved before using them upon entry to the -+// method, but they do not need to be saved at call -+// sites. -+// -+// AS = Always-Save: The register allocator assumes that these registers -+// must be saved before using them upon entry to the -+// method, & that they must be saved at call sites. -+// -+// Ideal Register Type is used to determine how to save & restore a -+// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get -+// spilled with LoadP/StoreP. If the register supports both, use Op_RegI. -+// -+// The encoding number is the actual bit-pattern placed into the opcodes. 
++ void push_reg(RegSet regs, Register stack) { if (regs.bits()) { push_reg(regs.bits(), stack); } } ++ void pop_reg(RegSet regs, Register stack) { if (regs.bits()) { pop_reg(regs.bits(), stack); } } ++ void push_reg(Register Rs); ++ void pop_reg(Register Rd); ++ int push_reg(unsigned int bitset, Register stack); ++ int pop_reg(unsigned int bitset, Register stack); ++ void push_fp(FloatRegSet regs, Register stack) { if (regs.bits()) push_fp(regs.bits(), stack); } ++ void pop_fp(FloatRegSet regs, Register stack) { if (regs.bits()) pop_fp(regs.bits(), stack); } ++#ifdef COMPILER2 ++ void push_vp(VectorRegSet regs, Register stack) { if (regs.bits()) push_vp(regs.bits(), stack); } ++ void pop_vp(VectorRegSet regs, Register stack) { if (regs.bits()) pop_vp(regs.bits(), stack); } ++#endif // COMPILER2 + -+// We must define the 64 bit int registers in two 32 bit halves, the -+// real lower register and a virtual upper half register. upper halves -+// are used by the register allocator but are not actually supplied as -+// operands to memory ops. -+// -+// follow the C1 compiler in making registers -+// -+// x7, x9-x17, x28-x31 volatile (caller save) -+// x0-x4, x8, x27 system (no save, no allocate) -+// x5-x6 non-allocatable (so we can use them as temporary regs) ++ // Push and pop everything that might be clobbered by a native ++ // runtime call except t0 and t1. (They are always ++ // temporary registers, so we don't have to protect them.) ++ // Additional registers can be excluded in a passed RegSet. ++ void push_call_clobbered_registers_except(RegSet exclude); ++ void pop_call_clobbered_registers_except(RegSet exclude); + -+// -+// as regards Java usage. we don't use any callee save registers -+// because this makes it difficult to de-optimise a frame (see comment -+// in x86 implementation of Deoptimization::unwind_callee_save_values) -+// ++ void push_call_clobbered_registers() { ++ push_call_clobbered_registers_except(RegSet()); ++ } ++ void pop_call_clobbered_registers() { ++ pop_call_clobbered_registers_except(RegSet()); ++ } + -+// General Registers ++ void pusha(); ++ void popa(); ++ void push_CPU_state(bool save_vectors = false, int vector_size_in_bytes = 0); ++ void pop_CPU_state(bool restore_vectors = false, int vector_size_in_bytes = 0); + -+reg_def R0 ( NS, NS, Op_RegI, 0, x0->as_VMReg() ); // zr -+reg_def R0_H ( NS, NS, Op_RegI, 0, x0->as_VMReg()->next() ); -+reg_def R1 ( SOC, SOC, Op_RegI, 1, x1->as_VMReg() ); // ra -+reg_def R1_H ( SOC, SOC, Op_RegI, 1, x1->as_VMReg()->next() ); -+reg_def R2 ( NS, SOE, Op_RegI, 2, x2->as_VMReg() ); // sp -+reg_def R2_H ( NS, SOE, Op_RegI, 2, x2->as_VMReg()->next() ); -+reg_def R3 ( NS, NS, Op_RegI, 3, x3->as_VMReg() ); // gp -+reg_def R3_H ( NS, NS, Op_RegI, 3, x3->as_VMReg()->next() ); -+reg_def R4 ( NS, NS, Op_RegI, 4, x4->as_VMReg() ); // tp -+reg_def R4_H ( NS, NS, Op_RegI, 4, x4->as_VMReg()->next() ); -+reg_def R7 ( SOC, SOC, Op_RegI, 7, x7->as_VMReg() ); -+reg_def R7_H ( SOC, SOC, Op_RegI, 7, x7->as_VMReg()->next() ); -+reg_def R8 ( NS, SOE, Op_RegI, 8, x8->as_VMReg() ); // fp -+reg_def R8_H ( NS, SOE, Op_RegI, 8, x8->as_VMReg()->next() ); -+reg_def R9 ( SOC, SOE, Op_RegI, 9, x9->as_VMReg() ); -+reg_def R9_H ( SOC, SOE, Op_RegI, 9, x9->as_VMReg()->next() ); -+reg_def R10 ( SOC, SOC, Op_RegI, 10, x10->as_VMReg() ); -+reg_def R10_H ( SOC, SOC, Op_RegI, 10, x10->as_VMReg()->next()); -+reg_def R11 ( SOC, SOC, Op_RegI, 11, x11->as_VMReg() ); -+reg_def R11_H ( SOC, SOC, Op_RegI, 11, x11->as_VMReg()->next()); -+reg_def R12 ( SOC, SOC, Op_RegI, 
12, x12->as_VMReg() ); -+reg_def R12_H ( SOC, SOC, Op_RegI, 12, x12->as_VMReg()->next()); -+reg_def R13 ( SOC, SOC, Op_RegI, 13, x13->as_VMReg() ); -+reg_def R13_H ( SOC, SOC, Op_RegI, 13, x13->as_VMReg()->next()); -+reg_def R14 ( SOC, SOC, Op_RegI, 14, x14->as_VMReg() ); -+reg_def R14_H ( SOC, SOC, Op_RegI, 14, x14->as_VMReg()->next()); -+reg_def R15 ( SOC, SOC, Op_RegI, 15, x15->as_VMReg() ); -+reg_def R15_H ( SOC, SOC, Op_RegI, 15, x15->as_VMReg()->next()); -+reg_def R16 ( SOC, SOC, Op_RegI, 16, x16->as_VMReg() ); -+reg_def R16_H ( SOC, SOC, Op_RegI, 16, x16->as_VMReg()->next()); -+reg_def R17 ( SOC, SOC, Op_RegI, 17, x17->as_VMReg() ); -+reg_def R17_H ( SOC, SOC, Op_RegI, 17, x17->as_VMReg()->next()); -+reg_def R18 ( SOC, SOE, Op_RegI, 18, x18->as_VMReg() ); -+reg_def R18_H ( SOC, SOE, Op_RegI, 18, x18->as_VMReg()->next()); -+reg_def R19 ( SOC, SOE, Op_RegI, 19, x19->as_VMReg() ); -+reg_def R19_H ( SOC, SOE, Op_RegI, 19, x19->as_VMReg()->next()); -+reg_def R20 ( SOC, SOE, Op_RegI, 20, x20->as_VMReg() ); // caller esp -+reg_def R20_H ( SOC, SOE, Op_RegI, 20, x20->as_VMReg()->next()); -+reg_def R21 ( SOC, SOE, Op_RegI, 21, x21->as_VMReg() ); -+reg_def R21_H ( SOC, SOE, Op_RegI, 21, x21->as_VMReg()->next()); -+reg_def R22 ( SOC, SOE, Op_RegI, 22, x22->as_VMReg() ); -+reg_def R22_H ( SOC, SOE, Op_RegI, 22, x22->as_VMReg()->next()); -+reg_def R23 ( NS, SOE, Op_RegI, 23, x23->as_VMReg() ); // java thread -+reg_def R23_H ( NS, SOE, Op_RegI, 23, x23->as_VMReg()->next()); -+reg_def R24 ( SOC, SOE, Op_RegI, 24, x24->as_VMReg() ); -+reg_def R24_H ( SOC, SOE, Op_RegI, 24, x24->as_VMReg()->next()); -+reg_def R25 ( SOC, SOE, Op_RegI, 25, x25->as_VMReg() ); -+reg_def R25_H ( SOC, SOE, Op_RegI, 25, x25->as_VMReg()->next()); -+reg_def R26 ( SOC, SOE, Op_RegI, 26, x26->as_VMReg() ); -+reg_def R26_H ( SOC, SOE, Op_RegI, 26, x26->as_VMReg()->next()); -+reg_def R27 ( SOC, SOE, Op_RegI, 27, x27->as_VMReg() ); // heapbase -+reg_def R27_H ( SOC, SOE, Op_RegI, 27, x27->as_VMReg()->next()); -+reg_def R28 ( SOC, SOC, Op_RegI, 28, x28->as_VMReg() ); -+reg_def R28_H ( SOC, SOC, Op_RegI, 28, x28->as_VMReg()->next()); -+reg_def R29 ( SOC, SOC, Op_RegI, 29, x29->as_VMReg() ); -+reg_def R29_H ( SOC, SOC, Op_RegI, 29, x29->as_VMReg()->next()); -+reg_def R30 ( SOC, SOC, Op_RegI, 30, x30->as_VMReg() ); -+reg_def R30_H ( SOC, SOC, Op_RegI, 30, x30->as_VMReg()->next()); -+reg_def R31 ( SOC, SOC, Op_RegI, 31, x31->as_VMReg() ); -+reg_def R31_H ( SOC, SOC, Op_RegI, 31, x31->as_VMReg()->next()); ++ // if heap base register is used - reinit it with the correct value ++ void reinit_heapbase(); + -+// ---------------------------- -+// Float/Double Registers -+// ---------------------------- ++ void bind(Label& L) { ++ Assembler::bind(L); ++ // fences across basic blocks should not be merged ++ code()->clear_last_insn(); ++ } + -+// Double Registers ++ // mv ++ template::value)> ++ inline void mv(Register Rd, T o) { ++ li(Rd, (int64_t)o); ++ } + -+// The rules of ADL require that double registers be defined in pairs. -+// Each pair must be two 32-bit values, but not necessarily a pair of -+// single float registers. In each pair, ADLC-assigned register numbers -+// must be adjacent, with the lower number even. Finally, when the -+// CPU stores such a register pair to memory, the word associated with -+// the lower ADLC-assigned number must be stored to the lower address. ++ inline void mvw(Register Rd, int32_t imm32) { mv(Rd, imm32); } + -+// RISCV has 32 floating-point registers. 
Each can store a single -+// or double precision floating-point value. ++ void mv(Register Rd, Address dest); ++ void mv(Register Rd, address addr); ++ void mv(Register Rd, RegisterOrConstant src); + -+// for Java use float registers f0-f31 are always save on call whereas -+// the platform ABI treats f8-f9 and f18-f27 as callee save). Other -+// float registers are SOC as per the platform spec ++ // logic ++ void andrw(Register Rd, Register Rs1, Register Rs2); ++ void orrw(Register Rd, Register Rs1, Register Rs2); ++ void xorrw(Register Rd, Register Rs1, Register Rs2); + -+reg_def F0 ( SOC, SOC, Op_RegF, 0, f0->as_VMReg() ); -+reg_def F0_H ( SOC, SOC, Op_RegF, 0, f0->as_VMReg()->next() ); -+reg_def F1 ( SOC, SOC, Op_RegF, 1, f1->as_VMReg() ); -+reg_def F1_H ( SOC, SOC, Op_RegF, 1, f1->as_VMReg()->next() ); -+reg_def F2 ( SOC, SOC, Op_RegF, 2, f2->as_VMReg() ); -+reg_def F2_H ( SOC, SOC, Op_RegF, 2, f2->as_VMReg()->next() ); -+reg_def F3 ( SOC, SOC, Op_RegF, 3, f3->as_VMReg() ); -+reg_def F3_H ( SOC, SOC, Op_RegF, 3, f3->as_VMReg()->next() ); -+reg_def F4 ( SOC, SOC, Op_RegF, 4, f4->as_VMReg() ); -+reg_def F4_H ( SOC, SOC, Op_RegF, 4, f4->as_VMReg()->next() ); -+reg_def F5 ( SOC, SOC, Op_RegF, 5, f5->as_VMReg() ); -+reg_def F5_H ( SOC, SOC, Op_RegF, 5, f5->as_VMReg()->next() ); -+reg_def F6 ( SOC, SOC, Op_RegF, 6, f6->as_VMReg() ); -+reg_def F6_H ( SOC, SOC, Op_RegF, 6, f6->as_VMReg()->next() ); -+reg_def F7 ( SOC, SOC, Op_RegF, 7, f7->as_VMReg() ); -+reg_def F7_H ( SOC, SOC, Op_RegF, 7, f7->as_VMReg()->next() ); -+reg_def F8 ( SOC, SOE, Op_RegF, 8, f8->as_VMReg() ); -+reg_def F8_H ( SOC, SOE, Op_RegF, 8, f8->as_VMReg()->next() ); -+reg_def F9 ( SOC, SOE, Op_RegF, 9, f9->as_VMReg() ); -+reg_def F9_H ( SOC, SOE, Op_RegF, 9, f9->as_VMReg()->next() ); -+reg_def F10 ( SOC, SOC, Op_RegF, 10, f10->as_VMReg() ); -+reg_def F10_H ( SOC, SOC, Op_RegF, 10, f10->as_VMReg()->next() ); -+reg_def F11 ( SOC, SOC, Op_RegF, 11, f11->as_VMReg() ); -+reg_def F11_H ( SOC, SOC, Op_RegF, 11, f11->as_VMReg()->next() ); -+reg_def F12 ( SOC, SOC, Op_RegF, 12, f12->as_VMReg() ); -+reg_def F12_H ( SOC, SOC, Op_RegF, 12, f12->as_VMReg()->next() ); -+reg_def F13 ( SOC, SOC, Op_RegF, 13, f13->as_VMReg() ); -+reg_def F13_H ( SOC, SOC, Op_RegF, 13, f13->as_VMReg()->next() ); -+reg_def F14 ( SOC, SOC, Op_RegF, 14, f14->as_VMReg() ); -+reg_def F14_H ( SOC, SOC, Op_RegF, 14, f14->as_VMReg()->next() ); -+reg_def F15 ( SOC, SOC, Op_RegF, 15, f15->as_VMReg() ); -+reg_def F15_H ( SOC, SOC, Op_RegF, 15, f15->as_VMReg()->next() ); -+reg_def F16 ( SOC, SOC, Op_RegF, 16, f16->as_VMReg() ); -+reg_def F16_H ( SOC, SOC, Op_RegF, 16, f16->as_VMReg()->next() ); -+reg_def F17 ( SOC, SOC, Op_RegF, 17, f17->as_VMReg() ); -+reg_def F17_H ( SOC, SOC, Op_RegF, 17, f17->as_VMReg()->next() ); -+reg_def F18 ( SOC, SOE, Op_RegF, 18, f18->as_VMReg() ); -+reg_def F18_H ( SOC, SOE, Op_RegF, 18, f18->as_VMReg()->next() ); -+reg_def F19 ( SOC, SOE, Op_RegF, 19, f19->as_VMReg() ); -+reg_def F19_H ( SOC, SOE, Op_RegF, 19, f19->as_VMReg()->next() ); -+reg_def F20 ( SOC, SOE, Op_RegF, 20, f20->as_VMReg() ); -+reg_def F20_H ( SOC, SOE, Op_RegF, 20, f20->as_VMReg()->next() ); -+reg_def F21 ( SOC, SOE, Op_RegF, 21, f21->as_VMReg() ); -+reg_def F21_H ( SOC, SOE, Op_RegF, 21, f21->as_VMReg()->next() ); -+reg_def F22 ( SOC, SOE, Op_RegF, 22, f22->as_VMReg() ); -+reg_def F22_H ( SOC, SOE, Op_RegF, 22, f22->as_VMReg()->next() ); -+reg_def F23 ( SOC, SOE, Op_RegF, 23, f23->as_VMReg() ); -+reg_def F23_H ( SOC, SOE, Op_RegF, 23, f23->as_VMReg()->next() ); -+reg_def 
F24 ( SOC, SOE, Op_RegF, 24, f24->as_VMReg() ); -+reg_def F24_H ( SOC, SOE, Op_RegF, 24, f24->as_VMReg()->next() ); -+reg_def F25 ( SOC, SOE, Op_RegF, 25, f25->as_VMReg() ); -+reg_def F25_H ( SOC, SOE, Op_RegF, 25, f25->as_VMReg()->next() ); -+reg_def F26 ( SOC, SOE, Op_RegF, 26, f26->as_VMReg() ); -+reg_def F26_H ( SOC, SOE, Op_RegF, 26, f26->as_VMReg()->next() ); -+reg_def F27 ( SOC, SOE, Op_RegF, 27, f27->as_VMReg() ); -+reg_def F27_H ( SOC, SOE, Op_RegF, 27, f27->as_VMReg()->next() ); -+reg_def F28 ( SOC, SOC, Op_RegF, 28, f28->as_VMReg() ); -+reg_def F28_H ( SOC, SOC, Op_RegF, 28, f28->as_VMReg()->next() ); -+reg_def F29 ( SOC, SOC, Op_RegF, 29, f29->as_VMReg() ); -+reg_def F29_H ( SOC, SOC, Op_RegF, 29, f29->as_VMReg()->next() ); -+reg_def F30 ( SOC, SOC, Op_RegF, 30, f30->as_VMReg() ); -+reg_def F30_H ( SOC, SOC, Op_RegF, 30, f30->as_VMReg()->next() ); -+reg_def F31 ( SOC, SOC, Op_RegF, 31, f31->as_VMReg() ); -+reg_def F31_H ( SOC, SOC, Op_RegF, 31, f31->as_VMReg()->next() ); ++ // revb ++ void revb_h_h(Register Rd, Register Rs, Register tmp = t0); // reverse bytes in halfword in lower 16 bits, sign-extend ++ void revb_w_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in lower word, sign-extend ++ void revb_h_h_u(Register Rd, Register Rs, Register tmp = t0); // reverse bytes in halfword in lower 16 bits, zero-extend ++ void revb_h_w_u(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in halfwords in lower 32 bits, zero-extend ++ void revb_h_helper(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in upper 16 bits (48:63) and move to lower ++ void revb_h(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in each halfword ++ void revb_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in each word ++ void revb(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in doubleword + -+// ---------------------------- -+// Vector Registers -+// ---------------------------- ++ void ror_imm(Register dst, Register src, uint32_t shift, Register tmp = t0); ++ void andi(Register Rd, Register Rn, int64_t imm, Register tmp = t0); ++ void orptr(Address adr, RegisterOrConstant src, Register tmp1 = t0, Register tmp2 = t1); + -+// For RVV vector registers, we simply extend vector register size to 4 -+// 'logical' slots. This is nominally 128 bits but it actually covers -+// all possible 'physical' RVV vector register lengths from 128 ~ 1024 -+// bits. The 'physical' RVV vector register length is detected during -+// startup, so the register allocator is able to identify the correct -+// number of bytes needed for an RVV spill/unspill. -+// for Java use vector registers v0-v31 are always save on call just -+// as the platform ABI treats v0-v31 as caller save. 
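// --- Illustrative sketch (editor's addition, not part of the patch) ---------
// The cmpxchg()/cmpxchg_weak() declarations that follow emit LR/SC loops with
// the requested acquire/release ordering.  This standalone std::atomic
// analogue shows the same contract: install new_val only if the current value
// equals expected, then report either the observed old value or a bool, which
// is what the `result_as_bool` parameter selects.  The function names below
// are invented for the demo; this is not HotSpot code.
#include <atomic>
#include <cstdint>
#include <cstdio>

static int64_t cas_return_old(std::atomic<int64_t>& cell,
                              int64_t expected, int64_t new_val) {
  int64_t old = expected;
  // On failure, `old` is overwritten with the value actually observed,
  // mirroring `result` when result_as_bool == false.
  cell.compare_exchange_strong(old, new_val,
                               std::memory_order_acq_rel,
                               std::memory_order_acquire);
  return old;
}

static bool cas_return_bool(std::atomic<int64_t>& cell,
                            int64_t expected, int64_t new_val) {
  int64_t old = expected;  // result_as_bool == true: only success matters
  return cell.compare_exchange_strong(old, new_val,
                                      std::memory_order_acq_rel,
                                      std::memory_order_acquire);
}

int main() {
  std::atomic<int64_t> cell{42};
  std::printf("%lld\n", (long long)cas_return_old(cell, 42, 7));  // 42: swap succeeded
  std::printf("%d\n",   cas_return_bool(cell, 42, 9));            // 0: cell is now 7
  return 0;
}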
++ void cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp, Label &succeed, Label *fail); ++ void cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, Label &succeed, Label *fail); ++ void cmpxchg(Register addr, Register expected, ++ Register new_val, ++ enum operand_size size, ++ Assembler::Aqrl acquire, Assembler::Aqrl release, ++ Register result, bool result_as_bool = false); ++ void cmpxchg_weak(Register addr, Register expected, ++ Register new_val, ++ enum operand_size size, ++ Assembler::Aqrl acquire, Assembler::Aqrl release, ++ Register result); ++ void cmpxchg_narrow_value_helper(Register addr, Register expected, ++ Register new_val, ++ enum operand_size size, ++ Register tmp1, Register tmp2, Register tmp3); ++ void cmpxchg_narrow_value(Register addr, Register expected, ++ Register new_val, ++ enum operand_size size, ++ Assembler::Aqrl acquire, Assembler::Aqrl release, ++ Register result, bool result_as_bool, ++ Register tmp1, Register tmp2, Register tmp3); ++ void weak_cmpxchg_narrow_value(Register addr, Register expected, ++ Register new_val, ++ enum operand_size size, ++ Assembler::Aqrl acquire, Assembler::Aqrl release, ++ Register result, ++ Register tmp1, Register tmp2, Register tmp3); + -+reg_def V0 ( SOC, SOC, Op_VecA, 0, v0->as_VMReg() ); -+reg_def V0_H ( SOC, SOC, Op_VecA, 0, v0->as_VMReg()->next() ); -+reg_def V0_J ( SOC, SOC, Op_VecA, 0, v0->as_VMReg()->next(2) ); -+reg_def V0_K ( SOC, SOC, Op_VecA, 0, v0->as_VMReg()->next(3) ); ++ void atomic_add(Register prev, RegisterOrConstant incr, Register addr); ++ void atomic_addw(Register prev, RegisterOrConstant incr, Register addr); ++ void atomic_addal(Register prev, RegisterOrConstant incr, Register addr); ++ void atomic_addalw(Register prev, RegisterOrConstant incr, Register addr); + -+reg_def V1 ( SOC, SOC, Op_VecA, 1, v1->as_VMReg() ); -+reg_def V1_H ( SOC, SOC, Op_VecA, 1, v1->as_VMReg()->next() ); -+reg_def V1_J ( SOC, SOC, Op_VecA, 1, v1->as_VMReg()->next(2) ); -+reg_def V1_K ( SOC, SOC, Op_VecA, 1, v1->as_VMReg()->next(3) ); ++ void atomic_xchg(Register prev, Register newv, Register addr); ++ void atomic_xchgw(Register prev, Register newv, Register addr); ++ void atomic_xchgal(Register prev, Register newv, Register addr); ++ void atomic_xchgalw(Register prev, Register newv, Register addr); ++ void atomic_xchgwu(Register prev, Register newv, Register addr); ++ void atomic_xchgalwu(Register prev, Register newv, Register addr); + -+reg_def V2 ( SOC, SOC, Op_VecA, 2, v2->as_VMReg() ); -+reg_def V2_H ( SOC, SOC, Op_VecA, 2, v2->as_VMReg()->next() ); -+reg_def V2_J ( SOC, SOC, Op_VecA, 2, v2->as_VMReg()->next(2) ); -+reg_def V2_K ( SOC, SOC, Op_VecA, 2, v2->as_VMReg()->next(3) ); ++ static bool far_branches() { ++ return ReservedCodeCacheSize > branch_range; ++ } + -+reg_def V3 ( SOC, SOC, Op_VecA, 3, v3->as_VMReg() ); -+reg_def V3_H ( SOC, SOC, Op_VecA, 3, v3->as_VMReg()->next() ); -+reg_def V3_J ( SOC, SOC, Op_VecA, 3, v3->as_VMReg()->next(2) ); -+reg_def V3_K ( SOC, SOC, Op_VecA, 3, v3->as_VMReg()->next(3) ); ++ // Jumps that can reach anywhere in the code cache. ++ // Trashes tmp. 
++ void far_call(Address entry, CodeBuffer *cbuf = NULL, Register tmp = t0); ++ void far_jump(Address entry, CodeBuffer *cbuf = NULL, Register tmp = t0); + -+reg_def V4 ( SOC, SOC, Op_VecA, 4, v4->as_VMReg() ); -+reg_def V4_H ( SOC, SOC, Op_VecA, 4, v4->as_VMReg()->next() ); -+reg_def V4_J ( SOC, SOC, Op_VecA, 4, v4->as_VMReg()->next(2) ); -+reg_def V4_K ( SOC, SOC, Op_VecA, 4, v4->as_VMReg()->next(3) ); ++ static int far_branch_size() { ++ if (far_branches()) { ++ return 2 * 4; // auipc + jalr, see far_call() & far_jump() ++ } else { ++ return 4; ++ } ++ } + -+reg_def V5 ( SOC, SOC, Op_VecA, 5, v5->as_VMReg() ); -+reg_def V5_H ( SOC, SOC, Op_VecA, 5, v5->as_VMReg()->next() ); -+reg_def V5_J ( SOC, SOC, Op_VecA, 5, v5->as_VMReg()->next(2) ); -+reg_def V5_K ( SOC, SOC, Op_VecA, 5, v5->as_VMReg()->next(3) ); ++ void load_byte_map_base(Register reg); + -+reg_def V6 ( SOC, SOC, Op_VecA, 6, v6->as_VMReg() ); -+reg_def V6_H ( SOC, SOC, Op_VecA, 6, v6->as_VMReg()->next() ); -+reg_def V6_J ( SOC, SOC, Op_VecA, 6, v6->as_VMReg()->next(2) ); -+reg_def V6_K ( SOC, SOC, Op_VecA, 6, v6->as_VMReg()->next(3) ); ++ void bang_stack_with_offset(int offset) { ++ // stack grows down, caller passes positive offset ++ assert(offset > 0, "must bang with negative offset"); ++ sub(t0, sp, offset); ++ sd(zr, Address(t0)); ++ } + -+reg_def V7 ( SOC, SOC, Op_VecA, 7, v7->as_VMReg() ); -+reg_def V7_H ( SOC, SOC, Op_VecA, 7, v7->as_VMReg()->next() ); -+reg_def V7_J ( SOC, SOC, Op_VecA, 7, v7->as_VMReg()->next(2) ); -+reg_def V7_K ( SOC, SOC, Op_VecA, 7, v7->as_VMReg()->next(3) ); ++ void la_patchable(Register reg1, const Address &dest, int32_t &offset); + -+reg_def V8 ( SOC, SOC, Op_VecA, 8, v8->as_VMReg() ); -+reg_def V8_H ( SOC, SOC, Op_VecA, 8, v8->as_VMReg()->next() ); -+reg_def V8_J ( SOC, SOC, Op_VecA, 8, v8->as_VMReg()->next(2) ); -+reg_def V8_K ( SOC, SOC, Op_VecA, 8, v8->as_VMReg()->next(3) ); ++ virtual void _call_Unimplemented(address call_site) { ++ mv(t1, call_site); ++ } + -+reg_def V9 ( SOC, SOC, Op_VecA, 9, v9->as_VMReg() ); -+reg_def V9_H ( SOC, SOC, Op_VecA, 9, v9->as_VMReg()->next() ); -+reg_def V9_J ( SOC, SOC, Op_VecA, 9, v9->as_VMReg()->next(2) ); -+reg_def V9_K ( SOC, SOC, Op_VecA, 9, v9->as_VMReg()->next(3) ); ++ #define call_Unimplemented() _call_Unimplemented((address)__PRETTY_FUNCTION__) + -+reg_def V10 ( SOC, SOC, Op_VecA, 10, v10->as_VMReg() ); -+reg_def V10_H ( SOC, SOC, Op_VecA, 10, v10->as_VMReg()->next() ); -+reg_def V10_J ( SOC, SOC, Op_VecA, 10, v10->as_VMReg()->next(2) ); -+reg_def V10_K ( SOC, SOC, Op_VecA, 10, v10->as_VMReg()->next(3) ); ++ // Frame creation and destruction shared between JITs. 
++ void build_frame(int framesize); ++ void remove_frame(int framesize); + -+reg_def V11 ( SOC, SOC, Op_VecA, 11, v11->as_VMReg() ); -+reg_def V11_H ( SOC, SOC, Op_VecA, 11, v11->as_VMReg()->next() ); -+reg_def V11_J ( SOC, SOC, Op_VecA, 11, v11->as_VMReg()->next(2) ); -+reg_def V11_K ( SOC, SOC, Op_VecA, 11, v11->as_VMReg()->next(3) ); ++ void reserved_stack_check(); + -+reg_def V12 ( SOC, SOC, Op_VecA, 12, v12->as_VMReg() ); -+reg_def V12_H ( SOC, SOC, Op_VecA, 12, v12->as_VMReg()->next() ); -+reg_def V12_J ( SOC, SOC, Op_VecA, 12, v12->as_VMReg()->next(2) ); -+reg_def V12_K ( SOC, SOC, Op_VecA, 12, v12->as_VMReg()->next(3) ); ++ void get_polling_page(Register dest, relocInfo::relocType rtype); ++ address read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype); + -+reg_def V13 ( SOC, SOC, Op_VecA, 13, v13->as_VMReg() ); -+reg_def V13_H ( SOC, SOC, Op_VecA, 13, v13->as_VMReg()->next() ); -+reg_def V13_J ( SOC, SOC, Op_VecA, 13, v13->as_VMReg()->next(2) ); -+reg_def V13_K ( SOC, SOC, Op_VecA, 13, v13->as_VMReg()->next(3) ); ++ address trampoline_call(Address entry, CodeBuffer* cbuf = NULL); ++ address ic_call(address entry, jint method_index = 0); + -+reg_def V14 ( SOC, SOC, Op_VecA, 14, v14->as_VMReg() ); -+reg_def V14_H ( SOC, SOC, Op_VecA, 14, v14->as_VMReg()->next() ); -+reg_def V14_J ( SOC, SOC, Op_VecA, 14, v14->as_VMReg()->next(2) ); -+reg_def V14_K ( SOC, SOC, Op_VecA, 14, v14->as_VMReg()->next(3) ); ++ void add_memory_int64(const Address dst, int64_t imm); ++ void add_memory_int32(const Address dst, int32_t imm); + -+reg_def V15 ( SOC, SOC, Op_VecA, 15, v15->as_VMReg() ); -+reg_def V15_H ( SOC, SOC, Op_VecA, 15, v15->as_VMReg()->next() ); -+reg_def V15_J ( SOC, SOC, Op_VecA, 15, v15->as_VMReg()->next(2) ); -+reg_def V15_K ( SOC, SOC, Op_VecA, 15, v15->as_VMReg()->next(3) ); ++ void cmpptr(Register src1, Address src2, Label& equal); + -+reg_def V16 ( SOC, SOC, Op_VecA, 16, v16->as_VMReg() ); -+reg_def V16_H ( SOC, SOC, Op_VecA, 16, v16->as_VMReg()->next() ); -+reg_def V16_J ( SOC, SOC, Op_VecA, 16, v16->as_VMReg()->next(2) ); -+reg_def V16_K ( SOC, SOC, Op_VecA, 16, v16->as_VMReg()->next(3) ); ++ void clinit_barrier(Register klass, Register tmp, Label* L_fast_path = NULL, Label* L_slow_path = NULL); ++ void load_method_holder_cld(Register result, Register method); ++ void load_method_holder(Register holder, Register method); + -+reg_def V17 ( SOC, SOC, Op_VecA, 17, v17->as_VMReg() ); -+reg_def V17_H ( SOC, SOC, Op_VecA, 17, v17->as_VMReg()->next() ); -+reg_def V17_J ( SOC, SOC, Op_VecA, 17, v17->as_VMReg()->next(2) ); -+reg_def V17_K ( SOC, SOC, Op_VecA, 17, v17->as_VMReg()->next(3) ); ++ void compute_index(Register str1, Register trailing_zeros, Register match_mask, ++ Register result, Register char_tmp, Register tmp, ++ bool haystack_isL); ++ void compute_match_mask(Register src, Register pattern, Register match_mask, ++ Register mask1, Register mask2); + -+reg_def V18 ( SOC, SOC, Op_VecA, 18, v18->as_VMReg() ); -+reg_def V18_H ( SOC, SOC, Op_VecA, 18, v18->as_VMReg()->next() ); -+reg_def V18_J ( SOC, SOC, Op_VecA, 18, v18->as_VMReg()->next(2) ); -+reg_def V18_K ( SOC, SOC, Op_VecA, 18, v18->as_VMReg()->next(3) ); ++#ifdef COMPILER2 ++ void mul_add(Register out, Register in, Register offset, ++ Register len, Register k, Register tmp); ++ void cad(Register dst, Register src1, Register src2, Register carry); ++ void cadc(Register dst, Register src1, Register src2, Register carry); ++ void adc(Register dst, Register src1, Register src2, Register carry); ++ void 
add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo, ++ Register src1, Register src2, Register carry); ++ void multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart, ++ Register y, Register y_idx, Register z, ++ Register carry, Register product, ++ Register idx, Register kdx); ++ void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart, ++ Register y, Register y_idx, Register z, ++ Register carry, Register product, ++ Register idx, Register kdx); ++ void multiply_128_x_128_loop(Register y, Register z, ++ Register carry, Register carry2, ++ Register idx, Register jdx, ++ Register yz_idx1, Register yz_idx2, ++ Register tmp, Register tmp3, Register tmp4, ++ Register tmp6, Register product_hi); ++ void multiply_to_len(Register x, Register xlen, Register y, Register ylen, ++ Register z, Register zlen, ++ Register tmp1, Register tmp2, Register tmp3, Register tmp4, ++ Register tmp5, Register tmp6, Register product_hi); ++#endif + -+reg_def V19 ( SOC, SOC, Op_VecA, 19, v19->as_VMReg() ); -+reg_def V19_H ( SOC, SOC, Op_VecA, 19, v19->as_VMReg()->next() ); -+reg_def V19_J ( SOC, SOC, Op_VecA, 19, v19->as_VMReg()->next(2) ); -+reg_def V19_K ( SOC, SOC, Op_VecA, 19, v19->as_VMReg()->next(3) ); ++ void inflate_lo32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); ++ void inflate_hi32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); + -+reg_def V20 ( SOC, SOC, Op_VecA, 20, v20->as_VMReg() ); -+reg_def V20_H ( SOC, SOC, Op_VecA, 20, v20->as_VMReg()->next() ); -+reg_def V20_J ( SOC, SOC, Op_VecA, 20, v20->as_VMReg()->next(2) ); -+reg_def V20_K ( SOC, SOC, Op_VecA, 20, v20->as_VMReg()->next(3) ); ++ void ctzc_bit(Register Rd, Register Rs, bool isLL = false, Register tmp1 = t0, Register tmp2 = t1); + -+reg_def V21 ( SOC, SOC, Op_VecA, 21, v21->as_VMReg() ); -+reg_def V21_H ( SOC, SOC, Op_VecA, 21, v21->as_VMReg()->next() ); -+reg_def V21_J ( SOC, SOC, Op_VecA, 21, v21->as_VMReg()->next(2) ); -+reg_def V21_K ( SOC, SOC, Op_VecA, 21, v21->as_VMReg()->next(3) ); ++ void zero_words(Register base, u_int64_t cnt); ++ address zero_words(Register ptr, Register cnt); ++ void fill_words(Register base, Register cnt, Register value); ++ void zero_memory(Register addr, Register len, Register tmp); + -+reg_def V22 ( SOC, SOC, Op_VecA, 22, v22->as_VMReg() ); -+reg_def V22_H ( SOC, SOC, Op_VecA, 22, v22->as_VMReg()->next() ); -+reg_def V22_J ( SOC, SOC, Op_VecA, 22, v22->as_VMReg()->next(2) ); -+reg_def V22_K ( SOC, SOC, Op_VecA, 22, v22->as_VMReg()->next(3) ); ++ // shift left by shamt and add ++ void shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt); + -+reg_def V23 ( SOC, SOC, Op_VecA, 23, v23->as_VMReg() ); -+reg_def V23_H ( SOC, SOC, Op_VecA, 23, v23->as_VMReg()->next() ); -+reg_def V23_J ( SOC, SOC, Op_VecA, 23, v23->as_VMReg()->next(2) ); -+reg_def V23_K ( SOC, SOC, Op_VecA, 23, v23->as_VMReg()->next(3) ); ++ // Here the float instructions with safe deal with some exceptions. ++ // e.g. convert from NaN, +Inf, -Inf to int, float, double ++ // will trigger exception, we need to deal with these situations ++ // to get correct results. 
++ void fcvt_w_s_safe(Register dst, FloatRegister src, Register tmp = t0); ++ void fcvt_l_s_safe(Register dst, FloatRegister src, Register tmp = t0); ++ void fcvt_w_d_safe(Register dst, FloatRegister src, Register tmp = t0); ++ void fcvt_l_d_safe(Register dst, FloatRegister src, Register tmp = t0); + -+reg_def V24 ( SOC, SOC, Op_VecA, 24, v24->as_VMReg() ); -+reg_def V24_H ( SOC, SOC, Op_VecA, 24, v24->as_VMReg()->next() ); -+reg_def V24_J ( SOC, SOC, Op_VecA, 24, v24->as_VMReg()->next(2) ); -+reg_def V24_K ( SOC, SOC, Op_VecA, 24, v24->as_VMReg()->next(3) ); ++ // vector load/store unit-stride instructions ++ void vlex_v(VectorRegister vd, Register base, Assembler::SEW sew, VectorMask vm = unmasked) { ++ switch (sew) { ++ case Assembler::e64: ++ vle64_v(vd, base, vm); ++ break; ++ case Assembler::e32: ++ vle32_v(vd, base, vm); ++ break; ++ case Assembler::e16: ++ vle16_v(vd, base, vm); ++ break; ++ case Assembler::e8: // fall through ++ default: ++ vle8_v(vd, base, vm); ++ break; ++ } ++ } + -+reg_def V25 ( SOC, SOC, Op_VecA, 25, v25->as_VMReg() ); -+reg_def V25_H ( SOC, SOC, Op_VecA, 25, v25->as_VMReg()->next() ); -+reg_def V25_J ( SOC, SOC, Op_VecA, 25, v25->as_VMReg()->next(2) ); -+reg_def V25_K ( SOC, SOC, Op_VecA, 25, v25->as_VMReg()->next(3) ); ++ void vsex_v(VectorRegister store_data, Register base, Assembler::SEW sew, VectorMask vm = unmasked) { ++ switch (sew) { ++ case Assembler::e64: ++ vse64_v(store_data, base, vm); ++ break; ++ case Assembler::e32: ++ vse32_v(store_data, base, vm); ++ break; ++ case Assembler::e16: ++ vse16_v(store_data, base, vm); ++ break; ++ case Assembler::e8: // fall through ++ default: ++ vse8_v(store_data, base, vm); ++ break; ++ } ++ } + -+reg_def V26 ( SOC, SOC, Op_VecA, 26, v26->as_VMReg() ); -+reg_def V26_H ( SOC, SOC, Op_VecA, 26, v26->as_VMReg()->next() ); -+reg_def V26_J ( SOC, SOC, Op_VecA, 26, v26->as_VMReg()->next(2) ); -+reg_def V26_K ( SOC, SOC, Op_VecA, 26, v26->as_VMReg()->next(3) ); ++ static const int zero_words_block_size; + -+reg_def V27 ( SOC, SOC, Op_VecA, 27, v27->as_VMReg() ); -+reg_def V27_H ( SOC, SOC, Op_VecA, 27, v27->as_VMReg()->next() ); -+reg_def V27_J ( SOC, SOC, Op_VecA, 27, v27->as_VMReg()->next(2) ); -+reg_def V27_K ( SOC, SOC, Op_VecA, 27, v27->as_VMReg()->next(3) ); ++ void cast_primitive_type(BasicType type, Register Rt) { ++ switch (type) { ++ case T_BOOLEAN: ++ sltu(Rt, zr, Rt); ++ break; ++ case T_CHAR : ++ zero_extend(Rt, Rt, 16); ++ break; ++ case T_BYTE : ++ sign_extend(Rt, Rt, 8); ++ break; ++ case T_SHORT : ++ sign_extend(Rt, Rt, 16); ++ break; ++ case T_INT : ++ addw(Rt, Rt, zr); ++ break; ++ case T_LONG : /* nothing to do */ break; ++ case T_VOID : /* nothing to do */ break; ++ case T_FLOAT : /* nothing to do */ break; ++ case T_DOUBLE : /* nothing to do */ break; ++ default: ShouldNotReachHere(); ++ } ++ } + -+reg_def V28 ( SOC, SOC, Op_VecA, 28, v28->as_VMReg() ); -+reg_def V28_H ( SOC, SOC, Op_VecA, 28, v28->as_VMReg()->next() ); -+reg_def V28_J ( SOC, SOC, Op_VecA, 28, v28->as_VMReg()->next(2) ); -+reg_def V28_K ( SOC, SOC, Op_VecA, 28, v28->as_VMReg()->next(3) ); ++ // float cmp with unordered_result ++ void float_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result); ++ void double_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result); + -+reg_def V29 ( SOC, SOC, Op_VecA, 29, v29->as_VMReg() ); -+reg_def V29_H ( SOC, SOC, Op_VecA, 29, v29->as_VMReg()->next() ); -+reg_def V29_J ( SOC, SOC, Op_VecA, 29, v29->as_VMReg()->next(2) ); -+reg_def 
V29_K ( SOC, SOC, Op_VecA, 29, v29->as_VMReg()->next(3) ); ++ // Zero/Sign-extend ++ void zero_extend(Register dst, Register src, int bits); ++ void sign_extend(Register dst, Register src, int bits); + -+reg_def V30 ( SOC, SOC, Op_VecA, 30, v30->as_VMReg() ); -+reg_def V30_H ( SOC, SOC, Op_VecA, 30, v30->as_VMReg()->next() ); -+reg_def V30_J ( SOC, SOC, Op_VecA, 30, v30->as_VMReg()->next(2) ); -+reg_def V30_K ( SOC, SOC, Op_VecA, 30, v30->as_VMReg()->next(3) ); ++ // compare src1 and src2 and get -1/0/1 in dst. ++ // if [src1 > src2], dst = 1; ++ // if [src1 == src2], dst = 0; ++ // if [src1 < src2], dst = -1; ++ void cmp_l2i(Register dst, Register src1, Register src2, Register tmp = t0); + -+reg_def V31 ( SOC, SOC, Op_VecA, 31, v31->as_VMReg() ); -+reg_def V31_H ( SOC, SOC, Op_VecA, 31, v31->as_VMReg()->next() ); -+reg_def V31_J ( SOC, SOC, Op_VecA, 31, v31->as_VMReg()->next(2) ); -+reg_def V31_K ( SOC, SOC, Op_VecA, 31, v31->as_VMReg()->next(3) ); ++ int push_fp(unsigned int bitset, Register stack); ++ int pop_fp(unsigned int bitset, Register stack); + -+// ---------------------------- -+// Special Registers -+// ---------------------------- ++ int push_vp(unsigned int bitset, Register stack); ++ int pop_vp(unsigned int bitset, Register stack); + -+// On riscv, the physical flag register is missing, so we use t1 instead, -+// to bridge the RegFlag semantics in share/opto ++ // vext ++ void vmnot_m(VectorRegister vd, VectorRegister vs); ++ void vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked); ++ void vfneg_v(VectorRegister vd, VectorRegister vs); + -+reg_def RFLAGS (SOC, SOC, Op_RegFlags, 6, x6->as_VMReg() ); ++private: + -+// Specify priority of register selection within phases of register -+// allocation. Highest priority is first. A useful heuristic is to -+// give registers a low priority when they are required by machine -+// instructions, like EAX and EDX on I486, and choose no-save registers -+// before save-on-call, & save-on-call before save-on-entry. Registers -+// which participate in fixed calling sequences should come last. -+// Registers which are used as pairs must fall on an even boundary. ++#ifdef ASSERT ++ // Template short-hand support to clean-up after a failed call to trampoline ++ // call generation (see trampoline_call() below), when a set of Labels must ++ // be reset (before returning). ++ template ++ void reset_labels(Label& lbl, More&... more) { ++ lbl.reset(); reset_labels(more...); ++ } ++ template ++ void reset_labels(Label& lbl) { ++ lbl.reset(); ++ } ++#endif ++ void repne_scan(Register addr, Register value, Register count, Register tmp); + -+alloc_class chunk0( -+ // volatiles -+ R7, R7_H, -+ R28, R28_H, -+ R29, R29_H, -+ R30, R30_H, -+ R31, R31_H, ++ // Return true if an address is within the 48-bit RISCV64 address space. 
++ bool is_valid_riscv64_address(address addr) { ++ return ((uintptr_t)addr >> 48) == 0; ++ } + -+ // arg registers -+ R10, R10_H, -+ R11, R11_H, -+ R12, R12_H, -+ R13, R13_H, -+ R14, R14_H, -+ R15, R15_H, -+ R16, R16_H, -+ R17, R17_H, ++ void ld_constant(Register dest, const Address &const_addr) { ++ if (NearCpool) { ++ ld(dest, const_addr); ++ } else { ++ int32_t offset = 0; ++ la_patchable(dest, InternalAddress(const_addr.target()), offset); ++ ld(dest, Address(dest, offset)); ++ } ++ } + -+ // non-volatiles -+ R9, R9_H, -+ R18, R18_H, -+ R19, R19_H, -+ R20, R20_H, -+ R21, R21_H, -+ R22, R22_H, -+ R24, R24_H, -+ R25, R25_H, -+ R26, R26_H, ++ int bitset_to_regs(unsigned int bitset, unsigned char* regs); ++ Address add_memory_helper(const Address dst); + -+ // non-allocatable registers -+ R23, R23_H, // java thread -+ R27, R27_H, // heapbase -+ R4, R4_H, // thread -+ R8, R8_H, // fp -+ R0, R0_H, // zero -+ R1, R1_H, // ra -+ R2, R2_H, // sp -+ R3, R3_H, // gp -+); ++ void load_reserved(Register addr, enum operand_size size, Assembler::Aqrl acquire); ++ void store_conditional(Register addr, Register new_val, enum operand_size size, Assembler::Aqrl release); + -+alloc_class chunk1( ++ // Check the current thread doesn't need a cross modify fence. ++ void verify_cross_modify_fence_not_required() PRODUCT_RETURN; ++}; + -+ // no save -+ F0, F0_H, -+ F1, F1_H, -+ F2, F2_H, -+ F3, F3_H, -+ F4, F4_H, -+ F5, F5_H, -+ F6, F6_H, -+ F7, F7_H, -+ F28, F28_H, -+ F29, F29_H, -+ F30, F30_H, -+ F31, F31_H, ++#ifdef ASSERT ++inline bool AbstractAssembler::pd_check_instruction_mark() { return false; } ++#endif + -+ // arg registers -+ F10, F10_H, -+ F11, F11_H, -+ F12, F12_H, -+ F13, F13_H, -+ F14, F14_H, -+ F15, F15_H, -+ F16, F16_H, -+ F17, F17_H, ++/** ++ * class SkipIfEqual: ++ * ++ * Instantiating this class will result in assembly code being output that will ++ * jump around any code emitted between the creation of the instance and it's ++ * automatic destruction at the end of a scope block, depending on the value of ++ * the flag passed to the constructor, which will be checked at run-time. 
++ */ ++class SkipIfEqual { ++ private: ++ MacroAssembler* _masm; ++ Label _label; + -+ // non-volatiles -+ F8, F8_H, -+ F9, F9_H, -+ F18, F18_H, -+ F19, F19_H, -+ F20, F20_H, -+ F21, F21_H, -+ F22, F22_H, -+ F23, F23_H, -+ F24, F24_H, -+ F25, F25_H, -+ F26, F26_H, -+ F27, F27_H, -+); ++ public: ++ SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value); ++ ~SkipIfEqual(); ++}; + -+alloc_class chunk2( -+ V0, V0_H, V0_J, V0_K, -+ V1, V1_H, V1_J, V1_K, -+ V2, V2_H, V2_J, V2_K, -+ V3, V3_H, V3_J, V3_K, -+ V4, V4_H, V4_J, V4_K, -+ V5, V5_H, V5_J, V5_K, -+ V6, V6_H, V6_J, V6_K, -+ V7, V7_H, V7_J, V7_K, -+ V8, V8_H, V8_J, V8_K, -+ V9, V9_H, V9_J, V9_K, -+ V10, V10_H, V10_J, V10_K, -+ V11, V11_H, V11_J, V11_K, -+ V12, V12_H, V12_J, V12_K, -+ V13, V13_H, V13_J, V13_K, -+ V14, V14_H, V14_J, V14_K, -+ V15, V15_H, V15_J, V15_K, -+ V16, V16_H, V16_J, V16_K, -+ V17, V17_H, V17_J, V17_K, -+ V18, V18_H, V18_J, V18_K, -+ V19, V19_H, V19_J, V19_K, -+ V20, V20_H, V20_J, V20_K, -+ V21, V21_H, V21_J, V21_K, -+ V22, V22_H, V22_J, V22_K, -+ V23, V23_H, V23_J, V23_K, -+ V24, V24_H, V24_J, V24_K, -+ V25, V25_H, V25_J, V25_K, -+ V26, V26_H, V26_J, V26_K, -+ V27, V27_H, V27_J, V27_K, -+ V28, V28_H, V28_J, V28_K, -+ V29, V29_H, V29_J, V29_K, -+ V30, V30_H, V30_J, V30_K, -+ V31, V31_H, V31_J, V31_K, -+); ++#endif // CPU_RISCV_MACROASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp +new file mode 100644 +index 00000000000..ef968ccd96d +--- /dev/null ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp +@@ -0,0 +1,31 @@ ++/* ++ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+alloc_class chunk3(RFLAGS); ++#ifndef CPU_RISCV_MACROASSEMBLER_RISCV_INLINE_HPP ++#define CPU_RISCV_MACROASSEMBLER_RISCV_INLINE_HPP + -+//----------Architecture Description Register Classes-------------------------- -+// Several register classes are automatically defined based upon information in -+// this architecture description. -+// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ ) -+// 2) reg_class compiler_method_oop_reg ( /* as def'd in frame section */ ) -+// 2) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ ) -+// 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ ) -+// ++// Still empty. 
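// --- Illustrative sketch (editor's addition, not part of the patch) ---------
// The ld_constant()/la_patchable() helpers declared earlier in
// macroAssembler_riscv.hpp materialize a pc-relative address as an
// AUIPC-style upper part plus a signed 12-bit low offset.  The arithmetic
// below demonstrates that split for non-negative offsets; the +0x800 rounding
// exists because the low part is sign-extended.  Names are invented for the
// demo and nothing here is HotSpot code.
#include <cassert>
#include <cstdint>
#include <cstdio>

struct HiLo {
  int64_t hi;   // upper-immediate contribution (already shifted left by 12)
  int32_t lo;   // signed 12-bit remainder folded into the following ld/addi
};

static HiLo split_offset(int64_t offset) {
  assert(offset >= 0);                      // keep the demo in well-defined territory
  int64_t hi = ((offset + 0x800) >> 12) << 12;
  int32_t lo = (int32_t)(offset - hi);
  assert(lo >= -2048 && lo <= 2047);        // must fit a signed 12-bit immediate
  return { hi, lo };
}

int main() {
  for (int64_t off : { INT64_C(0x12345678), INT64_C(4095), INT64_C(2048) }) {
    HiLo p = split_offset(off);
    assert(p.hi + p.lo == off);             // the pair reassembles the offset
    std::printf("0x%llx = hi 0x%llx + lo %d\n",
                (unsigned long long)off, (unsigned long long)p.hi, p.lo);
  }
  return 0;
}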
+ -+// Class for all 32 bit general purpose registers -+reg_class all_reg32( -+ R0, -+ R1, -+ R2, -+ R3, -+ R4, -+ R7, -+ R8, -+ R9, -+ R10, -+ R11, -+ R12, -+ R13, -+ R14, -+ R15, -+ R16, -+ R17, -+ R18, -+ R19, -+ R20, -+ R21, -+ R22, -+ R23, -+ R24, -+ R25, -+ R26, -+ R27, -+ R28, -+ R29, -+ R30, -+ R31 -+); ++#endif // CPU_RISCV_MACROASSEMBLER_RISCV_INLINE_HPP +diff --git a/src/hotspot/cpu/riscv/matcher_riscv.hpp b/src/hotspot/cpu/riscv/matcher_riscv.hpp +new file mode 100644 +index 00000000000..23a75d20502 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/matcher_riscv.hpp +@@ -0,0 +1,169 @@ ++/* ++ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+// Class for any 32 bit integer registers (excluding zr) -+reg_class any_reg32 %{ -+ return _ANY_REG32_mask; -+%} ++#ifndef CPU_RISCV_MATCHER_RISCV_HPP ++#define CPU_RISCV_MATCHER_RISCV_HPP + -+// Singleton class for R10 int register -+reg_class int_r10_reg(R10); ++ // Defined within class Matcher + -+// Singleton class for R12 int register -+reg_class int_r12_reg(R12); ++ // false => size gets scaled to BytesPerLong, ok. ++ static const bool init_array_count_is_in_bytes = false; + -+// Singleton class for R13 int register -+reg_class int_r13_reg(R13); ++ // Whether this platform implements the scalable vector feature ++ static const bool implements_scalable_vector = true; + -+// Singleton class for R14 int register -+reg_class int_r14_reg(R14); ++ static const bool supports_scalable_vector() { ++ return UseRVV; ++ } + -+// Class for all long integer registers -+reg_class all_reg( -+ R0, R0_H, -+ R1, R1_H, -+ R2, R2_H, -+ R3, R3_H, -+ R4, R4_H, -+ R7, R7_H, -+ R8, R8_H, -+ R9, R9_H, -+ R10, R10_H, -+ R11, R11_H, -+ R12, R12_H, -+ R13, R13_H, -+ R14, R14_H, -+ R15, R15_H, -+ R16, R16_H, -+ R17, R17_H, -+ R18, R18_H, -+ R19, R19_H, -+ R20, R20_H, -+ R21, R21_H, -+ R22, R22_H, -+ R23, R23_H, -+ R24, R24_H, -+ R25, R25_H, -+ R26, R26_H, -+ R27, R27_H, -+ R28, R28_H, -+ R29, R29_H, -+ R30, R30_H, -+ R31, R31_H -+); ++ // riscv supports misaligned vectors store/load. ++ static constexpr bool misaligned_vectors_ok() { ++ return true; ++ } + -+// Class for all long integer registers (excluding zr) -+reg_class any_reg %{ -+ return _ANY_REG_mask; -+%} ++ // Whether code generation need accurate ConvI2L types. 
++ static const bool convi2l_type_required = false; + -+// Class for non-allocatable 32 bit registers -+reg_class non_allocatable_reg32( -+ R0, // zr -+ R1, // ra -+ R2, // sp -+ R3, // gp -+ R4, // tp -+ R23 // java thread -+); ++ // Does the CPU require late expand (see block.cpp for description of late expand)? ++ static const bool require_postalloc_expand = false; + -+// Class for non-allocatable 64 bit registers -+reg_class non_allocatable_reg( -+ R0, R0_H, // zr -+ R1, R1_H, // ra -+ R2, R2_H, // sp -+ R3, R3_H, // gp -+ R4, R4_H, // tp -+ R23, R23_H // java thread -+); ++ // Do we need to mask the count passed to shift instructions or does ++ // the cpu only look at the lower 5/6 bits anyway? ++ static const bool need_masked_shift_count = false; + -+reg_class no_special_reg32 %{ -+ return _NO_SPECIAL_REG32_mask; -+%} ++ // No support for generic vector operands. ++ static const bool supports_generic_vector_operands = false; + -+reg_class no_special_reg %{ -+ return _NO_SPECIAL_REG_mask; -+%} ++ static constexpr bool isSimpleConstant64(jlong value) { ++ // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. ++ // Probably always true, even if a temp register is required. ++ return true; ++ } + -+reg_class ptr_reg %{ -+ return _PTR_REG_mask; -+%} -+ -+reg_class no_special_ptr_reg %{ -+ return _NO_SPECIAL_PTR_REG_mask; -+%} -+ -+// Class for 64 bit register r10 -+reg_class r10_reg( -+ R10, R10_H -+); -+ -+// Class for 64 bit register r11 -+reg_class r11_reg( -+ R11, R11_H -+); -+ -+// Class for 64 bit register r12 -+reg_class r12_reg( -+ R12, R12_H -+); -+ -+// Class for 64 bit register r13 -+reg_class r13_reg( -+ R13, R13_H -+); -+ -+// Class for 64 bit register r14 -+reg_class r14_reg( -+ R14, R14_H -+); -+ -+// Class for 64 bit register r15 -+reg_class r15_reg( -+ R15, R15_H -+); ++ // Use conditional move (CMOVL) ++ static constexpr int long_cmove_cost() { ++ // long cmoves are no more expensive than int cmoves ++ return 0; ++ } + -+// Class for 64 bit register r16 -+reg_class r16_reg( -+ R16, R16_H -+); ++ static constexpr int float_cmove_cost() { ++ // float cmoves are no more expensive than int cmoves ++ return 0; ++ } + -+// Class for method register -+reg_class method_reg( -+ R31, R31_H -+); ++ // This affects two different things: ++ // - how Decode nodes are matched ++ // - how ImplicitNullCheck opportunities are recognized ++ // If true, the matcher will try to remove all Decodes and match them ++ // (as operands) into nodes. NullChecks are not prepared to deal with ++ // Decodes by final_graph_reshaping(). ++ // If false, final_graph_reshaping() forces the decode behind the Cmp ++ // for a NullCheck. The matcher matches the Decode node into a register. ++ // Implicit_null_check optimization moves the Decode along with the ++ // memory operation back up before the NullCheck. ++ static bool narrow_oop_use_complex_address() { ++ return CompressedOops::shift() == 0; ++ } + -+// Class for heapbase register -+reg_class heapbase_reg( -+ R27, R27_H -+); ++ static bool narrow_klass_use_complex_address() { ++ return false; ++ } + -+// Class for java thread register -+reg_class java_thread_reg( -+ R23, R23_H -+); ++ static bool const_oop_prefer_decode() { ++ // Prefer ConN+DecodeN over ConP in simple compressed oops mode. ++ return CompressedOops::base() == NULL; ++ } + -+reg_class r28_reg( -+ R28, R28_H -+); ++ static bool const_klass_prefer_decode() { ++ // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode. 
++ return CompressedKlassPointers::base() == NULL; ++ } + -+reg_class r29_reg( -+ R29, R29_H -+); ++ // Is it better to copy float constants, or load them directly from ++ // memory? Intel can load a float constant from a direct address, ++ // requiring no extra registers. Most RISCs will have to materialize ++ // an address into a register first, so they would do better to copy ++ // the constant from stack. ++ static const bool rematerialize_float_constants = false; + -+reg_class r30_reg( -+ R30, R30_H -+); ++ // If CPU can load and store mis-aligned doubles directly then no ++ // fixup is needed. Else we split the double into 2 integer pieces ++ // and move it piece-by-piece. Only happens when passing doubles into ++ // C code as the Java calling convention forces doubles to be aligned. ++ static const bool misaligned_doubles_ok = true; + -+// Class for zero registesr -+reg_class zr_reg( -+ R0, R0_H -+); ++ // Advertise here if the CPU requires explicit rounding operations to implement strictfp mode. ++ static const bool strict_fp_requires_explicit_rounding = false; + -+// Class for thread register -+reg_class thread_reg( -+ R4, R4_H -+); ++ // Are floats converted to double when stored to stack during ++ // deoptimization? ++ static constexpr bool float_in_double() { return false; } + -+// Class for frame pointer register -+reg_class fp_reg( -+ R8, R8_H -+); ++ // Do ints take an entire long register or just half? ++ // The relevant question is how the int is callee-saved: ++ // the whole long is written but de-opt'ing will have to extract ++ // the relevant 32 bits. ++ static const bool int_in_long = true; + -+// Class for link register -+reg_class lr_reg( -+ R1, R1_H -+); ++ // Does the CPU supports vector variable shift instructions? ++ static constexpr bool supports_vector_variable_shifts(void) { ++ return false; ++ } + -+// Class for long sp register -+reg_class sp_reg( -+ R2, R2_H -+); ++ // Does the CPU supports vector variable rotate instructions? ++ static constexpr bool supports_vector_variable_rotates(void) { ++ return false; ++ } + -+// Class for all float registers -+reg_class float_reg( -+ F0, -+ F1, -+ F2, -+ F3, -+ F4, -+ F5, -+ F6, -+ F7, -+ F8, -+ F9, -+ F10, -+ F11, -+ F12, -+ F13, -+ F14, -+ F15, -+ F16, -+ F17, -+ F18, -+ F19, -+ F20, -+ F21, -+ F22, -+ F23, -+ F24, -+ F25, -+ F26, -+ F27, -+ F28, -+ F29, -+ F30, -+ F31 -+); ++ // Does the CPU supports vector constant rotate instructions? ++ static constexpr bool supports_vector_constant_rotates(int shift) { ++ return false; ++ } + -+// Double precision float registers have virtual `high halves' that -+// are needed by the allocator. -+// Class for all double registers -+reg_class double_reg( -+ F0, F0_H, -+ F1, F1_H, -+ F2, F2_H, -+ F3, F3_H, -+ F4, F4_H, -+ F5, F5_H, -+ F6, F6_H, -+ F7, F7_H, -+ F8, F8_H, -+ F9, F9_H, -+ F10, F10_H, -+ F11, F11_H, -+ F12, F12_H, -+ F13, F13_H, -+ F14, F14_H, -+ F15, F15_H, -+ F16, F16_H, -+ F17, F17_H, -+ F18, F18_H, -+ F19, F19_H, -+ F20, F20_H, -+ F21, F21_H, -+ F22, F22_H, -+ F23, F23_H, -+ F24, F24_H, -+ F25, F25_H, -+ F26, F26_H, -+ F27, F27_H, -+ F28, F28_H, -+ F29, F29_H, -+ F30, F30_H, -+ F31, F31_H -+); ++ // Does the CPU supports vector unsigned comparison instructions? 
++ static const bool supports_vector_comparison_unsigned(int vlen, BasicType bt) { ++ return false; ++ } + -+// Class for all RVV vector registers -+reg_class vectora_reg( -+ V1, V1_H, V1_J, V1_K, -+ V2, V2_H, V2_J, V2_K, -+ V3, V3_H, V3_J, V3_K, -+ V4, V4_H, V4_J, V4_K, -+ V5, V5_H, V5_J, V5_K, -+ V6, V6_H, V6_J, V6_K, -+ V7, V7_H, V7_J, V7_K, -+ V8, V8_H, V8_J, V8_K, -+ V9, V9_H, V9_J, V9_K, -+ V10, V10_H, V10_J, V10_K, -+ V11, V11_H, V11_J, V11_K, -+ V12, V12_H, V12_J, V12_K, -+ V13, V13_H, V13_J, V13_K, -+ V14, V14_H, V14_J, V14_K, -+ V15, V15_H, V15_J, V15_K, -+ V16, V16_H, V16_J, V16_K, -+ V17, V17_H, V17_J, V17_K, -+ V18, V18_H, V18_J, V18_K, -+ V19, V19_H, V19_J, V19_K, -+ V20, V20_H, V20_J, V20_K, -+ V21, V21_H, V21_J, V21_K, -+ V22, V22_H, V22_J, V22_K, -+ V23, V23_H, V23_J, V23_K, -+ V24, V24_H, V24_J, V24_K, -+ V25, V25_H, V25_J, V25_K, -+ V26, V26_H, V26_J, V26_K, -+ V27, V27_H, V27_J, V27_K, -+ V28, V28_H, V28_J, V28_K, -+ V29, V29_H, V29_J, V29_K, -+ V30, V30_H, V30_J, V30_K, -+ V31, V31_H, V31_J, V31_K -+); ++ // Some microarchitectures have mask registers used on vectors ++ static const bool has_predicated_vectors(void) { ++ return false; ++ } + -+// Class for 64 bit register f0 -+reg_class f0_reg( -+ F0, F0_H -+); ++ // true means we have fast l2f convers ++ // false means that conversion is done by runtime call ++ static constexpr bool convL2FSupported(void) { ++ return true; ++ } + -+// Class for 64 bit register f1 -+reg_class f1_reg( -+ F1, F1_H -+); ++ // Implements a variant of EncodeISOArrayNode that encode ASCII only ++ static const bool supports_encode_ascii_array = false; + -+// Class for 64 bit register f2 -+reg_class f2_reg( -+ F2, F2_H -+); ++ // Returns pre-selection estimated size of a vector operation. ++ static int vector_op_pre_select_sz_estimate(int vopc, BasicType ety, int vlen) { ++ return 0; ++ } + -+// Class for 64 bit register f3 -+reg_class f3_reg( -+ F3, F3_H -+); ++#endif // CPU_RISCV_MATCHER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp +new file mode 100644 +index 00000000000..1f7c0c87c21 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp +@@ -0,0 +1,461 @@ ++/* ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ + -+// class for vector register v1 -+reg_class v1_reg( -+ V1, V1_H, V1_J, V1_K -+); ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "classfile/javaClasses.inline.hpp" ++#include "classfile/vmClasses.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "memory/allocation.inline.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/flags/flagSetting.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/stubRoutines.hpp" + -+// class for vector register v2 -+reg_class v2_reg( -+ V2, V2_H, V2_J, V2_K -+); ++#define __ _masm-> + -+// class for vector register v3 -+reg_class v3_reg( -+ V3, V3_H, V3_J, V3_K -+); ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#else ++#define BLOCK_COMMENT(str) __ block_comment(str) ++#endif + -+// class for vector register v4 -+reg_class v4_reg( -+ V4, V4_H, V4_J, V4_K -+); ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") + -+// class for vector register v5 -+reg_class v5_reg( -+ V5, V5_H, V5_J, V5_K -+); ++void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg) { ++ assert_cond(_masm != NULL); ++ if (VerifyMethodHandles) { ++ verify_klass(_masm, klass_reg, VM_CLASS_ID(java_lang_Class), ++ "MH argument is a Class"); ++ } ++ __ ld(klass_reg, Address(klass_reg, java_lang_Class::klass_offset())); ++} + -+// class for condition codes -+reg_class reg_flags(RFLAGS); -+%} ++#ifdef ASSERT ++static int check_nonzero(const char* xname, int x) { ++ assert(x != 0, "%s should be nonzero", xname); ++ return x; ++} ++#define NONZERO(x) check_nonzero(#x, x) ++#else //ASSERT ++#define NONZERO(x) (x) ++#endif //PRODUCT + -+//----------DEFINITION BLOCK--------------------------------------------------- -+// Define name --> value mappings to inform the ADLC of an integer valued name -+// Current support includes integer values in the range [0, 0x7FFFFFFF] -+// Format: -+// int_def ( , ); -+// Generated Code in ad_.hpp -+// #define () -+// // value == -+// Generated code in ad_.cpp adlc_verification() -+// assert( == , "Expect () to equal "); -+// ++#ifdef ASSERT ++void MethodHandles::verify_klass(MacroAssembler* _masm, ++ Register obj, vmClassID klass_id, ++ const char* error_message) { ++ assert_cond(_masm != NULL); ++ InstanceKlass** klass_addr = vmClasses::klass_addr_at(klass_id); ++ Klass* klass = vmClasses::klass_at(klass_id); ++ Register temp = t1; ++ Register temp2 = t0; // used by MacroAssembler::cmpptr ++ Label L_ok, L_bad; ++ BLOCK_COMMENT("verify_klass {"); ++ __ verify_oop(obj); ++ __ beqz(obj, L_bad); ++ __ push_reg(RegSet::of(temp, temp2), sp); ++ __ load_klass(temp, obj); ++ __ cmpptr(temp, ExternalAddress((address) klass_addr), L_ok); ++ intptr_t super_check_offset = klass->super_check_offset(); ++ __ ld(temp, Address(temp, super_check_offset)); ++ __ cmpptr(temp, ExternalAddress((address) klass_addr), L_ok); ++ __ pop_reg(RegSet::of(temp, temp2), sp); ++ __ bind(L_bad); ++ __ stop(error_message); ++ __ BIND(L_ok); ++ __ pop_reg(RegSet::of(temp, temp2), sp); ++ BLOCK_COMMENT("} verify_klass"); ++} + -+// we follow the ppc-aix port in using a simple cost model which ranks -+// register operations as cheap, memory ops as more expensive and -+// branches as most expensive. the first two have a low as well as a -+// normal cost. 
huge cost appears to be a way of saying don't do -+// something ++void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) {} + -+definitions %{ -+ // The default cost (of a register move instruction). -+ int_def DEFAULT_COST ( 100, 100); -+ int_def ALU_COST ( 100, 1 * DEFAULT_COST); // unknown, const, arith, shift, slt, -+ // multi, auipc, nop, logical, move -+ int_def LOAD_COST ( 300, 3 * DEFAULT_COST); // load, fpload -+ int_def STORE_COST ( 100, 1 * DEFAULT_COST); // store, fpstore -+ int_def XFER_COST ( 300, 3 * DEFAULT_COST); // mfc, mtc, fcvt, fmove, fcmp -+ int_def BRANCH_COST ( 100, 1 * DEFAULT_COST); // branch, jmp, call -+ int_def IMUL_COST ( 1000, 10 * DEFAULT_COST); // imul -+ int_def IDIVSI_COST ( 3400, 34 * DEFAULT_COST); // idivdi -+ int_def IDIVDI_COST ( 6600, 66 * DEFAULT_COST); // idivsi -+ int_def FMUL_SINGLE_COST ( 500, 5 * DEFAULT_COST); // fadd, fmul, fmadd -+ int_def FMUL_DOUBLE_COST ( 700, 7 * DEFAULT_COST); // fadd, fmul, fmadd -+ int_def FDIV_COST ( 2000, 20 * DEFAULT_COST); // fdiv -+ int_def FSQRT_COST ( 2500, 25 * DEFAULT_COST); // fsqrt -+%} ++#endif //ASSERT + ++void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, ++ bool for_compiler_entry) { ++ assert_cond(_masm != NULL); ++ assert(method == xmethod, "interpreter calling convention"); ++ Label L_no_such_method; ++ __ beqz(xmethod, L_no_such_method); ++ __ verify_method_ptr(method); + ++ if (!for_compiler_entry && JvmtiExport::can_post_interpreter_events()) { ++ Label run_compiled_code; ++ // JVMTI events, such as single-stepping, are implemented partly by avoiding running ++ // compiled code in threads for which the event is enabled. Check here for ++ // interp_only_mode if these events CAN be enabled. + -+//----------SOURCE BLOCK------------------------------------------------------- -+// This is a block of C++ code which provides values, functions, and -+// definitions necessary in the rest of the architecture description ++ __ lwu(t0, Address(xthread, JavaThread::interp_only_mode_offset())); ++ __ beqz(t0, run_compiled_code); ++ __ ld(t0, Address(method, Method::interpreter_entry_offset())); ++ __ jr(t0); ++ __ BIND(run_compiled_code); ++ } + -+source_hpp %{ ++ const ByteSize entry_offset = for_compiler_entry ? Method::from_compiled_offset() : ++ Method::from_interpreted_offset(); ++ __ ld(t0,Address(method, entry_offset)); ++ __ jr(t0); ++ __ bind(L_no_such_method); ++ __ far_jump(RuntimeAddress(StubRoutines::throw_AbstractMethodError_entry())); ++} + -+#include "asm/macroAssembler.hpp" -+#include "gc/shared/barrierSetAssembler.hpp" -+#include "gc/shared/cardTable.hpp" -+#include "gc/shared/cardTableBarrierSet.hpp" -+#include "gc/shared/collectedHeap.hpp" -+#include "opto/addnode.hpp" -+#include "opto/convertnode.hpp" ++void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm, ++ Register recv, Register method_temp, ++ Register temp2, ++ bool for_compiler_entry) { ++ assert_cond(_masm != NULL); ++ BLOCK_COMMENT("jump_to_lambda_form {"); ++ // This is the initial entry point of a lazy method handle. ++ // After type checking, it picks up the invoker from the LambdaForm. 
++ assert_different_registers(recv, method_temp, temp2); ++ assert(recv != noreg, "required register"); ++ assert(method_temp == xmethod, "required register for loading method"); + -+extern RegMask _ANY_REG32_mask; -+extern RegMask _ANY_REG_mask; -+extern RegMask _PTR_REG_mask; -+extern RegMask _NO_SPECIAL_REG32_mask; -+extern RegMask _NO_SPECIAL_REG_mask; -+extern RegMask _NO_SPECIAL_PTR_REG_mask; ++ // Load the invoker, as MH -> MH.form -> LF.vmentry ++ __ verify_oop(recv); ++ __ load_heap_oop(method_temp, Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset())), temp2); ++ __ verify_oop(method_temp); ++ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset())), temp2); ++ __ verify_oop(method_temp); ++ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::method_offset())), temp2); ++ __ verify_oop(method_temp); ++ __ access_load_at(T_ADDRESS, IN_HEAP, method_temp, Address(method_temp, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset())), noreg, noreg); + -+class CallStubImpl { ++ if (VerifyMethodHandles && !for_compiler_entry) { ++ // make sure recv is already on stack ++ __ ld(temp2, Address(method_temp, Method::const_offset())); ++ __ load_sized_value(temp2, ++ Address(temp2, ConstMethod::size_of_parameters_offset()), ++ sizeof(u2), /*is_signed*/ false); ++ Label L; ++ __ ld(t0, __ argument_address(temp2, -1)); ++ __ beq(recv, t0, L); ++ __ ld(x10, __ argument_address(temp2, -1)); ++ __ ebreak(); ++ __ BIND(L); ++ } + -+ //-------------------------------------------------------------- -+ //---< Used for optimization in Compile::shorten_branches >--- -+ //-------------------------------------------------------------- ++ jump_from_method_handle(_masm, method_temp, temp2, for_compiler_entry); ++ BLOCK_COMMENT("} jump_to_lambda_form"); ++} + -+ public: -+ // Size of call trampoline stub. -+ static uint size_call_trampoline() { -+ return 0; // no call trampolines on this platform ++// Code generation ++address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm, ++ vmIntrinsics::ID iid) { ++ assert_cond(_masm != NULL); ++ const bool not_for_compiler_entry = false; // this is the interpreter entry ++ assert(is_signature_polymorphic(iid), "expected invoke iid"); ++ if (iid == vmIntrinsics::_invokeGeneric || ++ iid == vmIntrinsics::_compiledLambdaForm) { ++ // Perhaps surprisingly, the symbolic references visible to Java are not directly used. ++ // They are linked to Java-generated adapters via MethodHandleNatives.linkMethod. ++ // They all allow an appendix argument. ++ __ ebreak(); // empty stubs make SG sick ++ return NULL; + } + -+ // number of relocations needed by a call trampoline stub -+ static uint reloc_call_trampoline() { -+ return 0; // no call trampolines on this platform ++ // No need in interpreter entry for linkToNative for now. ++ // Interpreter calls compiled entry through i2c. 
++ if (iid == vmIntrinsics::_linkToNative) { ++ __ ebreak(); ++ return NULL; + } -+}; + -+class HandlerImpl { ++ // x30: sender SP (must preserve; see prepare_to_jump_from_interpreted) ++ // xmethod: Method* ++ // x13: argument locator (parameter slot count, added to sp) ++ // x11: used as temp to hold mh or receiver ++ // x10, x29: garbage temps, blown away ++ Register argp = x13; // argument list ptr, live on error paths ++ Register mh = x11; // MH receiver; dies quickly and is recycled + -+ public: ++ // here's where control starts out: ++ __ align(CodeEntryAlignment); ++ address entry_point = __ pc(); + -+ static int emit_exception_handler(CodeBuffer &cbuf); -+ static int emit_deopt_handler(CodeBuffer& cbuf); ++ if (VerifyMethodHandles) { ++ assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2"); + -+ static uint size_exception_handler() { -+ return MacroAssembler::far_branch_size(); ++ Label L; ++ BLOCK_COMMENT("verify_intrinsic_id {"); ++ __ lhu(t0, Address(xmethod, Method::intrinsic_id_offset_in_bytes())); ++ __ mv(t1, (int) iid); ++ __ beq(t0, t1, L); ++ if (iid == vmIntrinsics::_linkToVirtual || ++ iid == vmIntrinsics::_linkToSpecial) { ++ // could do this for all kinds, but would explode assembly code size ++ trace_method_handle(_masm, "bad Method*::intrinsic_id"); ++ } ++ __ ebreak(); ++ __ bind(L); ++ BLOCK_COMMENT("} verify_intrinsic_id"); + } + -+ static uint size_deopt_handler() { -+ // count auipc + far branch -+ return NativeInstruction::instruction_size + MacroAssembler::far_branch_size(); ++ // First task: Find out how big the argument list is. ++ Address x13_first_arg_addr; ++ int ref_kind = signature_polymorphic_intrinsic_ref_kind(iid); ++ assert(ref_kind != 0 || iid == vmIntrinsics::_invokeBasic, "must be _invokeBasic or a linkTo intrinsic"); ++ if (ref_kind == 0 || MethodHandles::ref_kind_has_receiver(ref_kind)) { ++ __ ld(argp, Address(xmethod, Method::const_offset())); ++ __ load_sized_value(argp, ++ Address(argp, ConstMethod::size_of_parameters_offset()), ++ sizeof(u2), /*is_signed*/ false); ++ x13_first_arg_addr = __ argument_address(argp, -1); ++ } else { ++ DEBUG_ONLY(argp = noreg); + } -+}; + -+// predicate controlling translation of StoreCM -+bool unnecessary_storestore(const Node *storecm); ++ if (!is_signature_polymorphic_static(iid)) { ++ __ ld(mh, x13_first_arg_addr); ++ DEBUG_ONLY(argp = noreg); ++ } + -+bool is_CAS(int opcode, bool maybe_volatile); ++ // x13_first_arg_addr is live! + -+// predicate controlling translation of CompareAndSwapX -+bool needs_acquiring_load_exclusive(const Node *load); ++ trace_method_handle_interpreter_entry(_masm, iid); ++ if (iid == vmIntrinsics::_invokeBasic) { ++ generate_method_handle_dispatch(_masm, iid, mh, noreg, not_for_compiler_entry); ++ } else { ++ // Adjust argument list by popping the trailing MemberName argument. ++ Register recv = noreg; ++ if (MethodHandles::ref_kind_has_receiver(ref_kind)) { ++ // Load the receiver (not the MH; the actual MemberName's receiver) up from the interpreter stack. 
++ __ ld(recv = x12, x13_first_arg_addr); ++ } ++ DEBUG_ONLY(argp = noreg); ++ Register xmember = xmethod; // MemberName ptr; incoming method ptr is dead now ++ __ pop_reg(xmember); // extract last argument ++ generate_method_handle_dispatch(_masm, iid, recv, xmember, not_for_compiler_entry); ++ } + ++ return entry_point; ++} + -+// predicate using the temp register for decoding klass -+bool maybe_use_tmp_register_decoding_klass(); -+%} + -+source %{ ++void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, ++ vmIntrinsics::ID iid, ++ Register receiver_reg, ++ Register member_reg, ++ bool for_compiler_entry) { ++ assert_cond(_masm != NULL); ++ assert(is_signature_polymorphic(iid), "expected invoke iid"); ++ // temps used in this code are not used in *either* compiled or interpreted calling sequences ++ Register temp1 = x7; ++ Register temp2 = x28; ++ Register temp3 = x29; // x30 is live by this point: it contains the sender SP ++ if (for_compiler_entry) { ++ assert(receiver_reg == (iid == vmIntrinsics::_linkToStatic ? noreg : j_rarg0), "only valid assignment"); ++ assert_different_registers(temp1, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7); ++ assert_different_registers(temp2, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7); ++ assert_different_registers(temp3, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7); ++ } + -+ // Derived RegMask with conditionally allocatable registers ++ assert_different_registers(temp1, temp2, temp3, receiver_reg); ++ assert_different_registers(temp1, temp2, temp3, member_reg); + -+ RegMask _ANY_REG32_mask; -+ RegMask _ANY_REG_mask; -+ RegMask _PTR_REG_mask; -+ RegMask _NO_SPECIAL_REG32_mask; -+ RegMask _NO_SPECIAL_REG_mask; -+ RegMask _NO_SPECIAL_PTR_REG_mask; ++ if (iid == vmIntrinsics::_invokeBasic || iid == vmIntrinsics::_linkToNative) { ++ if (iid == vmIntrinsics::_linkToNative) { ++ assert(for_compiler_entry, "only compiler entry is supported"); ++ } ++ // indirect through MH.form.vmentry.vmtarget ++ jump_to_lambda_form(_masm, receiver_reg, xmethod, temp1, for_compiler_entry); ++ } else { ++ // The method is a member invoker used by direct method handles. ++ if (VerifyMethodHandles) { ++ // make sure the trailing argument really is a MemberName (caller responsibility) ++ verify_klass(_masm, member_reg, VM_CLASS_ID(java_lang_invoke_MemberName), ++ "MemberName required for invokeVirtual etc."); ++ } + -+ void reg_mask_init() { ++ Address member_clazz( member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset())); ++ Address member_vmindex( member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset())); ++ Address member_vmtarget( member_reg, NONZERO(java_lang_invoke_MemberName::method_offset())); ++ Address vmtarget_method( xmethod, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset())); + -+ _ANY_REG32_mask = _ALL_REG32_mask; -+ _ANY_REG32_mask.Remove(OptoReg::as_OptoReg(x0->as_VMReg())); ++ Register temp1_recv_klass = temp1; ++ if (iid != vmIntrinsics::_linkToStatic) { ++ __ verify_oop(receiver_reg); ++ if (iid == vmIntrinsics::_linkToSpecial) { ++ // Don't actually load the klass; just null-check the receiver. 
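++        // (linkToSpecial dispatches directly, so the receiver's dynamic type is not needed;
++        // with VerifyMethodHandles the klass is loaded further down only for the
++        // MemberName.clazz subtype check.)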
++ __ null_check(receiver_reg); ++ } else { ++ // load receiver klass itself ++ __ null_check(receiver_reg, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(temp1_recv_klass, receiver_reg); ++ __ verify_klass_ptr(temp1_recv_klass); ++ } ++ BLOCK_COMMENT("check_receiver {"); ++ // The receiver for the MemberName must be in receiver_reg. ++ // Check the receiver against the MemberName.clazz ++ if (VerifyMethodHandles && iid == vmIntrinsics::_linkToSpecial) { ++ // Did not load it above... ++ __ load_klass(temp1_recv_klass, receiver_reg); ++ __ verify_klass_ptr(temp1_recv_klass); ++ } ++ if (VerifyMethodHandles && iid != vmIntrinsics::_linkToInterface) { ++ Label L_ok; ++ Register temp2_defc = temp2; ++ __ load_heap_oop(temp2_defc, member_clazz, temp3); ++ load_klass_from_Class(_masm, temp2_defc); ++ __ verify_klass_ptr(temp2_defc); ++ __ check_klass_subtype(temp1_recv_klass, temp2_defc, temp3, L_ok); ++ // If we get here, the type check failed! ++ __ ebreak(); ++ __ bind(L_ok); ++ } ++ BLOCK_COMMENT("} check_receiver"); ++ } ++ if (iid == vmIntrinsics::_linkToSpecial || ++ iid == vmIntrinsics::_linkToStatic) { ++ DEBUG_ONLY(temp1_recv_klass = noreg); // these guys didn't load the recv_klass ++ } + -+ _ANY_REG_mask = _ALL_REG_mask; -+ _ANY_REG_mask.SUBTRACT(_ZR_REG_mask); ++ // Live registers at this point: ++ // member_reg - MemberName that was the trailing argument ++ // temp1_recv_klass - klass of stacked receiver, if needed ++ // x30 - interpreter linkage (if interpreted) ++ // x11 ... x10 - compiler arguments (if compiled) + -+ _PTR_REG_mask = _ALL_REG_mask; -+ _PTR_REG_mask.SUBTRACT(_ZR_REG_mask); ++ Label L_incompatible_class_change_error; ++ switch (iid) { ++ case vmIntrinsics::_linkToSpecial: ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeSpecial, member_reg, temp3); ++ } ++ __ load_heap_oop(xmethod, member_vmtarget); ++ __ access_load_at(T_ADDRESS, IN_HEAP, xmethod, vmtarget_method, noreg, noreg); ++ break; + -+ _NO_SPECIAL_REG32_mask = _ALL_REG32_mask; -+ _NO_SPECIAL_REG32_mask.SUBTRACT(_NON_ALLOCATABLE_REG32_mask); ++ case vmIntrinsics::_linkToStatic: ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeStatic, member_reg, temp3); ++ } ++ __ load_heap_oop(xmethod, member_vmtarget); ++ __ access_load_at(T_ADDRESS, IN_HEAP, xmethod, vmtarget_method, noreg, noreg); ++ break; + -+ _NO_SPECIAL_REG_mask = _ALL_REG_mask; -+ _NO_SPECIAL_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask); ++ case vmIntrinsics::_linkToVirtual: ++ { ++ // same as TemplateTable::invokevirtual, ++ // minus the CP setup and profiling: + -+ _NO_SPECIAL_PTR_REG_mask = _ALL_REG_mask; -+ _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask); ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeVirtual, member_reg, temp3); ++ } + -+ // x27 is not allocatable when compressed oops is on -+ if (UseCompressedOops) { -+ _NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(x27->as_VMReg())); -+ _NO_SPECIAL_REG_mask.SUBTRACT(_HEAPBASE_REG_mask); -+ _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_HEAPBASE_REG_mask); -+ } ++ // pick out the vtable index from the MemberName, and then we can discard it: ++ Register temp2_index = temp2; ++ __ access_load_at(T_ADDRESS, IN_HEAP, temp2_index, member_vmindex, noreg, noreg); + -+ // x8 is not allocatable when PreserveFramePointer is on -+ if (PreserveFramePointer) { -+ _NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(x8->as_VMReg())); -+ _NO_SPECIAL_REG_mask.SUBTRACT(_FP_REG_mask); -+ _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_FP_REG_mask); -+ } 
-+ } ++ if (VerifyMethodHandles) { ++ Label L_index_ok; ++ __ bgez(temp2_index, L_index_ok); ++ __ ebreak(); ++ __ BIND(L_index_ok); ++ } + ++ // Note: The verifier invariants allow us to ignore MemberName.clazz and vmtarget ++ // at this point. And VerifyMethodHandles has already checked clazz, if needed. + -+// predicate controlling translation of StoreCM -+// -+// returns true if a StoreStore must precede the card write otherwise -+// false -+bool unnecessary_storestore(const Node *storecm) -+{ -+ assert(storecm != NULL && storecm->Opcode() == Op_StoreCM, "expecting a StoreCM"); ++ // get target Method* & entry point ++ __ lookup_virtual_method(temp1_recv_klass, temp2_index, xmethod); ++ break; ++ } + -+ // we need to generate a membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore) -+ // between an object put and the associated card mark when we are using -+ // CMS without conditional card marking ++ case vmIntrinsics::_linkToInterface: ++ { ++ // same as TemplateTable::invokeinterface ++ // (minus the CP setup and profiling, with different argument motion) ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeInterface, member_reg, temp3); ++ } + -+ if (UseConcMarkSweepGC && !UseCondCardMark) { -+ return false; -+ } ++ Register temp3_intf = temp3; ++ __ load_heap_oop(temp3_intf, member_clazz); ++ load_klass_from_Class(_masm, temp3_intf); ++ __ verify_klass_ptr(temp3_intf); + -+ // a storestore is unnecesary in all other cases ++ Register rindex = xmethod; ++ __ access_load_at(T_ADDRESS, IN_HEAP, rindex, member_vmindex, noreg, noreg); ++ if (VerifyMethodHandles) { ++ Label L; ++ __ bgez(rindex, L); ++ __ ebreak(); ++ __ bind(L); ++ } + -+ return true; -+} ++ // given intf, index, and recv klass, dispatch to the implementation method ++ __ lookup_interface_method(temp1_recv_klass, temp3_intf, ++ // note: next two args must be the same: ++ rindex, xmethod, ++ temp2, ++ L_incompatible_class_change_error); ++ break; ++ } + -+// is_CAS(int opcode, bool maybe_volatile) -+// -+// return true if opcode is one of the possible CompareAndSwapX -+// values otherwise false. 
-+bool is_CAS(int opcode, bool maybe_volatile) -+{ -+ switch(opcode) { -+ // We handle these -+ case Op_CompareAndSwapI: -+ case Op_CompareAndSwapL: -+ case Op_CompareAndSwapP: -+ case Op_CompareAndSwapN: -+ case Op_CompareAndSwapB: -+ case Op_CompareAndSwapS: -+ case Op_GetAndSetI: -+ case Op_GetAndSetL: -+ case Op_GetAndSetP: -+ case Op_GetAndSetN: -+ case Op_GetAndAddI: -+ case Op_GetAndAddL: -+#if INCLUDE_SHENANDOAHGC -+ case Op_ShenandoahCompareAndSwapP: -+ case Op_ShenandoahCompareAndSwapN: -+#endif -+ return true; -+ case Op_CompareAndExchangeI: -+ case Op_CompareAndExchangeN: -+ case Op_CompareAndExchangeB: -+ case Op_CompareAndExchangeS: -+ case Op_CompareAndExchangeL: -+ case Op_CompareAndExchangeP: -+ case Op_WeakCompareAndSwapB: -+ case Op_WeakCompareAndSwapS: -+ case Op_WeakCompareAndSwapI: -+ case Op_WeakCompareAndSwapL: -+ case Op_WeakCompareAndSwapP: -+ case Op_WeakCompareAndSwapN: -+ return maybe_volatile; -+ default: -+ return false; -+ } -+} ++ default: ++ fatal("unexpected intrinsic %d: %s", vmIntrinsics::as_int(iid), vmIntrinsics::name_at(iid)); ++ break; ++ } + -+// predicate controlling translation of CAS -+// -+// returns true if CAS needs to use an acquiring load otherwise false -+bool needs_acquiring_load_exclusive(const Node *n) -+{ -+ assert(n != NULL && is_CAS(n->Opcode(), true), "expecting a compare and swap"); -+ if (UseBarriersForVolatile) { -+ return false; -+ } ++ // live at this point: xmethod, x30 (if interpreted) + -+ LoadStoreNode* ldst = n->as_LoadStore(); -+ if (n != NULL && is_CAS(n->Opcode(), false)) { -+ assert(ldst != NULL && ldst->trailing_membar() != NULL, "expected trailing membar"); -+ } else { -+ return ldst != NULL && ldst->trailing_membar() != NULL; ++ // After figuring out which concrete method to call, jump into it. ++ // Note that this works in the interpreter with no data motion. ++ // But the compiled version will require that r2_recv be shifted out. ++ __ verify_method_ptr(xmethod); ++ jump_from_method_handle(_masm, xmethod, temp1, for_compiler_entry); ++ if (iid == vmIntrinsics::_linkToInterface) { ++ __ bind(L_incompatible_class_change_error); ++ __ far_jump(RuntimeAddress(StubRoutines::throw_IncompatibleClassChangeError_entry())); ++ } + } -+ // so we can just return true here -+ return true; -+} + -+bool maybe_use_tmp_register_decoding_klass() { -+ return !UseCompressedOops && -+ Universe::narrow_klass_base() != NULL && -+ Universe::narrow_klass_shift() != 0; +} -+#define __ _masm. + -+// advance declarations for helper functions to convert register -+// indices to register objects ++#ifndef PRODUCT ++void trace_method_handle_stub(const char* adaptername, ++ oopDesc* mh, ++ intptr_t* saved_regs, ++ intptr_t* entry_sp) { } + -+// the ad file has to provide implementations of certain methods -+// expected by the generic code -+// -+// REQUIRED FUNCTIONALITY ++// The stub wraps the arguments in a struct on the stack to avoid ++// dealing with the different calling conventions for passing 6 ++// arguments. 
++struct MethodHandleStubArguments { ++ const char* adaptername; ++ oopDesc* mh; ++ intptr_t* saved_regs; ++ intptr_t* entry_sp; ++}; ++void trace_method_handle_stub_wrapper(MethodHandleStubArguments* args) { } + -+//============================================================================= ++void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) { } ++#endif //PRODUCT +diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.hpp b/src/hotspot/cpu/riscv/methodHandles_riscv.hpp +new file mode 100644 +index 00000000000..f73aba29d67 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/methodHandles_riscv.hpp +@@ -0,0 +1,57 @@ ++/* ++ * Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+// !!!!! Special hack to get all types of calls to specify the byte offset -+// from the start of the call to the point where the return address -+// will point. ++// Platform-specific definitions for method handles. ++// These definitions are inlined into class MethodHandles. + -+int MachCallStaticJavaNode::ret_addr_offset() -+{ -+ // call should be a simple jal -+ int off = 4; -+ return off; -+} ++// Adapters ++enum /* platform_dependent_constants */ { ++ adapter_code_size = 32000 DEBUG_ONLY(+ 120000) ++}; + -+int MachCallDynamicJavaNode::ret_addr_offset() -+{ -+ return 28; // movptr, jal -+} ++public: + -+int MachCallRuntimeNode::ret_addr_offset() { -+ // for generated stubs the call will be -+ // jal(addr) -+ // or with far branches -+ // jal(trampoline_stub) -+ // for real runtime callouts it will be five instructions -+ // see riscv_enc_java_to_runtime -+ // la(t1, retaddr) -+ // la(t0, RuntimeAddress(addr)) -+ // addi(sp, sp, -2 * wordSize) -+ // sd(zr, Address(sp)) -+ // sd(t1, Address(sp, wordSize)) -+ // jalr(t0) -+ CodeBlob *cb = CodeCache::find_blob(_entry_point); -+ if (cb != NULL) { -+ return 1 * NativeInstruction::instruction_size; -+ } else { -+ return 11 * NativeInstruction::instruction_size; -+ } -+} ++ static void load_klass_from_Class(MacroAssembler* _masm, Register klass_reg); + -+// Indicate if the safepoint node needs the polling page as an input ++ static void verify_klass(MacroAssembler* _masm, ++ Register obj, vmClassID klass_id, ++ const char* error_message = "wrong klass") NOT_DEBUG_RETURN; + -+// the shared code plants the oop data at the start of the generated -+// code for the safepoint node and that needs ot be at the load -+// instruction itself. 
so we cannot plant a mov of the safepoint poll -+// address followed by a load. setting this to true means the mov is -+// scheduled as a prior instruction. that's better for scheduling -+// anyway. ++ static void verify_method_handle(MacroAssembler* _masm, Register mh_reg) { ++ verify_klass(_masm, mh_reg, VM_CLASS_ID(java_lang_invoke_MethodHandle), ++ "reference is a MH"); ++ } + -+bool SafePointNode::needs_polling_address_input() -+{ -+ return true; -+} ++ static void verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) NOT_DEBUG_RETURN; + -+//============================================================================= ++ // Similar to InterpreterMacroAssembler::jump_from_interpreted. ++ // Takes care of special dispatch from single stepping too. ++ static void jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, ++ bool for_compiler_entry); + -+#ifndef PRODUCT -+void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const { -+ assert_cond(st != NULL); -+ st->print("BREAKPOINT"); -+} ++ static void jump_to_lambda_form(MacroAssembler* _masm, ++ Register recv, Register method_temp, ++ Register temp2, ++ bool for_compiler_entry); +diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.cpp b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp +new file mode 100644 +index 00000000000..0a05c577860 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp +@@ -0,0 +1,429 @@ ++/* ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/compiledIC.hpp" ++#include "memory/resourceArea.hpp" ++#include "nativeInst_riscv.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/handles.hpp" ++#include "runtime/orderAccess.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "utilities/ostream.hpp" ++#ifdef COMPILER1 ++#include "c1/c1_Runtime1.hpp" +#endif + -+void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { -+ MacroAssembler _masm(&cbuf); -+ __ ebreak(); ++Register NativeInstruction::extract_rs1(address instr) { ++ assert_cond(instr != NULL); ++ return as_Register(Assembler::extract(((unsigned*)instr)[0], 19, 15)); +} + -+uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const { -+ return MachNode::size(ra_); ++Register NativeInstruction::extract_rs2(address instr) { ++ assert_cond(instr != NULL); ++ return as_Register(Assembler::extract(((unsigned*)instr)[0], 24, 20)); +} + -+//============================================================================= -+ -+#ifndef PRODUCT -+ void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { -+ st->print("nop \t# %d bytes pad for loops and calls", _count); -+ } -+#endif -+ -+ void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { -+ MacroAssembler _masm(&cbuf); -+ for (int i = 0; i < _count; i++) { -+ __ nop(); -+ } -+ } ++Register NativeInstruction::extract_rd(address instr) { ++ assert_cond(instr != NULL); ++ return as_Register(Assembler::extract(((unsigned*)instr)[0], 11, 7)); ++} + -+ uint MachNopNode::size(PhaseRegAlloc*) const { -+ return _count * NativeInstruction::instruction_size; -+ } ++uint32_t NativeInstruction::extract_opcode(address instr) { ++ assert_cond(instr != NULL); ++ return Assembler::extract(((unsigned*)instr)[0], 6, 0); ++} + -+//============================================================================= -+const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty; ++uint32_t NativeInstruction::extract_funct3(address instr) { ++ assert_cond(instr != NULL); ++ return Assembler::extract(((unsigned*)instr)[0], 14, 12); ++} + -+int Compile::ConstantTable::calculate_table_base_offset() const { -+ return 0; // absolute addressing, no offset ++bool NativeInstruction::is_pc_relative_at(address instr) { ++ // auipc + jalr ++ // auipc + addi ++ // auipc + load ++ // auipc + fload_load ++ return (is_auipc_at(instr)) && ++ (is_addi_at(instr + instruction_size) || ++ is_jalr_at(instr + instruction_size) || ++ is_load_at(instr + instruction_size) || ++ is_float_load_at(instr + instruction_size)) && ++ check_pc_relative_data_dependency(instr); +} + -+bool MachConstantBaseNode::requires_postalloc_expand() const { return false; } -+void MachConstantBaseNode::postalloc_expand(GrowableArray *nodes, PhaseRegAlloc *ra_) { -+ ShouldNotReachHere(); ++// ie:ld(Rd, Label) ++bool NativeInstruction::is_load_pc_relative_at(address instr) { ++ return is_auipc_at(instr) && // auipc ++ is_ld_at(instr + instruction_size) && // ld ++ check_load_pc_relative_data_dependency(instr); +} + -+void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const { -+ // Empty encoding ++bool NativeInstruction::is_movptr_at(address instr) { ++ return is_lui_at(instr) && // Lui ++ is_addi_at(instr + instruction_size) && // Addi ++ is_slli_shift_at(instr + instruction_size * 2, 11) && // Slli Rd, Rs, 11 ++ is_addi_at(instr + instruction_size * 3) && // Addi ++ is_slli_shift_at(instr + instruction_size * 4, 
5) && // Slli Rd, Rs, 5 ++ (is_addi_at(instr + instruction_size * 5) || ++ is_jalr_at(instr + instruction_size * 5) || ++ is_load_at(instr + instruction_size * 5)) && // Addi/Jalr/Load ++ check_movptr_data_dependency(instr); +} + -+uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const { -+ return 0; ++bool NativeInstruction::is_li32_at(address instr) { ++ return is_lui_at(instr) && // lui ++ is_addiw_at(instr + instruction_size) && // addiw ++ check_li32_data_dependency(instr); +} + -+#ifndef PRODUCT -+void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const { -+ assert_cond(st != NULL); -+ st->print("-- \t// MachConstantBaseNode (empty encoding)"); ++bool NativeInstruction::is_li64_at(address instr) { ++ return is_lui_at(instr) && // lui ++ is_addi_at(instr + instruction_size) && // addi ++ is_slli_shift_at(instr + instruction_size * 2, 12) && // Slli Rd, Rs, 12 ++ is_addi_at(instr + instruction_size * 3) && // addi ++ is_slli_shift_at(instr + instruction_size * 4, 12) && // Slli Rd, Rs, 12 ++ is_addi_at(instr + instruction_size * 5) && // addi ++ is_slli_shift_at(instr + instruction_size * 6, 8) && // Slli Rd, Rs, 8 ++ is_addi_at(instr + instruction_size * 7) && // addi ++ check_li64_data_dependency(instr); +} -+#endif + -+#ifndef PRODUCT -+void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const { -+ assert_cond(st != NULL && ra_ != NULL); -+ Compile* C = ra_->C; ++void NativeCall::verify() { ++ assert(NativeCall::is_call_at((address)this), "unexpected code at call site"); ++} + -+ int framesize = C->frame_slots() << LogBytesPerInt; ++address NativeCall::destination() const { ++ address addr = (address)this; ++ assert(NativeInstruction::is_jal_at(instruction_address()), "inst must be jal."); ++ address destination = MacroAssembler::target_addr_for_insn(instruction_address()); + -+ if (C->need_stack_bang(framesize)) { -+ st->print("# stack bang size=%d\n\t", framesize); ++ // Do we use a trampoline stub for this call? ++ CodeBlob* cb = CodeCache::find_blob_unsafe(addr); // Else we get assertion if nmethod is zombie. ++ assert(cb && cb->is_nmethod(), "sanity"); ++ nmethod *nm = (nmethod *)cb; ++ if (nm != NULL && nm->stub_contains(destination) && is_NativeCallTrampolineStub_at(destination)) { ++ // Yes we do, so get the destination from the trampoline stub. ++ const address trampoline_stub_addr = destination; ++ destination = nativeCallTrampolineStub_at(trampoline_stub_addr)->destination(); + } + -+ st->print("sub sp, sp, #%d\n\t", framesize); -+ st->print("sd fp, [sp, #%d]", - 2 * wordSize); -+ st->print("sd ra, [sp, #%d]", - wordSize); -+ if (PreserveFramePointer) { st->print("\n\tsub fp, sp, #%d", 2 * wordSize); } ++ return destination; +} -+#endif + -+void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { -+ assert_cond(ra_ != NULL); -+ Compile* C = ra_->C; -+ MacroAssembler _masm(&cbuf); ++// Similar to replace_mt_safe, but just changes the destination. The ++// important thing is that free-running threads are able to execute this ++// call instruction at all times. ++// ++// Used in the runtime linkage of calls; see class CompiledIC. ++// ++// Add parameter assert_lock to switch off assertion ++// during code generation, where no patching lock is needed. ++void NativeCall::set_destination_mt_safe(address dest, bool assert_lock) { ++ assert(!assert_lock || ++ (Patching_lock->is_locked() || SafepointSynchronize::is_at_safepoint()) || ++ CompiledICLocker::is_safe(addr_at(0)), ++ "concurrent code patching"); + -+ // n.b. 
frame size includes space for return pc and fp -+ const int framesize = C->frame_size_in_bytes(); -+ assert(framesize % (2 * wordSize) == 0, "must preserve 2 * wordSize alignment"); ++ ResourceMark rm; ++ address addr_call = addr_at(0); ++ assert(NativeCall::is_call_at(addr_call), "unexpected code at call site"); + -+ // insert a nop at the start of the prolog so we can patch in a -+ // branch if we need to invalidate the method later -+ __ nop(); ++ // Patch the constant in the call's trampoline stub. ++ address trampoline_stub_addr = get_trampoline(); ++ if (trampoline_stub_addr != NULL) { ++ assert (!is_NativeCallTrampolineStub_at(dest), "chained trampolines"); ++ nativeCallTrampolineStub_at(trampoline_stub_addr)->set_destination(dest); ++ } + -+ assert_cond(C != NULL); -+ int bangsize = C->bang_size_in_bytes(); -+ if (C->need_stack_bang(bangsize) && UseStackBanging) { -+ __ generate_stack_overflow_check(bangsize); ++ // Patch the call. ++ if (Assembler::reachable_from_branch_at(addr_call, dest)) { ++ set_destination(dest); ++ } else { ++ assert (trampoline_stub_addr != NULL, "we need a trampoline"); ++ set_destination(trampoline_stub_addr); + } + -+ __ build_frame(framesize); ++ ICache::invalidate_range(addr_call, instruction_size); ++} + -+ if (VerifyStackAtCalls) { -+ Unimplemented(); -+ } ++address NativeCall::get_trampoline() { ++ address call_addr = addr_at(0); + -+ C->set_frame_complete(cbuf.insts_size()); ++ CodeBlob *code = CodeCache::find_blob(call_addr); ++ assert(code != NULL, "Could not find the containing code blob"); + -+ if (C->has_mach_constant_base_node()) { -+ // NOTE: We set the table base offset here because users might be -+ // emitted before MachConstantBaseNode. -+ Compile::ConstantTable& constant_table = C->constant_table(); -+ constant_table.set_table_base_offset(constant_table.calculate_table_base_offset()); ++ address jal_destination = MacroAssembler::pd_call_destination(call_addr); ++ if (code != NULL && code->contains(jal_destination) && is_NativeCallTrampolineStub_at(jal_destination)) { ++ return jal_destination; + } -+} + -+uint MachPrologNode::size(PhaseRegAlloc* ra_) const -+{ -+ assert_cond(ra_ != NULL); -+ return MachNode::size(ra_); // too many variables; just compute it -+ // the hard way -+} ++ if (code != NULL && code->is_nmethod()) { ++ return trampoline_stub_Relocation::get_trampoline_for(call_addr, (nmethod*)code); ++ } + -+int MachPrologNode::reloc() const -+{ -+ return 0; ++ return NULL; +} + -+//============================================================================= -+ -+#ifndef PRODUCT -+void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const { -+ assert_cond(st != NULL && ra_ != NULL); -+ Compile* C = ra_->C; -+ assert_cond(C != NULL); -+ int framesize = C->frame_size_in_bytes(); ++// Inserts a native call instruction at a given pc ++void NativeCall::insert(address code_pos, address entry) { Unimplemented(); } + -+ st->print("# pop frame %d\n\t", framesize); ++//------------------------------------------------------------------- + -+ if (framesize == 0) { -+ st->print("ld ra, [sp,#%d]\n\t", (2 * wordSize)); -+ st->print("ld fp, [sp,#%d]\n\t", (3 * wordSize)); -+ st->print("add sp, sp, #%d\n\t", (2 * wordSize)); -+ } else { -+ st->print("add sp, sp, #%d\n\t", framesize); -+ st->print("ld ra, [sp,#%d]\n\t", - 2 * wordSize); -+ st->print("ld fp, [sp,#%d]\n\t", - wordSize); ++void NativeMovConstReg::verify() { ++ if (!(nativeInstruction_at(instruction_address())->is_movptr() || ++ is_auipc_at(instruction_address()))) { ++ 
fatal("should be MOVPTR or AUIPC"); + } ++} + -+ if (do_polling() && C->is_method_compilation()) { -+ st->print("# touch polling page\n\t"); -+ st->print("li t0, #0x%lx\n\t", p2i(os::get_polling_page())); -+ st->print("ld zr, [t0]"); ++intptr_t NativeMovConstReg::data() const { ++ address addr = MacroAssembler::target_addr_for_insn(instruction_address()); ++ if (maybe_cpool_ref(instruction_address())) { ++ return *(intptr_t*)addr; ++ } else { ++ return (intptr_t)addr; + } +} -+#endif -+ -+void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { -+ assert_cond(ra_ != NULL); -+ Compile* C = ra_->C; -+ MacroAssembler _masm(&cbuf); -+ assert_cond(C != NULL); -+ int framesize = C->frame_size_in_bytes(); -+ -+ __ remove_frame(framesize); + -+ if (StackReservedPages > 0 && C->has_reserved_stack_access()) { -+ __ reserved_stack_check(); ++void NativeMovConstReg::set_data(intptr_t x) { ++ if (maybe_cpool_ref(instruction_address())) { ++ address addr = MacroAssembler::target_addr_for_insn(instruction_address()); ++ *(intptr_t*)addr = x; ++ } else { ++ // Store x into the instruction stream. ++ MacroAssembler::pd_patch_instruction_size(instruction_address(), (address)x); ++ ICache::invalidate_range(instruction_address(), movptr_instruction_size); + } + -+ if (do_polling() && C->is_method_compilation()) { -+ __ read_polling_page(t0, os::get_polling_page(), relocInfo::poll_return_type); ++ // Find and replace the oop/metadata corresponding to this ++ // instruction in oops section. ++ CodeBlob* cb = CodeCache::find_blob(instruction_address()); ++ nmethod* nm = cb->as_nmethod_or_null(); ++ if (nm != NULL) { ++ RelocIterator iter(nm, instruction_address(), next_instruction_address()); ++ while (iter.next()) { ++ if (iter.type() == relocInfo::oop_type) { ++ oop* oop_addr = iter.oop_reloc()->oop_addr(); ++ *oop_addr = cast_to_oop(x); ++ break; ++ } else if (iter.type() == relocInfo::metadata_type) { ++ Metadata** metadata_addr = iter.metadata_reloc()->metadata_addr(); ++ *metadata_addr = (Metadata*)x; ++ break; ++ } ++ } + } +} + -+uint MachEpilogNode::size(PhaseRegAlloc *ra_) const { -+ assert_cond(ra_ != NULL); -+ // Variable size. Determine dynamically. -+ return MachNode::size(ra_); ++void NativeMovConstReg::print() { ++ tty->print_cr(PTR_FORMAT ": mov reg, " INTPTR_FORMAT, ++ p2i(instruction_address()), data()); +} + -+int MachEpilogNode::reloc() const { -+ // Return number of relocatable values contained in this instruction. -+ return 1; // 1 for polling page. -+} -+const Pipeline * MachEpilogNode::pipeline() const { -+ return MachNode::pipeline_class(); ++//------------------------------------------------------------------- ++ ++int NativeMovRegMem::offset() const { ++ Unimplemented(); ++ return 0; +} + -+int MachEpilogNode::safepoint_offset() const { -+ assert(do_polling(), "no return for this epilog node"); -+ return 4; ++void NativeMovRegMem::set_offset(int x) { Unimplemented(); } ++ ++void NativeMovRegMem::verify() { ++ Unimplemented(); +} + -+//============================================================================= ++//-------------------------------------------------------------------------------- + -+// Figure out which register class each belongs in: rc_int, rc_float or -+// rc_stack. 
-+enum RC { rc_bad, rc_int, rc_float, rc_vector, rc_stack }; ++void NativeJump::verify() { } + -+static enum RC rc_class(OptoReg::Name reg) { + -+ if (reg == OptoReg::Bad) { -+ return rc_bad; -+ } ++void NativeJump::check_verified_entry_alignment(address entry, address verified_entry) { ++} + -+ // we have 30 int registers * 2 halves -+ // (t0 and t1 are omitted) -+ int slots_of_int_registers = RegisterImpl::max_slots_per_register * (RegisterImpl::number_of_registers - 2); -+ if (reg < slots_of_int_registers) { -+ return rc_int; -+ } + -+ // we have 32 float register * 2 halves -+ int slots_of_float_registers = FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers; -+ if (reg < slots_of_int_registers + slots_of_float_registers) { -+ return rc_float; -+ } ++address NativeJump::jump_destination() const { ++ address dest = MacroAssembler::target_addr_for_insn(instruction_address()); + -+ // we have 32 vector register * 4 halves -+ int slots_of_vector_registers = VectorRegisterImpl::max_slots_per_register * VectorRegisterImpl::number_of_registers; -+ if (reg < slots_of_int_registers + slots_of_float_registers + slots_of_vector_registers) { -+ return rc_vector; -+ } ++ // We use jump to self as the unresolved address which the inline ++ // cache code (and relocs) know about ++ // As a special case we also use sequence movptr_with_offset(r,0), jalr(r,0) ++ // i.e. jump to 0 when we need leave space for a wide immediate ++ // load + -+ // Between vector regs & stack is the flags regs. -+ assert(OptoReg::is_stack(reg), "blow up if spilling flags"); ++ // return -1 if jump to self or to 0 ++ if ((dest == (address) this) || dest == 0) { ++ dest = (address) -1; ++ } + -+ return rc_stack; -+} ++ return dest; ++}; + -+uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const { -+ assert_cond(ra_ != NULL); -+ Compile* C = ra_->C; ++void NativeJump::set_jump_destination(address dest) { ++ // We use jump to self as the unresolved address which the inline ++ // cache code (and relocs) know about ++ if (dest == (address) -1) ++ dest = instruction_address(); + -+ // Get registers to move. -+ OptoReg::Name src_hi = ra_->get_reg_second(in(1)); -+ OptoReg::Name src_lo = ra_->get_reg_first(in(1)); -+ OptoReg::Name dst_hi = ra_->get_reg_second(this); -+ OptoReg::Name dst_lo = ra_->get_reg_first(this); ++ MacroAssembler::pd_patch_instruction(instruction_address(), dest); ++ ICache::invalidate_range(instruction_address(), instruction_size); ++} + -+ enum RC src_hi_rc = rc_class(src_hi); -+ enum RC src_lo_rc = rc_class(src_lo); -+ enum RC dst_hi_rc = rc_class(dst_hi); -+ enum RC dst_lo_rc = rc_class(dst_lo); ++//------------------------------------------------------------------- + -+ assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register"); ++address NativeGeneralJump::jump_destination() const { ++ NativeMovConstReg* move = nativeMovConstReg_at(instruction_address()); ++ address dest = (address) move->data(); + -+ if (src_hi != OptoReg::Bad) { -+ assert((src_lo & 1) == 0 && src_lo + 1 == src_hi && -+ (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi, -+ "expected aligned-adjacent pairs"); -+ } ++ // We use jump to self as the unresolved address which the inline ++ // cache code (and relocs) know about ++ // As a special case we also use jump to 0 when first generating ++ // a general jump + -+ if (src_lo == dst_lo && src_hi == dst_hi) { -+ return 0; // Self copy, no move. 
++ // return -1 if jump to self or to 0 ++ if ((dest == (address) this) || dest == 0) { ++ dest = (address) -1; + } + -+ bool is64 = (src_lo & 1) == 0 && src_lo + 1 == src_hi && -+ (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi; -+ int src_offset = ra_->reg2offset(src_lo); -+ int dst_offset = ra_->reg2offset(dst_lo); ++ return dest; ++} + -+ if (bottom_type() == NULL) { -+ ShouldNotReachHere(); -+ } else if (bottom_type()->isa_vect() != NULL) { -+ uint ireg = ideal_reg(); -+ if (ireg == Op_VecA && cbuf) { -+ MacroAssembler _masm(cbuf); -+ int vector_reg_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); -+ if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) { -+ // stack to stack -+ __ spill_copy_vector_stack_to_stack(src_offset, dst_offset, -+ vector_reg_size_in_bytes); -+ } else if (src_lo_rc == rc_vector && dst_lo_rc == rc_stack) { -+ // vpr to stack -+ __ spill(as_VectorRegister(Matcher::_regEncode[src_lo]), ra_->reg2offset(dst_lo)); -+ } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_vector) { -+ // stack to vpr -+ __ unspill(as_VectorRegister(Matcher::_regEncode[dst_lo]), ra_->reg2offset(src_lo)); -+ } else if (src_lo_rc == rc_vector && dst_lo_rc == rc_vector) { -+ // vpr to vpr -+ __ vmv1r_v(as_VectorRegister(Matcher::_regEncode[dst_lo]), as_VectorRegister(Matcher::_regEncode[src_lo])); -+ } else { -+ ShouldNotReachHere(); -+ } -+ } -+ } else if (cbuf != NULL) { -+ MacroAssembler _masm(cbuf); -+ switch (src_lo_rc) { -+ case rc_int: -+ if (dst_lo_rc == rc_int) { // gpr --> gpr copy -+ if (!is64 && this->ideal_reg() != Op_RegI) { // zero extended for narrow oop or klass -+ __ zero_extend(as_Register(Matcher::_regEncode[dst_lo]), as_Register(Matcher::_regEncode[src_lo]), 32); -+ } else { -+ __ mv(as_Register(Matcher::_regEncode[dst_lo]), as_Register(Matcher::_regEncode[src_lo])); -+ } -+ } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy -+ if (is64) { -+ __ fmv_d_x(as_FloatRegister(Matcher::_regEncode[dst_lo]), -+ as_Register(Matcher::_regEncode[src_lo])); -+ } else { -+ __ fmv_w_x(as_FloatRegister(Matcher::_regEncode[dst_lo]), -+ as_Register(Matcher::_regEncode[src_lo])); -+ } -+ } else { // gpr --> stack spill -+ assert(dst_lo_rc == rc_stack, "spill to bad register class"); -+ __ spill(as_Register(Matcher::_regEncode[src_lo]), is64, dst_offset); -+ } -+ break; -+ case rc_float: -+ if (dst_lo_rc == rc_int) { // fpr --> gpr copy -+ if (is64) { -+ __ fmv_x_d(as_Register(Matcher::_regEncode[dst_lo]), -+ as_FloatRegister(Matcher::_regEncode[src_lo])); -+ } else { -+ __ fmv_x_w(as_Register(Matcher::_regEncode[dst_lo]), -+ as_FloatRegister(Matcher::_regEncode[src_lo])); -+ } -+ } else if (dst_lo_rc == rc_float) { // fpr --> fpr copy -+ if (is64) { -+ __ fmv_d(as_FloatRegister(Matcher::_regEncode[dst_lo]), -+ as_FloatRegister(Matcher::_regEncode[src_lo])); -+ } else { -+ __ fmv_s(as_FloatRegister(Matcher::_regEncode[dst_lo]), -+ as_FloatRegister(Matcher::_regEncode[src_lo])); -+ } -+ } else { // fpr --> stack spill -+ assert(dst_lo_rc == rc_stack, "spill to bad register class"); -+ __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]), -+ is64, dst_offset); -+ } -+ break; -+ case rc_stack: -+ if (dst_lo_rc == rc_int) { // stack --> gpr load -+ if (this->ideal_reg() == Op_RegI) { -+ __ unspill(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset); -+ } else { // // zero extended for narrow oop or klass -+ __ unspillu(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset); -+ } -+ } else if (dst_lo_rc == rc_float) { // stack --> fpr load -+ __ 
unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]), -+ is64, src_offset); -+ } else { // stack --> stack copy -+ assert(dst_lo_rc == rc_stack, "spill to bad register class"); -+ if (this->ideal_reg() == Op_RegI) { -+ __ unspill(t0, is64, src_offset); -+ } else { // zero extended for narrow oop or klass -+ __ unspillu(t0, is64, src_offset); -+ } -+ __ spill(t0, is64, dst_offset); -+ } -+ break; -+ default: -+ ShouldNotReachHere(); -+ } -+ } ++//------------------------------------------------------------------- + -+ if (st != NULL) { -+ st->print("spill "); -+ if (src_lo_rc == rc_stack) { -+ st->print("[sp, #%d] -> ", src_offset); -+ } else { -+ st->print("%s -> ", Matcher::regName[src_lo]); -+ } -+ if (dst_lo_rc == rc_stack) { -+ st->print("[sp, #%d]", dst_offset); -+ } else { -+ st->print("%s", Matcher::regName[dst_lo]); -+ } -+ if (bottom_type()->isa_vect() != NULL) { -+ int vsize = 0; -+ if (ideal_reg() == Op_VecA) { -+ vsize = Matcher::scalable_vector_reg_size(T_BYTE) * 8; -+ } else { -+ ShouldNotReachHere(); -+ } -+ st->print("\t# vector spill size = %d", vsize); -+ } else { -+ st->print("\t# spill size = %d", is64 ? 64 : 32); -+ } -+ } ++bool NativeInstruction::is_safepoint_poll() { ++ return is_lwu_to_zr(address(this)); ++} + -+ return 0; ++bool NativeInstruction::is_lwu_to_zr(address instr) { ++ assert_cond(instr != NULL); ++ return (extract_opcode(instr) == 0b0000011 && ++ extract_funct3(instr) == 0b110 && ++ extract_rd(instr) == zr); // zr +} + -+#ifndef PRODUCT -+void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const { -+ if (ra_ == NULL) { -+ st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx); -+ } else { -+ implementation(NULL, ra_, false, st); -+ } ++// A 16-bit instruction with all bits ones is permanently reserved as an illegal instruction. 
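++// NativeJump::patch_verified_entry() below writes such an all-ones word over the verified
++// entry point when an nmethod is made not_entrant or zombie; executing it then traps with
++// SIGILL, which is what is_sigill_zombie_not_entrant() lets the signal handler detect.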
++bool NativeInstruction::is_sigill_zombie_not_entrant() { ++ // jvmci ++ return uint_at(0) == 0xffffffff; +} -+#endif + -+void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { -+ implementation(&cbuf, ra_, false, NULL); ++void NativeIllegalInstruction::insert(address code_pos) { ++ assert_cond(code_pos != NULL); ++ *(juint*)code_pos = 0xffffffff; // all bits ones is permanently reserved as an illegal instruction +} + -+uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { -+ return MachNode::size(ra_); ++bool NativeInstruction::is_stop() { ++ return uint_at(0) == 0xffffffff; // an illegal instruction +} + -+//============================================================================= ++//------------------------------------------------------------------- + -+#ifndef PRODUCT -+void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const { -+ assert_cond(ra_ != NULL && st != NULL); -+ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); -+ int reg = ra_->get_reg_first(this); -+ st->print("add %s, sp, #%d\t# box lock", -+ Matcher::regName[reg], offset); -+} -+#endif ++// MT-safe inserting of a jump over a jump or a nop (used by ++// nmethod::make_not_entrant_or_zombie) + -+void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { -+ MacroAssembler _masm(&cbuf); ++void NativeJump::patch_verified_entry(address entry, address verified_entry, address dest) { + -+ assert_cond(ra_ != NULL); -+ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); -+ int reg = ra_->get_encode(this); ++ assert(dest == SharedRuntime::get_handle_wrong_method_stub(), "expected fixed destination of patch"); + -+ if (is_imm_in_range(offset, 12, 0)) { -+ __ addi(as_Register(reg), sp, offset); -+ } else if (is_imm_in_range(offset, 32, 0)) { -+ __ li32(t0, offset); -+ __ add(as_Register(reg), sp, t0); -+ } else { -+ ShouldNotReachHere(); -+ } -+} ++ assert(nativeInstruction_at(verified_entry)->is_jump_or_nop() || ++ nativeInstruction_at(verified_entry)->is_sigill_zombie_not_entrant(), ++ "riscv cannot replace non-jump with jump"); + -+uint BoxLockNode::size(PhaseRegAlloc *ra_) const { -+ // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_). -+ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); ++ // Patch this nmethod atomically. ++ if (Assembler::reachable_from_branch_at(verified_entry, dest)) { ++ ptrdiff_t offset = dest - verified_entry; ++ guarantee(is_imm_in_range(offset, 20, 1), "offset is too large to be patched in one jal insrusction."); // 1M + -+ if (is_imm_in_range(offset, 12, 0)) { -+ return NativeInstruction::instruction_size; ++ uint32_t insn = 0; ++ address pInsn = (address)&insn; ++ Assembler::patch(pInsn, 31, 31, (offset >> 20) & 0x1); ++ Assembler::patch(pInsn, 30, 21, (offset >> 1) & 0x3ff); ++ Assembler::patch(pInsn, 20, 20, (offset >> 11) & 0x1); ++ Assembler::patch(pInsn, 19, 12, (offset >> 12) & 0xff); ++ Assembler::patch(pInsn, 11, 7, 0); // zero, no link jump ++ Assembler::patch(pInsn, 6, 0, 0b1101111); // j, (jal x0 offset) ++ *(unsigned int*)verified_entry = insn; + } else { -+ return 3 * NativeInstruction::instruction_size; // lui + addiw + add; ++ // We use an illegal instruction for marking a method as ++ // not_entrant or zombie. 
++ NativeIllegalInstruction::insert(verified_entry); + } ++ ++ ICache::invalidate_range(verified_entry, instruction_size); +} + -+//============================================================================= ++void NativeGeneralJump::insert_unconditional(address code_pos, address entry) { ++ CodeBuffer cb(code_pos, instruction_size); ++ MacroAssembler a(&cb); + -+#ifndef PRODUCT -+void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const -+{ -+ assert_cond(st != NULL); -+ st->print_cr("# MachUEPNode"); -+ if (UseCompressedClassPointers) { -+ st->print_cr("\tlw t0, [j_rarg0, oopDesc::klass_offset_in_bytes()]\t# compressed klass"); -+ if (Universe::narrow_klass_shift() != 0) { -+ st->print_cr("\tdecode_klass_not_null t0, t0"); -+ } -+ } else { -+ st->print_cr("\tld t0, [j_rarg0, oopDesc::klass_offset_in_bytes()]\t# compressed klass"); -+ } -+ st->print_cr("\tbne x10, t0, SharedRuntime::_ic_miss_stub\t # Inline cache check"); ++ int32_t offset = 0; ++ a.movptr_with_offset(t0, entry, offset); // lui, addi, slli, addi, slli ++ a.jalr(x0, t0, offset); // jalr ++ ++ ICache::invalidate_range(code_pos, instruction_size); +} -+#endif + -+void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const -+{ -+ // This is the unverified entry point. -+ MacroAssembler _masm(&cbuf); ++// MT-safe patching of a long jump instruction. ++void NativeGeneralJump::replace_mt_safe(address instr_addr, address code_buffer) { ++ ShouldNotCallThis(); ++} + -+ Label skip; -+ __ cmp_klass(j_rarg0, t1, t0, skip); -+ __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); -+ __ bind(skip); ++ ++address NativeCallTrampolineStub::destination(nmethod *nm) const { ++ return ptr_at(data_offset); +} + -+uint MachUEPNode::size(PhaseRegAlloc* ra_) const -+{ -+ assert_cond(ra_ != NULL); -+ return MachNode::size(ra_); ++void NativeCallTrampolineStub::set_destination(address new_destination) { ++ set_ptr_at(data_offset, new_destination); ++ OrderAccess::release(); +} + -+// REQUIRED EMIT CODE ++uint32_t NativeMembar::get_kind() { ++ uint32_t insn = uint_at(0); + -+//============================================================================= ++ uint32_t predecessor = Assembler::extract(insn, 27, 24); ++ uint32_t successor = Assembler::extract(insn, 23, 20); + -+// Emit exception handler code. -+int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) -+{ -+ // la_patchable t0, #exception_blob_entry_point -+ // jr (offset)t0 -+ // or -+ // j #exception_blob_entry_point -+ // Note that the code buffer's insts_mark is always relative to insts. -+ // That's why we must use the macroassembler to generate a handler. -+ MacroAssembler _masm(&cbuf); -+ address base = __ start_a_stub(size_exception_handler()); -+ if (base == NULL) { -+ ciEnv::current()->record_failure("CodeCache is full"); -+ return 0; // CodeBuffer::expand failed -+ } -+ int offset = __ offset(); -+ __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point())); -+ assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); -+ __ end_a_stub(); -+ return offset; ++ return MacroAssembler::pred_succ_to_membar_mask(predecessor, successor); +} + -+// Emit deopt handler code. -+int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) -+{ -+ // Note that the code buffer's insts_mark is always relative to insts. -+ // That's why we must use the macroassembler to generate a handler. 
-+ MacroAssembler _masm(&cbuf); -+ address base = __ start_a_stub(size_deopt_handler()); -+ if (base == NULL) { -+ ciEnv::current()->record_failure("CodeCache is full"); -+ return 0; // CodeBuffer::expand failed -+ } -+ int offset = __ offset(); ++void NativeMembar::set_kind(uint32_t order_kind) { ++ uint32_t predecessor = 0; ++ uint32_t successor = 0; + -+ __ auipc(ra, 0); -+ __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); ++ MacroAssembler::membar_mask_to_pred_succ(order_kind, predecessor, successor); + -+ assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow"); -+ __ end_a_stub(); -+ return offset; ++ uint32_t insn = uint_at(0); ++ address pInsn = (address) &insn; ++ Assembler::patch(pInsn, 27, 24, predecessor); ++ Assembler::patch(pInsn, 23, 20, successor); + ++ address membar = addr_at(0); ++ *(unsigned int*) membar = insn; +} -+// REQUIRED MATCHER CODE +diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.hpp b/src/hotspot/cpu/riscv/nativeInst_riscv.hpp +new file mode 100644 +index 00000000000..718b2e3de6c +--- /dev/null ++++ b/src/hotspot/cpu/riscv/nativeInst_riscv.hpp +@@ -0,0 +1,572 @@ ++/* ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2018, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ + -+//============================================================================= ++#ifndef CPU_RISCV_NATIVEINST_RISCV_HPP ++#define CPU_RISCV_NATIVEINST_RISCV_HPP + -+const bool Matcher::match_rule_supported(int opcode) { -+ if (!has_match_rule(opcode)) { -+ return false; -+ } ++#include "asm/assembler.hpp" ++#include "runtime/icache.hpp" ++#include "runtime/os.hpp" + -+ switch (opcode) { -+ case Op_StrCompressedCopy: // fall through -+ case Op_StrInflatedCopy: // fall through -+ case Op_HasNegatives: -+ return UseRVV; -+ case Op_EncodeISOArray: -+ return UseRVV && SpecialEncodeISOArray; -+ case Op_PopCountI: -+ case Op_PopCountL: -+ return UsePopCountInstruction; -+ case Op_CountLeadingZerosI: -+ case Op_CountLeadingZerosL: -+ case Op_CountTrailingZerosI: -+ case Op_CountTrailingZerosL: -+ return UseZbb; -+ } ++// We have interfaces for the following instructions: ++// - NativeInstruction ++// - - NativeCall ++// - - NativeMovConstReg ++// - - NativeMovRegMem ++// - - NativeJump ++// - - NativeGeneralJump ++// - - NativeIllegalInstruction ++// - - NativeCallTrampolineStub ++// - - NativeMembar ++// - - NativeFenceI + -+ return true; // Per default match rules are supported. -+} ++// The base class for different kinds of native instruction abstractions. ++// Provides the primitive operations to manipulate code relative to this. + -+// Identify extra cases that we might want to provide match rules for vector nodes and -+// other intrinsics guarded with vector length (vlen). -+const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { -+ if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) { -+ return false; ++class NativeCall; ++ ++class NativeInstruction { ++ friend class Relocation; ++ friend bool is_NativeCallTrampolineStub_at(address); ++ public: ++ enum { ++ instruction_size = 4, ++ compressed_instruction_size = 2, ++ }; ++ ++ juint encoding() const { ++ return uint_at(0); + } + -+ return op_vec_supported(opcode); -+} ++ bool is_jal() const { return is_jal_at(addr_at(0)); } ++ bool is_movptr() const { return is_movptr_at(addr_at(0)); } ++ bool is_call() const { return is_call_at(addr_at(0)); } ++ bool is_jump() const { return is_jump_at(addr_at(0)); } + -+const bool Matcher::has_predicated_vectors(void) { -+ return false; // not supported ++ static bool is_jal_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b1101111; } ++ static bool is_jalr_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b1100111 && extract_funct3(instr) == 0b000; } ++ static bool is_branch_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b1100011; } ++ static bool is_ld_at(address instr) { assert_cond(instr != NULL); return is_load_at(instr) && extract_funct3(instr) == 0b011; } ++ static bool is_load_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0000011; } ++ static bool is_float_load_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0000111; } ++ static bool is_auipc_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0010111; } ++ static bool is_jump_at(address instr) { assert_cond(instr != NULL); return is_branch_at(instr) || is_jal_at(instr) || is_jalr_at(instr); } ++ static bool is_addi_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0010011 && extract_funct3(instr) == 0b000; } ++ static bool is_addiw_at(address instr) { 
assert_cond(instr != NULL); return extract_opcode(instr) == 0b0011011 && extract_funct3(instr) == 0b000; } ++ static bool is_lui_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0110111; } ++ static bool is_slli_shift_at(address instr, uint32_t shift) { ++ assert_cond(instr != NULL); ++ return (extract_opcode(instr) == 0b0010011 && // opcode field ++ extract_funct3(instr) == 0b001 && // funct3 field, select the type of operation ++ Assembler::extract(((unsigned*)instr)[0], 25, 20) == shift); // shamt field ++ } + -+} ++ static Register extract_rs1(address instr); ++ static Register extract_rs2(address instr); ++ static Register extract_rd(address instr); ++ static uint32_t extract_opcode(address instr); ++ static uint32_t extract_funct3(address instr); + -+const int Matcher::float_pressure(int default_pressure_threshold) { -+ return default_pressure_threshold; -+} ++ // the instruction sequence of movptr is as below: ++ // lui ++ // addi ++ // slli ++ // addi ++ // slli ++ // addi/jalr/load ++ static bool check_movptr_data_dependency(address instr) { ++ address lui = instr; ++ address addi1 = lui + instruction_size; ++ address slli1 = addi1 + instruction_size; ++ address addi2 = slli1 + instruction_size; ++ address slli2 = addi2 + instruction_size; ++ address last_instr = slli2 + instruction_size; ++ return extract_rs1(addi1) == extract_rd(lui) && ++ extract_rs1(addi1) == extract_rd(addi1) && ++ extract_rs1(slli1) == extract_rd(addi1) && ++ extract_rs1(slli1) == extract_rd(slli1) && ++ extract_rs1(addi2) == extract_rd(slli1) && ++ extract_rs1(addi2) == extract_rd(addi2) && ++ extract_rs1(slli2) == extract_rd(addi2) && ++ extract_rs1(slli2) == extract_rd(slli2) && ++ extract_rs1(last_instr) == extract_rd(slli2); ++ } + -+int Matcher::regnum_to_fpu_offset(int regnum) -+{ -+ Unimplemented(); -+ return 0; -+} ++ // the instruction sequence of li64 is as below: ++ // lui ++ // addi ++ // slli ++ // addi ++ // slli ++ // addi ++ // slli ++ // addi ++ static bool check_li64_data_dependency(address instr) { ++ address lui = instr; ++ address addi1 = lui + instruction_size; ++ address slli1 = addi1 + instruction_size; ++ address addi2 = slli1 + instruction_size; ++ address slli2 = addi2 + instruction_size; ++ address addi3 = slli2 + instruction_size; ++ address slli3 = addi3 + instruction_size; ++ address addi4 = slli3 + instruction_size; ++ return extract_rs1(addi1) == extract_rd(lui) && ++ extract_rs1(addi1) == extract_rd(addi1) && ++ extract_rs1(slli1) == extract_rd(addi1) && ++ extract_rs1(slli1) == extract_rd(slli1) && ++ extract_rs1(addi2) == extract_rd(slli1) && ++ extract_rs1(addi2) == extract_rd(addi2) && ++ extract_rs1(slli2) == extract_rd(addi2) && ++ extract_rs1(slli2) == extract_rd(slli2) && ++ extract_rs1(addi3) == extract_rd(slli2) && ++ extract_rs1(addi3) == extract_rd(addi3) && ++ extract_rs1(slli3) == extract_rd(addi3) && ++ extract_rs1(slli3) == extract_rd(slli3) && ++ extract_rs1(addi4) == extract_rd(slli3) && ++ extract_rs1(addi4) == extract_rd(addi4); ++ } + -+// Is this branch offset short enough that a short branch can be used? -+// -+// NOTE: If the platform does not provide any short branch variants, then -+// this method should return false for offset 0. 
-+// |---label(L1)-----| -+// |-----------------| -+// |-----------------|----------eq: float------------------- -+// |-----------------| // far_cmpD_branch | cmpD_branch -+// |------- ---------| feq; | feq; -+// |-far_cmpD_branch-| beqz done; | bnez L; -+// |-----------------| j L; | -+// |-----------------| bind(done); | -+// |-----------------|-------------------------------------- -+// |-----------------| // so shortBrSize = br_size - 4; -+// |-----------------| // so offs = offset - shortBrSize + 4; -+// |---label(L2)-----| -+bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { -+ // The passed offset is relative to address of the branch. -+ int shortBrSize = br_size - 4; -+ int offs = offset - shortBrSize + 4; -+ return (-4096 <= offs && offs < 4096); -+} ++ // the instruction sequence of li32 is as below: ++ // lui ++ // addiw ++ static bool check_li32_data_dependency(address instr) { ++ address lui = instr; ++ address addiw = lui + instruction_size; + -+const bool Matcher::isSimpleConstant64(jlong value) { -+ // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. -+ // Probably always true, even if a temp register is required. -+ return true; -+} ++ return extract_rs1(addiw) == extract_rd(lui) && ++ extract_rs1(addiw) == extract_rd(addiw); ++ } + -+// true just means we have fast l2f conversion -+const bool Matcher::convL2FSupported(void) { -+ return true; -+} ++ // the instruction sequence of pc-relative is as below: ++ // auipc ++ // jalr/addi/load/float_load ++ static bool check_pc_relative_data_dependency(address instr) { ++ address auipc = instr; ++ address last_instr = auipc + instruction_size; + -+// Vector width in bytes. -+const int Matcher::vector_width_in_bytes(BasicType bt) { -+ if (UseRVV) { -+ // The MaxVectorSize should have been set by detecting RVV max vector register size when check UseRVV. -+ // MaxVectorSize == VM_Version::_initial_vector_length -+ return MaxVectorSize; ++ return extract_rs1(last_instr) == extract_rd(auipc); + } -+ return 0; -+} + -+// Limits on vector size (number of elements) loaded into vector. -+const int Matcher::max_vector_size(const BasicType bt) { -+ return vector_width_in_bytes(bt) / type2aelembytes(bt); -+} -+const int Matcher::min_vector_size(const BasicType bt) { -+ return max_vector_size(bt); -+} ++ // the instruction sequence of load_label is as below: ++ // auipc ++ // load ++ static bool check_load_pc_relative_data_dependency(address instr) { ++ address auipc = instr; ++ address load = auipc + instruction_size; + -+// Vector ideal reg. 
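// Illustration, assuming a 128-bit RVV implementation (MaxVectorSize == 16):
// vector_width_in_bytes() above returns 16, so max_vector_size(T_INT) ==
// min_vector_size(T_INT) == 4 elements, and vector_ideal_reg() below maps every
// supported length onto the scalable Op_VecA register class.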
-+const uint Matcher::vector_ideal_reg(int len) { -+ assert(MaxVectorSize >= len, ""); -+ if (UseRVV) { -+ return Op_VecA; ++ return extract_rd(load) == extract_rd(auipc) && ++ extract_rs1(load) == extract_rd(load); + } + -+ ShouldNotReachHere(); -+ return 0; -+} ++ static bool is_movptr_at(address instr); ++ static bool is_li32_at(address instr); ++ static bool is_li64_at(address instr); ++ static bool is_pc_relative_at(address branch); ++ static bool is_load_pc_relative_at(address branch); + -+const uint Matcher::vector_shift_count_ideal_reg(int size) { -+ switch(size) { -+ case 8: return Op_VecD; -+ case 16: return Op_VecX; -+ default: -+ if (size == vector_width_in_bytes(T_BYTE)) { -+ return Op_VecA; -+ } ++ static bool is_call_at(address instr) { ++ if (is_jal_at(instr) || is_jalr_at(instr)) { ++ return true; ++ } ++ return false; + } -+ ShouldNotReachHere(); -+ return 0; -+} ++ static bool is_lwu_to_zr(address instr); + -+const bool Matcher::supports_scalable_vector() { -+ return UseRVV; -+} ++ inline bool is_nop(); ++ inline bool is_jump_or_nop(); ++ bool is_safepoint_poll(); ++ bool is_sigill_zombie_not_entrant(); ++ bool is_stop(); + -+const int Matcher::scalable_vector_reg_size(const BasicType bt) { -+ return Matcher::max_vector_size(bt); -+} ++ protected: ++ address addr_at(int offset) const { return address(this) + offset; } + -+// AES support not yet implemented -+const bool Matcher::pass_original_key_for_aes() { -+ return false; -+} ++ jint int_at(int offset) const { return *(jint*) addr_at(offset); } ++ juint uint_at(int offset) const { return *(juint*) addr_at(offset); } + -+// riscv supports misaligned vectors store/load. -+const bool Matcher::misaligned_vectors_ok() { -+ return true; -+} ++ address ptr_at(int offset) const { return *(address*) addr_at(offset); } + -+// false => size gets scaled to BytesPerLong, ok. -+const bool Matcher::init_array_count_is_in_bytes = false; ++ oop oop_at (int offset) const { return *(oop*) addr_at(offset); } + -+// Use conditional move (CMOVL) -+const int Matcher::long_cmove_cost() { -+ // long cmoves are no more expensive than int cmoves -+ return 0; -+} + -+const int Matcher::float_cmove_cost() { -+ // float cmoves are no more expensive than int cmoves -+ return 0; -+} ++ void set_int_at(int offset, jint i) { *(jint*)addr_at(offset) = i; } ++ void set_uint_at(int offset, jint i) { *(juint*)addr_at(offset) = i; } ++ void set_ptr_at (int offset, address ptr) { *(address*) addr_at(offset) = ptr; } ++ void set_oop_at (int offset, oop o) { *(oop*) addr_at(offset) = o; } + -+// Does the CPU require late expand (see block.cpp for description of late expand)? -+const bool Matcher::require_postalloc_expand = false; ++ public: + -+// Do we need to mask the count passed to shift instructions or does -+// the cpu only look at the lower 5/6 bits anyway? -+const bool Matcher::need_masked_shift_count = false; ++ inline friend NativeInstruction* nativeInstruction_at(address addr); + -+// This affects two different things: -+// - how Decode nodes are matched -+// - how ImplicitNullCheck opportunities are recognized -+// If true, the matcher will try to remove all Decodes and match them -+// (as operands) into nodes. NullChecks are not prepared to deal with -+// Decodes by final_graph_reshaping(). -+// If false, final_graph_reshaping() forces the decode behind the Cmp -+// for a NullCheck. The matcher matches the Decode node into a register. 
-+// Implicit_null_check optimization moves the Decode along with the -+// memory operation back up before the NullCheck. -+bool Matcher::narrow_oop_use_complex_address() { -+ return Universe::narrow_oop_shift() == 0; -+} ++ static bool maybe_cpool_ref(address instr) { ++ return is_auipc_at(instr); ++ } + -+bool Matcher::narrow_klass_use_complex_address() { -+// TODO -+// decide whether we need to set this to true -+ return false; -+} ++ bool is_membar() { ++ return (uint_at(0) & 0x7f) == 0b1111 && extract_funct3(addr_at(0)) == 0; ++ } ++}; + -+bool Matcher::const_oop_prefer_decode() { -+ // Prefer ConN+DecodeN over ConP in simple compressed oops mode. -+ return Universe::narrow_oop_base() == NULL; ++inline NativeInstruction* nativeInstruction_at(address addr) { ++ return (NativeInstruction*)addr; +} + -+bool Matcher::const_klass_prefer_decode() { -+ // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode. -+ return Universe::narrow_klass_base() == NULL; ++// The natural type of an RISCV instruction is uint32_t ++inline NativeInstruction* nativeInstruction_at(uint32_t *addr) { ++ return (NativeInstruction*)addr; +} + -+// Is it better to copy float constants, or load them directly from -+// memory? Intel can load a float constant from a direct address, -+// requiring no extra registers. Most RISCs will have to materialize -+// an address into a register first, so they would do better to copy -+// the constant from stack. -+const bool Matcher::rematerialize_float_constants = false; ++inline NativeCall* nativeCall_at(address addr); ++// The NativeCall is an abstraction for accessing/manipulating native ++// call instructions (used to manipulate inline caches, primitive & ++// DSO calls, etc.). + -+// If CPU can load and store mis-aligned doubles directly then no -+// fixup is needed. Else we split the double into 2 integer pieces -+// and move it piece-by-piece. Only happens when passing doubles into -+// C code as the Java calling convention forces doubles to be aligned. -+const bool Matcher::misaligned_doubles_ok = true; ++class NativeCall: public NativeInstruction { ++ public: ++ enum RISCV_specific_constants { ++ instruction_size = 4, ++ instruction_offset = 0, ++ displacement_offset = 0, ++ return_address_offset = 4 ++ }; + -+// No-op on amd64 -+void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) { -+ Unimplemented(); -+} ++ address instruction_address() const { return addr_at(instruction_offset); } ++ address next_instruction_address() const { return addr_at(return_address_offset); } ++ address return_address() const { return addr_at(return_address_offset); } ++ address destination() const; + -+// Advertise here if the CPU requires explicit rounding operations to -+// implement the UseStrictFP mode. 
-+const bool Matcher::strict_fp_requires_explicit_rounding = false; ++ void set_destination(address dest) { ++ assert(is_jal(), "Should be jal instruction!"); ++ intptr_t offset = (intptr_t)(dest - instruction_address()); ++ assert((offset & 0x1) == 0, "bad alignment"); ++ assert(is_imm_in_range(offset, 20, 1), "encoding constraint"); ++ unsigned int insn = 0b1101111; // jal ++ address pInsn = (address)(&insn); ++ Assembler::patch(pInsn, 31, 31, (offset >> 20) & 0x1); ++ Assembler::patch(pInsn, 30, 21, (offset >> 1) & 0x3ff); ++ Assembler::patch(pInsn, 20, 20, (offset >> 11) & 0x1); ++ Assembler::patch(pInsn, 19, 12, (offset >> 12) & 0xff); ++ Assembler::patch(pInsn, 11, 7, ra->encoding()); // Rd must be x1, need ra ++ set_int_at(displacement_offset, insn); ++ } + -+// Are floats converted to double when stored to stack during -+// deoptimization? -+bool Matcher::float_in_double() { return false; } ++ void verify_alignment() {} // do nothing on riscv ++ void verify(); ++ void print(); + -+// Do ints take an entire long register or just half? -+// The relevant question is how the int is callee-saved: -+// the whole long is written but de-opt'ing will have to extract -+// the relevant 32 bits. -+const bool Matcher::int_in_long = true; ++ // Creation ++ inline friend NativeCall* nativeCall_at(address addr); ++ inline friend NativeCall* nativeCall_before(address return_address); + -+// Return whether or not this register is ever used as an argument. -+// This function is used on startup to build the trampoline stubs in -+// generateOptoStub. Registers not mentioned will be killed by the VM -+// call in the trampoline, and arguments in those registers not be -+// available to the callee. -+bool Matcher::can_be_java_arg(int reg) -+{ -+ return -+ reg == R10_num || reg == R10_H_num || -+ reg == R11_num || reg == R11_H_num || -+ reg == R12_num || reg == R12_H_num || -+ reg == R13_num || reg == R13_H_num || -+ reg == R14_num || reg == R14_H_num || -+ reg == R15_num || reg == R15_H_num || -+ reg == R16_num || reg == R16_H_num || -+ reg == R17_num || reg == R17_H_num || -+ reg == F10_num || reg == F10_H_num || -+ reg == F11_num || reg == F11_H_num || -+ reg == F12_num || reg == F12_H_num || -+ reg == F13_num || reg == F13_H_num || -+ reg == F14_num || reg == F14_H_num || -+ reg == F15_num || reg == F15_H_num || -+ reg == F16_num || reg == F16_H_num || -+ reg == F17_num || reg == F17_H_num; -+} ++ static bool is_call_before(address return_address) { ++ return is_call_at(return_address - NativeCall::return_address_offset); ++ } + -+bool Matcher::is_spillable_arg(int reg) -+{ -+ return can_be_java_arg(reg); -+} ++ // MT-safe patching of a call instruction. ++ static void insert(address code_pos, address entry); + -+bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) { -+ return false; -+} ++ static void replace_mt_safe(address instr_addr, address code_buffer); + -+RegMask Matcher::divI_proj_mask() { -+ ShouldNotReachHere(); -+ return RegMask(); -+} ++ // Similar to replace_mt_safe, but just changes the destination. The ++ // important thing is that free-running threads are able to execute ++ // this call instruction at all times. If the call is an immediate BL ++ // instruction we can simply rely on atomicity of 32-bit writes to ++ // make sure other threads will see no intermediate states. + -+// Register for MODI projection of divmodI. 
-+RegMask Matcher::modI_proj_mask() { -+ ShouldNotReachHere(); -+ return RegMask(); -+} ++ // We cannot rely on locks here, since the free-running threads must run at ++ // full speed. ++ // ++ // Used in the runtime linkage of calls; see class CompiledIC. ++ // (Cf. 4506997 and 4479829, where threads witnessed garbage displacements.) + -+// Register for DIVL projection of divmodL. -+RegMask Matcher::divL_proj_mask() { -+ ShouldNotReachHere(); -+ return RegMask(); -+} ++ // The parameter assert_lock disables the assertion during code generation. ++ void set_destination_mt_safe(address dest, bool assert_lock = true); + -+// Register for MODL projection of divmodL. -+RegMask Matcher::modL_proj_mask() { -+ ShouldNotReachHere(); -+ return RegMask(); ++ address get_trampoline(); ++}; ++ ++inline NativeCall* nativeCall_at(address addr) { ++ assert_cond(addr != NULL); ++ NativeCall* call = (NativeCall*)(addr - NativeCall::instruction_offset); ++#ifdef ASSERT ++ call->verify(); ++#endif ++ return call; +} + -+const RegMask Matcher::method_handle_invoke_SP_save_mask() { -+ return FP_REG_mask(); ++inline NativeCall* nativeCall_before(address return_address) { ++ assert_cond(return_address != NULL); ++ NativeCall* call = (NativeCall*)(return_address - NativeCall::return_address_offset); ++#ifdef ASSERT ++ call->verify(); ++#endif ++ return call; +} + -+bool size_fits_all_mem_uses(AddPNode* addp, int shift) { -+ assert_cond(addp != NULL); -+ for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) { -+ Node* u = addp->fast_out(i); -+ if (u != NULL && u->is_Mem()) { -+ int opsize = u->as_Mem()->memory_size(); -+ assert(opsize > 0, "unexpected memory operand size"); -+ if (u->as_Mem()->memory_size() != (1 << shift)) { -+ return false; ++// An interface for accessing/manipulating native mov reg, imm instructions. ++// (used to manipulate inlined 64-bit data calls, etc.) ++class NativeMovConstReg: public NativeInstruction { ++ public: ++ enum RISCV_specific_constants { ++ movptr_instruction_size = 6 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli, addi. See movptr(). ++ movptr_with_offset_instruction_size = 5 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli. See movptr_with_offset(). ++ load_pc_relative_instruction_size = 2 * NativeInstruction::instruction_size, // auipc, ld ++ instruction_offset = 0, ++ displacement_offset = 0 ++ }; ++ ++ address instruction_address() const { return addr_at(instruction_offset); } ++ address next_instruction_address() const { ++ // if the instruction at 5 * instruction_size is addi, ++ // it means a lui + addi + slli + addi + slli + addi instruction sequence, ++ // and the next instruction address should be addr_at(6 * instruction_size). 
++ // However, when the instruction at 5 * instruction_size isn't addi, ++ // the next instruction address should be addr_at(5 * instruction_size) ++ if (nativeInstruction_at(instruction_address())->is_movptr()) { ++ if (is_addi_at(addr_at(movptr_with_offset_instruction_size))) { ++ // Assume: lui, addi, slli, addi, slli, addi ++ return addr_at(movptr_instruction_size); ++ } else { ++ // Assume: lui, addi, slli, addi, slli ++ return addr_at(movptr_with_offset_instruction_size); + } ++ } else if (is_load_pc_relative_at(instruction_address())) { ++ // Assume: auipc, ld ++ return addr_at(load_pc_relative_instruction_size); + } ++ guarantee(false, "Unknown instruction in NativeMovConstReg"); ++ return NULL; + } -+ return true; -+} -+ -+const bool Matcher::convi2l_type_required = false; + -+// Should the Matcher clone shifts on addressing modes, expecting them -+// to be subsumed into complex addressing expressions or compute them -+// into registers? -+bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { -+ assert_cond(m != NULL); -+ if (clone_base_plus_offset_address(m, mstack, address_visited)) { -+ return true; -+ } ++ intptr_t data() const; ++ void set_data(intptr_t x); + -+ Node *off = m->in(AddPNode::Offset); -+ if (off != NULL && off->Opcode() == Op_LShiftL && off->in(2)->is_Con() && -+ size_fits_all_mem_uses(m, off->in(2)->get_int()) && -+ // Are there other uses besides address expressions? -+ !is_visited(off)) { -+ address_visited.set(off->_idx); // Flag as address_visited -+ mstack.push(off->in(2), Visit); -+ Node *conv = off->in(1); -+ if (conv->Opcode() == Op_ConvI2L && -+ // Are there other uses besides address expressions? -+ !is_visited(conv)) { -+ address_visited.set(conv->_idx); // Flag as address_visited -+ mstack.push(conv->in(1), Pre_Visit); -+ } else { -+ mstack.push(conv, Pre_Visit); ++ void flush() { ++ if (!maybe_cpool_ref(instruction_address())) { ++ ICache::invalidate_range(instruction_address(), movptr_instruction_size); + } -+ address_visited.test_set(m->_idx); // Flag as address_visited -+ mstack.push(m->in(AddPNode::Address), Pre_Visit); -+ mstack.push(m->in(AddPNode::Base), Pre_Visit); -+ return true; -+ } else if (off != NULL && off->Opcode() == Op_ConvI2L && -+ // Are there other uses besides address expressions? -+ !is_visited(off)) { -+ address_visited.test_set(m->_idx); // Flag as address_visited -+ address_visited.set(off->_idx); // Flag as address_visited -+ mstack.push(off->in(1), Pre_Visit); -+ mstack.push(m->in(AddPNode::Address), Pre_Visit); -+ mstack.push(m->in(AddPNode::Base), Pre_Visit); -+ return true; + } -+ return false; -+} -+ -+void Compile::reshape_address(AddPNode* addp) { -+} -+ -+%} + ++ void verify(); ++ void print(); + ++ // Creation ++ inline friend NativeMovConstReg* nativeMovConstReg_at(address addr); ++ inline friend NativeMovConstReg* nativeMovConstReg_before(address addr); ++}; + -+//----------ENCODING BLOCK----------------------------------------------------- -+// This block specifies the encoding classes used by the compiler to -+// output byte streams. Encoding classes are parameterized macros -+// used by Machine Instruction Nodes in order to generate the bit -+// encoding of the instruction. Operands specify their base encoding -+// interface with the interface keyword. There are currently -+// supported four interfaces, REG_INTER, CONST_INTER, MEMORY_INTER, & -+// COND_INTER. 
REG_INTER causes an operand to generate a function -+// which returns its register number when queried. CONST_INTER causes -+// an operand to generate a function which returns the value of the -+// constant when queried. MEMORY_INTER causes an operand to generate -+// four functions which return the Base Register, the Index Register, -+// the Scale Value, and the Offset Value of the operand when queried. -+// COND_INTER causes an operand to generate six functions which return -+// the encoding code (ie - encoding bits for the instruction) -+// associated with each basic boolean condition for a conditional -+// instruction. -+// -+// Instructions specify two basic values for encoding. Again, a -+// function is available to check if the constant displacement is an -+// oop. They use the ins_encode keyword to specify their encoding -+// classes (which must be a sequence of enc_class names, and their -+// parameters, specified in the encoding block), and they use the -+// opcode keyword to specify, in order, their primary, secondary, and -+// tertiary opcode. Only the opcode sections which a particular -+// instruction needs for encoding need to be specified. -+encode %{ -+ // BEGIN Non-volatile memory access ++inline NativeMovConstReg* nativeMovConstReg_at(address addr) { ++ assert_cond(addr != NULL); ++ NativeMovConstReg* test = (NativeMovConstReg*)(addr - NativeMovConstReg::instruction_offset); ++#ifdef ASSERT ++ test->verify(); ++#endif ++ return test; ++} + -+ enc_class riscv_enc_li_imm(iRegIorL dst, immIorL src) %{ -+ MacroAssembler _masm(&cbuf); -+ int64_t con = (int64_t)$src$$constant; -+ Register dst_reg = as_Register($dst$$reg); -+ __ mv(dst_reg, con); -+ %} ++inline NativeMovConstReg* nativeMovConstReg_before(address addr) { ++ assert_cond(addr != NULL); ++ NativeMovConstReg* test = (NativeMovConstReg*)(addr - NativeMovConstReg::instruction_size - NativeMovConstReg::instruction_offset); ++#ifdef ASSERT ++ test->verify(); ++#endif ++ return test; ++} + -+ enc_class riscv_enc_mov_p(iRegP dst, immP src) %{ -+ MacroAssembler _masm(&cbuf); -+ Register dst_reg = as_Register($dst$$reg); -+ address con = (address)$src$$constant; -+ if (con == NULL || con == (address)1) { -+ ShouldNotReachHere(); -+ } else { -+ relocInfo::relocType rtype = $src->constant_reloc(); -+ if (rtype == relocInfo::oop_type) { -+ __ movoop(dst_reg, (jobject)con, /*immediate*/true); -+ } else if (rtype == relocInfo::metadata_type) { -+ __ mov_metadata(dst_reg, (Metadata*)con); -+ } else { -+ assert(rtype == relocInfo::none, "unexpected reloc type"); -+ __ mv(dst_reg, $src$$constant); -+ } -+ } -+ %} ++// RISCV should not use C1 runtime patching, so just leave NativeMovRegMem Unimplemented. 
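// A plausible sketch of is_movptr_at(), which is_movptr() and the
// next_instruction_address() logic above rely on, expressed with the helpers
// declared earlier in this header.  The shift amounts (11 and 6) are assumptions
// based on the lui/addi/slli/addi/slli/addi split of a 48-bit address; the
// authoritative definition is expected in nativeInst_riscv.cpp.
//
//   bool NativeInstruction::is_movptr_at(address instr) {
//     return is_lui_at(instr) &&                                   // lui
//            is_addi_at(instr + instruction_size) &&               // addi
//            is_slli_shift_at(instr + 2 * instruction_size, 11) && // slli rd, rs, 11
//            is_addi_at(instr + 3 * instruction_size) &&           // addi
//            is_slli_shift_at(instr + 4 * instruction_size, 6) &&  // slli rd, rs, 6
//            (is_addi_at(instr + 5 * instruction_size) ||
//             is_jalr_at(instr + 5 * instruction_size) ||
//             is_load_at(instr + 5 * instruction_size)) &&         // addi/jalr/load
//            check_movptr_data_dependency(instr);
//   }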
++class NativeMovRegMem: public NativeInstruction { ++ public: ++ int instruction_start() const { ++ Unimplemented(); ++ return 0; ++ } + -+ enc_class riscv_enc_mov_p1(iRegP dst) %{ -+ MacroAssembler _masm(&cbuf); -+ Register dst_reg = as_Register($dst$$reg); -+ __ mv(dst_reg, 1); -+ %} ++ address instruction_address() const { ++ Unimplemented(); ++ return NULL; ++ } + -+ enc_class riscv_enc_mov_poll_page(iRegP dst, immPollPage src) %{ -+ MacroAssembler _masm(&cbuf); -+ int32_t offset = 0; -+ address page = (address)$src$$constant; -+ unsigned long align = (unsigned long)page & 0xfff; -+ assert(align == 0, "polling page must be page aligned"); -+ Register dst_reg = as_Register($dst$$reg); -+ __ la_patchable(dst_reg, Address(page, relocInfo::poll_type), offset); -+ __ addi(dst_reg, dst_reg, offset); -+ %} ++ int num_bytes_to_end_of_patch() const { ++ Unimplemented(); ++ return 0; ++ } + -+ enc_class riscv_enc_mov_byte_map_base(iRegP dst) %{ -+ MacroAssembler _masm(&cbuf); -+ __ load_byte_map_base($dst$$Register); -+ %} ++ int offset() const; + -+ enc_class riscv_enc_mov_n(iRegN dst, immN src) %{ -+ MacroAssembler _masm(&cbuf); -+ Register dst_reg = as_Register($dst$$reg); -+ address con = (address)$src$$constant; -+ if (con == NULL) { -+ ShouldNotReachHere(); -+ } else { -+ relocInfo::relocType rtype = $src->constant_reloc(); -+ assert(rtype == relocInfo::oop_type, "unexpected reloc type"); -+ __ set_narrow_oop(dst_reg, (jobject)con); -+ } -+ %} ++ void set_offset(int x); + -+ enc_class riscv_enc_mov_zero(iRegNorP dst) %{ -+ MacroAssembler _masm(&cbuf); -+ Register dst_reg = as_Register($dst$$reg); -+ __ mv(dst_reg, zr); -+ %} ++ void add_offset_in_bytes(int add_offset) { Unimplemented(); } + -+ enc_class riscv_enc_mov_nk(iRegN dst, immNKlass src) %{ -+ MacroAssembler _masm(&cbuf); -+ Register dst_reg = as_Register($dst$$reg); -+ address con = (address)$src$$constant; -+ if (con == NULL) { -+ ShouldNotReachHere(); -+ } else { -+ relocInfo::relocType rtype = $src->constant_reloc(); -+ assert(rtype == relocInfo::metadata_type, "unexpected reloc type"); -+ __ set_narrow_klass(dst_reg, (Klass *)con); -+ } -+ %} ++ void verify(); ++ void print(); + -+ enc_class riscv_enc_cmpxchgw(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ -+ MacroAssembler _masm(&cbuf); -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, -+ /*result as bool*/ true); -+ %} ++ private: ++ inline friend NativeMovRegMem* nativeMovRegMem_at (address addr); ++}; + -+ enc_class riscv_enc_cmpxchgn(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ -+ MacroAssembler _masm(&cbuf); -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, -+ /*result as bool*/ true); -+ %} ++inline NativeMovRegMem* nativeMovRegMem_at (address addr) { ++ Unimplemented(); ++ return NULL; ++} + -+ enc_class riscv_enc_cmpxchg(iRegINoSp res, memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{ -+ MacroAssembler _masm(&cbuf); -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, -+ /*result as bool*/ true); -+ %} ++class NativeJump: public NativeInstruction { ++ public: ++ enum RISCV_specific_constants { ++ instruction_size = NativeInstruction::instruction_size, ++ 
instruction_offset = 0, ++ data_offset = 0, ++ next_instruction_offset = NativeInstruction::instruction_size ++ }; + -+ enc_class riscv_enc_cmpxchgw_acq(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ -+ MacroAssembler _masm(&cbuf); -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, -+ /*result as bool*/ true); -+ %} ++ address instruction_address() const { return addr_at(instruction_offset); } ++ address next_instruction_address() const { return addr_at(instruction_size); } ++ address jump_destination() const; ++ void set_jump_destination(address dest); + -+ enc_class riscv_enc_cmpxchgn_acq(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ -+ MacroAssembler _masm(&cbuf); -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, -+ /*result as bool*/ true); -+ %} ++ // Creation ++ inline friend NativeJump* nativeJump_at(address address); + -+ enc_class riscv_enc_cmpxchg_acq(iRegINoSp res, memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{ -+ MacroAssembler _masm(&cbuf); -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, -+ /*result as bool*/ true); -+ %} ++ void verify(); + -+ // compare and branch instruction encodings ++ // Insertion of native jump instruction ++ static void insert(address code_pos, address entry); ++ // MT-safe insertion of native jump at verified method entry ++ static void check_verified_entry_alignment(address entry, address verified_entry); ++ static void patch_verified_entry(address entry, address verified_entry, address dest); ++}; + -+ enc_class riscv_enc_j(label lbl) %{ -+ MacroAssembler _masm(&cbuf); -+ Label* L = $lbl$$label; -+ __ j(*L); -+ %} ++inline NativeJump* nativeJump_at(address addr) { ++ NativeJump* jump = (NativeJump*)(addr - NativeJump::instruction_offset); ++#ifdef ASSERT ++ jump->verify(); ++#endif ++ return jump; ++} + -+ enc_class riscv_enc_far_cmpULtGe_imm0_branch(cmpOpULtGe cmp, iRegIorL op1, label lbl) %{ -+ MacroAssembler _masm(&cbuf); -+ Label* L = $lbl$$label; -+ switch($cmp$$cmpcode) { -+ case(BoolTest::ge): -+ __ j(*L); -+ break; -+ case(BoolTest::lt): -+ break; -+ default: -+ Unimplemented(); -+ } -+ %} ++class NativeGeneralJump: public NativeJump { ++public: ++ enum RISCV_specific_constants { ++ instruction_size = 6 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli, jalr ++ instruction_offset = 0, ++ data_offset = 0, ++ next_instruction_offset = 6 * NativeInstruction::instruction_size // lui, addi, slli, addi, slli, jalr ++ }; + -+ // call instruction encodings ++ address jump_destination() const; + -+ enc_class riscv_enc_partial_subtype_check(iRegP sub, iRegP super, iRegP temp, iRegP result) %{ -+ Register sub_reg = as_Register($sub$$reg); -+ Register super_reg = as_Register($super$$reg); -+ Register temp_reg = as_Register($temp$$reg); -+ Register result_reg = as_Register($result$$reg); -+ Register cr_reg = t1; ++ static void insert_unconditional(address code_pos, address entry); ++ static void replace_mt_safe(address instr_addr, address code_buffer); ++}; + -+ Label miss; -+ Label done; -+ MacroAssembler _masm(&cbuf); -+ __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg, -+ NULL, &miss); -+ if ($primary) { -+ __ 
mv(result_reg, zr); -+ } else { -+ __ mv(cr_reg, zr); -+ __ j(done); -+ } ++inline NativeGeneralJump* nativeGeneralJump_at(address addr) { ++ assert_cond(addr != NULL); ++ NativeGeneralJump* jump = (NativeGeneralJump*)(addr); ++ debug_only(jump->verify();) ++ return jump; ++} + -+ __ bind(miss); -+ if (!$primary) { -+ __ mv(cr_reg, 1); -+ } ++class NativeIllegalInstruction: public NativeInstruction { ++ public: ++ // Insert illegal opcode as specific address ++ static void insert(address code_pos); ++}; + -+ __ bind(done); -+ %} ++inline bool NativeInstruction::is_nop() { ++ uint32_t insn = *(uint32_t*)addr_at(0); ++ return insn == 0x13; ++} + -+ enc_class riscv_enc_java_static_call(method meth) %{ -+ MacroAssembler _masm(&cbuf); ++inline bool NativeInstruction::is_jump_or_nop() { ++ return is_nop() || is_jump(); ++} + -+ address addr = (address)$meth$$method; -+ address call = NULL; -+ assert_cond(addr != NULL); -+ if (!_method) { -+ // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap. -+ call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type)); -+ if (call == NULL) { -+ ciEnv::current()->record_failure("CodeCache is full"); -+ return; -+ } -+ } else { -+ int method_index = resolved_method_index(cbuf); -+ RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index) -+ : static_call_Relocation::spec(method_index); -+ call = __ trampoline_call(Address(addr, rspec)); -+ if (call == NULL) { -+ ciEnv::current()->record_failure("CodeCache is full"); -+ return; -+ } -+ // Emit stub for static call -+ address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, call); -+ if (stub == NULL) { -+ ciEnv::current()->record_failure("CodeCache is full"); -+ return; -+ } -+ } ++// Call trampoline stubs. ++class NativeCallTrampolineStub : public NativeInstruction { ++ public: + -+ %} ++ enum RISCV_specific_constants { ++ // Refer to function emit_trampoline_stub. ++ instruction_size = 3 * NativeInstruction::instruction_size + wordSize, // auipc + ld + jr + target address ++ data_offset = 3 * NativeInstruction::instruction_size, // auipc + ld + jr ++ }; + -+ enc_class riscv_enc_java_dynamic_call(method meth) %{ -+ MacroAssembler _masm(&cbuf); -+ int method_index = resolved_method_index(cbuf); -+ address call = __ ic_call((address)$meth$$method, method_index); -+ if (call == NULL) { -+ ciEnv::current()->record_failure("CodeCache is full"); -+ return; -+ } -+ %} ++ address destination(nmethod *nm = NULL) const; ++ void set_destination(address new_destination); ++ ptrdiff_t destination_offset() const; ++}; + -+ enc_class riscv_enc_call_epilog() %{ -+ MacroAssembler _masm(&cbuf); -+ if (VerifyStackAtCalls) { -+ // Check that stack depth is unchanged: find majik cookie on stack -+ __ call_Unimplemented(); -+ } -+ %} ++inline bool is_NativeCallTrampolineStub_at(address addr) { ++ // Ensure that the stub is exactly ++ // ld t0, L--->auipc + ld ++ // jr t0 ++ // L: + -+ enc_class riscv_enc_java_to_runtime(method meth) %{ -+ MacroAssembler _masm(&cbuf); ++ // judge inst + register + imm ++ // 1). check the instructions: auipc + ld + jalr ++ // 2). check if auipc[11:7] == t0 and ld[11:7] == t0 and ld[19:15] == t0 && jr[19:15] == t0 ++ // 3). 
check if the offset in ld[31:20] equals the data_offset ++ assert_cond(addr != NULL); ++ const int instr_size = NativeInstruction::instruction_size; ++ if (NativeInstruction::is_auipc_at(addr) && ++ NativeInstruction::is_ld_at(addr + instr_size) && ++ NativeInstruction::is_jalr_at(addr + 2 * instr_size) && ++ (NativeInstruction::extract_rd(addr) == x5) && ++ (NativeInstruction::extract_rd(addr + instr_size) == x5) && ++ (NativeInstruction::extract_rs1(addr + instr_size) == x5) && ++ (NativeInstruction::extract_rs1(addr + 2 * instr_size) == x5) && ++ (Assembler::extract(((unsigned*)addr)[1], 31, 20) == NativeCallTrampolineStub::data_offset)) { ++ return true; ++ } ++ return false; ++} + -+ // some calls to generated routines (arraycopy code) are scheduled -+ // by C2 as runtime calls. if so we can call them using a jr (they -+ // will be in a reachable segment) otherwise we have to use a jalr -+ // which loads the absolute address into a register. -+ address entry = (address)$meth$$method; -+ CodeBlob *cb = CodeCache::find_blob(entry); -+ if (cb != NULL) { -+ address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type)); -+ if (call == NULL) { -+ ciEnv::current()->record_failure("CodeCache is full"); -+ return; -+ } -+ } else { -+ Label retaddr; -+ __ la(t1, retaddr); -+ __ la(t0, RuntimeAddress(entry)); -+ // Leave a breadcrumb for JavaFrameAnchor::capture_last_Java_pc() -+ __ addi(sp, sp, -2 * wordSize); -+ __ sd(t1, Address(sp, wordSize)); -+ __ jalr(t0); -+ __ bind(retaddr); -+ __ addi(sp, sp, 2 * wordSize); -+ } -+ %} ++inline NativeCallTrampolineStub* nativeCallTrampolineStub_at(address addr) { ++ assert_cond(addr != NULL); ++ assert(is_NativeCallTrampolineStub_at(addr), "no call trampoline found"); ++ return (NativeCallTrampolineStub*)addr; ++} + -+ // using the cr register as the bool result: 0 for success; others failed. -+ enc_class riscv_enc_fast_lock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{ -+ MacroAssembler _masm(&cbuf); -+ Register flag = t1; -+ Register oop = as_Register($object$$reg); -+ Register box = as_Register($box$$reg); -+ Register disp_hdr = as_Register($tmp$$reg); -+ Register tmp = as_Register($tmp2$$reg); -+ Label cont; -+ Label object_has_monitor; ++class NativeMembar : public NativeInstruction { ++public: ++ uint32_t get_kind(); ++ void set_kind(uint32_t order_kind); ++}; + -+ assert_different_registers(oop, box, tmp, disp_hdr, t0); ++inline NativeMembar *NativeMembar_at(address addr) { ++ assert_cond(addr != NULL); ++ assert(nativeInstruction_at(addr)->is_membar(), "no membar found"); ++ return (NativeMembar*)addr; ++} + -+ // Load markOop from object into displaced_header. -+ __ ld(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes())); ++class NativeFenceI : public NativeInstruction { ++public: ++ static inline int instruction_size() { ++ // 2 for fence.i + fence ++ return (UseConservativeFence ? 2 : 1) * NativeInstruction::instruction_size; ++ } ++}; + -+ // Always do locking in runtime. -+ if (EmitSync & 0x01) { -+ __ mv(flag, 1); -+ return; -+ } ++#endif // CPU_RISCV_NATIVEINST_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/registerMap_riscv.cpp b/src/hotspot/cpu/riscv/registerMap_riscv.cpp +new file mode 100644 +index 00000000000..26c1edc36ff +--- /dev/null ++++ b/src/hotspot/cpu/riscv/registerMap_riscv.cpp +@@ -0,0 +1,45 @@ ++/* ++ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. 
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ if (UseBiasedLocking && !UseOptoBiasInlining) { -+ // ignore slow case here -+ __ biased_locking_enter(box, oop, disp_hdr, tmp, true, cont, /*slow_case*/NULL, NULL, flag); -+ } ++#include "precompiled.hpp" ++#include "runtime/registerMap.hpp" ++#include "vmreg_riscv.inline.hpp" + -+ // Check for existing monitor -+ if ((EmitSync & 0x02) == 0) { -+ __ andi(t0, disp_hdr, markOopDesc::monitor_value); -+ __ bnez(t0, object_has_monitor); ++address RegisterMap::pd_location(VMReg base_reg, int slot_idx) const { ++ if (base_reg->is_VectorRegister()) { ++ assert(base_reg->is_concrete(), "must pass base reg"); ++ int base_reg_enc = (base_reg->value() - ConcreteRegisterImpl::max_fpr) / ++ VectorRegisterImpl::max_slots_per_register; ++ intptr_t offset_in_bytes = slot_idx * VMRegImpl::stack_slot_size; ++ address base_location = location(base_reg); ++ if (base_location != NULL) { ++ return base_location + offset_in_bytes; ++ } else { ++ return NULL; + } ++ } else { ++ return location(base_reg->next(slot_idx)); ++ } ++} +diff --git a/src/hotspot/cpu/riscv/registerMap_riscv.hpp b/src/hotspot/cpu/riscv/registerMap_riscv.hpp +new file mode 100644 +index 00000000000..f34349811a9 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/registerMap_riscv.hpp +@@ -0,0 +1,43 @@ ++/* ++ * Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ // Set tmp to be (markOop of object | UNLOCK_VALUE). 
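// For reference, the low two mark-word bits this sequence depends on are:
//   01 - unlocked            (markOopDesc::unlocked_value)
//   00 - stack-locked        (displaced header stored in the on-stack BasicLock)
//   10 - inflated / monitor  (markOopDesc::monitor_value)
// The ori below builds the "expected unlocked" mark, and the following CAS can
// therefore only succeed while the object is actually unlocked.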
-+ __ ori(tmp, disp_hdr, markOopDesc::unlocked_value); -+ -+ // Initialize the box. (Must happen before we update the object mark!) -+ __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); -+ -+ // Compare object markOop with an unlocked value (tmp) and if -+ // equal exchange the stack address of our box with object markOop. -+ // On failure disp_hdr contains the possibly locked markOop. -+ __ cmpxchg(/*memory address*/oop, /*expected value*/tmp, /*new value*/box, Assembler::int64, Assembler::aq, -+ Assembler::rl, /*result*/disp_hdr); -+ __ mv(flag, zr); -+ __ beq(disp_hdr, tmp, cont); // prepare zero flag and goto cont if we won the cas -+ -+ assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); -+ -+ // If the compare-and-exchange succeeded, then we found an unlocked -+ // object, will have now locked it will continue at label cont -+ // We did not see an unlocked object so try the fast recursive case. -+ -+ // Check if the owner is self by comparing the value in the -+ // markOop of object (disp_hdr) with the stack pointer. -+ __ sub(disp_hdr, disp_hdr, sp); -+ __ mv(tmp, (intptr_t) (~(os::vm_page_size()-1) | (uintptr_t)markOopDesc::lock_mask_in_place)); -+ // If (mark & lock_mask) == 0 and mark - sp < page_size, we are stack-locking and goto cont, -+ // hence we can store 0 as the displaced header in the box, which indicates that it is a -+ // recursive lock. -+ __ andr(tmp/*==0?*/, disp_hdr, tmp); -+ __ sd(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes())); -+ __ mv(flag, tmp); // we can use the value of tmp as the result here ++#ifndef CPU_RISCV_REGISTERMAP_RISCV_HPP ++#define CPU_RISCV_REGISTERMAP_RISCV_HPP + -+ if ((EmitSync & 0x02) == 0) { -+ __ j(cont); ++// machine-dependent implemention for register maps ++ friend class frame; + -+ // Handle existing monitor. -+ __ bind(object_has_monitor); -+ // The object's monitor m is unlocked iff m->owner == NULL, -+ // otherwise m->owner may contain a thread or a stack address. -+ // -+ // Try to CAS m->owner from NULL to current thread. -+ __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value)); -+ __ cmpxchg(/*memory address*/tmp, /*expected value*/zr, /*new value*/xthread, Assembler::int64, Assembler::aq, -+ Assembler::rl, /*result*/flag); // cas succeeds if flag == zr(expected) ++ private: ++ // This is the hook for finding a register in an "well-known" location, ++ // such as a register block of a predetermined format. ++ address pd_location(VMReg reg) const { return NULL; } ++ address pd_location(VMReg base_reg, int slot_idx) const; + -+ // Store a non-null value into the box to avoid looking like a re-entrant -+ // lock. The fast-path monitor unlock code checks for -+ // markOopDesc::monitor_value so use markOopDesc::unused_mark which has the -+ // relevant bit set, and also matches ObjectSynchronizer::slow_enter. 
-+ __ mv(tmp, (address)markOopDesc::unused_mark()); -+ __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); -+ } ++ // no PD state to clear or copy: ++ void pd_clear() {} ++ void pd_initialize() {} ++ void pd_initialize_from(const RegisterMap* map) {} + -+ __ bind(cont); -+ %} ++#endif // CPU_RISCV_REGISTERMAP_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/register_riscv.cpp b/src/hotspot/cpu/riscv/register_riscv.cpp +new file mode 100644 +index 00000000000..f8116e9df8c +--- /dev/null ++++ b/src/hotspot/cpu/riscv/register_riscv.cpp +@@ -0,0 +1,73 @@ ++/* ++ * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ // using cr flag to indicate the fast_unlock result: 0 for success; others failed. -+ enc_class riscv_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{ -+ MacroAssembler _masm(&cbuf); -+ Register flag = t1; -+ Register oop = as_Register($object$$reg); -+ Register box = as_Register($box$$reg); -+ Register disp_hdr = as_Register($tmp$$reg); -+ Register tmp = as_Register($tmp2$$reg); -+ Label cont; -+ Label object_has_monitor; ++#include "precompiled.hpp" ++#include "register_riscv.hpp" + -+ assert_different_registers(oop, box, tmp, disp_hdr, flag); ++REGISTER_IMPL_DEFINITION(Register, RegisterImpl, RegisterImpl::number_of_registers); ++REGISTER_IMPL_DEFINITION(FloatRegister, FloatRegisterImpl, FloatRegisterImpl::number_of_registers); ++REGISTER_IMPL_DEFINITION(VectorRegister, VectorRegisterImpl, VectorRegisterImpl::number_of_registers); + -+ // Always do locking in runtime. -+ if (EmitSync & 0x01) { -+ __ mv(flag, 1); -+ return; -+ } ++const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers * ++ RegisterImpl::max_slots_per_register; + -+ if (UseBiasedLocking && !UseOptoBiasInlining) { -+ __ biased_locking_exit(oop, tmp, cont, flag); -+ } ++const int ConcreteRegisterImpl::max_fpr = ++ ConcreteRegisterImpl::max_gpr + ++ FloatRegisterImpl::number_of_registers * FloatRegisterImpl::max_slots_per_register; + -+ // Find the lock address and load the displaced header from the stack. -+ __ ld(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes())); ++const int ConcreteRegisterImpl::max_vpr = ++ ConcreteRegisterImpl::max_fpr + ++ VectorRegisterImpl::number_of_registers * VectorRegisterImpl::max_slots_per_register; + -+ // If the displaced header is 0, we have a recursive unlock. 
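// Example: a nested synchronized block on the same object stored 0 as the
// displaced header in fast_lock above, so disp_hdr == 0 here; flag becomes 0
// (success) and the branch below leaves the object's mark word untouched.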
-+ __ mv(flag, disp_hdr); -+ __ beqz(disp_hdr, cont); + -+ // Handle existing monitor. -+ if ((EmitSync & 0x02) == 0) { -+ __ ld(tmp, Address(oop, oopDesc::mark_offset_in_bytes())); -+ __ andi(t0, disp_hdr, markOopDesc::monitor_value); -+ __ bnez(t0, object_has_monitor); -+ } ++const char* RegisterImpl::name() const { ++ static const char *const names[number_of_registers] = { ++ "zr", "ra", "sp", "gp", "tp", "t0", "t1", "t2", "fp", "x9", ++ "c_rarg0", "c_rarg1", "c_rarg2", "c_rarg3", "c_rarg4", "c_rarg5", "c_rarg6", "c_rarg7", ++ "x18", "x19", "esp", "xdispatch", "xbcp", "xthread", "xlocals", ++ "xmonitors", "xcpool", "xheapbase", "x28", "x29", "x30", "xmethod" ++ }; ++ return is_valid() ? names[encoding()] : "noreg"; ++} + -+ // Check if it is still a light weight lock, this is true if we -+ // see the stack address of the basicLock in the markOop of the -+ // object. ++const char* FloatRegisterImpl::name() const { ++ static const char *const names[number_of_registers] = { ++ "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", ++ "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", ++ "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", ++ "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31" ++ }; ++ return is_valid() ? names[encoding()] : "noreg"; ++} + -+ __ cmpxchg(/*memory address*/oop, /*expected value*/box, /*new value*/disp_hdr, Assembler::int64, Assembler::relaxed, -+ Assembler::rl, /*result*/tmp); -+ __ xorr(flag, box, tmp); // box == tmp if cas succeeds -+ __ j(cont); ++const char* VectorRegisterImpl::name() const { ++ static const char *const names[number_of_registers] = { ++ "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", ++ "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", ++ "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", ++ "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31" ++ }; ++ return is_valid() ? names[encoding()] : "noreg"; ++} +diff --git a/src/hotspot/cpu/riscv/register_riscv.hpp b/src/hotspot/cpu/riscv/register_riscv.hpp +new file mode 100644 +index 00000000000..a9200cac647 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/register_riscv.hpp +@@ -0,0 +1,324 @@ ++/* ++ * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); ++#ifndef CPU_RISCV_REGISTER_RISCV_HPP ++#define CPU_RISCV_REGISTER_RISCV_HPP + -+ // Handle existing monitor. 
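// An inflated lock is recognized by the monitor_value bit (0b10); in that case
// the code under object_has_monitor below works on the ObjectMonitor fields
// (owner, recursions, cxq, EntryList) instead of the on-stack BasicLock.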
-+ if ((EmitSync & 0x02) == 0) { -+ __ bind(object_has_monitor); -+ __ add(tmp, tmp, -markOopDesc::monitor_value); // monitor -+ __ ld(flag, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); -+ __ ld(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes())); -+ __ xorr(flag, flag, xthread); // Will be 0 if we are the owner. -+ __ orr(flag, flag, disp_hdr); // Will be 0 if there are 0 recursions -+ __ bnez(flag, cont); ++#include "asm/register.hpp" + -+ __ ld(flag, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes())); -+ __ ld(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes())); -+ __ orr(flag, flag, disp_hdr); // Will be 0 if both are 0. -+ __ bnez(flag, cont); -+ // need a release store here -+ __ la(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); -+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); -+ __ sd(zr, Address(tmp)); // set unowned -+ } ++#define CSR_FFLAGS 0x001 // Floating-Point Accrued Exceptions. ++#define CSR_FRM 0x002 // Floating-Point Dynamic Rounding Mode. ++#define CSR_FCSR 0x003 // Floating-Point Control and Status Register (frm + fflags). ++#define CSR_VSTART 0x008 // Vector start position ++#define CSR_VXSAT 0x009 // Fixed-Point Saturate Flag ++#define CSR_VXRM 0x00A // Fixed-Point Rounding Mode ++#define CSR_VCSR 0x00F // Vector control and status register ++#define CSR_VL 0xC20 // Vector length ++#define CSR_VTYPE 0xC21 // Vector data type register ++#define CSR_VLENB 0xC22 // VLEN/8 (vector register length in bytes) ++#define CSR_CYCLE 0xc00 // Cycle counter for RDCYCLE instruction. ++#define CSR_TIME 0xc01 // Timer for RDTIME instruction. ++#define CSR_INSTERT 0xc02 // Instructions-retired counter for RDINSTRET instruction. + -+ __ bind(cont); -+ %} ++class VMRegImpl; ++typedef VMRegImpl* VMReg; + -+ // arithmetic encodings ++// Use Register as shortcut ++class RegisterImpl; ++typedef const RegisterImpl* Register; + -+ enc_class riscv_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{ -+ MacroAssembler _masm(&cbuf); -+ Register dst_reg = as_Register($dst$$reg); -+ Register src1_reg = as_Register($src1$$reg); -+ Register src2_reg = as_Register($src2$$reg); -+ __ corrected_idivl(dst_reg, src1_reg, src2_reg, false); -+ %} ++inline constexpr Register as_Register(int encoding); + -+ enc_class riscv_enc_div(iRegI dst, iRegI src1, iRegI src2) %{ -+ MacroAssembler _masm(&cbuf); -+ Register dst_reg = as_Register($dst$$reg); -+ Register src1_reg = as_Register($src1$$reg); -+ Register src2_reg = as_Register($src2$$reg); -+ __ corrected_idivq(dst_reg, src1_reg, src2_reg, false); -+ %} ++class RegisterImpl: public AbstractRegisterImpl { ++ static constexpr Register first(); + -+ enc_class riscv_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{ -+ MacroAssembler _masm(&cbuf); -+ Register dst_reg = as_Register($dst$$reg); -+ Register src1_reg = as_Register($src1$$reg); -+ Register src2_reg = as_Register($src2$$reg); -+ __ corrected_idivl(dst_reg, src1_reg, src2_reg, true); -+ %} ++ public: ++ enum { ++ number_of_registers = 32, ++ max_slots_per_register = 2, + -+ enc_class riscv_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{ -+ MacroAssembler _masm(&cbuf); -+ Register dst_reg = as_Register($dst$$reg); -+ Register src1_reg = as_Register($src1$$reg); -+ Register src2_reg = as_Register($src2$$reg); -+ __ corrected_idivq(dst_reg, src1_reg, src2_reg, true); -+ %} ++ // integer registers x8 - x15 and floating-point registers f8 - f15 are allocatable ++ // for compressed instructions. See Table 17.2 in spec. 
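// (Illustration: RVC register fields are 3 bits wide and encode x8..x15 as 0..7,
// so x10/a0 has compressed_encoding() == 2 and x15 has 7; see compressed_encoding()
// below.)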
++ compressed_register_base = 8, ++ compressed_register_top = 15, ++ }; + -+ enc_class riscv_enc_tail_call(iRegP jump_target) %{ -+ MacroAssembler _masm(&cbuf); -+ Register target_reg = as_Register($jump_target$$reg); -+ __ jr(target_reg); -+ %} ++ // derived registers, offsets, and addresses ++ const Register successor() const { return this + 1; } + -+ enc_class riscv_enc_tail_jmp(iRegP jump_target) %{ -+ MacroAssembler _masm(&cbuf); -+ Register target_reg = as_Register($jump_target$$reg); -+ // exception oop should be in x10 -+ // ret addr has been popped into ra -+ // callee expects it in x13 -+ __ mv(x13, ra); -+ __ jr(target_reg); -+ %} ++ // construction ++ inline friend constexpr Register as_Register(int encoding); + -+ enc_class riscv_enc_rethrow() %{ -+ MacroAssembler _masm(&cbuf); -+ __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub())); -+ %} ++ VMReg as_VMReg() const; + -+ enc_class riscv_enc_ret() %{ -+ MacroAssembler _masm(&cbuf); -+ __ ret(); -+ %} ++ // accessors ++ int encoding() const { assert(is_valid(), "invalid register"); return encoding_nocheck(); } ++ int encoding_nocheck() const { return this - first(); } ++ bool is_valid() const { return (unsigned)encoding_nocheck() < number_of_registers; } ++ const char* name() const; + -+%} ++ // for rvc ++ int compressed_encoding() const { ++ assert(is_compressed_valid(), "invalid compressed register"); ++ return encoding() - compressed_register_base; ++ } + -+//----------FRAME-------------------------------------------------------------- -+// Definition of frame structure and management information. -+// -+// S T A C K L A Y O U T Allocators stack-slot number -+// | (to get allocators register number -+// G Owned by | | v add OptoReg::stack0()) -+// r CALLER | | -+// o | +--------+ pad to even-align allocators stack-slot -+// w V | pad0 | numbers; owned by CALLER -+// t -----------+--------+----> Matcher::_in_arg_limit, unaligned -+// h ^ | in | 5 -+// | | args | 4 Holes in incoming args owned by SELF -+// | | | | 3 -+// | | +--------+ -+// V | | old out| Empty on Intel, window on Sparc -+// | old |preserve| Must be even aligned. -+// | SP-+--------+----> Matcher::_old_SP, even aligned -+// | | in | 3 area for Intel ret address -+// Owned by |preserve| Empty on Sparc. -+// SELF +--------+ -+// | | pad2 | 2 pad to align old SP -+// | +--------+ 1 -+// | | locks | 0 -+// | +--------+----> OptoReg::stack0(), even aligned -+// | | pad1 | 11 pad to align new SP -+// | +--------+ -+// | | | 10 -+// | | spills | 9 spills -+// V | | 8 (pad0 slot for callee) -+// -----------+--------+----> Matcher::_out_arg_limit, unaligned -+// ^ | out | 7 -+// | | args | 6 Holes in outgoing args owned by CALLEE -+// Owned by +--------+ -+// CALLEE | new out| 6 Empty on Intel, window on Sparc -+// | new |preserve| Must be even-aligned. -+// | SP-+--------+----> Matcher::_new_SP, even aligned -+// | | | -+// -+// Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is -+// known from SELF's arguments and the Java calling convention. -+// Region 6-7 is determined per call site. -+// Note 2: If the calling convention leaves holes in the incoming argument -+// area, those holes are owned by SELF. Holes in the outgoing area -+// are owned by the CALLEE. Holes should not be nessecary in the -+// incoming area, as the Java calling convention is completely under -+// the control of the AD file. Doubles can be sorted and packed to -+// avoid holes. Holes in the outgoing arguments may be nessecary for -+// varargs C calling conventions. 
-+// Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is -+// even aligned with pad0 as needed. -+// Region 6 is even aligned. Region 6-7 is NOT even aligned; -+// (the latter is true on Intel but is it false on RISCV?) -+// region 6-11 is even aligned; it may be padded out more so that -+// the region from SP to FP meets the minimum stack alignment. -+// Note 4: For I2C adapters, the incoming FP may not meet the minimum stack -+// alignment. Region 11, pad1, may be dynamically extended so that -+// SP meets the minimum alignment. ++ int compressed_encoding_nocheck() const { ++ return encoding_nocheck() - compressed_register_base; ++ } + -+frame %{ -+ // What direction does stack grow in (assumed to be same for C & Java) -+ stack_direction(TOWARDS_LOW); ++ bool is_compressed_valid() const { ++ return encoding_nocheck() >= compressed_register_base && ++ encoding_nocheck() <= compressed_register_top; ++ } ++}; + -+ // These three registers define part of the calling convention -+ // between compiled code and the interpreter. ++REGISTER_IMPL_DECLARATION(Register, RegisterImpl, RegisterImpl::number_of_registers); + -+ // Inline Cache Register or methodOop for I2C. -+ inline_cache_reg(R31); ++// The integer registers of the RISCV architecture + -+ // Method Oop Register when calling interpreter. -+ interpreter_method_oop_reg(R31); ++CONSTANT_REGISTER_DECLARATION(Register, noreg, (-1)); + -+ // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset] -+ cisc_spilling_operand_name(indOffset); ++CONSTANT_REGISTER_DECLARATION(Register, x0, (0)); ++CONSTANT_REGISTER_DECLARATION(Register, x1, (1)); ++CONSTANT_REGISTER_DECLARATION(Register, x2, (2)); ++CONSTANT_REGISTER_DECLARATION(Register, x3, (3)); ++CONSTANT_REGISTER_DECLARATION(Register, x4, (4)); ++CONSTANT_REGISTER_DECLARATION(Register, x5, (5)); ++CONSTANT_REGISTER_DECLARATION(Register, x6, (6)); ++CONSTANT_REGISTER_DECLARATION(Register, x7, (7)); ++CONSTANT_REGISTER_DECLARATION(Register, x8, (8)); ++CONSTANT_REGISTER_DECLARATION(Register, x9, (9)); ++CONSTANT_REGISTER_DECLARATION(Register, x10, (10)); ++CONSTANT_REGISTER_DECLARATION(Register, x11, (11)); ++CONSTANT_REGISTER_DECLARATION(Register, x12, (12)); ++CONSTANT_REGISTER_DECLARATION(Register, x13, (13)); ++CONSTANT_REGISTER_DECLARATION(Register, x14, (14)); ++CONSTANT_REGISTER_DECLARATION(Register, x15, (15)); ++CONSTANT_REGISTER_DECLARATION(Register, x16, (16)); ++CONSTANT_REGISTER_DECLARATION(Register, x17, (17)); ++CONSTANT_REGISTER_DECLARATION(Register, x18, (18)); ++CONSTANT_REGISTER_DECLARATION(Register, x19, (19)); ++CONSTANT_REGISTER_DECLARATION(Register, x20, (20)); ++CONSTANT_REGISTER_DECLARATION(Register, x21, (21)); ++CONSTANT_REGISTER_DECLARATION(Register, x22, (22)); ++CONSTANT_REGISTER_DECLARATION(Register, x23, (23)); ++CONSTANT_REGISTER_DECLARATION(Register, x24, (24)); ++CONSTANT_REGISTER_DECLARATION(Register, x25, (25)); ++CONSTANT_REGISTER_DECLARATION(Register, x26, (26)); ++CONSTANT_REGISTER_DECLARATION(Register, x27, (27)); ++CONSTANT_REGISTER_DECLARATION(Register, x28, (28)); ++CONSTANT_REGISTER_DECLARATION(Register, x29, (29)); ++CONSTANT_REGISTER_DECLARATION(Register, x30, (30)); ++CONSTANT_REGISTER_DECLARATION(Register, x31, (31)); + -+ // Number of stack slots consumed by locking an object -+ // generate Compile::sync_stack_slots -+ // VMRegImpl::slots_per_word = wordSize / stack_slot_size = 8 / 4 = 2 -+ sync_stack_slots(1 * VMRegImpl::slots_per_word); ++// Use FloatRegister as shortcut ++class FloatRegisterImpl; 
++typedef const FloatRegisterImpl* FloatRegister; + -+ // Compiled code's Frame Pointer -+ frame_pointer(R2); ++inline constexpr FloatRegister as_FloatRegister(int encoding); + -+ // Interpreter stores its frame pointer in a register which is -+ // stored to the stack by I2CAdaptors. -+ // I2CAdaptors convert from interpreted java to compiled java. -+ interpreter_frame_pointer(R8); ++// The implementation of floating point registers for the architecture ++class FloatRegisterImpl: public AbstractRegisterImpl { ++ static constexpr FloatRegister first(); + -+ // Stack alignment requirement -+ stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes) ++ public: ++ enum { ++ number_of_registers = 32, ++ max_slots_per_register = 2, + -+ // Number of stack slots between incoming argument block and the start of -+ // a new frame. The PROLOG must add this many slots to the stack. The -+ // EPILOG must remove this many slots. -+ // RISCV needs two words for RA (return address) and FP (frame pointer). -+ in_preserve_stack_slots(2 * VMRegImpl::slots_per_word); ++ // float registers in the range of [f8~f15] correspond to RVC. Please see Table 16.2 in spec. ++ compressed_register_base = 8, ++ compressed_register_top = 15, ++ }; + -+ // Number of outgoing stack slots killed above the out_preserve_stack_slots -+ // for calls to C. Supports the var-args backing area for register parms. -+ varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes / BytesPerInt); ++ // construction ++ inline friend constexpr FloatRegister as_FloatRegister(int encoding); + -+ // The after-PROLOG location of the return address. Location of -+ // return address specifies a type (REG or STACK) and a number -+ // representing the register number (i.e. - use a register name) or -+ // stack slot. -+ // Ret Addr is on stack in slot 0 if no locks or verification or alignment. -+ // Otherwise, it is above the locks and verification slot and alignment word -+ // TODO this may well be correct but need to check why that - 2 is there -+ // ppc port uses 0 but we definitely need to allow for fixed_slots -+ // which folds in the space used for monitors -+ return_addr(STACK - 2 + -+ align_up((Compile::current()->in_preserve_stack_slots() + -+ Compile::current()->fixed_slots()), -+ stack_alignment_in_slots())); ++ VMReg as_VMReg() const; + -+ // Body of function which returns an integer array locating -+ // arguments either in registers or in stack slots. Passed an array -+ // of ideal registers called "sig" and a "length" count. Stack-slot -+ // offsets are based on outgoing arguments, i.e. a CALLER setting up -+ // arguments for a CALLEE. Incoming stack arguments are -+ // automatically biased by the preserve_stack_slots field above. 
++ // derived registers, offsets, and addresses ++ FloatRegister successor() const { ++ return as_FloatRegister((encoding() + 1) % (unsigned)number_of_registers); ++ } + -+ calling_convention -+ %{ -+ // No difference between ingoing/outgoing just pass false -+ SharedRuntime::java_calling_convention(sig_bt, regs, length, false); -+ %} ++ // accessors ++ int encoding() const { assert(is_valid(), "invalid register"); return encoding_nocheck(); } ++ int encoding_nocheck() const { return this - first(); } ++ int is_valid() const { return (unsigned)encoding_nocheck() < number_of_registers; } ++ const char* name() const; + -+ c_calling_convention -+ %{ -+ // This is obviously always outgoing -+ (void) SharedRuntime::c_calling_convention(sig_bt, regs, NULL, length); -+ %} ++ // for rvc ++ int compressed_encoding() const { ++ assert(is_compressed_valid(), "invalid compressed register"); ++ return encoding() - compressed_register_base; ++ } + -+ // Location of compiled Java return values. Same as C for now. -+ return_value -+ %{ -+ assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, -+ "only return normal values"); ++ int compressed_encoding_nocheck() const { ++ return encoding_nocheck() - compressed_register_base; ++ } + -+ static const int lo[Op_RegL + 1] = { // enum name -+ 0, // Op_Node -+ 0, // Op_Set -+ R10_num, // Op_RegN -+ R10_num, // Op_RegI -+ R10_num, // Op_RegP -+ F10_num, // Op_RegF -+ F10_num, // Op_RegD -+ R10_num // Op_RegL -+ }; ++ bool is_compressed_valid() const { ++ return encoding_nocheck() >= compressed_register_base && ++ encoding_nocheck() <= compressed_register_top; ++ } ++}; + -+ static const int hi[Op_RegL + 1] = { // enum name -+ 0, // Op_Node -+ 0, // Op_Set -+ OptoReg::Bad, // Op_RegN -+ OptoReg::Bad, // Op_RegI -+ R10_H_num, // Op_RegP -+ OptoReg::Bad, // Op_RegF -+ F10_H_num, // Op_RegD -+ R10_H_num // Op_RegL -+ }; ++REGISTER_IMPL_DECLARATION(FloatRegister, FloatRegisterImpl, FloatRegisterImpl::number_of_registers); + -+ return OptoRegPair(hi[ideal_reg], lo[ideal_reg]); -+ %} -+%} ++// The float registers of the RISCV architecture + -+//----------ATTRIBUTES--------------------------------------------------------- -+//----------Operand Attributes------------------------------------------------- -+op_attrib op_cost(1); // Required cost attribute ++CONSTANT_REGISTER_DECLARATION(FloatRegister, fnoreg , (-1)); + -+//----------Instruction Attributes--------------------------------------------- -+ins_attrib ins_cost(DEFAULT_COST); // Required cost attribute -+ins_attrib ins_size(32); // Required size attribute (in bits) -+ins_attrib ins_short_branch(0); // Required flag: is this instruction -+ // a non-matching short branch variant -+ // of some long branch? -+ins_attrib ins_alignment(4); // Required alignment attribute (must -+ // be a power of 2) specifies the -+ // alignment that some part of the -+ // instruction (not necessarily the -+ // start) requires. 
If > 1, a -+ // compute_padding() function must be -+ // provided for the instruction ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f0 , ( 0)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f1 , ( 1)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f2 , ( 2)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f3 , ( 3)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f4 , ( 4)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f5 , ( 5)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f6 , ( 6)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f7 , ( 7)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f8 , ( 8)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f9 , ( 9)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f10 , (10)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f11 , (11)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f12 , (12)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f13 , (13)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f14 , (14)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f15 , (15)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f16 , (16)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f17 , (17)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f18 , (18)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f19 , (19)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f20 , (20)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f21 , (21)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f22 , (22)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f23 , (23)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f24 , (24)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f25 , (25)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f26 , (26)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f27 , (27)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f28 , (28)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f29 , (29)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f30 , (30)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f31 , (31)); + -+//----------OPERANDS----------------------------------------------------------- -+// Operand definitions must precede instruction definitions for correct parsing -+// in the ADLC because operands constitute user defined types which are used in -+// instruction definitions. 
++// Use VectorRegister as shortcut ++class VectorRegisterImpl; ++typedef const VectorRegisterImpl* VectorRegister; + -+//----------Simple Operands---------------------------------------------------- ++inline constexpr VectorRegister as_VectorRegister(int encoding); + -+// Integer operands 32 bit -+// 32 bit immediate -+operand immI() -+%{ -+ match(ConI); ++// The implementation of vector registers for RVV ++class VectorRegisterImpl: public AbstractRegisterImpl { ++ static constexpr VectorRegister first(); + -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++ public: ++ enum { ++ number_of_registers = 32, ++ max_slots_per_register = 4 ++ }; + -+// 32 bit zero -+operand immI0() -+%{ -+ predicate(n->get_int() == 0); -+ match(ConI); ++ // construction ++ inline friend constexpr VectorRegister as_VectorRegister(int encoding); + -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++ VMReg as_VMReg() const; + -+// 32 bit unit increment -+operand immI_1() -+%{ -+ predicate(n->get_int() == 1); -+ match(ConI); ++ // derived registers, offsets, and addresses ++ VectorRegister successor() const { return this + 1; } + -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++ // accessors ++ int encoding() const { assert(is_valid(), "invalid register"); return encoding_nocheck(); } ++ int encoding_nocheck() const { return this - first(); } ++ bool is_valid() const { return (unsigned)encoding_nocheck() < number_of_registers; } ++ const char* name() const; + -+// 32 bit unit decrement -+operand immI_M1() -+%{ -+ predicate(n->get_int() == -1); -+ match(ConI); ++}; + -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++REGISTER_IMPL_DECLARATION(VectorRegister, VectorRegisterImpl, VectorRegisterImpl::number_of_registers); + -+// Unsigned Integer Immediate: 6-bit int, greater than 32 -+operand uimmI6_ge32() %{ -+ predicate(((unsigned int)(n->get_int()) < 64) && (n->get_int() >= 32)); -+ match(ConI); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++// The vector registers of RVV ++CONSTANT_REGISTER_DECLARATION(VectorRegister, vnoreg , (-1)); + -+operand immI_le_4() -+%{ -+ predicate(n->get_int() <= 4); -+ match(ConI); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v0 , ( 0)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v1 , ( 1)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v2 , ( 2)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v3 , ( 3)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v4 , ( 4)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v5 , ( 5)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v6 , ( 6)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v7 , ( 7)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v8 , ( 8)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v9 , ( 9)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v10 , (10)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v11 , (11)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v12 , (12)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v13 , (13)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v14 , (14)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v15 , (15)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v16 , (16)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v17 , (17)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v18 , (18)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v19 , (19)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v20 , (20)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v21 , (21)); 
++CONSTANT_REGISTER_DECLARATION(VectorRegister, v22 , (22)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v23 , (23)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v24 , (24)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v25 , (25)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v26 , (26)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v27 , (27)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v28 , (28)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v29 , (29)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v30 , (30)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v31 , (31)); + -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} + -+operand immI_16() -+%{ -+ predicate(n->get_int() == 16); -+ match(ConI); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++// Need to know the total number of registers of all sorts for SharedInfo. ++// Define a class that exports it. ++class ConcreteRegisterImpl : public AbstractRegisterImpl { ++ public: ++ enum { ++ // A big enough number for C2: all the registers plus flags ++ // This number must be large enough to cover REG_COUNT (defined by c2) registers. ++ // There is no requirement that any ordering here matches any ordering c2 gives ++ // it's optoregs. + -+operand immI_24() -+%{ -+ predicate(n->get_int() == 24); -+ match(ConI); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++ number_of_registers = (RegisterImpl::max_slots_per_register * RegisterImpl::number_of_registers + ++ FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers + ++ VectorRegisterImpl::max_slots_per_register * VectorRegisterImpl::number_of_registers) ++ }; + -+operand immI_31() -+%{ -+ predicate(n->get_int() == 31); -+ match(ConI); ++ // added to make it compile ++ static const int max_gpr; ++ static const int max_fpr; ++ static const int max_vpr; ++}; + -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++typedef AbstractRegSet RegSet; ++typedef AbstractRegSet FloatRegSet; ++typedef AbstractRegSet VectorRegSet; + -+operand immI_32() -+%{ -+ predicate(n->get_int() == 32); -+ match(ConI); ++#endif // CPU_RISCV_REGISTER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/relocInfo_riscv.cpp b/src/hotspot/cpu/riscv/relocInfo_riscv.cpp +new file mode 100644 +index 00000000000..228a64eae2c +--- /dev/null ++++ b/src/hotspot/cpu/riscv/relocInfo_riscv.cpp +@@ -0,0 +1,113 @@ ++/* ++ * Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/relocInfo.hpp" ++#include "nativeInst_riscv.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/safepoint.hpp" + -+operand immI_63() -+%{ -+ predicate(n->get_int() == 63); -+ match(ConI); ++void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) { ++ if (verify_only) { ++ return; ++ } + -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++ int bytes; + -+operand immI_64() -+%{ -+ predicate(n->get_int() == 64); -+ match(ConI); ++ switch (type()) { ++ case relocInfo::oop_type: { ++ oop_Relocation *reloc = (oop_Relocation *)this; ++ // in movoop when BarrierSet::barrier_set()->barrier_set_nmethod() != NULL || !immediate ++ if (NativeInstruction::is_load_pc_relative_at(addr())) { ++ address constptr = (address)code()->oop_addr_at(reloc->oop_index()); ++ bytes = MacroAssembler::pd_patch_instruction_size(addr(), constptr); ++ assert(*(address*)constptr == x, "error in oop relocation"); ++ } else { ++ bytes = MacroAssembler::patch_oop(addr(), x); ++ } ++ break; ++ } ++ default: ++ bytes = MacroAssembler::pd_patch_instruction_size(addr(), x); ++ break; ++ } ++ ICache::invalidate_range(addr(), bytes); ++} + -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++address Relocation::pd_call_destination(address orig_addr) { ++ assert(is_call(), "should be an address instruction here"); ++ if (NativeCall::is_call_at(addr())) { ++ address trampoline = nativeCall_at(addr())->get_trampoline(); ++ if (trampoline != NULL) { ++ return nativeCallTrampolineStub_at(trampoline)->destination(); ++ } ++ } ++ if (orig_addr != NULL) { ++ // the extracted address from the instructions in address orig_addr ++ address new_addr = MacroAssembler::pd_call_destination(orig_addr); ++ // If call is branch to self, don't try to relocate it, just leave it ++ // as branch to self. This happens during code generation if the code ++ // buffer expands. It will be relocated to the trampoline above once ++ // code generation is complete. ++ new_addr = (new_addr == orig_addr) ? 
addr() : new_addr; ++ return new_addr; ++ } ++ return MacroAssembler::pd_call_destination(addr()); ++} + -+// 32 bit integer valid for add immediate -+operand immIAdd() -+%{ -+ predicate(Assembler::operand_valid_for_add_immediate((long)n->get_int())); -+ match(ConI); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++void Relocation::pd_set_call_destination(address x) { ++ assert(is_call(), "should be an address instruction here"); ++ if (NativeCall::is_call_at(addr())) { ++ address trampoline = nativeCall_at(addr())->get_trampoline(); ++ if (trampoline != NULL) { ++ nativeCall_at(addr())->set_destination_mt_safe(x, /* assert_lock */false); ++ return; ++ } ++ } ++ MacroAssembler::pd_patch_instruction_size(addr(), x); ++ address pd_call = pd_call_destination(addr()); ++ assert(pd_call == x, "fail in reloc"); ++} + -+// 32 bit integer valid for sub immediate -+operand immISub() -+%{ -+ predicate(Assembler::operand_valid_for_add_immediate(-(long)n->get_int())); -+ match(ConI); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++address* Relocation::pd_address_in_code() { ++ assert(NativeCall::is_load_pc_relative_at(addr()), "Not the expected instruction sequence!"); ++ return (address*)(MacroAssembler::target_addr_for_insn(addr())); ++} + -+// 5 bit signed value. -+operand immI5() -+%{ -+ predicate(n->get_int() <= 15 && n->get_int() >= -16); -+ match(ConI); ++address Relocation::pd_get_address_from_code() { ++ return MacroAssembler::pd_call_destination(addr()); ++} + -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++void poll_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { ++ if (NativeInstruction::maybe_cpool_ref(addr())) { ++ address old_addr = old_addr_for(addr(), src, dest); ++ MacroAssembler::pd_patch_instruction_size(addr(), MacroAssembler::target_addr_for_insn(old_addr)); ++ } ++} + -+// 5 bit signed value (simm5) -+operand immL5() -+%{ -+ predicate(n->get_long() <= 15 && n->get_long() >= -16); -+ match(ConL); ++void metadata_Relocation::pd_fix_value(address x) { ++} +diff --git a/src/hotspot/cpu/riscv/relocInfo_riscv.hpp b/src/hotspot/cpu/riscv/relocInfo_riscv.hpp +new file mode 100644 +index 00000000000..840ed935d88 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/relocInfo_riscv.hpp +@@ -0,0 +1,44 @@ ++/* ++ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ + -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++#ifndef CPU_RISCV_RELOCINFO_RISCV_HPP ++#define CPU_RISCV_RELOCINFO_RISCV_HPP + -+// Integer operands 64 bit -+// 64 bit immediate -+operand immL() -+%{ -+ match(ConL); ++ // machine-dependent parts of class relocInfo ++ private: ++ enum { ++ // Relocations are byte-aligned. ++ offset_unit = 1, ++ // Must be at least 1 for RelocInfo::narrow_oop_in_const. ++ format_width = 1 ++ }; + -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++ public: + -+// 64 bit zero -+operand immL0() -+%{ -+ predicate(n->get_long() == 0); -+ match(ConL); ++ // This platform has no oops in the code that are not also ++ // listed in the oop section. ++ static bool mustIterateImmediateOopsInCode() { return false; } + -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++#endif // CPU_RISCV_RELOCINFO_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +new file mode 100644 +index 00000000000..588887e1d96 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -0,0 +1,10611 @@ ++// ++// Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++// Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. ++// ++// + -+// Pointer operands -+// Pointer Immediate -+operand immP() -+%{ -+ match(ConP); ++// RISCV Architecture Description File + -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++//----------REGISTER DEFINITION BLOCK------------------------------------------ ++// This information is used by the matcher and the register allocator to ++// describe individual registers and classes of registers within the target ++// archtecture. + -+// NULL Pointer Immediate -+operand immP0() -+%{ -+ predicate(n->get_ptr() == 0); -+ match(ConP); ++register %{ ++//----------Architecture Description Register Definitions---------------------- ++// General Registers ++// "reg_def" name ( register save type, C convention save type, ++// ideal register type, encoding ); ++// Register Save Types: ++// ++// NS = No-Save: The register allocator assumes that these registers ++// can be used without saving upon entry to the method, & ++// that they do not need to be saved at call sites. ++// ++// SOC = Save-On-Call: The register allocator assumes that these registers ++// can be used without saving upon entry to the method, ++// but that they must be saved at call sites. 
++// ++// SOE = Save-On-Entry: The register allocator assumes that these registers ++// must be saved before using them upon entry to the ++// method, but they do not need to be saved at call ++// sites. ++// ++// AS = Always-Save: The register allocator assumes that these registers ++// must be saved before using them upon entry to the ++// method, & that they must be saved at call sites. ++// ++// Ideal Register Type is used to determine how to save & restore a ++// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get ++// spilled with LoadP/StoreP. If the register supports both, use Op_RegI. ++// ++// The encoding number is the actual bit-pattern placed into the opcodes. + -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++// We must define the 64 bit int registers in two 32 bit halves, the ++// real lower register and a virtual upper half register. upper halves ++// are used by the register allocator but are not actually supplied as ++// operands to memory ops. ++// ++// follow the C1 compiler in making registers ++// ++// x7, x9-x17, x27-x31 volatile (caller save) ++// x0-x4, x8, x23 system (no save, no allocate) ++// x5-x6 non-allocatable (so we can use them as temporary regs) + -+// Pointer Immediate One -+// this is used in object initialization (initial object header) -+operand immP_1() -+%{ -+ predicate(n->get_ptr() == 1); -+ match(ConP); ++// ++// as regards Java usage. we don't use any callee save registers ++// because this makes it difficult to de-optimise a frame (see comment ++// in x86 implementation of Deoptimization::unwind_callee_save_values) ++// + -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++// General Registers + -+// Polling Page Pointer Immediate -+operand immPollPage() -+%{ -+ predicate((address)n->get_ptr() == os::get_polling_page()); -+ match(ConP); ++reg_def R0 ( NS, NS, Op_RegI, 0, x0->as_VMReg() ); // zr ++reg_def R0_H ( NS, NS, Op_RegI, 0, x0->as_VMReg()->next() ); ++reg_def R1 ( NS, SOC, Op_RegI, 1, x1->as_VMReg() ); // ra ++reg_def R1_H ( NS, SOC, Op_RegI, 1, x1->as_VMReg()->next() ); ++reg_def R2 ( NS, SOE, Op_RegI, 2, x2->as_VMReg() ); // sp ++reg_def R2_H ( NS, SOE, Op_RegI, 2, x2->as_VMReg()->next() ); ++reg_def R3 ( NS, NS, Op_RegI, 3, x3->as_VMReg() ); // gp ++reg_def R3_H ( NS, NS, Op_RegI, 3, x3->as_VMReg()->next() ); ++reg_def R4 ( NS, NS, Op_RegI, 4, x4->as_VMReg() ); // tp ++reg_def R4_H ( NS, NS, Op_RegI, 4, x4->as_VMReg()->next() ); ++reg_def R7 ( SOC, SOC, Op_RegI, 7, x7->as_VMReg() ); ++reg_def R7_H ( SOC, SOC, Op_RegI, 7, x7->as_VMReg()->next() ); ++reg_def R8 ( NS, SOE, Op_RegI, 8, x8->as_VMReg() ); // fp ++reg_def R8_H ( NS, SOE, Op_RegI, 8, x8->as_VMReg()->next() ); ++reg_def R9 ( SOC, SOE, Op_RegI, 9, x9->as_VMReg() ); ++reg_def R9_H ( SOC, SOE, Op_RegI, 9, x9->as_VMReg()->next() ); ++reg_def R10 ( SOC, SOC, Op_RegI, 10, x10->as_VMReg() ); ++reg_def R10_H ( SOC, SOC, Op_RegI, 10, x10->as_VMReg()->next()); ++reg_def R11 ( SOC, SOC, Op_RegI, 11, x11->as_VMReg() ); ++reg_def R11_H ( SOC, SOC, Op_RegI, 11, x11->as_VMReg()->next()); ++reg_def R12 ( SOC, SOC, Op_RegI, 12, x12->as_VMReg() ); ++reg_def R12_H ( SOC, SOC, Op_RegI, 12, x12->as_VMReg()->next()); ++reg_def R13 ( SOC, SOC, Op_RegI, 13, x13->as_VMReg() ); ++reg_def R13_H ( SOC, SOC, Op_RegI, 13, x13->as_VMReg()->next()); ++reg_def R14 ( SOC, SOC, Op_RegI, 14, x14->as_VMReg() ); ++reg_def R14_H ( SOC, SOC, Op_RegI, 14, x14->as_VMReg()->next()); ++reg_def R15 ( SOC, SOC, Op_RegI, 15, x15->as_VMReg() ); ++reg_def R15_H ( SOC, SOC, 
Op_RegI, 15, x15->as_VMReg()->next()); ++reg_def R16 ( SOC, SOC, Op_RegI, 16, x16->as_VMReg() ); ++reg_def R16_H ( SOC, SOC, Op_RegI, 16, x16->as_VMReg()->next()); ++reg_def R17 ( SOC, SOC, Op_RegI, 17, x17->as_VMReg() ); ++reg_def R17_H ( SOC, SOC, Op_RegI, 17, x17->as_VMReg()->next()); ++reg_def R18 ( SOC, SOE, Op_RegI, 18, x18->as_VMReg() ); ++reg_def R18_H ( SOC, SOE, Op_RegI, 18, x18->as_VMReg()->next()); ++reg_def R19 ( SOC, SOE, Op_RegI, 19, x19->as_VMReg() ); ++reg_def R19_H ( SOC, SOE, Op_RegI, 19, x19->as_VMReg()->next()); ++reg_def R20 ( SOC, SOE, Op_RegI, 20, x20->as_VMReg() ); // caller esp ++reg_def R20_H ( SOC, SOE, Op_RegI, 20, x20->as_VMReg()->next()); ++reg_def R21 ( SOC, SOE, Op_RegI, 21, x21->as_VMReg() ); ++reg_def R21_H ( SOC, SOE, Op_RegI, 21, x21->as_VMReg()->next()); ++reg_def R22 ( SOC, SOE, Op_RegI, 22, x22->as_VMReg() ); ++reg_def R22_H ( SOC, SOE, Op_RegI, 22, x22->as_VMReg()->next()); ++reg_def R23 ( NS, SOE, Op_RegI, 23, x23->as_VMReg() ); // java thread ++reg_def R23_H ( NS, SOE, Op_RegI, 23, x23->as_VMReg()->next()); ++reg_def R24 ( SOC, SOE, Op_RegI, 24, x24->as_VMReg() ); ++reg_def R24_H ( SOC, SOE, Op_RegI, 24, x24->as_VMReg()->next()); ++reg_def R25 ( SOC, SOE, Op_RegI, 25, x25->as_VMReg() ); ++reg_def R25_H ( SOC, SOE, Op_RegI, 25, x25->as_VMReg()->next()); ++reg_def R26 ( SOC, SOE, Op_RegI, 26, x26->as_VMReg() ); ++reg_def R26_H ( SOC, SOE, Op_RegI, 26, x26->as_VMReg()->next()); ++reg_def R27 ( SOC, SOE, Op_RegI, 27, x27->as_VMReg() ); // heapbase ++reg_def R27_H ( SOC, SOE, Op_RegI, 27, x27->as_VMReg()->next()); ++reg_def R28 ( SOC, SOC, Op_RegI, 28, x28->as_VMReg() ); ++reg_def R28_H ( SOC, SOC, Op_RegI, 28, x28->as_VMReg()->next()); ++reg_def R29 ( SOC, SOC, Op_RegI, 29, x29->as_VMReg() ); ++reg_def R29_H ( SOC, SOC, Op_RegI, 29, x29->as_VMReg()->next()); ++reg_def R30 ( SOC, SOC, Op_RegI, 30, x30->as_VMReg() ); ++reg_def R30_H ( SOC, SOC, Op_RegI, 30, x30->as_VMReg()->next()); ++reg_def R31 ( SOC, SOC, Op_RegI, 31, x31->as_VMReg() ); ++reg_def R31_H ( SOC, SOC, Op_RegI, 31, x31->as_VMReg()->next()); + -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++// ---------------------------- ++// Float/Double Registers ++// ---------------------------- + -+// Card Table Byte Map Base -+operand immByteMapBase() -+%{ -+ // Get base of card map -+ predicate(BarrierSet::barrier_set()->is_a(BarrierSet::CardTableBarrierSet) && -+ (jbyte*)n->get_ptr() == ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base()); -+ match(ConP); ++// Double Registers + -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++// The rules of ADL require that double registers be defined in pairs. ++// Each pair must be two 32-bit values, but not necessarily a pair of ++// single float registers. In each pair, ADLC-assigned register numbers ++// must be adjacent, with the lower number even. Finally, when the ++// CPU stores such a register pair to memory, the word associated with ++// the lower ADLC-assigned number must be stored to the lower address. + -+// Int Immediate: low 16-bit mask -+operand immI_16bits() -+%{ -+ predicate(n->get_int() == 0xFFFF); -+ match(ConI); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++// RISCV has 32 floating-point registers. Each can store a single ++// or double precision floating-point value. 
+ -+// Long Immediate: low 32-bit mask -+operand immL_32bits() -+%{ -+ predicate(n->get_long() == 0xFFFFFFFFL); -+ match(ConL); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++// for Java use float registers f0-f31 are always save on call whereas ++// the platform ABI treats f8-f9 and f18-f27 as callee save). Other ++// float registers are SOC as per the platform spec + -+// 64 bit unit decrement -+operand immL_M1() -+%{ -+ predicate(n->get_long() == -1); -+ match(ConL); ++reg_def F0 ( SOC, SOC, Op_RegF, 0, f0->as_VMReg() ); ++reg_def F0_H ( SOC, SOC, Op_RegF, 0, f0->as_VMReg()->next() ); ++reg_def F1 ( SOC, SOC, Op_RegF, 1, f1->as_VMReg() ); ++reg_def F1_H ( SOC, SOC, Op_RegF, 1, f1->as_VMReg()->next() ); ++reg_def F2 ( SOC, SOC, Op_RegF, 2, f2->as_VMReg() ); ++reg_def F2_H ( SOC, SOC, Op_RegF, 2, f2->as_VMReg()->next() ); ++reg_def F3 ( SOC, SOC, Op_RegF, 3, f3->as_VMReg() ); ++reg_def F3_H ( SOC, SOC, Op_RegF, 3, f3->as_VMReg()->next() ); ++reg_def F4 ( SOC, SOC, Op_RegF, 4, f4->as_VMReg() ); ++reg_def F4_H ( SOC, SOC, Op_RegF, 4, f4->as_VMReg()->next() ); ++reg_def F5 ( SOC, SOC, Op_RegF, 5, f5->as_VMReg() ); ++reg_def F5_H ( SOC, SOC, Op_RegF, 5, f5->as_VMReg()->next() ); ++reg_def F6 ( SOC, SOC, Op_RegF, 6, f6->as_VMReg() ); ++reg_def F6_H ( SOC, SOC, Op_RegF, 6, f6->as_VMReg()->next() ); ++reg_def F7 ( SOC, SOC, Op_RegF, 7, f7->as_VMReg() ); ++reg_def F7_H ( SOC, SOC, Op_RegF, 7, f7->as_VMReg()->next() ); ++reg_def F8 ( SOC, SOE, Op_RegF, 8, f8->as_VMReg() ); ++reg_def F8_H ( SOC, SOE, Op_RegF, 8, f8->as_VMReg()->next() ); ++reg_def F9 ( SOC, SOE, Op_RegF, 9, f9->as_VMReg() ); ++reg_def F9_H ( SOC, SOE, Op_RegF, 9, f9->as_VMReg()->next() ); ++reg_def F10 ( SOC, SOC, Op_RegF, 10, f10->as_VMReg() ); ++reg_def F10_H ( SOC, SOC, Op_RegF, 10, f10->as_VMReg()->next() ); ++reg_def F11 ( SOC, SOC, Op_RegF, 11, f11->as_VMReg() ); ++reg_def F11_H ( SOC, SOC, Op_RegF, 11, f11->as_VMReg()->next() ); ++reg_def F12 ( SOC, SOC, Op_RegF, 12, f12->as_VMReg() ); ++reg_def F12_H ( SOC, SOC, Op_RegF, 12, f12->as_VMReg()->next() ); ++reg_def F13 ( SOC, SOC, Op_RegF, 13, f13->as_VMReg() ); ++reg_def F13_H ( SOC, SOC, Op_RegF, 13, f13->as_VMReg()->next() ); ++reg_def F14 ( SOC, SOC, Op_RegF, 14, f14->as_VMReg() ); ++reg_def F14_H ( SOC, SOC, Op_RegF, 14, f14->as_VMReg()->next() ); ++reg_def F15 ( SOC, SOC, Op_RegF, 15, f15->as_VMReg() ); ++reg_def F15_H ( SOC, SOC, Op_RegF, 15, f15->as_VMReg()->next() ); ++reg_def F16 ( SOC, SOC, Op_RegF, 16, f16->as_VMReg() ); ++reg_def F16_H ( SOC, SOC, Op_RegF, 16, f16->as_VMReg()->next() ); ++reg_def F17 ( SOC, SOC, Op_RegF, 17, f17->as_VMReg() ); ++reg_def F17_H ( SOC, SOC, Op_RegF, 17, f17->as_VMReg()->next() ); ++reg_def F18 ( SOC, SOE, Op_RegF, 18, f18->as_VMReg() ); ++reg_def F18_H ( SOC, SOE, Op_RegF, 18, f18->as_VMReg()->next() ); ++reg_def F19 ( SOC, SOE, Op_RegF, 19, f19->as_VMReg() ); ++reg_def F19_H ( SOC, SOE, Op_RegF, 19, f19->as_VMReg()->next() ); ++reg_def F20 ( SOC, SOE, Op_RegF, 20, f20->as_VMReg() ); ++reg_def F20_H ( SOC, SOE, Op_RegF, 20, f20->as_VMReg()->next() ); ++reg_def F21 ( SOC, SOE, Op_RegF, 21, f21->as_VMReg() ); ++reg_def F21_H ( SOC, SOE, Op_RegF, 21, f21->as_VMReg()->next() ); ++reg_def F22 ( SOC, SOE, Op_RegF, 22, f22->as_VMReg() ); ++reg_def F22_H ( SOC, SOE, Op_RegF, 22, f22->as_VMReg()->next() ); ++reg_def F23 ( SOC, SOE, Op_RegF, 23, f23->as_VMReg() ); ++reg_def F23_H ( SOC, SOE, Op_RegF, 23, f23->as_VMReg()->next() ); ++reg_def F24 ( SOC, SOE, Op_RegF, 24, f24->as_VMReg() ); ++reg_def F24_H ( SOC, SOE, Op_RegF, 
24, f24->as_VMReg()->next() ); ++reg_def F25 ( SOC, SOE, Op_RegF, 25, f25->as_VMReg() ); ++reg_def F25_H ( SOC, SOE, Op_RegF, 25, f25->as_VMReg()->next() ); ++reg_def F26 ( SOC, SOE, Op_RegF, 26, f26->as_VMReg() ); ++reg_def F26_H ( SOC, SOE, Op_RegF, 26, f26->as_VMReg()->next() ); ++reg_def F27 ( SOC, SOE, Op_RegF, 27, f27->as_VMReg() ); ++reg_def F27_H ( SOC, SOE, Op_RegF, 27, f27->as_VMReg()->next() ); ++reg_def F28 ( SOC, SOC, Op_RegF, 28, f28->as_VMReg() ); ++reg_def F28_H ( SOC, SOC, Op_RegF, 28, f28->as_VMReg()->next() ); ++reg_def F29 ( SOC, SOC, Op_RegF, 29, f29->as_VMReg() ); ++reg_def F29_H ( SOC, SOC, Op_RegF, 29, f29->as_VMReg()->next() ); ++reg_def F30 ( SOC, SOC, Op_RegF, 30, f30->as_VMReg() ); ++reg_def F30_H ( SOC, SOC, Op_RegF, 30, f30->as_VMReg()->next() ); ++reg_def F31 ( SOC, SOC, Op_RegF, 31, f31->as_VMReg() ); ++reg_def F31_H ( SOC, SOC, Op_RegF, 31, f31->as_VMReg()->next() ); + -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++// ---------------------------- ++// Vector Registers ++// ---------------------------- + ++// For RVV vector registers, we simply extend vector register size to 4 ++// 'logical' slots. This is nominally 128 bits but it actually covers ++// all possible 'physical' RVV vector register lengths from 128 ~ 1024 ++// bits. The 'physical' RVV vector register length is detected during ++// startup, so the register allocator is able to identify the correct ++// number of bytes needed for an RVV spill/unspill. + -+// 32 bit offset of pc in thread anchor ++reg_def V0 ( SOC, SOC, Op_VecA, 0, v0->as_VMReg() ); ++reg_def V0_H ( SOC, SOC, Op_VecA, 0, v0->as_VMReg()->next() ); ++reg_def V0_J ( SOC, SOC, Op_VecA, 0, v0->as_VMReg()->next(2) ); ++reg_def V0_K ( SOC, SOC, Op_VecA, 0, v0->as_VMReg()->next(3) ); + -+operand immL_pc_off() -+%{ -+ predicate(n->get_long() == in_bytes(JavaThread::frame_anchor_offset()) + -+ in_bytes(JavaFrameAnchor::last_Java_pc_offset())); -+ match(ConL); ++reg_def V1 ( SOC, SOC, Op_VecA, 1, v1->as_VMReg() ); ++reg_def V1_H ( SOC, SOC, Op_VecA, 1, v1->as_VMReg()->next() ); ++reg_def V1_J ( SOC, SOC, Op_VecA, 1, v1->as_VMReg()->next(2) ); ++reg_def V1_K ( SOC, SOC, Op_VecA, 1, v1->as_VMReg()->next(3) ); + -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++reg_def V2 ( SOC, SOC, Op_VecA, 2, v2->as_VMReg() ); ++reg_def V2_H ( SOC, SOC, Op_VecA, 2, v2->as_VMReg()->next() ); ++reg_def V2_J ( SOC, SOC, Op_VecA, 2, v2->as_VMReg()->next(2) ); ++reg_def V2_K ( SOC, SOC, Op_VecA, 2, v2->as_VMReg()->next(3) ); + -+// 64 bit integer valid for add immediate -+operand immLAdd() -+%{ -+ predicate(Assembler::operand_valid_for_add_immediate(n->get_long())); -+ match(ConL); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++reg_def V3 ( SOC, SOC, Op_VecA, 3, v3->as_VMReg() ); ++reg_def V3_H ( SOC, SOC, Op_VecA, 3, v3->as_VMReg()->next() ); ++reg_def V3_J ( SOC, SOC, Op_VecA, 3, v3->as_VMReg()->next(2) ); ++reg_def V3_K ( SOC, SOC, Op_VecA, 3, v3->as_VMReg()->next(3) ); + -+// 64 bit integer valid for sub immediate -+operand immLSub() -+%{ -+ predicate(Assembler::operand_valid_for_add_immediate(-(n->get_long()))); -+ match(ConL); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++reg_def V4 ( SOC, SOC, Op_VecA, 4, v4->as_VMReg() ); ++reg_def V4_H ( SOC, SOC, Op_VecA, 4, v4->as_VMReg()->next() ); ++reg_def V4_J ( SOC, SOC, Op_VecA, 4, v4->as_VMReg()->next(2) ); ++reg_def V4_K ( SOC, SOC, Op_VecA, 4, v4->as_VMReg()->next(3) ); + -+// Narrow pointer operands -+// Narrow Pointer Immediate -+operand 
immN() -+%{ -+ match(ConN); ++reg_def V5 ( SOC, SOC, Op_VecA, 5, v5->as_VMReg() ); ++reg_def V5_H ( SOC, SOC, Op_VecA, 5, v5->as_VMReg()->next() ); ++reg_def V5_J ( SOC, SOC, Op_VecA, 5, v5->as_VMReg()->next(2) ); ++reg_def V5_K ( SOC, SOC, Op_VecA, 5, v5->as_VMReg()->next(3) ); + -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++reg_def V6 ( SOC, SOC, Op_VecA, 6, v6->as_VMReg() ); ++reg_def V6_H ( SOC, SOC, Op_VecA, 6, v6->as_VMReg()->next() ); ++reg_def V6_J ( SOC, SOC, Op_VecA, 6, v6->as_VMReg()->next(2) ); ++reg_def V6_K ( SOC, SOC, Op_VecA, 6, v6->as_VMReg()->next(3) ); + -+// Narrow NULL Pointer Immediate -+operand immN0() -+%{ -+ predicate(n->get_narrowcon() == 0); -+ match(ConN); ++reg_def V7 ( SOC, SOC, Op_VecA, 7, v7->as_VMReg() ); ++reg_def V7_H ( SOC, SOC, Op_VecA, 7, v7->as_VMReg()->next() ); ++reg_def V7_J ( SOC, SOC, Op_VecA, 7, v7->as_VMReg()->next(2) ); ++reg_def V7_K ( SOC, SOC, Op_VecA, 7, v7->as_VMReg()->next(3) ); + -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++reg_def V8 ( SOC, SOC, Op_VecA, 8, v8->as_VMReg() ); ++reg_def V8_H ( SOC, SOC, Op_VecA, 8, v8->as_VMReg()->next() ); ++reg_def V8_J ( SOC, SOC, Op_VecA, 8, v8->as_VMReg()->next(2) ); ++reg_def V8_K ( SOC, SOC, Op_VecA, 8, v8->as_VMReg()->next(3) ); + -+operand immNKlass() -+%{ -+ match(ConNKlass); ++reg_def V9 ( SOC, SOC, Op_VecA, 9, v9->as_VMReg() ); ++reg_def V9_H ( SOC, SOC, Op_VecA, 9, v9->as_VMReg()->next() ); ++reg_def V9_J ( SOC, SOC, Op_VecA, 9, v9->as_VMReg()->next(2) ); ++reg_def V9_K ( SOC, SOC, Op_VecA, 9, v9->as_VMReg()->next(3) ); + -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++reg_def V10 ( SOC, SOC, Op_VecA, 10, v10->as_VMReg() ); ++reg_def V10_H ( SOC, SOC, Op_VecA, 10, v10->as_VMReg()->next() ); ++reg_def V10_J ( SOC, SOC, Op_VecA, 10, v10->as_VMReg()->next(2) ); ++reg_def V10_K ( SOC, SOC, Op_VecA, 10, v10->as_VMReg()->next(3) ); + -+// Float and Double operands -+// Double Immediate -+operand immD() -+%{ -+ match(ConD); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++reg_def V11 ( SOC, SOC, Op_VecA, 11, v11->as_VMReg() ); ++reg_def V11_H ( SOC, SOC, Op_VecA, 11, v11->as_VMReg()->next() ); ++reg_def V11_J ( SOC, SOC, Op_VecA, 11, v11->as_VMReg()->next(2) ); ++reg_def V11_K ( SOC, SOC, Op_VecA, 11, v11->as_VMReg()->next(3) ); + -+// Double Immediate: +0.0d -+operand immD0() -+%{ -+ predicate(jlong_cast(n->getd()) == 0); -+ match(ConD); ++reg_def V12 ( SOC, SOC, Op_VecA, 12, v12->as_VMReg() ); ++reg_def V12_H ( SOC, SOC, Op_VecA, 12, v12->as_VMReg()->next() ); ++reg_def V12_J ( SOC, SOC, Op_VecA, 12, v12->as_VMReg()->next(2) ); ++reg_def V12_K ( SOC, SOC, Op_VecA, 12, v12->as_VMReg()->next(3) ); + -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++reg_def V13 ( SOC, SOC, Op_VecA, 13, v13->as_VMReg() ); ++reg_def V13_H ( SOC, SOC, Op_VecA, 13, v13->as_VMReg()->next() ); ++reg_def V13_J ( SOC, SOC, Op_VecA, 13, v13->as_VMReg()->next(2) ); ++reg_def V13_K ( SOC, SOC, Op_VecA, 13, v13->as_VMReg()->next(3) ); + -+// Float Immediate -+operand immF() -+%{ -+ match(ConF); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++reg_def V14 ( SOC, SOC, Op_VecA, 14, v14->as_VMReg() ); ++reg_def V14_H ( SOC, SOC, Op_VecA, 14, v14->as_VMReg()->next() ); ++reg_def V14_J ( SOC, SOC, Op_VecA, 14, v14->as_VMReg()->next(2) ); ++reg_def V14_K ( SOC, SOC, Op_VecA, 14, v14->as_VMReg()->next(3) ); + -+// Float Immediate: +0.0f. 
-+operand immF0() -+%{ -+ predicate(jint_cast(n->getf()) == 0); -+ match(ConF); ++reg_def V15 ( SOC, SOC, Op_VecA, 15, v15->as_VMReg() ); ++reg_def V15_H ( SOC, SOC, Op_VecA, 15, v15->as_VMReg()->next() ); ++reg_def V15_J ( SOC, SOC, Op_VecA, 15, v15->as_VMReg()->next(2) ); ++reg_def V15_K ( SOC, SOC, Op_VecA, 15, v15->as_VMReg()->next(3) ); + -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++reg_def V16 ( SOC, SOC, Op_VecA, 16, v16->as_VMReg() ); ++reg_def V16_H ( SOC, SOC, Op_VecA, 16, v16->as_VMReg()->next() ); ++reg_def V16_J ( SOC, SOC, Op_VecA, 16, v16->as_VMReg()->next(2) ); ++reg_def V16_K ( SOC, SOC, Op_VecA, 16, v16->as_VMReg()->next(3) ); + -+operand immIOffset() -+%{ -+ predicate(is_imm_in_range(n->get_int(), 12, 0)); -+ match(ConI); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++reg_def V17 ( SOC, SOC, Op_VecA, 17, v17->as_VMReg() ); ++reg_def V17_H ( SOC, SOC, Op_VecA, 17, v17->as_VMReg()->next() ); ++reg_def V17_J ( SOC, SOC, Op_VecA, 17, v17->as_VMReg()->next(2) ); ++reg_def V17_K ( SOC, SOC, Op_VecA, 17, v17->as_VMReg()->next(3) ); + -+operand immLOffset() -+%{ -+ predicate(is_imm_in_range(n->get_long(), 12, 0)); -+ match(ConL); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++reg_def V18 ( SOC, SOC, Op_VecA, 18, v18->as_VMReg() ); ++reg_def V18_H ( SOC, SOC, Op_VecA, 18, v18->as_VMReg()->next() ); ++reg_def V18_J ( SOC, SOC, Op_VecA, 18, v18->as_VMReg()->next(2) ); ++reg_def V18_K ( SOC, SOC, Op_VecA, 18, v18->as_VMReg()->next(3) ); + -+// Scale values -+operand immIScale() -+%{ -+ predicate(1 <= n->get_int() && (n->get_int() <= 3)); -+ match(ConI); ++reg_def V19 ( SOC, SOC, Op_VecA, 19, v19->as_VMReg() ); ++reg_def V19_H ( SOC, SOC, Op_VecA, 19, v19->as_VMReg()->next() ); ++reg_def V19_J ( SOC, SOC, Op_VecA, 19, v19->as_VMReg()->next(2) ); ++reg_def V19_K ( SOC, SOC, Op_VecA, 19, v19->as_VMReg()->next(3) ); + -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++reg_def V20 ( SOC, SOC, Op_VecA, 20, v20->as_VMReg() ); ++reg_def V20_H ( SOC, SOC, Op_VecA, 20, v20->as_VMReg()->next() ); ++reg_def V20_J ( SOC, SOC, Op_VecA, 20, v20->as_VMReg()->next(2) ); ++reg_def V20_K ( SOC, SOC, Op_VecA, 20, v20->as_VMReg()->next(3) ); + -+// Integer 32 bit Register Operands -+operand iRegI() -+%{ -+ constraint(ALLOC_IN_RC(any_reg32)); -+ match(RegI); -+ match(iRegINoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++reg_def V21 ( SOC, SOC, Op_VecA, 21, v21->as_VMReg() ); ++reg_def V21_H ( SOC, SOC, Op_VecA, 21, v21->as_VMReg()->next() ); ++reg_def V21_J ( SOC, SOC, Op_VecA, 21, v21->as_VMReg()->next(2) ); ++reg_def V21_K ( SOC, SOC, Op_VecA, 21, v21->as_VMReg()->next(3) ); + -+// Integer 32 bit Register not Special -+operand iRegINoSp() -+%{ -+ constraint(ALLOC_IN_RC(no_special_reg32)); -+ match(RegI); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++reg_def V22 ( SOC, SOC, Op_VecA, 22, v22->as_VMReg() ); ++reg_def V22_H ( SOC, SOC, Op_VecA, 22, v22->as_VMReg()->next() ); ++reg_def V22_J ( SOC, SOC, Op_VecA, 22, v22->as_VMReg()->next(2) ); ++reg_def V22_K ( SOC, SOC, Op_VecA, 22, v22->as_VMReg()->next(3) ); + -+// Register R10 only -+operand iRegI_R10() -+%{ -+ constraint(ALLOC_IN_RC(int_r10_reg)); -+ match(RegI); -+ match(iRegINoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++reg_def V23 ( SOC, SOC, Op_VecA, 23, v23->as_VMReg() ); ++reg_def V23_H ( SOC, SOC, Op_VecA, 23, v23->as_VMReg()->next() ); ++reg_def V23_J ( SOC, SOC, Op_VecA, 23, v23->as_VMReg()->next(2) ); ++reg_def 
V23_K ( SOC, SOC, Op_VecA, 23, v23->as_VMReg()->next(3) ); + -+// Register R12 only -+operand iRegI_R12() -+%{ -+ constraint(ALLOC_IN_RC(int_r12_reg)); -+ match(RegI); -+ match(iRegINoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++reg_def V24 ( SOC, SOC, Op_VecA, 24, v24->as_VMReg() ); ++reg_def V24_H ( SOC, SOC, Op_VecA, 24, v24->as_VMReg()->next() ); ++reg_def V24_J ( SOC, SOC, Op_VecA, 24, v24->as_VMReg()->next(2) ); ++reg_def V24_K ( SOC, SOC, Op_VecA, 24, v24->as_VMReg()->next(3) ); + -+// Register R13 only -+operand iRegI_R13() -+%{ -+ constraint(ALLOC_IN_RC(int_r13_reg)); -+ match(RegI); -+ match(iRegINoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++reg_def V25 ( SOC, SOC, Op_VecA, 25, v25->as_VMReg() ); ++reg_def V25_H ( SOC, SOC, Op_VecA, 25, v25->as_VMReg()->next() ); ++reg_def V25_J ( SOC, SOC, Op_VecA, 25, v25->as_VMReg()->next(2) ); ++reg_def V25_K ( SOC, SOC, Op_VecA, 25, v25->as_VMReg()->next(3) ); + -+// Register R14 only -+operand iRegI_R14() -+%{ -+ constraint(ALLOC_IN_RC(int_r14_reg)); -+ match(RegI); -+ match(iRegINoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++reg_def V26 ( SOC, SOC, Op_VecA, 26, v26->as_VMReg() ); ++reg_def V26_H ( SOC, SOC, Op_VecA, 26, v26->as_VMReg()->next() ); ++reg_def V26_J ( SOC, SOC, Op_VecA, 26, v26->as_VMReg()->next(2) ); ++reg_def V26_K ( SOC, SOC, Op_VecA, 26, v26->as_VMReg()->next(3) ); + -+// Integer 64 bit Register Operands -+operand iRegL() -+%{ -+ constraint(ALLOC_IN_RC(any_reg)); -+ match(RegL); -+ match(iRegLNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++reg_def V27 ( SOC, SOC, Op_VecA, 27, v27->as_VMReg() ); ++reg_def V27_H ( SOC, SOC, Op_VecA, 27, v27->as_VMReg()->next() ); ++reg_def V27_J ( SOC, SOC, Op_VecA, 27, v27->as_VMReg()->next(2) ); ++reg_def V27_K ( SOC, SOC, Op_VecA, 27, v27->as_VMReg()->next(3) ); + -+// Integer 64 bit Register not Special -+operand iRegLNoSp() -+%{ -+ constraint(ALLOC_IN_RC(no_special_reg)); -+ match(RegL); -+ match(iRegL_R10); -+ format %{ %} -+ interface(REG_INTER); -+%} ++reg_def V28 ( SOC, SOC, Op_VecA, 28, v28->as_VMReg() ); ++reg_def V28_H ( SOC, SOC, Op_VecA, 28, v28->as_VMReg()->next() ); ++reg_def V28_J ( SOC, SOC, Op_VecA, 28, v28->as_VMReg()->next(2) ); ++reg_def V28_K ( SOC, SOC, Op_VecA, 28, v28->as_VMReg()->next(3) ); + -+// Long 64 bit Register R28 only -+operand iRegL_R28() -+%{ -+ constraint(ALLOC_IN_RC(r28_reg)); -+ match(RegL); -+ match(iRegLNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++reg_def V29 ( SOC, SOC, Op_VecA, 29, v29->as_VMReg() ); ++reg_def V29_H ( SOC, SOC, Op_VecA, 29, v29->as_VMReg()->next() ); ++reg_def V29_J ( SOC, SOC, Op_VecA, 29, v29->as_VMReg()->next(2) ); ++reg_def V29_K ( SOC, SOC, Op_VecA, 29, v29->as_VMReg()->next(3) ); + -+// Long 64 bit Register R29 only -+operand iRegL_R29() -+%{ -+ constraint(ALLOC_IN_RC(r29_reg)); -+ match(RegL); -+ match(iRegLNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++reg_def V30 ( SOC, SOC, Op_VecA, 30, v30->as_VMReg() ); ++reg_def V30_H ( SOC, SOC, Op_VecA, 30, v30->as_VMReg()->next() ); ++reg_def V30_J ( SOC, SOC, Op_VecA, 30, v30->as_VMReg()->next(2) ); ++reg_def V30_K ( SOC, SOC, Op_VecA, 30, v30->as_VMReg()->next(3) ); + -+// Long 64 bit Register R30 only -+operand iRegL_R30() -+%{ -+ constraint(ALLOC_IN_RC(r30_reg)); -+ match(RegL); -+ match(iRegLNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++reg_def V31 ( SOC, SOC, Op_VecA, 31, v31->as_VMReg() ); ++reg_def V31_H ( SOC, 
SOC, Op_VecA, 31, v31->as_VMReg()->next() ); ++reg_def V31_J ( SOC, SOC, Op_VecA, 31, v31->as_VMReg()->next(2) ); ++reg_def V31_K ( SOC, SOC, Op_VecA, 31, v31->as_VMReg()->next(3) ); + -+// Pointer Register Operands -+// Pointer Register -+operand iRegP() -+%{ -+ constraint(ALLOC_IN_RC(ptr_reg)); -+ match(RegP); -+ match(iRegPNoSp); -+ match(iRegP_R10); -+ match(javaThread_RegP); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++// ---------------------------- ++// Special Registers ++// ---------------------------- + -+// Pointer 64 bit Register not Special -+operand iRegPNoSp() -+%{ -+ constraint(ALLOC_IN_RC(no_special_ptr_reg)); -+ match(RegP); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++// On riscv, the physical flag register is missing, so we use t1 instead, ++// to bridge the RegFlag semantics in share/opto + -+operand iRegP_R10() -+%{ -+ constraint(ALLOC_IN_RC(r10_reg)); -+ match(RegP); -+ // match(iRegP); -+ match(iRegPNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++reg_def RFLAGS (SOC, SOC, Op_RegFlags, 6, x6->as_VMReg() ); + -+// Pointer 64 bit Register R11 only -+operand iRegP_R11() -+%{ -+ constraint(ALLOC_IN_RC(r11_reg)); -+ match(RegP); -+ match(iRegPNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++// Specify priority of register selection within phases of register ++// allocation. Highest priority is first. A useful heuristic is to ++// give registers a low priority when they are required by machine ++// instructions, like EAX and EDX on I486, and choose no-save registers ++// before save-on-call, & save-on-call before save-on-entry. Registers ++// which participate in fixed calling sequences should come last. ++// Registers which are used as pairs must fall on an even boundary. 
+ -+operand iRegP_R12() -+%{ -+ constraint(ALLOC_IN_RC(r12_reg)); -+ match(RegP); -+ // match(iRegP); -+ match(iRegPNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++alloc_class chunk0( ++ // volatiles ++ R7, R7_H, ++ R28, R28_H, ++ R29, R29_H, ++ R30, R30_H, ++ R31, R31_H, + -+// Pointer 64 bit Register R13 only -+operand iRegP_R13() -+%{ -+ constraint(ALLOC_IN_RC(r13_reg)); -+ match(RegP); -+ match(iRegPNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++ // arg registers ++ R10, R10_H, ++ R11, R11_H, ++ R12, R12_H, ++ R13, R13_H, ++ R14, R14_H, ++ R15, R15_H, ++ R16, R16_H, ++ R17, R17_H, + -+operand iRegP_R14() -+%{ -+ constraint(ALLOC_IN_RC(r14_reg)); -+ match(RegP); -+ // match(iRegP); -+ match(iRegPNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++ // non-volatiles ++ R9, R9_H, ++ R18, R18_H, ++ R19, R19_H, ++ R20, R20_H, ++ R21, R21_H, ++ R22, R22_H, ++ R24, R24_H, ++ R25, R25_H, ++ R26, R26_H, + -+operand iRegP_R15() -+%{ -+ constraint(ALLOC_IN_RC(r15_reg)); -+ match(RegP); -+ // match(iRegP); -+ match(iRegPNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++ // non-allocatable registers ++ R23, R23_H, // java thread ++ R27, R27_H, // heapbase ++ R4, R4_H, // thread ++ R8, R8_H, // fp ++ R0, R0_H, // zero ++ R1, R1_H, // ra ++ R2, R2_H, // sp ++ R3, R3_H, // gp ++); + -+operand iRegP_R16() -+%{ -+ constraint(ALLOC_IN_RC(r16_reg)); -+ match(RegP); -+ // match(iRegP); -+ match(iRegPNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++alloc_class chunk1( + -+// Pointer 64 bit Register R28 only -+operand iRegP_R28() -+%{ -+ constraint(ALLOC_IN_RC(r28_reg)); -+ match(RegP); -+ match(iRegPNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++ // no save ++ F0, F0_H, ++ F1, F1_H, ++ F2, F2_H, ++ F3, F3_H, ++ F4, F4_H, ++ F5, F5_H, ++ F6, F6_H, ++ F7, F7_H, ++ F28, F28_H, ++ F29, F29_H, ++ F30, F30_H, ++ F31, F31_H, + -+// Pointer Register Operands -+// Narrow Pointer Register -+operand iRegN() -+%{ -+ constraint(ALLOC_IN_RC(any_reg32)); -+ match(RegN); -+ match(iRegNNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++ // arg registers ++ F10, F10_H, ++ F11, F11_H, ++ F12, F12_H, ++ F13, F13_H, ++ F14, F14_H, ++ F15, F15_H, ++ F16, F16_H, ++ F17, F17_H, + -+// Integer 64 bit Register not Special -+operand iRegNNoSp() -+%{ -+ constraint(ALLOC_IN_RC(no_special_reg32)); -+ match(RegN); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++ // non-volatiles ++ F8, F8_H, ++ F9, F9_H, ++ F18, F18_H, ++ F19, F19_H, ++ F20, F20_H, ++ F21, F21_H, ++ F22, F22_H, ++ F23, F23_H, ++ F24, F24_H, ++ F25, F25_H, ++ F26, F26_H, ++ F27, F27_H, ++); + -+// heap base register -- used for encoding immN0 -+operand iRegIHeapbase() -+%{ -+ constraint(ALLOC_IN_RC(heapbase_reg)); -+ match(RegI); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++alloc_class chunk2( ++ V0, V0_H, V0_J, V0_K, ++ V1, V1_H, V1_J, V1_K, ++ V2, V2_H, V2_J, V2_K, ++ V3, V3_H, V3_J, V3_K, ++ V4, V4_H, V4_J, V4_K, ++ V5, V5_H, V5_J, V5_K, ++ V6, V6_H, V6_J, V6_K, ++ V7, V7_H, V7_J, V7_K, ++ V8, V8_H, V8_J, V8_K, ++ V9, V9_H, V9_J, V9_K, ++ V10, V10_H, V10_J, V10_K, ++ V11, V11_H, V11_J, V11_K, ++ V12, V12_H, V12_J, V12_K, ++ V13, V13_H, V13_J, V13_K, ++ V14, V14_H, V14_J, V14_K, ++ V15, V15_H, V15_J, V15_K, ++ V16, V16_H, V16_J, V16_K, ++ V17, V17_H, V17_J, V17_K, ++ V18, V18_H, V18_J, V18_K, ++ V19, V19_H, V19_J, V19_K, ++ V20, V20_H, V20_J, V20_K, ++ V21, V21_H, V21_J, V21_K, ++ V22, V22_H, 
V22_J, V22_K, ++ V23, V23_H, V23_J, V23_K, ++ V24, V24_H, V24_J, V24_K, ++ V25, V25_H, V25_J, V25_K, ++ V26, V26_H, V26_J, V26_K, ++ V27, V27_H, V27_J, V27_K, ++ V28, V28_H, V28_J, V28_K, ++ V29, V29_H, V29_J, V29_K, ++ V30, V30_H, V30_J, V30_K, ++ V31, V31_H, V31_J, V31_K, ++); + -+// Long 64 bit Register R10 only -+operand iRegL_R10() -+%{ -+ constraint(ALLOC_IN_RC(r10_reg)); -+ match(RegL); -+ match(iRegLNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++alloc_class chunk3(RFLAGS); + -+// Float Register -+// Float register operands -+operand fRegF() -+%{ -+ constraint(ALLOC_IN_RC(float_reg)); -+ match(RegF); ++//----------Architecture Description Register Classes-------------------------- ++// Several register classes are automatically defined based upon information in ++// this architecture description. ++// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ ) ++// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ ) ++// + -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); ++// Class for all 32 bit general purpose registers ++reg_class all_reg32( ++ R0, ++ R1, ++ R2, ++ R3, ++ R4, ++ R7, ++ R8, ++ R9, ++ R10, ++ R11, ++ R12, ++ R13, ++ R14, ++ R15, ++ R16, ++ R17, ++ R18, ++ R19, ++ R20, ++ R21, ++ R22, ++ R23, ++ R24, ++ R25, ++ R26, ++ R27, ++ R28, ++ R29, ++ R30, ++ R31 ++); ++ ++// Class for any 32 bit integer registers (excluding zr) ++reg_class any_reg32 %{ ++ return _ANY_REG32_mask; +%} + -+// Double Register -+// Double register operands -+operand fRegD() -+%{ -+ constraint(ALLOC_IN_RC(double_reg)); -+ match(RegD); ++// Singleton class for R10 int register ++reg_class int_r10_reg(R10); + -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++// Singleton class for R12 int register ++reg_class int_r12_reg(R12); + -+// Generic vector class. This will be used for -+// all vector operands. 
-+operand vReg() -+%{ -+ constraint(ALLOC_IN_RC(vectora_reg)); -+ match(VecA); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++// Singleton class for R13 int register ++reg_class int_r13_reg(R13); + -+operand vReg_V1() -+%{ -+ constraint(ALLOC_IN_RC(v1_reg)); -+ match(VecA); -+ match(vReg); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++// Singleton class for R14 int register ++reg_class int_r14_reg(R14); + -+operand vReg_V2() -+%{ -+ constraint(ALLOC_IN_RC(v2_reg)); -+ match(VecA); -+ match(vReg); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++// Class for all long integer registers ++reg_class all_reg( ++ R0, R0_H, ++ R1, R1_H, ++ R2, R2_H, ++ R3, R3_H, ++ R4, R4_H, ++ R7, R7_H, ++ R8, R8_H, ++ R9, R9_H, ++ R10, R10_H, ++ R11, R11_H, ++ R12, R12_H, ++ R13, R13_H, ++ R14, R14_H, ++ R15, R15_H, ++ R16, R16_H, ++ R17, R17_H, ++ R18, R18_H, ++ R19, R19_H, ++ R20, R20_H, ++ R21, R21_H, ++ R22, R22_H, ++ R23, R23_H, ++ R24, R24_H, ++ R25, R25_H, ++ R26, R26_H, ++ R27, R27_H, ++ R28, R28_H, ++ R29, R29_H, ++ R30, R30_H, ++ R31, R31_H ++); + -+operand vReg_V3() -+%{ -+ constraint(ALLOC_IN_RC(v3_reg)); -+ match(VecA); -+ match(vReg); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); ++// Class for all long integer registers (excluding zr) ++reg_class any_reg %{ ++ return _ANY_REG_mask; +%} + -+operand vReg_V4() -+%{ -+ constraint(ALLOC_IN_RC(v4_reg)); -+ match(VecA); -+ match(vReg); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++// Class for non-allocatable 32 bit registers ++reg_class non_allocatable_reg32( ++ R0, // zr ++ R1, // ra ++ R2, // sp ++ R3, // gp ++ R4, // tp ++ R23 // java thread ++); + -+operand vReg_V5() -+%{ -+ constraint(ALLOC_IN_RC(v5_reg)); -+ match(VecA); -+ match(vReg); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); ++// Class for non-allocatable 64 bit registers ++reg_class non_allocatable_reg( ++ R0, R0_H, // zr ++ R1, R1_H, // ra ++ R2, R2_H, // sp ++ R3, R3_H, // gp ++ R4, R4_H, // tp ++ R23, R23_H // java thread ++); ++ ++reg_class no_special_reg32 %{ ++ return _NO_SPECIAL_REG32_mask; +%} + -+// Java Thread Register -+operand javaThread_RegP(iRegP reg) -+%{ -+ constraint(ALLOC_IN_RC(java_thread_reg)); // java_thread_reg -+ match(reg); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); ++reg_class no_special_reg %{ ++ return _NO_SPECIAL_REG_mask; +%} + -+//----------Memory Operands---------------------------------------------------- -+// RISCV has only base_plus_offset and literal address mode, so no need to use -+// index and scale. Here set index as 0xffffffff and scale as 0x0. 
-+operand indirect(iRegP reg) -+%{ -+ constraint(ALLOC_IN_RC(ptr_reg)); -+ match(reg); -+ op_cost(0); -+ format %{ "[$reg]" %} -+ interface(MEMORY_INTER) %{ -+ base($reg); -+ index(0xffffffff); -+ scale(0x0); -+ disp(0x0); -+ %} ++reg_class ptr_reg %{ ++ return _PTR_REG_mask; +%} + -+operand indOffI(iRegP reg, immIOffset off) -+%{ -+ constraint(ALLOC_IN_RC(ptr_reg)); -+ match(AddP reg off); -+ op_cost(0); -+ format %{ "[$reg, $off]" %} -+ interface(MEMORY_INTER) %{ -+ base($reg); -+ index(0xffffffff); -+ scale(0x0); -+ disp($off); -+ %} ++reg_class no_special_ptr_reg %{ ++ return _NO_SPECIAL_PTR_REG_mask; +%} + -+operand indOffL(iRegP reg, immLOffset off) -+%{ -+ constraint(ALLOC_IN_RC(ptr_reg)); -+ match(AddP reg off); -+ op_cost(0); -+ format %{ "[$reg, $off]" %} -+ interface(MEMORY_INTER) %{ -+ base($reg); -+ index(0xffffffff); -+ scale(0x0); -+ disp($off); -+ %} -+%} ++// Class for 64 bit register r10 ++reg_class r10_reg( ++ R10, R10_H ++); + -+operand indirectN(iRegN reg) -+%{ -+ predicate(Universe::narrow_oop_shift() == 0); -+ constraint(ALLOC_IN_RC(ptr_reg)); -+ match(DecodeN reg); -+ op_cost(0); -+ format %{ "[$reg]\t# narrow" %} -+ interface(MEMORY_INTER) %{ -+ base($reg); -+ index(0xffffffff); -+ scale(0x0); -+ disp(0x0); -+ %} -+%} ++// Class for 64 bit register r11 ++reg_class r11_reg( ++ R11, R11_H ++); + -+operand indOffIN(iRegN reg, immIOffset off) -+%{ -+ predicate(Universe::narrow_oop_shift() == 0); -+ constraint(ALLOC_IN_RC(ptr_reg)); -+ match(AddP (DecodeN reg) off); -+ op_cost(0); -+ format %{ "[$reg, $off]\t# narrow" %} -+ interface(MEMORY_INTER) %{ -+ base($reg); -+ index(0xffffffff); -+ scale(0x0); -+ disp($off); -+ %} -+%} ++// Class for 64 bit register r12 ++reg_class r12_reg( ++ R12, R12_H ++); + -+operand indOffLN(iRegN reg, immLOffset off) -+%{ -+ predicate(Universe::narrow_oop_shift() == 0); -+ constraint(ALLOC_IN_RC(ptr_reg)); -+ match(AddP (DecodeN reg) off); -+ op_cost(0); -+ format %{ "[$reg, $off]\t# narrow" %} -+ interface(MEMORY_INTER) %{ -+ base($reg); -+ index(0xffffffff); -+ scale(0x0); -+ disp($off); -+ %} -+%} ++// Class for 64 bit register r13 ++reg_class r13_reg( ++ R13, R13_H ++); + -+// RISCV opto stubs need to write to the pc slot in the thread anchor -+operand thread_anchor_pc(javaThread_RegP reg, immL_pc_off off) -+%{ -+ constraint(ALLOC_IN_RC(ptr_reg)); -+ match(AddP reg off); -+ op_cost(0); -+ format %{ "[$reg, $off]" %} -+ interface(MEMORY_INTER) %{ -+ base($reg); -+ index(0xffffffff); -+ scale(0x0); -+ disp($off); -+ %} -+%} ++// Class for 64 bit register r14 ++reg_class r14_reg( ++ R14, R14_H ++); + ++// Class for 64 bit register r15 ++reg_class r15_reg( ++ R15, R15_H ++); + -+//----------Special Memory Operands-------------------------------------------- -+// Stack Slot Operand - This operand is used for loading and storing temporary -+// values on the stack where a match requires a value to -+// flow through memory. 
-+operand stackSlotI(sRegI reg) -+%{ -+ constraint(ALLOC_IN_RC(stack_slots)); -+ // No match rule because this operand is only generated in matching -+ // match(RegI); -+ format %{ "[$reg]" %} -+ interface(MEMORY_INTER) %{ -+ base(0x02); // RSP -+ index(0xffffffff); // No Index -+ scale(0x0); // No Scale -+ disp($reg); // Stack Offset -+ %} -+%} ++// Class for 64 bit register r16 ++reg_class r16_reg( ++ R16, R16_H ++); + -+operand stackSlotF(sRegF reg) -+%{ -+ constraint(ALLOC_IN_RC(stack_slots)); -+ // No match rule because this operand is only generated in matching -+ // match(RegF); -+ format %{ "[$reg]" %} -+ interface(MEMORY_INTER) %{ -+ base(0x02); // RSP -+ index(0xffffffff); // No Index -+ scale(0x0); // No Scale -+ disp($reg); // Stack Offset -+ %} -+%} ++// Class for method register ++reg_class method_reg( ++ R31, R31_H ++); + -+operand stackSlotD(sRegD reg) -+%{ -+ constraint(ALLOC_IN_RC(stack_slots)); -+ // No match rule because this operand is only generated in matching -+ // match(RegD); -+ format %{ "[$reg]" %} -+ interface(MEMORY_INTER) %{ -+ base(0x02); // RSP -+ index(0xffffffff); // No Index -+ scale(0x0); // No Scale -+ disp($reg); // Stack Offset -+ %} -+%} ++// Class for heapbase register ++reg_class heapbase_reg( ++ R27, R27_H ++); + -+operand stackSlotL(sRegL reg) -+%{ -+ constraint(ALLOC_IN_RC(stack_slots)); -+ // No match rule because this operand is only generated in matching -+ // match(RegL); -+ format %{ "[$reg]" %} -+ interface(MEMORY_INTER) %{ -+ base(0x02); // RSP -+ index(0xffffffff); // No Index -+ scale(0x0); // No Scale -+ disp($reg); // Stack Offset -+ %} -+%} ++// Class for java thread register ++reg_class java_thread_reg( ++ R23, R23_H ++); + -+// Special operand allowing long args to int ops to be truncated for free ++reg_class r28_reg( ++ R28, R28_H ++); + -+operand iRegL2I(iRegL reg) %{ ++reg_class r29_reg( ++ R29, R29_H ++); + -+ op_cost(0); ++reg_class r30_reg( ++ R30, R30_H ++); + -+ match(ConvL2I reg); ++// Class for zero register ++reg_class zr_reg( ++ R0, R0_H ++); + -+ format %{ "l2i($reg)" %} ++// Class for thread register ++reg_class thread_reg( ++ R4, R4_H ++); + -+ interface(REG_INTER) -+%} ++// Class for frame pointer register ++reg_class fp_reg( ++ R8, R8_H ++); + ++// Class for link register ++reg_class ra_reg( ++ R1, R1_H ++); + -+// Comparison Operands -+// NOTE: Label is a predefined operand which should not be redefined in -+// the AD file. It is generically handled within the ADLC. ++// Class for long sp register ++reg_class sp_reg( ++ R2, R2_H ++); + -+//----------Conditional Branch Operands---------------------------------------- -+// Comparison Op - This is the operation of the comparison, and is limited to -+// the following set of codes: -+// L (<), LE (<=), G (>), GE (>=), E (==), NE (!=) -+// -+// Other attributes of the comparison, such as unsignedness, are specified -+// by the comparison instruction that sets a condition code flags register. -+// That result is represented by a flags operand whose subtype is appropriate -+// to the unsignedness (etc.) of the comparison. -+// -+// Later, the instruction which matches both the Comparison Op (a Bool) and -+// the flags (produced by the Cmp) specifies the coding of the comparison op -+// by matching a specific subtype of Bool operand below, such as cmpOpU.
++// Class for all float registers ++reg_class float_reg( ++ F0, ++ F1, ++ F2, ++ F3, ++ F4, ++ F5, ++ F6, ++ F7, ++ F8, ++ F9, ++ F10, ++ F11, ++ F12, ++ F13, ++ F14, ++ F15, ++ F16, ++ F17, ++ F18, ++ F19, ++ F20, ++ F21, ++ F22, ++ F23, ++ F24, ++ F25, ++ F26, ++ F27, ++ F28, ++ F29, ++ F30, ++ F31 ++); + ++// Double precision float registers have virtual `high halves' that ++// are needed by the allocator. ++// Class for all double registers ++reg_class double_reg( ++ F0, F0_H, ++ F1, F1_H, ++ F2, F2_H, ++ F3, F3_H, ++ F4, F4_H, ++ F5, F5_H, ++ F6, F6_H, ++ F7, F7_H, ++ F8, F8_H, ++ F9, F9_H, ++ F10, F10_H, ++ F11, F11_H, ++ F12, F12_H, ++ F13, F13_H, ++ F14, F14_H, ++ F15, F15_H, ++ F16, F16_H, ++ F17, F17_H, ++ F18, F18_H, ++ F19, F19_H, ++ F20, F20_H, ++ F21, F21_H, ++ F22, F22_H, ++ F23, F23_H, ++ F24, F24_H, ++ F25, F25_H, ++ F26, F26_H, ++ F27, F27_H, ++ F28, F28_H, ++ F29, F29_H, ++ F30, F30_H, ++ F31, F31_H ++); + -+// used for signed integral comparisons and fp comparisons -+operand cmpOp() -+%{ -+ match(Bool); ++// Class for all RVV vector registers ++reg_class vectora_reg( ++ V1, V1_H, V1_J, V1_K, ++ V2, V2_H, V2_J, V2_K, ++ V3, V3_H, V3_J, V3_K, ++ V4, V4_H, V4_J, V4_K, ++ V5, V5_H, V5_J, V5_K, ++ V6, V6_H, V6_J, V6_K, ++ V7, V7_H, V7_J, V7_K, ++ V8, V8_H, V8_J, V8_K, ++ V9, V9_H, V9_J, V9_K, ++ V10, V10_H, V10_J, V10_K, ++ V11, V11_H, V11_J, V11_K, ++ V12, V12_H, V12_J, V12_K, ++ V13, V13_H, V13_J, V13_K, ++ V14, V14_H, V14_J, V14_K, ++ V15, V15_H, V15_J, V15_K, ++ V16, V16_H, V16_J, V16_K, ++ V17, V17_H, V17_J, V17_K, ++ V18, V18_H, V18_J, V18_K, ++ V19, V19_H, V19_J, V19_K, ++ V20, V20_H, V20_J, V20_K, ++ V21, V21_H, V21_J, V21_K, ++ V22, V22_H, V22_J, V22_K, ++ V23, V23_H, V23_J, V23_K, ++ V24, V24_H, V24_J, V24_K, ++ V25, V25_H, V25_J, V25_K, ++ V26, V26_H, V26_J, V26_K, ++ V27, V27_H, V27_J, V27_K, ++ V28, V28_H, V28_J, V28_K, ++ V29, V29_H, V29_J, V29_K, ++ V30, V30_H, V30_J, V30_K, ++ V31, V31_H, V31_J, V31_K ++); + -+ format %{ "" %} ++// Class for 64 bit register f0 ++reg_class f0_reg( ++ F0, F0_H ++); + -+ // the values in interface derives from struct BoolTest::mask -+ interface(COND_INTER) %{ -+ equal(0x0, "eq"); -+ greater(0x1, "gt"); -+ overflow(0x2, "overflow"); -+ less(0x3, "lt"); -+ not_equal(0x4, "ne"); -+ less_equal(0x5, "le"); -+ no_overflow(0x6, "no_overflow"); -+ greater_equal(0x7, "ge"); -+ %} -+%} ++// Class for 64 bit register f1 ++reg_class f1_reg( ++ F1, F1_H ++); + -+// used for unsigned integral comparisons -+operand cmpOpU() -+%{ -+ match(Bool); ++// Class for 64 bit register f2 ++reg_class f2_reg( ++ F2, F2_H ++); + -+ format %{ "" %} -+ // the values in interface derives from struct BoolTest::mask -+ interface(COND_INTER) %{ -+ equal(0x0, "eq"); -+ greater(0x1, "gtu"); -+ overflow(0x2, "overflow"); -+ less(0x3, "ltu"); -+ not_equal(0x4, "ne"); -+ less_equal(0x5, "leu"); -+ no_overflow(0x6, "no_overflow"); -+ greater_equal(0x7, "geu"); -+ %} -+%} ++// Class for 64 bit register f3 ++reg_class f3_reg( ++ F3, F3_H ++); + -+// used for certain integral comparisons which can be -+// converted to bxx instructions -+operand cmpOpEqNe() -+%{ -+ match(Bool); -+ op_cost(0); -+ predicate(n->as_Bool()->_test._test == BoolTest::ne || -+ n->as_Bool()->_test._test == BoolTest::eq); ++// class for vector register v1 ++reg_class v1_reg( ++ V1, V1_H, V1_J, V1_K ++); + -+ format %{ "" %} -+ interface(COND_INTER) %{ -+ equal(0x0, "eq"); -+ greater(0x1, "gt"); -+ overflow(0x2, "overflow"); -+ less(0x3, "lt"); -+ not_equal(0x4, "ne"); -+ less_equal(0x5, "le"); -+ 
no_overflow(0x6, "no_overflow"); -+ greater_equal(0x7, "ge"); -+ %} -+%} ++// class for vector register v2 ++reg_class v2_reg( ++ V2, V2_H, V2_J, V2_K ++); + -+operand cmpOpULtGe() -+%{ -+ match(Bool); -+ op_cost(0); -+ predicate(n->as_Bool()->_test._test == BoolTest::lt || -+ n->as_Bool()->_test._test == BoolTest::ge); ++// class for vector register v3 ++reg_class v3_reg( ++ V3, V3_H, V3_J, V3_K ++); + -+ format %{ "" %} -+ interface(COND_INTER) %{ -+ equal(0x0, "eq"); -+ greater(0x1, "gt"); -+ overflow(0x2, "overflow"); -+ less(0x3, "lt"); -+ not_equal(0x4, "ne"); -+ less_equal(0x5, "le"); -+ no_overflow(0x6, "no_overflow"); -+ greater_equal(0x7, "ge"); -+ %} -+%} ++// class for vector register v4 ++reg_class v4_reg( ++ V4, V4_H, V4_J, V4_K ++); + -+operand cmpOpUEqNeLeGt() -+%{ -+ match(Bool); -+ op_cost(0); -+ predicate(n->as_Bool()->_test._test == BoolTest::ne || -+ n->as_Bool()->_test._test == BoolTest::eq || -+ n->as_Bool()->_test._test == BoolTest::le || -+ n->as_Bool()->_test._test == BoolTest::gt); ++// class for vector register v5 ++reg_class v5_reg( ++ V5, V5_H, V5_J, V5_K ++); + -+ format %{ "" %} -+ interface(COND_INTER) %{ -+ equal(0x0, "eq"); -+ greater(0x1, "gt"); -+ overflow(0x2, "overflow"); -+ less(0x3, "lt"); -+ not_equal(0x4, "ne"); -+ less_equal(0x5, "le"); -+ no_overflow(0x6, "no_overflow"); -+ greater_equal(0x7, "ge"); -+ %} ++// class for condition codes ++reg_class reg_flags(RFLAGS); +%} + ++//----------DEFINITION BLOCK--------------------------------------------------- ++// Define name --> value mappings to inform the ADLC of an integer valued name ++// Current support includes integer values in the range [0, 0x7FFFFFFF] ++// Format: ++// int_def <name> ( <int_value>, <expression>); ++// Generated Code in ad_<arch>.hpp ++// #define <name> (<expression>) ++// // value == <int_value> ++// Generated code in ad_<arch>.cpp adlc_verification() ++// assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>"); ++// + -+// Flags register, used as output of compare logic -+operand rFlagsReg() -+%{ -+ constraint(ALLOC_IN_RC(reg_flags)); -+ match(RegFlags); ++// we follow the ppc-aix port in using a simple cost model which ranks ++// register operations as cheap, memory ops as more expensive and ++// branches as most expensive. the first two have a low as well as a ++// normal cost. huge cost appears to be a way of saying don't do ++// something + -+ op_cost(0); -+ format %{ "RFLAGS" %} -+ interface(REG_INTER); ++definitions %{ ++ // The default cost (of a register move instruction).
++ int_def DEFAULT_COST ( 100, 100); ++ int_def ALU_COST ( 100, 1 * DEFAULT_COST); // unknown, const, arith, shift, slt, ++ // multi, auipc, nop, logical, move ++ int_def LOAD_COST ( 300, 3 * DEFAULT_COST); // load, fpload ++ int_def STORE_COST ( 100, 1 * DEFAULT_COST); // store, fpstore ++ int_def XFER_COST ( 300, 3 * DEFAULT_COST); // mfc, mtc, fcvt, fmove, fcmp ++ int_def BRANCH_COST ( 100, 1 * DEFAULT_COST); // branch, jmp, call ++ int_def IMUL_COST ( 1000, 10 * DEFAULT_COST); // imul ++ int_def IDIVSI_COST ( 3400, 34 * DEFAULT_COST); // idivdi ++ int_def IDIVDI_COST ( 6600, 66 * DEFAULT_COST); // idivsi ++ int_def FMUL_SINGLE_COST ( 500, 5 * DEFAULT_COST); // fadd, fmul, fmadd ++ int_def FMUL_DOUBLE_COST ( 700, 7 * DEFAULT_COST); // fadd, fmul, fmadd ++ int_def FDIV_COST ( 2000, 20 * DEFAULT_COST); // fdiv ++ int_def FSQRT_COST ( 2500, 25 * DEFAULT_COST); // fsqrt ++ int_def VOLATILE_REF_COST ( 1000, 10 * DEFAULT_COST); +%} + -+// Special Registers + -+// Method Register -+operand inline_cache_RegP(iRegP reg) -+%{ -+ constraint(ALLOC_IN_RC(method_reg)); // inline_cache_reg -+ match(reg); -+ match(iRegPNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} + -+//----------OPERAND CLASSES---------------------------------------------------- -+// Operand Classes are groups of operands that are used as to simplify -+// instruction definitions by not requiring the AD writer to specify -+// separate instructions for every form of operand when the -+// instruction accepts multiple operand types with the same basic -+// encoding and format. The classic case of this is memory operands. ++//----------SOURCE BLOCK------------------------------------------------------- ++// This is a block of C++ code which provides values, functions, and ++// definitions necessary in the rest of the architecture description + -+// memory is used to define read/write location for load/store -+// instruction defs. we can turn a memory op into an Address ++source_hpp %{ + -+opclass memory(indirect, indOffI, indOffL, indirectN, indOffIN, indOffLN); -+ -+// iRegIorL2I is used for src inputs in rules for 32 bit int (I) -+// operations. it allows the src to be either an iRegI or a (ConvL2I -+// iRegL). in the latter case the l2i normally planted for a ConvL2I -+// can be elided because the 32-bit instruction will just employ the -+// lower 32 bits anyway. -+// -+// n.b. this does not elide all L2I conversions. if the truncated -+// value is consumed by more than one operation then the ConvL2I -+// cannot be bundled into the consuming nodes so an l2i gets planted -+// (actually a mvw $dst $src) and the downstream instructions consume -+// the result of the l2i as an iRegI input. That's a shame since the -+// mvw is actually redundant but its not too costly. 
++#include "asm/macroAssembler.hpp" ++#include "gc/shared/cardTable.hpp" ++#include "gc/shared/cardTableBarrierSet.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "opto/addnode.hpp" ++#include "opto/convertnode.hpp" + -+opclass iRegIorL2I(iRegI, iRegL2I); -+opclass iRegIorL(iRegI, iRegL); -+opclass iRegNorP(iRegN, iRegP); -+opclass iRegILNP(iRegI, iRegL, iRegN, iRegP); -+opclass iRegILNPNoSp(iRegINoSp, iRegLNoSp, iRegNNoSp, iRegPNoSp); -+opclass immIorL(immI, immL); ++extern RegMask _ANY_REG32_mask; ++extern RegMask _ANY_REG_mask; ++extern RegMask _PTR_REG_mask; ++extern RegMask _NO_SPECIAL_REG32_mask; ++extern RegMask _NO_SPECIAL_REG_mask; ++extern RegMask _NO_SPECIAL_PTR_REG_mask; + -+//----------PIPELINE----------------------------------------------------------- -+// Rules which define the behavior of the target architectures pipeline. ++class CallStubImpl { + -+// For specific pipelines, e.g. generic RISC-V, define the stages of that pipeline -+//pipe_desc(ID, EX, MEM, WR); -+#define ID S0 -+#define EX S1 -+#define MEM S2 -+#define WR S3 ++ //-------------------------------------------------------------- ++ //---< Used for optimization in Compile::shorten_branches >--- ++ //-------------------------------------------------------------- + -+// Integer ALU reg operation -+pipeline %{ ++ public: ++ // Size of call trampoline stub. ++ static uint size_call_trampoline() { ++ return 0; // no call trampolines on this platform ++ } + -+attributes %{ -+ // RISC-V instructions are of fixed length -+ fixed_size_instructions; // Fixed size instructions TODO does -+ max_instructions_per_bundle = 2; // Generic RISC-V 1, Sifive Series 7 2 -+ // RISC-V instructions come in 32-bit word units -+ instruction_unit_size = 4; // An instruction is 4 bytes long -+ instruction_fetch_unit_size = 64; // The processor fetches one line -+ instruction_fetch_units = 1; // of 64 bytes ++ // number of relocations needed by a call trampoline stub ++ static uint reloc_call_trampoline() { ++ return 0; // no call trampolines on this platform ++ } ++}; + -+ // List of nop instructions -+ nops( MachNop ); -+%} ++class HandlerImpl { + -+// We don't use an actual pipeline model so don't care about resources -+// or description. 
we do use pipeline classes to introduce fixed -+// latencies ++ public: + -+//----------RESOURCES---------------------------------------------------------- -+// Resources are the functional units available to the machine ++ static int emit_exception_handler(CodeBuffer &cbuf); ++ static int emit_deopt_handler(CodeBuffer& cbuf); + -+// Generic RISC-V pipeline -+// 1 decoder -+// 1 instruction decoded per cycle -+// 1 load/store ops per cycle, 1 branch, 1 FPU -+// 1 mul, 1 div ++ static uint size_exception_handler() { ++ return MacroAssembler::far_branch_size(); ++ } + -+resources ( DECODE, -+ ALU, -+ MUL, -+ DIV, -+ BRANCH, -+ LDST, -+ FPU); ++ static uint size_deopt_handler() { ++ // count auipc + far branch ++ return NativeInstruction::instruction_size + MacroAssembler::far_branch_size(); ++ } ++}; + -+//----------PIPELINE DESCRIPTION----------------------------------------------- -+// Pipeline Description specifies the stages in the machine's pipeline ++class Node::PD { ++public: ++ enum NodeFlags { ++ _last_flag = Node::_last_flag ++ }; ++}; + -+// Define the pipeline as a generic 6 stage pipeline -+pipe_desc(S0, S1, S2, S3, S4, S5); ++bool is_CAS(int opcode, bool maybe_volatile); + -+//----------PIPELINE CLASSES--------------------------------------------------- -+// Pipeline Classes describe the stages in which input and output are -+// referenced by the hardware pipeline. ++// predicate controlling translation of CompareAndSwapX ++bool needs_acquiring_load_reserved(const Node *load); + -+pipe_class fp_dop_reg_reg_s(fRegF dst, fRegF src1, fRegF src2) -+%{ -+ single_instruction; -+ src1 : S1(read); -+ src2 : S2(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; ++// predicate controlling addressing modes ++bool size_fits_all_mem_uses(AddPNode* addp, int shift); +%} + -+pipe_class fp_dop_reg_reg_d(fRegD dst, fRegD src1, fRegD src2) -+%{ -+ src1 : S1(read); -+ src2 : S2(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; -+%} ++source %{ + -+pipe_class fp_uop_s(fRegF dst, fRegF src) -+%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; -+%} ++// Derived RegMask with conditionally allocatable registers + -+pipe_class fp_uop_d(fRegD dst, fRegD src) -+%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; -+%} ++RegMask _ANY_REG32_mask; ++RegMask _ANY_REG_mask; ++RegMask _PTR_REG_mask; ++RegMask _NO_SPECIAL_REG32_mask; ++RegMask _NO_SPECIAL_REG_mask; ++RegMask _NO_SPECIAL_PTR_REG_mask; + -+pipe_class fp_d2f(fRegF dst, fRegD src) -+%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; -+%} ++void reg_mask_init() { + -+pipe_class fp_f2d(fRegD dst, fRegF src) -+%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; -+%} ++ _ANY_REG32_mask = _ALL_REG32_mask; ++ _ANY_REG32_mask.Remove(OptoReg::as_OptoReg(x0->as_VMReg())); + -+pipe_class fp_f2i(iRegINoSp dst, fRegF src) -+%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; -+%} ++ _ANY_REG_mask = _ALL_REG_mask; ++ _ANY_REG_mask.SUBTRACT(_ZR_REG_mask); + -+pipe_class fp_f2l(iRegLNoSp dst, fRegF src) -+%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; -+%} ++ _PTR_REG_mask = _ALL_REG_mask; ++ _PTR_REG_mask.SUBTRACT(_ZR_REG_mask); + -+pipe_class fp_i2f(fRegF dst, iRegIorL2I src) -+%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; -+%} ++ 
_NO_SPECIAL_REG32_mask = _ALL_REG32_mask; ++ _NO_SPECIAL_REG32_mask.SUBTRACT(_NON_ALLOCATABLE_REG32_mask); + -+pipe_class fp_l2f(fRegF dst, iRegL src) -+%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; -+%} ++ _NO_SPECIAL_REG_mask = _ALL_REG_mask; ++ _NO_SPECIAL_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask); + -+pipe_class fp_d2i(iRegINoSp dst, fRegD src) -+%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; -+%} ++ _NO_SPECIAL_PTR_REG_mask = _ALL_REG_mask; ++ _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask); + -+pipe_class fp_d2l(iRegLNoSp dst, fRegD src) -+%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; -+%} ++ // x27 is not allocatable when compressed oops is on ++ if (UseCompressedOops) { ++ _NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(x27->as_VMReg())); ++ _NO_SPECIAL_REG_mask.SUBTRACT(_HEAPBASE_REG_mask); ++ _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_HEAPBASE_REG_mask); ++ } + -+pipe_class fp_i2d(fRegD dst, iRegIorL2I src) -+%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; -+%} ++ // x8 is not allocatable when PreserveFramePointer is on ++ if (PreserveFramePointer) { ++ _NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(x8->as_VMReg())); ++ _NO_SPECIAL_REG_mask.SUBTRACT(_FP_REG_mask); ++ _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_FP_REG_mask); ++ } ++} + -+pipe_class fp_l2d(fRegD dst, iRegIorL2I src) -+%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; -+%} ++void PhaseOutput::pd_perform_mach_node_analysis() { ++} + -+pipe_class fp_div_s(fRegF dst, fRegF src1, fRegF src2) -+%{ -+ single_instruction; -+ src1 : S1(read); -+ src2 : S2(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; -+%} ++int MachNode::pd_alignment_required() const { ++ return 1; ++} + -+pipe_class fp_div_d(fRegD dst, fRegD src1, fRegD src2) -+%{ -+ single_instruction; -+ src1 : S1(read); -+ src2 : S2(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; -+%} ++int MachNode::compute_padding(int current_offset) const { ++ return 0; ++} + -+pipe_class fp_sqrt_s(fRegF dst, fRegF src1, fRegF src2) -+%{ -+ single_instruction; -+ src1 : S1(read); -+ src2 : S2(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; -+%} ++// is_CAS(int opcode, bool maybe_volatile) ++// ++// return true if opcode is one of the possible CompareAndSwapX ++// values otherwise false. 
++bool is_CAS(int opcode, bool maybe_volatile) ++{ ++ switch (opcode) { ++ // We handle these ++ case Op_CompareAndSwapI: ++ case Op_CompareAndSwapL: ++ case Op_CompareAndSwapP: ++ case Op_CompareAndSwapN: ++ case Op_ShenandoahCompareAndSwapP: ++ case Op_ShenandoahCompareAndSwapN: ++ case Op_CompareAndSwapB: ++ case Op_CompareAndSwapS: ++ case Op_GetAndSetI: ++ case Op_GetAndSetL: ++ case Op_GetAndSetP: ++ case Op_GetAndSetN: ++ case Op_GetAndAddI: ++ case Op_GetAndAddL: ++ return true; ++ case Op_CompareAndExchangeI: ++ case Op_CompareAndExchangeN: ++ case Op_CompareAndExchangeB: ++ case Op_CompareAndExchangeS: ++ case Op_CompareAndExchangeL: ++ case Op_CompareAndExchangeP: ++ case Op_WeakCompareAndSwapB: ++ case Op_WeakCompareAndSwapS: ++ case Op_WeakCompareAndSwapI: ++ case Op_WeakCompareAndSwapL: ++ case Op_WeakCompareAndSwapP: ++ case Op_WeakCompareAndSwapN: ++ case Op_ShenandoahWeakCompareAndSwapP: ++ case Op_ShenandoahWeakCompareAndSwapN: ++ case Op_ShenandoahCompareAndExchangeP: ++ case Op_ShenandoahCompareAndExchangeN: ++ return maybe_volatile; ++ default: ++ return false; ++ } ++} + -+pipe_class fp_sqrt_d(fRegD dst, fRegD src1, fRegD src2) -+%{ -+ single_instruction; -+ src1 : S1(read); -+ src2 : S2(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; -+%} ++// predicate controlling translation of CAS ++// ++// returns true if CAS needs to use an acquiring load otherwise false ++bool needs_acquiring_load_reserved(const Node *n) ++{ ++ assert(n != NULL && is_CAS(n->Opcode(), true), "expecting a compare and swap"); + -+pipe_class fp_load_constant_s(fRegF dst) -+%{ -+ single_instruction; -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; -+%} ++ LoadStoreNode* ldst = n->as_LoadStore(); ++ if (n != NULL && is_CAS(n->Opcode(), false)) { ++ assert(ldst != NULL && ldst->trailing_membar() != NULL, "expected trailing membar"); ++ } else { ++ return ldst != NULL && ldst->trailing_membar() != NULL; ++ } ++ // so we can just return true here ++ return true; ++} ++#define __ _masm. + -+pipe_class fp_load_constant_d(fRegD dst) -+%{ -+ single_instruction; -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; -+%} ++// advance declarations for helper functions to convert register ++// indices to register objects + -+pipe_class fp_load_mem_s(fRegF dst, memory mem) -+%{ -+ single_instruction; -+ mem : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ LDST : MEM; -+%} ++// the ad file has to provide implementations of certain methods ++// expected by the generic code ++// ++// REQUIRED FUNCTIONALITY + -+pipe_class fp_load_mem_d(fRegD dst, memory mem) -+%{ -+ single_instruction; -+ mem : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ LDST : MEM; -+%} ++//============================================================================= + -+pipe_class fp_store_reg_s(fRegF src, memory mem) -+%{ -+ single_instruction; -+ src : S1(read); -+ mem : S5(write); -+ DECODE : ID; -+ LDST : MEM; -+%} ++// !!!!! Special hack to get all types of calls to specify the byte offset ++// from the start of the call to the point where the return address ++// will point. 
+ -+pipe_class fp_store_reg_d(fRegD src, memory mem) -+%{ -+ single_instruction; -+ src : S1(read); -+ mem : S5(write); -+ DECODE : ID; -+ LDST : MEM; -+%} ++int MachCallStaticJavaNode::ret_addr_offset() ++{ ++ // jal ++ return 1 * NativeInstruction::instruction_size; ++} + -+//------- Integer ALU operations -------------------------- ++int MachCallDynamicJavaNode::ret_addr_offset() ++{ ++ return 7 * NativeInstruction::instruction_size; // movptr, jal ++} + -+// Integer ALU reg-reg operation -+// Operands needs in ID, result generated in EX -+// E.g. ADD Rd, Rs1, Rs2 -+pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2) -+%{ -+ single_instruction; -+ dst : EX(write); -+ src1 : ID(read); -+ src2 : ID(read); -+ DECODE : ID; -+ ALU : EX; -+%} ++int MachCallRuntimeNode::ret_addr_offset() { ++ // for generated stubs the call will be ++ // jal(addr) ++ // or with far branches ++ // jal(trampoline_stub) ++ // for real runtime callouts it will be 11 instructions ++ // see riscv_enc_java_to_runtime ++ // la(t1, retaddr) -> auipc + addi ++ // la(t0, RuntimeAddress(addr)) -> lui + addi + slli + addi + slli + addi ++ // addi(sp, sp, -2 * wordSize) -> addi ++ // sd(t1, Address(sp, wordSize)) -> sd ++ // jalr(t0) -> jalr ++ CodeBlob *cb = CodeCache::find_blob(_entry_point); ++ if (cb != NULL) { ++ return 1 * NativeInstruction::instruction_size; ++ } else { ++ return 11 * NativeInstruction::instruction_size; ++ } ++} + -+// Integer ALU reg operation with constant shift -+// E.g. SLLI Rd, Rs1, #shift -+pipe_class ialu_reg_shift(iRegI dst, iRegI src1) -+%{ -+ single_instruction; -+ dst : EX(write); -+ src1 : ID(read); -+ DECODE : ID; -+ ALU : EX; -+%} ++int MachCallNativeNode::ret_addr_offset() { ++ Unimplemented(); ++ return -1; ++} + -+// Integer ALU reg-reg operation with variable shift -+// both operands must be available in ID -+// E.g. SLL Rd, Rs1, Rs2 -+pipe_class ialu_reg_reg_vshift(iRegI dst, iRegI src1, iRegI src2) -+%{ -+ single_instruction; -+ dst : EX(write); -+ src1 : ID(read); -+ src2 : ID(read); -+ DECODE : ID; -+ ALU : EX; -+%} ++// ++// Compute padding required for nodes which need alignment ++// + -+// Integer ALU reg operation -+// E.g. NEG Rd, Rs2 -+pipe_class ialu_reg(iRegI dst, iRegI src) -+%{ -+ single_instruction; -+ dst : EX(write); -+ src : ID(read); -+ DECODE : ID; -+ ALU : EX; -+%} ++// With RVC a call instruction may get 2-byte aligned. ++// The address of the call instruction needs to be 4-byte aligned to ++// ensure that it does not span a cache line so that it can be patched. ++int CallStaticJavaDirectNode::compute_padding(int current_offset) const ++{ ++ // to make sure the address of jal 4-byte aligned. ++ return align_up(current_offset, alignment_required()) - current_offset; ++} + -+// Integer ALU reg immediate operation -+// E.g. ADDI Rd, Rs1, #imm -+pipe_class ialu_reg_imm(iRegI dst, iRegI src1) -+%{ -+ single_instruction; -+ dst : EX(write); -+ src1 : ID(read); -+ DECODE : ID; -+ ALU : EX; -+%} ++// With RVC a call instruction may get 2-byte aligned. ++// The address of the call instruction needs to be 4-byte aligned to ++// ensure that it does not span a cache line so that it can be patched. ++int CallDynamicJavaDirectNode::compute_padding(int current_offset) const ++{ ++ // skip the movptr in MacroAssembler::ic_call(): ++ // lui + addi + slli + addi + slli + addi ++ // Though movptr() has already 4-byte aligned with or without RVC, ++ // We need to prevent from further changes by explicitly calculating the size. 
++ const int movptr_size = 6 * NativeInstruction::instruction_size; ++ current_offset += movptr_size; ++ // to make sure the address of jal 4-byte aligned. ++ return align_up(current_offset, alignment_required()) - current_offset; ++} + -+// Integer ALU immediate operation (no source operands) -+// E.g. LI Rd, #imm -+pipe_class ialu_imm(iRegI dst) -+%{ -+ single_instruction; -+ dst : EX(write); -+ DECODE : ID; -+ ALU : EX; -+%} ++//============================================================================= + -+//------- Multiply pipeline operations -------------------- ++#ifndef PRODUCT ++void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const { ++ assert_cond(st != NULL); ++ st->print("BREAKPOINT"); ++} ++#endif + -+// Multiply reg-reg -+// E.g. MULW Rd, Rs1, Rs2 -+pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2) -+%{ -+ single_instruction; -+ dst : WR(write); -+ src1 : ID(read); -+ src2 : ID(read); -+ DECODE : ID; -+ MUL : WR; -+%} ++void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ C2_MacroAssembler _masm(&cbuf); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ ebreak(); ++} + -+// E.g. MUL RD, Rs1, Rs2 -+pipe_class lmul_reg_reg(iRegI dst, iRegI src1, iRegI src2) -+%{ -+ single_instruction; -+ fixed_latency(3); // Maximum latency for 64 bit mul -+ dst : WR(write); -+ src1 : ID(read); -+ src2 : ID(read); -+ DECODE : ID; -+ MUL : WR; -+%} ++uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); ++} + -+//------- Divide pipeline operations -------------------- ++//============================================================================= + -+// E.g. DIVW Rd, Rs1, Rs2 -+pipe_class idiv_reg_reg(iRegI dst, iRegI src1, iRegI src2) -+%{ -+ single_instruction; -+ fixed_latency(8); // Maximum latency for 32 bit divide -+ dst : WR(write); -+ src1 : ID(read); -+ src2 : ID(read); -+ DECODE : ID; -+ DIV : WR; -+%} -+ -+// E.g. DIV RD, Rs1, Rs2 -+pipe_class ldiv_reg_reg(iRegI dst, iRegI src1, iRegI src2) -+%{ -+ single_instruction; -+ fixed_latency(16); // Maximum latency for 64 bit divide -+ dst : WR(write); -+ src1 : ID(read); -+ src2 : ID(read); -+ DECODE : ID; -+ DIV : WR; -+%} -+ -+//------- Load pipeline operations ------------------------ -+ -+// Load - reg, mem -+// E.g. LA Rd, mem -+pipe_class iload_reg_mem(iRegI dst, memory mem) -+%{ -+ single_instruction; -+ dst : WR(write); -+ mem : ID(read); -+ DECODE : ID; -+ LDST : MEM; -+%} -+ -+// Load - reg, reg -+// E.g. LD Rd, Rs -+pipe_class iload_reg_reg(iRegI dst, iRegI src) -+%{ -+ single_instruction; -+ dst : WR(write); -+ src : ID(read); -+ DECODE : ID; -+ LDST : MEM; -+%} -+ -+//------- Store pipeline operations ----------------------- -+ -+// Store - zr, mem -+// E.g. SD zr, mem -+pipe_class istore_mem(memory mem) -+%{ -+ single_instruction; -+ mem : ID(read); -+ DECODE : ID; -+ LDST : MEM; -+%} -+ -+// Store - reg, mem -+// E.g. SD Rs, mem -+pipe_class istore_reg_mem(iRegI src, memory mem) -+%{ -+ single_instruction; -+ mem : ID(read); -+ src : EX(read); -+ DECODE : ID; -+ LDST : MEM; -+%} -+ -+// Store - reg, reg -+// E.g. 
SD Rs2, Rs1 -+pipe_class istore_reg_reg(iRegI dst, iRegI src) -+%{ -+ single_instruction; -+ dst : ID(read); -+ src : EX(read); -+ DECODE : ID; -+ LDST : MEM; -+%} -+ -+//------- Store pipeline operations ----------------------- -+ -+// Branch -+pipe_class pipe_branch() -+%{ -+ single_instruction; -+ DECODE : ID; -+ BRANCH : EX; -+%} -+ -+// Branch -+pipe_class pipe_branch_reg(iRegI src) -+%{ -+ single_instruction; -+ src : ID(read); -+ DECODE : ID; -+ BRANCH : EX; -+%} -+ -+// Compare & Branch -+// E.g. BEQ Rs1, Rs2, L -+pipe_class pipe_cmp_branch(iRegI src1, iRegI src2) -+%{ -+ single_instruction; -+ src1 : ID(read); -+ src2 : ID(read); -+ DECODE : ID; -+ BRANCH : EX; -+%} -+ -+// E.g. BEQZ Rs, L -+pipe_class pipe_cmpz_branch(iRegI src) -+%{ -+ single_instruction; -+ src : ID(read); -+ DECODE : ID; -+ BRANCH : EX; -+%} -+ -+//------- Synchronisation operations ---------------------- -+// Any operation requiring serialization -+// E.g. FENCE/Atomic Ops/Load Acquire/Store Release -+pipe_class pipe_serial() -+%{ -+ single_instruction; -+ force_serialization; -+ fixed_latency(16); -+ DECODE : ID; -+ LDST : MEM; -+%} -+ -+pipe_class pipe_slow() -+%{ -+ instruction_count(10); -+ multiple_bundles; -+ force_serialization; -+ fixed_latency(16); -+ DECODE : ID; -+ LDST : MEM; -+%} -+ -+// Empty pipeline class -+pipe_class pipe_class_empty() -+%{ -+ single_instruction; -+ fixed_latency(0); -+%} -+ -+// Default pipeline class. -+pipe_class pipe_class_default() -+%{ -+ single_instruction; -+ fixed_latency(2); -+%} -+ -+// Pipeline class for compares. -+pipe_class pipe_class_compare() -+%{ -+ single_instruction; -+ fixed_latency(16); -+%} -+ -+// Pipeline class for memory operations. -+pipe_class pipe_class_memory() -+%{ -+ single_instruction; -+ fixed_latency(16); -+%} ++#ifndef PRODUCT ++ void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { ++ st->print("nop \t# %d bytes pad for loops and calls", _count); ++ } ++#endif + -+// Pipeline class for call. -+pipe_class pipe_class_call() -+%{ -+ single_instruction; -+ fixed_latency(100); -+%} ++ void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { ++ C2_MacroAssembler _masm(&cbuf); ++ Assembler::CompressibleRegion cr(&_masm); // nops shall be 2-byte under RVC for alignment purposes. ++ for (int i = 0; i < _count; i++) { ++ __ nop(); ++ } ++ } + -+// Define the class for the Nop node. -+define %{ -+ MachNop = pipe_class_empty; -+%} -+%} -+//----------INSTRUCTIONS------------------------------------------------------- -+// -+// match -- States which machine-independent subtree may be replaced -+// by this instruction. -+// ins_cost -- The estimated cost of this instruction is used by instruction -+// selection to identify a minimum cost tree of machine -+// instructions that matches a tree of machine-independent -+// instructions. -+// format -- A string providing the disassembly for this instruction. -+// The value of an instruction's operand may be inserted -+// by referring to it with a '$' prefix. -+// opcode -- Three instruction opcodes may be provided. These are referred -+// to within an encode class as $primary, $secondary, and $tertiary -+// rrspectively. The primary opcode is commonly used to -+// indicate the type of machine instruction, while secondary -+// and tertiary are often used for prefix options or addressing -+// modes. -+// ins_encode -- A list of encode classes with parameters. 
The encode class -+// name must have been defined in an 'enc_class' specification -+// in the encode section of the architecture description. ++ uint MachNopNode::size(PhaseRegAlloc*) const { ++ return _count * (UseRVC ? NativeInstruction::compressed_instruction_size : NativeInstruction::instruction_size); ++ } + -+// ============================================================================ -+// Memory (Load/Store) Instructions ++//============================================================================= ++const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty; + -+// Load Instructions ++int ConstantTable::calculate_table_base_offset() const { ++ return 0; // absolute addressing, no offset ++} + -+// Load Byte (8 bit signed) -+instruct loadB(iRegINoSp dst, memory mem) -+%{ -+ match(Set dst (LoadB mem)); ++bool MachConstantBaseNode::requires_postalloc_expand() const { return false; } ++void MachConstantBaseNode::postalloc_expand(GrowableArray *nodes, PhaseRegAlloc *ra_) { ++ ShouldNotReachHere(); ++} + -+ ins_cost(LOAD_COST); -+ format %{ "lb $dst, $mem\t# byte, #@loadB" %} ++void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const { ++ // Empty encoding ++} + -+ ins_encode %{ -+ __ lb(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const { ++ return 0; ++} + -+ ins_pipe(iload_reg_mem); -+%} ++#ifndef PRODUCT ++void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const { ++ assert_cond(st != NULL); ++ st->print("-- \t// MachConstantBaseNode (empty encoding)"); ++} ++#endif + -+// Load Byte (8 bit signed) into long -+instruct loadB2L(iRegLNoSp dst, memory mem) -+%{ -+ match(Set dst (ConvI2L (LoadB mem))); ++#ifndef PRODUCT ++void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const { ++ assert_cond(st != NULL && ra_ != NULL); ++ Compile* C = ra_->C; + -+ ins_cost(LOAD_COST); -+ format %{ "lb $dst, $mem\t# byte, #@loadB2L" %} ++ int framesize = C->output()->frame_slots() << LogBytesPerInt; + -+ ins_encode %{ -+ __ lb(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++ if (C->output()->need_stack_bang(framesize)) { ++ st->print("# stack bang size=%d\n\t", framesize); ++ } + -+ ins_pipe(iload_reg_mem); -+%} ++ st->print("sd fp, [sp, #%d]\n\t", - 2 * wordSize); ++ st->print("sd ra, [sp, #%d]\n\t", - wordSize); ++ if (PreserveFramePointer) { st->print("sub fp, sp, #%d\n\t", 2 * wordSize); } ++ st->print("sub sp, sp, #%d\n\t", framesize); + -+// Load Byte (8 bit unsigned) -+instruct loadUB(iRegINoSp dst, memory mem) -+%{ -+ match(Set dst (LoadUB mem)); ++ if (C->stub_function() == NULL && BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) { ++ st->print("ld t0, [guard]\n\t"); ++ st->print("membar LoadLoad\n\t"); ++ st->print("ld t1, [xthread, #thread_disarmed_offset]\n\t"); ++ st->print("beq t0, t1, skip\n\t"); ++ st->print("jalr #nmethod_entry_barrier_stub\n\t"); ++ st->print("j skip\n\t"); ++ st->print("guard: int\n\t"); ++ st->print("skip:\n\t"); ++ } ++} ++#endif + -+ ins_cost(LOAD_COST); -+ format %{ "lbu $dst, $mem\t# byte, #@loadUB" %} ++void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ assert_cond(ra_ != NULL); ++ Compile* C = ra_->C; ++ C2_MacroAssembler _masm(&cbuf); + -+ ins_encode %{ -+ __ lbu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++ // n.b. 
frame size includes space for return pc and fp ++ const int framesize = C->output()->frame_size_in_bytes(); + -+ ins_pipe(iload_reg_mem); -+%} ++ // insert a nop at the start of the prolog so we can patch in a ++ // branch if we need to invalidate the method later ++ __ nop(); + -+// Load Byte (8 bit unsigned) into long -+instruct loadUB2L(iRegLNoSp dst, memory mem) -+%{ -+ match(Set dst (ConvI2L (LoadUB mem))); ++ assert_cond(C != NULL); + -+ ins_cost(LOAD_COST); -+ format %{ "lbu $dst, $mem\t# byte, #@loadUB2L" %} ++ if (C->clinit_barrier_on_entry()) { ++ assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started"); + -+ ins_encode %{ -+ __ lbu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++ Label L_skip_barrier; + -+ ins_pipe(iload_reg_mem); -+%} ++ __ mov_metadata(t1, C->method()->holder()->constant_encoding()); ++ __ clinit_barrier(t1, t0, &L_skip_barrier); ++ __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); ++ __ bind(L_skip_barrier); ++ } + -+// Load Short (16 bit signed) -+instruct loadS(iRegINoSp dst, memory mem) -+%{ -+ match(Set dst (LoadS mem)); ++ int bangsize = C->output()->bang_size_in_bytes(); ++ if (C->output()->need_stack_bang(bangsize)) { ++ __ generate_stack_overflow_check(bangsize); ++ } + -+ ins_cost(LOAD_COST); -+ format %{ "lh $dst, $mem\t# short, #@loadS" %} ++ __ build_frame(framesize); + -+ ins_encode %{ -+ __ lh(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++ if (C->stub_function() == NULL) { ++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->nmethod_entry_barrier(&_masm); ++ } + -+ ins_pipe(iload_reg_mem); -+%} ++ if (VerifyStackAtCalls) { ++ Unimplemented(); ++ } + -+// Load Short (16 bit signed) into long -+instruct loadS2L(iRegLNoSp dst, memory mem) -+%{ -+ match(Set dst (ConvI2L (LoadS mem))); ++ C->output()->set_frame_complete(cbuf.insts_size()); + -+ ins_cost(LOAD_COST); -+ format %{ "lh $dst, $mem\t# short, #@loadS2L" %} ++ if (C->has_mach_constant_base_node()) { ++ // NOTE: We set the table base offset here because users might be ++ // emitted before MachConstantBaseNode. 
++ ConstantTable& constant_table = C->output()->constant_table(); ++ constant_table.set_table_base_offset(constant_table.calculate_table_base_offset()); ++ } ++} + -+ ins_encode %{ -+ __ lh(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++uint MachPrologNode::size(PhaseRegAlloc* ra_) const ++{ ++ assert_cond(ra_ != NULL); ++ return MachNode::size(ra_); // too many variables; just compute it ++ // the hard way ++} + -+ ins_pipe(iload_reg_mem); -+%} ++int MachPrologNode::reloc() const ++{ ++ return 0; ++} + -+// Load Char (16 bit unsigned) -+instruct loadUS(iRegINoSp dst, memory mem) -+%{ -+ match(Set dst (LoadUS mem)); ++//============================================================================= + -+ ins_cost(LOAD_COST); -+ format %{ "lhu $dst, $mem\t# short, #@loadUS" %} ++#ifndef PRODUCT ++void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const { ++ assert_cond(st != NULL && ra_ != NULL); ++ Compile* C = ra_->C; ++ assert_cond(C != NULL); ++ int framesize = C->output()->frame_size_in_bytes(); + -+ ins_encode %{ -+ __ lhu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++ st->print("# pop frame %d\n\t", framesize); + -+ ins_pipe(iload_reg_mem); -+%} ++ if (framesize == 0) { ++ st->print("ld ra, [sp,#%d]\n\t", (2 * wordSize)); ++ st->print("ld fp, [sp,#%d]\n\t", (3 * wordSize)); ++ st->print("add sp, sp, #%d\n\t", (2 * wordSize)); ++ } else { ++ st->print("add sp, sp, #%d\n\t", framesize); ++ st->print("ld ra, [sp,#%d]\n\t", - 2 * wordSize); ++ st->print("ld fp, [sp,#%d]\n\t", - wordSize); ++ } + -+// Load Short/Char (16 bit unsigned) into long -+instruct loadUS2L(iRegLNoSp dst, memory mem) -+%{ -+ match(Set dst (ConvI2L (LoadUS mem))); ++ if (do_polling() && C->is_method_compilation()) { ++ st->print("# test polling word\n\t"); ++ st->print("ld t0, [xthread,#%d]\n\t", in_bytes(JavaThread::polling_word_offset())); ++ st->print("bgtu sp, t0, #slow_path"); ++ } ++} ++#endif + -+ ins_cost(LOAD_COST); -+ format %{ "lhu $dst, $mem\t# short, #@loadUS2L" %} ++void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ assert_cond(ra_ != NULL); ++ Compile* C = ra_->C; ++ C2_MacroAssembler _masm(&cbuf); ++ assert_cond(C != NULL); ++ int framesize = C->output()->frame_size_in_bytes(); + -+ ins_encode %{ -+ __ lhu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++ __ remove_frame(framesize); + -+ ins_pipe(iload_reg_mem); -+%} ++ if (StackReservedPages > 0 && C->has_reserved_stack_access()) { ++ __ reserved_stack_check(); ++ } + -+// Load Integer (32 bit signed) -+instruct loadI(iRegINoSp dst, memory mem) -+%{ -+ match(Set dst (LoadI mem)); ++ if (do_polling() && C->is_method_compilation()) { ++ Label dummy_label; ++ Label* code_stub = &dummy_label; ++ if (!C->output()->in_scratch_emit_size()) { ++ code_stub = &C->output()->safepoint_poll_table()->add_safepoint(__ offset()); ++ } ++ __ relocate(relocInfo::poll_return_type); ++ __ safepoint_poll(*code_stub, true /* at_return */, false /* acquire */, true /* in_nmethod */); ++ } ++} + -+ ins_cost(LOAD_COST); -+ format %{ "lw $dst, $mem\t# int, #@loadI" %} ++uint MachEpilogNode::size(PhaseRegAlloc *ra_) const { ++ assert_cond(ra_ != NULL); ++ // Variable size. Determine dynamically. ++ return MachNode::size(ra_); ++} + -+ ins_encode %{ -+ __ lw(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++int MachEpilogNode::reloc() const { ++ // Return number of relocatable values contained in this instruction. 
++ return 1; // 1 for polling page. ++} ++const Pipeline * MachEpilogNode::pipeline() const { ++ return MachNode::pipeline_class(); ++} + -+ ins_pipe(iload_reg_mem); -+%} ++//============================================================================= + -+// Load Integer (32 bit signed) into long -+instruct loadI2L(iRegLNoSp dst, memory mem) -+%{ -+ match(Set dst (ConvI2L (LoadI mem))); ++// Figure out which register class each belongs in: rc_int, rc_float or ++// rc_stack. ++enum RC { rc_bad, rc_int, rc_float, rc_vector, rc_stack }; + -+ ins_cost(LOAD_COST); -+ format %{ "lw $dst, $mem\t# int, #@loadI2L" %} ++static enum RC rc_class(OptoReg::Name reg) { + -+ ins_encode %{ -+ __ lw(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++ if (reg == OptoReg::Bad) { ++ return rc_bad; ++ } + -+ ins_pipe(iload_reg_mem); -+%} ++ // we have 30 int registers * 2 halves ++ // (t0 and t1 are omitted) ++ int slots_of_int_registers = RegisterImpl::max_slots_per_register * (RegisterImpl::number_of_registers - 2); ++ if (reg < slots_of_int_registers) { ++ return rc_int; ++ } + -+// Load Integer (32 bit unsigned) into long -+instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask) -+%{ -+ match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); ++ // we have 32 float register * 2 halves ++ int slots_of_float_registers = FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers; ++ if (reg < slots_of_int_registers + slots_of_float_registers) { ++ return rc_float; ++ } + -+ ins_cost(LOAD_COST); -+ format %{ "lwu $dst, $mem\t# int, #@loadUI2L" %} ++ // we have 32 vector register * 4 halves ++ int slots_of_vector_registers = VectorRegisterImpl::max_slots_per_register * VectorRegisterImpl::number_of_registers; ++ if (reg < slots_of_int_registers + slots_of_float_registers + slots_of_vector_registers) { ++ return rc_vector; ++ } + -+ ins_encode %{ -+ __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++ // Between vector regs & stack is the flags regs. ++ assert(OptoReg::is_stack(reg), "blow up if spilling flags"); + -+ ins_pipe(iload_reg_mem); -+%} ++ return rc_stack; ++} + -+// Load Long (64 bit signed) -+instruct loadL(iRegLNoSp dst, memory mem) -+%{ -+ match(Set dst (LoadL mem)); ++uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const { ++ assert_cond(ra_ != NULL); ++ Compile* C = ra_->C; + -+ ins_cost(LOAD_COST); -+ format %{ "ld $dst, $mem\t# int, #@loadL" %} ++ // Get registers to move. 
++ OptoReg::Name src_hi = ra_->get_reg_second(in(1)); ++ OptoReg::Name src_lo = ra_->get_reg_first(in(1)); ++ OptoReg::Name dst_hi = ra_->get_reg_second(this); ++ OptoReg::Name dst_lo = ra_->get_reg_first(this); + -+ ins_encode %{ -+ __ ld(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++ enum RC src_hi_rc = rc_class(src_hi); ++ enum RC src_lo_rc = rc_class(src_lo); ++ enum RC dst_hi_rc = rc_class(dst_hi); ++ enum RC dst_lo_rc = rc_class(dst_lo); + -+ ins_pipe(iload_reg_mem); -+%} ++ assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register"); + -+// Load Range -+instruct loadRange(iRegINoSp dst, memory mem) -+%{ -+ match(Set dst (LoadRange mem)); ++ if (src_hi != OptoReg::Bad) { ++ assert((src_lo & 1) == 0 && src_lo + 1 == src_hi && ++ (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi, ++ "expected aligned-adjacent pairs"); ++ } + -+ ins_cost(LOAD_COST); -+ format %{ "lwu $dst, $mem\t# range, #@loadRange" %} ++ if (src_lo == dst_lo && src_hi == dst_hi) { ++ return 0; // Self copy, no move. ++ } + -+ ins_encode %{ -+ __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++ bool is64 = (src_lo & 1) == 0 && src_lo + 1 == src_hi && ++ (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi; ++ int src_offset = ra_->reg2offset(src_lo); ++ int dst_offset = ra_->reg2offset(dst_lo); + -+ ins_pipe(iload_reg_mem); -+%} ++ if (bottom_type()->isa_vect() != NULL) { ++ uint ireg = ideal_reg(); ++ if (ireg == Op_VecA && cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ Assembler::CompressibleRegion cr(&_masm); ++ int vector_reg_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); ++ if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) { ++ // stack to stack ++ __ spill_copy_vector_stack_to_stack(src_offset, dst_offset, ++ vector_reg_size_in_bytes); ++ } else if (src_lo_rc == rc_vector && dst_lo_rc == rc_stack) { ++ // vpr to stack ++ __ spill(as_VectorRegister(Matcher::_regEncode[src_lo]), ra_->reg2offset(dst_lo)); ++ } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_vector) { ++ // stack to vpr ++ __ unspill(as_VectorRegister(Matcher::_regEncode[dst_lo]), ra_->reg2offset(src_lo)); ++ } else if (src_lo_rc == rc_vector && dst_lo_rc == rc_vector) { ++ // vpr to vpr ++ __ vmv1r_v(as_VectorRegister(Matcher::_regEncode[dst_lo]), as_VectorRegister(Matcher::_regEncode[src_lo])); ++ } else { ++ ShouldNotReachHere(); ++ } ++ } ++ } else if (cbuf != NULL) { ++ C2_MacroAssembler _masm(cbuf); ++ Assembler::CompressibleRegion cr(&_masm); ++ switch (src_lo_rc) { ++ case rc_int: ++ if (dst_lo_rc == rc_int) { // gpr --> gpr copy ++ if (!is64 && this->ideal_reg() != Op_RegI) { // zero extended for narrow oop or klass ++ __ zero_extend(as_Register(Matcher::_regEncode[dst_lo]), as_Register(Matcher::_regEncode[src_lo]), 32); ++ } else { ++ __ mv(as_Register(Matcher::_regEncode[dst_lo]), as_Register(Matcher::_regEncode[src_lo])); ++ } ++ } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy ++ if (is64) { ++ __ fmv_d_x(as_FloatRegister(Matcher::_regEncode[dst_lo]), ++ as_Register(Matcher::_regEncode[src_lo])); ++ } else { ++ __ fmv_w_x(as_FloatRegister(Matcher::_regEncode[dst_lo]), ++ as_Register(Matcher::_regEncode[src_lo])); ++ } ++ } else { // gpr --> stack spill ++ assert(dst_lo_rc == rc_stack, "spill to bad register class"); ++ __ spill(as_Register(Matcher::_regEncode[src_lo]), is64, dst_offset); ++ } ++ break; ++ case rc_float: ++ if (dst_lo_rc == rc_int) { // fpr --> gpr copy ++ if (is64) { ++ __ fmv_x_d(as_Register(Matcher::_regEncode[dst_lo]), 
++ as_FloatRegister(Matcher::_regEncode[src_lo])); ++ } else { ++ __ fmv_x_w(as_Register(Matcher::_regEncode[dst_lo]), ++ as_FloatRegister(Matcher::_regEncode[src_lo])); ++ } ++ } else if (dst_lo_rc == rc_float) { // fpr --> fpr copy ++ if (is64) { ++ __ fmv_d(as_FloatRegister(Matcher::_regEncode[dst_lo]), ++ as_FloatRegister(Matcher::_regEncode[src_lo])); ++ } else { ++ __ fmv_s(as_FloatRegister(Matcher::_regEncode[dst_lo]), ++ as_FloatRegister(Matcher::_regEncode[src_lo])); ++ } ++ } else { // fpr --> stack spill ++ assert(dst_lo_rc == rc_stack, "spill to bad register class"); ++ __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]), ++ is64, dst_offset); ++ } ++ break; ++ case rc_stack: ++ if (dst_lo_rc == rc_int) { // stack --> gpr load ++ if (this->ideal_reg() == Op_RegI) { ++ __ unspill(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset); ++ } else { // // zero extended for narrow oop or klass ++ __ unspillu(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset); ++ } ++ } else if (dst_lo_rc == rc_float) { // stack --> fpr load ++ __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]), ++ is64, src_offset); ++ } else { // stack --> stack copy ++ assert(dst_lo_rc == rc_stack, "spill to bad register class"); ++ if (this->ideal_reg() == Op_RegI) { ++ __ unspill(t0, is64, src_offset); ++ } else { // zero extended for narrow oop or klass ++ __ unspillu(t0, is64, src_offset); ++ } ++ __ spill(t0, is64, dst_offset); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } + -+// Load Pointer -+instruct loadP(iRegPNoSp dst, memory mem) -+%{ -+ match(Set dst (LoadP mem)); ++ if (st != NULL) { ++ st->print("spill "); ++ if (src_lo_rc == rc_stack) { ++ st->print("[sp, #%d] -> ", src_offset); ++ } else { ++ st->print("%s -> ", Matcher::regName[src_lo]); ++ } ++ if (dst_lo_rc == rc_stack) { ++ st->print("[sp, #%d]", dst_offset); ++ } else { ++ st->print("%s", Matcher::regName[dst_lo]); ++ } ++ if (bottom_type()->isa_vect() != NULL) { ++ int vsize = 0; ++ if (ideal_reg() == Op_VecA) { ++ vsize = Matcher::scalable_vector_reg_size(T_BYTE) * 8; ++ } else { ++ ShouldNotReachHere(); ++ } ++ st->print("\t# vector spill size = %d", vsize); ++ } else { ++ st->print("\t# spill size = %d", is64 ? 
64 : 32); ++ } ++ } + -+ ins_cost(LOAD_COST); -+ format %{ "ld $dst, $mem\t# ptr, #@loadP" %} ++ return 0; ++} + -+ ins_encode %{ -+ __ ld(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++#ifndef PRODUCT ++void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const { ++ if (ra_ == NULL) { ++ st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx); ++ } else { ++ implementation(NULL, ra_, false, st); ++ } ++} ++#endif + -+ ins_pipe(iload_reg_mem); -+%} ++void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ implementation(&cbuf, ra_, false, NULL); ++} + -+// Load Compressed Pointer -+instruct loadN(iRegNNoSp dst, memory mem) -+%{ -+ match(Set dst (LoadN mem)); ++uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); ++} + -+ ins_cost(LOAD_COST); -+ format %{ "lwu $dst, $mem\t# loadN, compressed ptr, #@loadN" %} ++//============================================================================= + -+ ins_encode %{ -+ __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++#ifndef PRODUCT ++void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const { ++ assert_cond(ra_ != NULL && st != NULL); ++ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); ++ int reg = ra_->get_reg_first(this); ++ st->print("add %s, sp, #%d\t# box lock", ++ Matcher::regName[reg], offset); ++} ++#endif + -+ ins_pipe(iload_reg_mem); -+%} ++void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ C2_MacroAssembler _masm(&cbuf); + -+// Load Klass Pointer -+instruct loadKlass(iRegPNoSp dst, memory mem) -+%{ -+ match(Set dst (LoadKlass mem)); ++ assert_cond(ra_ != NULL); ++ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); ++ int reg = ra_->get_encode(this); + -+ ins_cost(LOAD_COST); -+ format %{ "ld $dst, $mem\t# class, #@loadKlass" %} ++ if (is_imm_in_range(offset, 12, 0)) { ++ __ addi(as_Register(reg), sp, offset); ++ } else if (is_imm_in_range(offset, 32, 0)) { ++ __ li32(t0, offset); ++ __ add(as_Register(reg), sp, t0); ++ } else { ++ ShouldNotReachHere(); ++ } ++} + -+ ins_encode %{ -+ __ ld(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++uint BoxLockNode::size(PhaseRegAlloc *ra_) const { ++ // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_). 
++ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); + -+ ins_pipe(iload_reg_mem); -+%} ++ if (is_imm_in_range(offset, 12, 0)) { ++ return NativeInstruction::instruction_size; ++ } else { ++ return 3 * NativeInstruction::instruction_size; // lui + addiw + add; ++ } ++} + -+// Load Narrow Klass Pointer -+instruct loadNKlass(iRegNNoSp dst, memory mem) -+%{ -+ match(Set dst (LoadNKlass mem)); ++//============================================================================= + -+ ins_cost(LOAD_COST); -+ format %{ "lwu $dst, $mem\t# loadNKlass, compressed class ptr, #@loadNKlass" %} ++#ifndef PRODUCT ++void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const ++{ ++ assert_cond(st != NULL); ++ st->print_cr("# MachUEPNode"); ++ if (UseCompressedClassPointers) { ++ st->print_cr("\tlwu t0, [j_rarg0, oopDesc::klass_offset_in_bytes()]\t# compressed klass"); ++ if (CompressedKlassPointers::shift() != 0) { ++ st->print_cr("\tdecode_klass_not_null t0, t0"); ++ } ++ } else { ++ st->print_cr("\tld t0, [j_rarg0, oopDesc::klass_offset_in_bytes()]\t# compressed klass"); ++ } ++ st->print_cr("\tbeq t0, t1, ic_hit"); ++ st->print_cr("\tj, SharedRuntime::_ic_miss_stub\t # Inline cache check"); ++ st->print_cr("\tic_hit:"); ++} ++#endif + -+ ins_encode %{ -+ __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const ++{ ++ // This is the unverified entry point. ++ C2_MacroAssembler _masm(&cbuf); + -+ ins_pipe(iload_reg_mem); -+%} ++ Label skip; ++ __ cmp_klass(j_rarg0, t1, t0, skip); ++ __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); ++ __ bind(skip); ++} + -+// Load Float -+instruct loadF(fRegF dst, memory mem) -+%{ -+ match(Set dst (LoadF mem)); ++uint MachUEPNode::size(PhaseRegAlloc* ra_) const ++{ ++ assert_cond(ra_ != NULL); ++ return MachNode::size(ra_); ++} + -+ ins_cost(LOAD_COST); -+ format %{ "flw $dst, $mem\t# float, #@loadF" %} ++// REQUIRED EMIT CODE + -+ ins_encode %{ -+ __ flw(as_FloatRegister($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++//============================================================================= + -+ ins_pipe(fp_load_mem_s); -+%} ++// Emit exception handler code. ++int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) ++{ ++ // la_patchable t0, #exception_blob_entry_point ++ // jr (offset)t0 ++ // or ++ // j #exception_blob_entry_point ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a handler. ++ C2_MacroAssembler _masm(&cbuf); ++ address base = __ start_a_stub(size_exception_handler()); ++ if (base == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return 0; // CodeBuffer::expand failed ++ } ++ int offset = __ offset(); ++ __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point())); ++ assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); ++ __ end_a_stub(); ++ return offset; ++} + -+// Load Double -+instruct loadD(fRegD dst, memory mem) -+%{ -+ match(Set dst (LoadD mem)); ++// Emit deopt handler code. ++int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) ++{ ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a handler. 
++ C2_MacroAssembler _masm(&cbuf); ++ address base = __ start_a_stub(size_deopt_handler()); ++ if (base == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return 0; // CodeBuffer::expand failed ++ } ++ int offset = __ offset(); + -+ ins_cost(LOAD_COST); -+ format %{ "fld $dst, $mem\t# double, #@loadD" %} ++ __ auipc(ra, 0); ++ __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); + -+ ins_encode %{ -+ __ fld(as_FloatRegister($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++ assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow"); ++ __ end_a_stub(); ++ return offset; + -+ ins_pipe(fp_load_mem_d); -+%} ++} ++// REQUIRED MATCHER CODE + -+// Load Int Constant -+instruct loadConI(iRegINoSp dst, immI src) -+%{ -+ match(Set dst src); ++//============================================================================= + -+ ins_cost(ALU_COST); -+ format %{ "li $dst, $src\t# int, #@loadConI" %} ++const bool Matcher::match_rule_supported(int opcode) { ++ if (!has_match_rule(opcode)) { ++ return false; ++ } + -+ ins_encode(riscv_enc_li_imm(dst, src)); ++ switch (opcode) { ++ case Op_CacheWB: // fall through ++ case Op_CacheWBPreSync: // fall through ++ case Op_CacheWBPostSync: ++ if (!VM_Version::supports_data_cache_line_flush()) { ++ return false; ++ } ++ break; + -+ ins_pipe(ialu_imm); -+%} ++ case Op_StrCompressedCopy: // fall through ++ case Op_StrInflatedCopy: // fall through ++ case Op_CountPositives: ++ return UseRVV; + -+// Load Long Constant -+instruct loadConL(iRegLNoSp dst, immL src) -+%{ -+ match(Set dst src); ++ case Op_EncodeISOArray: ++ return UseRVV && SpecialEncodeISOArray; + -+ ins_cost(ALU_COST); -+ format %{ "li $dst, $src\t# long, #@loadConL" %} ++ case Op_PopCountI: ++ case Op_PopCountL: ++ return UsePopCountInstruction; + -+ ins_encode(riscv_enc_li_imm(dst, src)); ++ case Op_RotateRight: ++ case Op_RotateLeft: ++ case Op_CountLeadingZerosI: ++ case Op_CountLeadingZerosL: ++ case Op_CountTrailingZerosI: ++ case Op_CountTrailingZerosL: ++ return UseRVB; ++ } + -+ ins_pipe(ialu_imm); -+%} ++ return true; // Per default match rules are supported. ++} + -+// Load Pointer Constant -+instruct loadConP(iRegPNoSp dst, immP con) -+%{ -+ match(Set dst con); ++// Identify extra cases that we might want to provide match rules for vector nodes and ++// other intrinsics guarded with vector length (vlen) and element type (bt). ++const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { ++ if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) { ++ return false; ++ } + -+ ins_cost(ALU_COST); -+ format %{ "mv $dst, $con\t# ptr, #@loadConP" %} ++ return op_vec_supported(opcode); ++} + -+ ins_encode(riscv_enc_mov_p(dst, con)); ++const bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) { ++ return false; ++} + -+ ins_pipe(ialu_imm); -+%} ++const RegMask* Matcher::predicate_reg_mask(void) { ++ return NULL; ++} + -+// Load Null Pointer Constant -+instruct loadConP0(iRegPNoSp dst, immP0 con) -+%{ -+ match(Set dst con); ++const TypeVectMask* Matcher::predicate_reg_type(const Type* elemTy, int length) { ++ return NULL; ++} + -+ ins_cost(ALU_COST); -+ format %{ "mv $dst, $con\t# NULL ptr, #@loadConP0" %} ++// Vector calling convention not yet implemented. 
++const bool Matcher::supports_vector_calling_convention(void) { ++ return false; ++} + -+ ins_encode(riscv_enc_mov_zero(dst)); ++OptoRegPair Matcher::vector_return_value(uint ideal_reg) { ++ Unimplemented(); ++ return OptoRegPair(0, 0); ++} + -+ ins_pipe(ialu_imm); -+%} ++// Is this branch offset short enough that a short branch can be used? ++// ++// NOTE: If the platform does not provide any short branch variants, then ++// this method should return false for offset 0. ++// |---label(L1)-----| ++// |-----------------| ++// |-----------------|----------eq: float------------------- ++// |-----------------| // far_cmpD_branch | cmpD_branch ++// |------- ---------| feq; | feq; ++// |-far_cmpD_branch-| beqz done; | bnez L; ++// |-----------------| j L; | ++// |-----------------| bind(done); | ++// |-----------------|-------------------------------------- ++// |-----------------| // so shortBrSize = br_size - 4; ++// |-----------------| // so offs = offset - shortBrSize + 4; ++// |---label(L2)-----| ++bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { ++ // The passed offset is relative to address of the branch. ++ int shortBrSize = br_size - 4; ++ int offs = offset - shortBrSize + 4; ++ return (-4096 <= offs && offs < 4096); ++} + -+// Load Pointer Constant One -+instruct loadConP1(iRegPNoSp dst, immP_1 con) -+%{ -+ match(Set dst con); ++// Vector width in bytes. ++const int Matcher::vector_width_in_bytes(BasicType bt) { ++ if (UseRVV) { ++ // The MaxVectorSize should have been set by detecting RVV max vector register size when check UseRVV. ++ // MaxVectorSize == VM_Version::_initial_vector_length ++ return MaxVectorSize; ++ } ++ return 0; ++} + -+ ins_cost(ALU_COST); -+ format %{ "mv $dst, $con\t# load ptr constant one, #@loadConP1" %} ++// Limits on vector size (number of elements) loaded into vector. ++const int Matcher::max_vector_size(const BasicType bt) { ++ return vector_width_in_bytes(bt) / type2aelembytes(bt); ++} ++const int Matcher::min_vector_size(const BasicType bt) { ++ return max_vector_size(bt); ++} + -+ ins_encode(riscv_enc_mov_p1(dst)); ++// Vector ideal reg. ++const uint Matcher::vector_ideal_reg(int len) { ++ assert(MaxVectorSize >= len, ""); ++ if (UseRVV) { ++ return Op_VecA; ++ } + -+ ins_pipe(ialu_imm); -+%} ++ ShouldNotReachHere(); ++ return 0; ++} + -+// Load Poll Page Constant -+instruct loadConPollPage(iRegPNoSp dst, immPollPage con) -+%{ -+ match(Set dst con); ++const int Matcher::scalable_vector_reg_size(const BasicType bt) { ++ return Matcher::max_vector_size(bt); ++} + -+ ins_cost(ALU_COST * 6); -+ format %{ "movptr $dst, $con\t# Poll Page Ptr, #@loadConPollPage" %} ++MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* original_opnd, uint ideal_reg, bool is_temp) { ++ ShouldNotReachHere(); // generic vector operands not supported ++ return NULL; ++} + -+ ins_encode(riscv_enc_mov_poll_page(dst, con)); ++bool Matcher::is_reg2reg_move(MachNode* m) { ++ ShouldNotReachHere(); // generic vector operands not supported ++ return false; ++} + -+ ins_pipe(ialu_imm); -+%} ++bool Matcher::is_generic_vector(MachOper* opnd) { ++ ShouldNotReachHere(); // generic vector operands not supported ++ return false; ++} + -+// Load Byte Map Base Constant -+instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con) -+%{ -+ match(Set dst con); -+ ins_cost(ALU_COST); -+ format %{ "mv $dst, $con\t# Byte Map Base, #@loadByteMapBase" %} ++// Return whether or not this register is ever used as an argument. 
++// This function is used on startup to build the trampoline stubs in ++// generateOptoStub. Registers not mentioned will be killed by the VM ++// call in the trampoline, and arguments in those registers not be ++// available to the callee. ++bool Matcher::can_be_java_arg(int reg) ++{ ++ return ++ reg == R10_num || reg == R10_H_num || ++ reg == R11_num || reg == R11_H_num || ++ reg == R12_num || reg == R12_H_num || ++ reg == R13_num || reg == R13_H_num || ++ reg == R14_num || reg == R14_H_num || ++ reg == R15_num || reg == R15_H_num || ++ reg == R16_num || reg == R16_H_num || ++ reg == R17_num || reg == R17_H_num || ++ reg == F10_num || reg == F10_H_num || ++ reg == F11_num || reg == F11_H_num || ++ reg == F12_num || reg == F12_H_num || ++ reg == F13_num || reg == F13_H_num || ++ reg == F14_num || reg == F14_H_num || ++ reg == F15_num || reg == F15_H_num || ++ reg == F16_num || reg == F16_H_num || ++ reg == F17_num || reg == F17_H_num; ++} + -+ ins_encode(riscv_enc_mov_byte_map_base(dst)); ++bool Matcher::is_spillable_arg(int reg) ++{ ++ return can_be_java_arg(reg); ++} + -+ ins_pipe(ialu_imm); -+%} ++uint Matcher::int_pressure_limit() ++{ ++ // A derived pointer is live at CallNode and then is flagged by RA ++ // as a spilled LRG. Spilling heuristics(Spill-USE) explicitly skip ++ // derived pointers and lastly fail to spill after reaching maximum ++ // number of iterations. Lowering the default pressure threshold to ++ // (_NO_SPECIAL_REG32_mask.Size() minus 1) forces CallNode to become ++ // a high register pressure area of the code so that split_DEF can ++ // generate DefinitionSpillCopy for the derived pointer. ++ uint default_int_pressure_threshold = _NO_SPECIAL_REG32_mask.Size() - 1; ++ if (!PreserveFramePointer) { ++ // When PreserveFramePointer is off, frame pointer is allocatable, ++ // but different from other SOC registers, it is excluded from ++ // fatproj's mask because its save type is No-Save. Decrease 1 to ++ // ensure high pressure at fatproj when PreserveFramePointer is off. ++ // See check_pressure_at_fatproj(). ++ default_int_pressure_threshold--; ++ } ++ return (INTPRESSURE == -1) ? default_int_pressure_threshold : INTPRESSURE; ++} ++ ++uint Matcher::float_pressure_limit() ++{ ++ // _FLOAT_REG_mask is generated by adlc from the float_reg register class. ++ return (FLOATPRESSURE == -1) ? _FLOAT_REG_mask.Size() : FLOATPRESSURE; ++} + -+// Load Narrow Pointer Constant -+instruct loadConN(iRegNNoSp dst, immN con) -+%{ -+ match(Set dst con); ++bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) { ++ return false; ++} + -+ ins_cost(ALU_COST * 4); -+ format %{ "mv $dst, $con\t# compressed ptr, #@loadConN" %} ++RegMask Matcher::divI_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} + -+ ins_encode(riscv_enc_mov_n(dst, con)); ++// Register for MODI projection of divmodI. ++RegMask Matcher::modI_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} + -+ ins_pipe(ialu_imm); -+%} ++// Register for DIVL projection of divmodL. ++RegMask Matcher::divL_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} + -+// Load Narrow Null Pointer Constant -+instruct loadConN0(iRegNNoSp dst, immN0 con) -+%{ -+ match(Set dst con); ++// Register for MODL projection of divmodL. 
++RegMask Matcher::modL_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} + -+ ins_cost(ALU_COST); -+ format %{ "mv $dst, $con\t# compressed NULL ptr, #@loadConN0" %} ++const RegMask Matcher::method_handle_invoke_SP_save_mask() { ++ return FP_REG_mask(); ++} + -+ ins_encode(riscv_enc_mov_zero(dst)); ++bool size_fits_all_mem_uses(AddPNode* addp, int shift) { ++ assert_cond(addp != NULL); ++ for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) { ++ Node* u = addp->fast_out(i); ++ if (u != NULL && u->is_Mem()) { ++ int opsize = u->as_Mem()->memory_size(); ++ assert(opsize > 0, "unexpected memory operand size"); ++ if (u->as_Mem()->memory_size() != (1 << shift)) { ++ return false; ++ } ++ } ++ } ++ return true; ++} + -+ ins_pipe(ialu_imm); -+%} ++// Should the Matcher clone input 'm' of node 'n'? ++bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) { ++ assert_cond(m != NULL); ++ if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con) ++ mstack.push(m, Visit); // m = ShiftCntV ++ return true; ++ } ++ return false; ++} + -+// Load Narrow Klass Constant -+instruct loadConNKlass(iRegNNoSp dst, immNKlass con) -+%{ -+ match(Set dst con); ++// Should the Matcher clone shifts on addressing modes, expecting them ++// to be subsumed into complex addressing expressions or compute them ++// into registers? ++bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { ++ return clone_base_plus_offset_address(m, mstack, address_visited); ++} + -+ ins_cost(ALU_COST * 6); -+ format %{ "mv $dst, $con\t# compressed klass ptr, #@loadConNKlass" %} ++%} + -+ ins_encode(riscv_enc_mov_nk(dst, con)); + -+ ins_pipe(ialu_imm); -+%} + -+// Load Float Constant -+instruct loadConF(fRegF dst, immF con) %{ -+ match(Set dst con); ++//----------ENCODING BLOCK----------------------------------------------------- ++// This block specifies the encoding classes used by the compiler to ++// output byte streams. Encoding classes are parameterized macros ++// used by Machine Instruction Nodes in order to generate the bit ++// encoding of the instruction. Operands specify their base encoding ++// interface with the interface keyword. There are currently ++// supported four interfaces, REG_INTER, CONST_INTER, MEMORY_INTER, & ++// COND_INTER. REG_INTER causes an operand to generate a function ++// which returns its register number when queried. CONST_INTER causes ++// an operand to generate a function which returns the value of the ++// constant when queried. MEMORY_INTER causes an operand to generate ++// four functions which return the Base Register, the Index Register, ++// the Scale Value, and the Offset Value of the operand when queried. ++// COND_INTER causes an operand to generate six functions which return ++// the encoding code (ie - encoding bits for the instruction) ++// associated with each basic boolean condition for a conditional ++// instruction. ++// ++// Instructions specify two basic values for encoding. Again, a ++// function is available to check if the constant displacement is an ++// oop. They use the ins_encode keyword to specify their encoding ++// classes (which must be a sequence of enc_class names, and their ++// parameters, specified in the encoding block), and they use the ++// opcode keyword to specify, in order, their primary, secondary, and ++// tertiary opcode. Only the opcode sections which a particular ++// instruction needs for encoding need to be specified. 
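For reference, the distance test in Matcher::is_short_branch_offset earlier in this hunk is easiest to follow with concrete numbers. The standalone C++ sketch below is not part of the patch; short_branch_reachable is an illustrative name that simply repeats the same arithmetic, under the assumption that a short conditional branch reaches roughly a signed +/-4 KiB window.

#include <iostream>

// Standalone sketch (not HotSpot code) of the arithmetic in
// Matcher::is_short_branch_offset: the passed offset is relative to the
// far-branch form, so it is first rebased onto the 4-byte short branch
// before the +/-4096 range test.
static bool short_branch_reachable(int br_size, int offset) {
  int shortBrSize = br_size - 4;        // far form minus the 4-byte short branch
  int offs = offset - shortBrSize + 4;  // offset as seen from the short branch
  return -4096 <= offs && offs < 4096;
}

int main() {
  std::cout << short_branch_reachable(12, 4000) << '\n';  // 1: short branch suffices
  std::cout << short_branch_reachable(12, 6000) << '\n';  // 0: needs the far variant
  return 0;
}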
++encode %{ ++ // BEGIN Non-volatile memory access + -+ ins_cost(LOAD_COST); -+ format %{ -+ "flw $dst, [$constantaddress]\t# load from constant table: float=$con, #@loadConF" ++ enc_class riscv_enc_li_imm(iRegIorL dst, immIorL src) %{ ++ C2_MacroAssembler _masm(&cbuf); ++ Assembler::CompressibleRegion cr(&_masm); ++ int64_t con = (int64_t)$src$$constant; ++ Register dst_reg = as_Register($dst$$reg); ++ __ li(dst_reg, con); + %} + -+ ins_encode %{ -+ __ flw(as_FloatRegister($dst$$reg), $constantaddress($con)); ++ enc_class riscv_enc_mov_p(iRegP dst, immP src) %{ ++ C2_MacroAssembler _masm(&cbuf); ++ Register dst_reg = as_Register($dst$$reg); ++ address con = (address)$src$$constant; ++ if (con == NULL || con == (address)1) { ++ ShouldNotReachHere(); ++ } else { ++ relocInfo::relocType rtype = $src->constant_reloc(); ++ if (rtype == relocInfo::oop_type) { ++ __ movoop(dst_reg, (jobject)con, /*immediate*/true); ++ } else if (rtype == relocInfo::metadata_type) { ++ __ mov_metadata(dst_reg, (Metadata*)con); ++ } else { ++ assert(rtype == relocInfo::none, "unexpected reloc type"); ++ __ li(dst_reg, $src$$constant); ++ } ++ } + %} + -+ ins_pipe(fp_load_constant_s); -+%} ++ enc_class riscv_enc_mov_p1(iRegP dst) %{ ++ C2_MacroAssembler _masm(&cbuf); ++ Assembler::CompressibleRegion cr(&_masm); ++ Register dst_reg = as_Register($dst$$reg); ++ __ li(dst_reg, 1); ++ %} + -+instruct loadConF0(fRegF dst, immF0 con) %{ -+ match(Set dst con); ++ enc_class riscv_enc_mov_byte_map_base(iRegP dst) %{ ++ C2_MacroAssembler _masm(&cbuf); ++ __ load_byte_map_base($dst$$Register); ++ %} + -+ ins_cost(XFER_COST); ++ enc_class riscv_enc_mov_n(iRegN dst, immN src) %{ ++ C2_MacroAssembler _masm(&cbuf); ++ Register dst_reg = as_Register($dst$$reg); ++ address con = (address)$src$$constant; ++ if (con == NULL) { ++ ShouldNotReachHere(); ++ } else { ++ relocInfo::relocType rtype = $src->constant_reloc(); ++ assert(rtype == relocInfo::oop_type, "unexpected reloc type"); ++ __ set_narrow_oop(dst_reg, (jobject)con); ++ } ++ %} + -+ format %{ "fmv.w.x $dst, zr\t# float, #@loadConF0" %} ++ enc_class riscv_enc_mov_zero(iRegNorP dst) %{ ++ C2_MacroAssembler _masm(&cbuf); ++ Register dst_reg = as_Register($dst$$reg); ++ __ mv(dst_reg, zr); ++ %} + -+ ins_encode %{ -+ __ fmv_w_x(as_FloatRegister($dst$$reg), zr); ++ enc_class riscv_enc_mov_nk(iRegN dst, immNKlass src) %{ ++ C2_MacroAssembler _masm(&cbuf); ++ Register dst_reg = as_Register($dst$$reg); ++ address con = (address)$src$$constant; ++ if (con == NULL) { ++ ShouldNotReachHere(); ++ } else { ++ relocInfo::relocType rtype = $src->constant_reloc(); ++ assert(rtype == relocInfo::metadata_type, "unexpected reloc type"); ++ __ set_narrow_klass(dst_reg, (Klass *)con); ++ } + %} + -+ ins_pipe(fp_load_constant_s); -+%} ++ enc_class riscv_enc_cmpxchgw(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ ++ C2_MacroAssembler _masm(&cbuf); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, ++ /*result as bool*/ true); ++ %} + -+// Load Double Constant -+instruct loadConD(fRegD dst, immD con) %{ -+ match(Set dst con); ++ enc_class riscv_enc_cmpxchgn(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ ++ C2_MacroAssembler _masm(&cbuf); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, ++ /*result as bool*/ true); ++ 
%} + -+ ins_cost(LOAD_COST); -+ format %{ -+ "fld $dst, [$constantaddress]\t# load from constant table: double=$con, #@loadConD" ++ enc_class riscv_enc_cmpxchg(iRegINoSp res, memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{ ++ C2_MacroAssembler _masm(&cbuf); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, ++ /*result as bool*/ true); + %} + -+ ins_encode %{ -+ __ fld(as_FloatRegister($dst$$reg), $constantaddress($con)); ++ enc_class riscv_enc_cmpxchgw_acq(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ ++ C2_MacroAssembler _masm(&cbuf); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, ++ /*result as bool*/ true); + %} + -+ ins_pipe(fp_load_constant_d); -+%} ++ enc_class riscv_enc_cmpxchgn_acq(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ ++ C2_MacroAssembler _masm(&cbuf); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, ++ /*result as bool*/ true); ++ %} + -+instruct loadConD0(fRegD dst, immD0 con) %{ -+ match(Set dst con); ++ enc_class riscv_enc_cmpxchg_acq(iRegINoSp res, memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{ ++ C2_MacroAssembler _masm(&cbuf); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, ++ /*result as bool*/ true); ++ %} + -+ ins_cost(XFER_COST); ++ // compare and branch instruction encodings + -+ format %{ "fmv.d.x $dst, zr\t# double, #@loadConD0" %} ++ enc_class riscv_enc_j(label lbl) %{ ++ C2_MacroAssembler _masm(&cbuf); ++ Label* L = $lbl$$label; ++ __ j(*L); ++ %} + -+ ins_encode %{ -+ __ fmv_d_x(as_FloatRegister($dst$$reg), zr); ++ enc_class riscv_enc_far_cmpULtGe_imm0_branch(cmpOpULtGe cmp, iRegIorL op1, label lbl) %{ ++ C2_MacroAssembler _masm(&cbuf); ++ Label* L = $lbl$$label; ++ switch ($cmp$$cmpcode) { ++ case(BoolTest::ge): ++ __ j(*L); ++ break; ++ case(BoolTest::lt): ++ break; ++ default: ++ Unimplemented(); ++ } + %} + -+ ins_pipe(fp_load_constant_d); -+%} ++ // call instruction encodings + -+// Store Instructions -+// Store CMS card-mark Immediate -+instruct storeimmCM0(immI0 zero, memory mem) -+%{ -+ match(Set mem (StoreCM mem zero)); -+ predicate(unnecessary_storestore(n)); ++ enc_class riscv_enc_partial_subtype_check(iRegP sub, iRegP super, iRegP temp, iRegP result) %{ ++ Register sub_reg = as_Register($sub$$reg); ++ Register super_reg = as_Register($super$$reg); ++ Register temp_reg = as_Register($temp$$reg); ++ Register result_reg = as_Register($result$$reg); ++ Register cr_reg = t1; + -+ ins_cost(STORE_COST); -+ format %{ "storestore (elided)\n\t" -+ "sb zr, $mem\t# byte, #@storeimmCM0" %} ++ Label miss; ++ Label done; ++ C2_MacroAssembler _masm(&cbuf); ++ __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg, ++ NULL, &miss); ++ if ($primary) { ++ __ mv(result_reg, zr); ++ } else { ++ __ mv(cr_reg, zr); ++ __ j(done); ++ } + -+ ins_encode %{ -+ __ sb(zr, Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++ __ bind(miss); ++ if (!$primary) { ++ __ li(cr_reg, 1); ++ } + -+ ins_pipe(istore_mem); -+%} ++ __ bind(done); ++ %} + -+// Store CMS card-mark Immediate with intervening StoreStore -+// needed when using CMS 
with no conditional card marking -+instruct storeimmCM0_ordered(immI0 zero, memory mem) -+%{ -+ match(Set mem (StoreCM mem zero)); ++ enc_class riscv_enc_java_static_call(method meth) %{ ++ C2_MacroAssembler _masm(&cbuf); + -+ ins_cost(ALU_COST + STORE_COST); -+ format %{ "membar(StoreStore)\n\t" -+ "sb zr, $mem\t# byte, #@storeimmCM0_ordered" %} ++ address addr = (address)$meth$$method; ++ address call = NULL; ++ assert_cond(addr != NULL); ++ if (!_method) { ++ // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap. ++ call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf); ++ if (call == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ } else { ++ int method_index = resolved_method_index(cbuf); ++ RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index) ++ : static_call_Relocation::spec(method_index); ++ call = __ trampoline_call(Address(addr, rspec), &cbuf); ++ if (call == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } + -+ ins_encode %{ -+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); -+ __ sb(zr, Address(as_Register($mem$$base), $mem$$disp)); ++ // Emit stub for static call ++ address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); ++ if (stub == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ } + %} + -+ ins_pipe(istore_mem); -+%} -+ -+// Store Byte -+instruct storeB(iRegIorL2I src, memory mem) -+%{ -+ match(Set mem (StoreB mem src)); -+ -+ ins_cost(STORE_COST); -+ format %{ "sb $src, $mem\t# byte, #@storeB" %} ++ enc_class riscv_enc_java_dynamic_call(method meth) %{ ++ C2_MacroAssembler _masm(&cbuf); ++ int method_index = resolved_method_index(cbuf); ++ address call = __ ic_call((address)$meth$$method, method_index); ++ if (call == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ %} + -+ ins_encode %{ -+ __ sb(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ enc_class riscv_enc_call_epilog() %{ ++ C2_MacroAssembler _masm(&cbuf); ++ if (VerifyStackAtCalls) { ++ // Check that stack depth is unchanged: find majik cookie on stack ++ __ call_Unimplemented(); ++ } + %} + -+ ins_pipe(istore_reg_mem); -+%} ++ enc_class riscv_enc_java_to_runtime(method meth) %{ ++ C2_MacroAssembler _masm(&cbuf); + -+instruct storeimmB0(immI0 zero, memory mem) -+%{ -+ match(Set mem (StoreB mem zero)); ++ // some calls to generated routines (arraycopy code) are scheduled ++ // by C2 as runtime calls. if so we can call them using a jr (they ++ // will be in a reachable segment) otherwise we have to use a jalr ++ // which loads the absolute address into a register. ++ address entry = (address)$meth$$method; ++ CodeBlob *cb = CodeCache::find_blob(entry); ++ if (cb != NULL) { ++ address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type)); ++ if (call == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ } else { ++ Label retaddr; ++ __ la(t1, retaddr); ++ __ la(t0, RuntimeAddress(entry)); ++ // Leave a breadcrumb for JavaFrameAnchor::capture_last_Java_pc() ++ __ addi(sp, sp, -2 * wordSize); ++ __ sd(t1, Address(sp, wordSize)); ++ __ jalr(t0); ++ __ bind(retaddr); ++ __ addi(sp, sp, 2 * wordSize); ++ } ++ %} + -+ ins_cost(STORE_COST); -+ format %{ "sb zr, $mem\t# byte, #@storeimmB0" %} ++ // using the cr register as the bool result: 0 for success; others failed. 
++ enc_class riscv_enc_fast_lock(iRegP object, iRegP box, iRegP tmp1, iRegP tmp2) %{ ++ C2_MacroAssembler _masm(&cbuf); ++ Register flag = t1; ++ Register oop = as_Register($object$$reg); ++ Register box = as_Register($box$$reg); ++ Register disp_hdr = as_Register($tmp1$$reg); ++ Register tmp = as_Register($tmp2$$reg); ++ Label cont; ++ Label object_has_monitor; + -+ ins_encode %{ -+ __ sb(zr, Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++ assert_different_registers(oop, box, tmp, disp_hdr, t0); + -+ ins_pipe(istore_mem); -+%} ++ // Load markWord from object into displaced_header. ++ __ ld(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes())); + -+// Store Char/Short -+instruct storeC(iRegIorL2I src, memory mem) -+%{ -+ match(Set mem (StoreC mem src)); ++ if (DiagnoseSyncOnValueBasedClasses != 0) { ++ __ load_klass(flag, oop); ++ __ lwu(flag, Address(flag, Klass::access_flags_offset())); ++ __ andi(flag, flag, JVM_ACC_IS_VALUE_BASED_CLASS, tmp /* tmp */); ++ __ bnez(flag, cont, true /* is_far */); ++ } + -+ ins_cost(STORE_COST); -+ format %{ "sh $src, $mem\t# short, #@storeC" %} ++ // Check for existing monitor ++ __ andi(t0, disp_hdr, markWord::monitor_value); ++ __ bnez(t0, object_has_monitor); + -+ ins_encode %{ -+ __ sh(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++ if (!UseHeavyMonitors) { ++ // Set tmp to be (markWord of object | UNLOCK_VALUE). ++ __ ori(tmp, disp_hdr, markWord::unlocked_value); + -+ ins_pipe(istore_reg_mem); -+%} ++ // Initialize the box. (Must happen before we update the object mark!) ++ __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); + -+instruct storeimmC0(immI0 zero, memory mem) -+%{ -+ match(Set mem (StoreC mem zero)); ++ // Compare object markWord with an unlocked value (tmp) and if ++ // equal exchange the stack address of our box with object markWord. ++ // On failure disp_hdr contains the possibly locked markWord. ++ __ cmpxchg(/*memory address*/oop, /*expected value*/tmp, /*new value*/box, Assembler::int64, Assembler::aq, ++ Assembler::rl, /*result*/disp_hdr); ++ __ mv(flag, zr); ++ __ beq(disp_hdr, tmp, cont); // prepare zero flag and goto cont if we won the cas ++ ++ assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); ++ ++ // If the compare-and-exchange succeeded, then we found an unlocked ++ // object, will have now locked it will continue at label cont ++ // We did not see an unlocked object so try the fast recursive case. ++ ++ // Check if the owner is self by comparing the value in the ++ // markWord of object (disp_hdr) with the stack pointer. ++ __ sub(disp_hdr, disp_hdr, sp); ++ __ li(tmp, (intptr_t) (~(os::vm_page_size()-1) | (uintptr_t)markWord::lock_mask_in_place)); ++ // If (mark & lock_mask) == 0 and mark - sp < page_size, we are stack-locking and goto cont, ++ // hence we can store 0 as the displaced header in the box, which indicates that it is a ++ // recursive lock. ++ __ andr(tmp/*==0?*/, disp_hdr, tmp); ++ __ sd(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes())); ++ __ mv(flag, tmp); // we can use the value of tmp as the result here ++ } else { ++ __ mv(flag, 1); // Set non-zero flag to indicate 'failure' -> take slow-path ++ } + -+ ins_cost(STORE_COST); -+ format %{ "sh zr, $mem\t# short, #@storeimmC0" %} ++ __ j(cont); + -+ ins_encode %{ -+ __ sh(zr, Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++ // Handle existing monitor. 
++ __ bind(object_has_monitor); ++ // The object's monitor m is unlocked iff m->owner == NULL, ++ // otherwise m->owner may contain a thread or a stack address. ++ // ++ // Try to CAS m->owner from NULL to current thread. ++ __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes() - markWord::monitor_value)); ++ __ cmpxchg(/*memory address*/tmp, /*expected value*/zr, /*new value*/xthread, Assembler::int64, Assembler::aq, ++ Assembler::rl, /*result*/flag); // cas succeeds if flag == zr(expected) ++ ++ // Store a non-null value into the box to avoid looking like a re-entrant ++ // lock. The fast-path monitor unlock code checks for ++ // markWord::monitor_value so use markWord::unused_mark which has the ++ // relevant bit set, and also matches ObjectSynchronizer::slow_enter. ++ __ mv(tmp, (address)markWord::unused_mark().value()); ++ __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); + -+ ins_pipe(istore_mem); -+%} ++ __ beqz(flag, cont); // CAS success means locking succeeded + -+// Store Integer -+instruct storeI(iRegIorL2I src, memory mem) -+%{ -+ match(Set mem(StoreI mem src)); ++ __ bne(flag, xthread, cont); // Check for recursive locking + -+ ins_cost(STORE_COST); -+ format %{ "sw $src, $mem\t# int, #@storeI" %} ++ // Recursive lock case ++ __ mv(flag, zr); ++ __ ld(tmp, Address(disp_hdr, ObjectMonitor::recursions_offset_in_bytes() - markWord::monitor_value)); ++ __ add(tmp, tmp, 1u); ++ __ sd(tmp, Address(disp_hdr, ObjectMonitor::recursions_offset_in_bytes() - markWord::monitor_value)); + -+ ins_encode %{ -+ __ sw(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ __ bind(cont); + %} + -+ ins_pipe(istore_reg_mem); -+%} -+ -+instruct storeimmI0(immI0 zero, memory mem) -+%{ -+ match(Set mem(StoreI mem zero)); ++ // using cr flag to indicate the fast_unlock result: 0 for success; others failed. ++ enc_class riscv_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp1, iRegP tmp2) %{ ++ C2_MacroAssembler _masm(&cbuf); ++ Register flag = t1; ++ Register oop = as_Register($object$$reg); ++ Register box = as_Register($box$$reg); ++ Register disp_hdr = as_Register($tmp1$$reg); ++ Register tmp = as_Register($tmp2$$reg); ++ Label cont; ++ Label object_has_monitor; + -+ ins_cost(STORE_COST); -+ format %{ "sw zr, $mem\t# int, #@storeimmI0" %} ++ assert_different_registers(oop, box, tmp, disp_hdr, flag); + -+ ins_encode %{ -+ __ sw(zr, Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++ if (!UseHeavyMonitors) { ++ // Find the lock address and load the displaced header from the stack. ++ __ ld(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes())); + -+ ins_pipe(istore_mem); -+%} ++ // If the displaced header is 0, we have a recursive unlock. ++ __ mv(flag, disp_hdr); ++ __ beqz(disp_hdr, cont); ++ } + -+// Store Long (64 bit signed) -+instruct storeL(iRegL src, memory mem) -+%{ -+ match(Set mem (StoreL mem src)); ++ // Handle existing monitor. ++ __ ld(tmp, Address(oop, oopDesc::mark_offset_in_bytes())); ++ __ andi(t0, disp_hdr, markWord::monitor_value); ++ __ bnez(t0, object_has_monitor); ++ ++ if (!UseHeavyMonitors) { ++ // Check if it is still a light weight lock, this is true if we ++ // see the stack address of the basicLock in the markWord of the ++ // object. 
++ ++ __ cmpxchg(/*memory address*/oop, /*expected value*/box, /*new value*/disp_hdr, Assembler::int64, Assembler::relaxed, ++ Assembler::rl, /*result*/tmp); ++ __ xorr(flag, box, tmp); // box == tmp if cas succeeds ++ } else { ++ __ mv(flag, 1); // Set non-zero flag to indicate 'failure' -> take slow path ++ } ++ __ j(cont); + -+ ins_cost(STORE_COST); -+ format %{ "sd $src, $mem\t# long, #@storeL" %} ++ assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); + -+ ins_encode %{ -+ __ sd(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++ // Handle existing monitor. ++ __ bind(object_has_monitor); ++ STATIC_ASSERT(markWord::monitor_value <= INT_MAX); ++ __ add(tmp, tmp, -(int)markWord::monitor_value); // monitor ++ __ ld(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes())); + -+ ins_pipe(istore_reg_mem); -+%} ++ Label notRecursive; ++ __ beqz(disp_hdr, notRecursive); // Will be 0 if not recursive. + -+// Store Long (64 bit signed) -+instruct storeimmL0(immL0 zero, memory mem) -+%{ -+ match(Set mem (StoreL mem zero)); ++ // Recursive lock ++ __ addi(disp_hdr, disp_hdr, -1); ++ __ sd(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes())); ++ __ mv(flag, zr); ++ __ j(cont); + -+ ins_cost(STORE_COST); -+ format %{ "sd zr, $mem\t# long, #@storeimmL0" %} ++ __ bind(notRecursive); ++ __ ld(flag, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes())); ++ __ ld(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes())); ++ __ orr(flag, flag, disp_hdr); // Will be 0 if both are 0. ++ __ bnez(flag, cont); ++ // need a release store here ++ __ la(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); ++ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); ++ __ sd(zr, Address(tmp)); // set unowned + -+ ins_encode %{ -+ __ sd(zr, Address(as_Register($mem$$base), $mem$$disp)); ++ __ bind(cont); + %} + -+ ins_pipe(istore_mem); -+%} -+ -+// Store Pointer -+instruct storeP(iRegP src, memory mem) -+%{ -+ match(Set mem (StoreP mem src)); -+ -+ ins_cost(STORE_COST); -+ format %{ "sd $src, $mem\t# ptr, #@storeP" %} ++ // arithmetic encodings + -+ ins_encode %{ -+ __ sd(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ enc_class riscv_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{ ++ C2_MacroAssembler _masm(&cbuf); ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ __ corrected_idivl(dst_reg, src1_reg, src2_reg, false); + %} + -+ ins_pipe(istore_reg_mem); -+%} -+ -+// Store Pointer -+instruct storeimmP0(immP0 zero, memory mem) -+%{ -+ match(Set mem (StoreP mem zero)); ++ enc_class riscv_enc_div(iRegI dst, iRegI src1, iRegI src2) %{ ++ C2_MacroAssembler _masm(&cbuf); ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ __ corrected_idivq(dst_reg, src1_reg, src2_reg, false); ++ %} + -+ ins_cost(STORE_COST); -+ format %{ "sd zr, $mem\t# ptr, #@storeimmP0" %} ++ enc_class riscv_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{ ++ C2_MacroAssembler _masm(&cbuf); ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ __ corrected_idivl(dst_reg, src1_reg, src2_reg, true); ++ %} + -+ ins_encode %{ -+ __ sd(zr, Address(as_Register($mem$$base), $mem$$disp)); ++ enc_class riscv_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{ ++ 
C2_MacroAssembler _masm(&cbuf); ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ __ corrected_idivq(dst_reg, src1_reg, src2_reg, true); + %} + -+ ins_pipe(istore_mem); -+%} ++ enc_class riscv_enc_tail_call(iRegP jump_target) %{ ++ C2_MacroAssembler _masm(&cbuf); ++ Assembler::CompressibleRegion cr(&_masm); ++ Register target_reg = as_Register($jump_target$$reg); ++ __ jr(target_reg); ++ %} + -+// Store Compressed Pointer -+instruct storeN(iRegN src, memory mem) -+%{ -+ match(Set mem (StoreN mem src)); ++ enc_class riscv_enc_tail_jmp(iRegP jump_target) %{ ++ C2_MacroAssembler _masm(&cbuf); ++ Assembler::CompressibleRegion cr(&_masm); ++ Register target_reg = as_Register($jump_target$$reg); ++ // exception oop should be in x10 ++ // ret addr has been popped into ra ++ // callee expects it in x13 ++ __ mv(x13, ra); ++ __ jr(target_reg); ++ %} + -+ ins_cost(STORE_COST); -+ format %{ "sw $src, $mem\t# compressed ptr, #@storeN" %} ++ enc_class riscv_enc_rethrow() %{ ++ C2_MacroAssembler _masm(&cbuf); ++ __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub())); ++ %} + -+ ins_encode %{ -+ __ sw(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ enc_class riscv_enc_ret() %{ ++ C2_MacroAssembler _masm(&cbuf); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ ret(); + %} + -+ ins_pipe(istore_reg_mem); +%} + -+instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem) -+%{ -+ match(Set mem (StoreN mem zero)); -+ predicate(Universe::narrow_oop_base() == NULL && -+ Universe::narrow_klass_base() == NULL); -+ -+ ins_cost(STORE_COST); -+ format %{ "sw rheapbase, $mem\t# compressed ptr (rheapbase==0), #@storeImmN0" %} ++//----------FRAME-------------------------------------------------------------- ++// Definition of frame structure and management information. ++// ++// S T A C K L A Y O U T Allocators stack-slot number ++// | (to get allocators register number ++// G Owned by | | v add OptoReg::stack0()) ++// r CALLER | | ++// o | +--------+ pad to even-align allocators stack-slot ++// w V | pad0 | numbers; owned by CALLER ++// t -----------+--------+----> Matcher::_in_arg_limit, unaligned ++// h ^ | in | 5 ++// | | args | 4 Holes in incoming args owned by SELF ++// | | | | 3 ++// | | +--------+ ++// V | | old out| Empty on Intel, window on Sparc ++// | old |preserve| Must be even aligned. ++// | SP-+--------+----> Matcher::_old_SP, even aligned ++// | | in | 3 area for Intel ret address ++// Owned by |preserve| Empty on Sparc. ++// SELF +--------+ ++// | | pad2 | 2 pad to align old SP ++// | +--------+ 1 ++// | | locks | 0 ++// | +--------+----> OptoReg::stack0(), even aligned ++// | | pad1 | 11 pad to align new SP ++// | +--------+ ++// | | | 10 ++// | | spills | 9 spills ++// V | | 8 (pad0 slot for callee) ++// -----------+--------+----> Matcher::_out_arg_limit, unaligned ++// ^ | out | 7 ++// | | args | 6 Holes in outgoing args owned by CALLEE ++// Owned by +--------+ ++// CALLEE | new out| 6 Empty on Intel, window on Sparc ++// | new |preserve| Must be even-aligned. ++// | SP-+--------+----> Matcher::_new_SP, even aligned ++// | | | ++// ++// Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is ++// known from SELF's arguments and the Java calling convention. ++// Region 6-7 is determined per call site. ++// Note 2: If the calling convention leaves holes in the incoming argument ++// area, those holes are owned by SELF. 
Holes in the outgoing area ++// are owned by the CALLEE. Holes should not be nessecary in the ++// incoming area, as the Java calling convention is completely under ++// the control of the AD file. Doubles can be sorted and packed to ++// avoid holes. Holes in the outgoing arguments may be nessecary for ++// varargs C calling conventions. ++// Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is ++// even aligned with pad0 as needed. ++// Region 6 is even aligned. Region 6-7 is NOT even aligned; ++// (the latter is true on Intel but is it false on RISCV?) ++// region 6-11 is even aligned; it may be padded out more so that ++// the region from SP to FP meets the minimum stack alignment. ++// Note 4: For I2C adapters, the incoming FP may not meet the minimum stack ++// alignment. Region 11, pad1, may be dynamically extended so that ++// SP meets the minimum alignment. + -+ ins_encode %{ -+ __ sw(as_Register($heapbase$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++frame %{ ++ // These three registers define part of the calling convention ++ // between compiled code and the interpreter. + -+ ins_pipe(istore_reg_mem); -+%} ++ // Inline Cache Register or methodOop for I2C. ++ inline_cache_reg(R31); + -+// Store Float -+instruct storeF(fRegF src, memory mem) -+%{ -+ match(Set mem (StoreF mem src)); ++ // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset] ++ cisc_spilling_operand_name(indOffset); + -+ ins_cost(STORE_COST); -+ format %{ "fsw $src, $mem\t# float, #@storeF" %} ++ // Number of stack slots consumed by locking an object ++ // generate Compile::sync_stack_slots ++ // VMRegImpl::slots_per_word = wordSize / stack_slot_size = 8 / 4 = 2 ++ sync_stack_slots(1 * VMRegImpl::slots_per_word); + -+ ins_encode %{ -+ __ fsw(as_FloatRegister($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++ // Compiled code's Frame Pointer ++ frame_pointer(R2); + -+ ins_pipe(fp_store_reg_s); -+%} ++ // Interpreter stores its frame pointer in a register which is ++ // stored to the stack by I2CAdaptors. ++ // I2CAdaptors convert from interpreted java to compiled java. ++ interpreter_frame_pointer(R8); + -+// Store Double -+instruct storeD(fRegD src, memory mem) -+%{ -+ match(Set mem (StoreD mem src)); ++ // Stack alignment requirement ++ stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes) + -+ ins_cost(STORE_COST); -+ format %{ "fsd $src, $mem\t# double, #@storeD" %} ++ // Number of outgoing stack slots killed above the out_preserve_stack_slots ++ // for calls to C. Supports the var-args backing area for register parms. ++ varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes / BytesPerInt); + -+ ins_encode %{ -+ __ fsd(as_FloatRegister($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++ // The after-PROLOG location of the return address. Location of ++ // return address specifies a type (REG or STACK) and a number ++ // representing the register number (i.e. - use a register name) or ++ // stack slot. ++ // Ret Addr is on stack in slot 0 if no locks or verification or alignment. 
++ // Otherwise, it is above the locks and verification slot and alignment word ++ // TODO this may well be correct but need to check why that - 2 is there ++ // ppc port uses 0 but we definitely need to allow for fixed_slots ++ // which folds in the space used for monitors ++ return_addr(STACK - 2 + ++ align_up((Compile::current()->in_preserve_stack_slots() + ++ Compile::current()->fixed_slots()), ++ stack_alignment_in_slots())); + -+ ins_pipe(fp_store_reg_d); -+%} ++ // Location of compiled Java return values. Same as C for now. ++ return_value ++ %{ ++ assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, ++ "only return normal values"); + -+// Store Compressed Klass Pointer -+instruct storeNKlass(iRegN src, memory mem) -+%{ -+ match(Set mem (StoreNKlass mem src)); ++ static const int lo[Op_RegL + 1] = { // enum name ++ 0, // Op_Node ++ 0, // Op_Set ++ R10_num, // Op_RegN ++ R10_num, // Op_RegI ++ R10_num, // Op_RegP ++ F10_num, // Op_RegF ++ F10_num, // Op_RegD ++ R10_num // Op_RegL ++ }; + -+ ins_cost(STORE_COST); -+ format %{ "sw $src, $mem\t# compressed klass ptr, #@storeNKlass" %} ++ static const int hi[Op_RegL + 1] = { // enum name ++ 0, // Op_Node ++ 0, // Op_Set ++ OptoReg::Bad, // Op_RegN ++ OptoReg::Bad, // Op_RegI ++ R10_H_num, // Op_RegP ++ OptoReg::Bad, // Op_RegF ++ F10_H_num, // Op_RegD ++ R10_H_num // Op_RegL ++ }; + -+ ins_encode %{ -+ __ sw(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ return OptoRegPair(hi[ideal_reg], lo[ideal_reg]); + %} -+ -+ ins_pipe(istore_reg_mem); +%} + -+// ============================================================================ -+// Atomic operation instructions -+// -+// Intel and SPARC both implement Ideal Node LoadPLocked and -+// Store{PIL}Conditional instructions using a normal load for the -+// LoadPLocked and a CAS for the Store{PIL}Conditional. -+// -+// The ideal code appears only to use LoadPLocked/storePConditional as a -+// pair to lock object allocations from Eden space when not using -+// TLABs. -+// -+// There does not appear to be a Load{IL}Locked Ideal Node and the -+// Ideal code appears to use Store{IL}Conditional as an alias for CAS -+// and to use StoreIConditional only for 32-bit and StoreLConditional -+// only for 64-bit. -+// -+// We implement LoadPLocked and storePConditional instructions using, -+// respectively the RISCV hw load-reserve and store-conditional -+// instructions. Whereas we must implement each of -+// Store{IL}Conditional using a CAS which employs a pair of -+// instructions comprising a load-reserve followed by a -+// store-conditional. -+ ++//----------ATTRIBUTES--------------------------------------------------------- ++//----------Operand Attributes------------------------------------------------- ++op_attrib op_cost(1); // Required cost attribute + -+// Locked-load (load reserved) of the current heap-top -+// used when updating the eden heap top -+// implemented using lr_d on RISCV64 -+instruct loadPLocked(iRegPNoSp dst, indirect mem) -+%{ -+ match(Set dst (LoadPLocked mem)); ++//----------Instruction Attributes--------------------------------------------- ++ins_attrib ins_cost(DEFAULT_COST); // Required cost attribute ++ins_attrib ins_size(32); // Required size attribute (in bits) ++ins_attrib ins_short_branch(0); // Required flag: is this instruction ++ // a non-matching short branch variant ++ // of some long branch? 
++ins_attrib ins_alignment(4); // Required alignment attribute (must ++ // be a power of 2) specifies the ++ // alignment that some part of the ++ // instruction (not necessarily the ++ // start) requires. If > 1, a ++ // compute_padding() function must be ++ // provided for the instruction + -+ ins_cost(ALU_COST * 2 + LOAD_COST); ++//----------OPERANDS----------------------------------------------------------- ++// Operand definitions must precede instruction definitions for correct parsing ++// in the ADLC because operands constitute user defined types which are used in ++// instruction definitions. + -+ format %{ "lr.d $dst, $mem\t# ptr load reserved, #@loadPLocked" %} ++//----------Simple Operands---------------------------------------------------- + -+ ins_encode %{ -+ __ la(t0, Address(as_Register($mem$$base), $mem$$disp)); -+ __ lr_d($dst$$Register, t0, Assembler::aq); -+ %} ++// Integer operands 32 bit ++// 32 bit immediate ++operand immI() ++%{ ++ match(ConI); + -+ ins_pipe(pipe_serial); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+// Conditional-store of the updated heap-top. -+// Used during allocation of the shared heap. -+// implemented using sc_d on RISCV. -+instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFlagsReg cr) ++// 32 bit zero ++operand immI0() +%{ -+ match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); -+ -+ ins_cost(ALU_COST * 2 + STORE_COST); ++ predicate(n->get_int() == 0); ++ match(ConI); + -+ format %{ -+ "sc_d t1, $newval $heap_top_ptr,\t# ptr store conditional, #@storePConditional" -+ %} ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ ins_encode %{ -+ __ la(t0, Address(as_Register($heap_top_ptr$$base), $heap_top_ptr$$disp)); -+ __ sc_d($cr$$Register, $newval$$Register, t0, Assembler::rl); -+ %} ++// 32 bit unit increment ++operand immI_1() ++%{ ++ predicate(n->get_int() == 1); ++ match(ConI); + -+ ins_pipe(pipe_serial); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+// storeLConditional is used by PhaseMacroExpand::expand_lock_node -+// when attempting to rebias a lock towards the current thread. -+instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) ++// 32 bit unit decrement ++operand immI_M1() +%{ -+ match(Set cr (StoreLConditional mem (Binary oldval newval))); ++ predicate(n->get_int() == -1); ++ match(ConI); + -+ ins_cost(LOAD_COST + STORE_COST + 2 * BRANCH_COST); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ format %{ -+ "cmpxchg t1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval" -+ "xorr $cr, $cr, $oldval\t# $cr == 0 on successful write, #@storeLConditional" -+ %} ++// Unsigned Integer Immediate: 6-bit int, greater than 32 ++operand uimmI6_ge32() %{ ++ predicate(((unsigned int)(n->get_int()) < 64) && (n->get_int() >= 32)); ++ match(ConI); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ ins_encode %{ -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $cr$$Register); -+ __ xorr($cr$$Register,$cr$$Register, $oldval$$Register); -+ %} ++operand immI_le_4() ++%{ ++ predicate(n->get_int() <= 4); ++ match(ConI); + -+ ins_pipe(pipe_slow); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+// storeIConditional also has acquire semantics, for no better reason -+// than matching storeLConditional. 
-+instruct storeIConditional(indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) ++operand immI_16() +%{ -+ match(Set cr (StoreIConditional mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2); ++ predicate(n->get_int() == 16); ++ match(ConI); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ format %{ -+ "cmpxchgw t1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval" -+ "xorr $cr, $cr, $oldval\t# $cr == 0 on successful write, #@storeIConditional" -+ %} ++operand immI_24() ++%{ ++ predicate(n->get_int() == 24); ++ match(ConI); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ ins_encode %{ -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $cr$$Register); -+ __ xorr($cr$$Register,$cr$$Register, $oldval$$Register); -+ %} ++operand immI_31() ++%{ ++ predicate(n->get_int() == 31); ++ match(ConI); + -+ ins_pipe(pipe_slow); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+// standard CompareAndSwapX when we are using barriers -+// these have higher priority than the rules selected by a predicate -+instruct compareAndSwapB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++operand immI_63() +%{ -+ match(Set res (CompareAndSwapB mem (Binary oldval newval))); ++ predicate(n->get_int() == 63); ++ match(ConI); + -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 10 + BRANCH_COST * 4); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++// 32 bit integer valid for add immediate ++operand immIAdd() ++%{ ++ predicate(Assembler::operand_valid_for_add_immediate((int64_t)n->get_int())); ++ match(ConI); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ format %{ -+ "cmpxchg $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapB" -+ %} ++// 32 bit integer valid for sub immediate ++operand immISub() ++%{ ++ predicate(Assembler::operand_valid_for_add_immediate(-(int64_t)n->get_int())); ++ match(ConI); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ ins_encode %{ -+ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, -+ Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register, -+ true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); -+ %} ++// 5 bit signed value. 
++operand immI5() ++%{ ++ predicate(n->get_int() <= 15 && n->get_int() >= -16); ++ match(ConI); + -+ ins_pipe(pipe_slow); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+instruct compareAndSwapS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++// 5 bit signed value (simm5) ++operand immL5() +%{ -+ match(Set res (CompareAndSwapS mem (Binary oldval newval))); ++ predicate(n->get_long() <= 15 && n->get_long() >= -16); ++ match(ConL); + -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 11 + BRANCH_COST * 4); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++// Integer operands 64 bit ++// 64 bit immediate ++operand immL() ++%{ ++ match(ConL); + -+ format %{ -+ "cmpxchg $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapS" -+ %} ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ ins_encode %{ -+ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, -+ Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register, -+ true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); -+ %} ++// 64 bit zero ++operand immL0() ++%{ ++ predicate(n->get_long() == 0); ++ match(ConL); + -+ ins_pipe(pipe_slow); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval) ++// Pointer operands ++// Pointer Immediate ++operand immP() +%{ -+ match(Set res (CompareAndSwapI mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); ++ match(ConP); + -+ format %{ -+ "cmpxchg $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapI" -+ %} ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ ins_encode(riscv_enc_cmpxchgw(res, mem, oldval, newval)); ++// NULL Pointer Immediate ++operand immP0() ++%{ ++ predicate(n->get_ptr() == 0); ++ match(ConP); + -+ ins_pipe(pipe_slow); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval) ++// Pointer Immediate One ++// this is used in object initialization (initial object header) ++operand immP_1() +%{ -+ match(Set res (CompareAndSwapL mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); ++ predicate(n->get_ptr() == 1); ++ match(ConP); + -+ format %{ -+ "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapL" -+ %} ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ ins_encode(riscv_enc_cmpxchg(res, mem, oldval, newval)); ++// Card Table Byte Map Base ++operand immByteMapBase() ++%{ ++ // Get base of card map ++ predicate(BarrierSet::barrier_set()->is_a(BarrierSet::CardTableBarrierSet) && ++ (CardTable::CardValue*)n->get_ptr() == ++ ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base()); ++ match(ConP); + -+ ins_pipe(pipe_slow); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) ++// Int Immediate: low 16-bit mask ++operand immI_16bits() +%{ -+ match(Set res (CompareAndSwapP mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); ++ predicate(n->get_int() == 0xFFFF); ++ match(ConI); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ format %{ -+ "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapP" -+ %} ++// Long Immediate: low 32-bit mask ++operand immL_32bits() ++%{ ++ predicate(n->get_long() == 0xFFFFFFFFL); ++ match(ConL); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ ins_encode(riscv_enc_cmpxchg(res, mem, oldval, newval)); ++// 64 bit unit decrement ++operand immL_M1() ++%{ ++ predicate(n->get_long() == -1); ++ match(ConL); + -+ ins_pipe(pipe_slow); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval) -+%{ -+ match(Set res (CompareAndSwapN mem (Binary oldval newval))); + -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 8 + BRANCH_COST * 4); ++// 32 bit offset of pc in thread anchor + -+ format %{ -+ "cmpxchg $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapN" -+ %} -+ -+ ins_encode(riscv_enc_cmpxchgn(res, mem, oldval, newval)); ++operand immL_pc_off() ++%{ ++ predicate(n->get_long() == in_bytes(JavaThread::frame_anchor_offset()) + ++ in_bytes(JavaFrameAnchor::last_Java_pc_offset())); ++ match(ConL); + -+ ins_pipe(pipe_slow); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+// alternative CompareAndSwapX when we are eliding barriers -+instruct compareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++// 64 bit integer valid for add immediate ++operand immLAdd() +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set res (CompareAndSwapB mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 10 + BRANCH_COST * 4); -+ -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); -+ -+ format %{ -+ "cmpxchg_acq $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapBAcq" -+ %} -+ -+ ins_encode %{ -+ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, -+ Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register, -+ true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); -+ %} -+ -+ ins_pipe(pipe_slow); ++ predicate(Assembler::operand_valid_for_add_immediate(n->get_long())); ++ match(ConL); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+instruct compareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++// 64 bit integer valid for sub immediate ++operand immLSub() +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set res (CompareAndSwapS mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 11 + BRANCH_COST * 4); -+ -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); -+ -+ format %{ -+ "cmpxchg_acq $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapSAcq" -+ %} -+ -+ ins_encode %{ -+ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, -+ Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register, -+ true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); -+ %} -+ -+ ins_pipe(pipe_slow); ++ predicate(Assembler::operand_valid_for_add_immediate(-(n->get_long()))); ++ match(ConL); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval) ++// Narrow pointer operands ++// Narrow Pointer Immediate ++operand immN() +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set res (CompareAndSwapI mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); -+ -+ format %{ -+ "cmpxchg_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapIAcq" -+ %} -+ -+ ins_encode(riscv_enc_cmpxchgw_acq(res, mem, oldval, newval)); ++ match(ConN); + -+ ins_pipe(pipe_slow); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval) ++// Narrow NULL Pointer Immediate ++operand immN0() +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set res (CompareAndSwapL mem (Binary oldval newval))); ++ predicate(n->get_narrowcon() == 0); ++ match(ConN); + -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ format %{ -+ "cmpxchg_acq $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapLAcq" -+ %} ++operand immNKlass() ++%{ ++ match(ConNKlass); + -+ ins_encode(riscv_enc_cmpxchg_acq(res, mem, oldval, newval)); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ ins_pipe(pipe_slow); ++// Float and Double operands ++// Double Immediate ++operand immD() ++%{ ++ match(ConD); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) ++// Double Immediate: +0.0d ++operand immD0() +%{ -+ predicate(needs_acquiring_load_exclusive(n)); ++ predicate(jlong_cast(n->getd()) == 0); ++ match(ConD); + -+ match(Set res (CompareAndSwapP mem (Binary oldval newval))); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); ++// Float Immediate ++operand immF() ++%{ ++ match(ConF); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ format %{ -+ "cmpxchg_acq $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapPAcq" -+ %} ++// Float Immediate: +0.0f. ++operand immF0() ++%{ ++ predicate(jint_cast(n->getf()) == 0); ++ match(ConF); + -+ ins_encode(riscv_enc_cmpxchg_acq(res, mem, oldval, newval)); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ ins_pipe(pipe_slow); ++operand immIOffset() ++%{ ++ predicate(is_imm_in_range(n->get_int(), 12, 0)); ++ match(ConI); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval) ++operand immLOffset() +%{ -+ predicate(needs_acquiring_load_exclusive(n)); ++ predicate(is_imm_in_range(n->get_long(), 12, 0)); ++ match(ConL); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ match(Set res (CompareAndSwapN mem (Binary oldval newval))); ++// Scale values ++operand immIScale() ++%{ ++ predicate(1 <= n->get_int() && (n->get_int() <= 3)); ++ match(ConI); + -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 8 + BRANCH_COST * 4); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ format %{ -+ "cmpxchg_acq $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapNAcq" -+ %} ++// Integer 32 bit Register Operands ++operand iRegI() ++%{ ++ constraint(ALLOC_IN_RC(any_reg32)); ++ match(RegI); ++ match(iRegINoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ ins_encode(riscv_enc_cmpxchgn_acq(res, mem, oldval, newval)); ++// Integer 32 bit Register not Special ++operand iRegINoSp() ++%{ ++ constraint(ALLOC_IN_RC(no_special_reg32)); ++ match(RegI); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ ins_pipe(pipe_slow); ++// Register R10 only ++operand iRegI_R10() ++%{ ++ constraint(ALLOC_IN_RC(int_r10_reg)); ++ match(RegI); ++ match(iRegINoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+// Sundry CAS operations. Note that release is always true, -+// regardless of the memory ordering of the CAS. This is because we -+// need the volatile case to be sequentially consistent but there is -+// no trailing StoreLoad barrier emitted by C2. Unfortunately we -+// can't check the type of memory ordering here, so we always emit a -+// sc_d(w) with rl bit set. 
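-+//
-+// A rough sketch of the retry loop such a strong cmpxchg conceptually expands
-+// to (illustrative only; the actual emission is done by the cmpxchg helpers
-+// called from the encodings above):
-+//
-+//   retry:
-+//     lr.d     t0, (addr)           // load-reserved; aq bit set for the *Acq forms
-+//     bne      t0, oldval, done     // current value differs, fail
-+//     sc.d.rl  t1, newval, (addr)   // store-conditional always carries the rl bit
-+//     bnez     t1, retry            // reservation lost, retry
-+//   done:
-+//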
-+instruct compareAndExchangeB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++// Register R12 only ++operand iRegI_R12() +%{ -+ match(Set res (CompareAndExchangeB mem (Binary oldval newval))); ++ constraint(ALLOC_IN_RC(int_r12_reg)); ++ match(RegI); ++ match(iRegINoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 5); ++// Register R13 only ++operand iRegI_R13() ++%{ ++ constraint(ALLOC_IN_RC(int_r13_reg)); ++ match(RegI); ++ match(iRegINoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++// Register R14 only ++operand iRegI_R14() ++%{ ++ constraint(ALLOC_IN_RC(int_r14_reg)); ++ match(RegI); ++ match(iRegINoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ format %{ -+ "cmpxchg $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeB" -+ %} ++// Integer 64 bit Register Operands ++operand iRegL() ++%{ ++ constraint(ALLOC_IN_RC(any_reg)); ++ match(RegL); ++ match(iRegLNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ ins_encode %{ -+ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, -+ /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); -+ %} ++// Integer 64 bit Register not Special ++operand iRegLNoSp() ++%{ ++ constraint(ALLOC_IN_RC(no_special_reg)); ++ match(RegL); ++ match(iRegL_R10); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ ins_pipe(pipe_slow); ++// Long 64 bit Register R28 only ++operand iRegL_R28() ++%{ ++ constraint(ALLOC_IN_RC(r28_reg)); ++ match(RegL); ++ match(iRegLNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+instruct compareAndExchangeS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++// Long 64 bit Register R29 only ++operand iRegL_R29() +%{ -+ match(Set res (CompareAndExchangeS mem (Binary oldval newval))); ++ constraint(ALLOC_IN_RC(r29_reg)); ++ match(RegL); ++ match(iRegLNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 6); ++// Long 64 bit Register R30 only ++operand iRegL_R30() ++%{ ++ constraint(ALLOC_IN_RC(r30_reg)); ++ match(RegL); ++ match(iRegLNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++// Pointer Register Operands ++// Pointer Register ++operand iRegP() ++%{ ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(RegP); ++ match(iRegPNoSp); ++ match(iRegP_R10); ++ match(javaThread_RegP); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ format %{ -+ "cmpxchg $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeS" -+ %} ++// Pointer 64 bit Register not Special ++operand iRegPNoSp() ++%{ ++ constraint(ALLOC_IN_RC(no_special_ptr_reg)); ++ match(RegP); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ ins_encode %{ -+ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, -+ /*acquire*/ Assembler::relaxed, 
/*release*/ Assembler::rl, $res$$Register, -+ /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); -+ %} ++operand iRegP_R10() ++%{ ++ constraint(ALLOC_IN_RC(r10_reg)); ++ match(RegP); ++ // match(iRegP); ++ match(iRegPNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ ins_pipe(pipe_slow); ++// Pointer 64 bit Register R11 only ++operand iRegP_R11() ++%{ ++ constraint(ALLOC_IN_RC(r11_reg)); ++ match(RegP); ++ match(iRegPNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+instruct compareAndExchangeI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) ++operand iRegP_R12() +%{ -+ match(Set res (CompareAndExchangeI mem (Binary oldval newval))); ++ constraint(ALLOC_IN_RC(r12_reg)); ++ match(RegP); ++ // match(iRegP); ++ match(iRegPNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); ++// Pointer 64 bit Register R13 only ++operand iRegP_R13() ++%{ ++ constraint(ALLOC_IN_RC(r13_reg)); ++ match(RegP); ++ match(iRegPNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ effect(TEMP_DEF res); ++operand iRegP_R14() ++%{ ++ constraint(ALLOC_IN_RC(r14_reg)); ++ match(RegP); ++ // match(iRegP); ++ match(iRegPNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ format %{ -+ "cmpxchg $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeI" -+ %} ++operand iRegP_R15() ++%{ ++ constraint(ALLOC_IN_RC(r15_reg)); ++ match(RegP); ++ // match(iRegP); ++ match(iRegPNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ ins_encode %{ -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); -+ %} ++operand iRegP_R16() ++%{ ++ constraint(ALLOC_IN_RC(r16_reg)); ++ match(RegP); ++ // match(iRegP); ++ match(iRegPNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ ins_pipe(pipe_slow); ++// Pointer 64 bit Register R28 only ++operand iRegP_R28() ++%{ ++ constraint(ALLOC_IN_RC(r28_reg)); ++ match(RegP); ++ match(iRegPNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+instruct compareAndExchangeL(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval) ++// Pointer Register Operands ++// Narrow Pointer Register ++operand iRegN() +%{ -+ match(Set res (CompareAndExchangeL mem (Binary oldval newval))); ++ constraint(ALLOC_IN_RC(any_reg32)); ++ match(RegN); ++ match(iRegNNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); ++// Integer 64 bit Register not Special ++operand iRegNNoSp() ++%{ ++ constraint(ALLOC_IN_RC(no_special_reg32)); ++ match(RegN); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ effect(TEMP_DEF res); ++// heap base register -- used for encoding immN0 ++operand iRegIHeapbase() ++%{ ++ constraint(ALLOC_IN_RC(heapbase_reg)); ++ match(RegI); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ format %{ -+ "cmpxchg $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeL" -+ %} ++// Long 64 bit Register R10 only ++operand iRegL_R10() ++%{ ++ constraint(ALLOC_IN_RC(r10_reg)); ++ match(RegL); ++ match(iRegLNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ ins_encode %{ -+ __ cmpxchg(as_Register($mem$$base), 
$oldval$$Register, $newval$$Register, Assembler::int64, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); -+ %} ++// Float Register ++// Float register operands ++operand fRegF() ++%{ ++ constraint(ALLOC_IN_RC(float_reg)); ++ match(RegF); + -+ ins_pipe(pipe_slow); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval) ++// Double Register ++// Double register operands ++operand fRegD() +%{ -+ match(Set res (CompareAndExchangeN mem (Binary oldval newval))); ++ constraint(ALLOC_IN_RC(double_reg)); ++ match(RegD); + -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 3); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ effect(TEMP_DEF res); ++// Generic vector class. This will be used for ++// all vector operands. ++operand vReg() ++%{ ++ constraint(ALLOC_IN_RC(vectora_reg)); ++ match(VecA); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ format %{ -+ "cmpxchg $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeN" -+ %} ++operand vReg_V1() ++%{ ++ constraint(ALLOC_IN_RC(v1_reg)); ++ match(VecA); ++ match(vReg); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ ins_encode %{ -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); -+ %} ++operand vReg_V2() ++%{ ++ constraint(ALLOC_IN_RC(v2_reg)); ++ match(VecA); ++ match(vReg); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ ins_pipe(pipe_slow); ++operand vReg_V3() ++%{ ++ constraint(ALLOC_IN_RC(v3_reg)); ++ match(VecA); ++ match(vReg); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+instruct compareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) ++operand vReg_V4() +%{ -+ match(Set res (CompareAndExchangeP mem (Binary oldval newval))); ++ constraint(ALLOC_IN_RC(v4_reg)); ++ match(VecA); ++ match(vReg); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); ++operand vReg_V5() ++%{ ++ constraint(ALLOC_IN_RC(v5_reg)); ++ match(VecA); ++ match(vReg); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ effect(TEMP_DEF res); ++// Java Thread Register ++operand javaThread_RegP(iRegP reg) ++%{ ++ constraint(ALLOC_IN_RC(java_thread_reg)); // java_thread_reg ++ match(reg); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ format %{ -+ "cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeP" ++//----------Memory Operands---------------------------------------------------- ++// RISCV has only base_plus_offset and literal address mode, so no need to use ++// index and scale. Here set index as 0xffffffff and scale as 0x0. 
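++// For example, an indexed element access cannot be folded into a single
++// memory operand here; a rough sketch of what gets emitted instead:
++//   slli  t0, i, 3        // scale the index by the element size
++//   add   t0, base, t0    // base + scaled index computed explicitly
++//   ld    x10, 0(t0)      // the memory operand itself is just base + disp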
++operand indirect(iRegP reg) ++%{ ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(reg); ++ op_cost(0); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp(0x0); + %} ++%} + -+ ins_encode %{ -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); ++operand indOffI(iRegP reg, immIOffset off) ++%{ ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(AddP reg off); ++ op_cost(0); ++ format %{ "[$reg, $off]" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp($off); + %} -+ -+ ins_pipe(pipe_slow); +%} + -+instruct compareAndExchangeBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++operand indOffL(iRegP reg, immLOffset off) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set res (CompareAndExchangeB mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 5); -+ -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); -+ -+ format %{ -+ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeBAcq" ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(AddP reg off); ++ op_cost(0); ++ format %{ "[$reg, $off]" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp($off); + %} ++%} + -+ ins_encode %{ -+ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, -+ /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++operand indirectN(iRegN reg) ++%{ ++ predicate(CompressedOops::shift() == 0); ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(DecodeN reg); ++ op_cost(0); ++ format %{ "[$reg]\t# narrow" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp(0x0); + %} -+ -+ ins_pipe(pipe_slow); +%} + -+instruct compareAndExchangeSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++operand indOffIN(iRegN reg, immIOffset off) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set res (CompareAndExchangeS mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 6); -+ -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); -+ -+ format %{ -+ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeSAcq" ++ predicate(CompressedOops::shift() == 0); ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(AddP (DecodeN reg) off); ++ op_cost(0); ++ format %{ "[$reg, $off]\t# narrow" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp($off); + %} ++%} + -+ ins_encode %{ -+ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, -+ /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++operand indOffLN(iRegN reg, immLOffset off) ++%{ ++ predicate(CompressedOops::shift() == 0); ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(AddP (DecodeN reg) off); ++ 
op_cost(0); ++ format %{ "[$reg, $off]\t# narrow" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp($off); + %} -+ -+ ins_pipe(pipe_slow); +%} + -+instruct compareAndExchangeIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) ++// RISCV opto stubs need to write to the pc slot in the thread anchor ++operand thread_anchor_pc(javaThread_RegP reg, immL_pc_off off) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set res (CompareAndExchangeI mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); -+ -+ effect(TEMP_DEF res); -+ -+ format %{ -+ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeIAcq" ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(AddP reg off); ++ op_cost(0); ++ format %{ "[$reg, $off]" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp($off); + %} ++%} + -+ ins_encode %{ -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); -+ %} + -+ ins_pipe(pipe_slow); ++//----------Special Memory Operands-------------------------------------------- ++// Stack Slot Operand - This operand is used for loading and storing temporary ++// values on the stack where a match requires a value to ++// flow through memory. ++operand stackSlotI(sRegI reg) ++%{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ // match(RegI); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x02); // RSP ++ index(0xffffffff); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} +%} + -+instruct compareAndExchangeLAcq(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval) ++operand stackSlotF(sRegF reg) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set res (CompareAndExchangeL mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); -+ -+ effect(TEMP_DEF res); -+ -+ format %{ -+ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeLAcq" ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ // match(RegF); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x02); // RSP ++ index(0xffffffff); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset + %} ++%} + -+ ins_encode %{ -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); ++operand stackSlotD(sRegD reg) ++%{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ // match(RegD); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x02); // RSP ++ index(0xffffffff); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset + %} -+ -+ ins_pipe(pipe_slow); +%} + -+instruct compareAndExchangeNAcq(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval) ++operand stackSlotL(sRegL reg) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ // match(RegL); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x02); 
// RSP ++ index(0xffffffff); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} + -+ match(Set res (CompareAndExchangeN mem (Binary oldval newval))); ++// Special operand allowing long args to int ops to be truncated for free + -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); ++operand iRegL2I(iRegL reg) %{ + -+ effect(TEMP_DEF res); ++ op_cost(0); + -+ format %{ -+ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeNAcq" -+ %} ++ match(ConvL2I reg); + -+ ins_encode %{ -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); -+ %} ++ format %{ "l2i($reg)" %} + -+ ins_pipe(pipe_slow); ++ interface(REG_INTER) +%} + -+instruct compareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) -+%{ -+ predicate(needs_acquiring_load_exclusive(n)); + -+ match(Set res (CompareAndExchangeP mem (Binary oldval newval))); ++// Comparison Operands ++// NOTE: Label is a predefined operand which should not be redefined in ++// the AD file. It is generically handled within the ADLC. + -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); ++//----------Conditional Branch Operands---------------------------------------- ++// Comparison Op - This is the operation of the comparison, and is limited to ++// the following set of codes: ++// L (<), LE (<=), G (>), GE (>=), E (==), NE (!=) ++// ++// Other attributes of the comparison, such as unsignedness, are specified ++// by the comparison instruction that sets a condition code flags register. ++// That result is represented by a flags operand whose subtype is appropriate ++// to the unsignedness (etc.) of the comparison. ++// ++// Later, the instruction which matches both the Comparison Op (a Bool) and ++// the flags (produced by the Cmp) specifies the coding of the comparison op ++// by matching a specific subtype of Bool operand below, such as cmpOpU. 
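++// For example, a test such as "if (x < y)" reaches the matcher as
++// (If (Bool (CmpI x y) lt)): the CmpI sets the flags operand and the Bool is
++// matched by one of the cmpOp operands below, whose encoding ("lt" vs "ltu")
++// lets the branch rule choose between blt and bltu (a rough sketch, not tied
++// to one specific rule).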
+ -+ effect(TEMP_DEF res); + -+ format %{ -+ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangePAcq" -+ %} ++// used for signed integral comparisons and fp comparisons ++operand cmpOp() ++%{ ++ match(Bool); + -+ ins_encode %{ -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); -+ %} ++ format %{ "" %} + -+ ins_pipe(pipe_slow); ++ // the values in interface derives from struct BoolTest::mask ++ interface(COND_INTER) %{ ++ equal(0x0, "eq"); ++ greater(0x1, "gt"); ++ overflow(0x2, "overflow"); ++ less(0x3, "lt"); ++ not_equal(0x4, "ne"); ++ less_equal(0x5, "le"); ++ no_overflow(0x6, "no_overflow"); ++ greater_equal(0x7, "ge"); ++ %} +%} + -+instruct weakCompareAndSwapB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++// used for unsigned integral comparisons ++operand cmpOpU() +%{ -+ match(Set res (WeakCompareAndSwapB mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 6); -+ -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); -+ -+ format %{ -+ "cmpxchg_weak $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "# $res == 1 when success, #@weakCompareAndSwapB" -+ %} ++ match(Bool); + -+ ins_encode %{ -+ __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, -+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ format %{ "" %} ++ // the values in interface derives from struct BoolTest::mask ++ interface(COND_INTER) %{ ++ equal(0x0, "eq"); ++ greater(0x1, "gtu"); ++ overflow(0x2, "overflow"); ++ less(0x3, "ltu"); ++ not_equal(0x4, "ne"); ++ less_equal(0x5, "leu"); ++ no_overflow(0x6, "no_overflow"); ++ greater_equal(0x7, "geu"); + %} -+ -+ ins_pipe(pipe_slow); +%} + -+instruct weakCompareAndSwapS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++// used for certain integral comparisons which can be ++// converted to bxx instructions ++operand cmpOpEqNe() +%{ -+ match(Set res (WeakCompareAndSwapS mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 7); -+ -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); -+ -+ format %{ -+ "cmpxchg_weak $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "# $res == 1 when success, #@weakCompareAndSwapS" -+ %} ++ match(Bool); ++ op_cost(0); ++ predicate(n->as_Bool()->_test._test == BoolTest::ne || ++ n->as_Bool()->_test._test == BoolTest::eq); + -+ ins_encode %{ -+ __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, -+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ format %{ "" %} ++ interface(COND_INTER) %{ ++ equal(0x0, "eq"); ++ greater(0x1, "gt"); ++ overflow(0x2, "overflow"); ++ less(0x3, "lt"); ++ not_equal(0x4, "ne"); ++ less_equal(0x5, "le"); ++ no_overflow(0x6, "no_overflow"); ++ greater_equal(0x7, "ge"); + %} -+ -+ ins_pipe(pipe_slow); +%} + -+instruct weakCompareAndSwapI(iRegINoSp res, indirect 
mem, iRegI oldval, iRegI newval) ++operand cmpOpULtGe() +%{ -+ match(Set res (WeakCompareAndSwapI mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); -+ -+ format %{ -+ "cmpxchg_weak $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "# $res == 1 when success, #@weakCompareAndSwapI" -+ %} ++ match(Bool); ++ op_cost(0); ++ predicate(n->as_Bool()->_test._test == BoolTest::lt || ++ n->as_Bool()->_test._test == BoolTest::ge); + -+ ins_encode %{ -+ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); ++ format %{ "" %} ++ interface(COND_INTER) %{ ++ equal(0x0, "eq"); ++ greater(0x1, "gt"); ++ overflow(0x2, "overflow"); ++ less(0x3, "lt"); ++ not_equal(0x4, "ne"); ++ less_equal(0x5, "le"); ++ no_overflow(0x6, "no_overflow"); ++ greater_equal(0x7, "ge"); + %} -+ -+ ins_pipe(pipe_slow); +%} + -+instruct weakCompareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval) ++operand cmpOpUEqNeLeGt() +%{ -+ match(Set res (WeakCompareAndSwapL mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); -+ -+ format %{ -+ "cmpxchg_weak $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "# $res == 1 when success, #@weakCompareAndSwapL" -+ %} ++ match(Bool); ++ op_cost(0); ++ predicate(n->as_Bool()->_test._test == BoolTest::ne || ++ n->as_Bool()->_test._test == BoolTest::eq || ++ n->as_Bool()->_test._test == BoolTest::le || ++ n->as_Bool()->_test._test == BoolTest::gt); + -+ ins_encode %{ -+ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); ++ format %{ "" %} ++ interface(COND_INTER) %{ ++ equal(0x0, "eq"); ++ greater(0x1, "gt"); ++ overflow(0x2, "overflow"); ++ less(0x3, "lt"); ++ not_equal(0x4, "ne"); ++ less_equal(0x5, "le"); ++ no_overflow(0x6, "no_overflow"); ++ greater_equal(0x7, "ge"); + %} -+ -+ ins_pipe(pipe_slow); +%} + -+instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval) -+%{ -+ match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); + -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 4); ++// Flags register, used as output of compare logic ++operand rFlagsReg() ++%{ ++ constraint(ALLOC_IN_RC(reg_flags)); ++ match(RegFlags); + -+ format %{ -+ "cmpxchg_weak $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "# $res == 1 when success, #@weakCompareAndSwapN" -+ %} ++ op_cost(0); ++ format %{ "RFLAGS" %} ++ interface(REG_INTER); ++%} + -+ ins_encode %{ -+ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); -+ %} ++// Special Registers + -+ ins_pipe(pipe_slow); ++// Method Register ++operand inline_cache_RegP(iRegP reg) ++%{ ++ constraint(ALLOC_IN_RC(method_reg)); // inline_cache_reg ++ match(reg); ++ match(iRegPNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) -+%{ -+ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); ++//----------OPERAND CLASSES---------------------------------------------------- ++// Operand Classes are groups of 
operands that are used as to simplify ++// instruction definitions by not requiring the AD writer to specify ++// separate instructions for every form of operand when the ++// instruction accepts multiple operand types with the same basic ++// encoding and format. The classic case of this is memory operands. + -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); ++// memory is used to define read/write location for load/store ++// instruction defs. we can turn a memory op into an Address + -+ format %{ -+ "cmpxchg_weak $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "# $res == 1 when success, #@weakCompareAndSwapP" -+ %} ++opclass memory(indirect, indOffI, indOffL, indirectN, indOffIN, indOffLN); + -+ ins_encode %{ -+ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); -+ %} -+ -+ ins_pipe(pipe_slow); -+%} -+ -+instruct weakCompareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) -+%{ -+ predicate(needs_acquiring_load_exclusive(n)); ++// iRegIorL2I is used for src inputs in rules for 32 bit int (I) ++// operations. it allows the src to be either an iRegI or a (ConvL2I ++// iRegL). in the latter case the l2i normally planted for a ConvL2I ++// can be elided because the 32-bit instruction will just employ the ++// lower 32 bits anyway. ++// ++// n.b. this does not elide all L2I conversions. if the truncated ++// value is consumed by more than one operation then the ConvL2I ++// cannot be bundled into the consuming nodes so an l2i gets planted ++// (actually a mvw $dst $src) and the downstream instructions consume ++// the result of the l2i as an iRegI input. That's a shame since the ++// mvw is actually redundant but its not too costly. + -+ match(Set res (WeakCompareAndSwapB mem (Binary oldval newval))); ++opclass iRegIorL2I(iRegI, iRegL2I); ++opclass iRegIorL(iRegI, iRegL); ++opclass iRegNorP(iRegN, iRegP); ++opclass iRegILNP(iRegI, iRegL, iRegN, iRegP); ++opclass iRegILNPNoSp(iRegINoSp, iRegLNoSp, iRegNNoSp, iRegPNoSp); ++opclass immIorL(immI, immL); + -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 6); ++//----------PIPELINE----------------------------------------------------------- ++// Rules which define the behavior of the target architectures pipeline. + -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++// For specific pipelines, e.g. 
generic RISC-V, define the stages of that pipeline ++//pipe_desc(ID, EX, MEM, WR); ++#define ID S0 ++#define EX S1 ++#define MEM S2 ++#define WR S3 + -+ format %{ -+ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "# $res == 1 when success, #@weakCompareAndSwapBAcq" -+ %} ++// Integer ALU reg operation ++pipeline %{ + -+ ins_encode %{ -+ __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, -+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); -+ %} ++attributes %{ ++ // RISC-V instructions are of fixed length ++ fixed_size_instructions; // Fixed size instructions TODO does ++ max_instructions_per_bundle = 2; // Generic RISC-V 1, Sifive Series 7 2 ++ // RISC-V instructions come in 32-bit word units ++ instruction_unit_size = 4; // An instruction is 4 bytes long ++ instruction_fetch_unit_size = 64; // The processor fetches one line ++ instruction_fetch_units = 1; // of 64 bytes + -+ ins_pipe(pipe_slow); ++ // List of nop instructions ++ nops( MachNop ); +%} + -+instruct weakCompareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) -+%{ -+ predicate(needs_acquiring_load_exclusive(n)); ++// We don't use an actual pipeline model so don't care about resources ++// or description. we do use pipeline classes to introduce fixed ++// latencies + -+ match(Set res (WeakCompareAndSwapS mem (Binary oldval newval))); ++//----------RESOURCES---------------------------------------------------------- ++// Resources are the functional units available to the machine + -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 7); ++// Generic RISC-V pipeline ++// 1 decoder ++// 1 instruction decoded per cycle ++// 1 load/store ops per cycle, 1 branch, 1 FPU ++// 1 mul, 1 div + -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++resources ( DECODE, ++ ALU, ++ MUL, ++ DIV, ++ BRANCH, ++ LDST, ++ FPU); + -+ format %{ -+ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "# $res == 1 when success, #@weakCompareAndSwapSAcq" -+ %} ++//----------PIPELINE DESCRIPTION----------------------------------------------- ++// Pipeline Description specifies the stages in the machine's pipeline + -+ ins_encode %{ -+ __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, -+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); -+ %} ++// Define the pipeline as a generic 6 stage pipeline ++pipe_desc(S0, S1, S2, S3, S4, S5); + -+ ins_pipe(pipe_slow); -+%} ++//----------PIPELINE CLASSES--------------------------------------------------- ++// Pipeline Classes describe the stages in which input and output are ++// referenced by the hardware pipeline. 
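++// For example, an instruct below that says "ins_pipe(ialu_reg_reg)" declares
++// that it reads its sources at ID and writes its result at EX, while a load
++// matched to iload_reg_mem only produces its result at WR; the scheduler uses
++// these read/write stages to derive the fixed latencies between dependent
++// instructions.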
+ -+instruct weakCompareAndSwapIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) ++pipe_class fp_dop_reg_reg_s(fRegF dst, fRegF src1, fRegF src2) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set res (WeakCompareAndSwapI mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); -+ -+ format %{ -+ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "# $res == 1 when success, #@weakCompareAndSwapIAcq" -+ %} -+ -+ ins_encode %{ -+ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); -+ %} -+ -+ ins_pipe(pipe_slow); ++ single_instruction; ++ src1 : S1(read); ++ src2 : S2(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+instruct weakCompareAndSwapLAcq(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval) ++pipe_class fp_dop_reg_reg_d(fRegD dst, fRegD src1, fRegD src2) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set res (WeakCompareAndSwapL mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); -+ -+ format %{ -+ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "# $res == 1 when success, #@weakCompareAndSwapLAcq" -+ %} -+ -+ ins_encode %{ -+ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); -+ %} -+ -+ ins_pipe(pipe_slow); ++ src1 : S1(read); ++ src2 : S2(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+instruct weakCompareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval) ++pipe_class fp_uop_s(fRegF dst, fRegF src) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 4); -+ -+ format %{ -+ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "# $res == 1 when success, #@weakCompareAndSwapNAcq" -+ %} -+ -+ ins_encode %{ -+ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); -+ %} -+ -+ ins_pipe(pipe_slow); ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+instruct weakCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) ++pipe_class fp_uop_d(fRegD dst, fRegD src) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); -+ -+ format %{ -+ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "# $res == 1 when success, #@weakCompareAndSwapPAcq" -+ %} -+ -+ ins_encode %{ -+ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); -+ %} -+ -+ ins_pipe(pipe_slow); ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+instruct get_and_setI(indirect mem, iRegI newv, iRegINoSp prev) ++pipe_class fp_d2f(fRegF dst, fRegD src) +%{ -+ match(Set 
prev (GetAndSetI mem newv)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "atomic_xchgw $prev, $newv, [$mem]\t#@get_and_setI" %} -+ -+ ins_encode %{ -+ __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+instruct get_and_setL(indirect mem, iRegL newv, iRegLNoSp prev) ++pipe_class fp_f2d(fRegD dst, fRegF src) +%{ -+ match(Set prev (GetAndSetL mem newv)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "atomic_xchg $prev, $newv, [$mem]\t#@get_and_setL" %} -+ -+ ins_encode %{ -+ __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+instruct get_and_setN(indirect mem, iRegN newv, iRegINoSp prev) ++pipe_class fp_f2i(iRegINoSp dst, fRegF src) +%{ -+ match(Set prev (GetAndSetN mem newv)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "atomic_xchgwu $prev, $newv, [$mem]\t#@get_and_setN" %} -+ -+ ins_encode %{ -+ __ atomic_xchgwu($prev$$Register, $newv$$Register, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+instruct get_and_setP(indirect mem, iRegP newv, iRegPNoSp prev) ++pipe_class fp_f2l(iRegLNoSp dst, fRegF src) +%{ -+ match(Set prev (GetAndSetP mem newv)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "atomic_xchg $prev, $newv, [$mem]\t#@get_and_setP" %} -+ -+ ins_encode %{ -+ __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+instruct get_and_setIAcq(indirect mem, iRegI newv, iRegINoSp prev) ++pipe_class fp_i2f(fRegF dst, iRegIorL2I src) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set prev (GetAndSetI mem newv)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "atomic_xchgw_acq $prev, $newv, [$mem]\t#@get_and_setIAcq" %} -+ -+ ins_encode %{ -+ __ atomic_xchgalw($prev$$Register, $newv$$Register, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+instruct get_and_setLAcq(indirect mem, iRegL newv, iRegLNoSp prev) ++pipe_class fp_l2f(fRegF dst, iRegL src) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set prev (GetAndSetL mem newv)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "atomic_xchg_acq $prev, $newv, [$mem]\t#@get_and_setLAcq" %} -+ -+ ins_encode %{ -+ __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+instruct get_and_setNAcq(indirect mem, iRegN newv, iRegINoSp prev) ++pipe_class fp_d2i(iRegINoSp dst, fRegD src) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set prev (GetAndSetN mem newv)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "atomic_xchgwu_acq $prev, $newv, [$mem]\t#@get_and_setNAcq" %} -+ -+ ins_encode %{ -+ __ atomic_xchgalwu($prev$$Register, $newv$$Register, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+instruct get_and_setPAcq(indirect mem, iRegP newv, iRegPNoSp prev) ++pipe_class fp_d2l(iRegLNoSp 
dst, fRegD src) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set prev (GetAndSetP mem newv)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "atomic_xchg_acq $prev, $newv, [$mem]\t#@get_and_setPAcq" %} -+ -+ ins_encode %{ -+ __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+instruct get_and_addL(indirect mem, iRegLNoSp newval, iRegL incr) ++pipe_class fp_i2d(fRegD dst, iRegIorL2I src) +%{ -+ match(Set newval (GetAndAddL mem incr)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "get_and_addL $newval, [$mem], $incr\t#@get_and_addL" %} -+ -+ ins_encode %{ -+ __ atomic_add($newval$$Register, $incr$$Register, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+instruct get_and_addL_no_res(indirect mem, Universe dummy, iRegL incr) ++pipe_class fp_l2d(fRegD dst, iRegIorL2I src) +%{ -+ predicate(n->as_LoadStore()->result_not_used()); -+ -+ match(Set dummy (GetAndAddL mem incr)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "get_and_addL [$mem], $incr\t#@get_and_addL_no_res" %} -+ -+ ins_encode %{ -+ __ atomic_add(noreg, $incr$$Register, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+instruct get_and_addLi(indirect mem, iRegLNoSp newval, immLAdd incr) ++pipe_class fp_div_s(fRegF dst, fRegF src1, fRegF src2) +%{ -+ match(Set newval (GetAndAddL mem incr)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "get_and_addL $newval, [$mem], $incr\t#@get_and_addLi" %} -+ -+ ins_encode %{ -+ __ atomic_add($newval$$Register, $incr$$constant, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); ++ single_instruction; ++ src1 : S1(read); ++ src2 : S2(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+instruct get_and_addLi_no_res(indirect mem, Universe dummy, immLAdd incr) ++pipe_class fp_div_d(fRegD dst, fRegD src1, fRegD src2) +%{ -+ predicate(n->as_LoadStore()->result_not_used()); -+ -+ match(Set dummy (GetAndAddL mem incr)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "get_and_addL [$mem], $incr\t#@get_and_addLi_no_res" %} -+ -+ ins_encode %{ -+ __ atomic_add(noreg, $incr$$constant, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); ++ single_instruction; ++ src1 : S1(read); ++ src2 : S2(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+instruct get_and_addI(indirect mem, iRegINoSp newval, iRegIorL2I incr) ++pipe_class fp_sqrt_s(fRegF dst, fRegF src1, fRegF src2) +%{ -+ match(Set newval (GetAndAddI mem incr)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "get_and_addI $newval, [$mem], $incr\t#@get_and_addI" %} -+ -+ ins_encode %{ -+ __ atomic_addw($newval$$Register, $incr$$Register, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); ++ single_instruction; ++ src1 : S1(read); ++ src2 : S2(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+instruct get_and_addI_no_res(indirect mem, Universe dummy, iRegIorL2I incr) ++pipe_class fp_sqrt_d(fRegD dst, fRegD src1, fRegD src2) +%{ -+ predicate(n->as_LoadStore()->result_not_used()); -+ -+ match(Set dummy (GetAndAddI mem incr)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "get_and_addI [$mem], $incr\t#@get_and_addI_no_res" %} -+ -+ ins_encode %{ -+ __ atomic_addw(noreg, $incr$$Register, as_Register($mem$$base)); -+ %} -+ -+ 
ins_pipe(pipe_serial); ++ single_instruction; ++ src1 : S1(read); ++ src2 : S2(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+instruct get_and_addIi(indirect mem, iRegINoSp newval, immIAdd incr) ++pipe_class fp_load_constant_s(fRegF dst) +%{ -+ match(Set newval (GetAndAddI mem incr)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "get_and_addI $newval, [$mem], $incr\t#@get_and_addIi" %} -+ -+ ins_encode %{ -+ __ atomic_addw($newval$$Register, $incr$$constant, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); ++ single_instruction; ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+instruct get_and_addIi_no_res(indirect mem, Universe dummy, immIAdd incr) ++pipe_class fp_load_constant_d(fRegD dst) +%{ -+ predicate(n->as_LoadStore()->result_not_used()); -+ -+ match(Set dummy (GetAndAddI mem incr)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "get_and_addI [$mem], $incr\t#@get_and_addIi_no_res" %} ++ single_instruction; ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; ++%} + -+ ins_encode %{ -+ __ atomic_addw(noreg, $incr$$constant, as_Register($mem$$base)); -+ %} ++pipe_class fp_load_mem_s(fRegF dst, memory mem) ++%{ ++ single_instruction; ++ mem : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ LDST : MEM; ++%} + -+ ins_pipe(pipe_serial); ++pipe_class fp_load_mem_d(fRegD dst, memory mem) ++%{ ++ single_instruction; ++ mem : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ LDST : MEM; +%} + -+instruct get_and_addLAcq(indirect mem, iRegLNoSp newval, iRegL incr) ++pipe_class fp_store_reg_s(fRegF src, memory mem) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); ++ single_instruction; ++ src : S1(read); ++ mem : S5(write); ++ DECODE : ID; ++ LDST : MEM; ++%} + -+ match(Set newval (GetAndAddL mem incr)); ++pipe_class fp_store_reg_d(fRegD src, memory mem) ++%{ ++ single_instruction; ++ src : S1(read); ++ mem : S5(write); ++ DECODE : ID; ++ LDST : MEM; ++%} + -+ ins_cost(ALU_COST); ++//------- Integer ALU operations -------------------------- + -+ format %{ "get_and_addL_acq $newval, [$mem], $incr\t#@get_and_addLAcq" %} ++// Integer ALU reg-reg operation ++// Operands needs in ID, result generated in EX ++// E.g. ADD Rd, Rs1, Rs2 ++pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2) ++%{ ++ single_instruction; ++ dst : EX(write); ++ src1 : ID(read); ++ src2 : ID(read); ++ DECODE : ID; ++ ALU : EX; ++%} + -+ ins_encode %{ -+ __ atomic_addal($newval$$Register, $incr$$Register, as_Register($mem$$base)); -+ %} ++// Integer ALU reg operation with constant shift ++// E.g. SLLI Rd, Rs1, #shift ++pipe_class ialu_reg_shift(iRegI dst, iRegI src1) ++%{ ++ single_instruction; ++ dst : EX(write); ++ src1 : ID(read); ++ DECODE : ID; ++ ALU : EX; ++%} + -+ ins_pipe(pipe_serial); ++// Integer ALU reg-reg operation with variable shift ++// both operands must be available in ID ++// E.g. SLL Rd, Rs1, Rs2 ++pipe_class ialu_reg_reg_vshift(iRegI dst, iRegI src1, iRegI src2) ++%{ ++ single_instruction; ++ dst : EX(write); ++ src1 : ID(read); ++ src2 : ID(read); ++ DECODE : ID; ++ ALU : EX; +%} + -+instruct get_and_addL_no_resAcq(indirect mem, Universe dummy, iRegL incr) %{ -+ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n)); ++// Integer ALU reg operation ++// E.g. NEG Rd, Rs2 ++pipe_class ialu_reg(iRegI dst, iRegI src) ++%{ ++ single_instruction; ++ dst : EX(write); ++ src : ID(read); ++ DECODE : ID; ++ ALU : EX; ++%} + -+ match(Set dummy (GetAndAddL mem incr)); ++// Integer ALU reg immediate operation ++// E.g. 
ADDI Rd, Rs1, #imm ++pipe_class ialu_reg_imm(iRegI dst, iRegI src1) ++%{ ++ single_instruction; ++ dst : EX(write); ++ src1 : ID(read); ++ DECODE : ID; ++ ALU : EX; ++%} + -+ ins_cost(ALU_COST); ++// Integer ALU immediate operation (no source operands) ++// E.g. LI Rd, #imm ++pipe_class ialu_imm(iRegI dst) ++%{ ++ single_instruction; ++ dst : EX(write); ++ DECODE : ID; ++ ALU : EX; ++%} + -+ format %{ "get_and_addL_acq [$mem], $incr\t#@get_and_addL_no_resAcq" %} ++//------- Multiply pipeline operations -------------------- + -+ ins_encode %{ -+ __ atomic_addal(noreg, $incr$$Register, as_Register($mem$$base)); -+ %} ++// Multiply reg-reg ++// E.g. MULW Rd, Rs1, Rs2 ++pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2) ++%{ ++ single_instruction; ++ dst : WR(write); ++ src1 : ID(read); ++ src2 : ID(read); ++ DECODE : ID; ++ MUL : WR; ++%} + -+ ins_pipe(pipe_serial); ++// E.g. MUL RD, Rs1, Rs2 ++pipe_class lmul_reg_reg(iRegI dst, iRegI src1, iRegI src2) ++%{ ++ single_instruction; ++ fixed_latency(3); // Maximum latency for 64 bit mul ++ dst : WR(write); ++ src1 : ID(read); ++ src2 : ID(read); ++ DECODE : ID; ++ MUL : WR; +%} + -+instruct get_and_addLiAcq(indirect mem, iRegLNoSp newval, immLAdd incr) ++//------- Divide pipeline operations -------------------- ++ ++// E.g. DIVW Rd, Rs1, Rs2 ++pipe_class idiv_reg_reg(iRegI dst, iRegI src1, iRegI src2) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); ++ single_instruction; ++ fixed_latency(8); // Maximum latency for 32 bit divide ++ dst : WR(write); ++ src1 : ID(read); ++ src2 : ID(read); ++ DECODE : ID; ++ DIV : WR; ++%} + -+ match(Set newval (GetAndAddL mem incr)); ++// E.g. DIV RD, Rs1, Rs2 ++pipe_class ldiv_reg_reg(iRegI dst, iRegI src1, iRegI src2) ++%{ ++ single_instruction; ++ fixed_latency(16); // Maximum latency for 64 bit divide ++ dst : WR(write); ++ src1 : ID(read); ++ src2 : ID(read); ++ DECODE : ID; ++ DIV : WR; ++%} + -+ ins_cost(ALU_COST); ++//------- Load pipeline operations ------------------------ + -+ format %{ "get_and_addL_acq $newval, [$mem], $incr\t#@get_and_addLiAcq" %} ++// Load - reg, mem ++// E.g. LA Rd, mem ++pipe_class iload_reg_mem(iRegI dst, memory mem) ++%{ ++ single_instruction; ++ dst : WR(write); ++ mem : ID(read); ++ DECODE : ID; ++ LDST : MEM; ++%} + -+ ins_encode %{ -+ __ atomic_addal($newval$$Register, $incr$$constant, as_Register($mem$$base)); -+ %} ++// Load - reg, reg ++// E.g. LD Rd, Rs ++pipe_class iload_reg_reg(iRegI dst, iRegI src) ++%{ ++ single_instruction; ++ dst : WR(write); ++ src : ID(read); ++ DECODE : ID; ++ LDST : MEM; ++%} + -+ ins_pipe(pipe_serial); ++//------- Store pipeline operations ----------------------- ++ ++// Store - zr, mem ++// E.g. SD zr, mem ++pipe_class istore_mem(memory mem) ++%{ ++ single_instruction; ++ mem : ID(read); ++ DECODE : ID; ++ LDST : MEM; +%} + -+instruct get_and_addLi_no_resAcq(indirect mem, Universe dummy, immLAdd incr) ++// Store - reg, mem ++// E.g. SD Rs, mem ++pipe_class istore_reg_mem(iRegI src, memory mem) +%{ -+ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n)); ++ single_instruction; ++ mem : ID(read); ++ src : EX(read); ++ DECODE : ID; ++ LDST : MEM; ++%} + -+ match(Set dummy (GetAndAddL mem incr)); ++// Store - reg, reg ++// E.g. 
SD Rs2, Rs1 ++pipe_class istore_reg_reg(iRegI dst, iRegI src) ++%{ ++ single_instruction; ++ dst : ID(read); ++ src : EX(read); ++ DECODE : ID; ++ LDST : MEM; ++%} + -+ ins_cost(ALU_COST); ++//------- Store pipeline operations ----------------------- + -+ format %{ "get_and_addL_acq [$mem], $incr\t#@get_and_addLi_no_resAcq" %} ++// Branch ++pipe_class pipe_branch() ++%{ ++ single_instruction; ++ DECODE : ID; ++ BRANCH : EX; ++%} + -+ ins_encode %{ -+ __ atomic_addal(noreg, $incr$$constant, as_Register($mem$$base)); -+ %} ++// Branch ++pipe_class pipe_branch_reg(iRegI src) ++%{ ++ single_instruction; ++ src : ID(read); ++ DECODE : ID; ++ BRANCH : EX; ++%} + -+ ins_pipe(pipe_serial); ++// Compare & Branch ++// E.g. BEQ Rs1, Rs2, L ++pipe_class pipe_cmp_branch(iRegI src1, iRegI src2) ++%{ ++ single_instruction; ++ src1 : ID(read); ++ src2 : ID(read); ++ DECODE : ID; ++ BRANCH : EX; +%} + -+instruct get_and_addIAcq(indirect mem, iRegINoSp newval, iRegIorL2I incr) ++// E.g. BEQZ Rs, L ++pipe_class pipe_cmpz_branch(iRegI src) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); ++ single_instruction; ++ src : ID(read); ++ DECODE : ID; ++ BRANCH : EX; ++%} + -+ match(Set newval (GetAndAddI mem incr)); ++//------- Synchronisation operations ---------------------- ++// Any operation requiring serialization ++// E.g. FENCE/Atomic Ops/Load Acquire/Store Release ++pipe_class pipe_serial() ++%{ ++ single_instruction; ++ force_serialization; ++ fixed_latency(16); ++ DECODE : ID; ++ LDST : MEM; ++%} + -+ ins_cost(ALU_COST); ++pipe_class pipe_slow() ++%{ ++ instruction_count(10); ++ multiple_bundles; ++ force_serialization; ++ fixed_latency(16); ++ DECODE : ID; ++ LDST : MEM; ++%} + -+ format %{ "get_and_addI_acq $newval, [$mem], $incr\t#@get_and_addIAcq" %} ++// Empty pipeline class ++pipe_class pipe_class_empty() ++%{ ++ single_instruction; ++ fixed_latency(0); ++%} + -+ ins_encode %{ -+ __ atomic_addalw($newval$$Register, $incr$$Register, as_Register($mem$$base)); -+ %} ++// Default pipeline class. ++pipe_class pipe_class_default() ++%{ ++ single_instruction; ++ fixed_latency(2); ++%} + -+ ins_pipe(pipe_serial); ++// Pipeline class for compares. ++pipe_class pipe_class_compare() ++%{ ++ single_instruction; ++ fixed_latency(16); +%} + -+instruct get_and_addI_no_resAcq(indirect mem, Universe dummy, iRegIorL2I incr) ++// Pipeline class for memory operations. ++pipe_class pipe_class_memory() +%{ -+ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n)); ++ single_instruction; ++ fixed_latency(16); ++%} + -+ match(Set dummy (GetAndAddI mem incr)); ++// Pipeline class for call. ++pipe_class pipe_class_call() ++%{ ++ single_instruction; ++ fixed_latency(100); ++%} + -+ ins_cost(ALU_COST); ++// Define the class for the Nop node. ++define %{ ++ MachNop = pipe_class_empty; ++%} ++%} ++//----------INSTRUCTIONS------------------------------------------------------- ++// ++// match -- States which machine-independent subtree may be replaced ++// by this instruction. ++// ins_cost -- The estimated cost of this instruction is used by instruction ++// selection to identify a minimum cost tree of machine ++// instructions that matches a tree of machine-independent ++// instructions. ++// format -- A string providing the disassembly for this instruction. ++// The value of an instruction's operand may be inserted ++// by referring to it with a '$' prefix. ++// opcode -- Three instruction opcodes may be provided. 
These are referred ++// to within an encode class as $primary, $secondary, and $tertiary ++// rrspectively. The primary opcode is commonly used to ++// indicate the type of machine instruction, while secondary ++// and tertiary are often used for prefix options or addressing ++// modes. ++// ins_encode -- A list of encode classes with parameters. The encode class ++// name must have been defined in an 'enc_class' specification ++// in the encode section of the architecture description. + -+ format %{ "get_and_addI_acq [$mem], $incr\t#@get_and_addI_no_resAcq" %} ++// ============================================================================ ++// Memory (Load/Store) Instructions ++ ++// Load Instructions ++ ++// Load Byte (8 bit signed) ++instruct loadB(iRegINoSp dst, memory mem) ++%{ ++ match(Set dst (LoadB mem)); ++ ++ ins_cost(LOAD_COST); ++ format %{ "lb $dst, $mem\t# byte, #@loadB" %} + + ins_encode %{ -+ __ atomic_addalw(noreg, $incr$$Register, as_Register($mem$$base)); ++ __ lb(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_serial); ++ ins_pipe(iload_reg_mem); +%} + -+instruct get_and_addIiAcq(indirect mem, iRegINoSp newval, immIAdd incr) ++// Load Byte (8 bit signed) into long ++instruct loadB2L(iRegLNoSp dst, memory mem) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set newval (GetAndAddI mem incr)); -+ -+ ins_cost(ALU_COST); ++ match(Set dst (ConvI2L (LoadB mem))); + -+ format %{ "get_and_addI_acq $newval, [$mem], $incr\t#@get_and_addIiAcq" %} ++ ins_cost(LOAD_COST); ++ format %{ "lb $dst, $mem\t# byte, #@loadB2L" %} + + ins_encode %{ -+ __ atomic_addalw($newval$$Register, $incr$$constant, as_Register($mem$$base)); ++ __ lb(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_serial); ++ ins_pipe(iload_reg_mem); +%} + -+instruct get_and_addIi_no_resAcq(indirect mem, Universe dummy, immIAdd incr) ++// Load Byte (8 bit unsigned) ++instruct loadUB(iRegINoSp dst, memory mem) +%{ -+ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n)); -+ -+ match(Set dummy (GetAndAddI mem incr)); -+ -+ ins_cost(ALU_COST); ++ match(Set dst (LoadUB mem)); + -+ format %{ "get_and_addI_acq [$mem], $incr\t#@get_and_addIi_no_resAcq" %} ++ ins_cost(LOAD_COST); ++ format %{ "lbu $dst, $mem\t# byte, #@loadUB" %} + + ins_encode %{ -+ __ atomic_addalw(noreg, $incr$$constant, as_Register($mem$$base)); ++ __ lbu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_serial); ++ ins_pipe(iload_reg_mem); +%} + -+// ============================================================================ -+// Arithmetic Instructions -+// -+ -+// Integer Addition -+ -+// TODO -+// these currently employ operations which do not set CR and hence are -+// not flagged as killing CR but we would like to isolate the cases -+// where we want to set flags from those where we don't. need to work -+// out how to do that. 
-+instruct addI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ -+ match(Set dst (AddI src1 src2)); ++// Load Byte (8 bit unsigned) into long ++instruct loadUB2L(iRegLNoSp dst, memory mem) ++%{ ++ match(Set dst (ConvI2L (LoadUB mem))); + -+ ins_cost(ALU_COST); -+ format %{ "addw $dst, $src1, $src2\t#@addI_reg_reg" %} ++ ins_cost(LOAD_COST); ++ format %{ "lbu $dst, $mem\t# byte, #@loadUB2L" %} + + ins_encode %{ -+ __ addw(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ __ lbu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(iload_reg_mem); +%} + -+instruct addI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAdd src2) %{ -+ match(Set dst (AddI src1 src2)); ++// Load Short (16 bit signed) ++instruct loadS(iRegINoSp dst, memory mem) ++%{ ++ match(Set dst (LoadS mem)); + -+ ins_cost(ALU_COST); -+ format %{ "addiw $dst, $src1, $src2\t#@addI_reg_imm" %} ++ ins_cost(LOAD_COST); ++ format %{ "lh $dst, $mem\t# short, #@loadS" %} + + ins_encode %{ -+ int32_t con = (int32_t)$src2$$constant; -+ __ addiw(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ $src2$$constant); ++ __ lh(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(ialu_reg_imm); ++ ins_pipe(iload_reg_mem); +%} + -+instruct addI_reg_imm_l2i(iRegINoSp dst, iRegL src1, immIAdd src2) %{ -+ match(Set dst (AddI (ConvL2I src1) src2)); ++// Load Short (16 bit signed) into long ++instruct loadS2L(iRegLNoSp dst, memory mem) ++%{ ++ match(Set dst (ConvI2L (LoadS mem))); + -+ ins_cost(ALU_COST); -+ format %{ "addiw $dst, $src1, $src2\t#@addI_reg_imm_l2i" %} ++ ins_cost(LOAD_COST); ++ format %{ "lh $dst, $mem\t# short, #@loadS2L" %} + + ins_encode %{ -+ __ addiw(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ $src2$$constant); ++ __ lh(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(ialu_reg_imm); ++ ins_pipe(iload_reg_mem); +%} + -+// Pointer Addition -+instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{ -+ match(Set dst (AddP src1 src2)); ++// Load Char (16 bit unsigned) ++instruct loadUS(iRegINoSp dst, memory mem) ++%{ ++ match(Set dst (LoadUS mem)); + -+ ins_cost(ALU_COST); -+ format %{ "add $dst, $src1, $src2\t# ptr, #@addP_reg_reg" %} ++ ins_cost(LOAD_COST); ++ format %{ "lhu $dst, $mem\t# short, #@loadUS" %} + + ins_encode %{ -+ __ add(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ __ lhu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(iload_reg_mem); +%} + -+// If we shift more than 32 bits, we need not convert I2L. -+instruct lShiftL_regI_immGE32(iRegLNoSp dst, iRegI src, uimmI6_ge32 scale) %{ -+ match(Set dst (LShiftL (ConvI2L src) scale)); -+ ins_cost(ALU_COST); -+ format %{ "slli $dst, $src, $scale & 63\t#@lShiftL_regI_immGE32" %} ++// Load Short/Char (16 bit unsigned) into long ++instruct loadUS2L(iRegLNoSp dst, memory mem) ++%{ ++ match(Set dst (ConvI2L (LoadUS mem))); ++ ++ ins_cost(LOAD_COST); ++ format %{ "lhu $dst, $mem\t# short, #@loadUS2L" %} + + ins_encode %{ -+ __ slli(as_Register($dst$$reg), as_Register($src$$reg), $scale$$constant & 63); ++ __ lhu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(ialu_reg_shift); ++ ins_pipe(iload_reg_mem); +%} + -+// Pointer Immediate Addition -+// n.b. 
this needs to be more expensive than using an indirect memory -+// operand -+instruct addP_reg_imm(iRegPNoSp dst, iRegP src1, immLAdd src2) %{ -+ match(Set dst (AddP src1 src2)); -+ ins_cost(ALU_COST); -+ format %{ "addi $dst, $src1, $src2\t# ptr, #@addP_reg_imm" %} ++// Load Integer (32 bit signed) ++instruct loadI(iRegINoSp dst, memory mem) ++%{ ++ match(Set dst (LoadI mem)); ++ ++ ins_cost(LOAD_COST); ++ format %{ "lw $dst, $mem\t# int, #@loadI" %} + + ins_encode %{ -+ // src2 is imm, so actually call the addi -+ __ add(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ $src2$$constant); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ lw(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(ialu_reg_imm); ++ ins_pipe(iload_reg_mem); +%} + -+// Long Addition -+instruct addL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{ -+ match(Set dst (AddL src1 src2)); -+ ins_cost(ALU_COST); -+ format %{ "add $dst, $src1, $src2\t#@addL_reg_reg" %} ++// Load Integer (32 bit signed) into long ++instruct loadI2L(iRegLNoSp dst, memory mem) ++%{ ++ match(Set dst (ConvI2L (LoadI mem))); ++ ++ ins_cost(LOAD_COST); ++ format %{ "lw $dst, $mem\t# int, #@loadI2L" %} + + ins_encode %{ -+ __ add(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ lw(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(iload_reg_mem); +%} + -+// No constant pool entries requiredLong Immediate Addition. -+instruct addL_reg_imm(iRegLNoSp dst, iRegL src1, immLAdd src2) %{ -+ match(Set dst (AddL src1 src2)); -+ ins_cost(ALU_COST); -+ format %{ "addi $dst, $src1, $src2\t#@addL_reg_imm" %} ++// Load Integer (32 bit unsigned) into long ++instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask) ++%{ ++ match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); ++ ++ ins_cost(LOAD_COST); ++ format %{ "lwu $dst, $mem\t# int, #@loadUI2L" %} + + ins_encode %{ -+ // src2 is imm, so actually call the addi -+ __ add(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ $src2$$constant); ++ __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(ialu_reg_imm); ++ ins_pipe(iload_reg_mem); +%} + -+// Integer Subtraction -+instruct subI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ -+ match(Set dst (SubI src1 src2)); ++// Load Long (64 bit signed) ++instruct loadL(iRegLNoSp dst, memory mem) ++%{ ++ match(Set dst (LoadL mem)); + -+ ins_cost(ALU_COST); -+ format %{ "subw $dst, $src1, $src2\t#@subI_reg_reg" %} ++ ins_cost(LOAD_COST); ++ format %{ "ld $dst, $mem\t# int, #@loadL" %} + + ins_encode %{ -+ __ subw(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ ld(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(iload_reg_mem); +%} + -+// Immediate Subtraction -+instruct subI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immISub src2) %{ -+ match(Set dst (SubI src1 src2)); ++// Load Range ++instruct loadRange(iRegINoSp dst, memory mem) ++%{ ++ match(Set dst (LoadRange mem)); + -+ ins_cost(ALU_COST); -+ format %{ "addiw $dst, $src1, -$src2\t#@subI_reg_imm" %} ++ ins_cost(LOAD_COST); ++ format %{ "lwu $dst, $mem\t# range, #@loadRange" %} + + ins_encode %{ -+ // src2 is imm, so actually call the addiw -+ __ subw(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ 
$src2$$constant); ++ __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(ialu_reg_imm); ++ ins_pipe(iload_reg_mem); +%} + -+// Long Subtraction -+instruct subL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{ -+ match(Set dst (SubL src1 src2)); -+ ins_cost(ALU_COST); -+ format %{ "sub $dst, $src1, $src2\t#@subL_reg_reg" %} ++// Load Pointer ++instruct loadP(iRegPNoSp dst, memory mem) ++%{ ++ match(Set dst (LoadP mem)); ++ predicate(n->as_Load()->barrier_data() == 0); ++ ++ ins_cost(LOAD_COST); ++ format %{ "ld $dst, $mem\t# ptr, #@loadP" %} + + ins_encode %{ -+ __ sub(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ ld(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(iload_reg_mem); +%} + -+// No constant pool entries requiredLong Immediate Subtraction. -+instruct subL_reg_imm(iRegLNoSp dst, iRegL src1, immLSub src2) %{ -+ match(Set dst (SubL src1 src2)); -+ ins_cost(ALU_COST); -+ format %{ "addi $dst, $src1, -$src2\t#@subL_reg_imm" %} ++// Load Compressed Pointer ++instruct loadN(iRegNNoSp dst, memory mem) ++%{ ++ match(Set dst (LoadN mem)); ++ ++ ins_cost(LOAD_COST); ++ format %{ "lwu $dst, $mem\t# loadN, compressed ptr, #@loadN" %} + + ins_encode %{ -+ // src2 is imm, so actually call the addi -+ __ sub(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ $src2$$constant); ++ __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(ialu_reg_imm); ++ ins_pipe(iload_reg_mem); +%} + -+// Integer Negation (special case for sub) ++// Load Klass Pointer ++instruct loadKlass(iRegPNoSp dst, memory mem) ++%{ ++ match(Set dst (LoadKlass mem)); + -+instruct negI_reg(iRegINoSp dst, iRegIorL2I src, immI0 zero) %{ -+ match(Set dst (SubI zero src)); -+ ins_cost(ALU_COST); -+ format %{ "subw $dst, x0, $src\t# int, #@negI_reg" %} ++ ins_cost(LOAD_COST); ++ format %{ "ld $dst, $mem\t# class, #@loadKlass" %} + + ins_encode %{ -+ // actually call the subw -+ __ negw(as_Register($dst$$reg), -+ as_Register($src$$reg)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ ld(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(iload_reg_mem); +%} + -+// Long Negation ++// Load Narrow Klass Pointer ++instruct loadNKlass(iRegNNoSp dst, memory mem) ++%{ ++ match(Set dst (LoadNKlass mem)); + -+instruct negL_reg(iRegLNoSp dst, iRegL src, immL0 zero) %{ -+ match(Set dst (SubL zero src)); -+ ins_cost(ALU_COST); -+ format %{ "sub $dst, x0, $src\t# long, #@negL_reg" %} ++ ins_cost(LOAD_COST); ++ format %{ "lwu $dst, $mem\t# loadNKlass, compressed class ptr, #@loadNKlass" %} + + ins_encode %{ -+ // actually call the sub -+ __ neg(as_Register($dst$$reg), -+ as_Register($src$$reg)); ++ __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(iload_reg_mem); +%} + -+// Integer Multiply ++// Load Float ++instruct loadF(fRegF dst, memory mem) ++%{ ++ match(Set dst (LoadF mem)); + -+instruct mulI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ -+ match(Set dst (MulI src1 src2)); -+ ins_cost(IMUL_COST); -+ format %{ "mulw $dst, $src1, $src2\t#@mulI" %} ++ ins_cost(LOAD_COST); ++ format %{ "flw $dst, $mem\t# float, #@loadF" %} + -+ //this means 2 word multi, and no sign extend to 64 bits + ins_encode %{ -+ // riscv64 mulw will sign-extension to high 32 bits in dst reg 
-+ __ mulw(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ __ flw(as_FloatRegister($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(imul_reg_reg); ++ ins_pipe(fp_load_mem_s); +%} + -+// Long Multiply ++// Load Double ++instruct loadD(fRegD dst, memory mem) ++%{ ++ match(Set dst (LoadD mem)); + -+instruct mulL(iRegLNoSp dst, iRegL src1, iRegL src2) %{ -+ match(Set dst (MulL src1 src2)); -+ ins_cost(IMUL_COST); -+ format %{ "mul $dst, $src1, $src2\t#@mulL" %} ++ ins_cost(LOAD_COST); ++ format %{ "fld $dst, $mem\t# double, #@loadD" %} + + ins_encode %{ -+ __ mul(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ fld(as_FloatRegister($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(lmul_reg_reg); ++ ins_pipe(fp_load_mem_d); +%} + -+instruct mulHiL_rReg(iRegLNoSp dst, iRegL src1, iRegL src2) ++// Load Int Constant ++instruct loadConI(iRegINoSp dst, immI src) +%{ -+ match(Set dst (MulHiL src1 src2)); -+ ins_cost(IMUL_COST); -+ format %{ "mulh $dst, $src1, $src2\t# mulhi, #@mulHiL_rReg" %} ++ match(Set dst src); + -+ ins_encode %{ -+ __ mulh(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); -+ %} ++ ins_cost(ALU_COST); ++ format %{ "li $dst, $src\t# int, #@loadConI" %} + -+ ins_pipe(lmul_reg_reg); ++ ins_encode(riscv_enc_li_imm(dst, src)); ++ ++ ins_pipe(ialu_imm); +%} + -+// Integer Divide ++// Load Long Constant ++instruct loadConL(iRegLNoSp dst, immL src) ++%{ ++ match(Set dst src); + -+instruct divI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ -+ match(Set dst (DivI src1 src2)); -+ ins_cost(IDIVSI_COST); -+ format %{ "divw $dst, $src1, $src2\t#@divI"%} ++ ins_cost(ALU_COST); ++ format %{ "li $dst, $src\t# long, #@loadConL" %} + -+ ins_encode(riscv_enc_divw(dst, src1, src2)); -+ ins_pipe(idiv_reg_reg); ++ ins_encode(riscv_enc_li_imm(dst, src)); ++ ++ ins_pipe(ialu_imm); +%} + -+instruct signExtract(iRegINoSp dst, iRegIorL2I src1, immI_31 div1, immI_31 div2) %{ -+ match(Set dst (URShiftI (RShiftI src1 div1) div2)); ++// Load Pointer Constant ++instruct loadConP(iRegPNoSp dst, immP con) ++%{ ++ match(Set dst con); ++ + ins_cost(ALU_COST); -+ format %{ "srliw $dst, $src1, $div1\t# int signExtract, #@signExtract" %} ++ format %{ "mv $dst, $con\t# ptr, #@loadConP" %} + -+ ins_encode %{ -+ __ srliw(as_Register($dst$$reg), as_Register($src1$$reg), 31); -+ %} -+ ins_pipe(ialu_reg_shift); ++ ins_encode(riscv_enc_mov_p(dst, con)); ++ ++ ins_pipe(ialu_imm); +%} + -+// Long Divide ++// Load Null Pointer Constant ++instruct loadConP0(iRegPNoSp dst, immP0 con) ++%{ ++ match(Set dst con); + -+instruct divL(iRegLNoSp dst, iRegL src1, iRegL src2) %{ -+ match(Set dst (DivL src1 src2)); -+ ins_cost(IDIVDI_COST); -+ format %{ "div $dst, $src1, $src2\t#@divL" %} ++ ins_cost(ALU_COST); ++ format %{ "mv $dst, $con\t# NULL ptr, #@loadConP0" %} + -+ ins_encode(riscv_enc_div(dst, src1, src2)); -+ ins_pipe(ldiv_reg_reg); ++ ins_encode(riscv_enc_mov_zero(dst)); ++ ++ ins_pipe(ialu_imm); +%} + -+instruct signExtractL(iRegLNoSp dst, iRegL src1, immI_63 div1, immI_63 div2) %{ -+ match(Set dst (URShiftL (RShiftL src1 div1) div2)); ++// Load Pointer Constant One ++instruct loadConP1(iRegPNoSp dst, immP_1 con) ++%{ ++ match(Set dst con); ++ + ins_cost(ALU_COST); -+ format %{ "srli $dst, $src1, $div1\t# long signExtract, #@signExtractL" %} ++ format %{ "mv $dst, $con\t# load ptr constant one, #@loadConP1" %} + -+ 
ins_encode %{ -+ __ srli(as_Register($dst$$reg), as_Register($src1$$reg), 63); -+ %} -+ ins_pipe(ialu_reg_shift); ++ ins_encode(riscv_enc_mov_p1(dst)); ++ ++ ins_pipe(ialu_imm); +%} + -+// Integer Remainder ++// Load Byte Map Base Constant ++instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con) ++%{ ++ match(Set dst con); ++ ins_cost(ALU_COST); ++ format %{ "mv $dst, $con\t# Byte Map Base, #@loadByteMapBase" %} + -+instruct modI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ -+ match(Set dst (ModI src1 src2)); -+ ins_cost(IDIVSI_COST); -+ format %{ "remw $dst, $src1, $src2\t#@modI" %} ++ ins_encode(riscv_enc_mov_byte_map_base(dst)); + -+ ins_encode(riscv_enc_modw(dst, src1, src2)); -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(ialu_imm); +%} + -+// Long Remainder ++// Load Narrow Pointer Constant ++instruct loadConN(iRegNNoSp dst, immN con) ++%{ ++ match(Set dst con); + -+instruct modL(iRegLNoSp dst, iRegL src1, iRegL src2) %{ -+ match(Set dst (ModL src1 src2)); -+ ins_cost(IDIVDI_COST); -+ format %{ "rem $dst, $src1, $src2\t#@modL" %} ++ ins_cost(ALU_COST * 4); ++ format %{ "mv $dst, $con\t# compressed ptr, #@loadConN" %} + -+ ins_encode(riscv_enc_mod(dst, src1, src2)); -+ ins_pipe(ialu_reg_reg); ++ ins_encode(riscv_enc_mov_n(dst, con)); ++ ++ ins_pipe(ialu_imm); +%} + -+// Integer Shifts ++// Load Narrow Null Pointer Constant ++instruct loadConN0(iRegNNoSp dst, immN0 con) ++%{ ++ match(Set dst con); + -+// Shift Left Register -+// In RV64I, only the low 5 bits of src2 are considered for the shift amount -+instruct lShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ -+ match(Set dst (LShiftI src1 src2)); + ins_cost(ALU_COST); -+ format %{ "sllw $dst, $src1, $src2\t#@lShiftI_reg_reg" %} ++ format %{ "mv $dst, $con\t# compressed NULL ptr, #@loadConN0" %} + -+ ins_encode %{ -+ __ sllw(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); -+ %} ++ ins_encode(riscv_enc_mov_zero(dst)); + -+ ins_pipe(ialu_reg_reg_vshift); ++ ins_pipe(ialu_imm); +%} + -+// Shift Left Immediate -+instruct lShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{ -+ match(Set dst (LShiftI src1 src2)); -+ ins_cost(ALU_COST); -+ format %{ "slliw $dst, $src1, ($src2 & 0x1f)\t#@lShiftI_reg_imm" %} ++// Load Narrow Klass Constant ++instruct loadConNKlass(iRegNNoSp dst, immNKlass con) ++%{ ++ match(Set dst con); + -+ ins_encode %{ -+ // the shift amount is encoded in the lower -+ // 5 bits of the I-immediate field for RV32I -+ __ slliw(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ (unsigned) $src2$$constant & 0x1f); -+ %} ++ ins_cost(ALU_COST * 6); ++ format %{ "mv $dst, $con\t# compressed klass ptr, #@loadConNKlass" %} + -+ ins_pipe(ialu_reg_shift); ++ ins_encode(riscv_enc_mov_nk(dst, con)); ++ ++ ins_pipe(ialu_imm); +%} + -+// Shift Right Logical Register -+// In RV64I, only the low 5 bits of src2 are considered for the shift amount -+instruct urShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ -+ match(Set dst (URShiftI src1 src2)); -+ ins_cost(ALU_COST); -+ format %{ "srlw $dst, $src1, $src2\t#@urShiftI_reg_reg" %} ++// Load Float Constant ++instruct loadConF(fRegF dst, immF con) %{ ++ match(Set dst con); + -+ ins_encode %{ -+ __ srlw(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ ins_cost(LOAD_COST); ++ format %{ ++ "flw $dst, [$constantaddress]\t# load from constant table: float=$con, #@loadConF" + %} + -+ ins_pipe(ialu_reg_reg_vshift); -+%} -+ -+// Shift Right Logical Immediate -+instruct 
urShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{ -+ match(Set dst (URShiftI src1 src2)); -+ ins_cost(ALU_COST); -+ format %{ "srliw $dst, $src1, ($src2 & 0x1f)\t#@urShiftI_reg_imm" %} -+ + ins_encode %{ -+ // the shift amount is encoded in the lower -+ // 6 bits of the I-immediate field for RV64I -+ __ srliw(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ (unsigned) $src2$$constant & 0x1f); ++ __ flw(as_FloatRegister($dst$$reg), $constantaddress($con)); + %} + -+ ins_pipe(ialu_reg_shift); ++ ins_pipe(fp_load_constant_s); +%} + -+// Shift Right Arithmetic Register -+// In RV64I, only the low 5 bits of src2 are considered for the shift amount -+instruct rShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ -+ match(Set dst (RShiftI src1 src2)); -+ ins_cost(ALU_COST); -+ format %{ "sraw $dst, $src1, $src2\t#@rShiftI_reg_reg" %} ++instruct loadConF0(fRegF dst, immF0 con) %{ ++ match(Set dst con); ++ ++ ins_cost(XFER_COST); ++ ++ format %{ "fmv.w.x $dst, zr\t# float, #@loadConF0" %} + + ins_encode %{ -+ // riscv will sign-ext dst high 32 bits -+ __ sraw(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ __ fmv_w_x(as_FloatRegister($dst$$reg), zr); + %} + -+ ins_pipe(ialu_reg_reg_vshift); ++ ins_pipe(fp_load_constant_s); +%} + -+// Shift Right Arithmetic Immediate -+instruct rShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{ -+ match(Set dst (RShiftI src1 src2)); -+ ins_cost(ALU_COST); -+ format %{ "sraiw $dst, $src1, ($src2 & 0x1f)\t#@rShiftI_reg_imm" %} ++// Load Double Constant ++instruct loadConD(fRegD dst, immD con) %{ ++ match(Set dst con); ++ ++ ins_cost(LOAD_COST); ++ format %{ ++ "fld $dst, [$constantaddress]\t# load from constant table: double=$con, #@loadConD" ++ %} + + ins_encode %{ -+ // riscv will sign-ext dst high 32 bits -+ __ sraiw(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ (unsigned) $src2$$constant & 0x1f); ++ __ fld(as_FloatRegister($dst$$reg), $constantaddress($con)); + %} + -+ ins_pipe(ialu_reg_shift); ++ ins_pipe(fp_load_constant_d); +%} + -+// Long Shifts ++instruct loadConD0(fRegD dst, immD0 con) %{ ++ match(Set dst con); + -+// Shift Left Register -+// In RV64I, only the low 5 bits of src2 are considered for the shift amount -+instruct lShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{ -+ match(Set dst (LShiftL src1 src2)); ++ ins_cost(XFER_COST); + -+ ins_cost(ALU_COST); -+ format %{ "sll $dst, $src1, $src2\t#@lShiftL_reg_reg" %} ++ format %{ "fmv.d.x $dst, zr\t# double, #@loadConD0" %} + + ins_encode %{ -+ __ sll(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ __ fmv_d_x(as_FloatRegister($dst$$reg), zr); + %} + -+ ins_pipe(ialu_reg_reg_vshift); ++ ins_pipe(fp_load_constant_d); +%} + -+// Shift Left Immediate -+instruct lShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{ -+ match(Set dst (LShiftL src1 src2)); ++// Store Instructions ++// Store CMS card-mark Immediate ++instruct storeimmCM0(immI0 zero, memory mem) ++%{ ++ match(Set mem (StoreCM mem zero)); + -+ ins_cost(ALU_COST); -+ format %{ "slli $dst, $src1, ($src2 & 0x3f)\t#@lShiftL_reg_imm" %} ++ ins_cost(STORE_COST); ++ format %{ "storestore (elided)\n\t" ++ "sb zr, $mem\t# byte, #@storeimmCM0" %} + + ins_encode %{ -+ // the shift amount is encoded in the lower -+ // 6 bits of the I-immediate field for RV64I -+ __ slli(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ (unsigned) $src2$$constant & 0x3f); ++ __ sb(zr, Address(as_Register($mem$$base), $mem$$disp)); + %} 
+ -+ ins_pipe(ialu_reg_shift); ++ ins_pipe(istore_mem); +%} + -+// Shift Right Logical Register -+// In RV64I, only the low 5 bits of src2 are considered for the shift amount -+instruct urShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{ -+ match(Set dst (URShiftL src1 src2)); ++// Store CMS card-mark Immediate with intervening StoreStore ++// needed when using CMS with no conditional card marking ++instruct storeimmCM0_ordered(immI0 zero, memory mem) ++%{ ++ match(Set mem (StoreCM mem zero)); + -+ ins_cost(ALU_COST); -+ format %{ "srl $dst, $src1, $src2\t#@urShiftL_reg_reg" %} ++ ins_cost(ALU_COST + STORE_COST); ++ format %{ "membar(StoreStore)\n\t" ++ "sb zr, $mem\t# byte, #@storeimmCM0_ordered" %} + + ins_encode %{ -+ __ srl(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); ++ __ sb(zr, Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(ialu_reg_reg_vshift); ++ ins_pipe(istore_mem); +%} + -+// Shift Right Logical Immediate -+instruct urShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{ -+ match(Set dst (URShiftL src1 src2)); ++// Store Byte ++instruct storeB(iRegIorL2I src, memory mem) ++%{ ++ match(Set mem (StoreB mem src)); + -+ ins_cost(ALU_COST); -+ format %{ "srli $dst, $src1, ($src2 & 0x3f)\t#@urShiftL_reg_imm" %} ++ ins_cost(STORE_COST); ++ format %{ "sb $src, $mem\t# byte, #@storeB" %} + + ins_encode %{ -+ // the shift amount is encoded in the lower -+ // 6 bits of the I-immediate field for RV64I -+ __ srli(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ (unsigned) $src2$$constant & 0x3f); ++ __ sb(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(ialu_reg_shift); ++ ins_pipe(istore_reg_mem); +%} + -+// A special-case pattern for card table stores. 
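The "special-case pattern for card table stores" noted above matches the address arithmetic of a card-table post-barrier: the pointer is reinterpreted as an integer (CastP2X) and shifted right to index the card array, which is what the rule that follows recognizes. A minimal sketch of that barrier shape is given here for reference only; it is not code from this patch, and the 512-byte card size (shift of 9), the names, and the dirty value are assumptions based on HotSpot's defaults.

    // Illustrative only: the barrier shape behind (URShiftL (CastP2X addr) shift).
    #include <cstdint>

    static const unsigned kCardShift = 9;        // assumed 512-byte cards
    static volatile uint8_t* card_table_base;    // assumed to be set up by the GC

    inline void post_write_barrier(void* field_addr) {
      // CastP2X reinterprets the pointer; the right shift selects the card entry.
      card_table_base[reinterpret_cast<uintptr_t>(field_addr) >> kCardShift] = 0; // 0 assumed dirty
    }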
-+instruct urShiftP_reg_imm(iRegLNoSp dst, iRegP src1, immI src2) %{ -+ match(Set dst (URShiftL (CastP2X src1) src2)); ++instruct storeimmB0(immI0 zero, memory mem) ++%{ ++ match(Set mem (StoreB mem zero)); + -+ ins_cost(ALU_COST); -+ format %{ "srli $dst, p2x($src1), ($src2 & 0x3f)\t#@urShiftP_reg_imm" %} ++ ins_cost(STORE_COST); ++ format %{ "sb zr, $mem\t# byte, #@storeimmB0" %} + + ins_encode %{ -+ // the shift amount is encoded in the lower -+ // 6 bits of the I-immediate field for RV64I -+ __ srli(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ (unsigned) $src2$$constant & 0x3f); ++ __ sb(zr, Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(ialu_reg_shift); ++ ins_pipe(istore_mem); +%} + -+// Shift Right Arithmetic Register -+// In RV64I, only the low 5 bits of src2 are considered for the shift amount -+instruct rShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{ -+ match(Set dst (RShiftL src1 src2)); ++// Store Char/Short ++instruct storeC(iRegIorL2I src, memory mem) ++%{ ++ match(Set mem (StoreC mem src)); + -+ ins_cost(ALU_COST); -+ format %{ "sra $dst, $src1, $src2\t#@rShiftL_reg_reg" %} ++ ins_cost(STORE_COST); ++ format %{ "sh $src, $mem\t# short, #@storeC" %} + + ins_encode %{ -+ __ sra(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ __ sh(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(ialu_reg_reg_vshift); ++ ins_pipe(istore_reg_mem); +%} + -+// Shift Right Arithmetic Immediate -+instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{ -+ match(Set dst (RShiftL src1 src2)); ++instruct storeimmC0(immI0 zero, memory mem) ++%{ ++ match(Set mem (StoreC mem zero)); + -+ ins_cost(ALU_COST); -+ format %{ "srai $dst, $src1, ($src2 & 0x3f)\t#@rShiftL_reg_imm" %} ++ ins_cost(STORE_COST); ++ format %{ "sh zr, $mem\t# short, #@storeimmC0" %} + + ins_encode %{ -+ // the shift amount is encoded in the lower -+ // 6 bits of the I-immediate field for RV64I -+ __ srai(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ (unsigned) $src2$$constant & 0x3f); ++ __ sh(zr, Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(ialu_reg_shift); ++ ins_pipe(istore_mem); +%} + -+instruct regI_not_reg(iRegINoSp dst, iRegI src1, immI_M1 m1) %{ -+ match(Set dst (XorI src1 m1)); -+ ins_cost(ALU_COST); -+ format %{ "xori $dst, $src1, -1\t#@regI_not_reg" %} ++// Store Integer ++instruct storeI(iRegIorL2I src, memory mem) ++%{ ++ match(Set mem(StoreI mem src)); ++ ++ ins_cost(STORE_COST); ++ format %{ "sw $src, $mem\t# int, #@storeI" %} + + ins_encode %{ -+ __ xori(as_Register($dst$$reg), as_Register($src1$$reg), -1); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ sw(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(istore_reg_mem); +%} + -+instruct regL_not_reg(iRegLNoSp dst, iRegL src1, immL_M1 m1) %{ -+ match(Set dst (XorL src1 m1)); -+ ins_cost(ALU_COST); -+ format %{ "xori $dst, $src1, -1\t#@regL_not_reg" %} ++instruct storeimmI0(immI0 zero, memory mem) ++%{ ++ match(Set mem(StoreI mem zero)); ++ ++ ins_cost(STORE_COST); ++ format %{ "sw zr, $mem\t# int, #@storeimmI0" %} + + ins_encode %{ -+ __ xori(as_Register($dst$$reg), as_Register($src1$$reg), -1); ++ __ sw(zr, Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(istore_mem); +%} + ++// Store Long (64 bit signed) ++instruct storeL(iRegL src, memory mem) ++%{ ++ match(Set mem (StoreL mem src)); + -+// 
============================================================================ -+// Floating Point Arithmetic Instructions -+ -+instruct addF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{ -+ match(Set dst (AddF src1 src2)); -+ -+ ins_cost(FMUL_SINGLE_COST); -+ format %{ "fadd.s $dst, $src1, $src2\t#@addF_reg_reg" %} ++ ins_cost(STORE_COST); ++ format %{ "sd $src, $mem\t# long, #@storeL" %} + + ins_encode %{ -+ __ fadd_s(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ sd(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(fp_dop_reg_reg_s); ++ ins_pipe(istore_reg_mem); +%} + -+instruct addD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{ -+ match(Set dst (AddD src1 src2)); ++// Store Long (64 bit signed) ++instruct storeimmL0(immL0 zero, memory mem) ++%{ ++ match(Set mem (StoreL mem zero)); + -+ ins_cost(FMUL_DOUBLE_COST); -+ format %{ "fadd.d $dst, $src1, $src2\t#@addD_reg_reg" %} ++ ins_cost(STORE_COST); ++ format %{ "sd zr, $mem\t# long, #@storeimmL0" %} + + ins_encode %{ -+ __ fadd_d(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg)); ++ __ sd(zr, Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(fp_dop_reg_reg_d); ++ ins_pipe(istore_mem); +%} + -+instruct subF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{ -+ match(Set dst (SubF src1 src2)); ++// Store Pointer ++instruct storeP(iRegP src, memory mem) ++%{ ++ match(Set mem (StoreP mem src)); + -+ ins_cost(FMUL_SINGLE_COST); -+ format %{ "fsub.s $dst, $src1, $src2\t#@subF_reg_reg" %} ++ ins_cost(STORE_COST); ++ format %{ "sd $src, $mem\t# ptr, #@storeP" %} + + ins_encode %{ -+ __ fsub_s(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ sd(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(fp_dop_reg_reg_s); ++ ins_pipe(istore_reg_mem); +%} + -+instruct subD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{ -+ match(Set dst (SubD src1 src2)); ++// Store Pointer ++instruct storeimmP0(immP0 zero, memory mem) ++%{ ++ match(Set mem (StoreP mem zero)); + -+ ins_cost(FMUL_DOUBLE_COST); -+ format %{ "fsub.d $dst, $src1, $src2\t#@subD_reg_reg" %} ++ ins_cost(STORE_COST); ++ format %{ "sd zr, $mem\t# ptr, #@storeimmP0" %} + + ins_encode %{ -+ __ fsub_d(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg)); ++ __ sd(zr, Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(fp_dop_reg_reg_d); ++ ins_pipe(istore_mem); +%} + -+instruct mulF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{ -+ match(Set dst (MulF src1 src2)); ++// Store Compressed Pointer ++instruct storeN(iRegN src, memory mem) ++%{ ++ match(Set mem (StoreN mem src)); + -+ ins_cost(FMUL_SINGLE_COST); -+ format %{ "fmul.s $dst, $src1, $src2\t#@mulF_reg_reg" %} ++ ins_cost(STORE_COST); ++ format %{ "sw $src, $mem\t# compressed ptr, #@storeN" %} + + ins_encode %{ -+ __ fmul_s(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ sw(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(fp_dop_reg_reg_s); ++ ins_pipe(istore_reg_mem); +%} + -+instruct mulD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{ -+ match(Set dst (MulD src1 src2)); ++instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, 
memory mem) ++%{ ++ match(Set mem (StoreN mem zero)); + -+ ins_cost(FMUL_DOUBLE_COST); -+ format %{ "fmul.d $dst, $src1, $src2\t#@mulD_reg_reg" %} ++ ins_cost(STORE_COST); ++ format %{ "sw rheapbase, $mem\t# compressed ptr (rheapbase==0), #@storeImmN0" %} + + ins_encode %{ -+ __ fmul_d(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg)); ++ __ sw(as_Register($heapbase$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(fp_dop_reg_reg_d); ++ ins_pipe(istore_reg_mem); +%} + -+// src1 * src2 + src3 -+instruct maddF_reg_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3) %{ -+ predicate(UseFMA); -+ match(Set dst (FmaF src3 (Binary src1 src2))); ++// Store Float ++instruct storeF(fRegF src, memory mem) ++%{ ++ match(Set mem (StoreF mem src)); + -+ ins_cost(FMUL_SINGLE_COST); -+ format %{ "fmadd.s $dst, $src1, $src2, $src3\t#@maddF_reg_reg" %} ++ ins_cost(STORE_COST); ++ format %{ "fsw $src, $mem\t# float, #@storeF" %} + + ins_encode %{ -+ __ fmadd_s(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg), -+ as_FloatRegister($src3$$reg)); ++ __ fsw(as_FloatRegister($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_class_default); ++ ins_pipe(fp_store_reg_s); +%} + -+// src1 * src2 + src3 -+instruct maddD_reg_reg(fRegD dst, fRegD src1, fRegD src2, fRegD src3) %{ -+ predicate(UseFMA); -+ match(Set dst (FmaD src3 (Binary src1 src2))); ++// Store Double ++instruct storeD(fRegD src, memory mem) ++%{ ++ match(Set mem (StoreD mem src)); + -+ ins_cost(FMUL_DOUBLE_COST); -+ format %{ "fmadd.d $dst, $src1, $src2, $src3\t#@maddD_reg_reg" %} ++ ins_cost(STORE_COST); ++ format %{ "fsd $src, $mem\t# double, #@storeD" %} + + ins_encode %{ -+ __ fmadd_d(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg), -+ as_FloatRegister($src3$$reg)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ fsd(as_FloatRegister($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_class_default); ++ ins_pipe(fp_store_reg_d); +%} + -+// src1 * src2 - src3 -+instruct msubF_reg_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3) %{ -+ predicate(UseFMA); -+ match(Set dst (FmaF (NegF src3) (Binary src1 src2))); ++// Store Compressed Klass Pointer ++instruct storeNKlass(iRegN src, memory mem) ++%{ ++ match(Set mem (StoreNKlass mem src)); + -+ ins_cost(FMUL_SINGLE_COST); -+ format %{ "fmsub.s $dst, $src1, $src2, $src3\t#@msubF_reg_reg" %} ++ ins_cost(STORE_COST); ++ format %{ "sw $src, $mem\t# compressed klass ptr, #@storeNKlass" %} + + ins_encode %{ -+ __ fmsub_s(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg), -+ as_FloatRegister($src3$$reg)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ sw(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_class_default); ++ ins_pipe(istore_reg_mem); +%} + -+// src1 * src2 - src3 -+instruct msubD_reg_reg(fRegD dst, fRegD src1, fRegD src2, fRegD src3) %{ -+ predicate(UseFMA); -+ match(Set dst (FmaD (NegD src3) (Binary src1 src2))); -+ -+ ins_cost(FMUL_DOUBLE_COST); -+ format %{ "fmsub.d $dst, $src1, $src2, $src3\t#@msubD_reg_reg" %} ++// ============================================================================ ++// Atomic operation instructions ++// ++// Intel and SPARC both implement Ideal Node LoadPLocked and ++// Store{PIL}Conditional instructions using a normal load for the ++// LoadPLocked and a 
CAS for the Store{PIL}Conditional. ++// ++// The ideal code appears only to use LoadPLocked/storePConditional as a ++// pair to lock object allocations from Eden space when not using ++// TLABs. ++// ++// There does not appear to be a Load{IL}Locked Ideal Node and the ++// Ideal code appears to use Store{IL}Conditional as an alias for CAS ++// and to use StoreIConditional only for 32-bit and StoreLConditional ++// only for 64-bit. ++// ++// We implement LoadPLocked and storePConditional instructions using, ++// respectively the RISCV hw load-reserve and store-conditional ++// instructions. Whereas we must implement each of ++// Store{IL}Conditional using a CAS which employs a pair of ++// instructions comprising a load-reserve followed by a ++// store-conditional. + -+ ins_encode %{ -+ __ fmsub_d(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg), -+ as_FloatRegister($src3$$reg)); -+ %} + -+ ins_pipe(pipe_class_default); -+%} ++// Locked-load (load reserved) of the current heap-top ++// used when updating the eden heap top ++// implemented using lr_d on RISCV64 ++instruct loadPLocked(iRegPNoSp dst, indirect mem) ++%{ ++ match(Set dst (LoadPLocked mem)); + -+// -src1 * src2 + src3 -+instruct nmsubF_reg_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3) %{ -+ predicate(UseFMA); -+ match(Set dst (FmaF src3 (Binary (NegF src1) src2))); -+ match(Set dst (FmaF src3 (Binary src1 (NegF src2)))); ++ ins_cost(ALU_COST * 2 + LOAD_COST); + -+ ins_cost(FMUL_SINGLE_COST); -+ format %{ "fnmsub.s $dst, $src1, $src2, $src3\t#@nmsubF_reg_reg" %} ++ format %{ "lr.d $dst, $mem\t# ptr load reserved, #@loadPLocked" %} + + ins_encode %{ -+ __ fnmsub_s(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg), -+ as_FloatRegister($src3$$reg)); ++ __ la(t0, Address(as_Register($mem$$base), $mem$$disp)); ++ __ lr_d($dst$$Register, t0, Assembler::aq); + %} + -+ ins_pipe(pipe_class_default); ++ ins_pipe(pipe_serial); +%} + -+// -src1 * src2 + src3 -+instruct nmsubD_reg_reg(fRegD dst, fRegD src1, fRegD src2, fRegD src3) %{ -+ predicate(UseFMA); -+ match(Set dst (FmaD src3 (Binary (NegD src1) src2))); -+ match(Set dst (FmaD src3 (Binary src1 (NegD src2)))); ++// Conditional-store of the updated heap-top. ++// Used during allocation of the shared heap. ++// implemented using sc_d on RISCV64. 
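The comment block above describes building Store{IL}Conditional from a load-reserve/store-conditional pair rather than a single instruction. As a minimal sketch of that contract (not code from this patch; the function name and types are assumptions), the same semantics can be written with a GCC/Clang builtin, which riscv64 toolchains without the Zacas extension lower to an lr.d/bne/sc.d retry loop much like the cmpxchg encodings that follow:

    // Illustrative only: the compare-and-swap contract behind the cmpxchg
    // encodings in this file (expands to an lr.d / bne / sc.d retry loop on riscv64).
    #include <cstdint>

    inline bool cas_int64(volatile int64_t* addr, int64_t expected, int64_t desired) {
      return __atomic_compare_exchange_n(addr, &expected, desired,
                                         /*weak=*/false,
                                         __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
    }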
++instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFlagsReg cr) ++%{ ++ match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); + -+ ins_cost(FMUL_DOUBLE_COST); -+ format %{ "fnmsub.d $dst, $src1, $src2, $src3\t#@nmsubD_reg_reg" %} ++ ins_cost(ALU_COST * 2 + STORE_COST); ++ ++ format %{ ++ "sc_d t1, $newval $heap_top_ptr,\t# ptr store conditional, #@storePConditional" ++ %} + + ins_encode %{ -+ __ fnmsub_d(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg), -+ as_FloatRegister($src3$$reg)); ++ __ la(t0, Address(as_Register($heap_top_ptr$$base), $heap_top_ptr$$disp)); ++ __ sc_d($cr$$Register, $newval$$Register, t0, Assembler::rl); + %} + -+ ins_pipe(pipe_class_default); ++ ins_pipe(pipe_serial); +%} + -+// -src1 * src2 - src3 -+instruct nmaddF_reg_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3) %{ -+ predicate(UseFMA); -+ match(Set dst (FmaF (NegF src3) (Binary (NegF src1) src2))); -+ match(Set dst (FmaF (NegF src3) (Binary src1 (NegF src2)))); ++instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) ++%{ ++ match(Set cr (StoreLConditional mem (Binary oldval newval))); + -+ ins_cost(FMUL_SINGLE_COST); -+ format %{ "fnmadd.s $dst, $src1, $src2, $src3\t#@nmaddF_reg_reg" %} ++ ins_cost(LOAD_COST + STORE_COST + 2 * BRANCH_COST); ++ ++ format %{ ++ "cmpxchg t1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval" ++ "xorr $cr, $cr, $oldval\t# $cr == 0 on successful write, #@storeLConditional" ++ %} + + ins_encode %{ -+ __ fnmadd_s(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg), -+ as_FloatRegister($src3$$reg)); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $cr$$Register); ++ __ xorr($cr$$Register,$cr$$Register, $oldval$$Register); + %} + -+ ins_pipe(pipe_class_default); ++ ins_pipe(pipe_slow); +%} + -+// -src1 * src2 - src3 -+instruct nmaddD_reg_reg(fRegD dst, fRegD src1, fRegD src2, fRegD src3) %{ -+ predicate(UseFMA); -+ match(Set dst (FmaD (NegD src3) (Binary (NegD src1) src2))); -+ match(Set dst (FmaD (NegD src3) (Binary src1 (NegD src2)))); ++// storeIConditional also has acquire semantics, for no better reason ++// than matching storeLConditional. 
++instruct storeIConditional(indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) ++%{ ++ match(Set cr (StoreIConditional mem (Binary oldval newval))); + -+ ins_cost(FMUL_DOUBLE_COST); -+ format %{ "fnmadd.d $dst, $src1, $src2, $src3\t#@nmaddD_reg_reg" %} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2); ++ ++ format %{ ++ "cmpxchgw t1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval" ++ "xorr $cr, $cr, $oldval\t# $cr == 0 on successful write, #@storeIConditional" ++ %} + + ins_encode %{ -+ __ fnmadd_d(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg), -+ as_FloatRegister($src3$$reg)); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $cr$$Register); ++ __ xorr($cr$$Register,$cr$$Register, $oldval$$Register); + %} + -+ ins_pipe(pipe_class_default); ++ ins_pipe(pipe_slow); +%} + -+// Math.max(FF)F -+instruct maxF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{ -+ match(Set dst (MaxF src1 src2)); -+ effect(TEMP_DEF dst, USE src1, USE src2); ++// standard CompareAndSwapX when we are using barriers ++// these have higher priority than the rules selected by a predicate ++instruct compareAndSwapB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++%{ ++ match(Set res (CompareAndSwapB mem (Binary oldval newval))); + -+ ins_cost(2 * DEFAULT_COST + 2 * XFER_COST + FMUL_SINGLE_COST + BRANCH_COST); -+ format %{ "fsflags zr\t#@maxF_reg_reg\n\t" -+ "fmax.s $dst, $src1, $src2\n\t" -+ "flt.s zr, $src1, $src2\n\t" -+ "frflags t0\n\t" -+ "beqz t0, Ldone\n\t" -+ "fadd.s $dst, $src1, $src2" %} ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 10 + BRANCH_COST * 4); ++ ++ effect(TEMP_DEF res, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); ++ ++ format %{ ++ "cmpxchg $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapB" ++ %} + + ins_encode %{ -+ __ minmax_FD(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg), /* is_double */ false, /* is_min */ false); ++ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, ++ Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register, ++ true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); + %} + -+ ins_pipe(fp_dop_reg_reg_s); ++ ins_pipe(pipe_slow); +%} + -+// Math.min(FF)F -+instruct minF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{ -+ match(Set dst (MinF src1 src2)); -+ effect(TEMP_DEF dst, USE src1, USE src2); ++instruct compareAndSwapS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++%{ ++ match(Set res (CompareAndSwapS mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 11 + BRANCH_COST * 4); ++ ++ effect(TEMP_DEF res, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + -+ ins_cost(2 * DEFAULT_COST + 2 * XFER_COST + FMUL_SINGLE_COST + BRANCH_COST); -+ format %{ "fsflags zr\t#@minF_reg_reg\n\t" -+ "fmin.s $dst, $src1, $src2\n\t" -+ "flt.s zr, $src1, $src2\n\t" -+ "frflags t0\n\t" -+ "beqz t0, Ldone\n\t" -+ "fadd.s $dst, $src1, $src2" %} ++ format %{ ++ "cmpxchg $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapS" ++ %} + + ins_encode %{ -+ __ minmax_FD(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg), /* is_double */ false, /* is_min */ true); ++ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, ++ Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register, ++ true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); + %} + -+ ins_pipe(fp_dop_reg_reg_s); ++ ins_pipe(pipe_slow); +%} + -+// Math.max(DD)D -+instruct maxD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{ -+ match(Set dst (MaxD src1 src2)); -+ effect(TEMP_DEF dst, USE src1, USE src2); ++instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval) ++%{ ++ match(Set res (CompareAndSwapI mem (Binary oldval newval))); + -+ ins_cost(2 * DEFAULT_COST + 2 * XFER_COST + FMUL_DOUBLE_COST + BRANCH_COST); -+ format %{ "fsflags zr\t#@maxD_reg_reg\n\t" -+ "fmax.d $dst, $src1, $src2\n\t" -+ "flt.d zr, $src1, $src2\n\t" -+ "frflags t0\n\t" -+ "beqz t0, Ldone\n\t" -+ "fadd.d $dst, $src1, $src2" %} ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); + -+ ins_encode %{ -+ __ minmax_FD(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg), /* is_double */ true, /* is_min */ false); ++ format %{ ++ "cmpxchg $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapI" + %} + -+ ins_pipe(fp_dop_reg_reg_d); ++ ins_encode(riscv_enc_cmpxchgw(res, mem, oldval, newval)); ++ ++ ins_pipe(pipe_slow); +%} + -+// Math.min(DD)D -+instruct minD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{ -+ match(Set dst (MinD src1 src2)); -+ effect(TEMP_DEF dst, USE src1, USE src2); ++instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval) ++%{ ++ match(Set res (CompareAndSwapL mem (Binary oldval newval))); + -+ ins_cost(2 * DEFAULT_COST + 2 * XFER_COST + FMUL_DOUBLE_COST + BRANCH_COST); -+ format %{ "fsflags zr\t#@minD_reg_reg\n\t" -+ "fmin.d $dst, $src1, $src2\n\t" -+ "flt.d zr, $src1, $src2\n\t" -+ "frflags t0\n\t" -+ "beqz t0, Ldone\n\t" -+ "fadd.d $dst, $src1, $src2" %} ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); + -+ ins_encode %{ -+ __ minmax_FD(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg), /* is_double */ true, /* is_min */ true); ++ format %{ ++ "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapL" + %} + -+ ins_pipe(fp_dop_reg_reg_d); ++ ins_encode(riscv_enc_cmpxchg(res, mem, oldval, newval)); ++ ++ ins_pipe(pipe_slow); +%} + -+instruct divF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{ -+ match(Set dst (DivF src1 src2)); ++instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) ++%{ ++ predicate(n->as_LoadStore()->barrier_data() == 0); + -+ ins_cost(FDIV_COST); -+ format %{ "fdiv.s $dst, $src1, $src2\t#@divF_reg_reg" %} ++ match(Set res (CompareAndSwapP mem (Binary oldval newval))); + -+ ins_encode %{ -+ __ fdiv_s(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg)); ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); ++ ++ format %{ ++ "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapP" + %} + -+ ins_pipe(fp_div_s); ++ ins_encode(riscv_enc_cmpxchg(res, mem, oldval, newval)); ++ ++ ins_pipe(pipe_slow); +%} + -+instruct divD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{ -+ match(Set dst (DivD src1 src2)); ++instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval) ++%{ ++ match(Set res (CompareAndSwapN mem (Binary oldval newval))); + -+ ins_cost(FDIV_COST); -+ format %{ "fdiv.d $dst, $src1, $src2\t#@divD_reg_reg" %} ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 8 + BRANCH_COST * 4); + -+ ins_encode %{ -+ __ fdiv_d(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg)); ++ format %{ ++ "cmpxchg $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapN" + %} + -+ ins_pipe(fp_div_d); ++ ins_encode(riscv_enc_cmpxchgn(res, mem, oldval, newval)); ++ ++ ins_pipe(pipe_slow); +%} + -+instruct negF_reg_reg(fRegF dst, fRegF src) %{ -+ match(Set dst (NegF src)); ++// alternative CompareAndSwapX when we are eliding barriers ++instruct compareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_cost(XFER_COST); -+ format %{ "fsgnjn.s $dst, $src, $src\t#@negF_reg_reg" %} ++ match(Set res (CompareAndSwapB mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 10 + BRANCH_COST * 4); ++ ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ++ format %{ ++ "cmpxchg_acq $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapBAcq" ++ %} + + ins_encode %{ -+ __ fneg_s(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src$$reg)); ++ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, ++ Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register, ++ true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); + %} + -+ ins_pipe(fp_uop_s); ++ ins_pipe(pipe_slow); +%} + -+instruct negD_reg_reg(fRegD dst, fRegD src) %{ -+ match(Set dst (NegD src)); ++instruct compareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_cost(XFER_COST); -+ format %{ "fsgnjn.d $dst, $src, $src\t#@negD_reg_reg" %} ++ match(Set res (CompareAndSwapS mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 11 + BRANCH_COST * 4); ++ ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ++ format %{ ++ "cmpxchg_acq $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapSAcq" ++ %} + + ins_encode %{ -+ __ fneg_d(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src$$reg)); ++ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, ++ Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register, ++ true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); + %} + -+ ins_pipe(fp_uop_d); ++ ins_pipe(pipe_slow); +%} + -+instruct absI_reg(iRegINoSp dst, iRegIorL2I src) %{ -+ match(Set dst (AbsI src)); ++instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_cost(ALU_COST * 3); -+ format %{ "sraiw t0, $src, 0x1f\n\t" -+ "xorr $dst, $src, t0\n\t" -+ "subw $dst, $dst, t0\t#@absI_reg" %} ++ match(Set res (CompareAndSwapI mem (Binary oldval newval))); + -+ ins_encode %{ -+ __ sraiw(t0, as_Register($src$$reg), 0x1f); -+ __ xorr(as_Register($dst$$reg), as_Register($src$$reg), t0); -+ __ subw(as_Register($dst$$reg), as_Register($dst$$reg), t0); ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); ++ ++ format %{ ++ "cmpxchg_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapIAcq" + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_encode(riscv_enc_cmpxchgw_acq(res, mem, oldval, newval)); ++ ++ ins_pipe(pipe_slow); +%} + -+instruct absI2L_reg(iRegLNoSp dst, iRegIorL2I src) %{ -+ match(Set dst (ConvI2L (AbsI src))); ++instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_cost(ALU_COST * 3); -+ format %{ "sraiw t0, $src, 0x1f\n\t" -+ "xorr $dst, $src, t0\n\t" -+ "subw $dst, $dst, t0\t#@absI2L_reg" %} ++ match(Set res (CompareAndSwapL mem (Binary oldval newval))); + -+ ins_encode %{ -+ __ sraiw(t0, as_Register($src$$reg), 0x1f); -+ __ xorr(as_Register($dst$$reg), as_Register($src$$reg), t0); -+ __ subw(as_Register($dst$$reg), as_Register($dst$$reg), t0); ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); ++ ++ format %{ ++ "cmpxchg_acq $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapLAcq" + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_encode(riscv_enc_cmpxchg_acq(res, mem, oldval, newval)); ++ ++ ins_pipe(pipe_slow); +%} + -+instruct absL_reg(iRegLNoSp dst, iRegL src) %{ -+ match(Set dst (AbsL src)); ++instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) ++%{ ++ predicate(needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == 0)); + -+ ins_cost(ALU_COST * 3); -+ format %{ "srai t0, $src, 0x3f\n\t" -+ "xorr $dst, $src, t0\n\t" -+ "sub $dst, $dst, t0\t#absL_reg" %} ++ match(Set res (CompareAndSwapP mem (Binary oldval newval))); + -+ ins_encode %{ -+ __ srai(t0, as_Register($src$$reg), 0x3f); -+ __ xorr(as_Register($dst$$reg), as_Register($src$$reg), t0); -+ __ sub(as_Register($dst$$reg), as_Register($dst$$reg), t0); ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); ++ ++ format %{ ++ "cmpxchg_acq $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapPAcq" + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_encode(riscv_enc_cmpxchg_acq(res, mem, oldval, newval)); ++ ++ ins_pipe(pipe_slow); +%} + -+instruct absF_reg(fRegF dst, fRegF src) %{ -+ match(Set dst (AbsF src)); -+ -+ ins_cost(XFER_COST); -+ format %{ "fsgnjx.s $dst, $src, $src\t#@absF_reg" %} -+ ins_encode %{ -+ __ fabs_s(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src$$reg)); -+ %} ++instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_pipe(fp_uop_s); -+%} ++ match(Set res (CompareAndSwapN mem (Binary oldval newval))); + -+instruct absD_reg(fRegD dst, fRegD src) %{ -+ match(Set dst (AbsD src)); ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 8 + BRANCH_COST * 4); + -+ ins_cost(XFER_COST); -+ format %{ "fsgnjx.d $dst, $src, $src\t#@absD_reg" %} -+ ins_encode %{ -+ __ fabs_d(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src$$reg)); ++ format %{ ++ "cmpxchg_acq $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapNAcq" + %} + -+ ins_pipe(fp_uop_d); ++ ins_encode(riscv_enc_cmpxchgn_acq(res, mem, oldval, newval)); ++ ++ ins_pipe(pipe_slow); +%} + -+instruct sqrtF_reg(fRegF dst, fRegF src) %{ -+ match(Set dst (SqrtF src)); ++// Sundry CAS operations. 
Note that release is always true, ++// regardless of the memory ordering of the CAS. This is because we ++// need the volatile case to be sequentially consistent but there is ++// no trailing StoreLoad barrier emitted by C2. Unfortunately we ++// can't check the type of memory ordering here, so we always emit a ++// sc_d(w) with rl bit set. ++instruct compareAndExchangeB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++%{ ++ match(Set res (CompareAndExchangeB mem (Binary oldval newval))); + -+ ins_cost(FSQRT_COST); -+ format %{ "fsqrt.s $dst, $src\t#@sqrtF_reg" %} -+ ins_encode %{ -+ __ fsqrt_s(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src$$reg)); -+ %} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 5); + -+ ins_pipe(fp_sqrt_s); -+%} ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + -+instruct sqrtD_reg(fRegD dst, fRegD src) %{ -+ match(Set dst (SqrtD src)); ++ format %{ ++ "cmpxchg $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeB" ++ %} + -+ ins_cost(FSQRT_COST); -+ format %{ "fsqrt.d $dst, $src\t#@sqrtD_reg" %} + ins_encode %{ -+ __ fsqrt_d(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src$$reg)); ++ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, ++ /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); + %} + -+ ins_pipe(fp_sqrt_d); ++ ins_pipe(pipe_slow); +%} + -+// Arithmetic Instructions End ++instruct compareAndExchangeS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++%{ ++ match(Set res (CompareAndExchangeS mem (Binary oldval newval))); + -+// ============================================================================ -+// Logical Instructions ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 6); + -+// Register And -+instruct andI_reg_reg(iRegINoSp dst, iRegI src1, iRegI src2) %{ -+ match(Set dst (AndI src1 src2)); ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + -+ format %{ "andr $dst, $src1, $src2\t#@andI_reg_reg" %} ++ format %{ ++ "cmpxchg $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeS" ++ %} + -+ ins_cost(ALU_COST); + ins_encode %{ -+ __ andr(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, ++ /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_slow); +%} + -+// Immediate And -+instruct andI_reg_imm(iRegINoSp dst, iRegI src1, immIAdd src2) %{ -+ match(Set dst (AndI src1 src2)); ++instruct compareAndExchangeI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) ++%{ ++ match(Set res (CompareAndExchangeI mem (Binary oldval newval))); + -+ format %{ "andi $dst, $src1, $src2\t#@andI_reg_imm" %} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); ++ ++ effect(TEMP_DEF res); ++ ++ format %{ ++ "cmpxchg $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval, 
#@compareAndExchangeI" ++ %} + -+ ins_cost(ALU_COST); + ins_encode %{ -+ __ andi(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ (int32_t)($src2$$constant)); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); + %} + -+ ins_pipe(ialu_reg_imm); ++ ins_pipe(pipe_slow); +%} + -+// Register Or -+instruct orI_reg_reg(iRegINoSp dst, iRegI src1, iRegI src2) %{ -+ match(Set dst (OrI src1 src2)); ++instruct compareAndExchangeL(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval) ++%{ ++ match(Set res (CompareAndExchangeL mem (Binary oldval newval))); + -+ format %{ "orr $dst, $src1, $src2\t#@orI_reg_reg" %} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); ++ ++ effect(TEMP_DEF res); ++ ++ format %{ ++ "cmpxchg $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeL" ++ %} + -+ ins_cost(ALU_COST); + ins_encode %{ -+ __ orr(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_slow); +%} + -+// Immediate Or -+instruct orI_reg_imm(iRegINoSp dst, iRegI src1, immIAdd src2) %{ -+ match(Set dst (OrI src1 src2)); ++instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval) ++%{ ++ match(Set res (CompareAndExchangeN mem (Binary oldval newval))); + -+ format %{ "ori $dst, $src1, $src2\t#@orI_reg_imm" %} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 3); ++ ++ effect(TEMP_DEF res); ++ ++ format %{ ++ "cmpxchg $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeN" ++ %} + -+ ins_cost(ALU_COST); + ins_encode %{ -+ __ ori(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ (int32_t)($src2$$constant)); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); + %} + -+ ins_pipe(ialu_reg_imm); ++ ins_pipe(pipe_slow); +%} + -+// Register Xor -+instruct xorI_reg_reg(iRegINoSp dst, iRegI src1, iRegI src2) %{ -+ match(Set dst (XorI src1 src2)); ++instruct compareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) ++%{ ++ predicate(n->as_LoadStore()->barrier_data() == 0); ++ match(Set res (CompareAndExchangeP mem (Binary oldval newval))); + -+ format %{ "xorr $dst, $src1, $src2\t#@xorI_reg_reg" %} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); ++ ++ effect(TEMP_DEF res); ++ ++ format %{ ++ "cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeP" ++ %} + -+ ins_cost(ALU_COST); + ins_encode %{ -+ __ xorr(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_slow); +%} + -+// Immediate Xor -+instruct xorI_reg_imm(iRegINoSp dst, iRegI src1, immIAdd src2) %{ -+ match(Set dst (XorI src1 src2)); -+ -+ format %{ "xori $dst, $src1, $src2\t#@xorI_reg_imm" %} ++instruct compareAndExchangeBAcq(iRegINoSp res, 
indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_cost(ALU_COST); -+ ins_encode %{ -+ __ xori(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ (int32_t)($src2$$constant)); -+ %} ++ match(Set res (CompareAndExchangeB mem (Binary oldval newval))); + -+ ins_pipe(ialu_reg_imm); -+%} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 5); + -+// Register And Long -+instruct andL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{ -+ match(Set dst (AndL src1 src2)); ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + -+ format %{ "andr $dst, $src1, $src2\t#@andL_reg_reg" %} ++ format %{ ++ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeBAcq" ++ %} + -+ ins_cost(ALU_COST); + ins_encode %{ -+ __ andr(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, ++ /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_slow); +%} + -+// Immediate And Long -+instruct andL_reg_imm(iRegLNoSp dst, iRegL src1, immLAdd src2) %{ -+ match(Set dst (AndL src1 src2)); -+ -+ format %{ "andi $dst, $src1, $src2\t#@andL_reg_imm" %} ++instruct compareAndExchangeSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_cost(ALU_COST); -+ ins_encode %{ -+ __ andi(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ (int32_t)($src2$$constant)); -+ %} ++ match(Set res (CompareAndExchangeS mem (Binary oldval newval))); + -+ ins_pipe(ialu_reg_imm); -+%} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 6); + -+// Register Or Long -+instruct orL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{ -+ match(Set dst (OrL src1 src2)); ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + -+ format %{ "orr $dst, $src1, $src2\t#@orL_reg_reg" %} ++ format %{ ++ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeSAcq" ++ %} + -+ ins_cost(ALU_COST); + ins_encode %{ -+ __ orr(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, ++ /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_slow); +%} + -+// Immediate Or Long -+instruct orL_reg_imm(iRegLNoSp dst, iRegL src1, immLAdd src2) %{ -+ match(Set dst (OrL src1 src2)); ++instruct compareAndExchangeIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ format %{ "ori $dst, $src1, $src2\t#@orL_reg_imm" %} ++ match(Set res (CompareAndExchangeI mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); ++ ++ effect(TEMP_DEF res); ++ ++ format %{ ++ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval 
then $mem <-- $newval, #@compareAndExchangeIAcq" ++ %} + -+ ins_cost(ALU_COST); + ins_encode %{ -+ __ ori(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ (int32_t)($src2$$constant)); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); + %} + -+ ins_pipe(ialu_reg_imm); ++ ins_pipe(pipe_slow); +%} + -+// Register Xor Long -+instruct xorL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{ -+ match(Set dst (XorL src1 src2)); ++instruct compareAndExchangeLAcq(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ format %{ "xorr $dst, $src1, $src2\t#@xorL_reg_reg" %} ++ match(Set res (CompareAndExchangeL mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); ++ ++ effect(TEMP_DEF res); ++ ++ format %{ ++ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeLAcq" ++ %} + -+ ins_cost(ALU_COST); + ins_encode %{ -+ __ xorr(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_slow); +%} + -+// Immediate Xor Long -+instruct xorL_reg_imm(iRegLNoSp dst, iRegL src1, immLAdd src2) %{ -+ match(Set dst (XorL src1 src2)); ++instruct compareAndExchangeNAcq(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_cost(ALU_COST); -+ format %{ "xori $dst, $src1, $src2\t#@xorL_reg_imm" %} ++ match(Set res (CompareAndExchangeN mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); ++ ++ effect(TEMP_DEF res); ++ ++ format %{ ++ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeNAcq" ++ %} + + ins_encode %{ -+ __ xori(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ (int32_t)($src2$$constant)); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); + %} + -+ ins_pipe(ialu_reg_imm); ++ ins_pipe(pipe_slow); +%} + -+// ============================================================================ -+// BSWAP Instructions ++instruct compareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) ++%{ ++ predicate(needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == 0)); + -+instruct bytes_reverse_int(rFlagsReg cr, iRegINoSp dst, iRegIorL2I src) %{ -+ match(Set dst (ReverseBytesI src)); -+ effect(TEMP cr); ++ match(Set res (CompareAndExchangeP mem (Binary oldval newval))); + -+ ins_cost(ALU_COST * 13); -+ format %{ "revb_w_w $dst, $src\t#@bytes_reverse_int" %} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); ++ ++ effect(TEMP_DEF res); ++ ++ format %{ ++ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangePAcq" ++ %} + + ins_encode %{ -+ __ revb_w_w(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); + %} + -+ 
ins_pipe(ialu_reg); ++ ins_pipe(pipe_slow); +%} + -+instruct bytes_reverse_long(rFlagsReg cr, iRegLNoSp dst, iRegL src) %{ -+ match(Set dst (ReverseBytesL src)); -+ effect(TEMP cr); ++instruct weakCompareAndSwapB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++%{ ++ match(Set res (WeakCompareAndSwapB mem (Binary oldval newval))); + -+ ins_cost(ALU_COST * 29); -+ format %{ "revb $dst, $src\t#@bytes_reverse_long" %} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 6); ++ ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ++ format %{ ++ "cmpxchg_weak $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapB" ++ %} + + ins_encode %{ -+ __ revb(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, ++ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_slow); +%} + -+instruct bytes_reverse_unsigned_short(iRegINoSp dst, iRegIorL2I src) %{ -+ match(Set dst (ReverseBytesUS src)); ++instruct weakCompareAndSwapS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++%{ ++ match(Set res (WeakCompareAndSwapS mem (Binary oldval newval))); + -+ ins_cost(ALU_COST * 5); -+ format %{ "revb_h_h_u $dst, $src\t#@bytes_reverse_unsigned_short" %} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 7); ++ ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ++ format %{ ++ "cmpxchg_weak $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapS" ++ %} + + ins_encode %{ -+ __ revb_h_h_u(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, ++ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_slow); +%} + -+instruct bytes_reverse_short(iRegINoSp dst, iRegIorL2I src) %{ -+ match(Set dst (ReverseBytesS src)); ++instruct weakCompareAndSwapI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) ++%{ ++ match(Set res (WeakCompareAndSwapI mem (Binary oldval newval))); + -+ ins_cost(ALU_COST * 5); -+ format %{ "revb_h_h $dst, $src\t#@bytes_reverse_short" %} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); ++ ++ format %{ ++ "cmpxchg_weak $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapI" ++ %} + + ins_encode %{ -+ __ revb_h_h(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_slow); +%} + -+// 
============================================================================ -+// MemBar Instruction ++instruct weakCompareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval) ++%{ ++ match(Set res (WeakCompareAndSwapL mem (Binary oldval newval))); + -+instruct load_fence() %{ -+ match(LoadFence); -+ ins_cost(ALU_COST); ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); + -+ format %{ "#@load_fence" %} ++ format %{ ++ "cmpxchg_weak $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapL" ++ %} + + ins_encode %{ -+ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); ++ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} -+ ins_pipe(pipe_serial); ++ ++ ins_pipe(pipe_slow); +%} + -+instruct membar_acquire() %{ -+ match(MemBarAcquire); -+ ins_cost(ALU_COST); ++instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval) ++%{ ++ match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); + -+ format %{ "#@membar_acquire\n\t" -+ "fence ir iorw" %} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 4); ++ ++ format %{ ++ "cmpxchg_weak $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapN" ++ %} + + ins_encode %{ -+ __ block_comment("membar_acquire"); -+ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); ++ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} + -+ ins_pipe(pipe_serial); ++ ins_pipe(pipe_slow); +%} + -+instruct membar_acquire_lock() %{ -+ match(MemBarAcquireLock); -+ ins_cost(0); ++instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) ++%{ ++ predicate(n->as_LoadStore()->barrier_data() == 0); ++ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); + -+ format %{ "#@membar_acquire_lock (elided)" %} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); ++ ++ format %{ ++ "cmpxchg_weak $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapP" ++ %} + + ins_encode %{ -+ __ block_comment("membar_acquire_lock (elided)"); ++ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} + -+ ins_pipe(pipe_serial); ++ ins_pipe(pipe_slow); +%} + -+instruct store_fence() %{ -+ match(StoreFence); -+ ins_cost(ALU_COST); ++instruct weakCompareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ format %{ "#@store_fence" %} ++ match(Set res (WeakCompareAndSwapB mem (Binary oldval newval))); + -+ ins_encode %{ -+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); -+ %} -+ ins_pipe(pipe_serial); -+%} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 6); + 
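For orientation, a minimal Java sketch of the operations that end up in these compare-and-swap, compare-and-exchange and weak compare-and-swap patterns (the Counter class and its field are assumptions for illustration, not part of the port): VarHandle.compareAndSet is intrinsified to CompareAndSwap* nodes, compareAndExchange to CompareAndExchange* nodes, and weakCompareAndSetPlain to WeakCompareAndSwap* nodes; the sequentially consistent forms are, roughly, the ones that the needs_acquiring_load_reserved predicates steer to the *Acq variants.

import java.lang.invoke.MethodHandles;
import java.lang.invoke.VarHandle;

// Illustrative only: class and field names are assumptions; the point is
// which ideal nodes C2 builds for each VarHandle call.
class Counter {
    private volatile int value;

    private static final VarHandle VALUE;
    static {
        try {
            VALUE = MethodHandles.lookup()
                    .findVarHandle(Counter.class, "value", int.class);
        } catch (ReflectiveOperationException e) {
            throw new ExceptionInInitializerError(e);
        }
    }

    // CompareAndSwapI: boolean result; seq_cst ordering, so typically the Acq pattern
    boolean cas(int expect, int update) {
        return VALUE.compareAndSet(this, expect, update);
    }

    // CompareAndExchangeI: returns the value observed in memory
    int cae(int expect, int update) {
        return (int) VALUE.compareAndExchange(this, expect, update);
    }

    // WeakCompareAndSwapI: relaxed ordering, may fail spuriously
    boolean weakCas(int expect, int update) {
        return VALUE.weakCompareAndSetPlain(this, expect, update);
    }
}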
-+instruct membar_release() %{ -+ match(MemBarRelease); -+ ins_cost(ALU_COST); ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + -+ format %{ "#@membar_release\n\t" -+ "fence iorw ow" %} ++ format %{ ++ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapBAcq" ++ %} + + ins_encode %{ -+ __ block_comment("membar_release"); -+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); ++ __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, ++ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} -+ ins_pipe(pipe_serial); ++ ++ ins_pipe(pipe_slow); +%} + -+instruct membar_storestore() %{ -+ match(MemBarStoreStore); -+ ins_cost(ALU_COST); ++instruct weakCompareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ format %{ "MEMBAR-store-store\t#@membar_storestore" %} ++ match(Set res (WeakCompareAndSwapS mem (Binary oldval newval))); + -+ ins_encode %{ -+ __ membar(MacroAssembler::StoreStore); -+ %} -+ ins_pipe(pipe_serial); -+%} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 7); + -+instruct membar_release_lock() %{ -+ match(MemBarReleaseLock); -+ ins_cost(0); ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + -+ format %{ "#@membar_release_lock (elided)" %} ++ format %{ ++ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapSAcq" ++ %} + + ins_encode %{ -+ __ block_comment("membar_release_lock (elided)"); ++ __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, ++ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} + -+ ins_pipe(pipe_serial); ++ ins_pipe(pipe_slow); +%} + -+instruct membar_volatile() %{ -+ match(MemBarVolatile); -+ ins_cost(ALU_COST); ++instruct weakCompareAndSwapIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ format %{ "#@membar_volatile\n\t" -+ "fence iorw iorw"%} ++ match(Set res (WeakCompareAndSwapI mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); ++ ++ format %{ ++ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapIAcq" ++ %} + + ins_encode %{ -+ __ block_comment("membar_volatile"); -+ __ membar(MacroAssembler::StoreLoad); ++ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} + -+ ins_pipe(pipe_serial); ++ ins_pipe(pipe_slow); +%} + -+// ============================================================================ -+// Cast Instructions (Java-level type cast) ++instruct weakCompareAndSwapLAcq(iRegINoSp res, 
indirect mem, iRegL oldval, iRegL newval) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+instruct castX2P(iRegPNoSp dst, iRegL src) %{ -+ match(Set dst (CastX2P src)); ++ match(Set res (WeakCompareAndSwapL mem (Binary oldval newval))); + -+ ins_cost(ALU_COST); -+ format %{ "mv $dst, $src\t# long -> ptr, #@castX2P" %} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); ++ ++ format %{ ++ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapLAcq" ++ %} + + ins_encode %{ -+ if ($dst$$reg != $src$$reg) { -+ __ mv(as_Register($dst$$reg), as_Register($src$$reg)); -+ } ++ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_slow); +%} + -+instruct castP2X(iRegLNoSp dst, iRegP src) %{ -+ match(Set dst (CastP2X src)); ++instruct weakCompareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_cost(ALU_COST); -+ format %{ "mv $dst, $src\t# ptr -> long, #@castP2X" %} ++ match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 4); ++ ++ format %{ ++ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapNAcq" ++ %} + + ins_encode %{ -+ if ($dst$$reg != $src$$reg) { -+ __ mv(as_Register($dst$$reg), as_Register($src$$reg)); -+ } ++ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_slow); +%} + -+instruct castPP(iRegPNoSp dst) ++instruct weakCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) +%{ -+ match(Set dst (CastPP dst)); -+ ins_cost(0); ++ predicate(needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == 0)); + -+ size(0); -+ format %{ "# castPP of $dst, #@castPP" %} -+ ins_encode(/* empty encoding */); -+ ins_pipe(pipe_class_empty); -+%} ++ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); + -+instruct castII(iRegI dst) -+%{ -+ match(Set dst (CastII dst)); ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); + -+ size(0); -+ format %{ "# castII of $dst, #@castII" %} -+ ins_encode(/* empty encoding */); -+ ins_cost(0); -+ ins_pipe(pipe_class_empty); -+%} ++ format %{ ++ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapPAcq" ++ %} + -+instruct checkCastPP(iRegPNoSp dst) -+%{ -+ match(Set dst (CheckCastPP dst)); ++ ins_encode %{ ++ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); ++ __ xori($res$$Register, $res$$Register, 1); ++ %} + -+ size(0); -+ ins_cost(0); -+ format %{ "# checkcastPP of $dst, #@checkCastPP" %} -+ ins_encode(/* empty encoding */); -+ ins_pipe(pipe_class_empty); ++ ins_pipe(pipe_slow); +%} + -+// 
============================================================================ -+// Convert Instructions -+ -+// int to bool -+instruct convI2Bool(iRegINoSp dst, iRegI src) ++instruct get_and_setI(indirect mem, iRegI newv, iRegINoSp prev) +%{ -+ match(Set dst (Conv2B src)); ++ match(Set prev (GetAndSetI mem newv)); + + ins_cost(ALU_COST); -+ format %{ "snez $dst, $src\t#@convI2Bool" %} ++ ++ format %{ "atomic_xchgw $prev, $newv, [$mem]\t#@get_and_setI" %} + + ins_encode %{ -+ __ snez(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_serial); +%} + -+// pointer to bool -+instruct convP2Bool(iRegINoSp dst, iRegP src) ++instruct get_and_setL(indirect mem, iRegL newv, iRegLNoSp prev) +%{ -+ match(Set dst (Conv2B src)); ++ match(Set prev (GetAndSetL mem newv)); + + ins_cost(ALU_COST); -+ format %{ "snez $dst, $src\t#@convP2Bool" %} ++ ++ format %{ "atomic_xchg $prev, $newv, [$mem]\t#@get_and_setL" %} + + ins_encode %{ -+ __ snez(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_serial); +%} + -+// int <-> long -+ -+instruct convI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src) ++instruct get_and_setN(indirect mem, iRegN newv, iRegINoSp prev) +%{ -+ match(Set dst (ConvI2L src)); ++ match(Set prev (GetAndSetN mem newv)); + + ins_cost(ALU_COST); -+ format %{ "addw $dst, $src, zr\t#@convI2L_reg_reg" %} -+ ins_encode %{ -+ __ addw(as_Register($dst$$reg), as_Register($src$$reg), zr); -+ %} -+ ins_pipe(ialu_reg); -+%} -+ -+instruct convL2I_reg(iRegINoSp dst, iRegL src) %{ -+ match(Set dst (ConvL2I src)); + -+ ins_cost(ALU_COST); -+ format %{ "addw $dst, $src, zr\t#@convL2I_reg" %} ++ format %{ "atomic_xchgwu $prev, $newv, [$mem]\t#@get_and_setN" %} + + ins_encode %{ -+ __ addw(as_Register($dst$$reg), as_Register($src$$reg), zr); ++ __ atomic_xchgwu($prev$$Register, $newv$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_serial); +%} + -+// int to unsigned long (Zero-extend) -+instruct convI2UL_reg_reg(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask) ++instruct get_and_setP(indirect mem, iRegP newv, iRegPNoSp prev) +%{ -+ match(Set dst (AndL (ConvI2L src) mask)); ++ predicate(n->as_LoadStore()->barrier_data() == 0); ++ match(Set prev (GetAndSetP mem newv)); + -+ ins_cost(ALU_COST * 2); -+ format %{ "zero_extend $dst, $src, 32\t# i2ul, #@convI2UL_reg_reg" %} ++ ins_cost(ALU_COST); ++ ++ format %{ "atomic_xchg $prev, $newv, [$mem]\t#@get_and_setP" %} + + ins_encode %{ -+ __ zero_extend(as_Register($dst$$reg), as_Register($src$$reg), 32); ++ __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(ialu_reg_shift); ++ ins_pipe(pipe_serial); +%} + -+// float <-> double ++instruct get_and_setIAcq(indirect mem, iRegI newv, iRegINoSp prev) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+instruct convF2D_reg(fRegD dst, fRegF src) %{ -+ match(Set dst (ConvF2D src)); ++ match(Set prev (GetAndSetI mem newv)); + -+ ins_cost(XFER_COST); -+ format %{ "fcvt.d.s $dst, $src\t#@convF2D_reg" %} ++ ins_cost(ALU_COST); ++ ++ format %{ "atomic_xchgw_acq $prev, $newv, [$mem]\t#@get_and_setIAcq" %} + + ins_encode %{ -+ __ fcvt_d_s(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg)); ++ __ atomic_xchgalw($prev$$Register, $newv$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(fp_f2d); ++ 
ins_pipe(pipe_serial); +%} + -+instruct convD2F_reg(fRegF dst, fRegD src) %{ -+ match(Set dst (ConvD2F src)); ++instruct get_and_setLAcq(indirect mem, iRegL newv, iRegLNoSp prev) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_cost(XFER_COST); -+ format %{ "fcvt.s.d $dst, $src\t#@convD2F_reg" %} ++ match(Set prev (GetAndSetL mem newv)); ++ ++ ins_cost(ALU_COST); ++ ++ format %{ "atomic_xchg_acq $prev, $newv, [$mem]\t#@get_and_setLAcq" %} + + ins_encode %{ -+ __ fcvt_s_d(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg)); ++ __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(fp_d2f); ++ ins_pipe(pipe_serial); +%} + -+// float <-> int ++instruct get_and_setNAcq(indirect mem, iRegN newv, iRegINoSp prev) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+instruct convF2I_reg_reg(iRegINoSp dst, fRegF src) %{ -+ match(Set dst (ConvF2I src)); ++ match(Set prev (GetAndSetN mem newv)); + -+ ins_cost(XFER_COST); -+ format %{ "fcvt.w.s $dst, $src\t#@convF2I_reg_reg" %} ++ ins_cost(ALU_COST); ++ ++ format %{ "atomic_xchgwu_acq $prev, $newv, [$mem]\t#@get_and_setNAcq" %} + + ins_encode %{ -+ __ fcvt_w_s_safe($dst$$Register, $src$$FloatRegister); ++ __ atomic_xchgalwu($prev$$Register, $newv$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(fp_f2i); ++ ins_pipe(pipe_serial); +%} + -+instruct convI2F_reg_reg(fRegF dst, iRegIorL2I src) %{ -+ match(Set dst (ConvI2F src)); ++instruct get_and_setPAcq(indirect mem, iRegP newv, iRegPNoSp prev) ++%{ ++ predicate(needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == 0)); + -+ ins_cost(XFER_COST); -+ format %{ "fcvt.s.w $dst, $src\t#@convI2F_reg_reg" %} ++ match(Set prev (GetAndSetP mem newv)); ++ ++ ins_cost(ALU_COST); ++ ++ format %{ "atomic_xchg_acq $prev, $newv, [$mem]\t#@get_and_setPAcq" %} + + ins_encode %{ -+ __ fcvt_s_w(as_FloatRegister($dst$$reg), as_Register($src$$reg)); ++ __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(fp_i2f); ++ ins_pipe(pipe_serial); +%} + -+// float <-> long ++instruct get_and_addL(indirect mem, iRegLNoSp newval, iRegL incr) ++%{ ++ match(Set newval (GetAndAddL mem incr)); + -+instruct convF2L_reg_reg(iRegLNoSp dst, fRegF src) %{ -+ match(Set dst (ConvF2L src)); ++ ins_cost(ALU_COST); + -+ ins_cost(XFER_COST); -+ format %{ "fcvt.l.s $dst, $src\t#@convF2L_reg_reg" %} ++ format %{ "get_and_addL $newval, [$mem], $incr\t#@get_and_addL" %} + + ins_encode %{ -+ __ fcvt_l_s_safe($dst$$Register, $src$$FloatRegister); ++ __ atomic_add($newval$$Register, $incr$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(fp_f2l); ++ ins_pipe(pipe_serial); +%} + -+instruct convL2F_reg_reg(fRegF dst, iRegL src) %{ -+ match(Set dst (ConvL2F src)); ++instruct get_and_addL_no_res(indirect mem, Universe dummy, iRegL incr) ++%{ ++ predicate(n->as_LoadStore()->result_not_used()); + -+ ins_cost(XFER_COST); -+ format %{ "fcvt.s.l $dst, $src\t#@convL2F_reg_reg" %} ++ match(Set dummy (GetAndAddL mem incr)); ++ ++ ins_cost(ALU_COST); ++ ++ format %{ "get_and_addL [$mem], $incr\t#@get_and_addL_no_res" %} + + ins_encode %{ -+ __ fcvt_s_l(as_FloatRegister($dst$$reg), as_Register($src$$reg)); ++ __ atomic_add(noreg, $incr$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(fp_l2f); ++ ins_pipe(pipe_serial); +%} + -+// double <-> int ++instruct get_and_addLi(indirect mem, iRegLNoSp newval, immLAdd incr) ++%{ ++ match(Set newval (GetAndAddL mem incr)); + -+instruct convD2I_reg_reg(iRegINoSp dst, fRegD src) %{ -+ 
match(Set dst (ConvD2I src)); ++ ins_cost(ALU_COST); + -+ ins_cost(XFER_COST); -+ format %{ "fcvt.w.d $dst, $src\t#@convD2I_reg_reg" %} ++ format %{ "get_and_addL $newval, [$mem], $incr\t#@get_and_addLi" %} + + ins_encode %{ -+ __ fcvt_w_d_safe($dst$$Register, $src$$FloatRegister); ++ __ atomic_add($newval$$Register, $incr$$constant, as_Register($mem$$base)); + %} + -+ ins_pipe(fp_d2i); ++ ins_pipe(pipe_serial); +%} + -+instruct convI2D_reg_reg(fRegD dst, iRegIorL2I src) %{ -+ match(Set dst (ConvI2D src)); ++instruct get_and_addLi_no_res(indirect mem, Universe dummy, immLAdd incr) ++%{ ++ predicate(n->as_LoadStore()->result_not_used()); + -+ ins_cost(XFER_COST); -+ format %{ "fcvt.d.w $dst, $src\t#@convI2D_reg_reg" %} ++ match(Set dummy (GetAndAddL mem incr)); ++ ++ ins_cost(ALU_COST); ++ ++ format %{ "get_and_addL [$mem], $incr\t#@get_and_addLi_no_res" %} + + ins_encode %{ -+ __ fcvt_d_w(as_FloatRegister($dst$$reg), as_Register($src$$reg)); ++ __ atomic_add(noreg, $incr$$constant, as_Register($mem$$base)); + %} + -+ ins_pipe(fp_i2d); ++ ins_pipe(pipe_serial); +%} + -+// double <-> long ++instruct get_and_addI(indirect mem, iRegINoSp newval, iRegIorL2I incr) ++%{ ++ match(Set newval (GetAndAddI mem incr)); + -+instruct convD2L_reg_reg(iRegLNoSp dst, fRegD src) %{ -+ match(Set dst (ConvD2L src)); ++ ins_cost(ALU_COST); + -+ ins_cost(XFER_COST); -+ format %{ "fcvt.l.d $dst, $src\t#@convD2L_reg_reg" %} ++ format %{ "get_and_addI $newval, [$mem], $incr\t#@get_and_addI" %} + + ins_encode %{ -+ __ fcvt_l_d_safe($dst$$Register, $src$$FloatRegister); ++ __ atomic_addw($newval$$Register, $incr$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(fp_d2l); ++ ins_pipe(pipe_serial); +%} + -+instruct convL2D_reg_reg(fRegD dst, iRegL src) %{ -+ match(Set dst (ConvL2D src)); -+ -+ ins_cost(XFER_COST); -+ format %{ "fcvt.d.l $dst, $src\t#@convL2D_reg_reg" %} -+ -+ ins_encode %{ -+ __ fcvt_d_l(as_FloatRegister($dst$$reg), as_Register($src$$reg)); -+ %} ++instruct get_and_addI_no_res(indirect mem, Universe dummy, iRegIorL2I incr) ++%{ ++ predicate(n->as_LoadStore()->result_not_used()); + -+ ins_pipe(fp_l2d); -+%} ++ match(Set dummy (GetAndAddI mem incr)); + -+// Convert oop into int for vectors alignment masking -+instruct convP2I(iRegINoSp dst, iRegP src) %{ -+ match(Set dst (ConvL2I (CastP2X src))); ++ ins_cost(ALU_COST); + -+ ins_cost(ALU_COST * 2); -+ format %{ "zero_extend $dst, $src, 32\t# ptr -> int, #@convP2I" %} ++ format %{ "get_and_addI [$mem], $incr\t#@get_and_addI_no_res" %} + + ins_encode %{ -+ __ zero_extend($dst$$Register, $src$$Register, 32); ++ __ atomic_addw(noreg, $incr$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_serial); +%} + -+// Convert compressed oop into int for vectors alignment masking -+// in case of 32bit oops (heap < 4Gb). 
-+instruct convN2I(iRegINoSp dst, iRegN src) ++instruct get_and_addIi(indirect mem, iRegINoSp newval, immIAdd incr) +%{ -+ predicate(Universe::narrow_oop_shift() == 0); -+ match(Set dst (ConvL2I (CastP2X (DecodeN src)))); ++ match(Set newval (GetAndAddI mem incr)); + + ins_cost(ALU_COST); -+ format %{ "mv $dst, $src\t# compressed ptr -> int, #@convN2I" %} ++ ++ format %{ "get_and_addI $newval, [$mem], $incr\t#@get_and_addIi" %} + + ins_encode %{ -+ __ mv($dst$$Register, $src$$Register); ++ __ atomic_addw($newval$$Register, $incr$$constant, as_Register($mem$$base)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_serial); +%} + -+// Convert oop pointer into compressed form -+instruct encodeHeapOop(iRegNNoSp dst, iRegP src) %{ -+ match(Set dst (EncodeP src)); -+ ins_cost(ALU_COST); -+ format %{ "encode_heap_oop $dst, $src\t#@encodeHeapOop" %} -+ ins_encode %{ -+ Register s = $src$$Register; -+ Register d = $dst$$Register; -+ __ encode_heap_oop(d, s); -+ %} -+ ins_pipe(ialu_reg); -+%} ++instruct get_and_addIi_no_res(indirect mem, Universe dummy, immIAdd incr) ++%{ ++ predicate(n->as_LoadStore()->result_not_used()); + -+instruct decodeHeapOop(iRegPNoSp dst, iRegN src) %{ -+ predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull && -+ n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant); -+ match(Set dst (DecodeN src)); ++ match(Set dummy (GetAndAddI mem incr)); + -+ ins_cost(0); -+ format %{ "decode_heap_oop $dst, $src\t#@decodeHeapOop" %} -+ ins_encode %{ -+ Register s = $src$$Register; -+ Register d = $dst$$Register; -+ __ decode_heap_oop(d, s); -+ %} -+ ins_pipe(ialu_reg); -+%} ++ ins_cost(ALU_COST); + -+instruct decodeHeapOop_not_null(iRegPNoSp dst, iRegN src) %{ -+ predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull || -+ n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant); -+ match(Set dst (DecodeN src)); ++ format %{ "get_and_addI [$mem], $incr\t#@get_and_addIi_no_res" %} + -+ ins_cost(0); -+ format %{ "decode_heap_oop_not_null $dst, $src\t#@decodeHeapOop_not_null" %} + ins_encode %{ -+ Register s = $src$$Register; -+ Register d = $dst$$Register; -+ __ decode_heap_oop_not_null(d, s); ++ __ atomic_addw(noreg, $incr$$constant, as_Register($mem$$base)); + %} -+ ins_pipe(ialu_reg); ++ ++ ins_pipe(pipe_serial); +%} + -+// Convert klass pointer into compressed form. 
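The patterns above cover compressed oops (EncodeP / DecodeN); the ones below cover compressed klass pointers (EncodePKlass / DecodeNKlass). A minimal Java sketch of code whose compiled form exercises the oop variants, assuming UseCompressedOops is enabled (the Node type is an assumption for illustration):

// Illustrative only: with compressed oops, a reference-field load is a LoadN
// followed by DecodeN (decodeHeapOop above), and a reference-field store is
// an EncodeP feeding a StoreN (encodeHeapOop above).
class Node {
    Node next;

    static Node head(Node n) {
        return n.next;      // LoadN + DecodeN
    }

    static void link(Node a, Node b) {
        a.next = b;         // EncodeP + StoreN
    }
}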
-+instruct encodeKlass_not_null(iRegNNoSp dst, iRegP src) %{ -+ match(Set dst (EncodePKlass src)); ++instruct get_and_addLAcq(indirect mem, iRegLNoSp newval, iRegL incr) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); ++ ++ match(Set newval (GetAndAddL mem incr)); + + ins_cost(ALU_COST); -+ format %{ "encode_klass_not_null $dst, $src\t#@encodeKlass_not_null" %} ++ ++ format %{ "get_and_addL_acq $newval, [$mem], $incr\t#@get_and_addLAcq" %} + + ins_encode %{ -+ Register src_reg = as_Register($src$$reg); -+ Register dst_reg = as_Register($dst$$reg); -+ __ encode_klass_not_null(dst_reg, src_reg, t0); ++ __ atomic_addal($newval$$Register, $incr$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_serial); +%} + -+instruct decodeKlass_not_null(iRegPNoSp dst, iRegN src) %{ -+ predicate(!maybe_use_tmp_register_decoding_klass()); ++instruct get_and_addL_no_resAcq(indirect mem, Universe dummy, iRegL incr) %{ ++ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_reserved(n)); + -+ match(Set dst (DecodeNKlass src)); ++ match(Set dummy (GetAndAddL mem incr)); + + ins_cost(ALU_COST); -+ format %{ "decode_klass_not_null $dst, $src\t#@decodeKlass_not_null" %} ++ ++ format %{ "get_and_addL_acq [$mem], $incr\t#@get_and_addL_no_resAcq" %} + + ins_encode %{ -+ Register src_reg = as_Register($src$$reg); -+ Register dst_reg = as_Register($dst$$reg); -+ __ decode_klass_not_null(dst_reg, src_reg, UseCompressedOops ? xheapbase : t0); ++ __ atomic_addal(noreg, $incr$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_serial); +%} + -+instruct decodeKlass_not_null_with_tmp(iRegPNoSp dst, iRegN src, rFlagsReg tmp) %{ -+ predicate(maybe_use_tmp_register_decoding_klass()); -+ -+ match(Set dst (DecodeNKlass src)); ++instruct get_and_addLiAcq(indirect mem, iRegLNoSp newval, immLAdd incr) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ effect(TEMP tmp); ++ match(Set newval (GetAndAddL mem incr)); + + ins_cost(ALU_COST); -+ format %{ "decode_klass_not_null $dst, $src\t#@decodeKlass_not_null" %} ++ ++ format %{ "get_and_addL_acq $newval, [$mem], $incr\t#@get_and_addLiAcq" %} + + ins_encode %{ -+ Register src_reg = as_Register($src$$reg); -+ Register dst_reg = as_Register($dst$$reg); -+ Register tmp_reg = as_Register($tmp$$reg); -+ __ decode_klass_not_null(dst_reg, src_reg, tmp_reg); ++ __ atomic_addal($newval$$Register, $incr$$constant, as_Register($mem$$base)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_serial); +%} + -+// stack <-> reg and reg <-> reg shuffles with no conversion -+ -+instruct MoveF2I_stack_reg(iRegINoSp dst, stackSlotF src) %{ -+ -+ match(Set dst (MoveF2I src)); ++instruct get_and_addLi_no_resAcq(indirect mem, Universe dummy, immLAdd incr) ++%{ ++ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_reserved(n)); + -+ effect(DEF dst, USE src); ++ match(Set dummy (GetAndAddL mem incr)); + -+ ins_cost(LOAD_COST); ++ ins_cost(ALU_COST); + -+ format %{ "lw $dst, $src\t#@MoveF2I_stack_reg" %} ++ format %{ "get_and_addL_acq [$mem], $incr\t#@get_and_addLi_no_resAcq" %} + + ins_encode %{ -+ __ lw(as_Register($dst$$reg), Address(sp, $src$$disp)); ++ __ atomic_addal(noreg, $incr$$constant, as_Register($mem$$base)); + %} + -+ ins_pipe(iload_reg_reg); -+ ++ ins_pipe(pipe_serial); +%} + -+instruct MoveI2F_stack_reg(fRegF dst, stackSlotI src) %{ -+ -+ match(Set dst (MoveI2F src)); ++instruct get_and_addIAcq(indirect mem, iRegINoSp newval, iRegIorL2I incr) ++%{ ++ 
predicate(needs_acquiring_load_reserved(n)); + -+ effect(DEF dst, USE src); ++ match(Set newval (GetAndAddI mem incr)); + -+ ins_cost(LOAD_COST); ++ ins_cost(ALU_COST); + -+ format %{ "flw $dst, $src\t#@MoveI2F_stack_reg" %} ++ format %{ "get_and_addI_acq $newval, [$mem], $incr\t#@get_and_addIAcq" %} + + ins_encode %{ -+ __ flw(as_FloatRegister($dst$$reg), Address(sp, $src$$disp)); ++ __ atomic_addalw($newval$$Register, $incr$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_class_memory); -+ ++ ins_pipe(pipe_serial); +%} + -+instruct MoveD2L_stack_reg(iRegLNoSp dst, stackSlotD src) %{ -+ -+ match(Set dst (MoveD2L src)); ++instruct get_and_addI_no_resAcq(indirect mem, Universe dummy, iRegIorL2I incr) ++%{ ++ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_reserved(n)); + -+ effect(DEF dst, USE src); ++ match(Set dummy (GetAndAddI mem incr)); + -+ ins_cost(LOAD_COST); ++ ins_cost(ALU_COST); + -+ format %{ "ld $dst, $src\t#@MoveD2L_stack_reg" %} ++ format %{ "get_and_addI_acq [$mem], $incr\t#@get_and_addI_no_resAcq" %} + + ins_encode %{ -+ __ ld(as_Register($dst$$reg), Address(sp, $src$$disp)); ++ __ atomic_addalw(noreg, $incr$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(iload_reg_reg); -+ ++ ins_pipe(pipe_serial); +%} + -+instruct MoveL2D_stack_reg(fRegD dst, stackSlotL src) %{ -+ -+ match(Set dst (MoveL2D src)); ++instruct get_and_addIiAcq(indirect mem, iRegINoSp newval, immIAdd incr) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ effect(DEF dst, USE src); ++ match(Set newval (GetAndAddI mem incr)); + -+ ins_cost(LOAD_COST); ++ ins_cost(ALU_COST); + -+ format %{ "fld $dst, $src\t#@MoveL2D_stack_reg" %} ++ format %{ "get_and_addI_acq $newval, [$mem], $incr\t#@get_and_addIiAcq" %} + + ins_encode %{ -+ __ fld(as_FloatRegister($dst$$reg), Address(sp, $src$$disp)); ++ __ atomic_addalw($newval$$Register, $incr$$constant, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_class_memory); -+ ++ ins_pipe(pipe_serial); +%} + -+instruct MoveF2I_reg_stack(stackSlotI dst, fRegF src) %{ -+ -+ match(Set dst (MoveF2I src)); ++instruct get_and_addIi_no_resAcq(indirect mem, Universe dummy, immIAdd incr) ++%{ ++ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_reserved(n)); + -+ effect(DEF dst, USE src); ++ match(Set dummy (GetAndAddI mem incr)); + -+ ins_cost(STORE_COST); ++ ins_cost(ALU_COST); + -+ format %{ "fsw $src, $dst\t#@MoveF2I_reg_stack" %} ++ format %{ "get_and_addI_acq [$mem], $incr\t#@get_and_addIi_no_resAcq" %} + + ins_encode %{ -+ __ fsw(as_FloatRegister($src$$reg), Address(sp, $dst$$disp)); ++ __ atomic_addalw(noreg, $incr$$constant, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_class_memory); -+ ++ ins_pipe(pipe_serial); +%} + -+instruct MoveI2F_reg_stack(stackSlotF dst, iRegI src) %{ -+ -+ match(Set dst (MoveI2F src)); -+ -+ effect(DEF dst, USE src); ++// ============================================================================ ++// Arithmetic Instructions ++// + -+ ins_cost(STORE_COST); ++// Integer Addition + -+ format %{ "sw $src, $dst\t#@MoveI2F_reg_stack" %} ++// TODO ++// these currently employ operations which do not set CR and hence are ++// not flagged as killing CR but we would like to isolate the cases ++// where we want to set flags from those where we don't. need to work ++// out how to do that. 
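As a concrete illustration of how the addition patterns below are selected, a minimal Java sketch (the class and method names are assumptions, not part of the port): int addition becomes a C2 AddI node; the matcher picks addI_reg_reg (addw) for two register operands and addI_reg_imm (addiw) when the constant fits the immIAdd operand, i.e. the 12-bit signed immediate that addiw accepts.

class AddExample {
    static int sum(int a, int b) {
        int c = a + b;   // AddI(a, b)       -> addI_reg_reg (addw)
        return c + 42;   // AddI(c, ConI 42) -> addI_reg_imm (addiw)
    }
}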
++instruct addI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ ++ match(Set dst (AddI src1 src2)); ++ ++ ins_cost(ALU_COST); ++ format %{ "addw $dst, $src1, $src2\t#@addI_reg_reg" %} + + ins_encode %{ -+ __ sw(as_Register($src$$reg), Address(sp, $dst$$disp)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ addw(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} + -+ ins_pipe(istore_reg_reg); -+ ++ ins_pipe(ialu_reg_reg); +%} + -+instruct MoveD2L_reg_stack(stackSlotL dst, fRegD src) %{ -+ -+ match(Set dst (MoveD2L src)); -+ -+ effect(DEF dst, USE src); -+ -+ ins_cost(STORE_COST); ++instruct addI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAdd src2) %{ ++ match(Set dst (AddI src1 src2)); + -+ format %{ "fsd $dst, $src\t#@MoveD2L_reg_stack" %} ++ ins_cost(ALU_COST); ++ format %{ "addiw $dst, $src1, $src2\t#@addI_reg_imm" %} + + ins_encode %{ -+ __ fsd(as_FloatRegister($src$$reg), Address(sp, $dst$$disp)); ++ Assembler::CompressibleRegion cr(&_masm); ++ int32_t con = (int32_t)$src2$$constant; ++ __ addiw(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ $src2$$constant); + %} + -+ ins_pipe(pipe_class_memory); -+ ++ ins_pipe(ialu_reg_imm); +%} + -+instruct MoveL2D_reg_stack(stackSlotD dst, iRegL src) %{ -+ -+ match(Set dst (MoveL2D src)); -+ -+ effect(DEF dst, USE src); -+ -+ ins_cost(STORE_COST); ++instruct addI_reg_imm_l2i(iRegINoSp dst, iRegL src1, immIAdd src2) %{ ++ match(Set dst (AddI (ConvL2I src1) src2)); + -+ format %{ "sd $src, $dst\t#@MoveL2D_reg_stack" %} ++ ins_cost(ALU_COST); ++ format %{ "addiw $dst, $src1, $src2\t#@addI_reg_imm_l2i" %} + + ins_encode %{ -+ __ sd(as_Register($src$$reg), Address(sp, $dst$$disp)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ addiw(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ $src2$$constant); + %} + -+ ins_pipe(istore_reg_reg); -+ ++ ins_pipe(ialu_reg_imm); +%} + -+instruct MoveF2I_reg_reg(iRegINoSp dst, fRegF src) %{ -+ -+ match(Set dst (MoveF2I src)); -+ -+ effect(DEF dst, USE src); -+ -+ ins_cost(XFER_COST); ++// Pointer Addition ++instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{ ++ match(Set dst (AddP src1 src2)); + -+ format %{ "fmv.x.w $dst, $src\t#@MoveL2D_reg_stack" %} ++ ins_cost(ALU_COST); ++ format %{ "add $dst, $src1, $src2\t# ptr, #@addP_reg_reg" %} + + ins_encode %{ -+ __ fmv_x_w(as_Register($dst$$reg), as_FloatRegister($src$$reg)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ add(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} + -+ ins_pipe(fp_f2i); -+ ++ ins_pipe(ialu_reg_reg); +%} + -+instruct MoveI2F_reg_reg(fRegF dst, iRegI src) %{ -+ -+ match(Set dst (MoveI2F src)); -+ -+ effect(DEF dst, USE src); -+ -+ ins_cost(XFER_COST); -+ -+ format %{ "fmv.w.x $dst, $src\t#@MoveI2F_reg_reg" %} ++// If we shift more than 32 bits, we need not convert I2L. ++instruct lShiftL_regI_immGE32(iRegLNoSp dst, iRegI src, uimmI6_ge32 scale) %{ ++ match(Set dst (LShiftL (ConvI2L src) scale)); ++ ins_cost(ALU_COST); ++ format %{ "slli $dst, $src, $scale & 63\t#@lShiftL_regI_immGE32" %} + + ins_encode %{ -+ __ fmv_w_x(as_FloatRegister($dst$$reg), as_Register($src$$reg)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ slli(as_Register($dst$$reg), as_Register($src$$reg), $scale$$constant & 63); + %} + -+ ins_pipe(fp_i2f); -+ ++ ins_pipe(ialu_reg_shift); +%} + -+instruct MoveD2L_reg_reg(iRegLNoSp dst, fRegD src) %{ -+ -+ match(Set dst (MoveD2L src)); ++// Pointer Immediate Addition ++// n.b. 
this needs to be more expensive than using an indirect memory ++// operand ++instruct addP_reg_imm(iRegPNoSp dst, iRegP src1, immLAdd src2) %{ ++ match(Set dst (AddP src1 src2)); ++ ins_cost(ALU_COST); ++ format %{ "addi $dst, $src1, $src2\t# ptr, #@addP_reg_imm" %} + -+ effect(DEF dst, USE src); ++ ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); ++ // src2 is imm, so actually call the addi ++ __ add(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ $src2$$constant); ++ %} + -+ ins_cost(XFER_COST); ++ ins_pipe(ialu_reg_imm); ++%} + -+ format %{ "fmv.x.d $dst, $src\t#@MoveD2L_reg_reg" %} ++// Long Addition ++instruct addL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{ ++ match(Set dst (AddL src1 src2)); ++ ins_cost(ALU_COST); ++ format %{ "add $dst, $src1, $src2\t#@addL_reg_reg" %} + + ins_encode %{ -+ __ fmv_x_d(as_Register($dst$$reg), as_FloatRegister($src$$reg)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ add(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} + -+ ins_pipe(fp_d2l); -+ ++ ins_pipe(ialu_reg_reg); +%} + -+instruct MoveL2D_reg_reg(fRegD dst, iRegL src) %{ ++// No constant pool entries requiredLong Immediate Addition. ++instruct addL_reg_imm(iRegLNoSp dst, iRegL src1, immLAdd src2) %{ ++ match(Set dst (AddL src1 src2)); ++ ins_cost(ALU_COST); ++ format %{ "addi $dst, $src1, $src2\t#@addL_reg_imm" %} + -+ match(Set dst (MoveL2D src)); ++ ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); ++ // src2 is imm, so actually call the addi ++ __ add(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ $src2$$constant); ++ %} + -+ effect(DEF dst, USE src); ++ ins_pipe(ialu_reg_imm); ++%} + -+ ins_cost(XFER_COST); ++// Integer Subtraction ++instruct subI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ ++ match(Set dst (SubI src1 src2)); + -+ format %{ "fmv.d.x $dst, $src\t#@MoveD2L_reg_reg" %} ++ ins_cost(ALU_COST); ++ format %{ "subw $dst, $src1, $src2\t#@subI_reg_reg" %} + + ins_encode %{ -+ __ fmv_d_x(as_FloatRegister($dst$$reg), as_Register($src$$reg)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ subw(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} + -+ ins_pipe(fp_l2d); ++ ins_pipe(ialu_reg_reg); +%} + -+// ============================================================================ -+// Compare Instructions which set the result float comparisons in dest register. -+ -+instruct cmpF3_reg_reg(iRegINoSp dst, fRegF op1, fRegF op2) -+%{ -+ match(Set dst (CmpF3 op1 op2)); ++// Immediate Subtraction ++instruct subI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immISub src2) %{ ++ match(Set dst (SubI src1 src2)); + -+ ins_cost(XFER_COST * 2 + BRANCH_COST + ALU_COST); -+ format %{ "flt.s $dst, $op2, $op1\t#@cmpF3_reg_reg\n\t" -+ "bgtz $dst, done\n\t" -+ "feq.s $dst, $op1, $op2\n\t" -+ "addi $dst, $dst, -1\t#@cmpF3_reg_reg" -+ %} ++ ins_cost(ALU_COST); ++ format %{ "addiw $dst, $src1, -$src2\t#@subI_reg_imm" %} + + ins_encode %{ -+ // we want -1 for unordered or less than, 0 for equal and 1 for greater than. 
-+ __ float_compare(as_Register($dst$$reg), as_FloatRegister($op1$$reg), -+ as_FloatRegister($op2$$reg), -1 /*unordered_result < 0*/); ++ Assembler::CompressibleRegion cr(&_masm); ++ // src2 is imm, so actually call the addiw ++ __ subw(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ $src2$$constant); + %} + -+ ins_pipe(pipe_class_default); ++ ins_pipe(ialu_reg_imm); +%} + -+instruct cmpD3_reg_reg(iRegINoSp dst, fRegD op1, fRegD op2) -+%{ -+ match(Set dst (CmpD3 op1 op2)); -+ -+ ins_cost(XFER_COST * 2 + BRANCH_COST + ALU_COST); -+ format %{ "flt.d $dst, $op2, $op1\t#@cmpD3_reg_reg\n\t" -+ "bgtz $dst, done\n\t" -+ "feq.d $dst, $op1, $op2\n\t" -+ "addi $dst, $dst, -1\t#@cmpD3_reg_reg" -+ %} ++// Long Subtraction ++instruct subL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{ ++ match(Set dst (SubL src1 src2)); ++ ins_cost(ALU_COST); ++ format %{ "sub $dst, $src1, $src2\t#@subL_reg_reg" %} + + ins_encode %{ -+ // we want -1 for unordered or less than, 0 for equal and 1 for greater than. -+ __ double_compare(as_Register($dst$$reg), as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), -1 /*unordered_result < 0*/); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ sub(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} + -+ ins_pipe(pipe_class_default); ++ ins_pipe(ialu_reg_reg); +%} + -+instruct cmpL3_reg_reg(iRegINoSp dst, iRegL op1, iRegL op2) -+%{ -+ match(Set dst (CmpL3 op1 op2)); ++// No constant pool entries requiredLong Immediate Subtraction. ++instruct subL_reg_imm(iRegLNoSp dst, iRegL src1, immLSub src2) %{ ++ match(Set dst (SubL src1 src2)); ++ ins_cost(ALU_COST); ++ format %{ "addi $dst, $src1, -$src2\t#@subL_reg_imm" %} + -+ ins_cost(ALU_COST * 3 + BRANCH_COST); -+ format %{ "slt $dst, $op2, $op1\t#@cmpL3_reg_reg\n\t" -+ "bnez $dst, done\n\t" -+ "slt $dst, $op1, $op2\n\t" -+ "neg $dst, $dst\t#@cmpL3_reg_reg" -+ %} + ins_encode %{ -+ __ cmp_l2i(t0, as_Register($op1$$reg), as_Register($op2$$reg)); -+ __ mv(as_Register($dst$$reg), t0); ++ Assembler::CompressibleRegion cr(&_masm); ++ // src2 is imm, so actually call the addi ++ __ sub(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ $src2$$constant); + %} + -+ ins_pipe(pipe_class_default); ++ ins_pipe(ialu_reg_imm); +%} + -+instruct cmpLTMask_reg_reg(iRegINoSp dst, iRegI p, iRegI q) -+%{ -+ match(Set dst (CmpLTMask p q)); -+ -+ ins_cost(2 * ALU_COST); ++// Integer Negation (special case for sub) + -+ format %{ "slt $dst, $p, $q\t#@cmpLTMask_reg_reg\n\t" -+ "subw $dst, zr, $dst\t#@cmpLTMask_reg_reg" -+ %} ++instruct negI_reg(iRegINoSp dst, iRegIorL2I src, immI0 zero) %{ ++ match(Set dst (SubI zero src)); ++ ins_cost(ALU_COST); ++ format %{ "subw $dst, x0, $src\t# int, #@negI_reg" %} + + ins_encode %{ -+ __ slt(as_Register($dst$$reg), as_Register($p$$reg), as_Register($q$$reg)); -+ __ subw(as_Register($dst$$reg), zr, as_Register($dst$$reg)); ++ // actually call the subw ++ __ negw(as_Register($dst$$reg), ++ as_Register($src$$reg)); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(ialu_reg); +%} + -+instruct cmpLTMask_reg_zero(iRegINoSp dst, iRegIorL2I op, immI0 zero) -+%{ -+ match(Set dst (CmpLTMask op zero)); ++// Long Negation + ++instruct negL_reg(iRegLNoSp dst, iRegL src, immL0 zero) %{ ++ match(Set dst (SubL zero src)); + ins_cost(ALU_COST); -+ -+ format %{ "sraiw $dst, $dst, 31\t#@cmpLTMask_reg_reg" %} ++ format %{ "sub $dst, x0, $src\t# long, #@negL_reg" %} + + ins_encode %{ -+ __ sraiw(as_Register($dst$$reg), as_Register($op$$reg), 31); ++ // actually call the sub ++ __ 
neg(as_Register($dst$$reg), ++ as_Register($src$$reg)); + %} + -+ ins_pipe(ialu_reg_shift); ++ ins_pipe(ialu_reg); +%} + ++// Integer Multiply + -+// ============================================================================ -+// Max and Min -+ -+instruct minI_rReg(iRegINoSp dst, iRegI src1, iRegI src2) -+%{ -+ match(Set dst (MinI src1 src2)); -+ -+ effect(DEF dst, USE src1, USE src2); -+ -+ ins_cost(BRANCH_COST + ALU_COST * 2); -+ format %{ -+ "ble $src1, $src2, Lsrc1.\t#@minI_rReg\n\t" -+ "mv $dst, $src2\n\t" -+ "j Ldone\n\t" -+ "bind Lsrc1\n\t" -+ "mv $dst, $src1\n\t" -+ "bind\t#@minI_rReg" -+ %} ++instruct mulI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ ++ match(Set dst (MulI src1 src2)); ++ ins_cost(IMUL_COST); ++ format %{ "mulw $dst, $src1, $src2\t#@mulI" %} + ++ //this means 2 word multi, and no sign extend to 64 bits + ins_encode %{ -+ Label Lsrc1, Ldone; -+ __ ble(as_Register($src1$$reg), as_Register($src2$$reg), Lsrc1); -+ __ mv(as_Register($dst$$reg), as_Register($src2$$reg)); -+ __ j(Ldone); -+ __ bind(Lsrc1); -+ __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); -+ __ bind(Ldone); ++ // riscv64 mulw will sign-extension to high 32 bits in dst reg ++ __ mulw(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(imul_reg_reg); +%} + -+instruct maxI_rReg(iRegINoSp dst, iRegI src1, iRegI src2) -+%{ -+ match(Set dst (MaxI src1 src2)); -+ -+ effect(DEF dst, USE src1, USE src2); ++// Long Multiply + -+ ins_cost(BRANCH_COST + ALU_COST * 2); -+ format %{ -+ "bge $src1, $src2, Lsrc1\t#@maxI_rReg\n\t" -+ "mv $dst, $src2\n\t" -+ "j Ldone\n\t" -+ "bind Lsrc1\n\t" -+ "mv $dst, $src1\n\t" -+ "bind\t#@maxI_rReg" -+ %} ++instruct mulL(iRegLNoSp dst, iRegL src1, iRegL src2) %{ ++ match(Set dst (MulL src1 src2)); ++ ins_cost(IMUL_COST); ++ format %{ "mul $dst, $src1, $src2\t#@mulL" %} + + ins_encode %{ -+ Label Lsrc1, Ldone; -+ __ bge(as_Register($src1$$reg), as_Register($src2$$reg), Lsrc1); -+ __ mv(as_Register($dst$$reg), as_Register($src2$$reg)); -+ __ j(Ldone); -+ __ bind(Lsrc1); -+ __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); -+ __ bind(Ldone); -+ ++ __ mul(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(lmul_reg_reg); +%} + -+// ============================================================================ -+// Branch Instructions -+// Direct Branch. -+instruct branch(label lbl) ++instruct mulHiL_rReg(iRegLNoSp dst, iRegL src1, iRegL src2) +%{ -+ match(Goto); -+ -+ effect(USE lbl); -+ -+ ins_cost(BRANCH_COST); -+ format %{ "j $lbl\t#@branch" %} ++ match(Set dst (MulHiL src1 src2)); ++ ins_cost(IMUL_COST); ++ format %{ "mulh $dst, $src1, $src2\t# mulhi, #@mulHiL_rReg" %} + -+ ins_encode(riscv_enc_j(lbl)); ++ ins_encode %{ ++ __ mulh(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); ++ %} + -+ ins_pipe(pipe_branch); ++ ins_pipe(lmul_reg_reg); +%} + -+// ============================================================================ -+// Compare and Branch Instructions ++// Integer Divide + -+// Patterns for short (< 12KiB) variants ++instruct divI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ ++ match(Set dst (DivI src1 src2)); ++ ins_cost(IDIVSI_COST); ++ format %{ "divw $dst, $src1, $src2\t#@divI"%} + -+// Compare flags and branch near instructions. 
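The near patterns that follow carry ins_short_branch(1) and share their match rules with far_* variants, as the comment in each one notes; C2's branch shortening keeps the near form when the target lies within the short range and otherwise falls back to the far variant, which typically tests the inverted condition and takes an unconditional jump. A minimal Java sketch of code that reaches the CmpI-based patterns (the class and method names are assumptions for illustration):

class LoopExample {
    static int count(int n) {
        int s = 0;
        for (int i = 0; i < n; i++) {   // back edge: CountedLoopEnd (CmpI i n) -> cmpI_loop or far_cmpI_loop
            s += i;
        }
        return s;
    }

    static int classify(int a, int b) {
        if (a < b) {                    // If (CmpI a b); when kept as a branch -> cmpI_branch or far_cmpI_branch
            return -1;
        }
        return 1;
    }
}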
-+instruct cmpFlag_branch(cmpOpEqNe cmp, rFlagsReg cr, label lbl) %{ -+ match(If cmp cr); -+ effect(USE lbl); ++ ins_encode(riscv_enc_divw(dst, src1, src2)); ++ ins_pipe(idiv_reg_reg); ++%} + -+ ins_cost(BRANCH_COST); -+ format %{ "b$cmp $cr, zr, $lbl\t#@cmpFlag_branch" %} ++instruct signExtract(iRegINoSp dst, iRegIorL2I src1, immI_31 div1, immI_31 div2) %{ ++ match(Set dst (URShiftI (RShiftI src1 div1) div2)); ++ ins_cost(ALU_COST); ++ format %{ "srliw $dst, $src1, $div1\t# int signExtract, #@signExtract" %} + + ins_encode %{ -+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($cr$$reg), *($lbl$$label)); ++ __ srliw(as_Register($dst$$reg), as_Register($src1$$reg), 31); + %} -+ ins_pipe(pipe_cmpz_branch); -+ ins_short_branch(1); ++ ins_pipe(ialu_reg_shift); +%} + -+// Compare signed int and branch near instructions -+instruct cmpI_branch(cmpOp cmp, iRegI op1, iRegI op2, label lbl) -+%{ -+ // Same match rule as `far_cmpI_branch'. -+ match(If cmp (CmpI op1 op2)); ++// Long Divide + -+ effect(USE lbl); ++instruct divL(iRegLNoSp dst, iRegL src1, iRegL src2) %{ ++ match(Set dst (DivL src1 src2)); ++ ins_cost(IDIVDI_COST); ++ format %{ "div $dst, $src1, $src2\t#@divL" %} + -+ ins_cost(BRANCH_COST); ++ ins_encode(riscv_enc_div(dst, src1, src2)); ++ ins_pipe(ldiv_reg_reg); ++%} + -+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpI_branch" %} ++instruct signExtractL(iRegLNoSp dst, iRegL src1, immI_63 div1, immI_63 div2) %{ ++ match(Set dst (URShiftL (RShiftL src1 div1) div2)); ++ ins_cost(ALU_COST); ++ format %{ "srli $dst, $src1, $div1\t# long signExtract, #@signExtractL" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ srli(as_Register($dst$$reg), as_Register($src1$$reg), 63); + %} -+ -+ ins_pipe(pipe_cmp_branch); -+ ins_short_branch(1); ++ ins_pipe(ialu_reg_shift); +%} + -+instruct cmpI_loop(cmpOp cmp, iRegI op1, iRegI op2, label lbl) -+%{ -+ // Same match rule as `far_cmpI_loop'. -+ match(CountedLoopEnd cmp (CmpI op1 op2)); ++// Integer Remainder + -+ effect(USE lbl); ++instruct modI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ ++ match(Set dst (ModI src1 src2)); ++ ins_cost(IDIVSI_COST); ++ format %{ "remw $dst, $src1, $src2\t#@modI" %} + -+ ins_cost(BRANCH_COST); ++ ins_encode(riscv_enc_modw(dst, src1, src2)); ++ ins_pipe(ialu_reg_reg); ++%} + -+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpI_loop" %} ++// Long Remainder + -+ ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label)); -+ %} ++instruct modL(iRegLNoSp dst, iRegL src1, iRegL src2) %{ ++ match(Set dst (ModL src1 src2)); ++ ins_cost(IDIVDI_COST); ++ format %{ "rem $dst, $src1, $src2\t#@modL" %} + -+ ins_pipe(pipe_cmp_branch); -+ ins_short_branch(1); ++ ins_encode(riscv_enc_mod(dst, src1, src2)); ++ ins_pipe(ialu_reg_reg); +%} + -+// Compare unsigned int and branch near instructions -+instruct cmpU_branch(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) -+%{ -+ // Same match rule as `far_cmpU_branch'. 
-+ match(If cmp (CmpU op1 op2)); -+ -+ effect(USE lbl); -+ -+ ins_cost(BRANCH_COST); ++// Integer Shifts + -+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpU_branch" %} ++// Shift Left Register ++// In RV64I, only the low 5 bits of src2 are considered for the shift amount ++instruct lShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ ++ match(Set dst (LShiftI src1 src2)); ++ ins_cost(ALU_COST); ++ format %{ "sllw $dst, $src1, $src2\t#@lShiftI_reg_reg" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ as_Register($op2$$reg), *($lbl$$label)); ++ __ sllw(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} + -+ ins_pipe(pipe_cmp_branch); -+ ins_short_branch(1); ++ ins_pipe(ialu_reg_reg_vshift); +%} + -+instruct cmpU_loop(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) -+%{ -+ // Same match rule as `far_cmpU_loop'. -+ match(CountedLoopEnd cmp (CmpU op1 op2)); -+ -+ effect(USE lbl); -+ -+ ins_cost(BRANCH_COST); -+ -+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpU_loop" %} ++// Shift Left Immediate ++instruct lShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{ ++ match(Set dst (LShiftI src1 src2)); ++ ins_cost(ALU_COST); ++ format %{ "slliw $dst, $src1, ($src2 & 0x1f)\t#@lShiftI_reg_imm" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ as_Register($op2$$reg), *($lbl$$label)); ++ // the shift amount is encoded in the lower ++ // 5 bits of the I-immediate field for RV32I ++ __ slliw(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ (unsigned) $src2$$constant & 0x1f); + %} + -+ ins_pipe(pipe_cmp_branch); -+ ins_short_branch(1); ++ ins_pipe(ialu_reg_shift); +%} + -+// Compare signed long and branch near instructions -+instruct cmpL_branch(cmpOp cmp, iRegL op1, iRegL op2, label lbl) -+%{ -+ // Same match rule as `far_cmpL_branch'. -+ match(If cmp (CmpL op1 op2)); -+ -+ effect(USE lbl); -+ -+ ins_cost(BRANCH_COST); -+ -+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpL_branch" %} ++// Shift Right Logical Register ++// In RV64I, only the low 5 bits of src2 are considered for the shift amount ++instruct urShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ ++ match(Set dst (URShiftI src1 src2)); ++ ins_cost(ALU_COST); ++ format %{ "srlw $dst, $src1, $src2\t#@urShiftI_reg_reg" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label)); ++ __ srlw(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} + -+ ins_pipe(pipe_cmp_branch); -+ ins_short_branch(1); ++ ins_pipe(ialu_reg_reg_vshift); +%} + -+instruct cmpL_loop(cmpOp cmp, iRegL op1, iRegL op2, label lbl) -+%{ -+ // Same match rule as `far_cmpL_loop'. 
-+ match(CountedLoopEnd cmp (CmpL op1 op2)); -+ -+ effect(USE lbl); -+ -+ ins_cost(BRANCH_COST); -+ -+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpL_loop" %} ++// Shift Right Logical Immediate ++instruct urShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{ ++ match(Set dst (URShiftI src1 src2)); ++ ins_cost(ALU_COST); ++ format %{ "srliw $dst, $src1, ($src2 & 0x1f)\t#@urShiftI_reg_imm" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label)); ++ // the shift amount is encoded in the lower ++ // 6 bits of the I-immediate field for RV64I ++ __ srliw(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ (unsigned) $src2$$constant & 0x1f); + %} + -+ ins_pipe(pipe_cmp_branch); -+ ins_short_branch(1); ++ ins_pipe(ialu_reg_shift); +%} + -+// Compare unsigned long and branch near instructions -+instruct cmpUL_branch(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) -+%{ -+ // Same match rule as `far_cmpUL_branch'. -+ match(If cmp (CmpUL op1 op2)); -+ -+ effect(USE lbl); -+ -+ ins_cost(BRANCH_COST); -+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpUL_branch" %} ++// Shift Right Arithmetic Register ++// In RV64I, only the low 5 bits of src2 are considered for the shift amount ++instruct rShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ ++ match(Set dst (RShiftI src1 src2)); ++ ins_cost(ALU_COST); ++ format %{ "sraw $dst, $src1, $src2\t#@rShiftI_reg_reg" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ as_Register($op2$$reg), *($lbl$$label)); ++ // riscv will sign-ext dst high 32 bits ++ __ sraw(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} + -+ ins_pipe(pipe_cmp_branch); -+ ins_short_branch(1); ++ ins_pipe(ialu_reg_reg_vshift); +%} + -+instruct cmpUL_loop(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) -+%{ -+ // Same match rule as `far_cmpUL_loop'. -+ match(CountedLoopEnd cmp (CmpUL op1 op2)); -+ -+ effect(USE lbl); -+ -+ ins_cost(BRANCH_COST); -+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpUL_loop" %} ++// Shift Right Arithmetic Immediate ++instruct rShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{ ++ match(Set dst (RShiftI src1 src2)); ++ ins_cost(ALU_COST); ++ format %{ "sraiw $dst, $src1, ($src2 & 0x1f)\t#@rShiftI_reg_imm" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ as_Register($op2$$reg), *($lbl$$label)); ++ // riscv will sign-ext dst high 32 bits ++ __ sraiw(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ (unsigned) $src2$$constant & 0x1f); + %} + -+ ins_pipe(pipe_cmp_branch); -+ ins_short_branch(1); ++ ins_pipe(ialu_reg_shift); +%} + -+// Compare pointer and branch near instructions -+instruct cmpP_branch(cmpOpU cmp, iRegP op1, iRegP op2, label lbl) -+%{ -+ // Same match rule as `far_cmpP_branch'. 
-+ match(If cmp (CmpP op1 op2)); -+ -+ effect(USE lbl); ++// Long Shifts + -+ ins_cost(BRANCH_COST); ++// Shift Left Register ++// In RV64I, only the low 6 bits of src2 are considered for the shift amount ++instruct lShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{ ++ match(Set dst (LShiftL src1 src2)); + -+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpP_branch" %} ++ ins_cost(ALU_COST); ++ format %{ "sll $dst, $src1, $src2\t#@lShiftL_reg_reg" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ as_Register($op2$$reg), *($lbl$$label)); ++ __ sll(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} + -+ ins_pipe(pipe_cmp_branch); -+ ins_short_branch(1); ++ ins_pipe(ialu_reg_reg_vshift); +%} + -+instruct cmpP_loop(cmpOpU cmp, iRegP op1, iRegP op2, label lbl) -+%{ -+ // Same match rule as `far_cmpP_loop'. -+ match(CountedLoopEnd cmp (CmpP op1 op2)); -+ -+ effect(USE lbl); -+ -+ ins_cost(BRANCH_COST); ++// Shift Left Immediate ++instruct lShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{ ++ match(Set dst (LShiftL src1 src2)); + -+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpP_loop" %} ++ ins_cost(ALU_COST); ++ format %{ "slli $dst, $src1, ($src2 & 0x3f)\t#@lShiftL_reg_imm" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ as_Register($op2$$reg), *($lbl$$label)); ++ Assembler::CompressibleRegion cr(&_masm); ++ // the shift amount is encoded in the lower ++ // 6 bits of the I-immediate field for RV64I ++ __ slli(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ (unsigned) $src2$$constant & 0x3f); + %} + -+ ins_pipe(pipe_cmp_branch); -+ ins_short_branch(1); ++ ins_pipe(ialu_reg_shift); +%} + -+// Compare narrow pointer and branch near instructions -+instruct cmpN_branch(cmpOpU cmp, iRegN op1, iRegN op2, label lbl) -+%{ -+ // Same match rule as `far_cmpN_branch'. -+ match(If cmp (CmpN op1 op2)); -+ -+ effect(USE lbl); -+ -+ ins_cost(BRANCH_COST); ++// Shift Right Logical Register ++// In RV64I, only the low 6 bits of src2 are considered for the shift amount ++instruct urShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{ ++ match(Set dst (URShiftL src1 src2)); + -+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpN_branch" %} ++ ins_cost(ALU_COST); ++ format %{ "srl $dst, $src1, $src2\t#@urShiftL_reg_reg" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ as_Register($op2$$reg), *($lbl$$label)); ++ __ srl(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} + -+ ins_pipe(pipe_cmp_branch); -+ ins_short_branch(1); ++ ins_pipe(ialu_reg_reg_vshift); +%} + -+instruct cmpN_loop(cmpOpU cmp, iRegN op1, iRegN op2, label lbl) -+%{ -+ // Same match rule as `far_cmpN_loop'. 
-+ match(CountedLoopEnd cmp (CmpN op1 op2)); -+ -+ effect(USE lbl); -+ -+ ins_cost(BRANCH_COST); ++// Shift Right Logical Immediate ++instruct urShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{ ++ match(Set dst (URShiftL src1 src2)); + -+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpN_loop" %} ++ ins_cost(ALU_COST); ++ format %{ "srli $dst, $src1, ($src2 & 0x3f)\t#@urShiftL_reg_imm" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ as_Register($op2$$reg), *($lbl$$label)); ++ Assembler::CompressibleRegion cr(&_masm); ++ // the shift amount is encoded in the lower ++ // 6 bits of the I-immediate field for RV64I ++ __ srli(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ (unsigned) $src2$$constant & 0x3f); + %} + -+ ins_pipe(pipe_cmp_branch); -+ ins_short_branch(1); ++ ins_pipe(ialu_reg_shift); +%} + -+// Compare float and branch near instructions -+instruct cmpF_branch(cmpOp cmp, fRegF op1, fRegF op2, label lbl) -+%{ -+ // Same match rule as `far_cmpF_branch'. -+ match(If cmp (CmpF op1 op2)); -+ -+ effect(USE lbl); ++// A special-case pattern for card table stores. ++instruct urShiftP_reg_imm(iRegLNoSp dst, iRegP src1, immI src2) %{ ++ match(Set dst (URShiftL (CastP2X src1) src2)); + -+ ins_cost(XFER_COST + BRANCH_COST); -+ format %{ "float_b$cmp $op1, $op2 $lbl \t#@cmpF_branch"%} ++ ins_cost(ALU_COST); ++ format %{ "srli $dst, p2x($src1), ($src2 & 0x3f)\t#@urShiftP_reg_imm" %} + + ins_encode %{ -+ __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), *($lbl$$label)); ++ Assembler::CompressibleRegion cr(&_masm); ++ // the shift amount is encoded in the lower ++ // 6 bits of the I-immediate field for RV64I ++ __ srli(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ (unsigned) $src2$$constant & 0x3f); + %} + -+ ins_pipe(pipe_class_compare); -+ ins_short_branch(1); ++ ins_pipe(ialu_reg_shift); +%} + -+instruct cmpF_loop(cmpOp cmp, fRegF op1, fRegF op2, label lbl) -+%{ -+ // Same match rule as `far_cmpF_loop'. -+ match(CountedLoopEnd cmp (CmpF op1 op2)); -+ effect(USE lbl); ++// Shift Right Arithmetic Register ++// In RV64I, only the low 6 bits of src2 are considered for the shift amount ++instruct rShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{ ++ match(Set dst (RShiftL src1 src2)); + -+ ins_cost(XFER_COST + BRANCH_COST); -+ format %{ "float_b$cmp $op1, $op2, $lbl\t#@cmpF_loop"%} ++ ins_cost(ALU_COST); ++ format %{ "sra $dst, $src1, $src2\t#@rShiftL_reg_reg" %} + + ins_encode %{ -+ __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), *($lbl$$label)); ++ __ sra(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} + -+ ins_pipe(pipe_class_compare); -+ ins_short_branch(1); ++ ins_pipe(ialu_reg_reg_vshift); +%} + -+// Compare double and branch near instructions -+instruct cmpD_branch(cmpOp cmp, fRegD op1, fRegD op2, label lbl) -+%{ -+ // Same match rule as `far_cmpD_branch'. 
-+ match(If cmp (CmpD op1 op2)); -+ effect(USE lbl); ++// Shift Right Arithmetic Immediate ++instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{ ++ match(Set dst (RShiftL src1 src2)); + -+ ins_cost(XFER_COST + BRANCH_COST); -+ format %{ "double_b$cmp $op1, $op2, $lbl\t#@cmpD_branch"%} ++ ins_cost(ALU_COST); ++ format %{ "srai $dst, $src1, ($src2 & 0x3f)\t#@rShiftL_reg_imm" %} + + ins_encode %{ -+ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), -+ as_FloatRegister($op2$$reg), *($lbl$$label)); ++ Assembler::CompressibleRegion cr(&_masm); ++ // the shift amount is encoded in the lower ++ // 6 bits of the I-immediate field for RV64I ++ __ srai(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ (unsigned) $src2$$constant & 0x3f); + %} + -+ ins_pipe(pipe_class_compare); -+ ins_short_branch(1); ++ ins_pipe(ialu_reg_shift); +%} + -+instruct cmpD_loop(cmpOp cmp, fRegD op1, fRegD op2, label lbl) -+%{ -+ // Same match rule as `far_cmpD_loop'. -+ match(CountedLoopEnd cmp (CmpD op1 op2)); -+ effect(USE lbl); -+ -+ ins_cost(XFER_COST + BRANCH_COST); -+ format %{ "double_b$cmp $op1, $op2, $lbl\t#@cmpD_loop"%} ++instruct regI_not_reg(iRegINoSp dst, iRegI src1, immI_M1 m1) %{ ++ match(Set dst (XorI src1 m1)); ++ ins_cost(ALU_COST); ++ format %{ "xori $dst, $src1, -1\t#@regI_not_reg" %} + + ins_encode %{ -+ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), -+ as_FloatRegister($op2$$reg), *($lbl$$label)); ++ __ xori(as_Register($dst$$reg), as_Register($src1$$reg), -1); + %} + -+ ins_pipe(pipe_class_compare); -+ ins_short_branch(1); ++ ins_pipe(ialu_reg); +%} + -+// Compare signed int with zero and branch near instructions -+instruct cmpI_reg_imm0_branch(cmpOp cmp, iRegI op1, immI0 zero, label lbl) -+%{ -+ // Same match rule as `far_cmpI_reg_imm0_branch'. -+ match(If cmp (CmpI op1 zero)); -+ -+ effect(USE op1, USE lbl); -+ -+ ins_cost(BRANCH_COST); -+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpI_reg_imm0_branch" %} ++instruct regL_not_reg(iRegLNoSp dst, iRegL src1, immL_M1 m1) %{ ++ match(Set dst (XorL src1 m1)); ++ ins_cost(ALU_COST); ++ format %{ "xori $dst, $src1, -1\t#@regL_not_reg" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label)); ++ __ xori(as_Register($dst$$reg), as_Register($src1$$reg), -1); + %} + -+ ins_pipe(pipe_cmpz_branch); -+ ins_short_branch(1); ++ ins_pipe(ialu_reg); +%} + -+instruct cmpI_reg_imm0_loop(cmpOp cmp, iRegI op1, immI0 zero, label lbl) -+%{ -+ // Same match rule as `far_cmpI_reg_imm0_loop'. 
-+ match(CountedLoopEnd cmp (CmpI op1 zero)); + -+ effect(USE op1, USE lbl); ++// ============================================================================ ++// Floating Point Arithmetic Instructions + -+ ins_cost(BRANCH_COST); ++instruct addF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{ ++ match(Set dst (AddF src1 src2)); + -+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpI_reg_imm0_loop" %} ++ ins_cost(FMUL_SINGLE_COST); ++ format %{ "fadd.s $dst, $src1, $src2\t#@addF_reg_reg" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label)); ++ __ fadd_s(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg)); + %} + -+ ins_pipe(pipe_cmpz_branch); -+ ins_short_branch(1); ++ ins_pipe(fp_dop_reg_reg_s); +%} + -+// Compare unsigned int with zero and branch near instructions -+instruct cmpUEqNeLeGt_reg_imm0_branch(cmpOpUEqNeLeGt cmp, iRegI op1, immI0 zero, label lbl) -+%{ -+ // Same match rule as `far_cmpUEqNeLeGt_reg_imm0_branch'. -+ match(If cmp (CmpU op1 zero)); -+ -+ effect(USE op1, USE lbl); -+ -+ ins_cost(BRANCH_COST); ++instruct addD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{ ++ match(Set dst (AddD src1 src2)); + -+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpUEqNeLeGt_reg_imm0_branch" %} ++ ins_cost(FMUL_DOUBLE_COST); ++ format %{ "fadd.d $dst, $src1, $src2\t#@addD_reg_reg" %} + + ins_encode %{ -+ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); ++ __ fadd_d(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg)); + %} + -+ ins_pipe(pipe_cmpz_branch); -+ ins_short_branch(1); ++ ins_pipe(fp_dop_reg_reg_d); +%} + -+instruct cmpUEqNeLeGt_reg_imm0_loop(cmpOpUEqNeLeGt cmp, iRegI op1, immI0 zero, label lbl) -+%{ -+ // Same match rule as `far_cmpUEqNeLeGt_reg_imm0_loop'. -+ match(CountedLoopEnd cmp (CmpU op1 zero)); -+ -+ effect(USE op1, USE lbl); -+ -+ ins_cost(BRANCH_COST); -+ -+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpUEqNeLeGt_reg_imm0_loop" %} ++instruct subF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{ ++ match(Set dst (SubF src1 src2)); + ++ ins_cost(FMUL_SINGLE_COST); ++ format %{ "fsub.s $dst, $src1, $src2\t#@subF_reg_reg" %} + + ins_encode %{ -+ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); ++ __ fsub_s(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg)); + %} + -+ ins_pipe(pipe_cmpz_branch); -+ ins_short_branch(1); ++ ins_pipe(fp_dop_reg_reg_s); +%} + -+// Compare signed long with zero and branch near instructions -+instruct cmpL_reg_imm0_branch(cmpOp cmp, iRegL op1, immL0 zero, label lbl) -+%{ -+ // Same match rule as `far_cmpL_reg_imm0_branch'. -+ match(If cmp (CmpL op1 zero)); -+ -+ effect(USE op1, USE lbl); -+ -+ ins_cost(BRANCH_COST); ++instruct subD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{ ++ match(Set dst (SubD src1 src2)); + -+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpL_reg_imm0_branch" %} ++ ins_cost(FMUL_DOUBLE_COST); ++ format %{ "fsub.d $dst, $src1, $src2\t#@subD_reg_reg" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label)); ++ __ fsub_d(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg)); + %} + -+ ins_pipe(pipe_cmpz_branch); -+ ins_short_branch(1); ++ ins_pipe(fp_dop_reg_reg_d); +%} + -+instruct cmpL_reg_imm0_loop(cmpOp cmp, iRegL op1, immL0 zero, label lbl) -+%{ -+ // Same match rule as `far_cmpL_reg_imm0_loop'. 
-+ match(CountedLoopEnd cmp (CmpL op1 zero)); -+ -+ effect(USE op1, USE lbl); -+ -+ ins_cost(BRANCH_COST); ++instruct mulF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{ ++ match(Set dst (MulF src1 src2)); + -+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpL_reg_imm0_loop" %} ++ ins_cost(FMUL_SINGLE_COST); ++ format %{ "fmul.s $dst, $src1, $src2\t#@mulF_reg_reg" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label)); ++ __ fmul_s(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg)); + %} + -+ ins_pipe(pipe_cmpz_branch); -+ ins_short_branch(1); ++ ins_pipe(fp_dop_reg_reg_s); +%} + -+// Compare unsigned long with zero and branch near instructions -+instruct cmpULEqNeLeGt_reg_imm0_branch(cmpOpUEqNeLeGt cmp, iRegL op1, immL0 zero, label lbl) -+%{ -+ // Same match rule as `far_cmpULEqNeLeGt_reg_imm0_branch'. -+ match(If cmp (CmpUL op1 zero)); -+ -+ effect(USE op1, USE lbl); -+ -+ ins_cost(BRANCH_COST); ++instruct mulD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{ ++ match(Set dst (MulD src1 src2)); + -+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpULEqNeLeGt_reg_imm0_branch" %} ++ ins_cost(FMUL_DOUBLE_COST); ++ format %{ "fmul.d $dst, $src1, $src2\t#@mulD_reg_reg" %} + + ins_encode %{ -+ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); ++ __ fmul_d(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg)); + %} + -+ ins_pipe(pipe_cmpz_branch); -+ ins_short_branch(1); ++ ins_pipe(fp_dop_reg_reg_d); +%} + -+instruct cmpULEqNeLeGt_reg_imm0_loop(cmpOpUEqNeLeGt cmp, iRegL op1, immL0 zero, label lbl) -+%{ -+ // Same match rule as `far_cmpULEqNeLeGt_reg_imm0_loop'. -+ match(CountedLoopEnd cmp (CmpUL op1 zero)); -+ -+ effect(USE op1, USE lbl); -+ -+ ins_cost(BRANCH_COST); ++// src1 * src2 + src3 ++instruct maddF_reg_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF src3 (Binary src1 src2))); + -+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpULEqNeLeGt_reg_imm0_loop" %} ++ ins_cost(FMUL_SINGLE_COST); ++ format %{ "fmadd.s $dst, $src1, $src2, $src3\t#@maddF_reg_reg" %} + + ins_encode %{ -+ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); ++ __ fmadd_s(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), ++ as_FloatRegister($src3$$reg)); + %} + -+ ins_pipe(pipe_cmpz_branch); -+ ins_short_branch(1); ++ ins_pipe(pipe_class_default); +%} + -+// Compare pointer with zero and branch near instructions -+instruct cmpP_imm0_branch(cmpOpEqNe cmp, iRegP op1, immP0 zero, label lbl) %{ -+ // Same match rule as `far_cmpP_reg_imm0_branch'. 
-+ match(If cmp (CmpP op1 zero)); -+ effect(USE lbl); ++// src1 * src2 + src3 ++instruct maddD_reg_reg(fRegD dst, fRegD src1, fRegD src2, fRegD src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD src3 (Binary src1 src2))); + -+ ins_cost(BRANCH_COST); -+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpP_imm0_branch" %} ++ ins_cost(FMUL_DOUBLE_COST); ++ format %{ "fmadd.d $dst, $src1, $src2, $src3\t#@maddD_reg_reg" %} + + ins_encode %{ -+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); ++ __ fmadd_d(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), ++ as_FloatRegister($src3$$reg)); + %} + -+ ins_pipe(pipe_cmpz_branch); -+ ins_short_branch(1); ++ ins_pipe(pipe_class_default); +%} + -+instruct cmpP_imm0_loop(cmpOpEqNe cmp, iRegP op1, immP0 zero, label lbl) %{ -+ // Same match rule as `far_cmpP_reg_imm0_loop'. -+ match(CountedLoopEnd cmp (CmpP op1 zero)); -+ effect(USE lbl); ++// src1 * src2 - src3 ++instruct msubF_reg_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF (NegF src3) (Binary src1 src2))); + -+ ins_cost(BRANCH_COST); -+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpP_imm0_loop" %} ++ ins_cost(FMUL_SINGLE_COST); ++ format %{ "fmsub.s $dst, $src1, $src2, $src3\t#@msubF_reg_reg" %} + + ins_encode %{ -+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); ++ __ fmsub_s(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), ++ as_FloatRegister($src3$$reg)); + %} + -+ ins_pipe(pipe_cmpz_branch); -+ ins_short_branch(1); ++ ins_pipe(pipe_class_default); +%} + -+// Compare narrow pointer with zero and branch near instructions -+instruct cmpN_imm0_branch(cmpOpEqNe cmp, iRegN op1, immN0 zero, label lbl) %{ -+ // Same match rule as `far_cmpN_reg_imm0_branch'. -+ match(If cmp (CmpN op1 zero)); -+ effect(USE lbl); -+ -+ ins_cost(BRANCH_COST); ++// src1 * src2 - src3 ++instruct msubD_reg_reg(fRegD dst, fRegD src1, fRegD src2, fRegD src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD (NegD src3) (Binary src1 src2))); + -+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpN_imm0_branch" %} ++ ins_cost(FMUL_DOUBLE_COST); ++ format %{ "fmsub.d $dst, $src1, $src2, $src3\t#@msubD_reg_reg" %} + + ins_encode %{ -+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); ++ __ fmsub_d(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), ++ as_FloatRegister($src3$$reg)); + %} + -+ ins_pipe(pipe_cmpz_branch); -+ ins_short_branch(1); ++ ins_pipe(pipe_class_default); +%} + -+instruct cmpN_imm0_loop(cmpOpEqNe cmp, iRegN op1, immN0 zero, label lbl) %{ -+ // Same match rule as `far_cmpN_reg_imm0_loop'. 
-+ match(CountedLoopEnd cmp (CmpN op1 zero)); -+ effect(USE lbl); -+ -+ ins_cost(BRANCH_COST); ++// -src1 * src2 + src3 ++instruct nmsubF_reg_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF src3 (Binary (NegF src1) src2))); ++ match(Set dst (FmaF src3 (Binary src1 (NegF src2)))); + -+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpN_imm0_loop" %} ++ ins_cost(FMUL_SINGLE_COST); ++ format %{ "fnmsub.s $dst, $src1, $src2, $src3\t#@nmsubF_reg_reg" %} + + ins_encode %{ -+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); ++ __ fnmsub_s(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), ++ as_FloatRegister($src3$$reg)); + %} + -+ ins_pipe(pipe_cmpz_branch); -+ ins_short_branch(1); ++ ins_pipe(pipe_class_default); +%} + -+// Compare narrow pointer with pointer zero and branch near instructions -+instruct cmpP_narrowOop_imm0_branch(cmpOpEqNe cmp, iRegN op1, immP0 zero, label lbl) %{ -+ // Same match rule as `far_cmpP_narrowOop_imm0_branch'. -+ match(If cmp (CmpP (DecodeN op1) zero)); -+ effect(USE lbl); ++// -src1 * src2 + src3 ++instruct nmsubD_reg_reg(fRegD dst, fRegD src1, fRegD src2, fRegD src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD src3 (Binary (NegD src1) src2))); ++ match(Set dst (FmaD src3 (Binary src1 (NegD src2)))); + -+ ins_cost(BRANCH_COST); -+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpP_narrowOop_imm0_branch" %} ++ ins_cost(FMUL_DOUBLE_COST); ++ format %{ "fnmsub.d $dst, $src1, $src2, $src3\t#@nmsubD_reg_reg" %} + + ins_encode %{ -+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); -+ %} -+ -+ ins_pipe(pipe_cmpz_branch); -+ ins_short_branch(1); ++ __ fnmsub_d(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), ++ as_FloatRegister($src3$$reg)); ++ %} ++ ++ ins_pipe(pipe_class_default); +%} + -+instruct cmpP_narrowOop_imm0_loop(cmpOpEqNe cmp, iRegN op1, immP0 zero, label lbl) %{ -+ // Same match rule as `far_cmpP_narrowOop_imm0_loop'. 
-+ match(CountedLoopEnd cmp (CmpP (DecodeN op1) zero)); -+ effect(USE lbl); ++// -src1 * src2 - src3 ++instruct nmaddF_reg_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF (NegF src3) (Binary (NegF src1) src2))); ++ match(Set dst (FmaF (NegF src3) (Binary src1 (NegF src2)))); + -+ ins_cost(BRANCH_COST); -+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpP_narrowOop_imm0_loop" %} ++ ins_cost(FMUL_SINGLE_COST); ++ format %{ "fnmadd.s $dst, $src1, $src2, $src3\t#@nmaddF_reg_reg" %} + + ins_encode %{ -+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); ++ __ fnmadd_s(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), ++ as_FloatRegister($src3$$reg)); + %} + -+ ins_pipe(pipe_cmpz_branch); -+ ins_short_branch(1); ++ ins_pipe(pipe_class_default); +%} + -+// Patterns for far (20KiB) variants -+ -+instruct far_cmpFlag_branch(cmpOp cmp, rFlagsReg cr, label lbl) %{ -+ match(If cmp cr); -+ effect(USE lbl); ++// -src1 * src2 - src3 ++instruct nmaddD_reg_reg(fRegD dst, fRegD src1, fRegD src2, fRegD src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD (NegD src3) (Binary (NegD src1) src2))); ++ match(Set dst (FmaD (NegD src3) (Binary src1 (NegD src2)))); + -+ ins_cost(BRANCH_COST); -+ format %{ "far_b$cmp $cr, zr, $lbl\t#@far_cmpFlag_branch"%} ++ ins_cost(FMUL_DOUBLE_COST); ++ format %{ "fnmadd.d $dst, $src1, $src2, $src3\t#@nmaddD_reg_reg" %} + + ins_encode %{ -+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($cr$$reg), *($lbl$$label), /* is_far */ true); ++ __ fnmadd_d(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), ++ as_FloatRegister($src3$$reg)); + %} + -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(pipe_class_default); +%} + -+// Compare signed int and branch far instructions -+instruct far_cmpI_branch(cmpOp cmp, iRegI op1, iRegI op2, label lbl) %{ -+ match(If cmp (CmpI op1 op2)); -+ effect(USE lbl); -+ -+ ins_cost(BRANCH_COST * 2); ++// Math.max(FF)F ++instruct maxF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{ ++ match(Set dst (MaxF src1 src2)); ++ effect(TEMP_DEF dst); + -+ // the format instruction [far_b$cmp] here is be used as two insructions -+ // in macroassembler: b$not_cmp(op1, op2, done), j($lbl), bind(done) -+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpI_branch" %} ++ format %{ "maxF $dst, $src1, $src2" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); ++ __ minmax_FD(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg), ++ false /* is_double */, false /* is_min */); + %} + -+ ins_pipe(pipe_cmp_branch); ++ ins_pipe(fp_dop_reg_reg_s); +%} + -+instruct far_cmpI_loop(cmpOp cmp, iRegI op1, iRegI op2, label lbl) %{ -+ match(CountedLoopEnd cmp (CmpI op1 op2)); -+ effect(USE lbl); ++// Math.min(FF)F ++instruct minF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{ ++ match(Set dst (MinF src1 src2)); ++ effect(TEMP_DEF dst); + -+ ins_cost(BRANCH_COST * 2); -+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpI_loop" %} ++ format %{ "minF $dst, $src1, $src2" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); ++ __ minmax_FD(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg), ++ false /* is_double */, true /* is_min */); + %} + -+ ins_pipe(pipe_cmp_branch); ++ 
ins_pipe(fp_dop_reg_reg_s); +%} + -+instruct far_cmpU_branch(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) %{ -+ match(If cmp (CmpU op1 op2)); -+ effect(USE lbl); ++// Math.max(DD)D ++instruct maxD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{ ++ match(Set dst (MaxD src1 src2)); ++ effect(TEMP_DEF dst); + -+ ins_cost(BRANCH_COST * 2); -+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpU_branch" %} ++ format %{ "maxD $dst, $src1, $src2" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); ++ __ minmax_FD(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg), ++ true /* is_double */, false /* is_min */); + %} + -+ ins_pipe(pipe_cmp_branch); ++ ins_pipe(fp_dop_reg_reg_d); +%} + -+instruct far_cmpU_loop(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) %{ -+ match(CountedLoopEnd cmp (CmpU op1 op2)); -+ effect(USE lbl); ++// Math.min(DD)D ++instruct minD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{ ++ match(Set dst (MinD src1 src2)); ++ effect(TEMP_DEF dst); + -+ ins_cost(BRANCH_COST * 2); -+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpU_loop" %} ++ format %{ "minD $dst, $src1, $src2" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); ++ __ minmax_FD(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg), ++ true /* is_double */, true /* is_min */); + %} + -+ ins_pipe(pipe_cmp_branch); ++ ins_pipe(fp_dop_reg_reg_d); +%} + -+instruct far_cmpL_branch(cmpOp cmp, iRegL op1, iRegL op2, label lbl) %{ -+ match(If cmp (CmpL op1 op2)); -+ effect(USE lbl); ++instruct divF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{ ++ match(Set dst (DivF src1 src2)); + -+ ins_cost(BRANCH_COST * 2); -+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpL_branch" %} ++ ins_cost(FDIV_COST); ++ format %{ "fdiv.s $dst, $src1, $src2\t#@divF_reg_reg" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); ++ __ fdiv_s(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg)); + %} + -+ ins_pipe(pipe_cmp_branch); ++ ins_pipe(fp_div_s); +%} + -+instruct far_cmpLloop(cmpOp cmp, iRegL op1, iRegL op2, label lbl) %{ -+ match(CountedLoopEnd cmp (CmpL op1 op2)); -+ effect(USE lbl); ++instruct divD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{ ++ match(Set dst (DivD src1 src2)); + -+ ins_cost(BRANCH_COST * 2); -+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpL_loop" %} ++ ins_cost(FDIV_COST); ++ format %{ "fdiv.d $dst, $src1, $src2\t#@divD_reg_reg" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); ++ __ fdiv_d(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg)); + %} + -+ ins_pipe(pipe_cmp_branch); ++ ins_pipe(fp_div_d); +%} + -+instruct far_cmpUL_branch(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) %{ -+ match(If cmp (CmpUL op1 op2)); -+ effect(USE lbl); ++instruct negF_reg_reg(fRegF dst, fRegF src) %{ ++ match(Set dst (NegF src)); + -+ ins_cost(BRANCH_COST * 2); -+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpUL_branch" %} ++ ins_cost(XFER_COST); ++ format %{ "fsgnjn.s $dst, $src, $src\t#@negF_reg_reg" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | 
MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); ++ __ fneg_s(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src$$reg)); + %} + -+ ins_pipe(pipe_cmp_branch); ++ ins_pipe(fp_uop_s); +%} + -+instruct far_cmpUL_loop(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) %{ -+ match(CountedLoopEnd cmp (CmpUL op1 op2)); -+ effect(USE lbl); ++instruct negD_reg_reg(fRegD dst, fRegD src) %{ ++ match(Set dst (NegD src)); + -+ ins_cost(BRANCH_COST * 2); -+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpUL_loop" %} ++ ins_cost(XFER_COST); ++ format %{ "fsgnjn.d $dst, $src, $src\t#@negD_reg_reg" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); ++ __ fneg_d(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src$$reg)); + %} + -+ ins_pipe(pipe_cmp_branch); ++ ins_pipe(fp_uop_d); +%} + -+instruct far_cmpP_branch(cmpOpU cmp, iRegP op1, iRegP op2, label lbl) -+%{ -+ match(If cmp (CmpP op1 op2)); -+ -+ effect(USE lbl); -+ -+ ins_cost(BRANCH_COST * 2); ++instruct absI_reg(iRegINoSp dst, iRegIorL2I src) %{ ++ match(Set dst (AbsI src)); + -+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpP_branch" %} ++ ins_cost(ALU_COST * 3); ++ format %{ ++ "sraiw t0, $src, 0x1f\n\t" ++ "addw $dst, $src, t0\n\t" ++ "xorr $dst, $dst, t0\t#@absI_reg" ++ %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); ++ __ sraiw(t0, as_Register($src$$reg), 0x1f); ++ __ addw(as_Register($dst$$reg), as_Register($src$$reg), t0); ++ __ xorr(as_Register($dst$$reg), as_Register($dst$$reg), t0); + %} + -+ ins_pipe(pipe_cmp_branch); ++ ins_pipe(ialu_reg_reg); +%} + -+instruct far_cmpP_loop(cmpOpU cmp, iRegP op1, iRegP op2, label lbl) -+%{ -+ match(CountedLoopEnd cmp (CmpP op1 op2)); -+ -+ effect(USE lbl); -+ -+ ins_cost(BRANCH_COST * 2); ++instruct absL_reg(iRegLNoSp dst, iRegL src) %{ ++ match(Set dst (AbsL src)); + -+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpP_loop" %} ++ ins_cost(ALU_COST * 3); ++ format %{ ++ "srai t0, $src, 0x3f\n\t" ++ "add $dst, $src, t0\n\t" ++ "xorr $dst, $dst, t0\t#@absL_reg" ++ %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); ++ __ srai(t0, as_Register($src$$reg), 0x3f); ++ __ add(as_Register($dst$$reg), as_Register($src$$reg), t0); ++ __ xorr(as_Register($dst$$reg), as_Register($dst$$reg), t0); + %} + -+ ins_pipe(pipe_cmp_branch); ++ ins_pipe(ialu_reg_reg); +%} + -+instruct far_cmpN_branch(cmpOpU cmp, iRegN op1, iRegN op2, label lbl) -+%{ -+ match(If cmp (CmpN op1 op2)); ++instruct absF_reg(fRegF dst, fRegF src) %{ ++ match(Set dst (AbsF src)); + -+ effect(USE lbl); ++ ins_cost(XFER_COST); ++ format %{ "fsgnjx.s $dst, $src, $src\t#@absF_reg" %} ++ ins_encode %{ ++ __ fabs_s(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src$$reg)); ++ %} + -+ ins_cost(BRANCH_COST * 2); ++ ins_pipe(fp_uop_s); ++%} + -+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpN_branch" %} ++instruct absD_reg(fRegD dst, fRegD src) %{ ++ match(Set dst (AbsD src)); + ++ ins_cost(XFER_COST); ++ format %{ "fsgnjx.d $dst, $src, $src\t#@absD_reg" %} + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ as_Register($op2$$reg), 
*($lbl$$label), /* is_far */ true); ++ __ fabs_d(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src$$reg)); + %} + -+ ins_pipe(pipe_cmp_branch); ++ ins_pipe(fp_uop_d); +%} + -+instruct far_cmpN_loop(cmpOpU cmp, iRegN op1, iRegN op2, label lbl) -+%{ -+ match(CountedLoopEnd cmp (CmpN op1 op2)); ++instruct sqrtF_reg(fRegF dst, fRegF src) %{ ++ match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); + -+ effect(USE lbl); ++ ins_cost(FSQRT_COST); ++ format %{ "fsqrt.s $dst, $src\t#@sqrtF_reg" %} ++ ins_encode %{ ++ __ fsqrt_s(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src$$reg)); ++ %} + -+ ins_cost(BRANCH_COST * 2); ++ ins_pipe(fp_sqrt_s); ++%} + -+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpN_loop" %} ++instruct sqrtD_reg(fRegD dst, fRegD src) %{ ++ match(Set dst (SqrtD src)); + ++ ins_cost(FSQRT_COST); ++ format %{ "fsqrt.d $dst, $src\t#@sqrtD_reg" %} + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); ++ __ fsqrt_d(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src$$reg)); + %} + -+ ins_pipe(pipe_cmp_branch); ++ ins_pipe(fp_sqrt_d); +%} + -+// Float compare and branch instructions -+instruct far_cmpF_branch(cmpOp cmp, fRegF op1, fRegF op2, label lbl) -+%{ -+ match(If cmp (CmpF op1 op2)); ++// Arithmetic Instructions End + -+ effect(USE lbl); ++// ============================================================================ ++// Logical Instructions + -+ ins_cost(XFER_COST + BRANCH_COST * 2); -+ format %{ "far_float_b$cmp $op1, $op2, $lbl\t#@far_cmpF_branch"%} ++// Register And ++instruct andI_reg_reg(iRegINoSp dst, iRegI src1, iRegI src2) %{ ++ match(Set dst (AndI src1 src2)); + ++ format %{ "andr $dst, $src1, $src2\t#@andI_reg_reg" %} ++ ++ ins_cost(ALU_COST); + ins_encode %{ -+ __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), -+ *($lbl$$label), /* is_far */ true); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ andr(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} + -+ ins_pipe(pipe_class_compare); ++ ins_pipe(ialu_reg_reg); +%} + -+instruct far_cmpF_loop(cmpOp cmp, fRegF op1, fRegF op2, label lbl) -+%{ -+ match(CountedLoopEnd cmp (CmpF op1 op2)); -+ effect(USE lbl); ++// Immediate And ++instruct andI_reg_imm(iRegINoSp dst, iRegI src1, immIAdd src2) %{ ++ match(Set dst (AndI src1 src2)); + -+ ins_cost(XFER_COST + BRANCH_COST * 2); -+ format %{ "far_float_b$cmp $op1, $op2, $lbl\t#@far_cmpF_loop"%} ++ format %{ "andi $dst, $src1, $src2\t#@andI_reg_imm" %} + ++ ins_cost(ALU_COST); + ins_encode %{ -+ __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), -+ *($lbl$$label), /* is_far */ true); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ andi(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ (int32_t)($src2$$constant)); + %} + -+ ins_pipe(pipe_class_compare); ++ ins_pipe(ialu_reg_imm); +%} + -+// Double compare and branch instructions -+instruct far_cmpD_branch(cmpOp cmp, fRegD op1, fRegD op2, label lbl) -+%{ -+ match(If cmp (CmpD op1 op2)); -+ effect(USE lbl); ++// Register Or ++instruct orI_reg_reg(iRegINoSp dst, iRegI src1, iRegI src2) %{ ++ match(Set dst (OrI src1 src2)); + -+ ins_cost(XFER_COST + BRANCH_COST * 2); -+ format %{ "far_double_b$cmp $op1, $op2, $lbl\t#@far_cmpD_branch"%} ++ format %{ "orr $dst, $src1, $src2\t#@orI_reg_reg" %} + ++ ins_cost(ALU_COST); + ins_encode %{ -+ __ float_cmp_branch($cmp$$cmpcode | 
MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), -+ as_FloatRegister($op2$$reg), *($lbl$$label), /* is_far */ true); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ orr(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} + -+ ins_pipe(pipe_class_compare); ++ ins_pipe(ialu_reg_reg); +%} + -+instruct far_cmpD_loop(cmpOp cmp, fRegD op1, fRegD op2, label lbl) -+%{ -+ match(CountedLoopEnd cmp (CmpD op1 op2)); -+ effect(USE lbl); ++// Immediate Or ++instruct orI_reg_imm(iRegINoSp dst, iRegI src1, immIAdd src2) %{ ++ match(Set dst (OrI src1 src2)); + -+ ins_cost(XFER_COST + BRANCH_COST * 2); -+ format %{ "far_double_b$cmp $op1, $op2, $lbl\t#@far_cmpD_loop"%} ++ format %{ "ori $dst, $src1, $src2\t#@orI_reg_imm" %} + ++ ins_cost(ALU_COST); + ins_encode %{ -+ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), -+ as_FloatRegister($op2$$reg), *($lbl$$label), /* is_far */ true); ++ __ ori(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ (int32_t)($src2$$constant)); + %} + -+ ins_pipe(pipe_class_compare); ++ ins_pipe(ialu_reg_imm); +%} + -+instruct far_cmpI_reg_imm0_branch(cmpOp cmp, iRegI op1, immI0 zero, label lbl) -+%{ -+ match(If cmp (CmpI op1 zero)); -+ -+ effect(USE op1, USE lbl); -+ -+ ins_cost(BRANCH_COST * 2); ++// Register Xor ++instruct xorI_reg_reg(iRegINoSp dst, iRegI src1, iRegI src2) %{ ++ match(Set dst (XorI src1 src2)); + -+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpI_reg_imm0_branch" %} ++ format %{ "xorr $dst, $src1, $src2\t#@xorI_reg_reg" %} + ++ ins_cost(ALU_COST); + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label), /* is_far */ true); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ xorr(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} + -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(ialu_reg_reg); +%} + -+instruct far_cmpI_reg_imm0_loop(cmpOp cmp, iRegI op1, immI0 zero, label lbl) -+%{ -+ match(CountedLoopEnd cmp (CmpI op1 zero)); -+ -+ effect(USE op1, USE lbl); -+ -+ ins_cost(BRANCH_COST * 2); ++// Immediate Xor ++instruct xorI_reg_imm(iRegINoSp dst, iRegI src1, immIAdd src2) %{ ++ match(Set dst (XorI src1 src2)); + -+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpI_reg_imm0_loop" %} ++ format %{ "xori $dst, $src1, $src2\t#@xorI_reg_imm" %} + ++ ins_cost(ALU_COST); + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label), /* is_far */ true); ++ __ xori(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ (int32_t)($src2$$constant)); + %} + -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(ialu_reg_imm); +%} + -+instruct far_cmpUEqNeLeGt_imm0_branch(cmpOpUEqNeLeGt cmp, iRegI op1, immI0 zero, label lbl) -+%{ -+ match(If cmp (CmpU op1 zero)); -+ -+ effect(USE op1, USE lbl); -+ -+ ins_cost(BRANCH_COST * 2); ++// Register And Long ++instruct andL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{ ++ match(Set dst (AndL src1 src2)); + -+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpUEqNeLeGt_imm0_branch" %} ++ format %{ "andr $dst, $src1, $src2\t#@andL_reg_reg" %} + ++ ins_cost(ALU_COST); + ins_encode %{ -+ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ andr(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} + -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(ialu_reg_reg); +%} + -+instruct far_cmpUEqNeLeGt_reg_imm0_loop(cmpOpUEqNeLeGt 
cmp, iRegI op1, immI0 zero, label lbl) -+%{ -+ match(CountedLoopEnd cmp (CmpU op1 zero)); -+ -+ effect(USE op1, USE lbl); -+ -+ ins_cost(BRANCH_COST * 2); -+ -+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpUEqNeLeGt_reg_imm0_loop" %} ++// Immediate And Long ++instruct andL_reg_imm(iRegLNoSp dst, iRegL src1, immLAdd src2) %{ ++ match(Set dst (AndL src1 src2)); + ++ format %{ "andi $dst, $src1, $src2\t#@andL_reg_imm" %} + ++ ins_cost(ALU_COST); + ins_encode %{ -+ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ andi(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ (int32_t)($src2$$constant)); + %} + -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(ialu_reg_imm); +%} + -+// compare lt/ge unsigned instructs has no short instruct with same match -+instruct far_cmpULtGe_reg_imm0_branch(cmpOpULtGe cmp, iRegI op1, immI0 zero, label lbl) -+%{ -+ match(If cmp (CmpU op1 zero)); -+ -+ effect(USE op1, USE lbl); -+ -+ ins_cost(BRANCH_COST); ++// Register Or Long ++instruct orL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{ ++ match(Set dst (OrL src1 src2)); + -+ format %{ "j $lbl if $cmp == ge\t#@far_cmpULtGe_reg_imm0_branch" %} ++ format %{ "orr $dst, $src1, $src2\t#@orL_reg_reg" %} + -+ ins_encode(riscv_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl)); ++ ins_cost(ALU_COST); ++ ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); ++ __ orr(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); ++ %} + -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(ialu_reg_reg); +%} + -+instruct far_cmpULtGe_reg_imm0_loop(cmpOpULtGe cmp, iRegI op1, immI0 zero, label lbl) -+%{ -+ match(CountedLoopEnd cmp (CmpU op1 zero)); ++// Immediate Or Long ++instruct orL_reg_imm(iRegLNoSp dst, iRegL src1, immLAdd src2) %{ ++ match(Set dst (OrL src1 src2)); + -+ effect(USE op1, USE lbl); ++ format %{ "ori $dst, $src1, $src2\t#@orL_reg_imm" %} + -+ ins_cost(BRANCH_COST); ++ ins_cost(ALU_COST); ++ ins_encode %{ ++ __ ori(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ (int32_t)($src2$$constant)); ++ %} + -+ format %{ "j $lbl if $cmp == ge\t#@far_cmpULtGe_reg_imm0_loop" %} ++ ins_pipe(ialu_reg_imm); ++%} + -+ ins_encode(riscv_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl)); ++// Register Xor Long ++instruct xorL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{ ++ match(Set dst (XorL src1 src2)); + -+ ins_pipe(pipe_cmpz_branch); -+%} ++ format %{ "xorr $dst, $src1, $src2\t#@xorL_reg_reg" %} + -+instruct far_cmpL_reg_imm0_branch(cmpOp cmp, iRegL op1, immL0 zero, label lbl) -+%{ -+ match(If cmp (CmpL op1 zero)); ++ ins_cost(ALU_COST); ++ ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); ++ __ xorr(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); ++ %} + -+ effect(USE op1, USE lbl); ++ ins_pipe(ialu_reg_reg); ++%} + -+ ins_cost(BRANCH_COST * 2); ++// Immediate Xor Long ++instruct xorL_reg_imm(iRegLNoSp dst, iRegL src1, immLAdd src2) %{ ++ match(Set dst (XorL src1 src2)); + -+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpL_reg_imm0_branch" %} ++ ins_cost(ALU_COST); ++ format %{ "xori $dst, $src1, $src2\t#@xorL_reg_imm" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label), /* is_far */ true); ++ __ xori(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ (int32_t)($src2$$constant)); + %} + -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(ialu_reg_imm); +%} + -+instruct far_cmpL_reg_imm0_loop(cmpOp cmp, iRegL op1, immL0 
zero, label lbl) -+%{ -+ match(CountedLoopEnd cmp (CmpL op1 zero)); -+ -+ effect(USE op1, USE lbl); ++// ============================================================================ ++// BSWAP Instructions + -+ ins_cost(BRANCH_COST * 2); ++instruct bytes_reverse_int(iRegINoSp dst, iRegIorL2I src, rFlagsReg cr) %{ ++ match(Set dst (ReverseBytesI src)); ++ effect(TEMP cr); + -+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpL_reg_imm0_loop" %} ++ ins_cost(ALU_COST * 13); ++ format %{ "revb_w_w $dst, $src\t#@bytes_reverse_int" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label), /* is_far */ true); ++ __ revb_w_w(as_Register($dst$$reg), as_Register($src$$reg)); + %} + -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(ialu_reg); +%} + -+instruct far_cmpULEqNeLeGt_reg_imm0_branch(cmpOpUEqNeLeGt cmp, iRegL op1, immL0 zero, label lbl) -+%{ -+ match(If cmp (CmpUL op1 zero)); -+ -+ effect(USE op1, USE lbl); -+ -+ ins_cost(BRANCH_COST * 2); ++instruct bytes_reverse_long(iRegLNoSp dst, iRegL src, rFlagsReg cr) %{ ++ match(Set dst (ReverseBytesL src)); ++ effect(TEMP cr); + -+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpULEqNeLeGt_reg_imm0_branch" %} ++ ins_cost(ALU_COST * 29); ++ format %{ "revb $dst, $src\t#@bytes_reverse_long" %} + + ins_encode %{ -+ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); ++ __ revb(as_Register($dst$$reg), as_Register($src$$reg)); + %} + -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(ialu_reg); +%} + -+instruct far_cmpULEqNeLeGt_reg_imm0_loop(cmpOpUEqNeLeGt cmp, iRegL op1, immL0 zero, label lbl) -+%{ -+ match(CountedLoopEnd cmp (CmpUL op1 zero)); -+ -+ effect(USE op1, USE lbl); -+ -+ ins_cost(BRANCH_COST * 2); ++instruct bytes_reverse_unsigned_short(iRegINoSp dst, iRegIorL2I src) %{ ++ match(Set dst (ReverseBytesUS src)); + -+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpULEqNeLeGt_reg_imm0_loop" %} ++ ins_cost(ALU_COST * 5); ++ format %{ "revb_h_h_u $dst, $src\t#@bytes_reverse_unsigned_short" %} + + ins_encode %{ -+ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); ++ __ revb_h_h_u(as_Register($dst$$reg), as_Register($src$$reg)); + %} + -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(ialu_reg); +%} + -+// compare lt/ge unsigned instructs has no short instruct with same match -+instruct far_cmpULLtGe_reg_imm0_branch(cmpOpULtGe cmp, iRegL op1, immL0 zero, label lbl) -+%{ -+ match(If cmp (CmpUL op1 zero)); -+ -+ effect(USE op1, USE lbl); -+ -+ ins_cost(BRANCH_COST); ++instruct bytes_reverse_short(iRegINoSp dst, iRegIorL2I src) %{ ++ match(Set dst (ReverseBytesS src)); + -+ format %{ "j $lbl if $cmp == ge\t#@far_cmpULLtGe_reg_imm0_branch" %} ++ ins_cost(ALU_COST * 5); ++ format %{ "revb_h_h $dst, $src\t#@bytes_reverse_short" %} + -+ ins_encode(riscv_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl)); ++ ins_encode %{ ++ __ revb_h_h(as_Register($dst$$reg), as_Register($src$$reg)); ++ %} + -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(ialu_reg); +%} + -+instruct far_cmpULLtGe_reg_imm0_loop(cmpOpULtGe cmp, iRegL op1, immL0 zero, label lbl) -+%{ -+ match(CountedLoopEnd cmp (CmpUL op1 zero)); -+ -+ effect(USE op1, USE lbl); -+ -+ ins_cost(BRANCH_COST); -+ -+ format %{ "j $lbl if $cmp == ge\t#@far_cmpULLtGe_reg_imm0_loop" %} -+ -+ ins_encode(riscv_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl)); -+ -+ ins_pipe(pipe_cmpz_branch); -+%} ++// ============================================================================ ++// MemBar Instruction + -+instruct 
far_cmpP_imm0_branch(cmpOpEqNe cmp, iRegP op1, immP0 zero, label lbl) %{ -+ match(If cmp (CmpP op1 zero)); -+ effect(USE lbl); ++instruct load_fence() %{ ++ match(LoadFence); ++ ins_cost(ALU_COST); + -+ ins_cost(BRANCH_COST * 2); -+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpP_imm0_branch" %} ++ format %{ "#@load_fence" %} + + ins_encode %{ -+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); ++ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); + %} -+ -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(pipe_serial); +%} + -+instruct far_cmpP_imm0_loop(cmpOpEqNe cmp, iRegP op1, immP0 zero, label lbl) %{ -+ match(CountedLoopEnd cmp (CmpP op1 zero)); -+ effect(USE lbl); ++instruct membar_acquire() %{ ++ match(MemBarAcquire); ++ ins_cost(ALU_COST); + -+ ins_cost(BRANCH_COST * 2); -+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpP_imm0_loop" %} ++ format %{ "#@membar_acquire\n\t" ++ "fence ir iorw" %} + + ins_encode %{ -+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); ++ __ block_comment("membar_acquire"); ++ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); + %} + -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(pipe_serial); +%} + -+instruct far_cmpN_imm0_branch(cmpOpEqNe cmp, iRegN op1, immN0 zero, label lbl) %{ -+ match(If cmp (CmpN op1 zero)); -+ effect(USE lbl); -+ -+ ins_cost(BRANCH_COST * 2); ++instruct membar_acquire_lock() %{ ++ match(MemBarAcquireLock); ++ ins_cost(0); + -+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpN_imm0_branch" %} ++ format %{ "#@membar_acquire_lock (elided)" %} + + ins_encode %{ -+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); ++ __ block_comment("membar_acquire_lock (elided)"); + %} + -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(pipe_serial); +%} + -+instruct far_cmpN_imm0_loop(cmpOpEqNe cmp, iRegN op1, immN0 zero, label lbl) %{ -+ match(CountedLoopEnd cmp (CmpN op1 zero)); -+ effect(USE lbl); -+ -+ ins_cost(BRANCH_COST * 2); ++instruct store_fence() %{ ++ match(StoreFence); ++ ins_cost(ALU_COST); + -+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpN_imm0_loop" %} ++ format %{ "#@store_fence" %} + + ins_encode %{ -+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); ++ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); + %} -+ -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(pipe_serial); +%} + -+instruct far_cmpP_narrowOop_imm0_branch(cmpOpEqNe cmp, iRegN op1, immP0 zero, label lbl) %{ -+ match(If cmp (CmpP (DecodeN op1) zero)); -+ effect(USE lbl); ++instruct membar_release() %{ ++ match(MemBarRelease); ++ ins_cost(ALU_COST); + -+ ins_cost(BRANCH_COST * 2); -+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpP_narrowOop_imm0_branch" %} ++ format %{ "#@membar_release\n\t" ++ "fence iorw ow" %} + + ins_encode %{ -+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); ++ __ block_comment("membar_release"); ++ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); + %} -+ -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(pipe_serial); +%} + -+instruct far_cmpP_narrowOop_imm0_loop(cmpOpEqNe cmp, iRegN op1, immP0 zero, label lbl) %{ -+ match(CountedLoopEnd cmp (CmpP (DecodeN op1) zero)); -+ effect(USE lbl); ++instruct membar_storestore() %{ ++ match(MemBarStoreStore); ++ match(StoreStoreFence); ++ ins_cost(ALU_COST); + -+ ins_cost(BRANCH_COST * 2); -+ format %{ "far_b$cmp $op1, 
zr, $lbl\t#@far_cmpP_narrowOop_imm0_loop" %} ++ format %{ "MEMBAR-store-store\t#@membar_storestore" %} + + ins_encode %{ -+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); ++ __ membar(MacroAssembler::StoreStore); + %} -+ -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(pipe_serial); +%} + -+// ============================================================================ -+// Conditional Move Instructions -+instruct cmovI_cmpI(iRegINoSp dst, iRegI src, iRegI op1, iRegI op2, cmpOp cop) %{ -+ match(Set dst (CMoveI (Binary cop (CmpI op1 op2)) (Binary dst src))); -+ ins_cost(ALU_COST + BRANCH_COST); ++instruct membar_release_lock() %{ ++ match(MemBarReleaseLock); ++ ins_cost(0); + -+ format %{ "bneg$cop $op1, $op2, skip\t#@cmovI_cmpI\n\t" -+ "mv $dst, $src\n\t" -+ "skip:" -+ %} ++ format %{ "#@membar_release_lock (elided)" %} + + ins_encode %{ -+ __ enc_cmove($cop$$cmpcode, -+ as_Register($op1$$reg), as_Register($op2$$reg), -+ as_Register($dst$$reg), as_Register($src$$reg)); ++ __ block_comment("membar_release_lock (elided)"); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_serial); +%} + -+instruct cmovI_cmpL(iRegINoSp dst, iRegI src, iRegL op1, iRegL op2, cmpOp cop) %{ -+ match(Set dst (CMoveI (Binary cop (CmpL op1 op2)) (Binary dst src))); -+ ins_cost(ALU_COST + BRANCH_COST); ++instruct membar_volatile() %{ ++ match(MemBarVolatile); ++ ins_cost(ALU_COST); + -+ format %{ "bneg$cop $op1, $op2, skip\t#@cmovI_cmpL\n\t" -+ "mv $dst, $src\n\t" -+ "skip:" -+ %} ++ format %{ "#@membar_volatile\n\t" ++ "fence iorw iorw"%} + + ins_encode %{ -+ __ enc_cmove($cop$$cmpcode, -+ as_Register($op1$$reg), as_Register($op2$$reg), -+ as_Register($dst$$reg), as_Register($src$$reg)); ++ __ block_comment("membar_volatile"); ++ __ membar(MacroAssembler::StoreLoad); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_serial); +%} + -+instruct cmovI_cmpU(iRegINoSp dst, iRegI src, iRegI op1, iRegI op2, cmpOpU cop) %{ -+ match(Set dst (CMoveI (Binary cop (CmpU op1 op2)) (Binary dst src))); -+ ins_cost(ALU_COST + BRANCH_COST); -+ format %{ "bneg$cop $op1, $op2, skip\t#@cmovI_cmpU\n\t" -+ "mv $dst, $src\n\t" -+ "skip:" -+ %} -+ -+ ins_encode %{ -+ __ enc_cmove($cop$$cmpcode | MacroAssembler::unsigned_branch_mask, -+ as_Register($op1$$reg), as_Register($op2$$reg), -+ as_Register($dst$$reg), as_Register($src$$reg)); -+ %} ++// ============================================================================ ++// Cast Instructions (Java-level type cast) + -+ ins_pipe(pipe_slow); -+%} ++instruct castX2P(iRegPNoSp dst, iRegL src) %{ ++ match(Set dst (CastX2P src)); + -+instruct cmovI_cmpUL(iRegINoSp dst, iRegI src, iRegL op1, iRegL op2, cmpOpU cop) %{ -+ match(Set dst (CMoveI (Binary cop (CmpUL op1 op2)) (Binary dst src))); -+ ins_cost(ALU_COST + BRANCH_COST); -+ format %{ "bneg$cop $op1 $op2, skip\t#@cmovI_cmpUL\n\t" -+ "mv $dst, $src\n\t" -+ "skip:" -+ %} ++ ins_cost(ALU_COST); ++ format %{ "mv $dst, $src\t# long -> ptr, #@castX2P" %} + + ins_encode %{ -+ __ enc_cmove($cop$$cmpcode | MacroAssembler::unsigned_branch_mask, -+ as_Register($op1$$reg), as_Register($op2$$reg), -+ as_Register($dst$$reg), as_Register($src$$reg)); ++ Assembler::CompressibleRegion cr(&_masm); ++ if ($dst$$reg != $src$$reg) { ++ __ mv(as_Register($dst$$reg), as_Register($src$$reg)); ++ } + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(ialu_reg); +%} + -+instruct cmovL_cmpL(iRegLNoSp dst, iRegL src, iRegL op1, iRegL op2, cmpOp cop) %{ -+ match(Set dst (CMoveL (Binary cop (CmpL op1 op2)) (Binary dst src))); -+ 
ins_cost(ALU_COST + BRANCH_COST); ++instruct castP2X(iRegLNoSp dst, iRegP src) %{ ++ match(Set dst (CastP2X src)); + -+ format %{ "bneg$cop $op1, $op2, skip\t#@cmovL_cmpL\n\t" -+ "mv $dst, $src\n\t" -+ "skip:" -+ %} ++ ins_cost(ALU_COST); ++ format %{ "mv $dst, $src\t# ptr -> long, #@castP2X" %} + + ins_encode %{ -+ __ enc_cmove($cop$$cmpcode, -+ as_Register($op1$$reg), as_Register($op2$$reg), -+ as_Register($dst$$reg), as_Register($src$$reg)); ++ Assembler::CompressibleRegion cr(&_masm); ++ if ($dst$$reg != $src$$reg) { ++ __ mv(as_Register($dst$$reg), as_Register($src$$reg)); ++ } + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(ialu_reg); +%} + -+instruct cmovL_cmpUL(iRegLNoSp dst, iRegL src, iRegL op1, iRegL op2, cmpOpU cop) %{ -+ match(Set dst (CMoveL (Binary cop (CmpUL op1 op2)) (Binary dst src))); -+ ins_cost(ALU_COST + BRANCH_COST); ++instruct castPP(iRegPNoSp dst) ++%{ ++ match(Set dst (CastPP dst)); ++ ins_cost(0); + -+ format %{ "bneg$cop $op1, $op2, skip\t#@cmovL_cmpUL\n\t" -+ "mv $dst, $src\n\t" -+ "skip:" -+ %} ++ size(0); ++ format %{ "# castPP of $dst, #@castPP" %} ++ ins_encode(/* empty encoding */); ++ ins_pipe(pipe_class_empty); ++%} + -+ ins_encode %{ -+ __ enc_cmove($cop$$cmpcode | MacroAssembler::unsigned_branch_mask, -+ as_Register($op1$$reg), as_Register($op2$$reg), -+ as_Register($dst$$reg), as_Register($src$$reg)); -+ %} ++instruct castLL(iRegL dst) ++%{ ++ match(Set dst (CastLL dst)); + -+ ins_pipe(pipe_slow); ++ size(0); ++ format %{ "# castLL of $dst, #@castLL" %} ++ ins_encode(/* empty encoding */); ++ ins_cost(0); ++ ins_pipe(pipe_class_empty); +%} + ++instruct castII(iRegI dst) ++%{ ++ match(Set dst (CastII dst)); + -+// ============================================================================ -+// Procedure Call/Return Instructions -+ -+// Call Java Static Instruction ++ size(0); ++ format %{ "# castII of $dst, #@castII" %} ++ ins_encode(/* empty encoding */); ++ ins_cost(0); ++ ins_pipe(pipe_class_empty); ++%} + -+instruct CallStaticJavaDirect(method meth) ++instruct checkCastPP(iRegPNoSp dst) +%{ -+ match(CallStaticJava); ++ match(Set dst (CheckCastPP dst)); + -+ effect(USE meth); ++ size(0); ++ ins_cost(0); ++ format %{ "# checkcastPP of $dst, #@checkCastPP" %} ++ ins_encode(/* empty encoding */); ++ ins_pipe(pipe_class_empty); ++%} + -+ ins_cost(BRANCH_COST); ++instruct castFF(fRegF dst) ++%{ ++ match(Set dst (CastFF dst)); + -+ format %{ "CALL,static $meth\t#@CallStaticJavaDirect" %} ++ size(0); ++ format %{ "# castFF of $dst" %} ++ ins_encode(/* empty encoding */); ++ ins_cost(0); ++ ins_pipe(pipe_class_empty); ++%} + -+ ins_encode( riscv_enc_java_static_call(meth), -+ riscv_enc_call_epilog ); ++instruct castDD(fRegD dst) ++%{ ++ match(Set dst (CastDD dst)); + -+ ins_pipe(pipe_class_call); ++ size(0); ++ format %{ "# castDD of $dst" %} ++ ins_encode(/* empty encoding */); ++ ins_cost(0); ++ ins_pipe(pipe_class_empty); +%} + -+// TO HERE -+ -+// Call Java Dynamic Instruction -+instruct CallDynamicJavaDirect(method meth, rFlagsReg cr) ++instruct castVV(vReg dst) +%{ -+ match(CallDynamicJava); ++ match(Set dst (CastVV dst)); + -+ effect(USE meth, KILL cr); ++ size(0); ++ format %{ "# castVV of $dst" %} ++ ins_encode(/* empty encoding */); ++ ins_cost(0); ++ ins_pipe(pipe_class_empty); ++%} + -+ ins_cost(BRANCH_COST + ALU_COST * 6); ++// ============================================================================ ++// Convert Instructions + -+ format %{ "CALL,dynamic $meth\t#@CallDynamicJavaDirect" %} ++// int to bool ++instruct convI2Bool(iRegINoSp dst, 
iRegI src) ++%{ ++ match(Set dst (Conv2B src)); + -+ ins_encode( riscv_enc_java_dynamic_call(meth), -+ riscv_enc_call_epilog ); ++ ins_cost(ALU_COST); ++ format %{ "snez $dst, $src\t#@convI2Bool" %} + -+ ins_pipe(pipe_class_call); -+%} ++ ins_encode %{ ++ __ snez(as_Register($dst$$reg), as_Register($src$$reg)); ++ %} + -+// Call Runtime Instruction ++ ins_pipe(ialu_reg); ++%} + -+instruct CallRuntimeDirect(method meth, rFlagsReg cr) ++// pointer to bool ++instruct convP2Bool(iRegINoSp dst, iRegP src) +%{ -+ match(CallRuntime); -+ -+ effect(USE meth, KILL cr); -+ -+ ins_cost(BRANCH_COST); ++ match(Set dst (Conv2B src)); + -+ format %{ "CALL, runtime $meth\t#@CallRuntimeDirect" %} ++ ins_cost(ALU_COST); ++ format %{ "snez $dst, $src\t#@convP2Bool" %} + -+ ins_encode( riscv_enc_java_to_runtime(meth) ); ++ ins_encode %{ ++ __ snez(as_Register($dst$$reg), as_Register($src$$reg)); ++ %} + -+ ins_pipe(pipe_class_call); ++ ins_pipe(ialu_reg); +%} + -+// Call Runtime Instruction ++// int <-> long + -+instruct CallLeafDirect(method meth, rFlagsReg cr) ++instruct convI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src) +%{ -+ match(CallLeaf); ++ match(Set dst (ConvI2L src)); + -+ effect(USE meth, KILL cr); ++ ins_cost(ALU_COST); ++ format %{ "addw $dst, $src, zr\t#@convI2L_reg_reg" %} ++ ins_encode %{ ++ __ addw(as_Register($dst$$reg), as_Register($src$$reg), zr); ++ %} ++ ins_pipe(ialu_reg); ++%} + -+ ins_cost(BRANCH_COST); ++instruct convL2I_reg(iRegINoSp dst, iRegL src) %{ ++ match(Set dst (ConvL2I src)); + -+ format %{ "CALL, runtime leaf $meth\t#@CallLeafDirect" %} ++ ins_cost(ALU_COST); ++ format %{ "addw $dst, $src, zr\t#@convL2I_reg" %} + -+ ins_encode( riscv_enc_java_to_runtime(meth) ); ++ ins_encode %{ ++ __ addw(as_Register($dst$$reg), as_Register($src$$reg), zr); ++ %} + -+ ins_pipe(pipe_class_call); ++ ins_pipe(ialu_reg); +%} + -+// Call Runtime Instruction -+ -+instruct CallLeafNoFPDirect(method meth, rFlagsReg cr) ++// int to unsigned long (Zero-extend) ++instruct convI2UL_reg_reg(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask) +%{ -+ match(CallLeafNoFP); ++ match(Set dst (AndL (ConvI2L src) mask)); + -+ effect(USE meth, KILL cr); ++ ins_cost(ALU_COST * 2); ++ format %{ "zero_extend $dst, $src, 32\t# i2ul, #@convI2UL_reg_reg" %} + -+ ins_cost(BRANCH_COST); ++ ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); ++ __ zero_extend(as_Register($dst$$reg), as_Register($src$$reg), 32); ++ %} + -+ format %{ "CALL, runtime leaf nofp $meth\t#@CallLeafNoFPDirect" %} ++ ins_pipe(ialu_reg_shift); ++%} + -+ ins_encode( riscv_enc_java_to_runtime(meth) ); ++// float <-> double + -+ ins_pipe(pipe_class_call); -+%} ++instruct convF2D_reg(fRegD dst, fRegF src) %{ ++ match(Set dst (ConvF2D src)); + -+// ============================================================================ -+// Partial Subtype Check -+// -+// superklass array for an instance of the superklass. Set a hidden -+// internal cache on a hit (cache is checked with exposed code in -+// gen_subtype_check()). Return zero for a hit. The encoding -+// ALSO sets flags. 
++ ins_cost(XFER_COST); ++ format %{ "fcvt.d.s $dst, $src\t#@convF2D_reg" %} + -+instruct partialSubtypeCheck(rFlagsReg cr, iRegP_R14 sub, iRegP_R10 super, iRegP_R12 temp, iRegP_R15 result) -+%{ -+ match(Set result (PartialSubtypeCheck sub super)); -+ effect(KILL temp, KILL cr); ++ ins_encode %{ ++ __ fcvt_d_s(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg)); ++ %} + -+ ins_cost(2 * STORE_COST + 3 * LOAD_COST + 4 * ALU_COST + BRANCH_COST * 4); -+ format %{ "partialSubtypeCheck $result, $sub, $super\t#@partialSubtypeCheck" %} ++ ins_pipe(fp_f2d); ++%} + -+ ins_encode(riscv_enc_partial_subtype_check(sub, super, temp, result)); ++instruct convD2F_reg(fRegF dst, fRegD src) %{ ++ match(Set dst (ConvD2F src)); + -+ opcode(0x1); // Force zero of result reg on hit ++ ins_cost(XFER_COST); ++ format %{ "fcvt.s.d $dst, $src\t#@convD2F_reg" %} + -+ ins_pipe(pipe_class_memory); ++ ins_encode %{ ++ __ fcvt_s_d(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg)); ++ %} ++ ++ ins_pipe(fp_d2f); +%} + -+instruct partialSubtypeCheckVsZero(iRegP_R14 sub, iRegP_R10 super, iRegP_R12 temp, iRegP_R15 result, -+ immP0 zero, rFlagsReg cr) -+%{ -+ match(Set cr (CmpP (PartialSubtypeCheck sub super) zero)); -+ effect(KILL temp, KILL result); ++// float <-> int + -+ ins_cost(2 * STORE_COST + 3 * LOAD_COST + 4 * ALU_COST + BRANCH_COST * 4); -+ format %{ "partialSubtypeCheck $result, $sub, $super == 0\t#@partialSubtypeCheckVsZero" %} ++instruct convF2I_reg_reg(iRegINoSp dst, fRegF src) %{ ++ match(Set dst (ConvF2I src)); + -+ ins_encode(riscv_enc_partial_subtype_check(sub, super, temp, result)); ++ ins_cost(XFER_COST); ++ format %{ "fcvt.w.s $dst, $src\t#@convF2I_reg_reg" %} + -+ opcode(0x0); // Don't zero result reg on hit ++ ins_encode %{ ++ __ fcvt_w_s_safe($dst$$Register, $src$$FloatRegister); ++ %} + -+ ins_pipe(pipe_class_memory); ++ ins_pipe(fp_f2i); +%} + -+instruct string_compareU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -+ iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) -+%{ -+ predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UU); -+ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); -+ effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); ++instruct convI2F_reg_reg(fRegF dst, iRegIorL2I src) %{ ++ match(Set dst (ConvI2F src)); ++ ++ ins_cost(XFER_COST); ++ format %{ "fcvt.s.w $dst, $src\t#@convI2F_reg_reg" %} + -+ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareU" %} + ins_encode %{ -+ // Count is in 8-bit bytes; non-Compact chars are 16 bits. 
-+ __ string_compare($str1$$Register, $str2$$Register, -+ $cnt1$$Register, $cnt2$$Register, $result$$Register, -+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, -+ StrIntrinsicNode::UU); ++ __ fcvt_s_w(as_FloatRegister($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_class_memory); ++ ++ ins_pipe(fp_i2f); +%} + -+instruct string_compareL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -+ iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) -+%{ -+ predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LL); -+ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); -+ effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); ++// float <-> long ++ ++instruct convF2L_reg_reg(iRegLNoSp dst, fRegF src) %{ ++ match(Set dst (ConvF2L src)); ++ ++ ins_cost(XFER_COST); ++ format %{ "fcvt.l.s $dst, $src\t#@convF2L_reg_reg" %} + -+ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareL" %} + ins_encode %{ -+ __ string_compare($str1$$Register, $str2$$Register, -+ $cnt1$$Register, $cnt2$$Register, $result$$Register, -+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, -+ StrIntrinsicNode::LL); ++ __ fcvt_l_s_safe($dst$$Register, $src$$FloatRegister); + %} -+ ins_pipe(pipe_class_memory); ++ ++ ins_pipe(fp_f2l); +%} + -+instruct string_compareUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -+ iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) -+%{ -+ predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UL); -+ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); -+ effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); ++instruct convL2F_reg_reg(fRegF dst, iRegL src) %{ ++ match(Set dst (ConvL2F src)); ++ ++ ins_cost(XFER_COST); ++ format %{ "fcvt.s.l $dst, $src\t#@convL2F_reg_reg" %} + -+ format %{"String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareUL" %} + ins_encode %{ -+ __ string_compare($str1$$Register, $str2$$Register, -+ $cnt1$$Register, $cnt2$$Register, $result$$Register, -+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, -+ StrIntrinsicNode::UL); ++ __ fcvt_s_l(as_FloatRegister($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_class_memory); ++ ++ ins_pipe(fp_l2f); +%} + -+instruct string_compareLU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -+ iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, -+ rFlagsReg cr) -+%{ -+ predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LU); -+ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); -+ effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); ++// double <-> int ++ ++instruct convD2I_reg_reg(iRegINoSp dst, fRegD src) %{ ++ match(Set dst (ConvD2I src)); ++ ++ ins_cost(XFER_COST); ++ format %{ "fcvt.w.d $dst, $src\t#@convD2I_reg_reg" %} + -+ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareLU" %} + ins_encode %{ -+ __ string_compare($str1$$Register, $str2$$Register, -+ $cnt1$$Register, $cnt2$$Register, $result$$Register, -+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, -+ StrIntrinsicNode::LU); ++ __ fcvt_w_d_safe($dst$$Register, $src$$FloatRegister); + %} -+ ins_pipe(pipe_class_memory); ++ ++ ins_pipe(fp_d2i); +%} + -+instruct string_indexofUU(iRegP_R11 str1, iRegI_R12 
cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -+ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, -+ iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg tmp) -+%{ -+ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU); -+ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); -+ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP_DEF result, -+ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL tmp); -+ format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UU)" %} ++instruct convI2D_reg_reg(fRegD dst, iRegIorL2I src) %{ ++ match(Set dst (ConvI2D src)); ++ ++ ins_cost(XFER_COST); ++ format %{ "fcvt.d.w $dst, $src\t#@convI2D_reg_reg" %} + + ins_encode %{ -+ __ string_indexof($str1$$Register, $str2$$Register, -+ $cnt1$$Register, $cnt2$$Register, -+ $tmp1$$Register, $tmp2$$Register, -+ $tmp3$$Register, $tmp4$$Register, -+ $tmp5$$Register, $tmp6$$Register, -+ $result$$Register, StrIntrinsicNode::UU); ++ __ fcvt_d_w(as_FloatRegister($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_class_memory); ++ ++ ins_pipe(fp_i2d); +%} + -+instruct string_indexofLL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -+ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, -+ iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg tmp) -+%{ -+ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL); -+ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); -+ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP_DEF result, -+ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL tmp); -+ format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LL)" %} ++// double <-> long ++ ++instruct convD2L_reg_reg(iRegLNoSp dst, fRegD src) %{ ++ match(Set dst (ConvD2L src)); ++ ++ ins_cost(XFER_COST); ++ format %{ "fcvt.l.d $dst, $src\t#@convD2L_reg_reg" %} + + ins_encode %{ -+ __ string_indexof($str1$$Register, $str2$$Register, -+ $cnt1$$Register, $cnt2$$Register, -+ $tmp1$$Register, $tmp2$$Register, -+ $tmp3$$Register, $tmp4$$Register, -+ $tmp5$$Register, $tmp6$$Register, -+ $result$$Register, StrIntrinsicNode::LL); ++ __ fcvt_l_d_safe($dst$$Register, $src$$FloatRegister); + %} -+ ins_pipe(pipe_class_memory); ++ ++ ins_pipe(fp_d2l); +%} + -+instruct string_indexofUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -+ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, -+ iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg tmp) -+%{ -+ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL); -+ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); -+ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP_DEF result, -+ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL tmp); -+ format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UL)" %} ++instruct convL2D_reg_reg(fRegD dst, iRegL src) %{ ++ match(Set dst (ConvL2D src)); ++ ++ ins_cost(XFER_COST); ++ format %{ "fcvt.d.l $dst, $src\t#@convL2D_reg_reg" %} + + ins_encode %{ -+ __ string_indexof($str1$$Register, $str2$$Register, -+ $cnt1$$Register, $cnt2$$Register, -+ $tmp1$$Register, $tmp2$$Register, -+ $tmp3$$Register, $tmp4$$Register, -+ $tmp5$$Register, $tmp6$$Register, -+ $result$$Register, StrIntrinsicNode::UL); ++ __ fcvt_d_l(as_FloatRegister($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_class_memory); ++ ++ ins_pipe(fp_l2d); +%} + -+instruct 
string_indexof_conUU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, -+ immI_le_4 int_cnt2, iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, -+ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg tmp) -+%{ -+ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU); -+ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); -+ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, TEMP_DEF result, -+ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp); -+ format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UU)" %} ++// Convert oop into int for vectors alignment masking ++instruct convP2I(iRegINoSp dst, iRegP src) %{ ++ match(Set dst (ConvL2I (CastP2X src))); ++ ++ ins_cost(ALU_COST * 2); ++ format %{ "zero_extend $dst, $src, 32\t# ptr -> int, #@convP2I" %} + + ins_encode %{ -+ int icnt2 = (int)$int_cnt2$$constant; -+ __ string_indexof_linearscan($str1$$Register, $str2$$Register, -+ $cnt1$$Register, zr, -+ $tmp1$$Register, $tmp2$$Register, -+ $tmp3$$Register, $tmp4$$Register, -+ icnt2, $result$$Register, StrIntrinsicNode::UU); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ zero_extend($dst$$Register, $src$$Register, 32); + %} -+ ins_pipe(pipe_class_memory); ++ ++ ins_pipe(ialu_reg); +%} + -+instruct string_indexof_conLL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, -+ immI_le_4 int_cnt2, iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, -+ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg tmp) ++// Convert compressed oop into int for vectors alignment masking ++// in case of 32bit oops (heap < 4Gb). ++instruct convN2I(iRegINoSp dst, iRegN src) +%{ -+ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL); -+ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); -+ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, TEMP_DEF result, -+ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp); -+ format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LL)" %} ++ predicate(CompressedOops::shift() == 0); ++ match(Set dst (ConvL2I (CastP2X (DecodeN src)))); ++ ++ ins_cost(ALU_COST); ++ format %{ "mv $dst, $src\t# compressed ptr -> int, #@convN2I" %} + + ins_encode %{ -+ int icnt2 = (int)$int_cnt2$$constant; -+ __ string_indexof_linearscan($str1$$Register, $str2$$Register, -+ $cnt1$$Register, zr, -+ $tmp1$$Register, $tmp2$$Register, -+ $tmp3$$Register, $tmp4$$Register, -+ icnt2, $result$$Register, StrIntrinsicNode::LL); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ mv($dst$$Register, $src$$Register); + %} -+ ins_pipe(pipe_class_memory); -+%} + -+instruct string_indexof_conUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, -+ immI_1 int_cnt2, iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, -+ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg tmp) -+%{ -+ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL); -+ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); -+ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, TEMP_DEF result, -+ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp); -+ format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UL)" %} ++ ins_pipe(ialu_reg); ++%} + ++// Convert oop pointer into compressed form ++instruct encodeHeapOop(iRegNNoSp dst, iRegP src) %{ ++ match(Set dst (EncodeP src)); ++ ins_cost(ALU_COST); ++ format %{ "encode_heap_oop $dst, $src\t#@encodeHeapOop" %} + ins_encode %{ -+ int icnt2 = (int)$int_cnt2$$constant; -+ __ string_indexof_linearscan($str1$$Register, $str2$$Register, -+ $cnt1$$Register, zr, -+ $tmp1$$Register, $tmp2$$Register, -+ 
$tmp3$$Register, $tmp4$$Register, -+ icnt2, $result$$Register, StrIntrinsicNode::UL); ++ Register s = $src$$Register; ++ Register d = $dst$$Register; ++ __ encode_heap_oop(d, s); + %} -+ ins_pipe(pipe_class_memory); ++ ins_pipe(ialu_reg); +%} + -+instruct stringU_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, -+ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, -+ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg tmp) -+%{ -+ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); -+ predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U); -+ effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result, -+ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp); -+ -+ format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result" %} ++instruct decodeHeapOop(iRegPNoSp dst, iRegN src) %{ ++ predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull && ++ n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant); ++ match(Set dst (DecodeN src)); + ++ ins_cost(0); ++ format %{ "decode_heap_oop $dst, $src\t#@decodeHeapOop" %} + ins_encode %{ -+ __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, -+ $result$$Register, $tmp1$$Register, $tmp2$$Register, -+ $tmp3$$Register, $tmp4$$Register, false /* isU */) ; ++ Register s = $src$$Register; ++ Register d = $dst$$Register; ++ __ decode_heap_oop(d, s); + %} -+ ins_pipe(pipe_class_memory); ++ ins_pipe(ialu_reg); +%} + ++instruct decodeHeapOop_not_null(iRegPNoSp dst, iRegN src) %{ ++ predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull || ++ n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant); ++ match(Set dst (DecodeN src)); + -+instruct stringL_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, -+ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, -+ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg tmp) -+%{ -+ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); -+ predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L); -+ effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result, -+ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp); ++ ins_cost(0); ++ format %{ "decode_heap_oop_not_null $dst, $src\t#@decodeHeapOop_not_null" %} ++ ins_encode %{ ++ Register s = $src$$Register; ++ Register d = $dst$$Register; ++ __ decode_heap_oop_not_null(d, s); ++ %} ++ ins_pipe(ialu_reg); ++%} + -+ format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result" %} ++// Convert klass pointer into compressed form. 
++instruct encodeKlass_not_null(iRegNNoSp dst, iRegP src) %{ ++ match(Set dst (EncodePKlass src)); ++ ++ ins_cost(ALU_COST); ++ format %{ "encode_klass_not_null $dst, $src\t#@encodeKlass_not_null" %} + + ins_encode %{ -+ __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, -+ $result$$Register, $tmp1$$Register, $tmp2$$Register, -+ $tmp3$$Register, $tmp4$$Register, true /* isL */); ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ __ encode_klass_not_null(dst_reg, src_reg, t0); + %} -+ ins_pipe(pipe_class_memory); ++ ++ ins_pipe(ialu_reg); +%} + -+// clearing of an array -+instruct clearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy, rFlagsReg cr) -+%{ -+ predicate(!UseRVV); -+ match(Set dummy (ClearArray cnt base)); -+ effect(USE_KILL cnt, USE_KILL base, KILL cr); ++instruct decodeKlass_not_null(iRegPNoSp dst, iRegN src, iRegPNoSp tmp) %{ ++ match(Set dst (DecodeNKlass src)); + -+ ins_cost(4 * DEFAULT_COST); -+ format %{ "ClearArray $cnt, $base\t#@clearArray_reg_reg" %} ++ effect(TEMP tmp); ++ ++ ins_cost(ALU_COST); ++ format %{ "decode_klass_not_null $dst, $src\t#@decodeKlass_not_null" %} + + ins_encode %{ -+ address tpc = __ zero_words($base$$Register, $cnt$$Register); -+ if (tpc == NULL) { -+ ciEnv::current()->record_failure("CodeCache is full"); -+ return; -+ } ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ Register tmp_reg = as_Register($tmp$$reg); ++ __ decode_klass_not_null(dst_reg, src_reg, tmp_reg); + %} + -+ ins_pipe(pipe_class_memory); ++ ins_pipe(ialu_reg); +%} + -+instruct clearArray_imm_reg(immL cnt, iRegP_R28 base, Universe dummy, rFlagsReg cr) -+%{ -+ predicate(!UseRVV && (uint64_t)n->in(2)->get_long() -+ < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord)); -+ match(Set dummy (ClearArray cnt base)); -+ effect(USE_KILL base, KILL cr); ++// stack <-> reg and reg <-> reg shuffles with no conversion + -+ ins_cost(4 * DEFAULT_COST); -+ format %{ "ClearArray $cnt, $base\t#@clearArray_imm_reg" %} ++instruct MoveF2I_stack_reg(iRegINoSp dst, stackSlotF src) %{ ++ ++ match(Set dst (MoveF2I src)); ++ ++ effect(DEF dst, USE src); ++ ++ ins_cost(LOAD_COST); ++ ++ format %{ "lw $dst, $src\t#@MoveF2I_stack_reg" %} + + ins_encode %{ -+ __ zero_words($base$$Register, (uint64_t)$cnt$$constant); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ lw(as_Register($dst$$reg), Address(sp, $src$$disp)); + %} + -+ ins_pipe(pipe_class_memory); ++ ins_pipe(iload_reg_reg); ++ +%} + -+instruct string_equalsL(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, -+ iRegI_R10 result, rFlagsReg cr) -+%{ -+ predicate(!UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL); -+ match(Set result (StrEquals (Binary str1 str2) cnt)); -+ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr); ++instruct MoveI2F_stack_reg(fRegF dst, stackSlotI src) %{ ++ ++ match(Set dst (MoveI2F src)); ++ ++ effect(DEF dst, USE src); ++ ++ ins_cost(LOAD_COST); ++ ++ format %{ "flw $dst, $src\t#@MoveI2F_stack_reg" %} + -+ format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsL" %} + ins_encode %{ -+ // Count is in 8-bit bytes; non-Compact chars are 16 bits. 
-+ __ string_equals($str1$$Register, $str2$$Register, -+ $result$$Register, $cnt$$Register, 1); ++ __ flw(as_FloatRegister($dst$$reg), Address(sp, $src$$disp)); + %} ++ + ins_pipe(pipe_class_memory); ++ +%} + -+instruct string_equalsU(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, -+ iRegI_R10 result, rFlagsReg cr) -+%{ -+ predicate(!UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU); -+ match(Set result (StrEquals (Binary str1 str2) cnt)); -+ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr); ++instruct MoveD2L_stack_reg(iRegLNoSp dst, stackSlotD src) %{ ++ ++ match(Set dst (MoveD2L src)); ++ ++ effect(DEF dst, USE src); ++ ++ ins_cost(LOAD_COST); ++ ++ format %{ "ld $dst, $src\t#@MoveD2L_stack_reg" %} + -+ format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsU" %} + ins_encode %{ -+ // Count is in 8-bit bytes; non-Compact chars are 16 bits. -+ __ string_equals($str1$$Register, $str2$$Register, -+ $result$$Register, $cnt$$Register, 2); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ ld(as_Register($dst$$reg), Address(sp, $src$$disp)); + %} -+ ins_pipe(pipe_class_memory); ++ ++ ins_pipe(iload_reg_reg); ++ +%} + -+instruct array_equalsB(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, -+ iRegP_R13 tmp1, iRegP_R14 tmp2, iRegP_R15 tmp3, -+ iRegP_R16 tmp4, iRegP_R28 tmp, rFlagsReg cr) -+%{ -+ predicate(!UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); -+ match(Set result (AryEq ary1 ary2)); -+ effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); ++instruct MoveL2D_stack_reg(fRegD dst, stackSlotL src) %{ ++ ++ match(Set dst (MoveL2D src)); ++ ++ effect(DEF dst, USE src); ++ ++ ins_cost(LOAD_COST); ++ ++ format %{ "fld $dst, $src\t#@MoveL2D_stack_reg" %} + -+ format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsB // KILL $tmp" %} + ins_encode %{ -+ address tpc = __ arrays_equals($ary1$$Register, $ary2$$Register, -+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, -+ $result$$Register, $tmp$$Register, 1); -+ if (tpc == NULL) { -+ ciEnv::current()->record_failure("CodeCache is full"); -+ return; -+ } ++ Assembler::CompressibleRegion cr(&_masm); ++ __ fld(as_FloatRegister($dst$$reg), Address(sp, $src$$disp)); + %} ++ + ins_pipe(pipe_class_memory); ++ +%} + -+instruct array_equalsC(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, -+ iRegP_R13 tmp1, iRegP_R14 tmp2, iRegP_R15 tmp3, -+ iRegP_R16 tmp4, iRegP_R28 tmp, rFlagsReg cr) -+%{ -+ predicate(!UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); -+ match(Set result (AryEq ary1 ary2)); -+ effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); ++instruct MoveF2I_reg_stack(stackSlotI dst, fRegF src) %{ ++ ++ match(Set dst (MoveF2I src)); ++ ++ effect(DEF dst, USE src); ++ ++ ins_cost(STORE_COST); ++ ++ format %{ "fsw $src, $dst\t#@MoveF2I_reg_stack" %} + -+ format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsC // KILL $tmp" %} + ins_encode %{ -+ address tpc = __ arrays_equals($ary1$$Register, $ary2$$Register, -+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, -+ $result$$Register, $tmp$$Register, 2); -+ if (tpc == NULL) { -+ ciEnv::current()->record_failure("CodeCache is full"); -+ return; -+ } ++ __ fsw(as_FloatRegister($src$$reg), Address(sp, $dst$$disp)); + %} ++ + ins_pipe(pipe_class_memory); ++ +%} + -+// ============================================================================ -+// Safepoint Instructions ++instruct 
MoveI2F_reg_stack(stackSlotF dst, iRegI src) %{ + -+instruct safePoint(iRegP poll) -+%{ -+ match(SafePoint poll); ++ match(Set dst (MoveI2F src)); ++ ++ effect(DEF dst, USE src); ++ ++ ins_cost(STORE_COST); ++ ++ format %{ "sw $src, $dst\t#@MoveI2F_reg_stack" %} + -+ ins_cost(2 * LOAD_COST); -+ format %{ -+ "lwu zr, [$poll]\t# Safepoint: poll for GC, #@safePoint" -+ %} + ins_encode %{ -+ __ read_polling_page(as_Register($poll$$reg), 0, relocInfo::poll_type); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ sw(as_Register($src$$reg), Address(sp, $dst$$disp)); + %} -+ ins_pipe(pipe_serial); // ins_pipe(iload_reg_mem); ++ ++ ins_pipe(istore_reg_reg); ++ +%} + -+// ============================================================================ -+// This name is KNOWN by the ADLC and cannot be changed. -+// The ADLC forces a 'TypeRawPtr::BOTTOM' output type -+// for this guy. -+instruct tlsLoadP(javaThread_RegP dst) -+%{ -+ match(Set dst (ThreadLocal)); ++instruct MoveD2L_reg_stack(stackSlotL dst, fRegD src) %{ + -+ ins_cost(0); ++ match(Set dst (MoveD2L src)); + -+ format %{ " -- \t// $dst=Thread::current(), empty, #@tlsLoadP" %} ++ effect(DEF dst, USE src); + -+ size(0); ++ ins_cost(STORE_COST); + -+ ins_encode( /*empty*/ ); ++ format %{ "fsd $dst, $src\t#@MoveD2L_reg_stack" %} ++ ++ ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); ++ __ fsd(as_FloatRegister($src$$reg), Address(sp, $dst$$disp)); ++ %} ++ ++ ins_pipe(pipe_class_memory); + -+ ins_pipe(pipe_class_empty); +%} + -+// inlined locking and unlocking -+// using t1 as the 'flag' register to bridge the BoolNode producers and consumers -+instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2) -+%{ -+ match(Set cr (FastLock object box)); -+ effect(TEMP tmp, TEMP tmp2); ++instruct MoveL2D_reg_stack(stackSlotD dst, iRegL src) %{ + -+ ins_cost(LOAD_COST * 2 + STORE_COST * 3 + ALU_COST * 6 + BRANCH_COST * 3); -+ format %{ "fastlock $object,$box\t! kills $tmp,$tmp2, #@cmpFastLock" %} ++ match(Set dst (MoveL2D src)); + -+ ins_encode(riscv_enc_fast_lock(object, box, tmp, tmp2)); ++ effect(DEF dst, USE src); + -+ ins_pipe(pipe_serial); -+%} ++ ins_cost(STORE_COST); + -+// using t1 as the 'flag' register to bridge the BoolNode producers and consumers -+instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2) -+%{ -+ match(Set cr (FastUnlock object box)); -+ effect(TEMP tmp, TEMP tmp2); ++ format %{ "sd $src, $dst\t#@MoveL2D_reg_stack" %} + -+ ins_cost(LOAD_COST * 2 + STORE_COST + ALU_COST * 2 + BRANCH_COST * 4); -+ format %{ "fastunlock $object,$box\t! kills $tmp, $tmp2, #@cmpFastUnlock" %} ++ ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); ++ __ sd(as_Register($src$$reg), Address(sp, $dst$$disp)); ++ %} + -+ ins_encode(riscv_enc_fast_unlock(object, box, tmp, tmp2)); ++ ins_pipe(istore_reg_reg); + -+ ins_pipe(pipe_serial); +%} + -+// Tail Call; Jump from runtime stub to Java code. -+// Also known as an 'interprocedural jump'. -+// Target of jump will eventually return to caller. -+// TailJump below removes the return address. -+instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_oop) -+%{ -+ match(TailCall jump_target method_oop); ++instruct MoveF2I_reg_reg(iRegINoSp dst, fRegF src) %{ + -+ ins_cost(BRANCH_COST); ++ match(Set dst (MoveF2I src)); + -+ format %{ "jalr $jump_target\t# $method_oop holds method oop, #@TailCalljmpInd." 
%} ++ effect(DEF dst, USE src); + -+ ins_encode(riscv_enc_tail_call(jump_target)); ++ ins_cost(XFER_COST); ++ ++ format %{ "fmv.x.w $dst, $src\t#@MoveL2D_reg_stack" %} ++ ++ ins_encode %{ ++ __ fmv_x_w(as_Register($dst$$reg), as_FloatRegister($src$$reg)); ++ %} ++ ++ ins_pipe(fp_f2i); + -+ ins_pipe(pipe_class_call); +%} + -+instruct TailjmpInd(iRegPNoSp jump_target, iRegP_R10 ex_oop) -+%{ -+ match(TailJump jump_target ex_oop); ++instruct MoveI2F_reg_reg(fRegF dst, iRegI src) %{ + -+ ins_cost(ALU_COST + BRANCH_COST); ++ match(Set dst (MoveI2F src)); + -+ format %{ "jalr $jump_target\t# $ex_oop holds exception oop, #@TailjmpInd." %} ++ effect(DEF dst, USE src); + -+ ins_encode(riscv_enc_tail_jmp(jump_target)); ++ ins_cost(XFER_COST); ++ ++ format %{ "fmv.w.x $dst, $src\t#@MoveI2F_reg_reg" %} ++ ++ ins_encode %{ ++ __ fmv_w_x(as_FloatRegister($dst$$reg), as_Register($src$$reg)); ++ %} ++ ++ ins_pipe(fp_i2f); + -+ ins_pipe(pipe_class_call); +%} + -+// Create exception oop: created by stack-crawling runtime code. -+// Created exception is now available to this handler, and is setup -+// just prior to jumping to this handler. No code emitted. -+instruct CreateException(iRegP_R10 ex_oop) -+%{ -+ match(Set ex_oop (CreateEx)); ++instruct MoveD2L_reg_reg(iRegLNoSp dst, fRegD src) %{ + -+ ins_cost(0); -+ format %{ " -- \t// exception oop; no code emitted, #@CreateException" %} ++ match(Set dst (MoveD2L src)); + -+ size(0); ++ effect(DEF dst, USE src); + -+ ins_encode( /*empty*/ ); ++ ins_cost(XFER_COST); ++ ++ format %{ "fmv.x.d $dst, $src\t#@MoveD2L_reg_reg" %} ++ ++ ins_encode %{ ++ __ fmv_x_d(as_Register($dst$$reg), as_FloatRegister($src$$reg)); ++ %} ++ ++ ins_pipe(fp_d2l); + -+ ins_pipe(pipe_class_empty); +%} + -+// Rethrow exception: The exception oop will come in the first -+// argument position. Then JUMP (not call) to the rethrow stub code. -+instruct RethrowException() -+%{ -+ match(Rethrow); ++instruct MoveL2D_reg_reg(fRegD dst, iRegL src) %{ + -+ ins_cost(BRANCH_COST); ++ match(Set dst (MoveL2D src)); + -+ format %{ "j rethrow_stub\t#@RethrowException" %} ++ effect(DEF dst, USE src); + -+ ins_encode( riscv_enc_rethrow() ); ++ ins_cost(XFER_COST); + -+ ins_pipe(pipe_class_call); ++ format %{ "fmv.d.x $dst, $src\t#@MoveD2L_reg_reg" %} ++ ++ ins_encode %{ ++ __ fmv_d_x(as_FloatRegister($dst$$reg), as_Register($src$$reg)); ++ %} ++ ++ ins_pipe(fp_l2d); +%} + -+// Return Instruction -+// epilog node loads ret address into ra as part of frame pop -+instruct Ret() ++// ============================================================================ ++// Compare Instructions which set the result float comparisons in dest register. ++ ++instruct cmpF3_reg_reg(iRegINoSp dst, fRegF op1, fRegF op2) +%{ -+ match(Return); ++ match(Set dst (CmpF3 op1 op2)); + -+ ins_cost(BRANCH_COST); -+ format %{ "ret\t// return register, #@Ret" %} ++ ins_cost(XFER_COST * 2 + BRANCH_COST + ALU_COST); ++ format %{ "flt.s $dst, $op2, $op1\t#@cmpF3_reg_reg\n\t" ++ "bgtz $dst, done\n\t" ++ "feq.s $dst, $op1, $op2\n\t" ++ "addi $dst, $dst, -1\t#@cmpF3_reg_reg" ++ %} + -+ ins_encode(riscv_enc_ret()); ++ ins_encode %{ ++ // we want -1 for unordered or less than, 0 for equal and 1 for greater than. ++ __ float_compare(as_Register($dst$$reg), as_FloatRegister($op1$$reg), ++ as_FloatRegister($op2$$reg), -1 /*unordered_result < 0*/); ++ %} + -+ ins_pipe(pipe_branch); ++ ins_pipe(pipe_class_default); +%} + -+// Die now. 
-+instruct ShouldNotReachHere() %{ -+ match(Halt); -+ -+ ins_cost(BRANCH_COST); ++instruct cmpD3_reg_reg(iRegINoSp dst, fRegD op1, fRegD op2) ++%{ ++ match(Set dst (CmpD3 op1 op2)); + -+ format %{ "#@ShouldNotReachHere" %} ++ ins_cost(XFER_COST * 2 + BRANCH_COST + ALU_COST); ++ format %{ "flt.d $dst, $op2, $op1\t#@cmpD3_reg_reg\n\t" ++ "bgtz $dst, done\n\t" ++ "feq.d $dst, $op1, $op2\n\t" ++ "addi $dst, $dst, -1\t#@cmpD3_reg_reg" ++ %} + + ins_encode %{ -+ if (is_reachable()) { -+ __ halt(); -+ } ++ // we want -1 for unordered or less than, 0 for equal and 1 for greater than. ++ __ double_compare(as_Register($dst$$reg), as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), -1 /*unordered_result < 0*/); + %} + + ins_pipe(pipe_class_default); +%} + ++instruct cmpL3_reg_reg(iRegINoSp dst, iRegL op1, iRegL op2) ++%{ ++ match(Set dst (CmpL3 op1 op2)); + -+//----------PEEPHOLE RULES----------------------------------------------------- -+// These must follow all instruction definitions as they use the names -+// defined in the instructions definitions. -+// -+// peepmatch ( root_instr_name [preceding_instruction]* ); -+// -+// peepconstraint %{ -+// (instruction_number.operand_name relational_op instruction_number.operand_name -+// [, ...] ); -+// // instruction numbers are zero-based using left to right order in peepmatch -+// -+// peepreplace ( instr_name ( [instruction_number.operand_name]* ) ); -+// // provide an instruction_number.operand_name for each operand that appears -+// // in the replacement instruction's match rule -+// -+// ---------VM FLAGS--------------------------------------------------------- -+// -+// All peephole optimizations can be turned off using -XX:-OptoPeephole -+// -+// Each peephole rule is given an identifying number starting with zero and -+// increasing by one in the order seen by the parser. An individual peephole -+// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=# -+// on the command-line. -+// -+// ---------CURRENT LIMITATIONS---------------------------------------------- -+// -+// Only match adjacent instructions in same basic block -+// Only equality constraints -+// Only constraints between operands, not (0.dest_reg == RAX_enc) -+// Only one replacement instruction -+// -+//----------SMARTSPILL RULES--------------------------------------------------- -+// These must follow all instruction definitions as they use the names -+// defined in the instructions definitions. -+ -+// Local Variables: -+// mode: c++ -+// End: -diff --git a/src/hotspot/cpu/riscv/riscv_b.ad b/src/hotspot/cpu/riscv/riscv_b.ad -new file mode 100644 -index 000000000..6f7055a39 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/riscv_b.ad -@@ -0,0 +1,605 @@ -+// -+// Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. -+// Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. -+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+// -+// This code is free software; you can redistribute it and/or modify it -+// under the terms of the GNU General Public License version 2 only, as -+// published by the Free Software Foundation. -+// -+// This code is distributed in the hope that it will be useful, but WITHOUT -+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+// version 2 for more details (a copy is included in the LICENSE file that -+// accompanied this code). 
-+// -+// You should have received a copy of the GNU General Public License version -+// 2 along with this work; if not, write to the Free Software Foundation, -+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+// -+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+// or visit www.oracle.com if you need additional information or have any -+// questions. -+// -+// -+ -+// RISCV Bit-Manipulation Extension Architecture Description File -+ -+instruct rorI_imm_b(iRegINoSp dst, iRegI src, immI rshift, immI lshift) %{ -+ match(Set dst (OrI (URShiftI src rshift) (LShiftI src lshift))); -+ predicate(UseZbb && ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) == 32)); -+ effect(DEF dst, USE src); -+ -+ format %{ "roriw $dst, $src, ($rshift & 0x1f)\t#@rorI_imm_b" %} -+ -+ ins_cost(ALU_COST); ++ ins_cost(ALU_COST * 3 + BRANCH_COST); ++ format %{ "slt $dst, $op2, $op1\t#@cmpL3_reg_reg\n\t" ++ "bnez $dst, done\n\t" ++ "slt $dst, $op1, $op2\n\t" ++ "neg $dst, $dst\t#@cmpL3_reg_reg" ++ %} + ins_encode %{ -+ __ roriw(as_Register($dst$$reg), as_Register($src$$reg), $rshift$$constant & 0x1f); ++ __ cmp_l2i(t0, as_Register($op1$$reg), as_Register($op2$$reg)); ++ __ mv(as_Register($dst$$reg), t0); + %} + -+ ins_pipe(ialu_reg_shift); ++ ins_pipe(pipe_class_default); +%} + -+instruct rorL_imm_b(iRegLNoSp dst, iRegL src, immI rshift, immI lshift) %{ -+ match(Set dst (OrL (URShiftL src rshift) (LShiftL src lshift))); -+ predicate(UseZbb && ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) == 64)); -+ effect(DEF dst, USE src); ++instruct cmpLTMask_reg_reg(iRegINoSp dst, iRegI p, iRegI q) ++%{ ++ match(Set dst (CmpLTMask p q)); + -+ format %{ "rori $dst, $src, ($rshift & 0x3f)\t#@rorL_imm_b" %} ++ ins_cost(2 * ALU_COST); + -+ ins_cost(ALU_COST); -+ ins_encode %{ -+ __ rori(as_Register($dst$$reg), as_Register($src$$reg), $rshift$$constant & 0x3f); ++ format %{ "slt $dst, $p, $q\t#@cmpLTMask_reg_reg\n\t" ++ "subw $dst, zr, $dst\t#@cmpLTMask_reg_reg" + %} + -+ ins_pipe(ialu_reg_shift); -+%} -+ -+// ror expander -+instruct rorI_reg_b(iRegINoSp dst, iRegI src, iRegI shift) %{ -+ effect(DEF dst, USE src, USE shift); -+ -+ format %{ "rorw $dst, $src, $shift\t#@rorI_reg_b" %} -+ ins_cost(ALU_COST); + ins_encode %{ -+ __ rorw(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); ++ __ slt(as_Register($dst$$reg), as_Register($p$$reg), as_Register($q$$reg)); ++ __ subw(as_Register($dst$$reg), zr, as_Register($dst$$reg)); + %} + + ins_pipe(ialu_reg_reg); +%} + -+// ror expander -+instruct rorL_reg_b(iRegLNoSp dst, iRegL src, iRegI shift) %{ -+ effect(DEF dst, USE src, USE shift); ++instruct cmpLTMask_reg_zero(iRegINoSp dst, iRegIorL2I op, immI0 zero) ++%{ ++ match(Set dst (CmpLTMask op zero)); + -+ format %{ "ror $dst, $src, $shift\t#@rorL_reg_b" %} + ins_cost(ALU_COST); ++ ++ format %{ "sraiw $dst, $dst, 31\t#@cmpLTMask_reg_reg" %} ++ + ins_encode %{ -+ __ ror(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); ++ __ sraiw(as_Register($dst$$reg), as_Register($op$$reg), 31); + %} -+ ins_pipe(ialu_reg_reg); ++ ++ ins_pipe(ialu_reg_shift); +%} + + -+instruct rorI_rReg_Var_C_32_b(iRegINoSp dst, iRegI src, iRegI shift, immI_32 imm32) %{ -+ predicate(UseZbb); -+ match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI imm32 shift)))); ++// ============================================================================ ++// Max and Min + -+ expand %{ -+ rorI_reg_b(dst, src, shift); -+ %} -+%} ++instruct minI_rReg(iRegINoSp dst, 
iRegI src1, iRegI src2) ++%{ ++ match(Set dst (MinI src1 src2)); + -+instruct rorI_rReg_Var_C0_b(iRegINoSp dst, iRegI src, iRegI shift, immI0 zero) %{ -+ predicate(UseZbb); -+ match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI zero shift)))); ++ effect(DEF dst, USE src1, USE src2); + -+ expand %{ -+ rorI_reg_b(dst, src, shift); ++ ins_cost(BRANCH_COST + ALU_COST * 2); ++ format %{ ++ "ble $src1, $src2, Lsrc1.\t#@minI_rReg\n\t" ++ "mv $dst, $src2\n\t" ++ "j Ldone\n\t" ++ "bind Lsrc1\n\t" ++ "mv $dst, $src1\n\t" ++ "bind\t#@minI_rReg" + %} -+%} -+ -+instruct rorL_rReg_Var_C_64_b(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 imm64) %{ -+ predicate(UseZbb); -+ match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI imm64 shift)))); + -+ expand %{ -+ rorL_reg_b(dst, src, shift); ++ ins_encode %{ ++ Label Lsrc1, Ldone; ++ __ ble(as_Register($src1$$reg), as_Register($src2$$reg), Lsrc1); ++ __ mv(as_Register($dst$$reg), as_Register($src2$$reg)); ++ __ j(Ldone); ++ __ bind(Lsrc1); ++ __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); ++ __ bind(Ldone); + %} ++ ++ ins_pipe(ialu_reg_reg); +%} + -+instruct rorL_rReg_Var_C0_b(iRegLNoSp dst, iRegL src, iRegI shift, immI0 zero) %{ -+ predicate(UseZbb); -+ match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI zero shift)))); ++instruct maxI_rReg(iRegINoSp dst, iRegI src1, iRegI src2) ++%{ ++ match(Set dst (MaxI src1 src2)); + -+ expand %{ -+ rorL_reg_b(dst, src, shift); -+ %} -+%} ++ effect(DEF dst, USE src1, USE src2); + -+// rol expander -+instruct rolI_reg_b(iRegINoSp dst, iRegI src, iRegI shift) %{ -+ effect(DEF dst, USE src, USE shift); ++ ins_cost(BRANCH_COST + ALU_COST * 2); ++ format %{ ++ "bge $src1, $src2, Lsrc1\t#@maxI_rReg\n\t" ++ "mv $dst, $src2\n\t" ++ "j Ldone\n\t" ++ "bind Lsrc1\n\t" ++ "mv $dst, $src1\n\t" ++ "bind\t#@maxI_rReg" ++ %} + -+ format %{ "rolw $dst, $src, $shift\t#@rolI_reg_b" %} -+ ins_cost(ALU_COST); + ins_encode %{ -+ __ rolw(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); ++ Label Lsrc1, Ldone; ++ __ bge(as_Register($src1$$reg), as_Register($src2$$reg), Lsrc1); ++ __ mv(as_Register($dst$$reg), as_Register($src2$$reg)); ++ __ j(Ldone); ++ __ bind(Lsrc1); ++ __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); ++ __ bind(Ldone); ++ + %} + + ins_pipe(ialu_reg_reg); +%} + -+// rol expander -+instruct rolL_reg_b(iRegLNoSp dst, iRegL src, iRegI shift) %{ -+ effect(DEF dst, USE src, USE shift); ++// ============================================================================ ++// Branch Instructions ++// Direct Branch. 
++instruct branch(label lbl) ++%{ ++ match(Goto); + -+ format %{ "rol $dst, $src, $shift\t#@rolL_reg_b" %} -+ ins_cost(ALU_COST); -+ ins_encode %{ -+ __ rol(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); -+ %} -+ -+ ins_pipe(ialu_reg_reg); -+%} ++ effect(USE lbl); + -+instruct rolI_rReg_Var_C_32_b(iRegINoSp dst, iRegI src, iRegI shift, immI_32 imm32) %{ -+ predicate(UseZbb); -+ match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI imm32 shift)))); ++ ins_cost(BRANCH_COST); ++ format %{ "j $lbl\t#@branch" %} + -+ expand %{ -+ rolI_reg_b(dst, src, shift); -+ %} ++ ins_encode(riscv_enc_j(lbl)); ++ ++ ins_pipe(pipe_branch); +%} + -+instruct rolI_rReg_Var_C0_b(iRegINoSp dst, iRegI src, iRegI shift, immI0 zero) %{ -+ predicate(UseZbb); -+ match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI zero shift)))); ++// ============================================================================ ++// Compare and Branch Instructions ++ ++// Patterns for short (< 12KiB) variants + -+ expand %{ -+ rolI_reg_b(dst, src, shift); -+ %} -+%} ++// Compare flags and branch near instructions. ++instruct cmpFlag_branch(cmpOpEqNe cmp, rFlagsReg cr, label lbl) %{ ++ match(If cmp cr); ++ effect(USE lbl); + -+instruct rolL_rReg_Var_C_64_b(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 imm64) %{ -+ predicate(UseZbb); -+ match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI imm64 shift)))); ++ ins_cost(BRANCH_COST); ++ format %{ "b$cmp $cr, zr, $lbl\t#@cmpFlag_branch" %} + -+ expand %{ -+ rolL_reg_b(dst, src, shift); ++ ins_encode %{ ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($cr$$reg), *($lbl$$label)); + %} ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); +%} + -+instruct rolL_rReg_Var_C0_b(iRegLNoSp dst, iRegL src, iRegI shift, immI0 zero) %{ -+ predicate(UseZbb); -+ match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI zero shift)))); ++// Compare signed int and branch near instructions ++instruct cmpI_branch(cmpOp cmp, iRegI op1, iRegI op2, label lbl) ++%{ ++ // Same match rule as `far_cmpI_branch'. ++ match(If cmp (CmpI op1 op2)); + -+ expand %{ -+ rolL_reg_b(dst, src, shift); -+ %} -+%} ++ effect(USE lbl); + -+// Convert oop into int for vectors alignment masking -+instruct convP2I_b(iRegINoSp dst, iRegP src) %{ -+ predicate(UseZba); -+ match(Set dst (ConvL2I (CastP2X src))); ++ ins_cost(BRANCH_COST); + -+ format %{ "zext.w $dst, $src\t# ptr -> int @convP2I_b" %} ++ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpI_branch" %} + -+ ins_cost(ALU_COST); + ins_encode %{ -+ __ zext_w(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_cmp_branch); ++ ins_short_branch(1); +%} + -+// byte to int -+instruct convB2I_reg_reg_b(iRegINoSp dst, iRegIorL2I src, immI_24 lshift, immI_24 rshift) %{ -+ predicate(UseZbb); -+ match(Set dst (RShiftI (LShiftI src lshift) rshift)); ++instruct cmpI_loop(cmpOp cmp, iRegI op1, iRegI op2, label lbl) ++%{ ++ // Same match rule as `far_cmpI_loop'. 
++ match(CountedLoopEnd cmp (CmpI op1 op2)); ++ ++ effect(USE lbl); + -+ format %{ "sext.b $dst, $src\t# b2i, #@convB2I_reg_reg_b" %} ++ ins_cost(BRANCH_COST); ++ ++ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpI_loop" %} + -+ ins_cost(ALU_COST); + ins_encode %{ -+ __ sext_b(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_cmp_branch); ++ ins_short_branch(1); +%} + -+// int to short -+instruct convI2S_reg_reg_b(iRegINoSp dst, iRegIorL2I src, immI_16 lshift, immI_16 rshift) %{ -+ predicate(UseZbb); -+ match(Set dst (RShiftI (LShiftI src lshift) rshift)); ++// Compare unsigned int and branch near instructions ++instruct cmpU_branch(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) ++%{ ++ // Same match rule as `far_cmpU_branch'. ++ match(If cmp (CmpU op1 op2)); + -+ format %{ "sext.h $dst, $src\t# i2s, #@convI2S_reg_reg_b" %} ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpU_branch" %} + -+ ins_cost(ALU_COST); + ins_encode %{ -+ __ sext_h(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_cmp_branch); ++ ins_short_branch(1); +%} + -+// short to unsigned int -+instruct convS2UI_reg_reg_b(iRegINoSp dst, iRegIorL2I src, immI_16bits mask) %{ -+ predicate(UseZbb); -+ match(Set dst (AndI src mask)); ++instruct cmpU_loop(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) ++%{ ++ // Same match rule as `far_cmpU_loop'. ++ match(CountedLoopEnd cmp (CmpU op1 op2)); + -+ format %{ "zext.h $dst, $src\t# s2ui, #@convS2UI_reg_reg_b" %} ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpU_loop" %} + -+ ins_cost(ALU_COST); + ins_encode %{ -+ __ zext_h(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_cmp_branch); ++ ins_short_branch(1); +%} + -+// int to unsigned long (zero extend) -+instruct convI2UL_reg_reg_b(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask) %{ -+ predicate(UseZba); -+ match(Set dst (AndL (ConvI2L src) mask)); ++// Compare signed long and branch near instructions ++instruct cmpL_branch(cmpOp cmp, iRegL op1, iRegL op2, label lbl) ++%{ ++ // Same match rule as `far_cmpL_branch'. ++ match(If cmp (CmpL op1 op2)); + -+ format %{ "zext.w $dst, $src\t# i2ul, #@convI2UL_reg_reg_b" %} ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpL_branch" %} + -+ ins_cost(ALU_COST); + ins_encode %{ -+ __ zext_w(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label)); + %} + -+ ins_pipe(ialu_reg_shift); ++ ins_pipe(pipe_cmp_branch); ++ ins_short_branch(1); +%} + -+// BSWAP instructions -+instruct bytes_reverse_int_b(iRegINoSp dst, iRegIorL2I src) %{ -+ predicate(UseZbb); -+ match(Set dst (ReverseBytesI src)); ++instruct cmpL_loop(cmpOp cmp, iRegL op1, iRegL op2, label lbl) ++%{ ++ // Same match rule as `far_cmpL_loop'. 
++ match(CountedLoopEnd cmp (CmpL op1 op2)); + -+ ins_cost(ALU_COST * 2); -+ format %{ "revb_w_w $dst, $src\t#@bytes_reverse_int_b" %} ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpL_loop" %} + + ins_encode %{ -+ __ revb_w_w(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_cmp_branch); ++ ins_short_branch(1); +%} + -+instruct bytes_reverse_long_b(iRegLNoSp dst, iRegL src) %{ -+ predicate(UseZbb); -+ match(Set dst (ReverseBytesL src)); ++// Compare unsigned long and branch near instructions ++instruct cmpUL_branch(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) ++%{ ++ // Same match rule as `far_cmpUL_branch'. ++ match(If cmp (CmpUL op1 op2)); + -+ ins_cost(ALU_COST); -+ format %{ "rev8 $dst, $src\t#@bytes_reverse_long_b" %} ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpUL_branch" %} + + ins_encode %{ -+ __ rev8(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_cmp_branch); ++ ins_short_branch(1); +%} + -+instruct bytes_reverse_unsigned_short_b(iRegINoSp dst, iRegIorL2I src) %{ -+ predicate(UseZbb); -+ match(Set dst (ReverseBytesUS src)); ++instruct cmpUL_loop(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) ++%{ ++ // Same match rule as `far_cmpUL_loop'. ++ match(CountedLoopEnd cmp (CmpUL op1 op2)); + -+ ins_cost(ALU_COST * 2); -+ format %{ "revb_h_h_u $dst, $src\t#@bytes_reverse_unsigned_short_b" %} ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpUL_loop" %} + + ins_encode %{ -+ __ revb_h_h_u(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_cmp_branch); ++ ins_short_branch(1); +%} + -+instruct bytes_reverse_short_b(iRegINoSp dst, iRegIorL2I src) %{ -+ predicate(UseZbb); -+ match(Set dst (ReverseBytesS src)); ++// Compare pointer and branch near instructions ++instruct cmpP_branch(cmpOpU cmp, iRegP op1, iRegP op2, label lbl) ++%{ ++ // Same match rule as `far_cmpP_branch'. ++ match(If cmp (CmpP op1 op2)); + -+ ins_cost(ALU_COST * 2); -+ format %{ "revb_h_h $dst, $src\t#@bytes_reverse_short_b" %} ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpP_branch" %} + + ins_encode %{ -+ __ revb_h_h(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_cmp_branch); ++ ins_short_branch(1); +%} + -+// Shift Add Pointer -+instruct shaddP_reg_reg_b(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale imm) %{ -+ predicate(UseZba); -+ match(Set dst (AddP src1 (LShiftL src2 imm))); ++instruct cmpP_loop(cmpOpU cmp, iRegP op1, iRegP op2, label lbl) ++%{ ++ // Same match rule as `far_cmpP_loop'. 
++ match(CountedLoopEnd cmp (CmpP op1 op2)); + -+ ins_cost(ALU_COST); -+ format %{ "shadd $dst, $src2, $src1, $imm\t# ptr, #@shaddP_reg_reg_b" %} ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpP_loop" %} + + ins_encode %{ -+ __ shadd(as_Register($dst$$reg), -+ as_Register($src2$$reg), -+ as_Register($src1$$reg), -+ t0, -+ $imm$$constant); ++ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label)); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_cmp_branch); ++ ins_short_branch(1); +%} + -+instruct shaddP_reg_reg_ext_b(iRegPNoSp dst, iRegP src1, iRegI src2, immIScale imm) %{ -+ predicate(UseZba); -+ match(Set dst (AddP src1 (LShiftL (ConvI2L src2) imm))); ++// Compare narrow pointer and branch near instructions ++instruct cmpN_branch(cmpOpU cmp, iRegN op1, iRegN op2, label lbl) ++%{ ++ // Same match rule as `far_cmpN_branch'. ++ match(If cmp (CmpN op1 op2)); + -+ ins_cost(ALU_COST); -+ format %{ "shadd $dst, $src2, $src1, $imm\t# ptr, #@shaddP_reg_reg_ext_b" %} ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpN_branch" %} + + ins_encode %{ -+ __ shadd(as_Register($dst$$reg), -+ as_Register($src2$$reg), -+ as_Register($src1$$reg), -+ t0, -+ $imm$$constant); ++ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label)); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_cmp_branch); ++ ins_short_branch(1); +%} + -+// Shift Add Long -+instruct shaddL_reg_reg_b(iRegLNoSp dst, iRegL src1, iRegL src2, immIScale imm) %{ -+ predicate(UseZba); -+ match(Set dst (AddL src1 (LShiftL src2 imm))); ++instruct cmpN_loop(cmpOpU cmp, iRegN op1, iRegN op2, label lbl) ++%{ ++ // Same match rule as `far_cmpN_loop'. ++ match(CountedLoopEnd cmp (CmpN op1 op2)); + -+ ins_cost(ALU_COST); -+ format %{ "shadd $dst, $src2, $src1, $imm\t#@shaddL_reg_reg_b" %} ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpN_loop" %} + + ins_encode %{ -+ __ shadd(as_Register($dst$$reg), -+ as_Register($src2$$reg), -+ as_Register($src1$$reg), -+ t0, -+ $imm$$constant); ++ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label)); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_cmp_branch); ++ ins_short_branch(1); +%} + -+instruct shaddL_reg_reg_ext_b(iRegLNoSp dst, iRegL src1, iRegI src2, immIScale imm) %{ -+ predicate(UseZba); -+ match(Set dst (AddL src1 (LShiftL (ConvI2L src2) imm))); ++// Compare float and branch near instructions ++instruct cmpF_branch(cmpOp cmp, fRegF op1, fRegF op2, label lbl) ++%{ ++ // Same match rule as `far_cmpF_branch'. 
++ match(If cmp (CmpF op1 op2)); + -+ ins_cost(ALU_COST); -+ format %{ "shadd $dst, $src2, $src1, $imm\t#@shaddL_reg_reg_ext_b" %} ++ effect(USE lbl); ++ ++ ins_cost(XFER_COST + BRANCH_COST); ++ format %{ "float_b$cmp $op1, $op2 \t#@cmpF_branch"%} + + ins_encode %{ -+ __ shadd(as_Register($dst$$reg), -+ as_Register($src2$$reg), -+ as_Register($src1$$reg), -+ t0, -+ $imm$$constant); ++ __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), *($lbl$$label)); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_class_compare); ++ ins_short_branch(1); +%} + -+// Zeros Count instructions -+instruct countLeadingZerosI_b(iRegINoSp dst, iRegIorL2I src) %{ -+ predicate(UseZbb); -+ match(Set dst (CountLeadingZerosI src)); ++instruct cmpF_loop(cmpOp cmp, fRegF op1, fRegF op2, label lbl) ++%{ ++ // Same match rule as `far_cmpF_loop'. ++ match(CountedLoopEnd cmp (CmpF op1 op2)); ++ effect(USE lbl); + -+ ins_cost(ALU_COST); -+ format %{ "clzw $dst, $src\t#@countLeadingZerosI_b" %} ++ ins_cost(XFER_COST + BRANCH_COST); ++ format %{ "float_b$cmp $op1, $op2\t#@cmpF_loop"%} + + ins_encode %{ -+ __ clzw(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), *($lbl$$label)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_class_compare); ++ ins_short_branch(1); +%} + -+instruct countLeadingZerosL_b(iRegINoSp dst, iRegL src) %{ -+ predicate(UseZbb); -+ match(Set dst (CountLeadingZerosL src)); ++// Compare double and branch near instructions ++instruct cmpD_branch(cmpOp cmp, fRegD op1, fRegD op2, label lbl) ++%{ ++ // Same match rule as `far_cmpD_branch'. ++ match(If cmp (CmpD op1 op2)); ++ effect(USE lbl); + -+ ins_cost(ALU_COST); -+ format %{ "clz $dst, $src\t#@countLeadingZerosL_b" %} ++ ins_cost(XFER_COST + BRANCH_COST); ++ format %{ "double_b$cmp $op1, $op2\t#@cmpD_branch"%} + + ins_encode %{ -+ __ clz(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ float_cmp_branch($cmp$$cmpcode | C2_MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), ++ as_FloatRegister($op2$$reg), *($lbl$$label)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_class_compare); ++ ins_short_branch(1); +%} + -+instruct countTrailingZerosI_b(iRegINoSp dst, iRegIorL2I src) %{ -+ predicate(UseZbb); -+ match(Set dst (CountTrailingZerosI src)); ++instruct cmpD_loop(cmpOp cmp, fRegD op1, fRegD op2, label lbl) ++%{ ++ // Same match rule as `far_cmpD_loop'. ++ match(CountedLoopEnd cmp (CmpD op1 op2)); ++ effect(USE lbl); + -+ ins_cost(ALU_COST); -+ format %{ "ctzw $dst, $src\t#@countTrailingZerosI_b" %} ++ ins_cost(XFER_COST + BRANCH_COST); ++ format %{ "double_b$cmp $op1, $op2\t#@cmpD_loop"%} + + ins_encode %{ -+ __ ctzw(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ float_cmp_branch($cmp$$cmpcode | C2_MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), ++ as_FloatRegister($op2$$reg), *($lbl$$label)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_class_compare); ++ ins_short_branch(1); +%} + -+instruct countTrailingZerosL_b(iRegINoSp dst, iRegL src) %{ -+ predicate(UseZbb); -+ match(Set dst (CountTrailingZerosL src)); ++// Compare signed int with zero and branch near instructions ++instruct cmpI_reg_imm0_branch(cmpOp cmp, iRegI op1, immI0 zero, label lbl) ++%{ ++ // Same match rule as `far_cmpI_reg_imm0_branch'. 
++ match(If cmp (CmpI op1 zero)); + -+ ins_cost(ALU_COST); -+ format %{ "ctz $dst, $src\t#@countTrailingZerosL_b" %} ++ effect(USE op1, USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpI_reg_imm0_branch" %} + + ins_encode %{ -+ __ ctz(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); +%} + -+// Population Count instructions -+instruct popCountI_b(iRegINoSp dst, iRegIorL2I src) %{ -+ predicate(UsePopCountInstruction); -+ match(Set dst (PopCountI src)); ++instruct cmpI_reg_imm0_loop(cmpOp cmp, iRegI op1, immI0 zero, label lbl) ++%{ ++ // Same match rule as `far_cmpI_reg_imm0_loop'. ++ match(CountedLoopEnd cmp (CmpI op1 zero)); + -+ ins_cost(ALU_COST); -+ format %{ "cpopw $dst, $src\t#@popCountI_b" %} ++ effect(USE op1, USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpI_reg_imm0_loop" %} + + ins_encode %{ -+ __ cpopw(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); +%} + -+// Note: Long/bitCount(long) returns an int. -+instruct popCountL_b(iRegINoSp dst, iRegL src) %{ -+ predicate(UsePopCountInstruction); -+ match(Set dst (PopCountL src)); ++// Compare unsigned int with zero and branch near instructions ++instruct cmpUEqNeLeGt_reg_imm0_branch(cmpOpUEqNeLeGt cmp, iRegI op1, immI0 zero, label lbl) ++%{ ++ // Same match rule as `far_cmpUEqNeLeGt_reg_imm0_branch'. ++ match(If cmp (CmpU op1 zero)); + -+ ins_cost(ALU_COST); -+ format %{ "cpop $dst, $src\t#@popCountL_b" %} ++ effect(USE op1, USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpUEqNeLeGt_reg_imm0_branch" %} + + ins_encode %{ -+ __ cpop(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); +%} + -+// Max and Min -+instruct minI_reg_b(iRegINoSp dst, iRegI src1, iRegI src2) %{ -+ predicate(UseZbb); -+ match(Set dst (MinI src1 src2)); ++instruct cmpUEqNeLeGt_reg_imm0_loop(cmpOpUEqNeLeGt cmp, iRegI op1, immI0 zero, label lbl) ++%{ ++ // Same match rule as `far_cmpUEqNeLeGt_reg_imm0_loop'. ++ match(CountedLoopEnd cmp (CmpU op1 zero)); ++ ++ effect(USE op1, USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpUEqNeLeGt_reg_imm0_loop" %} + -+ ins_cost(ALU_COST); -+ format %{ "min $dst, $src1, $src2\t#@minI_reg_b" %} + + ins_encode %{ -+ __ min(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg)); ++ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); +%} + -+instruct maxI_reg_b(iRegINoSp dst, iRegI src1, iRegI src2) %{ -+ predicate(UseZbb); -+ match(Set dst (MaxI src1 src2)); ++// Compare signed long with zero and branch near instructions ++instruct cmpL_reg_imm0_branch(cmpOp cmp, iRegL op1, immL0 zero, label lbl) ++%{ ++ // Same match rule as `far_cmpL_reg_imm0_branch'. 
++ match(If cmp (CmpL op1 zero)); + -+ ins_cost(ALU_COST); -+ format %{ "max $dst, $src1, $src2\t#@maxI_reg_b" %} ++ effect(USE op1, USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpL_reg_imm0_branch" %} + + ins_encode %{ -+ __ max(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg)); ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label)); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); +%} + -+// Abs -+instruct absI_reg_b(iRegINoSp dst, iRegI src) %{ -+ predicate(UseZbb); -+ match(Set dst (AbsI src)); ++instruct cmpL_reg_imm0_loop(cmpOp cmp, iRegL op1, immL0 zero, label lbl) ++%{ ++ // Same match rule as `far_cmpL_reg_imm0_loop'. ++ match(CountedLoopEnd cmp (CmpL op1 zero)); + -+ ins_cost(ALU_COST * 2); -+ format %{ -+ "negw t0, $src\n\t" -+ "max $dst, $src, t0\t#@absI_reg_b" -+ %} ++ effect(USE op1, USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpL_reg_imm0_loop" %} + + ins_encode %{ -+ __ negw(t0, as_Register($src$$reg)); -+ __ max(as_Register($dst$$reg), as_Register($src$$reg), t0); ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label)); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); +%} + -+instruct absL_reg_b(iRegLNoSp dst, iRegL src) %{ -+ predicate(UseZbb); -+ match(Set dst (AbsL src)); ++// Compare unsigned long with zero and branch near instructions ++instruct cmpULEqNeLeGt_reg_imm0_branch(cmpOpUEqNeLeGt cmp, iRegL op1, immL0 zero, label lbl) ++%{ ++ // Same match rule as `far_cmpULEqNeLeGt_reg_imm0_branch'. ++ match(If cmp (CmpUL op1 zero)); + -+ ins_cost(ALU_COST * 2); -+ format %{ -+ "neg t0, $src\n\t" -+ "max $dst, $src, t0\t#@absL_reg_b" -+ %} ++ effect(USE op1, USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpULEqNeLeGt_reg_imm0_branch" %} + + ins_encode %{ -+ __ neg(t0, as_Register($src$$reg)); -+ __ max(as_Register($dst$$reg), as_Register($src$$reg), t0); ++ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); +%} + -+// And Not -+instruct andnI_reg_reg_b(iRegINoSp dst, iRegI src1, iRegI src2, immI_M1 m1) %{ -+ predicate(UseZbb); -+ match(Set dst (AndI src1 (XorI src2 m1))); ++instruct cmpULEqNeLeGt_reg_imm0_loop(cmpOpUEqNeLeGt cmp, iRegL op1, immL0 zero, label lbl) ++%{ ++ // Same match rule as `far_cmpULEqNeLeGt_reg_imm0_loop'. ++ match(CountedLoopEnd cmp (CmpUL op1 zero)); + -+ ins_cost(ALU_COST); -+ format %{ "andn $dst, $src1, $src2\t#@andnI_reg_reg_b" %} ++ effect(USE op1, USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpULEqNeLeGt_reg_imm0_loop" %} + + ins_encode %{ -+ __ andn(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); +%} + -+instruct andnL_reg_reg_b(iRegLNoSp dst, iRegL src1, iRegL src2, immL_M1 m1) %{ -+ predicate(UseZbb); -+ match(Set dst (AndL src1 (XorL src2 m1))); ++// Compare pointer with zero and branch near instructions ++instruct cmpP_imm0_branch(cmpOpEqNe cmp, iRegP op1, immP0 zero, label lbl) %{ ++ // Same match rule as `far_cmpP_reg_imm0_branch'. 
++ match(If cmp (CmpP op1 zero)); ++ effect(USE lbl); + -+ ins_cost(ALU_COST); -+ format %{ "andn $dst, $src1, $src2\t#@andnL_reg_reg_b" %} ++ ins_cost(BRANCH_COST); ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpP_imm0_branch" %} + + ins_encode %{ -+ __ andn(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); +%} + -+// Or Not -+instruct ornI_reg_reg_b(iRegINoSp dst, iRegI src1, iRegI src2, immI_M1 m1) %{ -+ predicate(UseZbb); -+ match(Set dst (OrI src1 (XorI src2 m1))); ++instruct cmpP_imm0_loop(cmpOpEqNe cmp, iRegP op1, immP0 zero, label lbl) %{ ++ // Same match rule as `far_cmpP_reg_imm0_loop'. ++ match(CountedLoopEnd cmp (CmpP op1 zero)); ++ effect(USE lbl); + -+ ins_cost(ALU_COST); -+ format %{ "orn $dst, $src1, $src2\t#@ornI_reg_reg_b" %} ++ ins_cost(BRANCH_COST); ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpP_imm0_loop" %} + + ins_encode %{ -+ __ orn(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); +%} + -+instruct ornL_reg_reg_b(iRegLNoSp dst, iRegL src1, iRegL src2, immL_M1 m1) %{ -+ predicate(UseZbb); -+ match(Set dst (OrL src1 (XorL src2 m1))); ++// Compare narrow pointer with zero and branch near instructions ++instruct cmpN_imm0_branch(cmpOpEqNe cmp, iRegN op1, immN0 zero, label lbl) %{ ++ // Same match rule as `far_cmpN_reg_imm0_branch'. ++ match(If cmp (CmpN op1 zero)); ++ effect(USE lbl); + -+ ins_cost(ALU_COST); -+ format %{ "orn $dst, $src1, $src2\t#@ornL_reg_reg_b" %} ++ ins_cost(BRANCH_COST); ++ ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpN_imm0_branch" %} + + ins_encode %{ -+ __ orn(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); +%} -diff --git a/src/hotspot/cpu/riscv/riscv_v.ad b/src/hotspot/cpu/riscv/riscv_v.ad -new file mode 100644 -index 000000000..905041890 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/riscv_v.ad -@@ -0,0 +1,1723 @@ -+// -+// Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. -+// Copyright (c) 2020, Arm Limited. All rights reserved. -+// Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+// -+// This code is free software; you can redistribute it and/or modify it -+// under the terms of the GNU General Public License version 2 only, as -+// published by the Free Software Foundation. -+// -+// This code is distributed in the hope that it will be useful, but WITHOUT -+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+// version 2 for more details (a copy is included in the LICENSE file that -+// accompanied this code). -+// -+// You should have received a copy of the GNU General Public License version -+// 2 along with this work; if not, write to the Free Software Foundation, -+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
-+// -+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+// or visit www.oracle.com if you need additional information or have any -+// questions. -+// -+// + -+// RISCV VEC Architecture Description File ++instruct cmpN_imm0_loop(cmpOpEqNe cmp, iRegN op1, immN0 zero, label lbl) %{ ++ // Same match rule as `far_cmpN_reg_imm0_loop'. ++ match(CountedLoopEnd cmp (CmpN op1 zero)); ++ effect(USE lbl); + -+opclass vmemA(indirect); ++ ins_cost(BRANCH_COST); + -+source_hpp %{ -+ bool op_vec_supported(int opcode); -+%} ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpN_imm0_loop" %} + -+source %{ ++ ins_encode %{ ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); ++ %} + -+ static inline BasicType vector_element_basic_type(const MachNode* n) { -+ const TypeVect* vt = n->bottom_type()->is_vect(); -+ return vt->element_basic_type(); -+ } ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); ++%} + -+ static inline BasicType vector_element_basic_type(const MachNode* use, const MachOper* opnd) { -+ int def_idx = use->operand_index(opnd); -+ Node* def = use->in(def_idx); -+ const TypeVect* vt = def->bottom_type()->is_vect(); -+ return vt->element_basic_type(); -+ } ++// Compare narrow pointer with pointer zero and branch near instructions ++instruct cmpP_narrowOop_imm0_branch(cmpOpEqNe cmp, iRegN op1, immP0 zero, label lbl) %{ ++ // Same match rule as `far_cmpP_narrowOop_imm0_branch'. ++ match(If cmp (CmpP (DecodeN op1) zero)); ++ effect(USE lbl); + -+ static void loadStore(MacroAssembler masm, bool is_store, -+ VectorRegister reg, BasicType bt, Register base) { -+ Assembler::SEW sew = Assembler::elemtype_to_sew(bt); -+ masm.vsetvli(t0, x0, sew); -+ if (is_store) { -+ masm.vsex_v(reg, base, sew); -+ } else { -+ masm.vlex_v(reg, base, sew); -+ } -+ } ++ ins_cost(BRANCH_COST); ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpP_narrowOop_imm0_branch" %} + -+ bool op_vec_supported(int opcode) { -+ switch (opcode) { -+ // No multiply reduction instructions -+ case Op_MulReductionVD: -+ case Op_MulReductionVF: -+ case Op_MulReductionVI: -+ case Op_MulReductionVL: -+ // Others -+ case Op_Extract: -+ case Op_ExtractB: -+ case Op_ExtractC: -+ case Op_ExtractD: -+ case Op_ExtractF: -+ case Op_ExtractI: -+ case Op_ExtractL: -+ case Op_ExtractS: -+ case Op_ExtractUB: -+ return false; -+ default: -+ return UseRVV; -+ } -+ } ++ ins_encode %{ ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); ++ %} + ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); +%} + -+definitions %{ -+ int_def VEC_COST (200, 200); -+%} ++instruct cmpP_narrowOop_imm0_loop(cmpOpEqNe cmp, iRegN op1, immP0 zero, label lbl) %{ ++ // Same match rule as `far_cmpP_narrowOop_imm0_loop'. 
++ match(CountedLoopEnd cmp (CmpP (DecodeN op1) zero)); ++ effect(USE lbl); + -+// All VEC instructions ++ ins_cost(BRANCH_COST); ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpP_narrowOop_imm0_loop" %} + -+// vector load/store -+instruct loadV(vReg dst, vmemA mem) %{ -+ match(Set dst (LoadVector mem)); -+ ins_cost(VEC_COST); -+ format %{ "vle $dst, $mem\t#@loadV" %} + ins_encode %{ -+ VectorRegister dst_reg = as_VectorRegister($dst$$reg); -+ loadStore(MacroAssembler(&cbuf), false, dst_reg, -+ vector_element_basic_type(this), as_Register($mem$$base)); ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); +%} + -+instruct storeV(vReg src, vmemA mem) %{ -+ match(Set mem (StoreVector mem src)); -+ ins_cost(VEC_COST); -+ format %{ "vse $src, $mem\t#@storeV" %} ++// Patterns for far (20KiB) variants ++ ++instruct far_cmpFlag_branch(cmpOp cmp, rFlagsReg cr, label lbl) %{ ++ match(If cmp cr); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ format %{ "far_b$cmp $cr, zr, L\t#@far_cmpFlag_branch"%} ++ + ins_encode %{ -+ VectorRegister src_reg = as_VectorRegister($src$$reg); -+ loadStore(MacroAssembler(&cbuf), true, src_reg, -+ vector_element_basic_type(this, $src), as_Register($mem$$base)); ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($cr$$reg), *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_cmpz_branch); +%} + -+// vector abs ++// Compare signed int and branch far instructions ++instruct far_cmpI_branch(cmpOp cmp, iRegI op1, iRegI op2, label lbl) %{ ++ match(If cmp (CmpI op1 op2)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ ++ // the format instruction [far_b$cmp] here is be used as two insructions ++ // in macroassembler: b$not_cmp(op1, op2, done), j($lbl), bind(done) ++ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpI_branch" %} + -+instruct vabsB(vReg dst, vReg src, vReg tmp) %{ -+ match(Set dst (AbsVB src)); -+ ins_cost(VEC_COST); -+ effect(TEMP tmp); -+ format %{ "vrsub.vi $tmp, 0, $src\t#@vabsB\n\t" -+ "vmax.vv $dst, $tmp, $src" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); -+ __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_cmp_branch); +%} + -+instruct vabsS(vReg dst, vReg src, vReg tmp) %{ -+ match(Set dst (AbsVS src)); -+ ins_cost(VEC_COST); -+ effect(TEMP tmp); -+ format %{ "vrsub.vi $tmp, 0, $src\t#@vabsS\n\t" -+ "vmax.vv $dst, $tmp, $src" %} ++instruct far_cmpI_loop(cmpOp cmp, iRegI op1, iRegI op2, label lbl) %{ ++ match(CountedLoopEnd cmp (CmpI op1 op2)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpI_loop" %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); -+ __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_cmp_branch); +%} + -+instruct vabsI(vReg dst, vReg src, vReg tmp) %{ -+ match(Set dst (AbsVI src)); -+ ins_cost(VEC_COST); -+ effect(TEMP 
tmp); -+ format %{ "vrsub.vi $tmp, 0, $src\t#@vabsI\n\t" -+ "vmax.vv $dst, $tmp, $src" %} ++instruct far_cmpU_branch(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) %{ ++ match(If cmp (CmpU op1 op2)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpU_branch" %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); -+ __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); ++ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_cmp_branch); +%} + -+instruct vabsL(vReg dst, vReg src, vReg tmp) %{ -+ match(Set dst (AbsVL src)); -+ ins_cost(VEC_COST); -+ effect(TEMP tmp); -+ format %{ "vrsub.vi $tmp, 0, $src\t#@vabsL\n\t" -+ "vmax.vv $dst, $tmp, $src" %} ++instruct far_cmpU_loop(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) %{ ++ match(CountedLoopEnd cmp (CmpU op1 op2)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpU_loop" %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); -+ __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); ++ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_cmp_branch); +%} + -+instruct vabsF(vReg dst, vReg src) %{ -+ match(Set dst (AbsVF src)); -+ ins_cost(VEC_COST); -+ format %{ "vfsgnjx.vv $dst, $src, $src, vm\t#@vabsF" %} ++instruct far_cmpL_branch(cmpOp cmp, iRegL op1, iRegL op2, label lbl) %{ ++ match(If cmp (CmpL op1 op2)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpL_branch" %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfsgnjx_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($src$$reg)); ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_cmp_branch); +%} + -+instruct vabsD(vReg dst, vReg src) %{ -+ match(Set dst (AbsVD src)); -+ ins_cost(VEC_COST); -+ format %{ "vfsgnjx.vv $dst, $src, $src, vm\t#@vabsD" %} ++instruct far_cmpLloop(cmpOp cmp, iRegL op1, iRegL op2, label lbl) %{ ++ match(CountedLoopEnd cmp (CmpL op1 op2)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpL_loop" %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfsgnjx_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($src$$reg)); ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_cmp_branch); +%} + -+// vector add ++instruct far_cmpUL_branch(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) %{ ++ match(If cmp (CmpUL op1 op2)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpUL_branch" %} + -+instruct vaddB(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (AddVB src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vadd.vv $dst, $src1, 
$src2\t#@vaddB" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vadd_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); ++ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_cmp_branch); +%} + -+instruct vaddS(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (AddVS src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vadd.vv $dst, $src1, $src2\t#@vaddS" %} ++instruct far_cmpUL_loop(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) %{ ++ match(CountedLoopEnd cmp (CmpUL op1 op2)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpUL_loop" %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vadd_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); ++ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_cmp_branch); +%} + -+instruct vaddI(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (AddVI src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vadd.vv $dst, $src1, $src2\t#@vaddI" %} ++instruct far_cmpP_branch(cmpOpU cmp, iRegP op1, iRegP op2, label lbl) ++%{ ++ match(If cmp (CmpP op1 op2)); ++ ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ ++ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpP_branch" %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vadd_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); ++ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_cmp_branch); +%} + -+instruct vaddL(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (AddVL src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vadd.vv $dst, $src1, $src2\t#@vaddL" %} ++instruct far_cmpP_loop(cmpOpU cmp, iRegP op1, iRegP op2, label lbl) ++%{ ++ match(CountedLoopEnd cmp (CmpP op1 op2)); ++ ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ ++ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpP_loop" %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vadd_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); ++ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_cmp_branch); +%} + -+instruct vaddF(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (AddVF src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vfadd.vv $dst, $src1, $src2\t#@vaddF" %} ++instruct far_cmpN_branch(cmpOpU cmp, iRegN op1, iRegN op2, label lbl) ++%{ ++ match(If cmp (CmpN op1 op2)); ++ ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ ++ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpN_branch" %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfadd_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); ++ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label), /* is_far */ 
true); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_cmp_branch); +%} + -+instruct vaddD(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (AddVD src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vfadd.vv $dst, $src1, $src2\t#@vaddD" %} ++instruct far_cmpN_loop(cmpOpU cmp, iRegN op1, iRegN op2, label lbl) ++%{ ++ match(CountedLoopEnd cmp (CmpN op1 op2)); ++ ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ ++ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpN_loop" %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfadd_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); ++ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_cmp_branch); +%} + -+// vector and ++// Float compare and branch instructions ++instruct far_cmpF_branch(cmpOp cmp, fRegF op1, fRegF op2, label lbl) ++%{ ++ match(If cmp (CmpF op1 op2)); ++ ++ effect(USE lbl); ++ ++ ins_cost(XFER_COST + BRANCH_COST * 2); ++ format %{ "far_float_b$cmp $op1, $op2\t#@far_cmpF_branch"%} + -+instruct vand(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (AndV src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vand.vv $dst, $src1, $src2\t#@vand" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vand_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); ++ __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), ++ *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_class_compare); +%} + -+// vector or ++instruct far_cmpF_loop(cmpOp cmp, fRegF op1, fRegF op2, label lbl) ++%{ ++ match(CountedLoopEnd cmp (CmpF op1 op2)); ++ effect(USE lbl); ++ ++ ins_cost(XFER_COST + BRANCH_COST * 2); ++ format %{ "far_float_b$cmp $op1, $op2\t#@far_cmpF_loop"%} + -+instruct vor(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (OrV src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vor.vv $dst, $src1, $src2\t#@vor" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vor_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); ++ __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), ++ *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_class_compare); +%} + -+// vector xor ++// Double compare and branch instructions ++instruct far_cmpD_branch(cmpOp cmp, fRegD op1, fRegD op2, label lbl) ++%{ ++ match(If cmp (CmpD op1 op2)); ++ effect(USE lbl); ++ ++ ins_cost(XFER_COST + BRANCH_COST * 2); ++ format %{ "far_double_b$cmp $op1, $op2\t#@far_cmpD_branch"%} + -+instruct vxor(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (XorV src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vxor.vv $dst, $src1, $src2\t#@vxor" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vxor_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); ++ __ float_cmp_branch($cmp$$cmpcode | C2_MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), ++ as_FloatRegister($op2$$reg), *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_class_compare); +%} + -+// vector float div ++instruct far_cmpD_loop(cmpOp cmp, fRegD op1, fRegD op2, label lbl) ++%{ ++ match(CountedLoopEnd cmp (CmpD op1 op2)); ++ effect(USE lbl); 
++ ++ ins_cost(XFER_COST + BRANCH_COST * 2); ++ format %{ "far_double_b$cmp $op1, $op2\t#@far_cmpD_loop"%} + -+instruct vdivF(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (DivVF src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vfdiv.vv $dst, $src1, $src2\t#@vdivF" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfdiv_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); ++ __ float_cmp_branch($cmp$$cmpcode | C2_MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), ++ as_FloatRegister($op2$$reg), *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_class_compare); +%} + -+instruct vdivD(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (DivVD src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vfdiv.vv $dst, $src1, $src2\t#@vdivD" %} ++instruct far_cmpI_reg_imm0_branch(cmpOp cmp, iRegI op1, immI0 zero, label lbl) ++%{ ++ match(If cmp (CmpI op1 zero)); ++ ++ effect(USE op1, USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpI_reg_imm0_branch" %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfdiv_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_cmpz_branch); +%} + -+// vector fmla ++instruct far_cmpI_reg_imm0_loop(cmpOp cmp, iRegI op1, immI0 zero, label lbl) ++%{ ++ match(CountedLoopEnd cmp (CmpI op1 zero)); ++ ++ effect(USE op1, USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpI_reg_imm0_loop" %} + -+// dst_src1 = dst_src1 + src2 * src3 -+instruct vfmlaF(vReg dst_src1, vReg src2, vReg src3) %{ -+ predicate(UseFMA); -+ match(Set dst_src1 (FmaVF dst_src1 (Binary src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vfmacc.vv $dst_src1, $src2, $src3\t#@vfmlaF" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfmacc_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_cmpz_branch); +%} + -+// dst_src1 = dst_src1 + src2 * src3 -+instruct vfmlaD(vReg dst_src1, vReg src2, vReg src3) %{ -+ predicate(UseFMA); -+ match(Set dst_src1 (FmaVD dst_src1 (Binary src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vfmacc.vv $dst_src1, $src2, $src3\t#@vfmlaD" %} ++instruct far_cmpUEqNeLeGt_imm0_branch(cmpOpUEqNeLeGt cmp, iRegI op1, immI0 zero, label lbl) ++%{ ++ match(If cmp (CmpU op1 zero)); ++ ++ effect(USE op1, USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpUEqNeLeGt_imm0_branch" %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfmacc_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_cmpz_branch); +%} + -+// vector fmls ++instruct far_cmpUEqNeLeGt_reg_imm0_loop(cmpOpUEqNeLeGt cmp, iRegI op1, immI0 zero, label lbl) ++%{ ++ match(CountedLoopEnd cmp (CmpU op1 zero)); ++ ++ effect(USE op1, USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpUEqNeLeGt_reg_imm0_loop" %} ++ + -+// 
dst_src1 = dst_src1 + -src2 * src3 -+// dst_src1 = dst_src1 + src2 * -src3 -+instruct vfmlsF(vReg dst_src1, vReg src2, vReg src3) %{ -+ predicate(UseFMA); -+ match(Set dst_src1 (FmaVF dst_src1 (Binary (NegVF src2) src3))); -+ match(Set dst_src1 (FmaVF dst_src1 (Binary src2 (NegVF src3)))); -+ ins_cost(VEC_COST); -+ format %{ "vfnmsac.vv $dst_src1, $src2, $src3\t#@vfmlsF" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfnmsac_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_cmpz_branch); +%} + -+// dst_src1 = dst_src1 + -src2 * src3 -+// dst_src1 = dst_src1 + src2 * -src3 -+instruct vfmlsD(vReg dst_src1, vReg src2, vReg src3) %{ -+ predicate(UseFMA); -+ match(Set dst_src1 (FmaVD dst_src1 (Binary (NegVD src2) src3))); -+ match(Set dst_src1 (FmaVD dst_src1 (Binary src2 (NegVD src3)))); -+ ins_cost(VEC_COST); -+ format %{ "vfnmsac.vv $dst_src1, $src2, $src3\t#@vfmlsD" %} ++// compare lt/ge unsigned instructs has no short instruct with same match ++instruct far_cmpULtGe_reg_imm0_branch(cmpOpULtGe cmp, iRegI op1, immI0 zero, label lbl) ++%{ ++ match(If cmp (CmpU op1 zero)); ++ ++ effect(USE op1, USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "j $lbl if $cmp == ge\t#@far_cmpULtGe_reg_imm0_branch" %} ++ ++ ins_encode(riscv_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl)); ++ ++ ins_pipe(pipe_cmpz_branch); ++%} ++ ++instruct far_cmpULtGe_reg_imm0_loop(cmpOpULtGe cmp, iRegI op1, immI0 zero, label lbl) ++%{ ++ match(CountedLoopEnd cmp (CmpU op1 zero)); ++ ++ effect(USE op1, USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "j $lbl if $cmp == ge\t#@far_cmpULtGe_reg_imm0_loop" %} ++ ++ ins_encode(riscv_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl)); ++ ++ ins_pipe(pipe_cmpz_branch); ++%} ++ ++instruct far_cmpL_reg_imm0_branch(cmpOp cmp, iRegL op1, immL0 zero, label lbl) ++%{ ++ match(If cmp (CmpL op1 zero)); ++ ++ effect(USE op1, USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpL_reg_imm0_branch" %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfnmsac_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_cmpz_branch); +%} + -+// vector fnmla ++instruct far_cmpL_reg_imm0_loop(cmpOp cmp, iRegL op1, immL0 zero, label lbl) ++%{ ++ match(CountedLoopEnd cmp (CmpL op1 zero)); ++ ++ effect(USE op1, USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpL_reg_imm0_loop" %} + -+// dst_src1 = -dst_src1 + -src2 * src3 -+// dst_src1 = -dst_src1 + src2 * -src3 -+instruct vfnmlaF(vReg dst_src1, vReg src2, vReg src3) %{ -+ predicate(UseFMA); -+ match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary (NegVF src2) src3))); -+ match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 (NegVF src3)))); -+ ins_cost(VEC_COST); -+ format %{ "vfnmacc.vv $dst_src1, $src2, $src3\t#@vfnmlaF" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfnmacc_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_slow); ++ ++ 
ins_pipe(pipe_cmpz_branch); +%} + -+// dst_src1 = -dst_src1 + -src2 * src3 -+// dst_src1 = -dst_src1 + src2 * -src3 -+instruct vfnmlaD(vReg dst_src1, vReg src2, vReg src3) %{ -+ predicate(UseFMA); -+ match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary (NegVD src2) src3))); -+ match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 (NegVD src3)))); -+ ins_cost(VEC_COST); -+ format %{ "vfnmacc.vv $dst_src1, $src2, $src3\t#@vfnmlaD" %} ++instruct far_cmpULEqNeLeGt_reg_imm0_branch(cmpOpUEqNeLeGt cmp, iRegL op1, immL0 zero, label lbl) ++%{ ++ match(If cmp (CmpUL op1 zero)); ++ ++ effect(USE op1, USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpULEqNeLeGt_reg_imm0_branch" %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfnmacc_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_cmpz_branch); +%} + -+// vector fnmls ++instruct far_cmpULEqNeLeGt_reg_imm0_loop(cmpOpUEqNeLeGt cmp, iRegL op1, immL0 zero, label lbl) ++%{ ++ match(CountedLoopEnd cmp (CmpUL op1 zero)); ++ ++ effect(USE op1, USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpULEqNeLeGt_reg_imm0_loop" %} + -+// dst_src1 = -dst_src1 + src2 * src3 -+instruct vfnmlsF(vReg dst_src1, vReg src2, vReg src3) %{ -+ predicate(UseFMA); -+ match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vfmsac.vv $dst_src1, $src2, $src3\t#@vfnmlsF" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfmsac_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_cmpz_branch); +%} + -+// dst_src1 = -dst_src1 + src2 * src3 -+instruct vfnmlsD(vReg dst_src1, vReg src2, vReg src3) %{ -+ predicate(UseFMA); -+ match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vfmsac.vv $dst_src1, $src2, $src3\t#@vfnmlsD" %} ++// compare lt/ge unsigned instructs has no short instruct with same match ++instruct far_cmpULLtGe_reg_imm0_branch(cmpOpULtGe cmp, iRegL op1, immL0 zero, label lbl) ++%{ ++ match(If cmp (CmpUL op1 zero)); ++ ++ effect(USE op1, USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "j $lbl if $cmp == ge\t#@far_cmpULLtGe_reg_imm0_branch" %} ++ ++ ins_encode(riscv_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl)); ++ ++ ins_pipe(pipe_cmpz_branch); ++%} ++ ++instruct far_cmpULLtGe_reg_imm0_loop(cmpOpULtGe cmp, iRegL op1, immL0 zero, label lbl) ++%{ ++ match(CountedLoopEnd cmp (CmpUL op1 zero)); ++ ++ effect(USE op1, USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "j $lbl if $cmp == ge\t#@far_cmpULLtGe_reg_imm0_loop" %} ++ ++ ins_encode(riscv_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl)); ++ ++ ins_pipe(pipe_cmpz_branch); ++%} ++ ++instruct far_cmpP_imm0_branch(cmpOpEqNe cmp, iRegP op1, immP0 zero, label lbl) %{ ++ match(If cmp (CmpP op1 zero)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpP_imm0_branch" %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfmsac_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ __ 
enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_cmpz_branch); +%} + -+// vector mla ++instruct far_cmpP_imm0_loop(cmpOpEqNe cmp, iRegP op1, immP0 zero, label lbl) %{ ++ match(CountedLoopEnd cmp (CmpP op1 zero)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpP_imm0_loop" %} + -+// dst_src1 = dst_src1 + src2 * src3 -+instruct vmlaB(vReg dst_src1, vReg src2, vReg src3) %{ -+ match(Set dst_src1 (AddVB dst_src1 (MulVB src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaB" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vmacc_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_cmpz_branch); +%} + -+// dst_src1 = dst_src1 + src2 * src3 -+instruct vmlaS(vReg dst_src1, vReg src2, vReg src3) %{ -+ match(Set dst_src1 (AddVS dst_src1 (MulVS src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaS" %} ++instruct far_cmpN_imm0_branch(cmpOpEqNe cmp, iRegN op1, immN0 zero, label lbl) %{ ++ match(If cmp (CmpN op1 zero)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpN_imm0_branch" %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vmacc_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_cmpz_branch); +%} + -+// dst_src1 = dst_src1 + src2 * src3 -+instruct vmlaI(vReg dst_src1, vReg src2, vReg src3) %{ -+ match(Set dst_src1 (AddVI dst_src1 (MulVI src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaI" %} ++instruct far_cmpN_imm0_loop(cmpOpEqNe cmp, iRegN op1, immN0 zero, label lbl) %{ ++ match(CountedLoopEnd cmp (CmpN op1 zero)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpN_imm0_loop" %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vmacc_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_cmpz_branch); +%} + -+// dst_src1 = dst_src1 + src2 * src3 -+instruct vmlaL(vReg dst_src1, vReg src2, vReg src3) %{ -+ match(Set dst_src1 (AddVL dst_src1 (MulVL src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaL" %} ++instruct far_cmpP_narrowOop_imm0_branch(cmpOpEqNe cmp, iRegN op1, immP0 zero, label lbl) %{ ++ match(If cmp (CmpP (DecodeN op1) zero)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpP_narrowOop_imm0_branch" %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vmacc_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_cmpz_branch); +%} + -+// vector mls ++instruct 
far_cmpP_narrowOop_imm0_loop(cmpOpEqNe cmp, iRegN op1, immP0 zero, label lbl) %{ ++ match(CountedLoopEnd cmp (CmpP (DecodeN op1) zero)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpP_narrowOop_imm0_loop" %} + -+// dst_src1 = dst_src1 - src2 * src3 -+instruct vmlsB(vReg dst_src1, vReg src2, vReg src3) %{ -+ match(Set dst_src1 (SubVB dst_src1 (MulVB src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsB" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_cmpz_branch); +%} + -+// dst_src1 = dst_src1 - src2 * src3 -+instruct vmlsS(vReg dst_src1, vReg src2, vReg src3) %{ -+ match(Set dst_src1 (SubVS dst_src1 (MulVS src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsS" %} ++// ============================================================================ ++// Conditional Move Instructions ++instruct cmovI_cmpI(iRegINoSp dst, iRegI src, iRegI op1, iRegI op2, cmpOp cop) %{ ++ match(Set dst (CMoveI (Binary cop (CmpI op1 op2)) (Binary dst src))); ++ ins_cost(ALU_COST + BRANCH_COST); ++ ++ format %{ ++ "bneg$cop $op1, $op2, skip\t#@cmovI_cmpI\n\t" ++ "mv $dst, $src\n\t" ++ "skip:" ++ %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ __ enc_cmove($cop$$cmpcode, ++ as_Register($op1$$reg), as_Register($op2$$reg), ++ as_Register($dst$$reg), as_Register($src$$reg)); + %} ++ + ins_pipe(pipe_slow); +%} + -+// dst_src1 = dst_src1 - src2 * src3 -+instruct vmlsI(vReg dst_src1, vReg src2, vReg src3) %{ -+ match(Set dst_src1 (SubVI dst_src1 (MulVI src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsI" %} ++instruct cmovI_cmpU(iRegINoSp dst, iRegI src, iRegI op1, iRegI op2, cmpOpU cop) %{ ++ match(Set dst (CMoveI (Binary cop (CmpU op1 op2)) (Binary dst src))); ++ ins_cost(ALU_COST + BRANCH_COST); ++ ++ format %{ ++ "bneg$cop $op1, $op2, skip\t#@cmovI_cmpU\n\t" ++ "mv $dst, $src\n\t" ++ "skip:" ++ %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ __ enc_cmove($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, ++ as_Register($op1$$reg), as_Register($op2$$reg), ++ as_Register($dst$$reg), as_Register($src$$reg)); + %} ++ + ins_pipe(pipe_slow); +%} + -+// dst_src1 = dst_src1 - src2 * src3 -+instruct vmlsL(vReg dst_src1, vReg src2, vReg src3) %{ -+ match(Set dst_src1 (SubVL dst_src1 (MulVL src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsL" %} ++instruct cmovI_cmpL(iRegINoSp dst, iRegI src, iRegL op1, iRegL op2, cmpOp cop) %{ ++ match(Set dst (CMoveI (Binary cop (CmpL op1 op2)) (Binary dst src))); ++ ins_cost(ALU_COST + BRANCH_COST); ++ ++ format %{ ++ "bneg$cop $op1, $op2, skip\t#@cmovI_cmpL\n\t" ++ "mv $dst, $src\n\t" ++ "skip:" ++ %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ __ enc_cmove($cop$$cmpcode, ++ as_Register($op1$$reg), 
as_Register($op2$$reg), ++ as_Register($dst$$reg), as_Register($src$$reg)); + %} ++ + ins_pipe(pipe_slow); +%} + -+// vector mul ++instruct cmovL_cmpL(iRegLNoSp dst, iRegL src, iRegL op1, iRegL op2, cmpOp cop) %{ ++ match(Set dst (CMoveL (Binary cop (CmpL op1 op2)) (Binary dst src))); ++ ins_cost(ALU_COST + BRANCH_COST); ++ ++ format %{ ++ "bneg$cop $op1, $op2, skip\t#@cmovL_cmpL\n\t" ++ "mv $dst, $src\n\t" ++ "skip:" ++ %} + -+instruct vmulB(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (MulVB src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vmul.vv $dst, $src1, $src2\t#@vmulB" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); ++ __ enc_cmove($cop$$cmpcode, ++ as_Register($op1$$reg), as_Register($op2$$reg), ++ as_Register($dst$$reg), as_Register($src$$reg)); + %} ++ + ins_pipe(pipe_slow); +%} + -+instruct vmulS(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (MulVS src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vmul.vv $dst, $src1, $src2\t#@vmulS" %} ++instruct cmovL_cmpUL(iRegLNoSp dst, iRegL src, iRegL op1, iRegL op2, cmpOpU cop) %{ ++ match(Set dst (CMoveL (Binary cop (CmpUL op1 op2)) (Binary dst src))); ++ ins_cost(ALU_COST + BRANCH_COST); ++ ++ format %{ ++ "bneg$cop $op1, $op2, skip\t#@cmovL_cmpUL\n\t" ++ "mv $dst, $src\n\t" ++ "skip:" ++ %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); ++ __ enc_cmove($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, ++ as_Register($op1$$reg), as_Register($op2$$reg), ++ as_Register($dst$$reg), as_Register($src$$reg)); + %} ++ + ins_pipe(pipe_slow); +%} + -+instruct vmulI(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (MulVI src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vmul.vv $dst, $src1, $src2\t#@vmulI" %} ++instruct cmovI_cmpUL(iRegINoSp dst, iRegI src, iRegL op1, iRegL op2, cmpOpU cop) %{ ++ match(Set dst (CMoveI (Binary cop (CmpUL op1 op2)) (Binary dst src))); ++ ins_cost(ALU_COST + BRANCH_COST); ++ format %{ ++ "bneg$cop $op1, $op2\t#@cmovI_cmpUL\n\t" ++ "mv $dst, $src\n\t" ++ "skip:" ++ %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); ++ __ enc_cmove($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, ++ as_Register($op1$$reg), as_Register($op2$$reg), ++ as_Register($dst$$reg), as_Register($src$$reg)); + %} ++ + ins_pipe(pipe_slow); +%} + -+instruct vmulL(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (MulVL src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vmul.vv $dst, $src1, $src2\t#@vmulL" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); ++ ++// ============================================================================ ++// Procedure Call/Return Instructions ++ ++// Call Java Static Instruction ++// Note: If this code changes, the corresponding ret_addr_offset() and ++// compute_padding() functions will have to be adjusted. 
++instruct CallStaticJavaDirect(method meth) ++%{ ++ match(CallStaticJava); ++ ++ effect(USE meth); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "CALL,static $meth\t#@CallStaticJavaDirect" %} ++ ++ ins_encode(riscv_enc_java_static_call(meth), ++ riscv_enc_call_epilog); ++ ++ ins_pipe(pipe_class_call); ++ ins_alignment(4); +%} + -+instruct vmulF(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (MulVF src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vfmul.vv $dst, $src1, $src2\t#@vmulF" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); ++// TO HERE ++ ++// Call Java Dynamic Instruction ++// Note: If this code changes, the corresponding ret_addr_offset() and ++// compute_padding() functions will have to be adjusted. ++instruct CallDynamicJavaDirect(method meth, rFlagsReg cr) ++%{ ++ match(CallDynamicJava); ++ ++ effect(USE meth, KILL cr); ++ ++ ins_cost(BRANCH_COST + ALU_COST * 6); ++ ++ format %{ "CALL,dynamic $meth\t#@CallDynamicJavaDirect" %} ++ ++ ins_encode(riscv_enc_java_dynamic_call(meth), ++ riscv_enc_call_epilog); ++ ++ ins_pipe(pipe_class_call); ++ ins_alignment(4); +%} + -+instruct vmulD(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (MulVD src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vfmul.vv $dst, $src1, $src2\t#@vmulD" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); ++// Call Runtime Instruction ++ ++instruct CallRuntimeDirect(method meth, rFlagsReg cr) ++%{ ++ match(CallRuntime); ++ ++ effect(USE meth, KILL cr); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "CALL, runtime $meth\t#@CallRuntimeDirect" %} ++ ++ ins_encode(riscv_enc_java_to_runtime(meth)); ++ ++ ins_pipe(pipe_class_call); +%} + -+// vector fneg ++// Call Runtime Instruction + -+instruct vnegF(vReg dst, vReg src) %{ -+ match(Set dst (NegVF src)); -+ ins_cost(VEC_COST); -+ format %{ "vfsgnjn.vv $dst, $src, $src\t#@vnegF" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfneg_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); ++instruct CallLeafDirect(method meth, rFlagsReg cr) ++%{ ++ match(CallLeaf); ++ ++ effect(USE meth, KILL cr); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "CALL, runtime leaf $meth\t#@CallLeafDirect" %} ++ ++ ins_encode(riscv_enc_java_to_runtime(meth)); ++ ++ ins_pipe(pipe_class_call); +%} + -+instruct vnegD(vReg dst, vReg src) %{ -+ match(Set dst (NegVD src)); -+ ins_cost(VEC_COST); -+ format %{ "vfsgnjn.vv $dst, $src, $src\t#@vnegD" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfneg_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); ++// Call Runtime Instruction ++ ++instruct CallLeafNoFPDirect(method meth, rFlagsReg cr) ++%{ ++ match(CallLeafNoFP); ++ ++ effect(USE meth, KILL cr); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "CALL, runtime leaf nofp $meth\t#@CallLeafNoFPDirect" %} ++ ++ ins_encode(riscv_enc_java_to_runtime(meth)); ++ ++ ins_pipe(pipe_class_call); +%} + -+// popcount vector ++// ============================================================================ ++// Partial Subtype Check ++// ++// superklass array for an instance of the superklass. Set a hidden ++// internal cache on a hit (cache is checked with exposed code in ++// gen_subtype_check()). 
Return zero for a hit. The encoding ++// ALSO sets flags. + -+instruct vpopcountI(iRegINoSp dst, vReg src) %{ -+ match(Set dst (PopCountVI src)); -+ format %{ "vpopc.m $dst, $src\t#@vpopcountI" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vpopc_m(as_Register($dst$$reg), as_VectorRegister($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); ++instruct partialSubtypeCheck(iRegP_R15 result, iRegP_R14 sub, iRegP_R10 super, iRegP_R12 tmp, rFlagsReg cr) ++%{ ++ match(Set result (PartialSubtypeCheck sub super)); ++ effect(KILL tmp, KILL cr); ++ ++ ins_cost(2 * STORE_COST + 3 * LOAD_COST + 4 * ALU_COST + BRANCH_COST * 4); ++ format %{ "partialSubtypeCheck $result, $sub, $super\t#@partialSubtypeCheck" %} ++ ++ ins_encode(riscv_enc_partial_subtype_check(sub, super, tmp, result)); ++ ++ opcode(0x1); // Force zero of result reg on hit ++ ++ ins_pipe(pipe_class_memory); +%} + -+// vector add reduction ++instruct partialSubtypeCheckVsZero(iRegP_R15 result, iRegP_R14 sub, iRegP_R10 super, iRegP_R12 tmp, ++ immP0 zero, rFlagsReg cr) ++%{ ++ match(Set cr (CmpP (PartialSubtypeCheck sub super) zero)); ++ effect(KILL tmp, KILL result); + -+instruct reduce_addB(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ -+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); -+ match(Set dst (AddReductionVI src1 src2)); -+ effect(TEMP tmp); -+ ins_cost(VEC_COST); -+ format %{ "vmv.s.x $tmp, $src1\t#@reduce_addB\n\t" -+ "vredsum.vs $tmp, $src2, $tmp\n\t" -+ "vmv.x.s $dst, $tmp" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); -+ __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), -+ as_VectorRegister($tmp$$reg)); -+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -+ %} -+ ins_pipe(pipe_slow); ++ ins_cost(2 * STORE_COST + 3 * LOAD_COST + 4 * ALU_COST + BRANCH_COST * 4); ++ format %{ "partialSubtypeCheck $result, $sub, $super == 0\t#@partialSubtypeCheckVsZero" %} ++ ++ ins_encode(riscv_enc_partial_subtype_check(sub, super, tmp, result)); ++ ++ opcode(0x0); // Don't zero result reg on hit ++ ++ ins_pipe(pipe_class_memory); +%} + -+instruct reduce_addS(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ -+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); -+ match(Set dst (AddReductionVI src1 src2)); -+ effect(TEMP tmp); -+ ins_cost(VEC_COST); -+ format %{ "vmv.s.x $tmp, $src1\t#@reduce_addS\n\t" -+ "vredsum.vs $tmp, $src2, $tmp\n\t" -+ "vmv.x.s $dst, $tmp" %} ++instruct string_compareU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, ++ iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) ++%{ ++ predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UU); ++ match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); ++ effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); ++ ++ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareU" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); -+ __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), -+ as_VectorRegister($tmp$$reg)); -+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); ++ // Count is in 8-bit bytes; non-Compact chars are 16 bits. 
++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, ++ StrIntrinsicNode::UU); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_memory); +%} + -+instruct reduce_addI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ -+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); -+ match(Set dst (AddReductionVI src1 src2)); -+ effect(TEMP tmp); -+ ins_cost(VEC_COST); -+ format %{ "vmv.s.x $tmp, $src1\t#@reduce_addI\n\t" -+ "vredsum.vs $tmp, $src2, $tmp\n\t" -+ "vmv.x.s $dst, $tmp" %} ++instruct string_compareL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, ++ iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) ++%{ ++ predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LL); ++ match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); ++ effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); ++ ++ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareL" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); -+ __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), -+ as_VectorRegister($tmp$$reg)); -+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, ++ StrIntrinsicNode::LL); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_memory); +%} + -+instruct reduce_addL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{ -+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); -+ match(Set dst (AddReductionVL src1 src2)); -+ effect(TEMP tmp); -+ ins_cost(VEC_COST); -+ format %{ "vmv.s.x $tmp, $src1\t#@reduce_addL\n\t" -+ "vredsum.vs $tmp, $src2, $tmp\n\t" -+ "vmv.x.s $dst, $tmp" %} ++instruct string_compareUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, ++ iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) ++%{ ++ predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UL); ++ match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); ++ effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); ++ ++ format %{"String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareUL" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); -+ __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), -+ as_VectorRegister($tmp$$reg)); -+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, ++ StrIntrinsicNode::UL); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_memory); +%} + -+instruct reduce_addF(fRegF src1_dst, vReg src2, vReg tmp) %{ -+ match(Set src1_dst (AddReductionVF src1_dst src2)); -+ effect(TEMP tmp); -+ ins_cost(VEC_COST); -+ format %{ "vfmv.s.f $tmp, $src1_dst\t#@reduce_addF\n\t" -+ "vfredosum.vs $tmp, $src2, $tmp\n\t" -+ "vfmv.f.s $src1_dst, $tmp" %} ++instruct string_compareLU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, ++ 
iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, ++ rFlagsReg cr) ++%{ ++ predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LU); ++ match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); ++ effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); ++ ++ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareLU" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfmv_s_f(as_VectorRegister($tmp$$reg), $src1_dst$$FloatRegister); -+ __ vfredosum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), -+ as_VectorRegister($tmp$$reg)); -+ __ vfmv_f_s($src1_dst$$FloatRegister, as_VectorRegister($tmp$$reg)); ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, ++ StrIntrinsicNode::LU); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_memory); +%} + -+instruct reduce_addD(fRegD src1_dst, vReg src2, vReg tmp) %{ -+ match(Set src1_dst (AddReductionVD src1_dst src2)); -+ effect(TEMP tmp); -+ ins_cost(VEC_COST); -+ format %{ "vfmv.s.f $tmp, $src1_dst\t#@reduce_addD\n\t" -+ "vfredosum.vs $tmp, $src2, $tmp\n\t" -+ "vfmv.f.s $src1_dst, $tmp" %} ++instruct string_indexofUU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, ++ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, ++ iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr) ++%{ ++ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU); ++ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP_DEF result, ++ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr); ++ ++ format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UU)" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfmv_s_f(as_VectorRegister($tmp$$reg), $src1_dst$$FloatRegister); -+ __ vfredosum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), -+ as_VectorRegister($tmp$$reg)); -+ __ vfmv_f_s($src1_dst$$FloatRegister, as_VectorRegister($tmp$$reg)); ++ __ string_indexof($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, ++ $tmp1$$Register, $tmp2$$Register, ++ $tmp3$$Register, $tmp4$$Register, ++ $tmp5$$Register, $tmp6$$Register, ++ $result$$Register, StrIntrinsicNode::UU); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_memory); +%} + -+// vector replicate ++instruct string_indexofLL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, ++ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, ++ iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr) ++%{ ++ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL); ++ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP_DEF result, ++ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr); + -+instruct replicateB(vReg dst, iRegIorL2I src) %{ -+ match(Set dst (ReplicateB src)); -+ ins_cost(VEC_COST); -+ format %{ "vmv.v.x $dst, $src\t#@replicateB" %} ++ format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LL)" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); ++ __ string_indexof($str1$$Register, $str2$$Register, ++ $cnt1$$Register, 
$cnt2$$Register, ++ $tmp1$$Register, $tmp2$$Register, ++ $tmp3$$Register, $tmp4$$Register, ++ $tmp5$$Register, $tmp6$$Register, ++ $result$$Register, StrIntrinsicNode::LL); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_memory); +%} + -+instruct replicateS(vReg dst, iRegIorL2I src) %{ -+ match(Set dst (ReplicateS src)); -+ ins_cost(VEC_COST); -+ format %{ "vmv.v.x $dst, $src\t#@replicateS" %} ++instruct string_indexofUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, ++ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, ++ iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr) ++%{ ++ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL); ++ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP_DEF result, ++ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr); ++ format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UL)" %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); ++ __ string_indexof($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, ++ $tmp1$$Register, $tmp2$$Register, ++ $tmp3$$Register, $tmp4$$Register, ++ $tmp5$$Register, $tmp6$$Register, ++ $result$$Register, StrIntrinsicNode::UL); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_memory); +%} + -+instruct replicateI(vReg dst, iRegIorL2I src) %{ -+ match(Set dst (ReplicateI src)); -+ ins_cost(VEC_COST); -+ format %{ "vmv.v.x $dst, $src\t#@replicateI" %} ++instruct string_indexof_conUU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, ++ immI_le_4 int_cnt2, iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, ++ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) ++%{ ++ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU); ++ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, TEMP_DEF result, ++ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); ++ ++ format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UU)" %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); ++ int icnt2 = (int)$int_cnt2$$constant; ++ __ string_indexof_linearscan($str1$$Register, $str2$$Register, ++ $cnt1$$Register, zr, ++ $tmp1$$Register, $tmp2$$Register, ++ $tmp3$$Register, $tmp4$$Register, ++ icnt2, $result$$Register, StrIntrinsicNode::UU); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_memory); +%} + -+instruct replicateL(vReg dst, iRegL src) %{ -+ match(Set dst (ReplicateL src)); -+ ins_cost(VEC_COST); -+ format %{ "vmv.v.x $dst, $src\t#@replicateL" %} ++instruct string_indexof_conLL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, ++ immI_le_4 int_cnt2, iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, ++ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) ++%{ ++ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL); ++ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, TEMP_DEF result, ++ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); ++ ++ format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LL)" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); ++ int icnt2 = (int)$int_cnt2$$constant; ++ __ 
string_indexof_linearscan($str1$$Register, $str2$$Register, ++ $cnt1$$Register, zr, ++ $tmp1$$Register, $tmp2$$Register, ++ $tmp3$$Register, $tmp4$$Register, ++ icnt2, $result$$Register, StrIntrinsicNode::LL); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_memory); +%} + -+instruct replicateB_imm5(vReg dst, immI5 con) %{ -+ match(Set dst (ReplicateB con)); -+ ins_cost(VEC_COST); -+ format %{ "vmv.v.i $dst, $con\t#@replicateB_imm5" %} ++instruct string_indexof_conUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, ++ immI_1 int_cnt2, iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, ++ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) ++%{ ++ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL); ++ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, TEMP_DEF result, ++ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); ++ ++ format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UL)" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); ++ int icnt2 = (int)$int_cnt2$$constant; ++ __ string_indexof_linearscan($str1$$Register, $str2$$Register, ++ $cnt1$$Register, zr, ++ $tmp1$$Register, $tmp2$$Register, ++ $tmp3$$Register, $tmp4$$Register, ++ icnt2, $result$$Register, StrIntrinsicNode::UL); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_memory); +%} + -+instruct replicateS_imm5(vReg dst, immI5 con) %{ -+ match(Set dst (ReplicateS con)); -+ ins_cost(VEC_COST); -+ format %{ "vmv.v.i $dst, $con\t#@replicateS_imm5" %} ++instruct stringU_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, ++ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, ++ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) ++%{ ++ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); ++ predicate(!UseRVV && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U)); ++ effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result, ++ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); ++ ++ format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); ++ __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, ++ $result$$Register, $tmp1$$Register, $tmp2$$Register, ++ $tmp3$$Register, $tmp4$$Register, false /* isU */); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_memory); +%} + -+instruct replicateI_imm5(vReg dst, immI5 con) %{ -+ match(Set dst (ReplicateI con)); -+ ins_cost(VEC_COST); -+ format %{ "vmv.v.i $dst, $con\t#@replicateI_imm5" %} ++ ++instruct stringL_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, ++ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, ++ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) ++%{ ++ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); ++ predicate(!UseRVV && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L)); ++ effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result, ++ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); ++ ++ format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); ++ __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, ++ $result$$Register, $tmp1$$Register, $tmp2$$Register, ++ $tmp3$$Register, $tmp4$$Register, true /* isL */); + %} -+ ins_pipe(pipe_slow); ++ 
ins_pipe(pipe_class_memory); +%} + -+instruct replicateL_imm5(vReg dst, immL5 con) %{ -+ match(Set dst (ReplicateL con)); -+ ins_cost(VEC_COST); -+ format %{ "vmv.v.i $dst, $con\t#@replicateL_imm5" %} ++// clearing of an array ++instruct clearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy) ++%{ ++ predicate(!UseRVV); ++ match(Set dummy (ClearArray cnt base)); ++ effect(USE_KILL cnt, USE_KILL base); ++ ++ ins_cost(4 * DEFAULT_COST); ++ format %{ "ClearArray $cnt, $base\t#@clearArray_reg_reg" %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); ++ address tpc = __ zero_words($base$$Register, $cnt$$Register); ++ if (tpc == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_class_memory); +%} + -+instruct replicateF(vReg dst, fRegF src) %{ -+ match(Set dst (ReplicateF src)); -+ ins_cost(VEC_COST); -+ format %{ "vfmv.v.f $dst, $src\t#@replicateF" %} ++instruct clearArray_imm_reg(immL cnt, iRegP_R28 base, Universe dummy, rFlagsReg cr) ++%{ ++ predicate(!UseRVV && (uint64_t)n->in(2)->get_long() ++ < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord)); ++ match(Set dummy (ClearArray cnt base)); ++ effect(USE_KILL base, KILL cr); ++ ++ ins_cost(4 * DEFAULT_COST); ++ format %{ "ClearArray $cnt, $base\t#@clearArray_imm_reg" %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfmv_v_f(as_VectorRegister($dst$$reg), $src$$FloatRegister); ++ __ zero_words($base$$Register, (uint64_t)$cnt$$constant); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_class_memory); +%} + -+instruct replicateD(vReg dst, fRegD src) %{ -+ match(Set dst (ReplicateD src)); -+ ins_cost(VEC_COST); -+ format %{ "vfmv.v.f $dst, $src\t#@replicateD" %} ++instruct string_equalsL(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, ++ iRegI_R10 result, rFlagsReg cr) ++%{ ++ predicate(!UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL); ++ match(Set result (StrEquals (Binary str1 str2) cnt)); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr); ++ ++ format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsL" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfmv_v_f(as_VectorRegister($dst$$reg), $src$$FloatRegister); ++ // Count is in 8-bit bytes; non-Compact chars are 16 bits. 
++ __ string_equals($str1$$Register, $str2$$Register, ++ $result$$Register, $cnt$$Register, 1); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_memory); +%} + -+// vector shift ++instruct string_equalsU(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, ++ iRegI_R10 result, rFlagsReg cr) ++%{ ++ predicate(!UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU); ++ match(Set result (StrEquals (Binary str1 str2) cnt)); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr); + -+instruct vasrB(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (RShiftVB src shift)); -+ ins_cost(VEC_COST); -+ effect(TEMP_DEF dst); -+ format %{ "vmsgtu.vi v0, $shift 7\t#@vasrB\n\t" -+ "vsra.vi $dst, $src, 7, Assembler::v0_t\n\t" -+ "vmnot.m v0, v0\n\t" -+ "vsra.vv $dst, $src, $shift, Assembler::v0_t" %} ++ format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsU" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ // if shift > BitsPerByte - 1, clear the low BitsPerByte - 1 bits -+ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1); -+ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ BitsPerByte - 1, Assembler::v0_t); -+ // otherwise, shift -+ __ vmnot_m(v0, v0); -+ __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg), Assembler::v0_t); ++ // Count is in 8-bit bytes; non-Compact chars are 16 bits. ++ __ string_equals($str1$$Register, $str2$$Register, ++ $result$$Register, $cnt$$Register, 2); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_memory); +%} + -+instruct vasrS(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (RShiftVS src shift)); -+ ins_cost(VEC_COST); -+ effect(TEMP_DEF dst); -+ format %{ "vmsgtu.vi v0, $shift, 15\t#@vasrS\n\t" -+ "vsra.vi $dst, $src, 15, Assembler::v0_t\n\t" -+ "vmnot.m v0, v0\n\t" -+ "vsra.vv $dst, $src, $shift, Assembler::v0_t" %} ++instruct array_equalsB(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, ++ iRegP_R13 tmp1, iRegP_R14 tmp2, iRegP_R15 tmp3, ++ iRegP_R16 tmp4, iRegP_R28 tmp5, rFlagsReg cr) ++%{ ++ predicate(!UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); ++ match(Set result (AryEq ary1 ary2)); ++ effect(USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp5, KILL cr); ++ ++ format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsB // KILL $tmp5" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ // if shift > BitsPerShort - 1, clear the low BitsPerShort - 1 bits -+ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerShort - 1); -+ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ BitsPerShort - 1, Assembler::v0_t); -+ // otherwise, shift -+ __ vmnot_m(v0, v0); -+ __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg), Assembler::v0_t); ++ __ arrays_equals($ary1$$Register, $ary2$$Register, ++ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, ++ $result$$Register, $tmp5$$Register, 1); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_memory); +%} + -+instruct vasrI(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (RShiftVI src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsra.vv $dst, $src, $shift\t#@vasrI" %} ++instruct array_equalsC(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, ++ iRegP_R13 tmp1, iRegP_R14 tmp2, iRegP_R15 tmp3, ++ iRegP_R16 tmp4, iRegP_R28 tmp5, rFlagsReg cr) ++%{ ++ predicate(!UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); ++ 
match(Set result (AryEq ary1 ary2)); ++ effect(USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp5, KILL cr); ++ ++ format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsC // KILL $tmp5" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg)); ++ __ arrays_equals($ary1$$Register, $ary2$$Register, ++ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, ++ $result$$Register, $tmp5$$Register, 2); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_memory); +%} + -+instruct vasrL(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (RShiftVL src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsra.vv $dst, $src, $shift\t#@vasrL" %} ++// ============================================================================ ++// Safepoint Instructions ++ ++instruct safePoint(iRegP poll) ++%{ ++ match(SafePoint poll); ++ ++ ins_cost(2 * LOAD_COST); ++ format %{ ++ "lwu zr, [$poll]\t# Safepoint: poll for GC, #@safePoint" ++ %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg)); ++ __ read_polling_page(as_Register($poll$$reg), 0, relocInfo::poll_type); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_serial); // ins_pipe(iload_reg_mem); +%} + -+instruct vlslB(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (LShiftVB src shift)); -+ ins_cost(VEC_COST); -+ effect( TEMP_DEF dst); -+ format %{ "vmsgtu.vi v0, $shift, 7\t#@vlslB\n\t" -+ "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" -+ "vmnot.m v0, v0\n\t" -+ "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} ++// ============================================================================ ++// This name is KNOWN by the ADLC and cannot be changed. ++// The ADLC forces a 'TypeRawPtr::BOTTOM' output type ++// for this guy. ++instruct tlsLoadP(javaThread_RegP dst) ++%{ ++ match(Set dst (ThreadLocal)); ++ ++ ins_cost(0); ++ ++ format %{ " -- \t// $dst=Thread::current(), empty, #@tlsLoadP" %} ++ ++ size(0); ++ ++ ins_encode( /*empty*/ ); ++ ++ ins_pipe(pipe_class_empty); ++%} ++ ++// inlined locking and unlocking ++// using t1 as the 'flag' register to bridge the BoolNode producers and consumers ++instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp1, iRegPNoSp tmp2) ++%{ ++ match(Set cr (FastLock object box)); ++ effect(TEMP tmp1, TEMP tmp2); ++ ++ ins_cost(LOAD_COST * 2 + STORE_COST * 3 + ALU_COST * 6 + BRANCH_COST * 3); ++ format %{ "fastlock $object,$box\t! kills $tmp1,$tmp2, #@cmpFastLock" %} ++ ++ ins_encode(riscv_enc_fast_lock(object, box, tmp1, tmp2)); ++ ++ ins_pipe(pipe_serial); ++%} ++ ++// using t1 as the 'flag' register to bridge the BoolNode producers and consumers ++instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp1, iRegPNoSp tmp2) ++%{ ++ match(Set cr (FastUnlock object box)); ++ effect(TEMP tmp1, TEMP tmp2); ++ ++ ins_cost(LOAD_COST * 2 + STORE_COST + ALU_COST * 2 + BRANCH_COST * 4); ++ format %{ "fastunlock $object,$box\t! kills $tmp1, $tmp2, #@cmpFastUnlock" %} ++ ++ ins_encode(riscv_enc_fast_unlock(object, box, tmp1, tmp2)); ++ ++ ins_pipe(pipe_serial); ++%} ++ ++// Tail Call; Jump from runtime stub to Java code. ++// Also known as an 'interprocedural jump'. ++// Target of jump will eventually return to caller. ++// TailJump below removes the return address. 
++instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_oop) ++%{ ++ match(TailCall jump_target method_oop); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "jalr $jump_target\t# $method_oop holds method oop, #@TailCalljmpInd." %} ++ ++ ins_encode(riscv_enc_tail_call(jump_target)); ++ ++ ins_pipe(pipe_class_call); ++%} ++ ++instruct TailjmpInd(iRegPNoSp jump_target, iRegP_R10 ex_oop) ++%{ ++ match(TailJump jump_target ex_oop); ++ ++ ins_cost(ALU_COST + BRANCH_COST); ++ ++ format %{ "jalr $jump_target\t# $ex_oop holds exception oop, #@TailjmpInd." %} ++ ++ ins_encode(riscv_enc_tail_jmp(jump_target)); ++ ++ ins_pipe(pipe_class_call); ++%} ++ ++// Create exception oop: created by stack-crawling runtime code. ++// Created exception is now available to this handler, and is setup ++// just prior to jumping to this handler. No code emitted. ++instruct CreateException(iRegP_R10 ex_oop) ++%{ ++ match(Set ex_oop (CreateEx)); ++ ++ ins_cost(0); ++ format %{ " -- \t// exception oop; no code emitted, #@CreateException" %} ++ ++ size(0); ++ ++ ins_encode( /*empty*/ ); ++ ++ ins_pipe(pipe_class_empty); ++%} ++ ++// Rethrow exception: The exception oop will come in the first ++// argument position. Then JUMP (not call) to the rethrow stub code. ++instruct RethrowException() ++%{ ++ match(Rethrow); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "j rethrow_stub\t#@RethrowException" %} ++ ++ ins_encode(riscv_enc_rethrow()); ++ ++ ins_pipe(pipe_class_call); ++%} ++ ++// Return Instruction ++// epilog node loads ret address into ra as part of frame pop ++instruct Ret() ++%{ ++ match(Return); ++ ++ ins_cost(BRANCH_COST); ++ format %{ "ret\t// return register, #@Ret" %} ++ ++ ins_encode(riscv_enc_ret()); ++ ++ ins_pipe(pipe_branch); ++%} ++ ++// Die now. ++instruct ShouldNotReachHere() %{ ++ match(Halt); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "#@ShouldNotReachHere" %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ // if shift > BitsPerByte - 1, clear the element -+ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1); -+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg), Assembler::v0_t); -+ // otherwise, shift -+ __ vmnot_m(v0, v0); -+ __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg), Assembler::v0_t); ++ Assembler::CompressibleRegion cr(&_masm); ++ if (is_reachable()) { ++ __ halt(); ++ } + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_class_default); +%} + -+instruct vlslS(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (LShiftVS src shift)); -+ ins_cost(VEC_COST); -+ effect(TEMP_DEF dst); -+ format %{ "vmsgtu.vi v0, $shift, 15\t#@vlslS\n\t" -+ "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" -+ "vmnot.m v0, v0\n\t" -+ "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} ++ ++//----------PEEPHOLE RULES----------------------------------------------------- ++// These must follow all instruction definitions as they use the names ++// defined in the instructions definitions. ++// ++// peepmatch ( root_instr_name [preceding_instruction]* ); ++// ++// peepconstraint %{ ++// (instruction_number.operand_name relational_op instruction_number.operand_name ++// [, ...] 
); ++// // instruction numbers are zero-based using left to right order in peepmatch ++// ++// peepreplace ( instr_name ( [instruction_number.operand_name]* ) ); ++// // provide an instruction_number.operand_name for each operand that appears ++// // in the replacement instruction's match rule ++// ++// ---------VM FLAGS--------------------------------------------------------- ++// ++// All peephole optimizations can be turned off using -XX:-OptoPeephole ++// ++// Each peephole rule is given an identifying number starting with zero and ++// increasing by one in the order seen by the parser. An individual peephole ++// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=# ++// on the command-line. ++// ++// ---------CURRENT LIMITATIONS---------------------------------------------- ++// ++// Only match adjacent instructions in same basic block ++// Only equality constraints ++// Only constraints between operands, not (0.dest_reg == RAX_enc) ++// Only one replacement instruction ++// ++//----------SMARTSPILL RULES--------------------------------------------------- ++// These must follow all instruction definitions as they use the names ++// defined in the instructions definitions. ++ ++// Local Variables: ++// mode: c++ ++// End: +diff --git a/src/hotspot/cpu/riscv/riscv_b.ad b/src/hotspot/cpu/riscv/riscv_b.ad +new file mode 100644 +index 00000000000..4488c1c4031 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/riscv_b.ad +@@ -0,0 +1,527 @@ ++// ++// Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. 
++// ++// ++ ++// RISCV Bit-Manipulation Extension Architecture Description File ++ ++instruct rorI_imm_rvb(iRegINoSp dst, iRegI src, immI shift) %{ ++ predicate(UseRVB); ++ match(Set dst (RotateRight src shift)); ++ ++ format %{ "roriw $dst, $src, ($shift & 0x1f)\t#@rorI_imm_rvb" %} ++ ++ ins_cost(ALU_COST); + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ // if shift > BitsPerShort - 1, clear the element -+ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerShort - 1); -+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg), Assembler::v0_t); -+ // otherwise, shift -+ __ vmnot_m(v0, v0); -+ __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg), Assembler::v0_t); ++ __ roriw(as_Register($dst$$reg), as_Register($src$$reg), $shift$$constant & 0x1f); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(ialu_reg_shift); +%} + -+instruct vlslI(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (LShiftVI src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsll.vv $dst, $src, $shift\t#@vlslI" %} ++instruct rorL_imm_rvb(iRegLNoSp dst, iRegL src, immI shift) %{ ++ predicate(UseRVB); ++ match(Set dst (RotateRight src shift)); ++ ++ format %{ "rori $dst, $src, ($shift & 0x3f)\t#@rorL_imm_rvb" %} ++ ++ ins_cost(ALU_COST); + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg)); ++ __ rori(as_Register($dst$$reg), as_Register($src$$reg), $shift$$constant & 0x3f); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(ialu_reg_shift); +%} + -+instruct vlslL(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (LShiftVL src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsll.vv $dst, $src, $shift\t# vector (D)" %} ++instruct rorI_reg_rvb(iRegINoSp dst, iRegI src, iRegI shift) %{ ++ predicate(UseRVB); ++ match(Set dst (RotateRight src shift)); ++ ++ format %{ "rorw $dst, $src, $shift\t#@rorI_reg_rvb" %} ++ ins_cost(ALU_COST); + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg)); ++ __ rorw(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(ialu_reg_reg); +%} + -+instruct vlsrB(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (URShiftVB src shift)); -+ ins_cost(VEC_COST); -+ effect(TEMP_DEF dst); -+ format %{ "vmsgtu.vi v0, $shift, 7\t#@vlsrB\n\t" -+ "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" -+ "vmnot.m v0, v0, v0\n\t" -+ "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} ++instruct rorL_reg_rvb(iRegLNoSp dst, iRegL src, iRegI shift) %{ ++ predicate(UseRVB); ++ match(Set dst (RotateRight src shift)); ++ ++ format %{ "ror $dst, $src, $shift\t#@rorL_reg_rvb" %} ++ ins_cost(ALU_COST); + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ // if shift > BitsPerByte - 1, clear the element -+ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1); -+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg), Assembler::v0_t); -+ // otherwise, shift -+ __ vmnot_m(v0, v0); -+ __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg), Assembler::v0_t); ++ __ ror(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(ialu_reg_reg); +%} + -+instruct vlsrS(vReg dst, vReg src, 
vReg shift) %{ -+ match(Set dst (URShiftVS src shift)); -+ ins_cost(VEC_COST); -+ effect(TEMP_DEF dst); -+ format %{ "vmsgtu.vi v0, $shift, 15\t#@vlsrS\n\t" -+ "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" -+ "vmnot.m v0, v0\n\t" -+ "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} ++instruct rolI_reg_rvb(iRegINoSp dst, iRegI src, iRegI shift) %{ ++ predicate(UseRVB); ++ match(Set dst (RotateLeft src shift)); ++ ++ format %{ "rolw $dst, $src, $shift\t#@rolI_reg_rvb" %} ++ ins_cost(ALU_COST); + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ // if shift > BitsPerShort - 1, clear the element -+ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerShort - 1); -+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg), Assembler::v0_t); -+ // otherwise, shift -+ __ vmnot_m(v0, v0); -+ __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg), Assembler::v0_t); ++ __ rolw(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(ialu_reg_reg); +%} + ++instruct rolL_reg_rvb(iRegLNoSp dst, iRegL src, iRegI shift) %{ ++ predicate(UseRVB); ++ match(Set dst (RotateLeft src shift)); + -+instruct vlsrI(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (URShiftVI src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsrl.vv $dst, $src, $shift\t#@vlsrI" %} ++ format %{ "rol $dst, $src, $shift\t#@rolL_reg_rvb" %} ++ ins_cost(ALU_COST); + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg)); ++ __ rol(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(ialu_reg_reg); +%} + ++// Convert oop into int for vectors alignment masking ++instruct convP2I_rvb(iRegINoSp dst, iRegP src) %{ ++ predicate(UseRVB); ++ match(Set dst (ConvL2I (CastP2X src))); ++ ++ format %{ "zext.w $dst, $src\t# ptr -> int @convP2I_rvb" %} + -+instruct vlsrL(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (URShiftVL src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsrl.vv $dst, $src, $shift\t#@vlsrL" %} ++ ins_cost(ALU_COST); + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg)); ++ __ zext_w(as_Register($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(ialu_reg); +%} + -+instruct vasrB_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (RShiftVB src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsra.vi $dst, $src, $shift\t#@vasrB_imm" %} ++// byte to int ++instruct convB2I_reg_reg_rvb(iRegINoSp dst, iRegIorL2I src, immI_24 lshift, immI_24 rshift) %{ ++ predicate(UseRVB); ++ match(Set dst (RShiftI (LShiftI src lshift) rshift)); ++ ++ format %{ "sext.b $dst, $src\t# b2i, #@convB2I_reg_reg_rvb" %} ++ ++ ins_cost(ALU_COST); + ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e8); -+ if (con == 0) { -+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; -+ } -+ if (con >= BitsPerByte) con = BitsPerByte - 1; -+ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); ++ __ sext_b(as_Register($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(ialu_reg); +%} + -+instruct vasrS_imm(vReg dst, vReg src, immI 
shift) %{ -+ match(Set dst (RShiftVS src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsra.vi $dst, $src, $shift\t#@vasrS_imm" %} ++// int to short ++instruct convI2S_reg_reg_rvb(iRegINoSp dst, iRegIorL2I src, immI_16 lshift, immI_16 rshift) %{ ++ predicate(UseRVB); ++ match(Set dst (RShiftI (LShiftI src lshift) rshift)); ++ ++ format %{ "sext.h $dst, $src\t# i2s, #@convI2S_reg_reg_rvb" %} ++ ++ ins_cost(ALU_COST); + ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e16); -+ if (con == 0) { -+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; -+ } -+ if (con >= BitsPerShort) con = BitsPerShort - 1; -+ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); ++ __ sext_h(as_Register($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(ialu_reg); +%} + -+instruct vasrI_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (RShiftVI src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsrl.vi $dst, $src, $shift\t#@vasrI_imm" %} ++// short to unsigned int ++instruct convS2UI_reg_reg_rvb(iRegINoSp dst, iRegIorL2I src, immI_16bits mask) %{ ++ predicate(UseRVB); ++ match(Set dst (AndI src mask)); ++ ++ format %{ "zext.h $dst, $src\t# s2ui, #@convS2UI_reg_reg_rvb" %} ++ ++ ins_cost(ALU_COST); + ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e32); -+ if (con == 0) { -+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; -+ } -+ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); ++ __ zext_h(as_Register($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(ialu_reg); +%} + -+instruct vasrL_imm(vReg dst, vReg src, immI shift, vReg tmp) %{ -+ predicate((n->in(2)->get_int() & 0x3f) < 64); -+ match(Set dst (RShiftVL src shift)); -+ ins_cost(VEC_COST); -+ effect(TEMP tmp); -+ format %{ "vsrl.vi $dst, $src, $shift\t#@vasrL_imm" %} ++// int to unsigned long (zero extend) ++instruct convI2UL_reg_reg_rvb(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask) %{ ++ predicate(UseRVB); ++ match(Set dst (AndL (ConvI2L src) mask)); ++ ++ format %{ "zext.w $dst, $src\t# i2ul, #@convI2UL_reg_reg_rvb" %} ++ ++ ins_cost(ALU_COST); + ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x3f; -+ __ vsetvli(t0, x0, Assembler::e64); -+ if (con == 0) { -+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; -+ } -+ if (con < 32) { -+ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -+ } else { -+ __ li(t0, con); -+ __ vmv_v_x(as_VectorRegister($tmp$$reg), t0); -+ __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($tmp$$reg)); -+ } ++ __ zext_w(as_Register($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(ialu_reg_shift); +%} + -+instruct vlsrB_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (URShiftVB src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrB_imm" %} ++// BSWAP instructions ++instruct bytes_reverse_int_rvb(iRegINoSp dst, iRegIorL2I src) %{ ++ predicate(UseRVB); ++ match(Set dst (ReverseBytesI src)); ++ ++ ins_cost(ALU_COST * 2); ++ format %{ "revb_w_w $dst, $src\t#@bytes_reverse_int_rvb" %} ++ + ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, 
Assembler::e8); -+ if (con == 0) { -+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; -+ } -+ if (con >= BitsPerByte) { -+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; -+ } -+ __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); ++ __ revb_w_w(as_Register($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(ialu_reg); +%} + -+instruct vlsrS_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (URShiftVS src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrS_imm" %} ++instruct bytes_reverse_long_rvb(iRegLNoSp dst, iRegL src) %{ ++ predicate(UseRVB); ++ match(Set dst (ReverseBytesL src)); ++ ++ ins_cost(ALU_COST); ++ format %{ "rev8 $dst, $src\t#@bytes_reverse_long_rvb" %} ++ + ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e16); -+ if (con == 0) { -+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; -+ } -+ if (con >= BitsPerShort) { -+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; -+ } -+ __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); ++ __ rev8(as_Register($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(ialu_reg); +%} + -+instruct vlsrI_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (URShiftVI src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrI_imm" %} ++instruct bytes_reverse_unsigned_short_rvb(iRegINoSp dst, iRegIorL2I src) %{ ++ predicate(UseRVB); ++ match(Set dst (ReverseBytesUS src)); ++ ++ ins_cost(ALU_COST * 2); ++ format %{ "revb_h_h_u $dst, $src\t#@bytes_reverse_unsigned_short_rvb" %} ++ + ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e32); -+ if (con == 0) { -+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; -+ } -+ __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); ++ __ revb_h_h_u(as_Register($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(ialu_reg); +%} + -+instruct vlsrL_imm(vReg dst, vReg src, immI shift, vReg tmp) %{ -+ predicate((n->in(2)->get_int() & 0x3f) < 64); -+ match(Set dst (URShiftVL src shift)); -+ ins_cost(VEC_COST); -+ effect(TEMP tmp); -+ format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrL_imm" %} ++instruct bytes_reverse_short_rvb(iRegINoSp dst, iRegIorL2I src) %{ ++ predicate(UseRVB); ++ match(Set dst (ReverseBytesS src)); ++ ++ ins_cost(ALU_COST * 2); ++ format %{ "revb_h_h $dst, $src\t#@bytes_reverse_short_rvb" %} ++ + ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x3f; -+ __ vsetvli(t0, x0, Assembler::e64); -+ if (con == 0) { -+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; -+ } -+ if (con < 32) { -+ __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -+ } else { -+ __ li(t0, con); -+ __ vmv_v_x(as_VectorRegister($tmp$$reg), t0); -+ __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($tmp$$reg)); -+ } ++ __ revb_h_h(as_Register($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(ialu_reg); +%} + 
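
For reference, a minimal Java sketch — illustrative only, not part of the patch; the class and method names are invented — of the source-level idioms whose C2 ideal-graph shapes the Zbb (`UseRVB`) match rules in this riscv_b.ad hunk are written for. Whether these forms are actually emitted also depends on C2 inlining/intrinsics and on flags such as UseRVB and UsePopCountInstruction.

class ZbbIdioms {                                       // illustrative only
    // (RShiftI (LShiftI x 24) 24)   -> convB2I_reg_reg_rvb  -> sext.b
    static int signExtendByte(int x)  { return (byte) x; }
    // (RShiftI (LShiftI x 16) 16)   -> convI2S_reg_reg_rvb  -> sext.h
    static int signExtendShort(int x) { return (short) x; }
    // (AndI x 0xFFFF)               -> convS2UI_reg_reg_rvb -> zext.h
    static int zeroExtendChar(int x)  { return x & 0xFFFF; }
    // (AndL (ConvI2L x) 0xFFFFFFFF) -> convI2UL_reg_reg_rvb -> zext.w
    static long zeroExtendInt(int x)  { return x & 0xFFFFFFFFL; }
    // ReverseBytesI / ReverseBytesL -> revb_w_w / rev8
    static int  bswap32(int x)  { return Integer.reverseBytes(x); }
    static long bswap64(long x) { return Long.reverseBytes(x); }
    // RotateRight (when the rotate ideal node is supported) -> roriw / rorw
    static int rotate(int x, int s) { return Integer.rotateRight(x, s); }
    // PopCountI / PopCountL (returns int) -> cpopw / cpop
    static int bits(long x) { return Long.bitCount(x); }
    // CountLeadingZerosI / CountTrailingZerosI -> clzw / ctzw
    static int clz(int x) { return Integer.numberOfLeadingZeros(x); }
    static int ctz(int x) { return Integer.numberOfTrailingZeros(x); }
    // MinI / MaxI -> min / max ; AbsI -> negw + max
    static int clampNonNegative(int x) { return Math.max(x, 0); }
    static int magnitude(int x)        { return Math.abs(x); }
    // (AndI a (XorI b -1)), i.e. a & ~b -> andnI_reg_reg_rvb -> andn
    static int andNot(int a, int b) { return a & ~b; }
}
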
-+instruct vlslB_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (LShiftVB src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsll.vi $dst, $src, $shift\t#@vlslB_imm" %} ++// Shift Add Pointer ++instruct shaddP_reg_reg_rvb(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale imm) %{ ++ predicate(UseRVB); ++ match(Set dst (AddP src1 (LShiftL src2 imm))); ++ ++ ins_cost(ALU_COST); ++ format %{ "shadd $dst, $src2, $src1, $imm\t# ptr, #@shaddP_reg_reg_rvb" %} ++ + ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e8); -+ if (con >= BitsPerByte) { -+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; -+ } -+ __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); ++ __ shadd(as_Register($dst$$reg), ++ as_Register($src2$$reg), ++ as_Register($src1$$reg), ++ t0, ++ $imm$$constant); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(ialu_reg_reg); +%} + -+instruct vlslS_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (LShiftVS src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsll.vi $dst, $src, $shift\t#@vlslS_imm" %} ++instruct shaddP_reg_reg_ext_rvb(iRegPNoSp dst, iRegP src1, iRegI src2, immIScale imm) %{ ++ predicate(UseRVB); ++ match(Set dst (AddP src1 (LShiftL (ConvI2L src2) imm))); ++ ++ ins_cost(ALU_COST); ++ format %{ "shadd $dst, $src2, $src1, $imm\t# ptr, #@shaddP_reg_reg_ext_rvb" %} ++ + ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e16); -+ if (con >= BitsPerShort) { -+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; -+ } -+ __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); ++ __ shadd(as_Register($dst$$reg), ++ as_Register($src2$$reg), ++ as_Register($src1$$reg), ++ t0, ++ $imm$$constant); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(ialu_reg_reg); +%} + -+instruct vlslI_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (LShiftVI src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsll.vi $dst, $src, $shift\t#@vlslI_imm" %} ++// Shift Add Long ++instruct shaddL_reg_reg_rvb(iRegLNoSp dst, iRegL src1, iRegL src2, immIScale imm) %{ ++ predicate(UseRVB); ++ match(Set dst (AddL src1 (LShiftL src2 imm))); ++ ++ ins_cost(ALU_COST); ++ format %{ "shadd $dst, $src2, $src1, $imm\t#@shaddL_reg_reg_rvb" %} ++ + ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); ++ __ shadd(as_Register($dst$$reg), ++ as_Register($src2$$reg), ++ as_Register($src1$$reg), ++ t0, ++ $imm$$constant); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(ialu_reg_reg); +%} + -+instruct vlslL_imm(vReg dst, vReg src, immI shift, vReg tmp) %{ -+ predicate((n->in(2)->get_int() & 0x3f) < 64); -+ match(Set dst (LShiftVL src shift)); -+ ins_cost(VEC_COST); -+ effect(TEMP tmp); -+ format %{ "vsll.vi $dst, $src, $shift\t#@vlslL_imm" %} ++instruct shaddL_reg_reg_ext_rvb(iRegLNoSp dst, iRegL src1, iRegI src2, immIScale imm) %{ ++ predicate(UseRVB); ++ match(Set dst (AddL src1 (LShiftL (ConvI2L src2) imm))); ++ ++ ins_cost(ALU_COST); ++ format %{ "shadd $dst, $src2, $src1, $imm\t#@shaddL_reg_reg_ext_rvb" %} ++ + ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x3f; -+ __ vsetvli(t0, x0, Assembler::e64); -+ if (con < 32) { -+ __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -+ } 
else { -+ __ li(t0, con); -+ __ vmv_v_x(as_VectorRegister($tmp$$reg), t0); -+ __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($tmp$$reg)); -+ } ++ __ shadd(as_Register($dst$$reg), ++ as_Register($src2$$reg), ++ as_Register($src1$$reg), ++ t0, ++ $imm$$constant); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(ialu_reg_reg); +%} + -+instruct vshiftcntB(vReg dst, iRegIorL2I cnt) %{ -+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); -+ match(Set dst (LShiftCntV cnt)); -+ match(Set dst (RShiftCntV cnt)); -+ format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntB" %} ++// Zeros Count instructions ++instruct countLeadingZerosI_rvb(iRegINoSp dst, iRegIorL2I src) %{ ++ predicate(UseRVB); ++ match(Set dst (CountLeadingZerosI src)); ++ ++ ins_cost(ALU_COST); ++ format %{ "clzw $dst, $src\t#@countLeadingZerosI_rvb" %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); ++ __ clzw(as_Register($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(ialu_reg); +%} + -+instruct vshiftcntS(vReg dst, iRegIorL2I cnt) %{ -+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_SHORT || -+ n->bottom_type()->is_vect()->element_basic_type() == T_CHAR); -+ match(Set dst (LShiftCntV cnt)); -+ match(Set dst (RShiftCntV cnt)); -+ format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntS" %} ++instruct countLeadingZerosL_rvb(iRegINoSp dst, iRegL src) %{ ++ predicate(UseRVB); ++ match(Set dst (CountLeadingZerosL src)); ++ ++ ins_cost(ALU_COST); ++ format %{ "clz $dst, $src\t#@countLeadingZerosL_rvb" %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); ++ __ clz(as_Register($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(ialu_reg); +%} + -+instruct vshiftcntI(vReg dst, iRegIorL2I cnt) %{ -+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_INT); -+ match(Set dst (LShiftCntV cnt)); -+ match(Set dst (RShiftCntV cnt)); -+ format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntI" %} ++instruct countTrailingZerosI_rvb(iRegINoSp dst, iRegIorL2I src) %{ ++ predicate(UseRVB); ++ match(Set dst (CountTrailingZerosI src)); ++ ++ ins_cost(ALU_COST); ++ format %{ "ctzw $dst, $src\t#@countTrailingZerosI_rvb" %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); ++ __ ctzw(as_Register($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(ialu_reg); +%} + -+instruct vshiftcntL(vReg dst, iRegIorL2I cnt) %{ -+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_LONG); -+ match(Set dst (LShiftCntV cnt)); -+ match(Set dst (RShiftCntV cnt)); -+ format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntL" %} ++instruct countTrailingZerosL_rvb(iRegINoSp dst, iRegL src) %{ ++ predicate(UseRVB); ++ match(Set dst (CountTrailingZerosL src)); ++ ++ ins_cost(ALU_COST); ++ format %{ "ctz $dst, $src\t#@countTrailingZerosL_rvb" %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); ++ __ ctz(as_Register($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(ialu_reg); +%} + -+// vector sqrt ++// Population Count instructions ++instruct popCountI_rvb(iRegINoSp dst, iRegIorL2I src) %{ ++ predicate(UsePopCountInstruction); ++ match(Set dst (PopCountI src)); ++ ++ ins_cost(ALU_COST); ++ format %{ "cpopw 
$dst, $src\t#@popCountI_rvb" %} + -+instruct vsqrtF(vReg dst, vReg src) %{ -+ match(Set dst (SqrtVF src)); -+ ins_cost(VEC_COST); -+ format %{ "vfsqrt.v $dst, $src\t#@vsqrtF" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfsqrt_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); ++ __ cpopw(as_Register($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(ialu_reg); +%} + -+instruct vsqrtD(vReg dst, vReg src) %{ -+ match(Set dst (SqrtVD src)); -+ ins_cost(VEC_COST); -+ format %{ "vfsqrt.v $dst, $src\t#@vsqrtD" %} ++// Note: Long/bitCount(long) returns an int. ++instruct popCountL_rvb(iRegINoSp dst, iRegL src) %{ ++ predicate(UsePopCountInstruction); ++ match(Set dst (PopCountL src)); ++ ++ ins_cost(ALU_COST); ++ format %{ "cpop $dst, $src\t#@popCountL_rvb" %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfsqrt_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); ++ __ cpop(as_Register($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(ialu_reg); +%} + -+// vector sub ++// Max and Min ++instruct minI_reg_rvb(iRegINoSp dst, iRegI src1, iRegI src2) %{ ++ predicate(UseRVB); ++ match(Set dst (MinI src1 src2)); ++ ++ ins_cost(ALU_COST); ++ format %{ "min $dst, $src1, $src2\t#@minI_reg_rvb" %} + -+instruct vsubB(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (SubVB src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vsub.vv $dst, $src1, $src2\t#@vsubB" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); ++ __ min(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg)); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(ialu_reg_reg); +%} + -+instruct vsubS(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (SubVS src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vsub.vv $dst, $src1, $src2\t#@vsubS" %} ++instruct maxI_reg_rvb(iRegINoSp dst, iRegI src1, iRegI src2) %{ ++ predicate(UseRVB); ++ match(Set dst (MaxI src1 src2)); ++ ++ ins_cost(ALU_COST); ++ format %{ "max $dst, $src1, $src2\t#@maxI_reg_rvb" %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); ++ __ max(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg)); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(ialu_reg_reg); +%} + -+instruct vsubI(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (SubVI src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vsub.vv $dst, $src1, $src2\t#@vsubI" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); ++// Abs ++instruct absI_reg_rvb(iRegINoSp dst, iRegI src) %{ ++ predicate(UseRVB); ++ match(Set dst (AbsI src)); ++ ++ ins_cost(ALU_COST * 2); ++ format %{ ++ "negw t0, $src\n\t" ++ "max $dst, $src, t0\t#@absI_reg_rvb" + %} -+ ins_pipe(pipe_slow); -+%} + -+instruct vsubL(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (SubVL src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vsub.vv $dst, $src1, $src2\t#@vsubL" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); ++ __ negw(t0, as_Register($src$$reg)); ++ __ max(as_Register($dst$$reg), as_Register($src$$reg), t0); + %} -+ ins_pipe(pipe_slow); ++ ++ 
ins_pipe(ialu_reg_reg); +%} + -+instruct vsubF(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (SubVF src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vfsub.vv $dst, $src1, $src2\t@vsubF" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); ++instruct absL_reg_rvb(iRegLNoSp dst, iRegL src) %{ ++ predicate(UseRVB); ++ match(Set dst (AbsL src)); ++ ++ ins_cost(ALU_COST * 2); ++ format %{ ++ "neg t0, $src\n\t" ++ "max $dst, $src, t0\t#@absL_reg_rvb" + %} -+ ins_pipe(pipe_slow); -+%} + -+instruct vsubD(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (SubVD src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vfsub.vv $dst, $src1, $src2\t#@vsubD" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); ++ __ neg(t0, as_Register($src$$reg)); ++ __ max(as_Register($dst$$reg), as_Register($src$$reg), t0); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(ialu_reg); +%} + -+instruct vstring_equalsL(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, -+ iRegI_R10 result, vReg_V1 v1, -+ vReg_V2 v2, vReg_V3 v3, rFlagsReg r6) -+%{ -+ predicate(UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL); -+ match(Set result (StrEquals (Binary str1 str2) cnt)); -+ effect(DEF result, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL r6, TEMP v1, TEMP v2, TEMP v3); ++// And Not ++instruct andnI_reg_reg_rvb(iRegINoSp dst, iRegI src1, iRegI src2, immI_M1 m1) %{ ++ predicate(UseRVB); ++ match(Set dst (AndI src1 (XorI src2 m1))); ++ ++ ins_cost(ALU_COST); ++ format %{ "andn $dst, $src1, $src2\t#@andnI_reg_reg_rvb" %} + -+ format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsL" %} + ins_encode %{ -+ // Count is in 8-bit bytes; non-Compact chars are 16 bits. -+ __ string_equals_v($str1$$Register, $str2$$Register, -+ $result$$Register, $cnt$$Register, 1); ++ __ andn(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} -+ ins_pipe(pipe_class_memory); ++ ++ ins_pipe(ialu_reg_reg); +%} + -+instruct vstring_equalsU(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, -+ iRegI_R10 result, vReg_V1 v1, -+ vReg_V2 v2, vReg_V3 v3, rFlagsReg r6) -+%{ -+ predicate(UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU); -+ match(Set result (StrEquals (Binary str1 str2) cnt)); -+ effect(DEF result, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL r6, TEMP v1, TEMP v2, TEMP v3); ++instruct andnL_reg_reg_rvb(iRegLNoSp dst, iRegL src1, iRegL src2, immL_M1 m1) %{ ++ predicate(UseRVB); ++ match(Set dst (AndL src1 (XorL src2 m1))); ++ ++ ins_cost(ALU_COST); ++ format %{ "andn $dst, $src1, $src2\t#@andnL_reg_reg_rvb" %} + -+ format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsU" %} + ins_encode %{ -+ // Count is in 8-bit bytes; non-Compact chars are 16 bits. 
-+ __ string_equals_v($str1$$Register, $str2$$Register, -+ $result$$Register, $cnt$$Register, 2); ++ __ andn(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} -+ ins_pipe(pipe_class_memory); -+%} -+ -+instruct varray_equalsB(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, -+ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegP_R28 tmp, rFlagsReg r6) -+%{ -+ predicate(UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); -+ match(Set result (AryEq ary1 ary2)); -+ effect(DEF result, KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP v1, TEMP v2, TEMP v3, KILL r6); + -+ format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsB // KILL $tmp" %} -+ ins_encode %{ -+ __ arrays_equals_v($ary1$$Register, $ary2$$Register, -+ $result$$Register, $tmp$$Register, 1); -+ %} -+ ins_pipe(pipe_class_memory); ++ ins_pipe(ialu_reg_reg); +%} + -+instruct varray_equalsC(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, -+ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegP_R28 tmp, rFlagsReg r6) -+%{ -+ predicate(UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); -+ match(Set result (AryEq ary1 ary2)); -+ effect(DEF result, KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP v1, TEMP v2, TEMP v3, KILL r6); ++// Or Not ++instruct ornI_reg_reg_rvb(iRegINoSp dst, iRegI src1, iRegI src2, immI_M1 m1) %{ ++ predicate(UseRVB); ++ match(Set dst (OrI src1 (XorI src2 m1))); ++ ++ ins_cost(ALU_COST); ++ format %{ "orn $dst, $src1, $src2\t#@ornI_reg_reg_rvb" %} + -+ format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsC // KILL $tmp" %} + ins_encode %{ -+ __ arrays_equals_v($ary1$$Register, $ary2$$Register, -+ $result$$Register, $tmp$$Register, 2); ++ __ orn(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} -+ ins_pipe(pipe_class_memory); ++ ++ ins_pipe(ialu_reg_reg); +%} + -+instruct vstring_compareU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -+ iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, -+ iRegP_R28 tmp1, iRegL_R29 tmp2) -+%{ -+ predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UU); -+ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); -+ effect(DEF result, KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, -+ TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); ++instruct ornL_reg_reg_rvb(iRegLNoSp dst, iRegL src1, iRegL src2, immL_M1 m1) %{ ++ predicate(UseRVB); ++ match(Set dst (OrL src1 (XorL src2 m1))); ++ ++ ins_cost(ALU_COST); ++ format %{ "orn $dst, $src1, $src2\t#@ornL_reg_reg_rvb" %} + -+ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareU" %} + ins_encode %{ -+ // Count is in 8-bit bytes; non-Compact chars are 16 bits. 
-+ __ string_compare_v($str1$$Register, $str2$$Register, -+ $cnt1$$Register, $cnt2$$Register, $result$$Register, -+ $tmp1$$Register, $tmp2$$Register, -+ StrIntrinsicNode::UU); ++ __ orn(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} -+ ins_pipe(pipe_class_memory); -+%} -+instruct vstring_compareL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -+ iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, -+ iRegP_R28 tmp1, iRegL_R29 tmp2) -+%{ -+ predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LL); -+ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); -+ effect(DEF result, KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, -+ TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); + -+ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareL" %} ++ ins_pipe(ialu_reg_reg); ++%} +\ No newline at end of file +diff --git a/src/hotspot/cpu/riscv/riscv_v.ad b/src/hotspot/cpu/riscv/riscv_v.ad +new file mode 100644 +index 00000000000..3828e096b21 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/riscv_v.ad +@@ -0,0 +1,2065 @@ ++// ++// Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 2020, Arm Limited. All rights reserved. ++// Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. 
++// ++// ++ ++// RISCV Vector Extension Architecture Description File ++ ++opclass vmemA(indirect); ++ ++source_hpp %{ ++ bool op_vec_supported(int opcode); ++%} ++ ++source %{ ++ ++ static void loadStore(C2_MacroAssembler masm, bool is_store, ++ VectorRegister reg, BasicType bt, Register base) { ++ Assembler::SEW sew = Assembler::elemtype_to_sew(bt); ++ masm.vsetvli(t0, x0, sew); ++ if (is_store) { ++ masm.vsex_v(reg, base, sew); ++ } else { ++ masm.vlex_v(reg, base, sew); ++ } ++ } ++ ++ bool op_vec_supported(int opcode) { ++ switch (opcode) { ++ // No multiply reduction instructions ++ case Op_MulReductionVD: ++ case Op_MulReductionVF: ++ case Op_MulReductionVI: ++ case Op_MulReductionVL: ++ // Others ++ case Op_Extract: ++ case Op_ExtractB: ++ case Op_ExtractC: ++ case Op_ExtractD: ++ case Op_ExtractF: ++ case Op_ExtractI: ++ case Op_ExtractL: ++ case Op_ExtractS: ++ case Op_ExtractUB: ++ // Vector API specific ++ case Op_AndReductionV: ++ case Op_OrReductionV: ++ case Op_XorReductionV: ++ case Op_LoadVectorGather: ++ case Op_StoreVectorScatter: ++ case Op_VectorBlend: ++ case Op_VectorCast: ++ case Op_VectorCastB2X: ++ case Op_VectorCastD2X: ++ case Op_VectorCastF2X: ++ case Op_VectorCastI2X: ++ case Op_VectorCastL2X: ++ case Op_VectorCastS2X: ++ case Op_VectorInsert: ++ case Op_VectorLoadConst: ++ case Op_VectorLoadMask: ++ case Op_VectorLoadShuffle: ++ case Op_VectorMaskCmp: ++ case Op_VectorRearrange: ++ case Op_VectorReinterpret: ++ case Op_VectorStoreMask: ++ case Op_VectorTest: ++ return false; ++ default: ++ return UseRVV; ++ } ++ } ++ ++%} ++ ++definitions %{ ++ int_def VEC_COST (200, 200); ++%} ++ ++// All VEC instructions ++ ++// vector load/store ++instruct loadV(vReg dst, vmemA mem) %{ ++ match(Set dst (LoadVector mem)); ++ ins_cost(VEC_COST); ++ format %{ "vle $dst, $mem\t#@loadV" %} + ins_encode %{ -+ __ string_compare_v($str1$$Register, $str2$$Register, -+ $cnt1$$Register, $cnt2$$Register, $result$$Register, -+ $tmp1$$Register, $tmp2$$Register, -+ StrIntrinsicNode::LL); ++ VectorRegister dst_reg = as_VectorRegister($dst$$reg); ++ loadStore(C2_MacroAssembler(&cbuf), false, dst_reg, ++ Matcher::vector_element_basic_type(this), as_Register($mem$$base)); + %} -+ ins_pipe(pipe_class_memory); ++ ins_pipe(pipe_slow); +%} + -+instruct vstring_compareUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -+ iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, -+ iRegP_R28 tmp1, iRegL_R29 tmp2) -+%{ -+ predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UL); -+ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); -+ effect(DEF result, KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, -+ TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); ++instruct storeV(vReg src, vmemA mem) %{ ++ match(Set mem (StoreVector mem src)); ++ ins_cost(VEC_COST); ++ format %{ "vse $src, $mem\t#@storeV" %} ++ ins_encode %{ ++ VectorRegister src_reg = as_VectorRegister($src$$reg); ++ loadStore(C2_MacroAssembler(&cbuf), true, src_reg, ++ Matcher::vector_element_basic_type(this, $src), as_Register($mem$$base)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ format %{"String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareUL" %} ++// vector abs ++ ++instruct vabsB(vReg dst, vReg src, vReg tmp) %{ ++ match(Set dst (AbsVB src)); ++ ins_cost(VEC_COST); ++ effect(TEMP tmp); ++ format %{ "vrsub.vi $tmp, 0, $src\t#@vabsB\n\t" ++ "vmax.vv $dst, $tmp, $src" %} + ins_encode %{ -+ __ 
string_compare_v($str1$$Register, $str2$$Register, -+ $cnt1$$Register, $cnt2$$Register, $result$$Register, -+ $tmp1$$Register, $tmp2$$Register, -+ StrIntrinsicNode::UL); ++ __ vsetvli(t0, x0, Assembler::e8); ++ __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); ++ __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); + %} -+ ins_pipe(pipe_class_memory); ++ ins_pipe(pipe_slow); +%} -+instruct vstring_compareLU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -+ iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, -+ iRegP_R28 tmp1, iRegL_R29 tmp2) -+%{ -+ predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LU); -+ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); -+ effect(DEF result, KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, -+ TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); + -+ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareLU" %} ++instruct vabsS(vReg dst, vReg src, vReg tmp) %{ ++ match(Set dst (AbsVS src)); ++ ins_cost(VEC_COST); ++ effect(TEMP tmp); ++ format %{ "vrsub.vi $tmp, 0, $src\t#@vabsS\n\t" ++ "vmax.vv $dst, $tmp, $src" %} + ins_encode %{ -+ __ string_compare_v($str1$$Register, $str2$$Register, -+ $cnt1$$Register, $cnt2$$Register, $result$$Register, -+ $tmp1$$Register, $tmp2$$Register, -+ StrIntrinsicNode::LU); ++ __ vsetvli(t0, x0, Assembler::e16); ++ __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); ++ __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); + %} -+ ins_pipe(pipe_class_memory); ++ ins_pipe(pipe_slow); +%} + -+// fast byte[] to char[] inflation -+instruct vstring_inflate(Universe dummy, iRegP_R10 src, iRegP_R11 dst, iRegI_R12 len, -+ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegL tmp) -+%{ -+ predicate(UseRVV); -+ match(Set dummy (StrInflatedCopy src (Binary dst len))); -+ effect(TEMP v1, TEMP v2, TEMP v3, TEMP tmp, USE_KILL src, USE_KILL dst, USE_KILL len); ++instruct vabsI(vReg dst, vReg src, vReg tmp) %{ ++ match(Set dst (AbsVI src)); ++ ins_cost(VEC_COST); ++ effect(TEMP tmp); ++ format %{ "vrsub.vi $tmp, 0, $src\t#@vabsI\n\t" ++ "vmax.vv $dst, $tmp, $src" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); ++ __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ format %{ "String Inflate $src,$dst" %} ++instruct vabsL(vReg dst, vReg src, vReg tmp) %{ ++ match(Set dst (AbsVL src)); ++ ins_cost(VEC_COST); ++ effect(TEMP tmp); ++ format %{ "vrsub.vi $tmp, 0, $src\t#@vabsL\n\t" ++ "vmax.vv $dst, $tmp, $src" %} + ins_encode %{ -+ address tpc = __ byte_array_inflate_v($src$$Register, $dst$$Register, $len$$Register, $tmp$$Register); -+ if (tpc == NULL) { -+ ciEnv::current()->record_failure("CodeCache is full"); -+ return; -+ } ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); ++ __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); + %} -+ ins_pipe(pipe_class_memory); ++ ins_pipe(pipe_slow); +%} + -+// encode char[] to byte[] in ISO_8859_1 -+instruct vencode_iso_array(iRegP_R12 src, iRegP_R11 dst, iRegI_R13 len, iRegI_R10 result, -+ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegL tmp) -+%{ -+ 
predicate(UseRVV); -+ match(Set result (EncodeISOArray src (Binary dst len))); -+ effect(TEMP_DEF result, USE_KILL src, USE_KILL dst, USE_KILL len, -+ TEMP v1, TEMP v2, TEMP v3, TEMP tmp); ++instruct vabsF(vReg dst, vReg src) %{ ++ match(Set dst (AbsVF src)); ++ ins_cost(VEC_COST); ++ format %{ "vfsgnjx.vv $dst, $src, $src, vm\t#@vabsF" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vfsgnjx_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($src$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ format %{ "Encode array $src,$dst,$len -> $result" %} ++instruct vabsD(vReg dst, vReg src) %{ ++ match(Set dst (AbsVD src)); ++ ins_cost(VEC_COST); ++ format %{ "vfsgnjx.vv $dst, $src, $src, vm\t#@vabsD" %} + ins_encode %{ -+ __ encode_iso_array_v($src$$Register, $dst$$Register, $len$$Register, -+ $result$$Register, $tmp$$Register); ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vfsgnjx_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($src$$reg)); + %} -+ ins_pipe( pipe_class_memory ); ++ ins_pipe(pipe_slow); +%} + -+// fast char[] to byte[] compression -+instruct vstring_compress(iRegP_R12 src, iRegP_R11 dst, iRegI_R13 len, iRegI_R10 result, -+ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegL tmp) -+%{ -+ predicate(UseRVV); -+ match(Set result (StrCompressedCopy src (Binary dst len))); -+ effect(TEMP_DEF result, USE_KILL src, USE_KILL dst, USE_KILL len, -+ TEMP v1, TEMP v2, TEMP v3, TEMP tmp); ++// vector add + -+ format %{ "String Compress $src,$dst -> $result // KILL R11, R12, R13" %} ++instruct vaddB(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (AddVB src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vadd.vv $dst, $src1, $src2\t#@vaddB" %} + ins_encode %{ -+ __ char_array_compress_v($src$$Register, $dst$$Register, $len$$Register, -+ $result$$Register, $tmp$$Register); ++ __ vsetvli(t0, x0, Assembler::e8); ++ __ vadd_vv(as_VectorRegister($dst$$reg), ++ as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); + %} -+ ins_pipe( pipe_slow ); ++ ins_pipe(pipe_slow); +%} + -+instruct vhas_negatives(iRegP_R11 ary1, iRegI_R12 len, iRegI_R10 result, iRegL tmp) -+%{ -+ predicate(UseRVV); -+ match(Set result (HasNegatives ary1 len)); -+ effect(USE_KILL ary1, USE_KILL len, TEMP tmp); -+ format %{ "has negatives byte[] $ary1,$len -> $result" %} ++instruct vaddS(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (AddVS src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vadd.vv $dst, $src1, $src2\t#@vaddS" %} + ins_encode %{ -+ address tpc = __ has_negatives_v($ary1$$Register, $len$$Register, $result$$Register, $tmp$$Register); -+ if (tpc == NULL) { -+ ciEnv::current()->record_failure("CodeCache is full"); -+ return; -+ } ++ __ vsetvli(t0, x0, Assembler::e16); ++ __ vadd_vv(as_VectorRegister($dst$$reg), ++ as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); + %} -+ ins_pipe( pipe_slow ); ++ ins_pipe(pipe_slow); +%} + -+// clearing of an array -+instruct vclearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy, -+ vReg_V1 vReg1, vReg_V2 vReg2, vReg_V3 vReg3) -+%{ -+ predicate(UseRVV); -+ match(Set dummy (ClearArray cnt base)); -+ effect(USE_KILL cnt, USE_KILL base, TEMP vReg1, TEMP vReg2, TEMP vReg3); ++instruct vaddI(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (AddVI src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vadd.vv $dst, $src1, $src2\t#@vaddI" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vadd_vv(as_VectorRegister($dst$$reg), ++ as_VectorRegister($src1$$reg), ++ 
as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ format %{ "ClearArray $cnt, $base\t#@clearArray_reg_reg" %} ++instruct vaddL(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (AddVL src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vadd.vv $dst, $src1, $src2\t#@vaddL" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vadd_vv(as_VectorRegister($dst$$reg), ++ as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + ++instruct vaddF(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (AddVF src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vfadd.vv $dst, $src1, $src2\t#@vaddF" %} + ins_encode %{ -+ __ clear_array_v($base$$Register, $cnt$$Register); ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vfadd_vv(as_VectorRegister($dst$$reg), ++ as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); + %} ++ ins_pipe(pipe_slow); ++%} + -+ ins_pipe(pipe_class_memory); ++instruct vaddD(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (AddVD src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vfadd.vv $dst, $src1, $src2\t#@vaddD" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vfadd_vv(as_VectorRegister($dst$$reg), ++ as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); +%} -diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -new file mode 100644 -index 000000000..9922ff4cf ---- /dev/null -+++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -@@ -0,0 +1,2738 @@ -+/* -+ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2019, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ + -+#include "precompiled.hpp" -+#include "asm/macroAssembler.hpp" -+#include "asm/macroAssembler.inline.hpp" -+#include "code/debugInfoRec.hpp" -+#include "code/icBuffer.hpp" -+#include "code/vtableStubs.hpp" -+#include "interpreter/interp_masm.hpp" -+#include "interpreter/interpreter.hpp" -+#include "logging/log.hpp" -+#include "memory/resourceArea.hpp" -+#include "oops/compiledICHolder.hpp" -+#include "runtime/safepointMechanism.hpp" -+#include "runtime/sharedRuntime.hpp" -+#include "runtime/vframeArray.hpp" -+#include "utilities/align.hpp" -+#include "vmreg_riscv.inline.hpp" -+#ifdef COMPILER1 -+#include "c1/c1_Runtime1.hpp" -+#endif -+#ifdef COMPILER2 -+#include "adfiles/ad_riscv.hpp" -+#include "opto/runtime.hpp" -+#endif ++// vector and + -+#define __ masm-> ++instruct vand(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (AndV src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vand.vv $dst, $src1, $src2\t#@vand" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vand_vv(as_VectorRegister($dst$$reg), ++ as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size; ++// vector or + -+class SimpleRuntimeFrame { -+public: ++instruct vor(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (OrV src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vor.vv $dst, $src1, $src2\t#@vor" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vor_vv(as_VectorRegister($dst$$reg), ++ as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // Most of the runtime stubs have this simple frame layout. -+ // This class exists to make the layout shared in one place. -+ // Offsets are for compiler stack slots, which are jints. -+ enum layout { -+ // The frame sender code expects that fp will be in the "natural" place and -+ // will override any oopMap setting for it. We must therefore force the layout -+ // so that it agrees with the frame sender code. -+ // we don't expect any arg reg save area so riscv asserts that -+ // frame::arg_reg_save_area_bytes == 0 -+ fp_off = 0, fp_off2, -+ return_off, return_off2, -+ framesize -+ }; -+}; ++// vector xor + -+class RegisterSaver { -+ const bool _save_vectors; -+ public: -+ RegisterSaver(bool save_vectors) : _save_vectors(UseRVV && save_vectors) {} -+ ~RegisterSaver() {} -+ OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words); -+ void restore_live_registers(MacroAssembler* masm); ++instruct vxor(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (XorV src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vxor.vv $dst, $src1, $src2\t#@vxor" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vxor_vv(as_VectorRegister($dst$$reg), ++ as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // Offsets into the register save area -+ // Used by deoptimization when it is managing result register -+ // values on its own -+ // gregs:28, float_register:32; except: x1(ra) & x2(sp) & gp(x3) & tp(x4) -+ // |---v0---|<---SP -+ // |---v1---|save vectors only in generate_handler_blob -+ // |-- .. --| -+ // |---v31--|----- -+ // |---f0---| -+ // |---f1---| -+ // | .. | -+ // |---f31--| -+ // |---reserved slot for stack alignment---| -+ // |---x5---| -+ // | x6 | -+ // |---.. 
--| -+ // |---x31--| -+ // |---fp---| -+ // |---ra---| -+ int v0_offset_in_bytes(void) { return 0; } -+ int f0_offset_in_bytes(void) { -+ int f0_offset = 0; -+#ifdef COMPILER2 -+ if (_save_vectors) { -+ f0_offset += Matcher::scalable_vector_reg_size(T_INT) * VectorRegisterImpl::number_of_registers * -+ BytesPerInt; -+ } -+#endif -+ return f0_offset; -+ } -+ int reserved_slot_offset_in_bytes(void) { -+ return f0_offset_in_bytes() + -+ FloatRegisterImpl::max_slots_per_register * -+ FloatRegisterImpl::number_of_registers * -+ BytesPerInt; -+ } ++// vector float div + -+ int reg_offset_in_bytes(Register r) { -+ assert(r->encoding() > 4, "ra, sp, gp and tp not saved"); -+ return reserved_slot_offset_in_bytes() + (r->encoding() - 4 /* x1, x2, x3, x4 */) * wordSize; -+ } ++instruct vdivF(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (DivVF src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vfdiv.vv $dst, $src1, $src2\t#@vdivF" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vfdiv_vv(as_VectorRegister($dst$$reg), ++ as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ int freg_offset_in_bytes(FloatRegister f) { -+ return f0_offset_in_bytes() + f->encoding() * wordSize; -+ } ++instruct vdivD(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (DivVD src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vfdiv.vv $dst, $src1, $src2\t#@vdivD" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vfdiv_vv(as_VectorRegister($dst$$reg), ++ as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ int ra_offset_in_bytes(void) { -+ return reserved_slot_offset_in_bytes() + -+ (RegisterImpl::number_of_registers - 3) * -+ RegisterImpl::max_slots_per_register * -+ BytesPerInt; -+ } ++// vector integer max/min + -+ // During deoptimization only the result registers need to be restored, -+ // all the other values have already been extracted. -+ void restore_result_registers(MacroAssembler* masm); -+}; ++instruct vmax(vReg dst, vReg src1, vReg src2) %{ ++ predicate(n->bottom_type()->is_vect()->element_basic_type() != T_FLOAT && ++ n->bottom_type()->is_vect()->element_basic_type() != T_DOUBLE); ++ match(Set dst (MaxV src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vmax.vv $dst, $src1, $src2\t#@vmax" %} ++ ins_encode %{ ++ BasicType bt = Matcher::vector_element_basic_type(this); ++ Assembler::SEW sew = Assembler::elemtype_to_sew(bt); ++ __ vsetvli(t0, x0, sew); ++ __ vmax_vv(as_VectorRegister($dst$$reg), ++ as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) { -+ int vector_size_in_bytes = 0; -+ int vector_size_in_slots = 0; -+#ifdef COMPILER2 -+ if (_save_vectors) { -+ vector_size_in_bytes += Matcher::scalable_vector_reg_size(T_BYTE); -+ vector_size_in_slots += Matcher::scalable_vector_reg_size(T_INT); -+ } -+#endif -+ -+ int frame_size_in_bytes = align_up(additional_frame_words * wordSize + ra_offset_in_bytes() + wordSize, 16); -+ // OopMap frame size is in compiler stack slots (jint's) not bytes or words -+ int frame_size_in_slots = frame_size_in_bytes / BytesPerInt; -+ // The caller will allocate additional_frame_words -+ int additional_frame_slots = additional_frame_words * wordSize / BytesPerInt; -+ // CodeBlob frame size is in words. 
-+ int frame_size_in_words = frame_size_in_bytes / wordSize; -+ *total_frame_words = frame_size_in_words; -+ -+ // Save Integer, Float and Vector registers. -+ __ enter(); -+ __ push_CPU_state(_save_vectors, vector_size_in_bytes); ++instruct vmin(vReg dst, vReg src1, vReg src2) %{ ++ predicate(n->bottom_type()->is_vect()->element_basic_type() != T_FLOAT && ++ n->bottom_type()->is_vect()->element_basic_type() != T_DOUBLE); ++ match(Set dst (MinV src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vmin.vv $dst, $src1, $src2\t#@vmin" %} ++ ins_encode %{ ++ BasicType bt = Matcher::vector_element_basic_type(this); ++ Assembler::SEW sew = Assembler::elemtype_to_sew(bt); ++ __ vsetvli(t0, x0, sew); ++ __ vmin_vv(as_VectorRegister($dst$$reg), ++ as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // Set an oopmap for the call site. This oopmap will map all -+ // oop-registers and debug-info registers as callee-saved. This -+ // will allow deoptimization at this safepoint to find all possible -+ // debug-info recordings, as well as let GC find all oops. ++// vector float-point max/min + -+ OopMapSet *oop_maps = new OopMapSet(); -+ OopMap* oop_map = new OopMap(frame_size_in_slots, 0); -+ assert_cond(oop_maps != NULL && oop_map != NULL); ++instruct vmaxF(vReg dst, vReg src1, vReg src2) %{ ++ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); ++ match(Set dst (MaxV src1 src2)); ++ effect(TEMP_DEF dst); ++ ins_cost(VEC_COST); ++ format %{ "vmaxF $dst, $src1, $src2\t#@vmaxF" %} ++ ins_encode %{ ++ __ minmax_FD_v(as_VectorRegister($dst$$reg), ++ as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), ++ false /* is_double */, false /* is_min */); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ int sp_offset_in_slots = 0; -+ int step_in_slots = 0; -+ if (_save_vectors) { -+ step_in_slots = vector_size_in_slots; -+ for (int i = 0; i < VectorRegisterImpl::number_of_registers; i++, sp_offset_in_slots += step_in_slots) { -+ VectorRegister r = as_VectorRegister(i); -+ oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset_in_slots), r->as_VMReg()); -+ } -+ } ++instruct vmaxD(vReg dst, vReg src1, vReg src2) %{ ++ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); ++ match(Set dst (MaxV src1 src2)); ++ effect(TEMP_DEF dst); ++ ins_cost(VEC_COST); ++ format %{ "vmaxD $dst, $src1, $src2\t#@vmaxD" %} ++ ins_encode %{ ++ __ minmax_FD_v(as_VectorRegister($dst$$reg), ++ as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), ++ true /* is_double */, false /* is_min */); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ step_in_slots = FloatRegisterImpl::max_slots_per_register; -+ for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++, sp_offset_in_slots += step_in_slots) { -+ FloatRegister r = as_FloatRegister(i); -+ oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset_in_slots), r->as_VMReg()); -+ } ++instruct vminF(vReg dst, vReg src1, vReg src2) %{ ++ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); ++ match(Set dst (MinV src1 src2)); ++ effect(TEMP_DEF dst); ++ ins_cost(VEC_COST); ++ format %{ "vminF $dst, $src1, $src2\t#@vminF" %} ++ ins_encode %{ ++ __ minmax_FD_v(as_VectorRegister($dst$$reg), ++ as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), ++ false /* is_double */, true /* is_min */); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ step_in_slots = RegisterImpl::max_slots_per_register; -+ // skip the slot reserved for alignment, see MacroAssembler::push_reg; -+ // also skip x5 ~ 
x6 on the stack because they are caller-saved registers. -+ sp_offset_in_slots += RegisterImpl::max_slots_per_register * 3; -+ // besides, we ignore x0 ~ x4 because push_CPU_state won't push them on the stack. -+ for (int i = 7; i < RegisterImpl::number_of_registers; i++, sp_offset_in_slots += step_in_slots) { -+ Register r = as_Register(i); -+ if (r != xthread) { -+ oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset_in_slots + additional_frame_slots), r->as_VMReg()); -+ } -+ } ++instruct vminD(vReg dst, vReg src1, vReg src2) %{ ++ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); ++ match(Set dst (MinV src1 src2)); ++ effect(TEMP_DEF dst); ++ ins_cost(VEC_COST); ++ format %{ "vminD $dst, $src1, $src2\t#@vminD" %} ++ ins_encode %{ ++ __ minmax_FD_v(as_VectorRegister($dst$$reg), ++ as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), ++ true /* is_double */, true /* is_min */); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ return oop_map; -+} ++// vector fmla + -+void RegisterSaver::restore_live_registers(MacroAssembler* masm) { -+#ifdef COMPILER2 -+ __ pop_CPU_state(_save_vectors, Matcher::scalable_vector_reg_size(T_BYTE)); -+#else -+ __ pop_CPU_state(_save_vectors); -+#endif -+ __ leave(); -+} ++// dst_src1 = dst_src1 + src2 * src3 ++instruct vfmlaF(vReg dst_src1, vReg src2, vReg src3) %{ ++ predicate(UseFMA); ++ match(Set dst_src1 (FmaVF dst_src1 (Binary src2 src3))); ++ ins_cost(VEC_COST); ++ format %{ "vfmacc.vv $dst_src1, $src2, $src3\t#@vfmlaF" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vfmacc_vv(as_VectorRegister($dst_src1$$reg), ++ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+void RegisterSaver::restore_result_registers(MacroAssembler* masm) { -+ // Just restore result register. Only used by deoptimization. By -+ // now any callee save register that needs to be restored to a c2 -+ // caller of the deoptee has been extracted into the vframeArray -+ // and will be stuffed into the c2i adapter we create for later -+ // restoration so only result registers need to be restored here. -+ // Restore fp result register -+ __ fld(f10, Address(sp, freg_offset_in_bytes(f10))); -+ // Restore integer result register -+ __ ld(x10, Address(sp, reg_offset_in_bytes(x10))); ++// dst_src1 = dst_src1 + src2 * src3 ++instruct vfmlaD(vReg dst_src1, vReg src2, vReg src3) %{ ++ predicate(UseFMA); ++ match(Set dst_src1 (FmaVD dst_src1 (Binary src2 src3))); ++ ins_cost(VEC_COST); ++ format %{ "vfmacc.vv $dst_src1, $src2, $src3\t#@vfmlaD" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vfmacc_vv(as_VectorRegister($dst_src1$$reg), ++ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // Pop all of the register save are off the stack -+ __ add(sp, sp, align_up(ra_offset_in_bytes(), 16)); -+} ++// vector fmls + -+// Is vector's size (in bytes) bigger than a size saved by default? -+// riscv does not ovlerlay the floating-point registers on vector registers like aarch64. 
-+bool SharedRuntime::is_wide_vector(int size) { -+ return UseRVV; -+} ++// dst_src1 = dst_src1 + -src2 * src3 ++// dst_src1 = dst_src1 + src2 * -src3 ++instruct vfmlsF(vReg dst_src1, vReg src2, vReg src3) %{ ++ predicate(UseFMA); ++ match(Set dst_src1 (FmaVF dst_src1 (Binary (NegVF src2) src3))); ++ match(Set dst_src1 (FmaVF dst_src1 (Binary src2 (NegVF src3)))); ++ ins_cost(VEC_COST); ++ format %{ "vfnmsac.vv $dst_src1, $src2, $src3\t#@vfmlsF" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vfnmsac_vv(as_VectorRegister($dst_src1$$reg), ++ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+size_t SharedRuntime::trampoline_size() { -+ // Byte size of function generate_trampoline. movptr_with_offset: 5 instructions, jalr: 1 instrction -+ return 6 * NativeInstruction::instruction_size; // lui + addi + slli + addi + slli + jalr -+} ++// dst_src1 = dst_src1 + -src2 * src3 ++// dst_src1 = dst_src1 + src2 * -src3 ++instruct vfmlsD(vReg dst_src1, vReg src2, vReg src3) %{ ++ predicate(UseFMA); ++ match(Set dst_src1 (FmaVD dst_src1 (Binary (NegVD src2) src3))); ++ match(Set dst_src1 (FmaVD dst_src1 (Binary src2 (NegVD src3)))); ++ ins_cost(VEC_COST); ++ format %{ "vfnmsac.vv $dst_src1, $src2, $src3\t#@vfmlsD" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vfnmsac_vv(as_VectorRegister($dst_src1$$reg), ++ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) { -+ int32_t offset = 0; -+ __ movptr_with_offset(t0, destination, offset); // lui + addi + slli + addi + slli -+ __ jalr(x0, t0, offset); -+} ++// vector fnmla + -+// --------------------------------------------------------------------------- -+// Read the array of BasicTypes from a signature, and compute where the -+// arguments should go. Values in the VMRegPair regs array refer to 4-byte -+// quantities. Values less than VMRegImpl::stack0 are registers, those above -+// refer to 4-byte stack slots. All stack slots are based off of the stack pointer -+// as framesizes are fixed. -+// VMRegImpl::stack0 refers to the first slot 0(sp). -+// and VMRegImpl::stack0+1 refers to the memory word 4-byes higher. Register -+// up to RegisterImpl::number_of_registers) are the 64-bit -+// integer registers. ++// dst_src1 = -dst_src1 + -src2 * src3 ++// dst_src1 = -dst_src1 + src2 * -src3 ++instruct vfnmlaF(vReg dst_src1, vReg src2, vReg src3) %{ ++ predicate(UseFMA); ++ match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary (NegVF src2) src3))); ++ match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 (NegVF src3)))); ++ ins_cost(VEC_COST); ++ format %{ "vfnmacc.vv $dst_src1, $src2, $src3\t#@vfnmlaF" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vfnmacc_vv(as_VectorRegister($dst_src1$$reg), ++ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+// Note: the INPUTS in sig_bt are in units of Java argument words, -+// which are 64-bit. The OUTPUTS are in 32-bit units. 
++// dst_src1 = -dst_src1 + -src2 * src3 ++// dst_src1 = -dst_src1 + src2 * -src3 ++instruct vfnmlaD(vReg dst_src1, vReg src2, vReg src3) %{ ++ predicate(UseFMA); ++ match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary (NegVD src2) src3))); ++ match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 (NegVD src3)))); ++ ins_cost(VEC_COST); ++ format %{ "vfnmacc.vv $dst_src1, $src2, $src3\t#@vfnmlaD" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vfnmacc_vv(as_VectorRegister($dst_src1$$reg), ++ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+// The Java calling convention is a "shifted" version of the C ABI. -+// By skipping the first C ABI register we can call non-static jni -+// methods with small numbers of arguments without having to shuffle -+// the arguments at all. Since we control the java ABI we ought to at -+// least get some advantage out of it. ++// vector fnmls + -+int SharedRuntime::java_calling_convention(const BasicType *sig_bt, -+ VMRegPair *regs, -+ int total_args_passed, -+ int is_outgoing) { -+ assert_cond(sig_bt != NULL && regs != NULL); -+ // Create the mapping between argument positions and -+ // registers. -+ static const Register INT_ArgReg[Argument::n_int_register_parameters_j] = { -+ j_rarg0, j_rarg1, j_rarg2, j_rarg3, -+ j_rarg4, j_rarg5, j_rarg6, j_rarg7 -+ }; -+ static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters_j] = { -+ j_farg0, j_farg1, j_farg2, j_farg3, -+ j_farg4, j_farg5, j_farg6, j_farg7 -+ }; ++// dst_src1 = -dst_src1 + src2 * src3 ++instruct vfnmlsF(vReg dst_src1, vReg src2, vReg src3) %{ ++ predicate(UseFMA); ++ match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 src3))); ++ ins_cost(VEC_COST); ++ format %{ "vfmsac.vv $dst_src1, $src2, $src3\t#@vfnmlsF" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vfmsac_vv(as_VectorRegister($dst_src1$$reg), ++ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ uint int_args = 0; -+ uint fp_args = 0; -+ uint stk_args = 0; // inc by 2 each time ++// dst_src1 = -dst_src1 + src2 * src3 ++instruct vfnmlsD(vReg dst_src1, vReg src2, vReg src3) %{ ++ predicate(UseFMA); ++ match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 src3))); ++ ins_cost(VEC_COST); ++ format %{ "vfmsac.vv $dst_src1, $src2, $src3\t#@vfnmlsD" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vfmsac_vv(as_VectorRegister($dst_src1$$reg), ++ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ for (int i = 0; i < total_args_passed; i++) { -+ switch (sig_bt[i]) { -+ case T_BOOLEAN: // fall through -+ case T_CHAR: // fall through -+ case T_BYTE: // fall through -+ case T_SHORT: // fall through -+ case T_INT: -+ if (int_args < Argument::n_int_register_parameters_j) { -+ regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); -+ } else { -+ regs[i].set1(VMRegImpl::stack2reg(stk_args)); -+ stk_args += 2; -+ } -+ break; -+ case T_VOID: -+ // halves of T_LONG or T_DOUBLE -+ assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); -+ regs[i].set_bad(); -+ break; -+ case T_LONG: // fall through -+ assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); -+ case T_OBJECT: // fall through -+ case T_ARRAY: // fall through -+ case T_ADDRESS: -+ if (int_args < Argument::n_int_register_parameters_j) { -+ regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); -+ } else { -+ 
regs[i].set2(VMRegImpl::stack2reg(stk_args)); -+ stk_args += 2; -+ } -+ break; -+ case T_FLOAT: -+ if (fp_args < Argument::n_float_register_parameters_j) { -+ regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg()); -+ } else { -+ regs[i].set1(VMRegImpl::stack2reg(stk_args)); -+ stk_args += 2; -+ } -+ break; -+ case T_DOUBLE: -+ assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); -+ if (fp_args < Argument::n_float_register_parameters_j) { -+ regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg()); -+ } else { -+ regs[i].set2(VMRegImpl::stack2reg(stk_args)); -+ stk_args += 2; -+ } -+ break; -+ default: -+ ShouldNotReachHere(); -+ } -+ } ++// vector mla + -+ return align_up(stk_args, 2); -+} ++// dst_src1 = dst_src1 + src2 * src3 ++instruct vmlaB(vReg dst_src1, vReg src2, vReg src3) %{ ++ match(Set dst_src1 (AddVB dst_src1 (MulVB src2 src3))); ++ ins_cost(VEC_COST); ++ format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaB" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e8); ++ __ vmacc_vv(as_VectorRegister($dst_src1$$reg), ++ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+// Patch the callers callsite with entry to compiled code if it exists. -+static void patch_callers_callsite(MacroAssembler *masm) { -+ Label L; -+ __ ld(t0, Address(xmethod, in_bytes(Method::code_offset()))); -+ __ beqz(t0, L); ++// dst_src1 = dst_src1 + src2 * src3 ++instruct vmlaS(vReg dst_src1, vReg src2, vReg src3) %{ ++ match(Set dst_src1 (AddVS dst_src1 (MulVS src2 src3))); ++ ins_cost(VEC_COST); ++ format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaS" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e16); ++ __ vmacc_vv(as_VectorRegister($dst_src1$$reg), ++ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ __ enter(); -+ __ push_CPU_state(); ++// dst_src1 = dst_src1 + src2 * src3 ++instruct vmlaI(vReg dst_src1, vReg src2, vReg src3) %{ ++ match(Set dst_src1 (AddVI dst_src1 (MulVI src2 src3))); ++ ins_cost(VEC_COST); ++ format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaI" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vmacc_vv(as_VectorRegister($dst_src1$$reg), ++ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // VM needs caller's callsite -+ // VM needs target method -+ // This needs to be a long call since we will relocate this adapter to -+ // the codeBuffer and it may not reach ++// dst_src1 = dst_src1 + src2 * src3 ++instruct vmlaL(vReg dst_src1, vReg src2, vReg src3) %{ ++ match(Set dst_src1 (AddVL dst_src1 (MulVL src2 src3))); ++ ins_cost(VEC_COST); ++ format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaL" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vmacc_vv(as_VectorRegister($dst_src1$$reg), ++ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+#ifndef PRODUCT -+ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); -+#endif ++// vector mls + -+ __ mv(c_rarg0, xmethod); -+ __ mv(c_rarg1, ra); -+ int32_t offset = 0; -+ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)), offset); -+ __ jalr(x1, t0, offset); -+ __ pop_CPU_state(); -+ // restore sp -+ __ leave(); -+ __ bind(L); -+} ++// dst_src1 = dst_src1 - src2 * src3 ++instruct vmlsB(vReg dst_src1, vReg src2, vReg src3) %{ ++ match(Set dst_src1 (SubVB dst_src1 (MulVB src2 src3))); ++ ins_cost(VEC_COST); ++ format %{ 
"vnmsac.vv $dst_src1, src2, src3\t#@vmlsB" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e8); ++ __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), ++ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+static void gen_c2i_adapter(MacroAssembler *masm, -+ int total_args_passed, -+ int comp_args_on_stack, -+ const BasicType *sig_bt, -+ const VMRegPair *regs, -+ Label& skip_fixup) { -+ // Before we get into the guts of the C2I adapter, see if we should be here -+ // at all. We've come from compiled code and are attempting to jump to the -+ // interpreter, which means the caller made a static call to get here -+ // (vcalls always get a compiled target if there is one). Check for a -+ // compiled target. If there is one, we need to patch the caller's call. -+ patch_callers_callsite(masm); ++// dst_src1 = dst_src1 - src2 * src3 ++instruct vmlsS(vReg dst_src1, vReg src2, vReg src3) %{ ++ match(Set dst_src1 (SubVS dst_src1 (MulVS src2 src3))); ++ ins_cost(VEC_COST); ++ format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsS" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e16); ++ __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), ++ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ __ bind(skip_fixup); ++// dst_src1 = dst_src1 - src2 * src3 ++instruct vmlsI(vReg dst_src1, vReg src2, vReg src3) %{ ++ match(Set dst_src1 (SubVI dst_src1 (MulVI src2 src3))); ++ ins_cost(VEC_COST); ++ format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsI" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), ++ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ int words_pushed = 0; ++// dst_src1 = dst_src1 - src2 * src3 ++instruct vmlsL(vReg dst_src1, vReg src2, vReg src3) %{ ++ match(Set dst_src1 (SubVL dst_src1 (MulVL src2 src3))); ++ ins_cost(VEC_COST); ++ format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsL" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), ++ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // Since all args are passed on the stack, total_args_passed * -+ // Interpreter::stackElementSize is the space we need. 
++// vector mul + -+ int extraspace = total_args_passed * Interpreter::stackElementSize; ++instruct vmulB(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (MulVB src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vmul.vv $dst, $src1, $src2\t#@vmulB" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e8); ++ __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ __ mv(x30, sp); ++instruct vmulS(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (MulVS src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vmul.vv $dst, $src1, $src2\t#@vmulS" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e16); ++ __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // stack is aligned, keep it that way -+ extraspace = align_up(extraspace, 2 * wordSize); ++instruct vmulI(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (MulVI src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vmul.vv $dst, $src1, $src2\t#@vmulI" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ if (extraspace) { -+ __ sub(sp, sp, extraspace); -+ } ++instruct vmulL(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (MulVL src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vmul.vv $dst, $src1, $src2\t#@vmulL" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // Now write the args into the outgoing interpreter space -+ for (int i = 0; i < total_args_passed; i++) { -+ if (sig_bt[i] == T_VOID) { -+ assert(i > 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "missing half"); -+ continue; -+ } ++instruct vmulF(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (MulVF src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vfmul.vv $dst, $src1, $src2\t#@vmulF" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vfmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // offset to start parameters -+ int st_off = (total_args_passed - i - 1) * Interpreter::stackElementSize; -+ int next_off = st_off - Interpreter::stackElementSize; -+ -+ // Say 4 args: -+ // i st_off -+ // 0 32 T_LONG -+ // 1 24 T_VOID -+ // 2 16 T_OBJECT -+ // 3 8 T_BOOL -+ // - 0 return address -+ // -+ // However to make thing extra confusing. Because we can fit a Java long/double in -+ // a single slot on a 64 bt vm and it would be silly to break them up, the interpreter -+ // leaves one slot empty and only stores to a single slot. In this case the -+ // slot that is occupied is the T_VOID slot. See I said it was confusing. 
++instruct vmulD(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (MulVD src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vfmul.vv $dst, $src1, $src2\t#@vmulD" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vfmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ VMReg r_1 = regs[i].first(); -+ VMReg r_2 = regs[i].second(); -+ if (!r_1->is_valid()) { -+ assert(!r_2->is_valid(), ""); -+ continue; -+ } -+ if (r_1->is_stack()) { -+ // memory to memory use t0 -+ int ld_off = (r_1->reg2stack() * VMRegImpl::stack_slot_size -+ + extraspace -+ + words_pushed * wordSize); -+ if (!r_2->is_valid()) { -+ __ lwu(t0, Address(sp, ld_off)); -+ __ sd(t0, Address(sp, st_off), /*temp register*/esp); -+ } else { -+ __ ld(t0, Address(sp, ld_off), /*temp register*/esp); ++// vector fneg + -+ // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG -+ // T_DOUBLE and T_LONG use two slots in the interpreter -+ if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { -+ // ld_off == LSW, ld_off+wordSize == MSW -+ // st_off == MSW, next_off == LSW -+ __ sd(t0, Address(sp, next_off), /*temp register*/esp); -+#ifdef ASSERT -+ // Overwrite the unused slot with known junk -+ __ mv(t0, 0xdeadffffdeadaaaaul); -+ __ sd(t0, Address(sp, st_off), /*temp register*/esp); -+#endif /* ASSERT */ -+ } else { -+ __ sd(t0, Address(sp, st_off), /*temp register*/esp); -+ } -+ } -+ } else if (r_1->is_Register()) { -+ Register r = r_1->as_Register(); -+ if (!r_2->is_valid()) { -+ // must be only an int (or less ) so move only 32bits to slot -+ __ sd(r, Address(sp, st_off)); -+ } else { -+ // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG -+ // T_DOUBLE and T_LONG use two slots in the interpreter -+ if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { -+ // jlong/double in gpr -+#ifdef ASSERT -+ // Overwrite the unused slot with known junk -+ __ mv(t0, 0xdeadffffdeadaaabul); -+ __ sd(t0, Address(sp, st_off), /*temp register*/esp); -+#endif /* ASSERT */ -+ __ sd(r, Address(sp, next_off)); -+ } else { -+ __ sd(r, Address(sp, st_off)); -+ } -+ } -+ } else { -+ assert(r_1->is_FloatRegister(), ""); -+ if (!r_2->is_valid()) { -+ // only a float use just part of the slot -+ __ fsw(r_1->as_FloatRegister(), Address(sp, st_off)); -+ } else { -+#ifdef ASSERT -+ // Overwrite the unused slot with known junk -+ __ mv(t0, 0xdeadffffdeadaaacul); -+ __ sd(t0, Address(sp, st_off), /*temp register*/esp); -+#endif /* ASSERT */ -+ __ fsd(r_1->as_FloatRegister(), Address(sp, next_off)); -+ } -+ } -+ } ++instruct vnegF(vReg dst, vReg src) %{ ++ match(Set dst (NegVF src)); ++ ins_cost(VEC_COST); ++ format %{ "vfsgnjn.vv $dst, $src, $src\t#@vnegF" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vfneg_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ __ mv(esp, sp); // Interp expects args on caller's expression stack ++instruct vnegD(vReg dst, vReg src) %{ ++ match(Set dst (NegVD src)); ++ ins_cost(VEC_COST); ++ format %{ "vfsgnjn.vv $dst, $src, $src\t#@vnegD" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vfneg_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ __ ld(t0, Address(xmethod, in_bytes(Method::interpreter_entry_offset()))); -+ __ jr(t0); -+} ++// popcount vector + -+void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, -+ int total_args_passed, -+ int comp_args_on_stack, -+ 
const BasicType *sig_bt, -+ const VMRegPair *regs) { -+ // Cut-out for having no stack args. -+ int comp_words_on_stack = align_up(comp_args_on_stack * VMRegImpl::stack_slot_size, wordSize) >> LogBytesPerWord; -+ if (comp_args_on_stack != 0) { -+ __ sub(t0, sp, comp_words_on_stack * wordSize); -+ __ andi(sp, t0, -16); -+ } ++instruct vpopcountI(iRegINoSp dst, vReg src) %{ ++ match(Set dst (PopCountVI src)); ++ format %{ "vpopc.m $dst, $src\t#@vpopcountI" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vpopc_m(as_Register($dst$$reg), as_VectorRegister($src$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // Will jump to the compiled code just as if compiled code was doing it. -+ // Pre-load the register-jump target early, to schedule it better. -+ __ ld(t1, Address(xmethod, in_bytes(Method::from_compiled_offset()))); ++// vector add reduction + -+ // Now generate the shuffle code. -+ for (int i = 0; i < total_args_passed; i++) { -+ if (sig_bt[i] == T_VOID) { -+ assert(i > 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "missing half"); -+ continue; -+ } ++instruct reduce_addB(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); ++ match(Set dst (AddReductionVI src1 src2)); ++ effect(TEMP tmp); ++ ins_cost(VEC_COST); ++ format %{ "vmv.s.x $tmp, $src1\t#@reduce_addB\n\t" ++ "vredsum.vs $tmp, $src2, $tmp\n\t" ++ "vmv.x.s $dst, $tmp" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e8); ++ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); ++ __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), ++ as_VectorRegister($tmp$$reg)); ++ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // Pick up 0, 1 or 2 words from SP+offset. ++instruct reduce_addS(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); ++ match(Set dst (AddReductionVI src1 src2)); ++ effect(TEMP tmp); ++ ins_cost(VEC_COST); ++ format %{ "vmv.s.x $tmp, $src1\t#@reduce_addS\n\t" ++ "vredsum.vs $tmp, $src2, $tmp\n\t" ++ "vmv.x.s $dst, $tmp" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e16); ++ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); ++ __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), ++ as_VectorRegister($tmp$$reg)); ++ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), -+ "scrambled load targets?"); -+ // Load in argument order going down. -+ int ld_off = (total_args_passed - i - 1) * Interpreter::stackElementSize; -+ // Point to interpreter value (vs. 
tag) -+ int next_off = ld_off - Interpreter::stackElementSize; ++instruct reduce_addI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); ++ match(Set dst (AddReductionVI src1 src2)); ++ effect(TEMP tmp); ++ ins_cost(VEC_COST); ++ format %{ "vmv.s.x $tmp, $src1\t#@reduce_addI\n\t" ++ "vredsum.vs $tmp, $src2, $tmp\n\t" ++ "vmv.x.s $dst, $tmp" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); ++ __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), ++ as_VectorRegister($tmp$$reg)); ++ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ VMReg r_1 = regs[i].first(); -+ VMReg r_2 = regs[i].second(); -+ if (!r_1->is_valid()) { -+ assert(!r_2->is_valid(), ""); -+ continue; -+ } -+ if (r_1->is_stack()) { -+ // Convert stack slot to an SP offset (+ wordSize to account for return address ) -+ int st_off = regs[i].first()->reg2stack() * VMRegImpl::stack_slot_size; -+ if (!r_2->is_valid()) { -+ __ lw(t0, Address(esp, ld_off)); -+ __ sd(t0, Address(sp, st_off), /*temp register*/t2); -+ } else { -+ // -+ // We are using two optoregs. This can be either T_OBJECT, -+ // T_ADDRESS, T_LONG, or T_DOUBLE the interpreter allocates -+ // two slots but only uses one for thr T_LONG or T_DOUBLE case -+ // So we must adjust where to pick up the data to match the -+ // interpreter. -+ // -+ // Interpreter local[n] == MSW, local[n+1] == LSW however locals -+ // are accessed as negative so LSW is at LOW address ++instruct reduce_addL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); ++ match(Set dst (AddReductionVL src1 src2)); ++ effect(TEMP tmp); ++ ins_cost(VEC_COST); ++ format %{ "vmv.s.x $tmp, $src1\t#@reduce_addL\n\t" ++ "vredsum.vs $tmp, $src2, $tmp\n\t" ++ "vmv.x.s $dst, $tmp" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); ++ __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), ++ as_VectorRegister($tmp$$reg)); ++ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // ld_off is MSW so get LSW -+ const int offset = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ? -+ next_off : ld_off; -+ __ ld(t0, Address(esp, offset)); -+ // st_off is LSW (i.e. reg.first()) -+ __ sd(t0, Address(sp, st_off), /*temp register*/t2); -+ } -+ } else if (r_1->is_Register()) { // Register argument -+ Register r = r_1->as_Register(); -+ if (r_2->is_valid()) { -+ // -+ // We are using two VMRegs. This can be either T_OBJECT, -+ // T_ADDRESS, T_LONG, or T_DOUBLE the interpreter allocates -+ // two slots but only uses one for thr T_LONG or T_DOUBLE case -+ // So we must adjust where to pick up the data to match the -+ // interpreter. 
++instruct reduce_addF(fRegF src1_dst, vReg src2, vReg tmp) %{ ++ match(Set src1_dst (AddReductionVF src1_dst src2)); ++ effect(TEMP tmp); ++ ins_cost(VEC_COST); ++ format %{ "vfmv.s.f $tmp, $src1_dst\t#@reduce_addF\n\t" ++ "vfredosum.vs $tmp, $src2, $tmp\n\t" ++ "vfmv.f.s $src1_dst, $tmp" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vfmv_s_f(as_VectorRegister($tmp$$reg), $src1_dst$$FloatRegister); ++ __ vfredosum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), ++ as_VectorRegister($tmp$$reg)); ++ __ vfmv_f_s($src1_dst$$FloatRegister, as_VectorRegister($tmp$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ const int offset = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ? -+ next_off : ld_off; ++instruct reduce_addD(fRegD src1_dst, vReg src2, vReg tmp) %{ ++ match(Set src1_dst (AddReductionVD src1_dst src2)); ++ effect(TEMP tmp); ++ ins_cost(VEC_COST); ++ format %{ "vfmv.s.f $tmp, $src1_dst\t#@reduce_addD\n\t" ++ "vfredosum.vs $tmp, $src2, $tmp\n\t" ++ "vfmv.f.s $src1_dst, $tmp" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vfmv_s_f(as_VectorRegister($tmp$$reg), $src1_dst$$FloatRegister); ++ __ vfredosum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), ++ as_VectorRegister($tmp$$reg)); ++ __ vfmv_f_s($src1_dst$$FloatRegister, as_VectorRegister($tmp$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // this can be a misaligned move -+ __ ld(r, Address(esp, offset)); -+ } else { -+ // sign extend and use a full word? -+ __ lw(r, Address(esp, ld_off)); -+ } -+ } else { -+ if (!r_2->is_valid()) { -+ __ flw(r_1->as_FloatRegister(), Address(esp, ld_off)); -+ } else { -+ __ fld(r_1->as_FloatRegister(), Address(esp, next_off)); -+ } -+ } -+ } ++// vector integer max reduction ++instruct vreduce_maxB(iRegINoSp dst, iRegI src1, vReg src2, vReg tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); ++ match(Set dst (MaxReductionV src1 src2)); ++ ins_cost(VEC_COST); ++ effect(TEMP tmp); ++ format %{ "vreduce_maxB $dst, $src1, $src2, $tmp" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e8); ++ __ vredmax_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($src2$$reg)); ++ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); ++ Label Ldone; ++ __ ble(as_Register($src1$$reg), as_Register($dst$$reg), Ldone); ++ __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); ++ __ bind(Ldone); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // 6243940 We might end up in handle_wrong_method if -+ // the callee is deoptimized as we race thru here. If that -+ // happens we don't want to take a safepoint because the -+ // caller frame will look interpreted and arguments are now -+ // "compiled" so it is much better to make this transition -+ // invisible to the stack walking code. Unfortunately if -+ // we try and find the callee by normal means a safepoint -+ // is possible. So we stash the desired callee in the thread -+ // and the vm will find there should this case occur. 
++instruct vreduce_maxS(iRegINoSp dst, iRegI src1, vReg src2, vReg tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); ++ match(Set dst (MaxReductionV src1 src2)); ++ ins_cost(VEC_COST); ++ effect(TEMP tmp); ++ format %{ "vreduce_maxS $dst, $src1, $src2, $tmp" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e16); ++ __ vredmax_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($src2$$reg)); ++ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); ++ Label Ldone; ++ __ ble(as_Register($src1$$reg), as_Register($dst$$reg), Ldone); ++ __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); ++ __ bind(Ldone); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ __ sd(xmethod, Address(xthread, JavaThread::callee_target_offset())); ++instruct vreduce_maxI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); ++ match(Set dst (MaxReductionV src1 src2)); ++ ins_cost(VEC_COST); ++ effect(TEMP tmp); ++ format %{ "vreduce_maxI $dst, $src1, $src2, $tmp" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); ++ __ vredmax_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg)); ++ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ __ jr(t1); -+} ++instruct vreduce_maxL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); ++ match(Set dst (MaxReductionV src1 src2)); ++ ins_cost(VEC_COST); ++ effect(TEMP tmp); ++ format %{ "vreduce_maxL $dst, $src1, $src2, $tmp" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); ++ __ vredmax_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg)); ++ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+// --------------------------------------------------------------- -+AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, -+ int total_args_passed, -+ int comp_args_on_stack, -+ const BasicType *sig_bt, -+ const VMRegPair *regs, -+ AdapterFingerPrint* fingerprint) { -+ address i2c_entry = __ pc(); -+ gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); ++// vector integer min reduction ++instruct vreduce_minB(iRegINoSp dst, iRegI src1, vReg src2, vReg tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); ++ match(Set dst (MinReductionV src1 src2)); ++ ins_cost(VEC_COST); ++ effect(TEMP tmp); ++ format %{ "vreduce_minB $dst, $src1, $src2, $tmp" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e8); ++ __ vredmin_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($src2$$reg)); ++ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); ++ Label Ldone; ++ __ bge(as_Register($src1$$reg), as_Register($dst$$reg), Ldone); ++ __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); ++ __ bind(Ldone); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ address c2i_unverified_entry = __ pc(); -+ Label skip_fixup; ++instruct vreduce_minS(iRegINoSp dst, iRegI src1, vReg src2, vReg tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); ++ match(Set dst (MinReductionV src1 src2)); ++ ins_cost(VEC_COST); ++ effect(TEMP 
tmp); ++ format %{ "vreduce_minS $dst, $src1, $src2, $tmp" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e16); ++ __ vredmin_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($src2$$reg)); ++ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); ++ Label Ldone; ++ __ bge(as_Register($src1$$reg), as_Register($dst$$reg), Ldone); ++ __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); ++ __ bind(Ldone); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ Label ok; ++instruct vreduce_minI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); ++ match(Set dst (MinReductionV src1 src2)); ++ ins_cost(VEC_COST); ++ effect(TEMP tmp); ++ format %{ "vreduce_minI $dst, $src1, $src2, $tmp" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); ++ __ vredmin_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg)); ++ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ const Register holder = t1; -+ const Register receiver = j_rarg0; -+ const Register tmp = t2; // A call-clobbered register not used for arg passing ++instruct vreduce_minL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); ++ match(Set dst (MinReductionV src1 src2)); ++ ins_cost(VEC_COST); ++ effect(TEMP tmp); ++ format %{ "vreduce_minL $dst, $src1, $src2, $tmp" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); ++ __ vredmin_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg)); ++ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // ------------------------------------------------------------------------- -+ // Generate a C2I adapter. On entry we know xmethod holds the Method* during calls -+ // to the interpreter. The args start out packed in the compiled layout. They -+ // need to be unpacked into the interpreter layout. This will almost always -+ // require some stack space. We grow the current (compiled) stack, then repack -+ // the args. We finally end in a jump to the generic interpreter entry point. -+ // On exit from the interpreter, the interpreter will restore our SP (lest the -+ // compiled code, which relys solely on SP and not FP, get sick). 
++// vector float max reduction + -+ { -+ __ block_comment("c2i_unverified_entry {"); -+ __ load_klass(t0, receiver); -+ __ ld(tmp, Address(holder, CompiledICHolder::holder_klass_offset())); -+ __ ld(xmethod, Address(holder, CompiledICHolder::holder_metadata_offset())); -+ __ beq(t0, tmp, ok); -+ __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); ++instruct vreduce_maxF(fRegF dst, fRegF src1, vReg src2, vReg tmp1, vReg tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); ++ match(Set dst (MaxReductionV src1 src2)); ++ ins_cost(VEC_COST); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce_maxF $dst, $src1, $src2, $tmp1, $tmp2" %} ++ ins_encode %{ ++ __ reduce_minmax_FD_v($dst$$FloatRegister, ++ $src1$$FloatRegister, as_VectorRegister($src2$$reg), ++ as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), ++ false /* is_double */, false /* is_min */); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ __ bind(ok); -+ // Method might have been compiled since the call site was patched to -+ // interpreted; if that is the case treat it as a miss so we can get -+ // the call site corrected. -+ __ ld(t0, Address(xmethod, in_bytes(Method::code_offset()))); -+ __ beqz(t0, skip_fixup); -+ __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); -+ __ block_comment("} c2i_unverified_entry"); -+ } ++instruct vreduce_maxD(fRegD dst, fRegD src1, vReg src2, vReg tmp1, vReg tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); ++ match(Set dst (MaxReductionV src1 src2)); ++ ins_cost(VEC_COST); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce_maxD $dst, $src1, $src2, $tmp1, $tmp2" %} ++ ins_encode %{ ++ __ reduce_minmax_FD_v($dst$$FloatRegister, ++ $src1$$FloatRegister, as_VectorRegister($src2$$reg), ++ as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), ++ true /* is_double */, false /* is_min */); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ address c2i_entry = __ pc(); ++// vector float min reduction + -+ gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); ++instruct vreduce_minF(fRegF dst, fRegF src1, vReg src2, vReg tmp1, vReg tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); ++ match(Set dst (MinReductionV src1 src2)); ++ ins_cost(VEC_COST); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce_minF $dst, $src1, $src2, $tmp1, $tmp2" %} ++ ins_encode %{ ++ __ reduce_minmax_FD_v($dst$$FloatRegister, ++ $src1$$FloatRegister, as_VectorRegister($src2$$reg), ++ as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), ++ false /* is_double */, true /* is_min */); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ __ flush(); -+ return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry); -+} ++instruct vreduce_minD(fRegD dst, fRegD src1, vReg src2, vReg tmp1, vReg tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); ++ match(Set dst (MinReductionV src1 src2)); ++ ins_cost(VEC_COST); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce_minD $dst, $src1, $src2, $tmp1, $tmp2" %} ++ ins_encode %{ ++ __ reduce_minmax_FD_v($dst$$FloatRegister, ++ $src1$$FloatRegister, as_VectorRegister($src2$$reg), ++ as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), ++ true /* is_double */, true /* is_min */); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+int SharedRuntime::c_calling_convention(const BasicType 
*sig_bt, -+ VMRegPair *regs, -+ VMRegPair *regs2, -+ int total_args_passed) { -+ assert(regs2 == NULL, "not needed on riscv"); -+ assert_cond(sig_bt != NULL && regs != NULL); ++// vector Math.rint, floor, ceil + -+ // We return the amount of VMRegImpl stack slots we need to reserve for all -+ // the arguments NOT counting out_preserve_stack_slots. ++instruct vroundD(vReg dst, vReg src, immI rmode) %{ ++ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); ++ match(Set dst (RoundDoubleModeV src rmode)); ++ format %{ "vroundD $dst, $src, $rmode" %} ++ ins_encode %{ ++ switch ($rmode$$constant) { ++ case RoundDoubleModeNode::rmode_rint: ++ __ csrwi(CSR_FRM, C2_MacroAssembler::rne); ++ __ vfcvt_rtz_x_f_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); ++ break; ++ case RoundDoubleModeNode::rmode_floor: ++ __ csrwi(CSR_FRM, C2_MacroAssembler::rdn); ++ __ vfcvt_rtz_x_f_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); ++ break; ++ case RoundDoubleModeNode::rmode_ceil: ++ __ csrwi(CSR_FRM, C2_MacroAssembler::rup); ++ __ vfcvt_rtz_x_f_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = { -+ c_rarg0, c_rarg1, c_rarg2, c_rarg3, -+ c_rarg4, c_rarg5, c_rarg6, c_rarg7 -+ }; -+ static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters_c] = { -+ c_farg0, c_farg1, c_farg2, c_farg3, -+ c_farg4, c_farg5, c_farg6, c_farg7 -+ }; ++// vector replicate + -+ uint int_args = 0; -+ uint fp_args = 0; -+ uint stk_args = 0; // inc by 2 each time ++instruct replicateB(vReg dst, iRegIorL2I src) %{ ++ match(Set dst (ReplicateB src)); ++ ins_cost(VEC_COST); ++ format %{ "vmv.v.x $dst, $src\t#@replicateB" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e8); ++ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ for (int i = 0; i < total_args_passed; i++) { -+ switch (sig_bt[i]) { -+ case T_BOOLEAN: // fall through -+ case T_CHAR: // fall through -+ case T_BYTE: // fall through -+ case T_SHORT: // fall through -+ case T_INT: -+ if (int_args < Argument::n_int_register_parameters_c) { -+ regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); -+ } else { -+ regs[i].set1(VMRegImpl::stack2reg(stk_args)); -+ stk_args += 2; -+ } -+ break; -+ case T_LONG: // fall through -+ assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); -+ case T_OBJECT: // fall through -+ case T_ARRAY: // fall through -+ case T_ADDRESS: // fall through -+ case T_METADATA: -+ if (int_args < Argument::n_int_register_parameters_c) { -+ regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); -+ } else { -+ regs[i].set2(VMRegImpl::stack2reg(stk_args)); -+ stk_args += 2; -+ } -+ break; -+ case T_FLOAT: -+ if (fp_args < Argument::n_float_register_parameters_c) { -+ regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg()); -+ } else if (int_args < Argument::n_int_register_parameters_c) { -+ regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); -+ } else { -+ regs[i].set1(VMRegImpl::stack2reg(stk_args)); -+ stk_args += 2; -+ } -+ break; -+ case T_DOUBLE: -+ assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); -+ if (fp_args < Argument::n_float_register_parameters_c) { -+ regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg()); -+ } else if (int_args < Argument::n_int_register_parameters_c) { -+ 
regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); -+ } else { -+ regs[i].set2(VMRegImpl::stack2reg(stk_args)); -+ stk_args += 2; -+ } -+ break; -+ case T_VOID: // Halves of longs and doubles -+ assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); -+ regs[i].set_bad(); -+ break; -+ default: -+ ShouldNotReachHere(); -+ } -+ } -+ -+ return stk_args; -+} -+ -+void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { -+ // We always ignore the frame_slots arg and just use the space just below frame pointer -+ // which by this time is free to use -+ switch (ret_type) { -+ case T_FLOAT: -+ __ fsw(f10, Address(fp, -3 * wordSize)); -+ break; -+ case T_DOUBLE: -+ __ fsd(f10, Address(fp, -3 * wordSize)); -+ break; -+ case T_VOID: break; -+ default: { -+ __ sd(x10, Address(fp, -3 * wordSize)); -+ } -+ } -+} -+ -+void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { -+ // We always ignore the frame_slots arg and just use the space just below frame pointer -+ // which by this time is free to use -+ switch (ret_type) { -+ case T_FLOAT: -+ __ flw(f10, Address(fp, -3 * wordSize)); -+ break; -+ case T_DOUBLE: -+ __ fld(f10, Address(fp, -3 * wordSize)); -+ break; -+ case T_VOID: break; -+ default: { -+ __ ld(x10, Address(fp, -3 * wordSize)); -+ } -+ } -+} ++instruct replicateS(vReg dst, iRegIorL2I src) %{ ++ match(Set dst (ReplicateS src)); ++ ins_cost(VEC_COST); ++ format %{ "vmv.v.x $dst, $src\t#@replicateS" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e16); ++ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { -+ RegSet x; -+ for ( int i = first_arg ; i < arg_count ; i++ ) { -+ if (args[i].first()->is_Register()) { -+ x = x + args[i].first()->as_Register(); -+ } else if (args[i].first()->is_FloatRegister()) { -+ __ addi(sp, sp, -2 * wordSize); -+ __ fsd(args[i].first()->as_FloatRegister(), Address(sp, 0)); -+ } -+ } -+ __ push_reg(x, sp); -+} ++instruct replicateI(vReg dst, iRegIorL2I src) %{ ++ match(Set dst (ReplicateI src)); ++ ins_cost(VEC_COST); ++ format %{ "vmv.v.x $dst, $src\t#@replicateI" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { -+ RegSet x; -+ for ( int i = first_arg ; i < arg_count ; i++ ) { -+ if (args[i].first()->is_Register()) { -+ x = x + args[i].first()->as_Register(); -+ } else { -+ ; -+ } -+ } -+ __ pop_reg(x, sp); -+ for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) { -+ if (args[i].first()->is_Register()) { -+ ; -+ } else if (args[i].first()->is_FloatRegister()) { -+ __ fld(args[i].first()->as_FloatRegister(), Address(sp, 0)); -+ __ add(sp, sp, 2 * wordSize); -+ } -+ } -+} ++instruct replicateL(vReg dst, iRegL src) %{ ++ match(Set dst (ReplicateL src)); ++ ins_cost(VEC_COST); ++ format %{ "vmv.v.x $dst, $src\t#@replicateL" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+// Check GCLocker::needs_gc and enter the runtime if it's true. This -+// keeps a new JNI critical region from starting until a GC has been -+// forced. 
Save down any oops in registers and describe them in an -+// OopMap. -+static void check_needs_gc_for_critical_native(MacroAssembler* masm, -+ int stack_slots, -+ int total_c_args, -+ int total_in_args, -+ int arg_save_area, -+ OopMapSet* oop_maps, -+ VMRegPair* in_regs, -+ BasicType* in_sig_bt) { Unimplemented(); } -+ -+// Unpack an array argument into a pointer to the body and the length -+// if the array is non-null, otherwise pass 0 for both. -+static void unpack_array_argument(MacroAssembler* masm, VMRegPair reg, BasicType in_elem_type, VMRegPair body_arg, VMRegPair length_arg) { Unimplemented(); } -+ -+class ComputeMoveOrder: public StackObj { -+ class MoveOperation: public ResourceObj { -+ friend class ComputeMoveOrder; -+ private: -+ VMRegPair _src; -+ VMRegPair _dst; -+ int _src_index; -+ int _dst_index; -+ bool _processed; -+ MoveOperation* _next; -+ MoveOperation* _prev; -+ -+ static int get_id(VMRegPair r) { Unimplemented(); return 0; } -+ -+ public: -+ MoveOperation(int src_index, VMRegPair src, int dst_index, VMRegPair dst): -+ _src(src) -+ , _dst(dst) -+ , _src_index(src_index) -+ , _dst_index(dst_index) -+ , _processed(false) -+ , _next(NULL) -+ , _prev(NULL) { Unimplemented(); } -+ -+ ~MoveOperation() { -+ _next = NULL; -+ _prev = NULL; -+ } -+ -+ VMRegPair src() const { Unimplemented(); return _src; } -+ int src_id() const { Unimplemented(); return 0; } -+ int src_index() const { Unimplemented(); return 0; } -+ VMRegPair dst() const { Unimplemented(); return _src; } -+ void set_dst(int i, VMRegPair dst) { Unimplemented(); } -+ int dst_index() const { Unimplemented(); return 0; } -+ int dst_id() const { Unimplemented(); return 0; } -+ MoveOperation* next() const { Unimplemented(); return 0; } -+ MoveOperation* prev() const { Unimplemented(); return 0; } -+ void set_processed() { Unimplemented(); } -+ bool is_processed() const { Unimplemented(); return 0; } -+ -+ // insert -+ void break_cycle(VMRegPair temp_register) { Unimplemented(); } -+ -+ void link(GrowableArray& killer) { Unimplemented(); } -+ }; ++instruct replicateB_imm5(vReg dst, immI5 con) %{ ++ match(Set dst (ReplicateB con)); ++ ins_cost(VEC_COST); ++ format %{ "vmv.v.i $dst, $con\t#@replicateB_imm5" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e8); ++ __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ private: -+ GrowableArray edges; ++instruct replicateS_imm5(vReg dst, immI5 con) %{ ++ match(Set dst (ReplicateS con)); ++ ins_cost(VEC_COST); ++ format %{ "vmv.v.i $dst, $con\t#@replicateS_imm5" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e16); ++ __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ public: -+ ComputeMoveOrder(int total_in_args, VMRegPair* in_regs, int total_c_args, VMRegPair* out_regs, -+ BasicType* in_sig_bt, GrowableArray& arg_order, VMRegPair tmp_vmreg) { Unimplemented(); } ++instruct replicateI_imm5(vReg dst, immI5 con) %{ ++ match(Set dst (ReplicateI con)); ++ ins_cost(VEC_COST); ++ format %{ "vmv.v.i $dst, $con\t#@replicateI_imm5" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ ~ComputeMoveOrder() {} -+ // Collected all the move operations -+ void add_edge(int src_index, VMRegPair src, int dst_index, VMRegPair dst) { Unimplemented(); } ++instruct replicateL_imm5(vReg dst, immL5 con) %{ ++ match(Set dst (ReplicateL con)); ++ ins_cost(VEC_COST); ++ format %{ "vmv.v.i $dst, 
$con\t#@replicateL_imm5" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // Walk the edges breaking cycles between moves. The result list -+ // can be walked in order to produce the proper set of loads -+ GrowableArray* get_store_order(VMRegPair temp_register) { Unimplemented(); return 0; } -+}; ++instruct replicateF(vReg dst, fRegF src) %{ ++ match(Set dst (ReplicateF src)); ++ ins_cost(VEC_COST); ++ format %{ "vfmv.v.f $dst, $src\t#@replicateF" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vfmv_v_f(as_VectorRegister($dst$$reg), $src$$FloatRegister); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+static void verify_oop_args(MacroAssembler* masm, -+ const methodHandle& method, -+ const BasicType* sig_bt, -+ const VMRegPair* regs) { -+ const Register temp_reg = x9; // not part of any compiled calling seq -+ if (VerifyOops) { -+ for (int i = 0; i < method->size_of_parameters(); i++) { -+ if (sig_bt[i] == T_OBJECT || -+ sig_bt[i] == T_ARRAY) { -+ VMReg r = regs[i].first(); -+ assert(r->is_valid(), "bad oop arg"); -+ if (r->is_stack()) { -+ __ ld(temp_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size)); -+ __ verify_oop(temp_reg); -+ } else { -+ __ verify_oop(r->as_Register()); -+ } -+ } -+ } -+ } -+} ++instruct replicateD(vReg dst, fRegD src) %{ ++ match(Set dst (ReplicateD src)); ++ ins_cost(VEC_COST); ++ format %{ "vfmv.v.f $dst, $src\t#@replicateD" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vfmv_v_f(as_VectorRegister($dst$$reg), $src$$FloatRegister); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+static void gen_special_dispatch(MacroAssembler* masm, -+ const methodHandle& method, -+ const BasicType* sig_bt, -+ const VMRegPair* regs) { -+ verify_oop_args(masm, method, sig_bt, regs); -+ vmIntrinsics::ID iid = method->intrinsic_id(); ++// vector shift + -+ // Now write the args into the outgoing interpreter space -+ bool has_receiver = false; -+ Register receiver_reg = noreg; -+ int member_arg_pos = -1; -+ Register member_reg = noreg; -+ int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); -+ if (ref_kind != 0) { -+ member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument -+ member_reg = x9; // known to be free at this point -+ has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); -+ } else if (iid == vmIntrinsics::_invokeBasic) { -+ has_receiver = true; -+ } else { -+ fatal("unexpected intrinsic id %d", iid); -+ } ++instruct vasrB(vReg dst, vReg src, vReg shift) %{ ++ match(Set dst (RShiftVB src shift)); ++ ins_cost(VEC_COST); ++ effect(TEMP_DEF dst); ++ format %{ "vmsgtu.vi v0, $shift 7\t#@vasrB\n\t" ++ "vsra.vi $dst, $src, 7, Assembler::v0_t\n\t" ++ "vmnot.m v0, v0\n\t" ++ "vsra.vv $dst, $src, $shift, Assembler::v0_t" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e8); ++ // if shift > BitsPerByte - 1, clear the low BitsPerByte - 1 bits ++ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1); ++ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ BitsPerByte - 1, Assembler::v0_t); ++ // otherwise, shift ++ __ vmnot_m(v0, v0); ++ __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($shift$$reg), Assembler::v0_t); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ if (member_reg != noreg) { -+ // Load the member_arg into register, if necessary. 
-+ SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs); -+ VMReg r = regs[member_arg_pos].first(); -+ if (r->is_stack()) { -+ __ ld(member_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size)); -+ } else { -+ // no data motion is needed -+ member_reg = r->as_Register(); -+ } -+ } ++instruct vasrS(vReg dst, vReg src, vReg shift) %{ ++ match(Set dst (RShiftVS src shift)); ++ ins_cost(VEC_COST); ++ effect(TEMP_DEF dst); ++ format %{ "vmsgtu.vi v0, $shift, 15\t#@vasrS\n\t" ++ "vsra.vi $dst, $src, 15, Assembler::v0_t\n\t" ++ "vmnot.m v0, v0\n\t" ++ "vsra.vv $dst, $src, $shift, Assembler::v0_t" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e16); ++ // if shift > BitsPerShort - 1, clear the low BitsPerShort - 1 bits ++ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerShort - 1); ++ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ BitsPerShort - 1, Assembler::v0_t); ++ // otherwise, shift ++ __ vmnot_m(v0, v0); ++ __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($shift$$reg), Assembler::v0_t); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ if (has_receiver) { -+ // Make sure the receiver is loaded into a register. -+ assert(method->size_of_parameters() > 0, "oob"); -+ assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); -+ VMReg r = regs[0].first(); -+ assert(r->is_valid(), "bad receiver arg"); -+ if (r->is_stack()) { -+ // Porting note: This assumes that compiled calling conventions always -+ // pass the receiver oop in a register. If this is not true on some -+ // platform, pick a temp and load the receiver from stack. -+ fatal("receiver always in a register"); -+ receiver_reg = x12; // known to be free at this point -+ __ ld(receiver_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size)); -+ } else { -+ // no data motion is needed -+ receiver_reg = r->as_Register(); -+ } -+ } ++instruct vasrI(vReg dst, vReg src, vReg shift) %{ ++ match(Set dst (RShiftVI src shift)); ++ ins_cost(VEC_COST); ++ format %{ "vsra.vv $dst, $src, $shift\t#@vasrI" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($shift$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // Figure out which address we are really jumping to: -+ MethodHandles::generate_method_handle_dispatch(masm, iid, -+ receiver_reg, member_reg, /*for_compiler_entry:*/ true); -+} ++instruct vasrL(vReg dst, vReg src, vReg shift) %{ ++ match(Set dst (RShiftVL src shift)); ++ ins_cost(VEC_COST); ++ format %{ "vsra.vv $dst, $src, $shift\t#@vasrL" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($shift$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+// --------------------------------------------------------------------------- -+// Generate a native wrapper for a given method. The method takes arguments -+// in the Java compiled code convention, marshals them to the native -+// convention (handlizes oops, etc), transitions to native, makes the call, -+// returns to java state (possibly blocking), unhandlizes any result and -+// returns. -+// -+// Critical native functions are a shorthand for the use of -+// GetPrimtiveArrayCritical and disallow the use of any other JNI -+// functions. 
The wrapper is expected to unpack the arguments before -+// passing them to the callee and perform checks before and after the -+// native call to ensure that they GCLocker -+// lock_critical/unlock_critical semantics are followed. Some other -+// parts of JNI setup are skipped like the tear down of the JNI handle -+// block and the check for pending exceptions it's impossible for them -+// to be thrown. -+// -+// They are roughly structured like this: -+// if (GCLocker::needs_gc()) SharedRuntime::block_for_jni_critical() -+// tranistion to thread_in_native -+// unpack arrray arguments and call native entry point -+// check for safepoint in progress -+// check if any thread suspend flags are set -+// call into JVM and possible unlock the JNI critical -+// if a GC was suppressed while in the critical native. -+// transition back to thread_in_Java -+// return to caller -+// -+nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, -+ const methodHandle& method, -+ int compile_id, -+ BasicType* in_sig_bt, -+ VMRegPair* in_regs, -+ BasicType ret_type, -+ address critical_entry) { -+ if (method->is_method_handle_intrinsic()) { -+ vmIntrinsics::ID iid = method->intrinsic_id(); -+ intptr_t start = (intptr_t)__ pc(); -+ int vep_offset = ((intptr_t)__ pc()) - start; ++instruct vlslB(vReg dst, vReg src, vReg shift) %{ ++ match(Set dst (LShiftVB src shift)); ++ ins_cost(VEC_COST); ++ effect( TEMP_DEF dst); ++ format %{ "vmsgtu.vi v0, $shift, 7\t#@vlslB\n\t" ++ "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" ++ "vmnot.m v0, v0\n\t" ++ "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e8); ++ // if shift > BitsPerByte - 1, clear the element ++ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1); ++ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($src$$reg), Assembler::v0_t); ++ // otherwise, shift ++ __ vmnot_m(v0, v0); ++ __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($shift$$reg), Assembler::v0_t); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // First instruction must be a nop as it may need to be patched on deoptimisation -+ __ nop(); -+ gen_special_dispatch(masm, -+ method, -+ in_sig_bt, -+ in_regs); -+ int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period -+ __ flush(); -+ int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually -+ return nmethod::new_native_nmethod(method, -+ compile_id, -+ masm->code(), -+ vep_offset, -+ frame_complete, -+ stack_slots / VMRegImpl::slots_per_word, -+ in_ByteSize(-1), -+ in_ByteSize(-1), -+ (OopMapSet*)NULL); -+ } -+ bool is_critical_native = true; -+ address native_func = critical_entry; -+ if (native_func == NULL) { -+ native_func = method->native_function(); -+ is_critical_native = false; -+ } -+ assert(native_func != NULL, "must have function"); ++instruct vlslS(vReg dst, vReg src, vReg shift) %{ ++ match(Set dst (LShiftVS src shift)); ++ ins_cost(VEC_COST); ++ effect(TEMP_DEF dst); ++ format %{ "vmsgtu.vi v0, $shift, 15\t#@vlslS\n\t" ++ "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" ++ "vmnot.m v0, v0\n\t" ++ "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e16); ++ // if shift > BitsPerShort - 1, clear the element ++ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerShort - 1); ++ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($src$$reg), 
Assembler::v0_t); ++ // otherwise, shift ++ __ vmnot_m(v0, v0); ++ __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($shift$$reg), Assembler::v0_t); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // An OopMap for lock (and class if static) -+ OopMapSet *oop_maps = new OopMapSet(); -+ assert_cond(oop_maps != NULL); -+ intptr_t start = (intptr_t)__ pc(); ++instruct vlslI(vReg dst, vReg src, vReg shift) %{ ++ match(Set dst (LShiftVI src shift)); ++ ins_cost(VEC_COST); ++ format %{ "vsll.vv $dst, $src, $shift\t#@vlslI" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($shift$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // We have received a description of where all the java arg are located -+ // on entry to the wrapper. We need to convert these args to where -+ // the jni function will expect them. To figure out where they go -+ // we convert the java signature to a C signature by inserting -+ // the hidden arguments as arg[0] and possibly arg[1] (static method) ++instruct vlslL(vReg dst, vReg src, vReg shift) %{ ++ match(Set dst (LShiftVL src shift)); ++ ins_cost(VEC_COST); ++ format %{ "vsll.vv $dst, $src, $shift\t# vector (D)" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($shift$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ const int total_in_args = method->size_of_parameters(); -+ int total_c_args = total_in_args; -+ if (!is_critical_native) { -+ total_c_args += 1; -+ if (method->is_static()) { -+ total_c_args++; -+ } -+ } else { -+ for (int i = 0; i < total_in_args; i++) { -+ if (in_sig_bt[i] == T_ARRAY) { -+ total_c_args++; -+ } -+ } -+ } ++instruct vlsrB(vReg dst, vReg src, vReg shift) %{ ++ match(Set dst (URShiftVB src shift)); ++ ins_cost(VEC_COST); ++ effect(TEMP_DEF dst); ++ format %{ "vmsgtu.vi v0, $shift, 7\t#@vlsrB\n\t" ++ "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" ++ "vmnot.m v0, v0, v0\n\t" ++ "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e8); ++ // if shift > BitsPerByte - 1, clear the element ++ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1); ++ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($src$$reg), Assembler::v0_t); ++ // otherwise, shift ++ __ vmnot_m(v0, v0); ++ __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($shift$$reg), Assembler::v0_t); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); -+ VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); -+ assert_cond(out_sig_bt != NULL && out_regs != NULL); -+ BasicType* in_elem_bt = NULL; -+ -+ int argc = 0; -+ if (!is_critical_native) { -+ out_sig_bt[argc++] = T_ADDRESS; -+ if (method->is_static()) { -+ out_sig_bt[argc++] = T_OBJECT; -+ } ++instruct vlsrS(vReg dst, vReg src, vReg shift) %{ ++ match(Set dst (URShiftVS src shift)); ++ ins_cost(VEC_COST); ++ effect(TEMP_DEF dst); ++ format %{ "vmsgtu.vi v0, $shift, 15\t#@vlsrS\n\t" ++ "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" ++ "vmnot.m v0, v0\n\t" ++ "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e16); ++ // if shift > BitsPerShort - 1, clear the element ++ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerShort - 1); ++ __ 
vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($src$$reg), Assembler::v0_t); ++ // otherwise, shift ++ __ vmnot_m(v0, v0); ++ __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($shift$$reg), Assembler::v0_t); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ for (int i = 0; i < total_in_args ; i++) { -+ out_sig_bt[argc++] = in_sig_bt[i]; -+ } -+ } else { -+ Thread* THREAD = Thread::current(); -+ in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args); -+ assert_cond(in_elem_bt != NULL); -+ SignatureStream ss(method->signature()); -+ for (int i = 0; i < total_in_args ; i++) { -+ if (in_sig_bt[i] == T_ARRAY) { -+ // Arrays are passed as int, elem* pair -+ out_sig_bt[argc++] = T_INT; -+ out_sig_bt[argc++] = T_ADDRESS; -+ Symbol* atype = ss.as_symbol(CHECK_NULL); -+ const char* at = atype->as_C_string(); -+ if (strlen(at) == 2) { -+ assert(at[0] == '[', "must be"); -+ switch (at[1]) { -+ case 'B': in_elem_bt[i] = T_BYTE; break; -+ case 'C': in_elem_bt[i] = T_CHAR; break; -+ case 'D': in_elem_bt[i] = T_DOUBLE; break; -+ case 'F': in_elem_bt[i] = T_FLOAT; break; -+ case 'I': in_elem_bt[i] = T_INT; break; -+ case 'J': in_elem_bt[i] = T_LONG; break; -+ case 'S': in_elem_bt[i] = T_SHORT; break; -+ case 'Z': in_elem_bt[i] = T_BOOLEAN; break; -+ default: ShouldNotReachHere(); -+ } -+ } -+ } else { -+ out_sig_bt[argc++] = in_sig_bt[i]; -+ in_elem_bt[i] = T_VOID; -+ } -+ if (in_sig_bt[i] != T_VOID) { -+ assert(in_sig_bt[i] == ss.type(), "must match"); -+ ss.next(); -+ } -+ } -+ } + -+ // Now figure out where the args must be stored and how much stack space -+ // they require. -+ int out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); ++instruct vlsrI(vReg dst, vReg src, vReg shift) %{ ++ match(Set dst (URShiftVI src shift)); ++ ins_cost(VEC_COST); ++ format %{ "vsrl.vv $dst, $src, $shift\t#@vlsrI" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($shift$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // Compute framesize for the wrapper. We need to handlize all oops in -+ // incoming registers + -+ // Calculate the total number of stack slots we will need. 
++instruct vlsrL(vReg dst, vReg src, vReg shift) %{ ++ match(Set dst (URShiftVL src shift)); ++ ins_cost(VEC_COST); ++ format %{ "vsrl.vv $dst, $src, $shift\t#@vlsrL" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($shift$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // First count the abi requirement plus all of the outgoing args -+ int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; ++instruct vasrB_imm(vReg dst, vReg src, immI shift) %{ ++ match(Set dst (RShiftVB src (RShiftCntV shift))); ++ ins_cost(VEC_COST); ++ format %{ "vsra.vi $dst, $src, $shift\t#@vasrB_imm" %} ++ ins_encode %{ ++ uint32_t con = (unsigned)$shift$$constant & 0x1f; ++ __ vsetvli(t0, x0, Assembler::e8); ++ if (con == 0) { ++ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($src$$reg)); ++ return; ++ } ++ if (con >= BitsPerByte) con = BitsPerByte - 1; ++ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // Now the space for the inbound oop handle area -+ int total_save_slots = 8 * VMRegImpl::slots_per_word; // 8 arguments passed in registers -+ if (is_critical_native) { -+ // Critical natives may have to call out so they need a save area -+ // for register arguments. -+ int double_slots = 0; -+ int single_slots = 0; -+ for ( int i = 0; i < total_in_args; i++) { -+ if (in_regs[i].first()->is_Register()) { -+ const Register reg = in_regs[i].first()->as_Register(); -+ switch (in_sig_bt[i]) { -+ case T_BOOLEAN: -+ case T_BYTE: -+ case T_SHORT: -+ case T_CHAR: -+ case T_INT: single_slots++; break; -+ case T_ARRAY: // specific to LP64 (7145024) -+ case T_LONG: double_slots++; break; -+ default: ShouldNotReachHere(); -+ } -+ } else if (in_regs[i].first()->is_FloatRegister()) { -+ ShouldNotReachHere(); -+ } ++instruct vasrS_imm(vReg dst, vReg src, immI shift) %{ ++ match(Set dst (RShiftVS src (RShiftCntV shift))); ++ ins_cost(VEC_COST); ++ format %{ "vsra.vi $dst, $src, $shift\t#@vasrS_imm" %} ++ ins_encode %{ ++ uint32_t con = (unsigned)$shift$$constant & 0x1f; ++ __ vsetvli(t0, x0, Assembler::e16); ++ if (con == 0) { ++ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($src$$reg)); ++ return; + } -+ total_save_slots = double_slots * 2 + single_slots; -+ // align the save area -+ if (double_slots != 0) { -+ stack_slots = align_up(stack_slots, 2); ++ if (con >= BitsPerShort) con = BitsPerShort - 1; ++ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vasrI_imm(vReg dst, vReg src, immI shift) %{ ++ match(Set dst (RShiftVI src (RShiftCntV shift))); ++ ins_cost(VEC_COST); ++ format %{ "vsrl.vi $dst, $src, $shift\t#@vasrI_imm" %} ++ ins_encode %{ ++ uint32_t con = (unsigned)$shift$$constant & 0x1f; ++ __ vsetvli(t0, x0, Assembler::e32); ++ if (con == 0) { ++ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($src$$reg)); ++ return; + } -+ } ++ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ int oop_handle_offset = stack_slots; -+ stack_slots += total_save_slots; ++instruct vasrL_imm(vReg dst, vReg src, immI shift) %{ ++ predicate((n->in(2)->in(1)->get_int() & 0x3f) < 32); ++ match(Set dst (RShiftVL src (RShiftCntV shift))); ++ ins_cost(VEC_COST); ++ format %{ "vsrl.vi 
$dst, $src, $shift\t#@vasrL_imm" %} ++ ins_encode %{ ++ uint32_t con = (unsigned)$shift$$constant & 0x1f; ++ __ vsetvli(t0, x0, Assembler::e64); ++ if (con == 0) { ++ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($src$$reg)); ++ return; ++ } ++ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // Now any space we need for handlizing a klass if static method ++instruct vlsrB_imm(vReg dst, vReg src, immI shift) %{ ++ match(Set dst (URShiftVB src (RShiftCntV shift))); ++ ins_cost(VEC_COST); ++ format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrB_imm" %} ++ ins_encode %{ ++ uint32_t con = (unsigned)$shift$$constant & 0x1f; ++ __ vsetvli(t0, x0, Assembler::e8); ++ if (con == 0) { ++ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($src$$reg)); ++ return; ++ } ++ if (con >= BitsPerByte) { ++ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($src$$reg)); ++ return; ++ } ++ __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ int klass_slot_offset = 0; -+ int klass_offset = -1; -+ int lock_slot_offset = 0; -+ bool is_static = false; ++instruct vlsrS_imm(vReg dst, vReg src, immI shift) %{ ++ match(Set dst (URShiftVS src (RShiftCntV shift))); ++ ins_cost(VEC_COST); ++ format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrS_imm" %} ++ ins_encode %{ ++ uint32_t con = (unsigned)$shift$$constant & 0x1f; ++ __ vsetvli(t0, x0, Assembler::e16); ++ if (con == 0) { ++ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($src$$reg)); ++ return; ++ } ++ if (con >= BitsPerShort) { ++ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($src$$reg)); ++ return; ++ } ++ __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ if (method->is_static()) { -+ klass_slot_offset = stack_slots; -+ stack_slots += VMRegImpl::slots_per_word; -+ klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; -+ is_static = true; -+ } ++instruct vlsrI_imm(vReg dst, vReg src, immI shift) %{ ++ match(Set dst (URShiftVI src (RShiftCntV shift))); ++ ins_cost(VEC_COST); ++ format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrI_imm" %} ++ ins_encode %{ ++ uint32_t con = (unsigned)$shift$$constant & 0x1f; ++ __ vsetvli(t0, x0, Assembler::e32); ++ if (con == 0) { ++ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($src$$reg)); ++ return; ++ } ++ __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // Plus a lock if needed ++instruct vlsrL_imm(vReg dst, vReg src, immI shift) %{ ++ predicate((n->in(2)->in(1)->get_int() & 0x3f) < 32); ++ match(Set dst (URShiftVL src (RShiftCntV shift))); ++ ins_cost(VEC_COST); ++ format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrL_imm" %} ++ ins_encode %{ ++ uint32_t con = (unsigned)$shift$$constant & 0x1f; ++ __ vsetvli(t0, x0, Assembler::e64); ++ if (con == 0) { ++ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($src$$reg)); ++ return; ++ } ++ __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ if (method->is_synchronized()) { -+ lock_slot_offset = stack_slots; -+ stack_slots += VMRegImpl::slots_per_word; -+ } ++instruct 
vlslB_imm(vReg dst, vReg src, immI shift) %{ ++ match(Set dst (LShiftVB src (LShiftCntV shift))); ++ ins_cost(VEC_COST); ++ format %{ "vsll.vi $dst, $src, $shift\t#@vlslB_imm" %} ++ ins_encode %{ ++ uint32_t con = (unsigned)$shift$$constant & 0x1f; ++ __ vsetvli(t0, x0, Assembler::e8); ++ if (con >= BitsPerByte) { ++ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($src$$reg)); ++ return; ++ } ++ __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // Now a place (+2) to save return values or temp during shuffling -+ // + 4 for return address (which we own) and saved fp -+ stack_slots += 6; ++instruct vlslS_imm(vReg dst, vReg src, immI shift) %{ ++ match(Set dst (LShiftVS src (LShiftCntV shift))); ++ ins_cost(VEC_COST); ++ format %{ "vsll.vi $dst, $src, $shift\t#@vlslS_imm" %} ++ ins_encode %{ ++ uint32_t con = (unsigned)$shift$$constant & 0x1f; ++ __ vsetvli(t0, x0, Assembler::e16); ++ if (con >= BitsPerShort) { ++ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($src$$reg)); ++ return; ++ } ++ __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // Ok The space we have allocated will look like: -+ // -+ // -+ // FP-> | | -+ // | 2 slots (ra) | -+ // | 2 slots (fp) | -+ // |---------------------| -+ // | 2 slots for moves | -+ // |---------------------| -+ // | lock box (if sync) | -+ // |---------------------| <- lock_slot_offset -+ // | klass (if static) | -+ // |---------------------| <- klass_slot_offset -+ // | oopHandle area | -+ // |---------------------| <- oop_handle_offset (8 java arg registers) -+ // | outbound memory | -+ // | based arguments | -+ // | | -+ // |---------------------| -+ // | | -+ // SP-> | out_preserved_slots | -+ // -+ // ++instruct vlslI_imm(vReg dst, vReg src, immI shift) %{ ++ match(Set dst (LShiftVI src (LShiftCntV shift))); ++ ins_cost(VEC_COST); ++ format %{ "vsll.vi $dst, $src, $shift\t#@vlslI_imm" %} ++ ins_encode %{ ++ uint32_t con = (unsigned)$shift$$constant & 0x1f; ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); ++ %} ++ ins_pipe(pipe_slow); ++%} + ++instruct vlslL_imm(vReg dst, vReg src, immI shift) %{ ++ predicate((n->in(2)->in(1)->get_int() & 0x3f) < 32); ++ match(Set dst (LShiftVL src (LShiftCntV shift))); ++ ins_cost(VEC_COST); ++ format %{ "vsll.vi $dst, $src, $shift\t#@vlslL_imm" %} ++ ins_encode %{ ++ uint32_t con = (unsigned)$shift$$constant & 0x1f; ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // Now compute actual number of stack words we need rounding to make -+ // stack properly aligned. 
-+ stack_slots = align_up(stack_slots, StackAlignmentInSlots); ++instruct vshiftcntB(vReg dst, iRegIorL2I cnt) %{ ++ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); ++ match(Set dst (LShiftCntV cnt)); ++ match(Set dst (RShiftCntV cnt)); ++ format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntB" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e8); ++ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ int stack_size = stack_slots * VMRegImpl::stack_slot_size; ++instruct vshiftcntS(vReg dst, iRegIorL2I cnt) %{ ++ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_SHORT || ++ n->bottom_type()->is_vect()->element_basic_type() == T_CHAR); ++ match(Set dst (LShiftCntV cnt)); ++ match(Set dst (RShiftCntV cnt)); ++ format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntS" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e16); ++ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // First thing make an ic check to see if we should even be here ++instruct vshiftcntI(vReg dst, iRegIorL2I cnt) %{ ++ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_INT); ++ match(Set dst (LShiftCntV cnt)); ++ match(Set dst (RShiftCntV cnt)); ++ format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntI" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // We are free to use all registers as temps without saving them and -+ // restoring them except fp. fp is the only callee save register -+ // as far as the interpreter and the compiler(s) are concerned. ++instruct vshiftcntL(vReg dst, iRegIorL2I cnt) %{ ++ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_LONG); ++ match(Set dst (LShiftCntV cnt)); ++ match(Set dst (RShiftCntV cnt)); ++ format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntL" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + ++// vector sqrt + -+ const Register ic_reg = t1; -+ const Register receiver = j_rarg0; ++instruct vsqrtF(vReg dst, vReg src) %{ ++ match(Set dst (SqrtVF src)); ++ ins_cost(VEC_COST); ++ format %{ "vfsqrt.v $dst, $src\t#@vsqrtF" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vfsqrt_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ Label hit; -+ Label exception_pending; ++instruct vsqrtD(vReg dst, vReg src) %{ ++ match(Set dst (SqrtVD src)); ++ ins_cost(VEC_COST); ++ format %{ "vfsqrt.v $dst, $src\t#@vsqrtD" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vfsqrt_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ assert_different_registers(ic_reg, receiver, t0); -+ __ verify_oop(receiver); -+ __ cmp_klass(receiver, ic_reg, t0, hit); ++// vector sub + -+ __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); ++instruct vsubB(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (SubVB src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vsub.vv $dst, $src1, $src2\t#@vsubB" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e8); ++ __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // Verified entry point must be aligned -+ __ align(8); ++instruct vsubS(vReg dst, vReg src1, vReg src2) %{ 
++ match(Set dst (SubVS src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vsub.vv $dst, $src1, $src2\t#@vsubS" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e16); ++ __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ __ bind(hit); ++instruct vsubI(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (SubVI src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vsub.vv $dst, $src1, $src2\t#@vsubI" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ int vep_offset = ((intptr_t)__ pc()) - start; ++instruct vsubL(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (SubVL src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vsub.vv $dst, $src1, $src2\t#@vsubL" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // If we have to make this method not-entrant we'll overwrite its -+ // first instruction with a jump. -+ __ nop(); ++instruct vsubF(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (SubVF src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vfsub.vv $dst, $src1, $src2\t@vsubF" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vfsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // Generate stack overflow check -+ if (UseStackBanging) { -+ __ bang_stack_with_offset(checked_cast(JavaThread::stack_shadow_zone_size())); -+ } else { -+ Unimplemented(); -+ } ++instruct vsubD(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (SubVD src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vfsub.vv $dst, $src1, $src2\t#@vsubD" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vfsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} + -+ // Generate a new frame for the wrapper. -+ __ enter(); -+ // -2 because return address is already present and so is saved fp -+ __ sub(sp, sp, stack_size - 2 * wordSize); ++instruct vstring_equalsL(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, ++ iRegI_R10 result, vReg_V1 v1, ++ vReg_V2 v2, vReg_V3 v3, rFlagsReg cr) ++%{ ++ predicate(UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL); ++ match(Set result (StrEquals (Binary str1 str2) cnt)); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, TEMP v1, TEMP v2, TEMP v3, KILL cr); + -+ // Frame is now completed as far as size and linkage. -+ int frame_complete = ((intptr_t)__ pc()) - start; ++ format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsL" %} ++ ins_encode %{ ++ // Count is in 8-bit bytes; non-Compact chars are 16 bits. 
++ __ string_equals_v($str1$$Register, $str2$$Register, ++ $result$$Register, $cnt$$Register, 1); ++ %} ++ ins_pipe(pipe_class_memory); ++%} + -+ // We use x18 as the oop handle for the receiver/klass -+ // It is callee save so it survives the call to native ++instruct vstring_equalsU(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, ++ iRegI_R10 result, vReg_V1 v1, ++ vReg_V2 v2, vReg_V3 v3, rFlagsReg cr) ++%{ ++ predicate(UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU); ++ match(Set result (StrEquals (Binary str1 str2) cnt)); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, TEMP v1, TEMP v2, TEMP v3, KILL cr); + -+ const Register oop_handle_reg = x18; ++ format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsU" %} ++ ins_encode %{ ++ // Count is in 8-bit bytes; non-Compact chars are 16 bits. ++ __ string_equals_v($str1$$Register, $str2$$Register, ++ $result$$Register, $cnt$$Register, 2); ++ %} ++ ins_pipe(pipe_class_memory); ++%} + -+ if (is_critical_native) { -+ check_needs_gc_for_critical_native(masm, stack_slots, total_c_args, total_in_args, -+ oop_handle_offset, oop_maps, in_regs, in_sig_bt); -+ } ++instruct varray_equalsB(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, ++ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegP_R28 tmp, rFlagsReg cr) ++%{ ++ predicate(UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); ++ match(Set result (AryEq ary1 ary2)); ++ effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP v1, TEMP v2, TEMP v3, KILL cr); + -+ // -+ // We immediately shuffle the arguments so that any vm call we have to -+ // make from here on out (sync slow path, jvmti, etc.) we will have -+ // captured the oops from our caller and have a valid oopMap for -+ // them. ++ format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsB // KILL $tmp" %} ++ ins_encode %{ ++ __ arrays_equals_v($ary1$$Register, $ary2$$Register, ++ $result$$Register, $tmp$$Register, 1); ++ %} ++ ins_pipe(pipe_class_memory); ++%} + -+ // ----------------- -+ // The Grand Shuffle ++instruct varray_equalsC(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, ++ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegP_R28 tmp, rFlagsReg cr) ++%{ ++ predicate(UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); ++ match(Set result (AryEq ary1 ary2)); ++ effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP v1, TEMP v2, TEMP v3, KILL cr); + -+ // The Java calling convention is either equal (linux) or denser (win64) than the -+ // c calling convention. However the because of the jni_env argument the c calling -+ // convention always has at least one more (and two for static) arguments than Java. -+ // Therefore if we move the args from java -> c backwards then we will never have -+ // a register->register conflict and we don't have to build a dependency graph -+ // and figure out how to break any cycles. -+ // -+ -+ // Record esp-based slot for receiver on stack for non-static methods -+ int receiver_offset = -1; -+ -+ // This is a trick. We double the stack slots so we can claim -+ // the oops in the caller's frame. Since we are sure to have -+ // more args than the caller doubling is enough to make -+ // sure we can capture all the incoming oop args from the -+ // caller. 
-+ // -+ OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); -+ assert_cond(map != NULL); ++ format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsC // KILL $tmp" %} ++ ins_encode %{ ++ __ arrays_equals_v($ary1$$Register, $ary2$$Register, ++ $result$$Register, $tmp$$Register, 2); ++ %} ++ ins_pipe(pipe_class_memory); ++%} + -+ int float_args = 0; -+ int int_args = 0; ++instruct vstring_compareU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, ++ iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, ++ iRegP_R28 tmp1, iRegL_R29 tmp2) ++%{ ++ predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UU); ++ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); ++ effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, ++ TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); + -+#ifdef ASSERT -+ bool reg_destroyed[RegisterImpl::number_of_registers]; -+ bool freg_destroyed[FloatRegisterImpl::number_of_registers]; -+ for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { -+ reg_destroyed[r] = false; -+ } -+ for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { -+ freg_destroyed[f] = false; -+ } ++ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareU" %} ++ ins_encode %{ ++ // Count is in 8-bit bytes; non-Compact chars are 16 bits. ++ __ string_compare_v($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ $tmp1$$Register, $tmp2$$Register, ++ StrIntrinsicNode::UU); ++ %} ++ ins_pipe(pipe_class_memory); ++%} ++instruct vstring_compareL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, ++ iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, ++ iRegP_R28 tmp1, iRegL_R29 tmp2) ++%{ ++ predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LL); ++ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); ++ effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, ++ TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); + -+#endif /* ASSERT */ ++ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareL" %} ++ ins_encode %{ ++ __ string_compare_v($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ $tmp1$$Register, $tmp2$$Register, ++ StrIntrinsicNode::LL); ++ %} ++ ins_pipe(pipe_class_memory); ++%} + -+ // This may iterate in two different directions depending on the -+ // kind of native it is. The reason is that for regular JNI natives -+ // the incoming and outgoing registers are offset upwards and for -+ // critical natives they are offset down. 
-+ GrowableArray arg_order(2 * total_in_args); -+ VMRegPair tmp_vmreg; -+ tmp_vmreg.set2(x9->as_VMReg()); ++instruct vstring_compareUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, ++ iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, ++ iRegP_R28 tmp1, iRegL_R29 tmp2) ++%{ ++ predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UL); ++ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); ++ effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, ++ TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); + -+ if (!is_critical_native) { -+ for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { -+ arg_order.push(i); -+ arg_order.push(c_arg); -+ } -+ } else { -+ // Compute a valid move order, using tmp_vmreg to break any cycles -+ ComputeMoveOrder cmo(total_in_args, in_regs, total_c_args, out_regs, in_sig_bt, arg_order, tmp_vmreg); -+ } ++ format %{"String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareUL" %} ++ ins_encode %{ ++ __ string_compare_v($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ $tmp1$$Register, $tmp2$$Register, ++ StrIntrinsicNode::UL); ++ %} ++ ins_pipe(pipe_class_memory); ++%} ++instruct vstring_compareLU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, ++ iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, ++ iRegP_R28 tmp1, iRegL_R29 tmp2) ++%{ ++ predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LU); ++ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); ++ effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, ++ TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); + -+ int temploc = -1; -+ for (int ai = 0; ai < arg_order.length(); ai += 2) { -+ int i = arg_order.at(ai); -+ int c_arg = arg_order.at(ai + 1); -+ __ block_comment(err_msg("mv %d -> %d", i, c_arg)); -+ if (c_arg == -1) { -+ assert(is_critical_native, "should only be required for critical natives"); -+ // This arg needs to be moved to a temporary -+ __ mv(tmp_vmreg.first()->as_Register(), in_regs[i].first()->as_Register()); -+ in_regs[i] = tmp_vmreg; -+ temploc = i; -+ continue; -+ } else if (i == -1) { -+ assert(is_critical_native, "should only be required for critical natives"); -+ // Read from the temporary location -+ assert(temploc != -1, "must be valid"); -+ i = temploc; -+ temploc = -1; -+ } -+#ifdef ASSERT -+ if (in_regs[i].first()->is_Register()) { -+ assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!"); -+ } else if (in_regs[i].first()->is_FloatRegister()) { -+ assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding()], "destroyed reg!"); -+ } -+ if (out_regs[c_arg].first()->is_Register()) { -+ reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; -+ } else if (out_regs[c_arg].first()->is_FloatRegister()) { -+ freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; -+ } -+#endif /* ASSERT */ -+ switch (in_sig_bt[i]) { -+ case T_ARRAY: -+ if (is_critical_native) { -+ unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]); -+ c_arg++; -+#ifdef ASSERT -+ if (out_regs[c_arg].first()->is_Register()) { -+ reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; -+ } else if (out_regs[c_arg].first()->is_FloatRegister()) { -+ 
freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; -+ } -+#endif -+ int_args++; -+ break; -+ } -+ // no break -+ case T_OBJECT: -+ assert(!is_critical_native, "no oop arguments"); -+ __ object_move(map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], -+ ((i == 0) && (!is_static)), -+ &receiver_offset); -+ int_args++; -+ break; -+ case T_VOID: -+ break; ++ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareLU" %} ++ ins_encode %{ ++ __ string_compare_v($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ $tmp1$$Register, $tmp2$$Register, ++ StrIntrinsicNode::LU); ++ %} ++ ins_pipe(pipe_class_memory); ++%} + -+ case T_FLOAT: -+ __ float_move(in_regs[i], out_regs[c_arg]); -+ float_args++; -+ break; ++// fast byte[] to char[] inflation ++instruct vstring_inflate(Universe dummy, iRegP_R10 src, iRegP_R11 dst, iRegI_R12 len, ++ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegL tmp) ++%{ ++ predicate(UseRVV); ++ match(Set dummy (StrInflatedCopy src (Binary dst len))); ++ effect(TEMP v1, TEMP v2, TEMP v3, TEMP tmp, USE_KILL src, USE_KILL dst, USE_KILL len); + -+ case T_DOUBLE: -+ assert( i + 1 < total_in_args && -+ in_sig_bt[i + 1] == T_VOID && -+ out_sig_bt[c_arg + 1] == T_VOID, "bad arg list"); -+ __ double_move(in_regs[i], out_regs[c_arg]); -+ float_args++; -+ break; ++ format %{ "String Inflate $src,$dst" %} ++ ins_encode %{ ++ __ byte_array_inflate_v($src$$Register, $dst$$Register, $len$$Register, $tmp$$Register); ++ %} ++ ins_pipe(pipe_class_memory); ++%} + -+ case T_LONG : -+ __ long_move(in_regs[i], out_regs[c_arg]); -+ int_args++; -+ break; ++// encode char[] to byte[] in ISO_8859_1 ++instruct vencode_iso_array(iRegP_R12 src, iRegP_R11 dst, iRegI_R13 len, iRegI_R10 result, ++ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegL tmp) ++%{ ++ predicate(UseRVV); ++ match(Set result (EncodeISOArray src (Binary dst len))); ++ effect(TEMP_DEF result, USE_KILL src, USE_KILL dst, USE_KILL len, ++ TEMP v1, TEMP v2, TEMP v3, TEMP tmp); + -+ case T_ADDRESS: -+ assert(false, "found T_ADDRESS in java args"); -+ break; ++ format %{ "Encode array $src,$dst,$len -> $result" %} ++ ins_encode %{ ++ __ encode_iso_array_v($src$$Register, $dst$$Register, $len$$Register, ++ $result$$Register, $tmp$$Register); ++ %} ++ ins_pipe( pipe_class_memory ); ++%} + -+ default: -+ __ move32_64(in_regs[i], out_regs[c_arg]); -+ int_args++; -+ } -+ } ++// fast char[] to byte[] compression ++instruct vstring_compress(iRegP_R12 src, iRegP_R11 dst, iRegI_R13 len, iRegI_R10 result, ++ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegL tmp) ++%{ ++ predicate(UseRVV); ++ match(Set result (StrCompressedCopy src (Binary dst len))); ++ effect(TEMP_DEF result, USE_KILL src, USE_KILL dst, USE_KILL len, ++ TEMP v1, TEMP v2, TEMP v3, TEMP tmp); + -+ // point c_arg at the first arg that is already loaded in case we -+ // need to spill before we call out -+ int c_arg = total_c_args - total_in_args; ++ format %{ "String Compress $src,$dst -> $result // KILL R11, R12, R13" %} ++ ins_encode %{ ++ __ char_array_compress_v($src$$Register, $dst$$Register, $len$$Register, ++ $result$$Register, $tmp$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} + -+ // Pre-load a static method's oop into c_rarg1. 
-+ if (method->is_static() && !is_critical_native) { ++instruct vcount_positives(iRegP_R11 ary, iRegI_R12 len, iRegI_R10 result, iRegL tmp) ++%{ ++ predicate(UseRVV); ++ match(Set result (CountPositives ary len)); ++ effect(USE_KILL ary, USE_KILL len, TEMP tmp); + -+ // load oop into a register -+ __ movoop(c_rarg1, -+ JNIHandles::make_local(method->method_holder()->java_mirror()), -+ /*immediate*/true); ++ format %{ "count positives byte[] $ary, $len -> $result" %} ++ ins_encode %{ ++ __ count_positives_v($ary$$Register, $len$$Register, $result$$Register, $tmp$$Register); ++ %} + -+ // Now handlize the static class mirror it's known not-null. -+ __ sd(c_rarg1, Address(sp, klass_offset)); -+ map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); ++ ins_pipe(pipe_slow); ++%} + -+ // Now get the handle -+ __ la(c_rarg1, Address(sp, klass_offset)); -+ // and protect the arg if we must spill -+ c_arg--; -+ } ++instruct vstringU_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, ++ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, ++ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3) ++%{ ++ predicate(UseRVV && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U)); ++ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); ++ effect(TEMP_DEF result, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, ++ TEMP tmp1, TEMP tmp2, TEMP v1, TEMP v2, TEMP v3); + -+ // Change state to native (we save the return address in the thread, since it might not -+ // be pushed on the stack when we do a stack traversal). -+ // We use the same pc/oopMap repeatedly when we call out ++ format %{ "StringUTF16 IndexOf char[] $str1, $cnt1, $ch -> $result" %} + -+ Label native_return; -+ __ set_last_Java_frame(sp, noreg, native_return, t0); ++ ins_encode %{ ++ __ string_indexof_char_v($str1$$Register, $cnt1$$Register, $ch$$Register, ++ $result$$Register, $tmp1$$Register, $tmp2$$Register, ++ false /* isL */); ++ %} + -+ Label dtrace_method_entry, dtrace_method_entry_done; -+ { -+ int32_t offset = 0; -+ __ la_patchable(t0, ExternalAddress((address)&DTraceMethodProbes), offset); -+ __ lbu(t0, Address(t0, offset)); -+ __ addw(t0, t0, zr); -+ __ bnez(t0, dtrace_method_entry); -+ __ bind(dtrace_method_entry_done); -+ } ++ ins_pipe(pipe_class_memory); ++%} + -+ // RedefineClasses() tracing support for obsolete method entry -+ if (log_is_enabled(Trace, redefine, class, obsolete)) { -+ // protect the args we've loaded -+ save_args(masm, total_c_args, c_arg, out_regs); -+ __ mov_metadata(c_rarg1, method()); -+ __ call_VM_leaf( -+ CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), -+ xthread, c_rarg1); -+ restore_args(masm, total_c_args, c_arg, out_regs); -+ } ++instruct vstringL_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, ++ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, ++ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3) ++%{ ++ predicate(UseRVV && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L)); ++ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); ++ effect(TEMP_DEF result, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, ++ TEMP tmp1, TEMP tmp2, TEMP v1, TEMP v2, TEMP v3); + -+ // Lock a synchronized method ++ format %{ "StringLatin1 IndexOf char[] $str1, $cnt1, $ch -> $result" %} + -+ // Register definitions used by locking and unlocking ++ ins_encode %{ ++ __ string_indexof_char_v($str1$$Register, $cnt1$$Register, $ch$$Register, ++ $result$$Register, $tmp1$$Register, $tmp2$$Register, ++ true /* isL */); ++ %} + -+ const Register swap_reg = x10; -+ const Register obj_reg = x9; // Will contain 
the oop -+ const Register lock_reg = x30; // Address of compiler lock object (BasicLock) -+ const Register old_hdr = x30; // value of old header at unlock time -+ const Register tmp = ra; ++ ins_pipe(pipe_class_memory); ++%} + -+ Label slow_path_lock; -+ Label lock_done; ++// clearing of an array ++instruct vclearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy, ++ vReg_V1 vReg1, vReg_V2 vReg2, vReg_V3 vReg3) ++%{ ++ predicate(UseRVV); ++ match(Set dummy (ClearArray cnt base)); ++ effect(USE_KILL cnt, USE_KILL base, TEMP vReg1, TEMP vReg2, TEMP vReg3); + -+ if (method->is_synchronized()) { -+ assert(!is_critical_native, "unhandled"); ++ format %{ "ClearArray $cnt, $base\t#@clearArray_reg_reg" %} + -+ const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); ++ ins_encode %{ ++ __ clear_array_v($base$$Register, $cnt$$Register); ++ %} + -+ // Get the handle (the 2nd argument) -+ __ mv(oop_handle_reg, c_rarg1); ++ ins_pipe(pipe_class_memory); ++%} +diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +new file mode 100644 +index 00000000000..f85d4b25a76 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +@@ -0,0 +1,2761 @@ ++/* ++ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ + -+ // Get address of the box ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/debugInfoRec.hpp" ++#include "code/icBuffer.hpp" ++#include "code/vtableStubs.hpp" ++#include "compiler/oopMap.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "interpreter/interpreter.hpp" ++#include "logging/log.hpp" ++#include "memory/resourceArea.hpp" ++#include "nativeInst_riscv.hpp" ++#include "oops/compiledICHolder.hpp" ++#include "oops/klass.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/jniHandles.hpp" ++#include "runtime/safepointMechanism.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/signature.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/vframeArray.hpp" ++#include "utilities/align.hpp" ++#include "utilities/formatBuffer.hpp" ++#include "vmreg_riscv.inline.hpp" ++#ifdef COMPILER1 ++#include "c1/c1_Runtime1.hpp" ++#endif ++#ifdef COMPILER2 ++#include "adfiles/ad_riscv.hpp" ++#include "opto/runtime.hpp" ++#endif + -+ __ la(lock_reg, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); ++#define __ masm-> + -+ // Load the oop from the handle -+ __ ld(obj_reg, Address(oop_handle_reg, 0)); ++const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size; + -+ if (UseBiasedLocking) { -+ __ biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp, false, lock_done, &slow_path_lock); -+ } ++class SimpleRuntimeFrame { ++public: + -+ // Load (object->mark() | 1) into swap_reg % x10 -+ __ ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes())); -+ __ ori(swap_reg, t0, 1); ++ // Most of the runtime stubs have this simple frame layout. ++ // This class exists to make the layout shared in one place. ++ // Offsets are for compiler stack slots, which are jints. ++ enum layout { ++ // The frame sender code expects that fp will be in the "natural" place and ++ // will override any oopMap setting for it. We must therefore force the layout ++ // so that it agrees with the frame sender code. ++ // we don't expect any arg reg save area so riscv asserts that ++ // frame::arg_reg_save_area_bytes == 0 ++ fp_off = 0, fp_off2, ++ return_off, return_off2, ++ framesize ++ }; ++}; + -+ // Save (object->mark() | 1) into BasicLock's displaced header -+ __ sd(swap_reg, Address(lock_reg, mark_word_offset)); ++class RegisterSaver { ++ const bool _save_vectors; ++ public: ++ RegisterSaver(bool save_vectors) : _save_vectors(UseRVV && save_vectors) {} ++ ~RegisterSaver() {} ++ OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words); ++ void restore_live_registers(MacroAssembler* masm); + -+ // src -> dest if dest == x10 else x10 <- dest -+ { -+ Label here; -+ __ cmpxchg_obj_header(x10, lock_reg, obj_reg, t0, lock_done, /*fallthrough*/NULL); ++ // Offsets into the register save area ++ // Used by deoptimization when it is managing result register ++ // values on its own ++ // gregs:28, float_register:32; except: x1(ra) & x2(sp) & gp(x3) & tp(x4) ++ // |---v0---|<---SP ++ // |---v1---|save vectors only in generate_handler_blob ++ // |-- .. --| ++ // |---v31--|----- ++ // |---f0---| ++ // |---f1---| ++ // | .. | ++ // |---f31--| ++ // |---reserved slot for stack alignment---| ++ // |---x5---| ++ // | x6 | ++ // |---.. 
--| ++ // |---x31--| ++ // |---fp---| ++ // |---ra---| ++ int v0_offset_in_bytes(void) { return 0; } ++ int f0_offset_in_bytes(void) { ++ int f0_offset = 0; ++#ifdef COMPILER2 ++ if (_save_vectors) { ++ f0_offset += Matcher::scalable_vector_reg_size(T_INT) * VectorRegisterImpl::number_of_registers * ++ BytesPerInt; + } -+ -+ // Test if the oopMark is an obvious stack pointer, i.e., -+ // 1) (mark & 3) == 0, and -+ // 2) sp <= mark < mark + os::pagesize() -+ // These 3 tests can be done by evaluating the following -+ // expression: ((mark - sp) & (3 - os::vm_page_size())), -+ // assuming both stack pointer and pagesize have their -+ // least significant 2 bits clear. -+ // NOTE: the oopMark is in swap_reg % 10 as the result of cmpxchg -+ -+ __ sub(swap_reg, swap_reg, sp); -+ __ andi(swap_reg, swap_reg, 3 - os::vm_page_size()); -+ -+ // Save the test result, for recursive case, the result is zero -+ __ sd(swap_reg, Address(lock_reg, mark_word_offset)); -+ __ bnez(swap_reg, slow_path_lock); -+ -+ // Slow path will re-enter here -+ -+ __ bind(lock_done); ++#endif ++ return f0_offset; + } -+ -+ -+ // Finally just about ready to make the JNI call -+ -+ // get JNIEnv* which is first argument to native -+ if (!is_critical_native) { -+ __ la(c_rarg0, Address(xthread, in_bytes(JavaThread::jni_environment_offset()))); ++ int reserved_slot_offset_in_bytes(void) { ++ return f0_offset_in_bytes() + ++ FloatRegisterImpl::max_slots_per_register * ++ FloatRegisterImpl::number_of_registers * ++ BytesPerInt; + } + -+ // Now set thread in native -+ __ la(t1, Address(xthread, JavaThread::thread_state_offset())); -+ __ mv(t0, _thread_in_native); -+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); -+ __ sw(t0, Address(t1)); -+ -+ __ rt_call(native_func); ++ int reg_offset_in_bytes(Register r) { ++ assert (r->encoding() > 4, "ra, sp, gp and tp not saved"); ++ return reserved_slot_offset_in_bytes() + (r->encoding() - 4 /* x1, x2, x3, x4 */) * wordSize; ++ } + -+ __ bind(native_return); ++ int freg_offset_in_bytes(FloatRegister f) { ++ return f0_offset_in_bytes() + f->encoding() * wordSize; ++ } + -+ intptr_t return_pc = (intptr_t) __ pc(); -+ oop_maps->add_gc_map(return_pc - start, map); ++ int ra_offset_in_bytes(void) { ++ return reserved_slot_offset_in_bytes() + ++ (RegisterImpl::number_of_registers - 3) * ++ RegisterImpl::max_slots_per_register * ++ BytesPerInt; ++ } ++}; + -+ // Unpack native results. -+ if(ret_type != T_OBJECT && ret_type != T_ARRAY) { -+ __ cast_primitive_type(ret_type, x10); ++OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) { ++ int vector_size_in_bytes = 0; ++ int vector_size_in_slots = 0; ++#ifdef COMPILER2 ++ if (_save_vectors) { ++ vector_size_in_bytes += Matcher::scalable_vector_reg_size(T_BYTE); ++ vector_size_in_slots += Matcher::scalable_vector_reg_size(T_INT); + } ++#endif + -+ // Switch thread to "native transition" state before reading the synchronization state. -+ // This additional state is necessary because reading and testing the synchronization -+ // state is not atomic w.r.t. GC, as this scenario demonstrates: -+ // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted. -+ // VM thread changes sync state to synchronizing and suspends threads for GC. -+ // Thread A is resumed to finish this native method, but doesn't block here since it -+ // didn't see any synchronization is progress, and escapes. 
-+ __ mv(t0, _thread_in_native_trans); ++ assert_cond(masm != NULL && total_frame_words != NULL); ++ int frame_size_in_bytes = align_up(additional_frame_words * wordSize + ra_offset_in_bytes() + wordSize, 16); ++ // OopMap frame size is in compiler stack slots (jint's) not bytes or words ++ int frame_size_in_slots = frame_size_in_bytes / BytesPerInt; ++ // The caller will allocate additional_frame_words ++ int additional_frame_slots = additional_frame_words * wordSize / BytesPerInt; ++ // CodeBlob frame size is in words. ++ int frame_size_in_words = frame_size_in_bytes / wordSize; ++ *total_frame_words = frame_size_in_words; + -+ if(os::is_MP()) { -+ if (UseMembar) { -+ __ sw(t0, Address(xthread, JavaThread::thread_state_offset())); ++ // Save Integer, Float and Vector registers. ++ __ enter(); ++ __ push_CPU_state(_save_vectors, vector_size_in_bytes); + -+ // Force this write out before the read below -+ __ membar(MacroAssembler::AnyAny); -+ } else { -+ __ la(t1, Address(xthread, JavaThread::thread_state_offset())); -+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); -+ __ sw(t0, Address(t1)); ++ // Set an oopmap for the call site. This oopmap will map all ++ // oop-registers and debug-info registers as callee-saved. This ++ // will allow deoptimization at this safepoint to find all possible ++ // debug-info recordings, as well as let GC find all oops. ++ ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* oop_map = new OopMap(frame_size_in_slots, 0); ++ assert_cond(oop_maps != NULL && oop_map != NULL); + -+ // Write serialization page so VM thread can do a pseudo remote membar. -+ // We use the current thread pointer to calculate a thread specific -+ // offset to write to within the page. This minimizes bus traffic -+ // due to cache line collision. 
-+ __ serialize_memory(xthread, x12, t0); ++ int sp_offset_in_slots = 0; ++ int step_in_slots = 0; ++ if (_save_vectors) { ++ step_in_slots = vector_size_in_slots; ++ for (int i = 0; i < VectorRegisterImpl::number_of_registers; i++, sp_offset_in_slots += step_in_slots) { ++ VectorRegister r = as_VectorRegister(i); ++ oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset_in_slots), r->as_VMReg()); + } -+ } else { -+ __ sw(t0, Address(xthread, JavaThread::thread_state_offset())); + } + -+ // check for safepoint operation in progress and/or pending suspend requests -+ Label safepoint_in_progress, safepoint_in_progress_done; -+ { -+ __ safepoint_poll_acquire(safepoint_in_progress); -+ __ lwu(t0, Address(xthread, JavaThread::suspend_flags_offset())); -+ __ bnez(t0, safepoint_in_progress); -+ __ bind(safepoint_in_progress_done); ++ step_in_slots = FloatRegisterImpl::max_slots_per_register; ++ for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++, sp_offset_in_slots += step_in_slots) { ++ FloatRegister r = as_FloatRegister(i); ++ oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset_in_slots), r->as_VMReg()); + } + -+ // change thread state -+ Label after_transition; -+ __ la(t1, Address(xthread, JavaThread::thread_state_offset())); -+ __ mv(t0, _thread_in_Java); -+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); -+ __ sw(t0, Address(t1)); -+ __ bind(after_transition); -+ -+ Label reguard; -+ Label reguard_done; -+ __ lbu(t0, Address(xthread, JavaThread::stack_guard_state_offset())); -+ __ mv(t1, JavaThread::stack_guard_yellow_reserved_disabled); -+ __ beq(t0, t1, reguard); -+ __ bind(reguard_done); -+ -+ // native result if any is live ++ step_in_slots = RegisterImpl::max_slots_per_register; ++ // skip the slot reserved for alignment, see MacroAssembler::push_reg; ++ // also skip x5 ~ x6 on the stack because they are caller-saved registers. ++ sp_offset_in_slots += RegisterImpl::max_slots_per_register * 3; ++ // besides, we ignore x0 ~ x4 because push_CPU_state won't push them on the stack. ++ for (int i = 7; i < RegisterImpl::number_of_registers; i++, sp_offset_in_slots += step_in_slots) { ++ Register r = as_Register(i); ++ if (r != xthread) { ++ oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset_in_slots + additional_frame_slots), r->as_VMReg()); ++ } ++ } + -+ // Unlock -+ Label unlock_done; -+ Label slow_path_unlock; -+ if (method->is_synchronized()) { ++ return oop_map; ++} + -+ // Get locked oop from the handle we passed to jni -+ __ ld(obj_reg, Address(oop_handle_reg, 0)); ++void RegisterSaver::restore_live_registers(MacroAssembler* masm) { ++ assert_cond(masm != NULL); ++#ifdef COMPILER2 ++ __ pop_CPU_state(_save_vectors, Matcher::scalable_vector_reg_size(T_BYTE)); ++#else ++ __ pop_CPU_state(_save_vectors); ++#endif ++ __ leave(); ++} + -+ Label done; ++// Is vector's size (in bytes) bigger than a size saved by default? ++// riscv does not ovlerlay the floating-point registers on vector registers like aarch64. ++bool SharedRuntime::is_wide_vector(int size) { ++ return UseRVV; ++} + -+ if (UseBiasedLocking) { -+ __ biased_locking_exit(obj_reg, old_hdr, done); -+ } ++// The java_calling_convention describes stack locations as ideal slots on ++// a frame with no abi restrictions. Since we must observe abi restrictions ++// (like the placement of the register window) the slots must be biased by ++// the following value. 
++static int reg2offset_in(VMReg r) { ++ // Account for saved fp and ra ++ // This should really be in_preserve_stack_slots ++ return r->reg2stack() * VMRegImpl::stack_slot_size; ++} + -+ // Simple recursive lock? ++static int reg2offset_out(VMReg r) { ++ return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; ++} + -+ __ ld(t0, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); -+ __ beqz(t0, done); ++// --------------------------------------------------------------------------- ++// Read the array of BasicTypes from a signature, and compute where the ++// arguments should go. Values in the VMRegPair regs array refer to 4-byte ++// quantities. Values less than VMRegImpl::stack0 are registers, those above ++// refer to 4-byte stack slots. All stack slots are based off of the stack pointer ++// as framesizes are fixed. ++// VMRegImpl::stack0 refers to the first slot 0(sp). ++// and VMRegImpl::stack0+1 refers to the memory word 4-byes higher. Register ++// up to RegisterImpl::number_of_registers) are the 64-bit ++// integer registers. + -+ // Must save x10 if if it is live now because cmpxchg must use it -+ if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { -+ save_native_result(masm, ret_type, stack_slots); -+ } ++// Note: the INPUTS in sig_bt are in units of Java argument words, ++// which are 64-bit. The OUTPUTS are in 32-bit units. + ++// The Java calling convention is a "shifted" version of the C ABI. ++// By skipping the first C ABI register we can call non-static jni ++// methods with small numbers of arguments without having to shuffle ++// the arguments at all. Since we control the java ABI we ought to at ++// least get some advantage out of it. + -+ // get address of the stack lock -+ __ la(x10, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); -+ // get old displaced header -+ __ ld(old_hdr, Address(x10, 0)); ++int SharedRuntime::java_calling_convention(const BasicType *sig_bt, ++ VMRegPair *regs, ++ int total_args_passed) { ++ // Create the mapping between argument positions and ++ // registers. 
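The assignment rule implemented here is small enough to check by hand: the first eight integer-like arguments go to j_rarg0..j_rarg7, the first eight float/double arguments to j_farg0..j_farg7, and anything beyond that falls through to 4-byte stack slots that are always consumed in pairs, so every stack argument occupies a full 64-bit word. A minimal standalone sketch of that rule (not HotSpot code; ArgKind and Placement are simplified stand-ins for BasicType and VMRegPair, and the 8 + 8 register counts are taken from the arrays that follow):

#include <cstdio>

enum class ArgKind { Int, Long, Float, Double, Oop };

struct Placement {
  bool on_stack;
  int  index;   // register number, or first 4-byte stack slot
};

// Mirrors the bookkeeping of the loop below: separate counters per register
// kind, spilling to the stack in 2-slot (8-byte) units once registers run out.
static Placement place(ArgKind k, int& int_args, int& fp_args, int& stk_args) {
  const int n_int_regs = 8, n_fp_regs = 8;
  const bool is_fp = (k == ArgKind::Float || k == ArgKind::Double);
  int& used = is_fp ? fp_args : int_args;
  const int limit = is_fp ? n_fp_regs : n_int_regs;
  if (used < limit) {
    return { false, used++ };          // passed in a register
  }
  Placement p = { true, stk_args };    // spilled to the outgoing area
  stk_args += 2;
  return p;
}

int main() {
  ArgKind sig[] = { ArgKind::Oop, ArgKind::Long, ArgKind::Double, ArgKind::Int };
  int int_args = 0, fp_args = 0, stk_args = 0;
  for (ArgKind k : sig) {
    Placement p = place(k, int_args, fp_args, stk_args);
    std::printf("%s %d\n", p.on_stack ? "stack slot" : "register #", p.index);
  }
  return 0;
}

With only four arguments everything lands in registers (integer registers 0, 1 and 2 for the oop, the long and the int; float register 0 for the double), which is the common case this shifted ABI is meant to keep cheap.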
++ static const Register INT_ArgReg[Argument::n_int_register_parameters_j] = { ++ j_rarg0, j_rarg1, j_rarg2, j_rarg3, ++ j_rarg4, j_rarg5, j_rarg6, j_rarg7 ++ }; ++ static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters_j] = { ++ j_farg0, j_farg1, j_farg2, j_farg3, ++ j_farg4, j_farg5, j_farg6, j_farg7 ++ }; + -+ // Atomic swap old header if oop still contains the stack lock -+ Label succeed; -+ __ cmpxchg_obj_header(x10, old_hdr, obj_reg, t0, succeed, &slow_path_unlock); -+ __ bind(succeed); ++ uint int_args = 0; ++ uint fp_args = 0; ++ uint stk_args = 0; // inc by 2 each time + -+ // slow path re-enters here -+ __ bind(unlock_done); -+ if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { -+ restore_native_result(masm, ret_type, stack_slots); ++ for (int i = 0; i < total_args_passed; i++) { ++ switch (sig_bt[i]) { ++ case T_BOOLEAN: // fall through ++ case T_CHAR: // fall through ++ case T_BYTE: // fall through ++ case T_SHORT: // fall through ++ case T_INT: ++ if (int_args < Argument::n_int_register_parameters_j) { ++ regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_VOID: ++ // halves of T_LONG or T_DOUBLE ++ assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); ++ regs[i].set_bad(); ++ break; ++ case T_LONG: // fall through ++ assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); ++ case T_OBJECT: // fall through ++ case T_ARRAY: // fall through ++ case T_ADDRESS: ++ if (int_args < Argument::n_int_register_parameters_j) { ++ regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_FLOAT: ++ if (fp_args < Argument::n_float_register_parameters_j) { ++ regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_DOUBLE: ++ assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); ++ if (fp_args < Argument::n_float_register_parameters_j) { ++ regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); + } -+ -+ __ bind(done); -+ } -+ -+ Label dtrace_method_exit, dtrace_method_exit_done; -+ { -+ int32_t offset = 0; -+ __ la_patchable(t0, ExternalAddress((address)&DTraceMethodProbes), offset); -+ __ lbu(t0, Address(t0, offset)); -+ __ bnez(t0, dtrace_method_exit); -+ __ bind(dtrace_method_exit_done); + } + -+ __ reset_last_Java_frame(false); ++ return align_up(stk_args, 2); ++} + -+ // Unbox oop result, e.g. JNIHandles::resolve result. -+ if (ret_type == T_OBJECT || ret_type == T_ARRAY) { -+ __ resolve_jobject(x10, xthread, t1); -+ } ++// Patch the callers callsite with entry to compiled code if it exists. 
++static void patch_callers_callsite(MacroAssembler *masm) { ++ assert_cond(masm != NULL); ++ Label L; ++ __ ld(t0, Address(xmethod, in_bytes(Method::code_offset()))); ++ __ beqz(t0, L); + -+ if (CheckJNICalls) { -+ // clear_pending_jni_exception_check -+ __ sd(zr, Address(xthread, JavaThread::pending_jni_exception_check_fn_offset())); -+ } ++ __ enter(); ++ __ push_CPU_state(); + -+ if (!is_critical_native) { -+ // reset handle block -+ __ ld(x12, Address(xthread, JavaThread::active_handles_offset())); -+ __ sd(zr, Address(x12, JNIHandleBlock::top_offset_in_bytes())); -+ } ++ // VM needs caller's callsite ++ // VM needs target method ++ // This needs to be a long call since we will relocate this adapter to ++ // the codeBuffer and it may not reach + -+ __ leave(); ++#ifndef PRODUCT ++ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); ++#endif + -+ if (!is_critical_native) { -+ // Any exception pending? -+ __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset()))); -+ __ bnez(t0, exception_pending); -+ } ++ __ mv(c_rarg0, xmethod); ++ __ mv(c_rarg1, ra); ++ int32_t offset = 0; ++ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)), offset); ++ __ jalr(x1, t0, offset); + -+ // We're done -+ __ ret(); ++ // Explicit fence.i required because fixup_callers_callsite may change the code ++ // stream. ++ __ safepoint_ifence(); + -+ // Unexpected paths are out of line and go here ++ __ pop_CPU_state(); ++ // restore sp ++ __ leave(); ++ __ bind(L); ++} + -+ if (!is_critical_native) { -+ // forward the exception -+ __ bind(exception_pending); ++static void gen_c2i_adapter(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs, ++ Label& skip_fixup) { ++ // Before we get into the guts of the C2I adapter, see if we should be here ++ // at all. We've come from compiled code and are attempting to jump to the ++ // interpreter, which means the caller made a static call to get here ++ // (vcalls always get a compiled target if there is one). Check for a ++ // compiled target. If there is one, we need to patch the caller's call. ++ patch_callers_callsite(masm); + -+ // and forward the exception -+ __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); -+ } ++ __ bind(skip_fixup); + -+ // Slow path locking & unlocking -+ if (method->is_synchronized()) { ++ int words_pushed = 0; + -+ __ block_comment("Slow path lock {"); -+ __ bind(slow_path_lock); ++ // Since all args are passed on the stack, total_args_passed * ++ // Interpreter::stackElementSize is the space we need. + -+ // has last_Java_frame setup. 
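Concretely, for a signature the compiler presents as T_LONG, T_VOID, T_OBJECT, T_BOOLEAN, the formula above puts the long's 64-bit payload in the T_VOID slot at the lower offset and leaves the nominal T_LONG slot unused (debug builds fill it with a junk pattern). A standalone sketch (not HotSpot code; the 8-byte slot size is an assumed 64-bit Interpreter::stackElementSize) that just evaluates those offsets:

#include <cstdio>
#include <cstring>

int main() {
  const int slot = 8;   // assumed Interpreter::stackElementSize on a 64-bit VM
  const char* sig[] = { "T_LONG", "T_VOID", "T_OBJECT", "T_BOOLEAN" };
  const int total = sizeof(sig) / sizeof(sig[0]);

  for (int i = 0; i < total; i++) {
    if (std::strcmp(sig[i], "T_VOID") == 0) continue;   // second half of the long
    int st_off   = (total - i - 1) * slot;              // same formula as above
    int next_off = st_off - slot;
    bool wide = (i + 1 < total) && std::strcmp(sig[i + 1], "T_VOID") == 0;
    // Longs/doubles store their payload at next_off; everything else at st_off.
    std::printf("%-9s payload at sp+%d\n", sig[i], wide ? next_off : st_off);
  }
  return 0;
}

This reports the long at sp+16 and the object and boolean at sp+8 and sp+0, which are the offsets the stores below use once sp has been dropped by extraspace.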
No exceptions so do vanilla call not call_VM -+ // args are (oop obj, BasicLock* lock, JavaThread* thread) ++ int extraspace = total_args_passed * Interpreter::stackElementSize; + -+ // protect the args we've loaded -+ save_args(masm, total_c_args, c_arg, out_regs); ++ __ mv(x30, sp); + -+ __ mv(c_rarg0, obj_reg); -+ __ mv(c_rarg1, lock_reg); -+ __ mv(c_rarg2, xthread); ++ // stack is aligned, keep it that way ++ extraspace = align_up(extraspace, 2 * wordSize); + -+ // Not a leaf but we have last_Java_frame setup as we want -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), 3); -+ restore_args(masm, total_c_args, c_arg, out_regs); ++ if (extraspace) { ++ __ sub(sp, sp, extraspace); ++ } + -+#ifdef ASSERT -+ { Label L; -+ __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset()))); -+ __ beqz(t0, L); -+ __ stop("no pending exception allowed on exit from monitorenter"); -+ __ bind(L); ++ // Now write the args into the outgoing interpreter space ++ for (int i = 0; i < total_args_passed; i++) { ++ if (sig_bt[i] == T_VOID) { ++ assert(i > 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "missing half"); ++ continue; + } -+#endif -+ __ j(lock_done); + -+ __ block_comment("} Slow path lock"); ++ // offset to start parameters ++ int st_off = (total_args_passed - i - 1) * Interpreter::stackElementSize; ++ int next_off = st_off - Interpreter::stackElementSize; + -+ __ block_comment("Slow path unlock {"); -+ __ bind(slow_path_unlock); ++ // Say 4 args: ++ // i st_off ++ // 0 32 T_LONG ++ // 1 24 T_VOID ++ // 2 16 T_OBJECT ++ // 3 8 T_BOOL ++ // - 0 return address ++ // ++ // However to make thing extra confusing. Because we can fit a Java long/double in ++ // a single slot on a 64 bt vm and it would be silly to break them up, the interpreter ++ // leaves one slot empty and only stores to a single slot. In this case the ++ // slot that is occupied is the T_VOID slot. See I said it was confusing. 
+ -+ if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { -+ save_native_result(masm, ret_type, stack_slots); ++ VMReg r_1 = regs[i].first(); ++ VMReg r_2 = regs[i].second(); ++ if (!r_1->is_valid()) { ++ assert(!r_2->is_valid(), ""); ++ continue; + } ++ if (r_1->is_stack()) { ++ // memory to memory use t0 ++ int ld_off = (r_1->reg2stack() * VMRegImpl::stack_slot_size ++ + extraspace ++ + words_pushed * wordSize); ++ if (!r_2->is_valid()) { ++ __ lwu(t0, Address(sp, ld_off)); ++ __ sd(t0, Address(sp, st_off), /*temp register*/esp); ++ } else { ++ __ ld(t0, Address(sp, ld_off), /*temp register*/esp); + -+ __ mv(c_rarg2, xthread); -+ __ la(c_rarg1, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); -+ __ mv(c_rarg0, obj_reg); ++ // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG ++ // T_DOUBLE and T_LONG use two slots in the interpreter ++ if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { ++ // ld_off == LSW, ld_off+wordSize == MSW ++ // st_off == MSW, next_off == LSW ++ __ sd(t0, Address(sp, next_off), /*temp register*/esp); ++#ifdef ASSERT ++ // Overwrite the unused slot with known junk ++ __ li(t0, 0xdeadffffdeadaaaaul); ++ __ sd(t0, Address(sp, st_off), /*temp register*/esp); ++#endif /* ASSERT */ ++ } else { ++ __ sd(t0, Address(sp, st_off), /*temp register*/esp); ++ } ++ } ++ } else if (r_1->is_Register()) { ++ Register r = r_1->as_Register(); ++ if (!r_2->is_valid()) { ++ // must be only an int (or less ) so move only 32bits to slot ++ __ sd(r, Address(sp, st_off)); ++ } else { ++ // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG ++ // T_DOUBLE and T_LONG use two slots in the interpreter ++ if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { ++ // long/double in gpr ++#ifdef ASSERT ++ // Overwrite the unused slot with known junk ++ __ li(t0, 0xdeadffffdeadaaabul); ++ __ sd(t0, Address(sp, st_off), /*temp register*/esp); ++#endif /* ASSERT */ ++ __ sd(r, Address(sp, next_off)); ++ } else { ++ __ sd(r, Address(sp, st_off)); ++ } ++ } ++ } else { ++ assert(r_1->is_FloatRegister(), ""); ++ if (!r_2->is_valid()) { ++ // only a float use just part of the slot ++ __ fsw(r_1->as_FloatRegister(), Address(sp, st_off)); ++ } else { ++#ifdef ASSERT ++ // Overwrite the unused slot with known junk ++ __ li(t0, 0xdeadffffdeadaaacul); ++ __ sd(t0, Address(sp, st_off), /*temp register*/esp); ++#endif /* ASSERT */ ++ __ fsd(r_1->as_FloatRegister(), Address(sp, next_off)); ++ } ++ } ++ } + -+ // Save pending exception around call to VM (which contains an EXCEPTION_MARK) -+ // NOTE that obj_reg == x9 currently -+ __ ld(x9, Address(xthread, in_bytes(Thread::pending_exception_offset()))); -+ __ sd(zr, Address(xthread, in_bytes(Thread::pending_exception_offset()))); ++ __ mv(esp, sp); // Interp expects args on caller's expression stack + -+ __ rt_call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C)); ++ __ ld(t0, Address(xmethod, in_bytes(Method::interpreter_entry_offset()))); ++ __ jr(t0); ++} + -+#ifdef ASSERT -+ { -+ Label L; -+ __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset()))); -+ __ beqz(t0, L); -+ __ stop("no pending exception allowed on exit complete_monitor_unlocking_C"); -+ __ bind(L); -+ } -+#endif /* ASSERT */ ++void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs) { ++ // Cut-out for having no stack args. 
++ int comp_words_on_stack = align_up(comp_args_on_stack * VMRegImpl::stack_slot_size, wordSize) >> LogBytesPerWord; ++ if (comp_args_on_stack != 0) { ++ __ sub(t0, sp, comp_words_on_stack * wordSize); ++ __ andi(sp, t0, -16); ++ } + -+ __ sd(x9, Address(xthread, in_bytes(Thread::pending_exception_offset()))); ++ // Will jump to the compiled code just as if compiled code was doing it. ++ // Pre-load the register-jump target early, to schedule it better. ++ __ ld(t1, Address(xmethod, in_bytes(Method::from_compiled_offset()))); + -+ if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { -+ restore_native_result(masm, ret_type, stack_slots); ++ // Now generate the shuffle code. ++ for (int i = 0; i < total_args_passed; i++) { ++ if (sig_bt[i] == T_VOID) { ++ assert(i > 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "missing half"); ++ continue; + } -+ __ j(unlock_done); + -+ __ block_comment("} Slow path unlock"); ++ // Pick up 0, 1 or 2 words from SP+offset. + -+ } // synchronized ++ assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), ++ "scrambled load targets?"); ++ // Load in argument order going down. ++ int ld_off = (total_args_passed - i - 1) * Interpreter::stackElementSize; ++ // Point to interpreter value (vs. tag) ++ int next_off = ld_off - Interpreter::stackElementSize; + -+ // SLOW PATH Reguard the stack if needed ++ VMReg r_1 = regs[i].first(); ++ VMReg r_2 = regs[i].second(); ++ if (!r_1->is_valid()) { ++ assert(!r_2->is_valid(), ""); ++ continue; ++ } ++ if (r_1->is_stack()) { ++ // Convert stack slot to an SP offset (+ wordSize to account for return address ) ++ int st_off = regs[i].first()->reg2stack() * VMRegImpl::stack_slot_size; ++ if (!r_2->is_valid()) { ++ __ lw(t0, Address(esp, ld_off)); ++ __ sd(t0, Address(sp, st_off), /*temp register*/t2); ++ } else { ++ // ++ // We are using two optoregs. This can be either T_OBJECT, ++ // T_ADDRESS, T_LONG, or T_DOUBLE the interpreter allocates ++ // two slots but only uses one for thr T_LONG or T_DOUBLE case ++ // So we must adjust where to pick up the data to match the ++ // interpreter. ++ // ++ // Interpreter local[n] == MSW, local[n+1] == LSW however locals ++ // are accessed as negative so LSW is at LOW address + -+ __ bind(reguard); -+ save_native_result(masm, ret_type, stack_slots); -+ __ rt_call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)); -+ restore_native_result(masm, ret_type, stack_slots); -+ // and continue -+ __ j(reguard_done); ++ // ld_off is MSW so get LSW ++ const int offset = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ? ++ next_off : ld_off; ++ __ ld(t0, Address(esp, offset)); ++ // st_off is LSW (i.e. reg.first()) ++ __ sd(t0, Address(sp, st_off), /*temp register*/t2); ++ } ++ } else if (r_1->is_Register()) { // Register argument ++ Register r = r_1->as_Register(); ++ if (r_2->is_valid()) { ++ // ++ // We are using two VMRegs. This can be either T_OBJECT, ++ // T_ADDRESS, T_LONG, or T_DOUBLE the interpreter allocates ++ // two slots but only uses one for thr T_LONG or T_DOUBLE case ++ // So we must adjust where to pick up the data to match the ++ // interpreter. + -+ // SLOW PATH safepoint -+ { -+ __ block_comment("safepoint {"); -+ __ bind(safepoint_in_progress); ++ const int offset = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ? ++ next_off : ld_off; + -+ // Don't use call_VM as it will see a possible pending exception and forward it -+ // and never return here preventing us from clearing _last_native_pc down below. 
-+ // -+ save_native_result(masm, ret_type, stack_slots); -+ __ mv(c_rarg0, xthread); -+#ifndef PRODUCT -+ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); -+#endif -+ int32_t offset = 0; -+ if (!is_critical_native) { -+ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)), offset); ++ // this can be a misaligned move ++ __ ld(r, Address(esp, offset)); ++ } else { ++ // sign extend and use a full word? ++ __ lw(r, Address(esp, ld_off)); ++ } + } else { -+ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition)), offset); -+ } -+ __ jalr(x1, t0, offset); -+ // Restore any method result value -+ restore_native_result(masm, ret_type, stack_slots); -+ -+ if (is_critical_native) { -+ // The call above performed the transition to thread_in_Java so -+ // skip the transition logic above. -+ __ j(after_transition); ++ if (!r_2->is_valid()) { ++ __ flw(r_1->as_FloatRegister(), Address(esp, ld_off)); ++ } else { ++ __ fld(r_1->as_FloatRegister(), Address(esp, next_off)); ++ } + } -+ -+ __ j(safepoint_in_progress_done); -+ __ block_comment("} safepoint"); + } + -+ // SLOW PATH dtrace support -+ { -+ __ block_comment("dtrace entry {"); -+ __ bind(dtrace_method_entry); ++ // 6243940 We might end up in handle_wrong_method if ++ // the callee is deoptimized as we race thru here. If that ++ // happens we don't want to take a safepoint because the ++ // caller frame will look interpreted and arguments are now ++ // "compiled" so it is much better to make this transition ++ // invisible to the stack walking code. Unfortunately if ++ // we try and find the callee by normal means a safepoint ++ // is possible. So we stash the desired callee in the thread ++ // and the vm will find there should this case occur. + -+ // We have all of the arguments setup at this point. We must not touch any register -+ // argument registers at this point (what if we save/restore them there are no oop? ++ __ sd(xmethod, Address(xthread, JavaThread::callee_target_offset())); + -+ save_args(masm, total_c_args, c_arg, out_regs); -+ __ mov_metadata(c_rarg1, method()); -+ __ call_VM_leaf( -+ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), -+ xthread, c_rarg1); -+ restore_args(masm, total_c_args, c_arg, out_regs); -+ __ j(dtrace_method_entry_done); -+ __ block_comment("} dtrace entry"); -+ } -+ -+ { -+ __ block_comment("dtrace exit {"); -+ __ bind(dtrace_method_exit); -+ save_native_result(masm, ret_type, stack_slots); -+ __ mov_metadata(c_rarg1, method()); -+ __ call_VM_leaf( -+ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), -+ xthread, c_rarg1); -+ restore_native_result(masm, ret_type, stack_slots); -+ __ j(dtrace_method_exit_done); -+ __ block_comment("} dtrace exit"); -+ } ++ __ jr(t1); ++} + -+ __ flush(); ++// --------------------------------------------------------------- ++AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs, ++ AdapterFingerPrint* fingerprint) { ++ address i2c_entry = __ pc(); ++ gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); + -+ nmethod *nm = nmethod::new_native_nmethod(method, -+ compile_id, -+ masm->code(), -+ vep_offset, -+ frame_complete, -+ stack_slots / VMRegImpl::slots_per_word, -+ (is_static ? 
in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)), -+ in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size), -+ oop_maps); -+ assert(nm != NULL, "create native nmethod fail!"); -+ if (is_critical_native) { -+ nm->set_lazy_critical_native(true); -+ } ++ address c2i_unverified_entry = __ pc(); ++ Label skip_fixup; + -+ return nm; -+} ++ Label ok; + -+// this function returns the adjust size (in number of words) to a c2i adapter -+// activation for use during deoptimization -+int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) { -+ assert(callee_locals >= callee_parameters, -+ "test and remove; got more parms than locals"); -+ if (callee_locals < callee_parameters) { -+ return 0; // No adjustment for negative locals -+ } -+ int diff = (callee_locals - callee_parameters) * Interpreter::stackElementWords; -+ // diff is counted in stack words -+ return align_up(diff, 2); -+} ++ const Register holder = t1; ++ const Register receiver = j_rarg0; ++ const Register tmp = t2; // A call-clobbered register not used for arg passing + -+//------------------------------generate_deopt_blob---------------------------- -+void SharedRuntime::generate_deopt_blob() { -+ // Allocate space for the code -+ ResourceMark rm; -+ // Setup code generation tools -+ int pad = 0; -+ CodeBuffer buffer("deopt_blob", 2048 + pad, 1024); -+ MacroAssembler* masm = new MacroAssembler(&buffer); -+ int frame_size_in_words = -1; -+ OopMap* map = NULL; -+ OopMapSet *oop_maps = new OopMapSet(); -+ assert_cond(masm != NULL && oop_maps != NULL); -+ RegisterSaver reg_saver(COMPILER2_OR_JVMCI != 0); ++ // ------------------------------------------------------------------------- ++ // Generate a C2I adapter. On entry we know xmethod holds the Method* during calls ++ // to the interpreter. The args start out packed in the compiled layout. They ++ // need to be unpacked into the interpreter layout. This will almost always ++ // require some stack space. We grow the current (compiled) stack, then repack ++ // the args. We finally end in a jump to the generic interpreter entry point. ++ // On exit from the interpreter, the interpreter will restore our SP (lest the ++ // compiled code, which relys solely on SP and not FP, get sick). + -+ // ------------- -+ // This code enters when returning to a de-optimized nmethod. A return -+ // address has been pushed on the the stack, and return values are in -+ // registers. -+ // If we are doing a normal deopt then we were called from the patched -+ // nmethod from the point we returned to the nmethod. So the return -+ // address on the stack is wrong by NativeCall::instruction_size -+ // We will adjust the value so it looks like we have the original return -+ // address on the stack (like when we eagerly deoptimized). -+ // In the case of an exception pending when deoptimizing, we enter -+ // with a return address on the stack that points after the call we patched -+ // into the exception handler. We have the following register state from, -+ // e.g., the forward exception stub (see stubGenerator_riscv.cpp). -+ // x10: exception oop -+ // x9: exception handler -+ // x13: throwing pc -+ // So in this case we simply jam x13 into the useless return address and -+ // the stack looks just like we want. -+ // -+ // At this point we need to de-opt. We save the argument return -+ // registers. We call the first C routine, fetch_unroll_info(). 
This -+ // routine captures the return values and returns a structure which -+ // describes the current frame size and the sizes of all replacement frames. -+ // The current frame is compiled code and may contain many inlined -+ // functions, each with their own JVM state. We pop the current frame, then -+ // push all the new frames. Then we call the C routine unpack_frames() to -+ // populate these frames. Finally unpack_frames() returns us the new target -+ // address. Notice that callee-save registers are BLOWN here; they have -+ // already been captured in the vframeArray at the time the return PC was -+ // patched. -+ address start = __ pc(); -+ Label cont; ++ { ++ __ block_comment("c2i_unverified_entry {"); ++ __ load_klass(t0, receiver); ++ __ ld(tmp, Address(holder, CompiledICHolder::holder_klass_offset())); ++ __ ld(xmethod, Address(holder, CompiledICHolder::holder_metadata_offset())); ++ __ beq(t0, tmp, ok); ++ __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); + -+ // Prolog for non exception case! ++ __ bind(ok); ++ // Method might have been compiled since the call site was patched to ++ // interpreted; if that is the case treat it as a miss so we can get ++ // the call site corrected. ++ __ ld(t0, Address(xmethod, in_bytes(Method::code_offset()))); ++ __ beqz(t0, skip_fixup); ++ __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); ++ __ block_comment("} c2i_unverified_entry"); ++ } + -+ // Save everything in sight. -+ map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words); ++ address c2i_entry = __ pc(); + -+ // Normal deoptimization. Save exec mode for unpack_frames. -+ __ mvw(xcpool, Deoptimization::Unpack_deopt); // callee-saved -+ __ j(cont); ++ // Class initialization barrier for static methods ++ address c2i_no_clinit_check_entry = NULL; ++ if (VM_Version::supports_fast_class_init_checks()) { ++ Label L_skip_barrier; + -+ int reexecute_offset = __ pc() - start; ++ { // Bypass the barrier for non-static methods ++ __ lwu(t0, Address(xmethod, Method::access_flags_offset())); ++ __ andi(t1, t0, JVM_ACC_STATIC); ++ __ beqz(t1, L_skip_barrier); // non-static ++ } + -+ // Reexecute case -+ // return address is the pc describes what bci to do re-execute at ++ __ load_method_holder(t1, xmethod); ++ __ clinit_barrier(t1, t0, &L_skip_barrier); ++ __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); + -+ // No need to update map as each call to save_live_registers will produce identical oopmap -+ (void) reg_saver.save_live_registers(masm, 0, &frame_size_in_words); ++ __ bind(L_skip_barrier); ++ c2i_no_clinit_check_entry = __ pc(); ++ } + -+ __ mvw(xcpool, Deoptimization::Unpack_reexecute); // callee-saved -+ __ j(cont); ++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->c2i_entry_barrier(masm); + -+ int exception_offset = __ pc() - start; ++ gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); + -+ // Prolog for exception case ++ __ flush(); ++ return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry, c2i_no_clinit_check_entry); ++} + -+ // all registers are dead at this entry point, except for x10, and -+ // x13 which contain the exception oop and exception pc -+ // respectively. Set them in TLS and fall thru to the -+ // unpack_with_exception_in_tls entry point. 
++int SharedRuntime::vector_calling_convention(VMRegPair *regs, ++ uint num_bits, ++ uint total_args_passed) { ++ Unimplemented(); ++ return 0; ++} + -+ __ sd(x13, Address(xthread, JavaThread::exception_pc_offset())); -+ __ sd(x10, Address(xthread, JavaThread::exception_oop_offset())); ++int SharedRuntime::c_calling_convention(const BasicType *sig_bt, ++ VMRegPair *regs, ++ VMRegPair *regs2, ++ int total_args_passed) { ++ assert(regs2 == NULL, "not needed on riscv"); + -+ int exception_in_tls_offset = __ pc() - start; ++ // We return the amount of VMRegImpl stack slots we need to reserve for all ++ // the arguments NOT counting out_preserve_stack_slots. + -+ // new implementation because exception oop is now passed in JavaThread ++ static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = { ++ c_rarg0, c_rarg1, c_rarg2, c_rarg3, ++ c_rarg4, c_rarg5, c_rarg6, c_rarg7 ++ }; ++ static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters_c] = { ++ c_farg0, c_farg1, c_farg2, c_farg3, ++ c_farg4, c_farg5, c_farg6, c_farg7 ++ }; + -+ // Prolog for exception case -+ // All registers must be preserved because they might be used by LinearScan -+ // Exceptiop oop and throwing PC are passed in JavaThread -+ // tos: stack at point of call to method that threw the exception (i.e. only -+ // args are on the stack, no return address) ++ uint int_args = 0; ++ uint fp_args = 0; ++ uint stk_args = 0; // inc by 2 each time + -+ // The return address pushed by save_live_registers will be patched -+ // later with the throwing pc. The correct value is not available -+ // now because loading it from memory would destroy registers. ++ for (int i = 0; i < total_args_passed; i++) { ++ switch (sig_bt[i]) { ++ case T_BOOLEAN: // fall through ++ case T_CHAR: // fall through ++ case T_BYTE: // fall through ++ case T_SHORT: // fall through ++ case T_INT: ++ if (int_args < Argument::n_int_register_parameters_c) { ++ regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_LONG: // fall through ++ assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); ++ case T_OBJECT: // fall through ++ case T_ARRAY: // fall through ++ case T_ADDRESS: // fall through ++ case T_METADATA: ++ if (int_args < Argument::n_int_register_parameters_c) { ++ regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_FLOAT: ++ if (fp_args < Argument::n_float_register_parameters_c) { ++ regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg()); ++ } else if (int_args < Argument::n_int_register_parameters_c) { ++ regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_DOUBLE: ++ assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); ++ if (fp_args < Argument::n_float_register_parameters_c) { ++ regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg()); ++ } else if (int_args < Argument::n_int_register_parameters_c) { ++ regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_VOID: // Halves of longs and doubles ++ assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); ++ regs[i].set_bad(); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } + -+ // NB: The SP at this 
point must be the SP of the method that is -+ // being deoptimized. Deoptimization assumes that the frame created -+ // here by save_live_registers is immediately below the method's SP. -+ // This is a somewhat fragile mechanism. ++ return stk_args; ++} + -+ // Save everything in sight. -+ map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words); ++// On 64 bit we will store integer like items to the stack as ++// 64 bits items (riscv64 abi) even though java would only store ++// 32bits for a parameter. On 32bit it will simply be 32 bits ++// So this routine will do 32->32 on 32bit and 32->64 on 64bit ++static void move32_64(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ assert_cond(masm != NULL); ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ // stack to stack ++ __ ld(t0, Address(fp, reg2offset_in(src.first()))); ++ __ sd(t0, Address(sp, reg2offset_out(dst.first()))); ++ } else { ++ // stack to reg ++ __ lw(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); ++ } ++ } else if (dst.first()->is_stack()) { ++ // reg to stack ++ __ sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first()))); ++ } else { ++ if (dst.first() != src.first()) { ++ // 32bits extend sign ++ __ addw(dst.first()->as_Register(), src.first()->as_Register(), zr); ++ } ++ } ++} + -+ // Now it is safe to overwrite any register ++// An oop arg. Must pass a handle not the oop itself ++static void object_move(MacroAssembler* masm, ++ OopMap* map, ++ int oop_handle_offset, ++ int framesize_in_slots, ++ VMRegPair src, ++ VMRegPair dst, ++ bool is_receiver, ++ int* receiver_offset) { ++ assert_cond(masm != NULL && map != NULL && receiver_offset != NULL); ++ // must pass a handle. First figure out the location we use as a handle ++ Register rHandle = dst.first()->is_stack() ? t1 : dst.first()->as_Register(); + -+ // Deopt during an exception. Save exec mode for unpack_frames. -+ __ mv(xcpool, Deoptimization::Unpack_exception); // callee-saved ++ // See if oop is NULL if it is we need no handle + -+ // load throwing pc from JavaThread and patch it as the return address -+ // of the current frame. 
Then clear the field in JavaThread ++ if (src.first()->is_stack()) { + -+ __ ld(x13, Address(xthread, JavaThread::exception_pc_offset())); -+ __ sd(x13, Address(fp, frame::return_addr_offset * wordSize)); -+ __ sd(zr, Address(xthread, JavaThread::exception_pc_offset())); ++ // Oop is already on the stack as an argument ++ int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots(); ++ map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); ++ if (is_receiver) { ++ *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size; ++ } + -+#ifdef ASSERT -+ // verify that there is really an exception oop in JavaThread -+ __ ld(x10, Address(xthread, JavaThread::exception_oop_offset())); -+ __ verify_oop(x10); ++ __ ld(t0, Address(fp, reg2offset_in(src.first()))); ++ __ la(rHandle, Address(fp, reg2offset_in(src.first()))); ++ // conditionally move a NULL ++ Label notZero1; ++ __ bnez(t0, notZero1); ++ __ mv(rHandle, zr); ++ __ bind(notZero1); ++ } else { + -+ // verify that there is no pending exception -+ Label no_pending_exception; -+ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); -+ __ beqz(t0, no_pending_exception); -+ __ stop("must not have pending exception here"); -+ __ bind(no_pending_exception); -+#endif ++ // Oop is in an a register we must store it to the space we reserve ++ // on the stack for oop_handles and pass a handle if oop is non-NULL + -+ __ bind(cont); ++ const Register rOop = src.first()->as_Register(); ++ int oop_slot = -1; ++ if (rOop == j_rarg0) { ++ oop_slot = 0; ++ } else if (rOop == j_rarg1) { ++ oop_slot = 1; ++ } else if (rOop == j_rarg2) { ++ oop_slot = 2; ++ } else if (rOop == j_rarg3) { ++ oop_slot = 3; ++ } else if (rOop == j_rarg4) { ++ oop_slot = 4; ++ } else if (rOop == j_rarg5) { ++ oop_slot = 5; ++ } else if (rOop == j_rarg6) { ++ oop_slot = 6; ++ } else { ++ assert(rOop == j_rarg7, "wrong register"); ++ oop_slot = 7; ++ } + -+ // Call C code. Need thread and this frame, but NOT official VM entry -+ // crud. We cannot block on this call, no GC can happen. -+ // -+ // UnrollBlock* fetch_unroll_info(JavaThread* thread) ++ oop_slot = oop_slot * VMRegImpl::slots_per_word + oop_handle_offset; ++ int offset = oop_slot * VMRegImpl::stack_slot_size; + -+ // fetch_unroll_info needs to call last_java_frame(). ++ map->set_oop(VMRegImpl::stack2reg(oop_slot)); ++ // Store oop in handle area, may be NULL ++ __ sd(rOop, Address(sp, offset)); ++ if (is_receiver) { ++ *receiver_offset = offset; ++ } + -+ Label retaddr; -+ __ set_last_Java_frame(sp, noreg, retaddr, t0); -+#ifdef ASSERT -+ { -+ Label L; -+ __ ld(t0, Address(xthread, -+ JavaThread::last_Java_fp_offset())); -+ __ beqz(t0, L); -+ __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared"); -+ __ bind(L); ++ //rOop maybe the same as rHandle ++ if (rOop == rHandle) { ++ Label isZero; ++ __ beqz(rOop, isZero); ++ __ la(rHandle, Address(sp, offset)); ++ __ bind(isZero); ++ } else { ++ Label notZero2; ++ __ la(rHandle, Address(sp, offset)); ++ __ bnez(rOop, notZero2); ++ __ mv(rHandle, zr); ++ __ bind(notZero2); ++ } + } -+#endif // ASSERT -+ __ mv(c_rarg0, xthread); -+ __ mv(c_rarg1, xcpool); -+ int32_t offset = 0; -+ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info)), offset); -+ __ jalr(x1, t0, offset); -+ __ bind(retaddr); + -+ // Need to have an oopmap that tells fetch_unroll_info where to -+ // find any register it might need. 
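// [Editorial note: illustrative sketch, not part of this patch.]
// object_move above never hands the native callee a raw oop: it stores the oop
// into a reserved, oop-map-visible stack slot and passes the slot's address as
// the handle, except that a NULL oop must become a NULL handle ("conditionally
// move a NULL"). The types below are stand-ins chosen for the sketch.
#include <cstdint>
#include <cstdio>

using oop     = std::intptr_t;  // stand-in: an oop is just a word here
using jhandle = oop*;           // a handle is the address of a slot holding the oop

jhandle make_handle(oop value, oop* slot) {
  *slot = value;                        // the slot is what the oop map records
  return value != 0 ? slot : nullptr;   // NULL oop -> NULL handle, not &slot
}

int main() {
  oop slot0 = 0, slot1 = 0;
  jhandle h1 = make_handle(0x1000, &slot0);  // non-null oop: callee sees &slot0
  jhandle h2 = make_handle(0,      &slot1);  // null oop: callee sees nullptr
  std::printf("h1=%p (slot at %p), h2=%p\n", (void*)h1, (void*)&slot0, (void*)h2);
  return 0;
}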
-+ oop_maps->add_gc_map(__ pc() - start, map); ++ // If arg is on the stack then place it otherwise it is already in correct reg. ++ if (dst.first()->is_stack()) { ++ __ sd(rHandle, Address(sp, reg2offset_out(dst.first()))); ++ } ++} + -+ __ reset_last_Java_frame(false); ++// A float arg may have to do float reg int reg conversion ++static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ assert(src.first()->is_stack() && dst.first()->is_stack() || ++ src.first()->is_reg() && dst.first()->is_reg() || src.first()->is_stack() && dst.first()->is_reg(), "Unexpected error"); ++ assert_cond(masm != NULL); ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ __ lwu(t0, Address(fp, reg2offset_in(src.first()))); ++ __ sw(t0, Address(sp, reg2offset_out(dst.first()))); ++ } else if (dst.first()->is_Register()) { ++ __ lwu(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); ++ } else { ++ ShouldNotReachHere(); ++ } ++ } else if (src.first() != dst.first()) { ++ if (src.is_single_phys_reg() && dst.is_single_phys_reg()) { ++ __ fmv_s(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); ++ } else { ++ ShouldNotReachHere(); ++ } ++ } ++} + -+ // Load UnrollBlock* into x15 -+ __ mv(x15, x10); ++// A long move ++static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ assert_cond(masm != NULL); ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ // stack to stack ++ __ ld(t0, Address(fp, reg2offset_in(src.first()))); ++ __ sd(t0, Address(sp, reg2offset_out(dst.first()))); ++ } else { ++ // stack to reg ++ __ ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); ++ } ++ } else if (dst.first()->is_stack()) { ++ // reg to stack ++ __ sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first()))); ++ } else { ++ if (dst.first() != src.first()) { ++ __ mv(dst.first()->as_Register(), src.first()->as_Register()); ++ } ++ } ++} + -+ __ lwu(xcpool, Address(x15, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes())); -+ Label noException; -+ __ mv(t0, Deoptimization::Unpack_exception); -+ __ bne(xcpool, t0, noException); // Was exception pending? 
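// [Editorial note: illustrative sketch, not part of this patch.]
// move32_64 above uses "addw dst, src, zr", which keeps only the low 32 bits and
// sign-extends them, matching the RV64 convention that Java ints travel as
// sign-extended 64-bit values; long_move copies the full 64 bits unchanged.
// A portable restatement of that difference:
#include <cstdint>
#include <cstdio>

std::int64_t move32_64(std::int64_t src) {        // what addw/lw effectively do
  return (std::int64_t)(std::int32_t)src;         // truncate to 32 bits, then sign-extend
}

std::int64_t long_move(std::int64_t src) {        // what mv/ld/sd do
  return src;
}

int main() {
  std::int64_t dirty = 0x12345678ffffffffLL;      // upper half holds garbage
  std::printf("move32_64 -> %lld\n", (long long)move32_64(dirty));            // -1
  std::printf("long_move -> 0x%llx\n", (unsigned long long)long_move(dirty)); // unchanged
  return 0;
}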
-+ __ ld(x10, Address(xthread, JavaThread::exception_oop_offset())); -+ __ ld(x13, Address(xthread, JavaThread::exception_pc_offset())); -+ __ sd(zr, Address(xthread, JavaThread::exception_oop_offset())); -+ __ sd(zr, Address(xthread, JavaThread::exception_pc_offset())); ++// A double move ++static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ assert(src.first()->is_stack() && dst.first()->is_stack() || ++ src.first()->is_reg() && dst.first()->is_reg() || src.first()->is_stack() && dst.first()->is_reg(), "Unexpected error"); ++ assert_cond(masm != NULL); ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ __ ld(t0, Address(fp, reg2offset_in(src.first()))); ++ __ sd(t0, Address(sp, reg2offset_out(dst.first()))); ++ } else if (dst.first()-> is_Register()) { ++ __ ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); ++ } else { ++ ShouldNotReachHere(); ++ } ++ } else if (src.first() != dst.first()) { ++ if (src.is_single_phys_reg() && dst.is_single_phys_reg()) { ++ __ fmv_d(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); ++ } else { ++ ShouldNotReachHere(); ++ } ++ } ++} + -+ __ verify_oop(x10); ++void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { ++ assert_cond(masm != NULL); ++ // We always ignore the frame_slots arg and just use the space just below frame pointer ++ // which by this time is free to use ++ switch (ret_type) { ++ case T_FLOAT: ++ __ fsw(f10, Address(fp, -3 * wordSize)); ++ break; ++ case T_DOUBLE: ++ __ fsd(f10, Address(fp, -3 * wordSize)); ++ break; ++ case T_VOID: break; ++ default: { ++ __ sd(x10, Address(fp, -3 * wordSize)); ++ } ++ } ++} + -+ // Overwrite the result registers with the exception results. -+ __ sd(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10))); ++void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { ++ assert_cond(masm != NULL); ++ // We always ignore the frame_slots arg and just use the space just below frame pointer ++ // which by this time is free to use ++ switch (ret_type) { ++ case T_FLOAT: ++ __ flw(f10, Address(fp, -3 * wordSize)); ++ break; ++ case T_DOUBLE: ++ __ fld(f10, Address(fp, -3 * wordSize)); ++ break; ++ case T_VOID: break; ++ default: { ++ __ ld(x10, Address(fp, -3 * wordSize)); ++ } ++ } ++} + -+ __ bind(noException); ++static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { ++ assert_cond(masm != NULL && args != NULL); ++ RegSet x; ++ for ( int i = first_arg ; i < arg_count ; i++ ) { ++ if (args[i].first()->is_Register()) { ++ x = x + args[i].first()->as_Register(); ++ } else if (args[i].first()->is_FloatRegister()) { ++ __ addi(sp, sp, -2 * wordSize); ++ __ fsd(args[i].first()->as_FloatRegister(), Address(sp, 0)); ++ } ++ } ++ __ push_reg(x, sp); ++} + -+ // Only register save data is on the stack. -+ // Now restore the result registers. Everything else is either dead -+ // or captured in the vframeArray. 
-+ reg_saver.restore_result_registers(masm); ++static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { ++ assert_cond(masm != NULL && args != NULL); ++ RegSet x; ++ for ( int i = first_arg ; i < arg_count ; i++ ) { ++ if (args[i].first()->is_Register()) { ++ x = x + args[i].first()->as_Register(); ++ } else { ++ ; ++ } ++ } ++ __ pop_reg(x, sp); ++ for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) { ++ if (args[i].first()->is_Register()) { ++ ; ++ } else if (args[i].first()->is_FloatRegister()) { ++ __ fld(args[i].first()->as_FloatRegister(), Address(sp, 0)); ++ __ add(sp, sp, 2 * wordSize); ++ } ++ } ++} + -+ // All of the register save area has been popped of the stack. Only the -+ // return address remains. ++static void rt_call(MacroAssembler* masm, address dest) { ++ assert_cond(masm != NULL); ++ CodeBlob *cb = CodeCache::find_blob(dest); ++ if (cb) { ++ __ far_call(RuntimeAddress(dest)); ++ } else { ++ int32_t offset = 0; ++ __ la_patchable(t0, RuntimeAddress(dest), offset); ++ __ jalr(x1, t0, offset); ++ } ++} + -+ // Pop all the frames we must move/replace. -+ // -+ // Frame picture (youngest to oldest) -+ // 1: self-frame (no frame link) -+ // 2: deopting frame (no frame link) -+ // 3: caller of deopting frame (could be compiled/interpreted). -+ // -+ // Note: by leaving the return address of self-frame on the stack -+ // and using the size of frame 2 to adjust the stack -+ // when we are done the return to frame 3 will still be on the stack. -+ -+ // Pop deoptimized frame -+ __ lwu(x12, Address(x15, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes())); -+ __ sub(x12, x12, 2 * wordSize); -+ __ add(sp, sp, x12); -+ __ ld(fp, Address(sp, 0)); -+ __ ld(ra, Address(sp, wordSize)); -+ __ addi(sp, sp, 2 * wordSize); -+ // RA should now be the return address to the caller (3) -+ -+#ifdef ASSERT -+ // Compilers generate code that bang the stack by as much as the -+ // interpreter would need. So this stack banging should never -+ // trigger a fault. Verify that it does not on non product builds. 
-+ if (UseStackBanging) { -+ __ lwu(x9, Address(x15, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes())); -+ __ bang_stack_size(x9, x12); ++static void verify_oop_args(MacroAssembler* masm, ++ const methodHandle& method, ++ const BasicType* sig_bt, ++ const VMRegPair* regs) { ++ const Register temp_reg = x9; // not part of any compiled calling seq ++ if (VerifyOops) { ++ for (int i = 0; i < method->size_of_parameters(); i++) { ++ if (sig_bt[i] == T_OBJECT || ++ sig_bt[i] == T_ARRAY) { ++ VMReg r = regs[i].first(); ++ assert(r->is_valid(), "bad oop arg"); ++ if (r->is_stack()) { ++ __ ld(temp_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size)); ++ __ verify_oop(temp_reg); ++ } else { ++ __ verify_oop(r->as_Register()); ++ } ++ } ++ } + } -+#endif -+ // Load address of array of frame pcs into x12 -+ __ ld(x12, Address(x15, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes())); ++} + -+ // Load address of array of frame sizes into x14 -+ __ ld(x14, Address(x15, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes())); ++static void gen_special_dispatch(MacroAssembler* masm, ++ const methodHandle& method, ++ const BasicType* sig_bt, ++ const VMRegPair* regs) { ++ verify_oop_args(masm, method, sig_bt, regs); ++ vmIntrinsics::ID iid = method->intrinsic_id(); + -+ // Load counter into x13 -+ __ lwu(x13, Address(x15, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes())); ++ // Now write the args into the outgoing interpreter space ++ bool has_receiver = false; ++ Register receiver_reg = noreg; ++ int member_arg_pos = -1; ++ Register member_reg = noreg; ++ int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); ++ if (ref_kind != 0) { ++ member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument ++ member_reg = x9; // known to be free at this point ++ has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); ++ } else if (iid == vmIntrinsics::_invokeBasic || iid == vmIntrinsics::_linkToNative) { ++ has_receiver = true; ++ } else { ++ fatal("unexpected intrinsic id %d", vmIntrinsics::as_int(iid)); ++ } + -+ // Now adjust the caller's stack to make up for the extra locals -+ // but record the original sp so that we can save it in the skeletal interpreter -+ // frame and the stack walking of interpreter_sender will get the unextended sp -+ // value and not the "real" sp value. ++ if (member_reg != noreg) { ++ // Load the member_arg into register, if necessary. ++ SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs); ++ VMReg r = regs[member_arg_pos].first(); ++ if (r->is_stack()) { ++ __ ld(member_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size)); ++ } else { ++ // no data motion is needed ++ member_reg = r->as_Register(); ++ } ++ } + -+ const Register sender_sp = x16; ++ if (has_receiver) { ++ // Make sure the receiver is loaded into a register. ++ assert(method->size_of_parameters() > 0, "oob"); ++ assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); ++ VMReg r = regs[0].first(); ++ assert(r->is_valid(), "bad receiver arg"); ++ if (r->is_stack()) { ++ // Porting note: This assumes that compiled calling conventions always ++ // pass the receiver oop in a register. If this is not true on some ++ // platform, pick a temp and load the receiver from stack. 
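// [Editorial note: illustrative sketch, not part of this patch.]
// The removed deoptimization code above walks the UnrollBlock's frame sizes and
// frame counter and pushes one skeletal interpreter frame per entry, threading
// the previous sp through as sender_sp. The arithmetic below models that loop
// with plain integers (the struct and field names are simplified stand-ins):
#include <cstdint>
#include <cstdio>
#include <vector>

struct UnrollBlock {                        // stand-in for Deoptimization::UnrollBlock
  std::vector<std::int64_t> frame_sizes;    // bytes per skeletal frame
};

std::int64_t push_skeletal_frames(const UnrollBlock& ub, std::int64_t sp) {
  const std::int64_t wordSize = 8;
  std::int64_t sender_sp = sp;
  for (size_t i = 0; i < ub.frame_sizes.size(); i++) {
    sp -= 2 * wordSize;                              // enter(): push pc and fp by hand
    sp -= ub.frame_sizes[i] - 2 * wordSize;          // prolog: reserve the rest of the frame
    std::printf("frame %zu: sp=0x%llx sender_sp=0x%llx\n",
                i, (unsigned long long)sp, (unsigned long long)sender_sp);
    sender_sp = sp;                                  // becomes the next frame's sender_sp
  }
  return sp;
}

int main() {
  UnrollBlock ub{{96, 128, 80}};            // three inlined frames to rebuild
  push_skeletal_frames(ub, /*sp=*/0x10000);
  return 0;
}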
++      fatal("receiver always in a register");
++      receiver_reg = x12;  // known to be free at this point
++      __ ld(receiver_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size));
++    } else {
++      // no data motion is needed
++      receiver_reg = r->as_Register();
++    }
++  }
 +
++  // Figure out which address we are really jumping to:
++  MethodHandles::generate_method_handle_dispatch(masm, iid,
++                                                 receiver_reg, member_reg, /*for_compiler_entry:*/ true);
++}
 +
++// ---------------------------------------------------------------------------
++// Generate a native wrapper for a given method. The method takes arguments
++// in the Java compiled code convention, marshals them to the native
++// convention (handlizes oops, etc), transitions to native, makes the call,
++// returns to java state (possibly blocking), unhandlizes any result and
++// returns.
++//
++// Critical native functions are a shorthand for the use of
++// GetPrimitiveArrayCritical and disallow the use of any other JNI
++// functions. The wrapper is expected to unpack the arguments before
++// passing them to the callee and perform checks before and after the
++// native call to ensure that the GCLocker
++// lock_critical/unlock_critical semantics are followed. Some other
++// parts of JNI setup are skipped, like the tear down of the JNI handle
++// block and the check for pending exceptions, since it's impossible for them
++// to be thrown.
++//
++// They are roughly structured like this:
++//    if (GCLocker::needs_gc()) SharedRuntime::block_for_jni_critical()
++//    transition to thread_in_native
++//    unpack array arguments and call native entry point
++//    check for safepoint in progress
++//    check if any thread suspend flags are set
++//    call into JVM and possibly unlock the JNI critical
++//    if a GC was suppressed while in the critical native.
++// transition back to thread_in_Java ++// return to caller ++// ++nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, ++ const methodHandle& method, ++ int compile_id, ++ BasicType* in_sig_bt, ++ VMRegPair* in_regs, ++ BasicType ret_type) { ++ if (method->is_method_handle_intrinsic()) { ++ vmIntrinsics::ID iid = method->intrinsic_id(); ++ intptr_t start = (intptr_t)__ pc(); ++ int vep_offset = ((intptr_t)__ pc()) - start; + -+ // Re-push self-frame -+ __ ld(ra, Address(x12)); -+ __ enter(); ++ // First instruction must be a nop as it may need to be patched on deoptimisation ++ __ nop(); ++ gen_special_dispatch(masm, ++ method, ++ in_sig_bt, ++ in_regs); ++ int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period ++ __ flush(); ++ int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually ++ return nmethod::new_native_nmethod(method, ++ compile_id, ++ masm->code(), ++ vep_offset, ++ frame_complete, ++ stack_slots / VMRegImpl::slots_per_word, ++ in_ByteSize(-1), ++ in_ByteSize(-1), ++ (OopMapSet*)NULL); ++ } ++ address native_func = method->native_function(); ++ assert(native_func != NULL, "must have function"); + -+ // Allocate a full sized register save area. We subtract 2 because -+ // enter() just pushed 2 words -+ __ sub(sp, sp, (frame_size_in_words - 2) * wordSize); ++ // An OopMap for lock (and class if static) ++ OopMapSet *oop_maps = new OopMapSet(); ++ assert_cond(oop_maps != NULL); ++ intptr_t start = (intptr_t)__ pc(); + -+ // Restore frame locals after moving the frame -+ __ fsd(f10, Address(sp, reg_saver.freg_offset_in_bytes(f10))); -+ __ sd(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10))); ++ // We have received a description of where all the java arg are located ++ // on entry to the wrapper. We need to convert these args to where ++ // the jni function will expect them. To figure out where they go ++ // we convert the java signature to a C signature by inserting ++ // the hidden arguments as arg[0] and possibly arg[1] (static method) + -+ // Call C code. Need thread but NOT official VM entry -+ // crud. We cannot block on this call, no GC can happen. Call should -+ // restore return values to their stack-slots with the new SP. -+ // -+ // void Deoptimization::unpack_frames(JavaThread* thread, int exec_mode) ++ const int total_in_args = method->size_of_parameters(); ++ int total_c_args = total_in_args + (method->is_static() ? 2 : 1); + -+ // Use fp because the frames look interpreted now -+ // Don't need the precise return PC here, just precise enough to point into this code blob. 
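// [Editorial note: illustrative sketch, not part of this patch.]
// The wrapper above rewrites the Java signature into a C signature by inserting
// the hidden JNIEnv* as arg[0] and, for static methods, the class mirror as
// arg[1] (hence total_c_args = total_in_args + (is_static ? 2 : 1)). A reduced
// BasicType set stands in for HotSpot's:
#include <cstdio>
#include <vector>

enum BasicType { T_ADDRESS, T_OBJECT, T_INT, T_LONG, T_VOID };

std::vector<BasicType> to_c_signature(const std::vector<BasicType>& in_sig, bool is_static) {
  std::vector<BasicType> out;
  out.push_back(T_ADDRESS);                // hidden JNIEnv*
  if (is_static) out.push_back(T_OBJECT);  // hidden class mirror for static methods
  out.insert(out.end(), in_sig.begin(), in_sig.end());
  return out;
}

int main() {
  // A long occupies two slots (the T_VOID half), followed by an Object parameter.
  std::vector<BasicType> java_sig = {T_LONG, T_VOID, T_OBJECT};
  std::printf("instance method: %zu C args\n", to_c_signature(java_sig, false).size()); // 4
  std::printf("static method:   %zu C args\n", to_c_signature(java_sig, true).size());  // 5
  return 0;
}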
-+ address the_pc = __ pc(); -+ __ set_last_Java_frame(sp, fp, the_pc, t0); ++ BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); ++ VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); ++ BasicType* in_elem_bt = NULL; + -+ __ mv(c_rarg0, xthread); -+ __ mv(c_rarg1, xcpool); // second arg: exec_mode -+ offset = 0; -+ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)), offset); -+ __ jalr(x1, t0, offset); ++ int argc = 0; ++ out_sig_bt[argc++] = T_ADDRESS; ++ if (method->is_static()) { ++ out_sig_bt[argc++] = T_OBJECT; ++ } + -+ // Set an oopmap for the call site -+ // Use the same PC we used for the last java frame -+ oop_maps->add_gc_map(the_pc - start, -+ new OopMap( frame_size_in_words, 0 )); ++ for (int i = 0; i < total_in_args ; i++) { ++ out_sig_bt[argc++] = in_sig_bt[i]; ++ } + -+ // Clear fp AND pc -+ __ reset_last_Java_frame(true); ++ // Now figure out where the args must be stored and how much stack space ++ // they require. ++ int out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); + -+ // Collect return values -+ __ fld(f10, Address(sp, reg_saver.freg_offset_in_bytes(f10))); -+ __ ld(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10))); ++ // Compute framesize for the wrapper. We need to handlize all oops in ++ // incoming registers + -+ // Pop self-frame. -+ __ leave(); // Epilog ++ // Calculate the total number of stack slots we will need. + -+ // Jump to interpreter -+ __ ret(); ++ // First count the abi requirement plus all of the outgoing args ++ int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; + -+ // Make sure all code is generated -+ masm->flush(); ++ // Now the space for the inbound oop handle area ++ int total_save_slots = 8 * VMRegImpl::slots_per_word; // 8 arguments passed in registers + -+ _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words); -+ assert(_deopt_blob != NULL, "create deoptimization blob fail!"); -+ _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); -+} ++ int oop_handle_offset = stack_slots; ++ stack_slots += total_save_slots; + -+uint SharedRuntime::out_preserve_stack_slots() { -+ return 0; -+} ++ // Now any space we need for handlizing a klass if static method + -+#ifdef COMPILER2 -+//------------------------------generate_uncommon_trap_blob-------------------- -+void SharedRuntime::generate_uncommon_trap_blob() { -+ // Allocate space for the code -+ ResourceMark rm; -+ // Setup code generation tools -+ CodeBuffer buffer("uncommon_trap_blob", 2048, 1024); -+ MacroAssembler* masm = new MacroAssembler(&buffer); -+ assert_cond(masm != NULL); ++ int klass_slot_offset = 0; ++ int klass_offset = -1; ++ int lock_slot_offset = 0; ++ bool is_static = false; + -+ assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned"); ++ if (method->is_static()) { ++ klass_slot_offset = stack_slots; ++ stack_slots += VMRegImpl::slots_per_word; ++ klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; ++ is_static = true; ++ } + -+ address start = __ pc(); ++ // Plus a lock if needed + -+ // Push self-frame. 
We get here with a return address in RA -+ // and sp should be 16 byte aligned -+ // push fp and retaddr by hand -+ __ addi(sp, sp, -2 * wordSize); -+ __ sd(ra, Address(sp, wordSize)); -+ __ sd(fp, Address(sp, 0)); -+ // we don't expect an arg reg save area -+#ifndef PRODUCT -+ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); -+#endif -+ // compiler left unloaded_class_index in j_rarg0 move to where the -+ // runtime expects it. -+ __ addiw(c_rarg1, j_rarg0, 0); ++ if (method->is_synchronized()) { ++ lock_slot_offset = stack_slots; ++ stack_slots += VMRegImpl::slots_per_word; ++ } + -+ // we need to set the past SP to the stack pointer of the stub frame -+ // and the pc to the address where this runtime call will return -+ // although actually any pc in this code blob will do). -+ Label retaddr; -+ __ set_last_Java_frame(sp, noreg, retaddr, t0); ++ // Now a place (+2) to save return values or temp during shuffling ++ // + 4 for return address (which we own) and saved fp ++ stack_slots += 6; + -+ // Call C code. Need thread but NOT official VM entry -+ // crud. We cannot block on this call, no GC can happen. Call should -+ // capture callee-saved registers as well as return values. ++ // Ok The space we have allocated will look like: ++ // ++ // ++ // FP-> | | ++ // | 2 slots (ra) | ++ // | 2 slots (fp) | ++ // |---------------------| ++ // | 2 slots for moves | ++ // |---------------------| ++ // | lock box (if sync) | ++ // |---------------------| <- lock_slot_offset ++ // | klass (if static) | ++ // |---------------------| <- klass_slot_offset ++ // | oopHandle area | ++ // |---------------------| <- oop_handle_offset (8 java arg registers) ++ // | outbound memory | ++ // | based arguments | ++ // | | ++ // |---------------------| ++ // | | ++ // SP-> | out_preserved_slots | + // -+ // UnrollBlock* uncommon_trap(JavaThread* thread, jint unloaded_class_index, jint exec_mode) + // -+ // n.b. 3 gp args, 0 fp args, integral return type -+ -+ __ mv(c_rarg0, xthread); -+ __ mvw(c_rarg2, (unsigned)Deoptimization::Unpack_uncommon_trap); -+ int32_t offset = 0; -+ __ la_patchable(t0, -+ RuntimeAddress(CAST_FROM_FN_PTR(address, -+ Deoptimization::uncommon_trap)), offset); -+ __ jalr(x1, t0, offset); -+ __ bind(retaddr); + -+ // Set an oopmap for the call site -+ OopMapSet* oop_maps = new OopMapSet(); -+ OopMap* map = new OopMap(SimpleRuntimeFrame::framesize, 0); -+ assert_cond(oop_maps != NULL && map != NULL); + -+ // location of fp is known implicitly by the frame sender code ++ // Now compute actual number of stack words we need rounding to make ++ // stack properly aligned. ++ stack_slots = align_up(stack_slots, StackAlignmentInSlots); + -+ oop_maps->add_gc_map(__ pc() - start, map); ++ int stack_size = stack_slots * VMRegImpl::stack_slot_size; + -+ __ reset_last_Java_frame(false); ++ // First thing make an ic check to see if we should even be here + -+ // move UnrollBlock* into x14 -+ __ mv(x14, x10); ++ // We are free to use all registers as temps without saving them and ++ // restoring them except fp. fp is the only callee save register ++ // as far as the interpreter and the compiler(s) are concerned. + -+#ifdef ASSERT -+ { Label L; -+ __ lwu(t0, Address(x14, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes())); -+ __ mvw(t1, Deoptimization::Unpack_uncommon_trap); -+ __ beq(t0, t1, L); -+ __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared"); -+ __ bind(L); -+ } -+#endif + -+ // Pop all the frames we must move/replace. 
-+ // -+ // Frame picture (youngest to oldest) -+ // 1: self-frame (no frame link) -+ // 2: deopting frame (no frame link) -+ // 3: caller of deopting frame (could be compiled/interpreted). ++ const Register ic_reg = t1; ++ const Register receiver = j_rarg0; + -+ __ add(sp, sp, (SimpleRuntimeFrame::framesize) << LogBytesPerInt); // Epilog! ++ Label hit; ++ Label exception_pending; + -+ // Pop deoptimized frame (int) -+ __ lwu(x12, Address(x14, -+ Deoptimization::UnrollBlock:: -+ size_of_deoptimized_frame_offset_in_bytes())); -+ __ sub(x12, x12, 2 * wordSize); -+ __ add(sp, sp, x12); -+ __ ld(fp, sp, 0); -+ __ ld(ra, sp, wordSize); -+ __ addi(sp, sp, 2 * wordSize); -+ // RA should now be the return address to the caller (3) frame ++ assert_different_registers(ic_reg, receiver, t0); ++ __ verify_oop(receiver); ++ __ cmp_klass(receiver, ic_reg, t0, hit); + -+#ifdef ASSERT -+ // Compilers generate code that bang the stack by as much as the -+ // interpreter would need. So this stack banging should never -+ // trigger a fault. Verify that it does not on non product builds. -+ if (UseStackBanging) { -+ __ lwu(x11, Address(x14, -+ Deoptimization::UnrollBlock:: -+ total_frame_sizes_offset_in_bytes())); -+ __ bang_stack_size(x11, x12); -+ } -+#endif ++ __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); + -+ // Load address of array of frame pcs into x12 (address*) -+ __ ld(x12, Address(x14, -+ Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes())); ++ // Verified entry point must be aligned ++ __ align(8); + -+ // Load address of array of frame sizes into x15 (intptr_t*) -+ __ ld(x15, Address(x14, -+ Deoptimization::UnrollBlock:: -+ frame_sizes_offset_in_bytes())); ++ __ bind(hit); + -+ // Counter -+ __ lwu(x13, Address(x14, -+ Deoptimization::UnrollBlock:: -+ number_of_frames_offset_in_bytes())); // (int) ++ int vep_offset = ((intptr_t)__ pc()) - start; + -+ // Now adjust the caller's stack to make up for the extra locals but -+ // record the original sp so that we can save it in the skeletal -+ // interpreter frame and the stack walking of interpreter_sender -+ // will get the unextended sp value and not the "real" sp value. ++ // If we have to make this method not-entrant we'll overwrite its ++ // first instruction with a jump. 
++  __ nop();
 +
++  if (VM_Version::supports_fast_class_init_checks() && method->needs_clinit_barrier()) {
++    Label L_skip_barrier;
++    __ mov_metadata(t1, method->method_holder()); // InstanceKlass*
++    __ clinit_barrier(t1, t0, &L_skip_barrier);
++    __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
 +
++    __ bind(L_skip_barrier);
++  }
 +
++  // Generate stack overflow check
++  __ bang_stack_with_offset(checked_cast<int>(StackOverflow::stack_shadow_zone_size()));
 +
++  // Generate a new frame for the wrapper.
++  __ enter();
++  // -2 because return address is already present and so is saved fp
++  __ sub(sp, sp, stack_size - 2 * wordSize);
 +
++  BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
++  assert_cond(bs != NULL);
++  bs->nmethod_entry_barrier(masm);
 +
++  // Frame is now completed as far as size and linkage.
++  int frame_complete = ((intptr_t)__ pc()) - start;
 +
++  // We use x18 as the oop handle for the receiver/klass
++  // It is callee save so it survives the call to native
 +
++  const Register oop_handle_reg = x18;
 +
++  //
++  // We immediately shuffle the arguments so that any vm call we have to
++  // make from here on out (sync slow path, jvmti, etc.)
we will have ++ // captured the oops from our caller and have a valid oopMap for ++ // them. + -+ // Pop self-frame. -+ __ leave(); // Epilog ++ // ----------------- ++ // The Grand Shuffle + -+ // Jump to interpreter -+ __ ret(); ++ // The Java calling convention is either equal (linux) or denser (win64) than the ++ // c calling convention. However the because of the jni_env argument the c calling ++ // convention always has at least one more (and two for static) arguments than Java. ++ // Therefore if we move the args from java -> c backwards then we will never have ++ // a register->register conflict and we don't have to build a dependency graph ++ // and figure out how to break any cycles. ++ // + -+ // Make sure all code is generated -+ masm->flush(); ++ // Record esp-based slot for receiver on stack for non-static methods ++ int receiver_offset = -1; + -+ _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps, -+ SimpleRuntimeFrame::framesize >> 1); -+} -+#endif // COMPILER2 ++ // This is a trick. We double the stack slots so we can claim ++ // the oops in the caller's frame. Since we are sure to have ++ // more args than the caller doubling is enough to make ++ // sure we can capture all the incoming oop args from the ++ // caller. ++ // ++ OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); ++ assert_cond(map != NULL); + -+//------------------------------generate_handler_blob------ -+// -+// Generate a special Compile2Runtime blob that saves all registers, -+// and setup oopmap. -+// -+SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) { -+ ResourceMark rm; -+ OopMapSet *oop_maps = new OopMapSet(); -+ assert_cond(oop_maps != NULL); -+ OopMap* map = NULL; ++ int float_args = 0; ++ int int_args = 0; + -+ // Allocate space for the code. Setup code generation tools. -+ CodeBuffer buffer("handler_blob", 2048, 1024); -+ MacroAssembler* masm = new MacroAssembler(&buffer); -+ assert_cond(masm != NULL); ++#ifdef ASSERT ++ bool reg_destroyed[RegisterImpl::number_of_registers]; ++ bool freg_destroyed[FloatRegisterImpl::number_of_registers]; ++ for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { ++ reg_destroyed[r] = false; ++ } ++ for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { ++ freg_destroyed[f] = false; ++ } + -+ address start = __ pc(); -+ address call_pc = NULL; -+ int frame_size_in_words = -1; -+ bool cause_return = (poll_type == POLL_AT_RETURN); -+ RegisterSaver reg_saver(poll_type == POLL_AT_VECTOR_LOOP /* save_vectors */); ++#endif /* ASSERT */ + -+ // Save Integer and Float registers. -+ map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words); ++ // For JNI natives the incoming and outgoing registers are offset upwards. ++ GrowableArray arg_order(2 * total_in_args); ++ VMRegPair tmp_vmreg; ++ tmp_vmreg.set2(x9->as_VMReg()); + -+ // The following is basically a call_VM. However, we need the precise -+ // address of the call in order to generate an oopmap. Hence, we do all the -+ // work outselves. 
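// [Editorial note: illustrative sketch, not part of this patch.]
// The "Grand Shuffle" comment above argues that because every Java argument
// moves to a C slot at the same or a higher index (the hidden jni_env shifts
// everything up), copying from the last argument to the first never overwrites
// a source before it has been read. A tiny register-file model of that claim:
#include <cstdio>

void shuffle(int* regs, int nargs, bool backwards) {
  if (backwards) {
    for (int i = nargs - 1; i >= 0; i--) regs[i + 1] = regs[i];  // safe order
  } else {
    for (int i = 0; i < nargs; i++)      regs[i + 1] = regs[i];  // clobbers sources
  }
  regs[0] = -1;  // slot 0 now notionally holds the JNIEnv*
}

void show(const char* label, const int* regs, int n) {
  std::printf("%s:", label);
  for (int i = 0; i < n; i++) std::printf(" %d", regs[i]);
  std::printf("\n");
}

int main() {
  int fwd[5] = {10, 20, 30, 40, 0};
  int bwd[5] = {10, 20, 30, 40, 0};
  shuffle(fwd, 4, false);
  shuffle(bwd, 4, true);
  show("forward ", fwd, 5);   // -1 10 10 10 10 : later sources were overwritten
  show("backward", bwd, 5);   // -1 10 20 30 40 : every value survived, shifted up
  return 0;
}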
++ for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { ++ arg_order.push(i); ++ arg_order.push(c_arg); ++ } + -+ Label retaddr; -+ __ set_last_Java_frame(sp, noreg, retaddr, t0); ++ int temploc = -1; ++ for (int ai = 0; ai < arg_order.length(); ai += 2) { ++ int i = arg_order.at(ai); ++ int c_arg = arg_order.at(ai + 1); ++ __ block_comment(err_msg("mv %d -> %d", i, c_arg)); ++ assert(c_arg != -1 && i != -1, "wrong order"); ++#ifdef ASSERT ++ if (in_regs[i].first()->is_Register()) { ++ assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!"); ++ } else if (in_regs[i].first()->is_FloatRegister()) { ++ assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding()], "destroyed reg!"); ++ } ++ if (out_regs[c_arg].first()->is_Register()) { ++ reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; ++ } else if (out_regs[c_arg].first()->is_FloatRegister()) { ++ freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; ++ } ++#endif /* ASSERT */ ++ switch (in_sig_bt[i]) { ++ case T_ARRAY: ++ case T_OBJECT: ++ object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], ++ ((i == 0) && (!is_static)), ++ &receiver_offset); ++ int_args++; ++ break; ++ case T_VOID: ++ break; + -+ // The return address must always be correct so that frame constructor never -+ // sees an invalid pc. ++ case T_FLOAT: ++ float_move(masm, in_regs[i], out_regs[c_arg]); ++ float_args++; ++ break; + -+ if (!cause_return) { -+ // overwrite the return address pushed by save_live_registers -+ // Additionally, x18 is a callee-saved register so we can look at -+ // it later to determine if someone changed the return address for -+ // us! -+ __ ld(x18, Address(xthread, JavaThread::saved_exception_pc_offset())); -+ __ sd(x18, Address(fp, frame::return_addr_offset * wordSize)); ++ case T_DOUBLE: ++ assert( i + 1 < total_in_args && ++ in_sig_bt[i + 1] == T_VOID && ++ out_sig_bt[c_arg + 1] == T_VOID, "bad arg list"); ++ double_move(masm, in_regs[i], out_regs[c_arg]); ++ float_args++; ++ break; ++ ++ case T_LONG : ++ long_move(masm, in_regs[i], out_regs[c_arg]); ++ int_args++; ++ break; ++ ++ case T_ADDRESS: ++ assert(false, "found T_ADDRESS in java args"); ++ break; ++ ++ default: ++ move32_64(masm, in_regs[i], out_regs[c_arg]); ++ int_args++; ++ } + } + -+ // Do the call -+ __ mv(c_rarg0, xthread); -+ int32_t offset = 0; -+ __ la_patchable(t0, RuntimeAddress(call_ptr), offset); -+ __ jalr(x1, t0, offset); -+ __ bind(retaddr); ++ // point c_arg at the first arg that is already loaded in case we ++ // need to spill before we call out ++ int c_arg = total_c_args - total_in_args; + -+ // Set an oopmap for the call site. This oopmap will map all -+ // oop-registers and debug-info registers as callee-saved. This -+ // will allow deoptimization at this safepoint to find all possible -+ // debug-info recordings, as well as let GC find all oops. ++ // Pre-load a static method's oop into c_rarg1. ++ if (method->is_static()) { + -+ oop_maps->add_gc_map( __ pc() - start, map); ++ // load oop into a register ++ __ movoop(c_rarg1, ++ JNIHandles::make_local(method->method_holder()->java_mirror()), ++ /*immediate*/true); + -+ Label noException; ++ // Now handlize the static class mirror it's known not-null. 
++ __ sd(c_rarg1, Address(sp, klass_offset)); ++ map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); + -+ __ reset_last_Java_frame(false); ++ // Now get the handle ++ __ la(c_rarg1, Address(sp, klass_offset)); ++ // and protect the arg if we must spill ++ c_arg--; ++ } + -+ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); ++ // Change state to native (we save the return address in the thread, since it might not ++ // be pushed on the stack when we do a stack traversal). ++ // We use the same pc/oopMap repeatedly when we call out + -+ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); -+ __ beqz(t0, noException); ++ Label native_return; ++ __ set_last_Java_frame(sp, noreg, native_return, t0); + -+ // Exception pending ++ Label dtrace_method_entry, dtrace_method_entry_done; ++ { ++ int32_t offset = 0; ++ __ la_patchable(t0, ExternalAddress((address)&DTraceMethodProbes), offset); ++ __ lbu(t0, Address(t0, offset)); ++ __ addw(t0, t0, zr); ++ __ bnez(t0, dtrace_method_entry); ++ __ bind(dtrace_method_entry_done); ++ } + -+ reg_saver.restore_live_registers(masm); ++ // RedefineClasses() tracing support for obsolete method entry ++ if (log_is_enabled(Trace, redefine, class, obsolete)) { ++ // protect the args we've loaded ++ save_args(masm, total_c_args, c_arg, out_regs); ++ __ mov_metadata(c_rarg1, method()); ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), ++ xthread, c_rarg1); ++ restore_args(masm, total_c_args, c_arg, out_regs); ++ } + -+ __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); ++ // Lock a synchronized method + -+ // No exception case -+ __ bind(noException); ++ // Register definitions used by locking and unlocking + -+ Label no_adjust, bail; -+ if (SafepointMechanism::uses_thread_local_poll() && !cause_return) { -+ // If our stashed return pc was modified by the runtime we avoid touching it -+ __ ld(t0, Address(fp, frame::return_addr_offset * wordSize)); -+ __ bne(x18, t0, no_adjust); ++ const Register swap_reg = x10; ++ const Register obj_reg = x9; // Will contain the oop ++ const Register lock_reg = x30; // Address of compiler lock object (BasicLock) ++ const Register old_hdr = x30; // value of old header at unlock time ++ const Register tmp = ra; + -+#ifdef ASSERT -+ // Verify the correct encoding of the poll we're about to skip. -+ // See NativeInstruction::is_lwu_to_zr() -+ __ lwu(t0, Address(x18)); -+ __ andi(t1, t0, 0b0000011); -+ __ mv(t2, 0b0000011); -+ __ bne(t1, t2, bail); // 0-6:0b0000011 -+ __ srli(t1, t0, 7); -+ __ andi(t1, t1, 0b00000); -+ __ bnez(t1, bail); // 7-11:0b00000 -+ __ srli(t1, t0, 12); -+ __ andi(t1, t1, 0b110); -+ __ mv(t2, 0b110); -+ __ bne(t1, t2, bail); // 12-14:0b110 -+#endif -+ // Adjust return pc forward to step over the safepoint poll instruction -+ __ add(x18, x18, NativeInstruction::instruction_size); -+ __ sd(x18, Address(fp, frame::return_addr_offset * wordSize)); -+ } ++ Label slow_path_lock; ++ Label lock_done; + -+ __ bind(no_adjust); -+ // Normal exit, restore registers and exit. 
++ if (method->is_synchronized()) { + -+ reg_saver.restore_live_registers(masm); -+ __ ret(); ++ const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); + -+#ifdef ASSERT -+ __ bind(bail); -+ __ stop("Attempting to adjust pc to skip safepoint poll but the return point is not what we expected"); -+#endif ++ // Get the handle (the 2nd argument) ++ __ mv(oop_handle_reg, c_rarg1); + -+ // Make sure all code is generated -+ masm->flush(); ++ // Get address of the box + -+ // Fill-out other meta info -+ return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words); -+} ++ __ la(lock_reg, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); + -+// -+// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss -+// -+// Generate a stub that calls into vm to find out the proper destination -+// of a java call. All the argument registers are live at this point -+// but since this is generic code we don't know what they are and the caller -+// must do any gc of the args. -+// -+RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) { -+ assert(StubRoutines::forward_exception_entry() != NULL, "must be generated before"); ++ // Load the oop from the handle ++ __ ld(obj_reg, Address(oop_handle_reg, 0)); + -+ // allocate space for the code -+ ResourceMark rm; ++ if (!UseHeavyMonitors) { ++ // Load (object->mark() | 1) into swap_reg % x10 ++ __ ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes())); ++ __ ori(swap_reg, t0, 1); + -+ CodeBuffer buffer(name, 1000, 512); -+ MacroAssembler* masm = new MacroAssembler(&buffer); -+ assert_cond(masm != NULL); ++ // Save (object->mark() | 1) into BasicLock's displaced header ++ __ sd(swap_reg, Address(lock_reg, mark_word_offset)); + -+ int frame_size_in_words = -1; -+ RegisterSaver reg_saver(false /* save_vectors */); ++ // src -> dest if dest == x10 else x10 <- dest ++ { ++ Label here; ++ __ cmpxchg_obj_header(x10, lock_reg, obj_reg, t0, lock_done, /*fallthrough*/NULL); ++ } + -+ OopMapSet *oop_maps = new OopMapSet(); -+ assert_cond(oop_maps != NULL); -+ OopMap* map = NULL; ++ // Test if the oopMark is an obvious stack pointer, i.e., ++ // 1) (mark & 3) == 0, and ++ // 2) sp <= mark < mark + os::pagesize() ++ // These 3 tests can be done by evaluating the following ++ // expression: ((mark - sp) & (3 - os::vm_page_size())), ++ // assuming both stack pointer and pagesize have their ++ // least significant 2 bits clear. 
++ // NOTE: the oopMark is in swap_reg % 10 as the result of cmpxchg ++ ++ __ sub(swap_reg, swap_reg, sp); ++ __ andi(swap_reg, swap_reg, 3 - os::vm_page_size()); ++ ++ // Save the test result, for recursive case, the result is zero ++ __ sd(swap_reg, Address(lock_reg, mark_word_offset)); ++ __ bnez(swap_reg, slow_path_lock); ++ } else { ++ __ j(slow_path_lock); ++ } + -+ int start = __ offset(); ++ // Slow path will re-enter here ++ __ bind(lock_done); ++ } + -+ map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words); + -+ int frame_complete = __ offset(); ++ // Finally just about ready to make the JNI call + -+ { -+ Label retaddr; -+ __ set_last_Java_frame(sp, noreg, retaddr, t0); ++ // get JNIEnv* which is first argument to native ++ __ la(c_rarg0, Address(xthread, in_bytes(JavaThread::jni_environment_offset()))); + -+ __ mv(c_rarg0, xthread); -+ int32_t offset = 0; -+ __ la_patchable(t0, RuntimeAddress(destination), offset); -+ __ jalr(x1, t0, offset); -+ __ bind(retaddr); ++ // Now set thread in native ++ __ la(t1, Address(xthread, JavaThread::thread_state_offset())); ++ __ mv(t0, _thread_in_native); ++ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); ++ __ sw(t0, Address(t1)); ++ ++ rt_call(masm, native_func); ++ ++ __ bind(native_return); ++ ++ intptr_t return_pc = (intptr_t) __ pc(); ++ oop_maps->add_gc_map(return_pc - start, map); ++ ++ // Unpack native results. ++ if (ret_type != T_OBJECT && ret_type != T_ARRAY) { ++ __ cast_primitive_type(ret_type, x10); + } + -+ // Set an oopmap for the call site. -+ // We need this not only for callee-saved registers, but also for volatile -+ // registers that the compiler might be keeping live across a safepoint. ++ Label safepoint_in_progress, safepoint_in_progress_done; ++ Label after_transition; + -+ oop_maps->add_gc_map( __ offset() - start, map); ++ // Switch thread to "native transition" state before reading the synchronization state. ++ // This additional state is necessary because reading and testing the synchronization ++ // state is not atomic w.r.t. GC, as this scenario demonstrates: ++ // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted. ++ // VM thread changes sync state to synchronizing and suspends threads for GC. ++ // Thread A is resumed to finish this native method, but doesn't block here since it ++ // didn't see any synchronization is progress, and escapes. ++ __ mv(t0, _thread_in_native_trans); + -+ // x10 contains the address we are going to jump to assuming no exception got installed ++ __ sw(t0, Address(xthread, JavaThread::thread_state_offset())); + -+ // clear last_Java_sp -+ __ reset_last_Java_frame(false); -+ // check for pending exceptions -+ Label pending; -+ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); -+ __ bnez(t0, pending); ++ // Force this write out before the read below ++ __ membar(MacroAssembler::AnyAny); + -+ // get the returned Method* -+ __ get_vm_result_2(xmethod, xthread); -+ __ sd(xmethod, Address(sp, reg_saver.reg_offset_in_bytes(xmethod))); ++ // check for safepoint operation in progress and/or pending suspend requests ++ { ++ // We need an acquire here to ensure that any subsequent load of the ++ // global SafepointSynchronize::_state flag is ordered after this load ++ // of the thread-local polling word. We don't want this poll to ++ // return false (i.e. not safepointing) and a later poll of the global ++ // SafepointSynchronize::_state spuriously to return true. 
++ // This is to avoid a race when we're in a native->Java transition ++ // racing the code which wakes up from a safepoint. ++ ++ __ safepoint_poll(safepoint_in_progress, true /* at_return */, true /* acquire */, false /* in_nmethod */); ++ __ lwu(t0, Address(xthread, JavaThread::suspend_flags_offset())); ++ __ bnez(t0, safepoint_in_progress); ++ __ bind(safepoint_in_progress_done); ++ } + -+ // x10 is where we want to jump, overwrite t0 which is saved and temporary -+ __ sd(x10, Address(sp, reg_saver.reg_offset_in_bytes(t0))); -+ reg_saver.restore_live_registers(masm); ++ // change thread state ++ __ la(t1, Address(xthread, JavaThread::thread_state_offset())); ++ __ mv(t0, _thread_in_Java); ++ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); ++ __ sw(t0, Address(t1)); ++ __ bind(after_transition); + -+ // We are back the the original state on entry and ready to go. ++ Label reguard; ++ Label reguard_done; ++ __ lbu(t0, Address(xthread, JavaThread::stack_guard_state_offset())); ++ __ mv(t1, StackOverflow::stack_guard_yellow_reserved_disabled); ++ __ beq(t0, t1, reguard); ++ __ bind(reguard_done); + -+ __ jr(t0); ++ // native result if any is live + -+ // Pending exception after the safepoint ++ // Unlock ++ Label unlock_done; ++ Label slow_path_unlock; ++ if (method->is_synchronized()) { + -+ __ bind(pending); ++ // Get locked oop from the handle we passed to jni ++ __ ld(obj_reg, Address(oop_handle_reg, 0)); + -+ reg_saver.restore_live_registers(masm); ++ Label done; + -+ // exception pending => remove activation and forward to exception handler ++ if (!UseHeavyMonitors) { ++ // Simple recursive lock? ++ __ ld(t0, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); ++ __ beqz(t0, done); ++ } + -+ __ sd(zr, Address(xthread, JavaThread::vm_result_offset())); + -+ __ ld(x10, Address(xthread, Thread::pending_exception_offset())); -+ __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); ++ // Must save x10 if if it is live now because cmpxchg must use it ++ if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { ++ save_native_result(masm, ret_type, stack_slots); ++ } + -+ // ------------- -+ // make sure all code is generated -+ masm->flush(); ++ if (!UseHeavyMonitors) { ++ // get address of the stack lock ++ __ la(x10, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); ++ // get old displaced header ++ __ ld(old_hdr, Address(x10, 0)); + -+ // return the blob -+ return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_words, oop_maps, true); -+} ++ // Atomic swap old header if oop still contains the stack lock ++ Label succeed; ++ __ cmpxchg_obj_header(x10, old_hdr, obj_reg, t0, succeed, &slow_path_unlock); ++ __ bind(succeed); ++ } else { ++ __ j(slow_path_unlock); ++ } + -+#ifdef COMPILER2 -+//------------------------------generate_exception_blob--------------------------- -+// creates exception blob at the end -+// Using exception blob, this code is jumped from a compiled method. -+// (see emit_exception_handler in riscv.ad file) -+// -+// Given an exception pc at a call we call into the runtime for the -+// handler in this method. This handler might merely restore state -+// (i.e. callee save registers) unwind the frame and jump to the -+// exception handler for the nmethod if there is no Java level handler -+// for the nmethod. -+// -+// This code is entered with a jmp. 
-+// -+// Arguments: -+// x10: exception oop -+// x13: exception pc -+// -+// Results: -+// x10: exception oop -+// x13: exception pc in caller -+// destination: exception handler of caller -+// -+// Note: the exception pc MUST be at a call (precise debug information) -+// Registers x10, x13, x12, x14, x15, t0 are not callee saved. -+// ++ // slow path re-enters here ++ __ bind(unlock_done); ++ if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { ++ restore_native_result(masm, ret_type, stack_slots); ++ } + -+void OptoRuntime::generate_exception_blob() { -+ assert(!OptoRuntime::is_callee_saved_register(R13_num), ""); -+ assert(!OptoRuntime::is_callee_saved_register(R10_num), ""); -+ assert(!OptoRuntime::is_callee_saved_register(R12_num), ""); ++ __ bind(done); ++ } + -+ assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned"); ++ Label dtrace_method_exit, dtrace_method_exit_done; ++ { ++ int32_t offset = 0; ++ __ la_patchable(t0, ExternalAddress((address)&DTraceMethodProbes), offset); ++ __ lbu(t0, Address(t0, offset)); ++ __ bnez(t0, dtrace_method_exit); ++ __ bind(dtrace_method_exit_done); ++ } + -+ // Allocate space for the code -+ ResourceMark rm; -+ // Setup code generation tools -+ CodeBuffer buffer("exception_blob", 2048, 1024); -+ MacroAssembler* masm = new MacroAssembler(&buffer); -+ assert_cond(masm != NULL); ++ __ reset_last_Java_frame(false); + -+ // TODO check various assumptions made here -+ // -+ // make sure we do so before running this ++ // Unbox oop result, e.g. JNIHandles::resolve result. ++ if (is_reference_type(ret_type)) { ++ __ resolve_jobject(x10, xthread, t1); ++ } + -+ address start = __ pc(); ++ if (CheckJNICalls) { ++ // clear_pending_jni_exception_check ++ __ sd(zr, Address(xthread, JavaThread::pending_jni_exception_check_fn_offset())); ++ } + -+ // push fp and retaddr by hand -+ // Exception pc is 'return address' for stack walker -+ __ addi(sp, sp, -2 * wordSize); -+ __ sd(ra, Address(sp, wordSize)); -+ __ sd(fp, Address(sp)); -+ // there are no callee save registers and we don't expect an -+ // arg reg save area -+#ifndef PRODUCT -+ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); -+#endif -+ // Store exception in Thread object. We cannot pass any arguments to the -+ // handle_exception call, since we do not want to make any assumption -+ // about the size of the frame where the exception happened in. -+ __ sd(x10, Address(xthread, JavaThread::exception_oop_offset())); -+ __ sd(x13, Address(xthread, JavaThread::exception_pc_offset())); ++ // reset handle block ++ __ ld(x12, Address(xthread, JavaThread::active_handles_offset())); ++ __ sd(zr, Address(x12, JNIHandleBlock::top_offset_in_bytes())); + -+ // This call does all the hard work. It checks if an exception handler -+ // exists in the method. -+ // If so, it returns the handler address. -+ // If not, it prepares for stack-unwinding, restoring the callee-save -+ // registers of the frame being removed. -+ // -+ // address OptoRuntime::handle_exception_C(JavaThread* thread) -+ // -+ // n.b. 1 gp arg, 0 fp args, integral return type ++ __ leave(); + -+ // the stack should always be aligned -+ address the_pc = __ pc(); -+ __ set_last_Java_frame(sp, noreg, the_pc, t0); -+ __ mv(c_rarg0, xthread); -+ int32_t offset = 0; -+ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, OptoRuntime::handle_exception_C)), offset); -+ __ jalr(x1, t0, offset); ++ // Any exception pending? 
++ __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset()))); ++ __ bnez(t0, exception_pending); + -+ // Set an oopmap for the call site. This oopmap will only be used if we -+ // are unwinding the stack. Hence, all locations will be dead. -+ // Callee-saved registers will be the same as the frame above (i.e., -+ // handle_exception_stub), since they were restored when we got the -+ // exception. ++ // We're done ++ __ ret(); + -+ OopMapSet* oop_maps = new OopMapSet(); -+ assert_cond(oop_maps != NULL); ++ // Unexpected paths are out of line and go here + -+ oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0)); ++ // forward the exception ++ __ bind(exception_pending); + -+ __ reset_last_Java_frame(false); ++ // and forward the exception ++ __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); + -+ // Restore callee-saved registers ++ // Slow path locking & unlocking ++ if (method->is_synchronized()) { + -+ // fp is an implicitly saved callee saved register (i.e. the calling -+ // convention will save restore it in prolog/epilog) Other than that -+ // there are no callee save registers now that adapter frames are gone. -+ // and we dont' expect an arg reg save area -+ __ ld(fp, Address(sp)); -+ __ ld(x13, Address(sp, wordSize)); -+ __ addi(sp, sp , 2 * wordSize); ++ __ block_comment("Slow path lock {"); ++ __ bind(slow_path_lock); + -+ // x10: exception handler ++ // has last_Java_frame setup. No exceptions so do vanilla call not call_VM ++ // args are (oop obj, BasicLock* lock, JavaThread* thread) + -+ // We have a handler in x10 (could be deopt blob). -+ __ mv(t0, x10); ++ // protect the args we've loaded ++ save_args(masm, total_c_args, c_arg, out_regs); ++ ++ __ mv(c_rarg0, obj_reg); ++ __ mv(c_rarg1, lock_reg); ++ __ mv(c_rarg2, xthread); ++ ++ // Not a leaf but we have last_Java_frame setup as we want ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), 3); ++ restore_args(masm, total_c_args, c_arg, out_regs); + -+ // Get the exception oop -+ __ ld(x10, Address(xthread, JavaThread::exception_oop_offset())); -+ // Get the exception pc in case we are deoptimized -+ __ ld(x14, Address(xthread, JavaThread::exception_pc_offset())); +#ifdef ASSERT -+ __ sd(zr, Address(xthread, JavaThread::exception_handler_pc_offset())); -+ __ sd(zr, Address(xthread, JavaThread::exception_pc_offset())); ++ { Label L; ++ __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset()))); ++ __ beqz(t0, L); ++ __ stop("no pending exception allowed on exit from monitorenter"); ++ __ bind(L); ++ } +#endif -+ // Clear the exception oop so GC no longer processes it as a root. 
-+ __ sd(zr, Address(xthread, JavaThread::exception_oop_offset())); ++ __ j(lock_done); + -+ // x10: exception oop -+ // t0: exception handler -+ // x14: exception pc -+ // Jump to handler ++ __ block_comment("} Slow path lock"); + -+ __ jr(t0); ++ __ block_comment("Slow path unlock {"); ++ __ bind(slow_path_unlock); + -+ // Make sure all code is generated -+ masm->flush(); ++ if (ret_type == T_FLOAT || ret_type == T_DOUBLE) { ++ save_native_result(masm, ret_type, stack_slots); ++ } + -+ // Set exception blob -+ _exception_blob = ExceptionBlob::create(&buffer, oop_maps, SimpleRuntimeFrame::framesize >> 1); -+} -+#endif // COMPILER2 -diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp -new file mode 100644 -index 000000000..c5b3b094c ---- /dev/null -+++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp -@@ -0,0 +1,3743 @@ -+/* -+ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2019, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++ __ mv(c_rarg2, xthread); ++ __ la(c_rarg1, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); ++ __ mv(c_rarg0, obj_reg); + -+#include "precompiled.hpp" -+#include "asm/macroAssembler.hpp" -+#include "asm/macroAssembler.inline.hpp" -+#include "gc/shared/barrierSet.hpp" -+#include "gc/shared/barrierSetAssembler.hpp" -+#include "interpreter/interpreter.hpp" -+#include "nativeInst_riscv.hpp" -+#include "oops/instanceOop.hpp" -+#include "oops/method.hpp" -+#include "oops/objArrayKlass.hpp" -+#include "oops/oop.inline.hpp" -+#include "prims/methodHandles.hpp" -+#include "runtime/frame.inline.hpp" -+#include "runtime/handles.inline.hpp" -+#include "runtime/sharedRuntime.hpp" -+#include "runtime/stubCodeGenerator.hpp" -+#include "runtime/stubRoutines.hpp" -+#include "runtime/thread.inline.hpp" -+#include "utilities/align.hpp" -+#ifdef COMPILER2 -+#include "opto/runtime.hpp" -+#endif ++ // Save pending exception around call to VM (which contains an EXCEPTION_MARK) ++ // NOTE that obj_reg == x9 currently ++ __ ld(x9, Address(xthread, in_bytes(Thread::pending_exception_offset()))); ++ __ sd(zr, Address(xthread, in_bytes(Thread::pending_exception_offset()))); + ++ rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C)); + -+// Declaration and definition of StubGenerator (no .hpp file). 
-+// For a more detailed description of the stub routine structure -+// see the comment in stubRoutines.hpp ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset()))); ++ __ beqz(t0, L); ++ __ stop("no pending exception allowed on exit complete_monitor_unlocking_C"); ++ __ bind(L); ++ } ++#endif /* ASSERT */ + -+#undef __ -+#define __ _masm-> ++ __ sd(x9, Address(xthread, in_bytes(Thread::pending_exception_offset()))); + -+#ifdef PRODUCT -+#define BLOCK_COMMENT(str) /* nothing */ -+#else -+#define BLOCK_COMMENT(str) __ block_comment(str) -+#endif ++ if (ret_type == T_FLOAT || ret_type == T_DOUBLE) { ++ restore_native_result(masm, ret_type, stack_slots); ++ } ++ __ j(unlock_done); + -+#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++ __ block_comment("} Slow path unlock"); + -+// Stub Code definitions ++ } // synchronized + -+class StubGenerator: public StubCodeGenerator { -+ private: ++ // SLOW PATH Reguard the stack if needed + -+#ifdef PRODUCT -+#define inc_counter_np(counter) ((void)0) -+#else -+ void inc_counter_np_(int& counter) { -+ __ la(t1, ExternalAddress((address)&counter)); -+ __ lwu(t0, Address(t1, 0)); -+ __ addiw(t0, t0, 1); -+ __ sw(t0, Address(t1, 0)); -+ } -+#define inc_counter_np(counter) \ -+ BLOCK_COMMENT("inc_counter " #counter); \ -+ inc_counter_np_(counter); -+#endif ++ __ bind(reguard); ++ save_native_result(masm, ret_type, stack_slots); ++ rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)); ++ restore_native_result(masm, ret_type, stack_slots); ++ // and continue ++ __ j(reguard_done); + -+ // Call stubs are used to call Java from C -+ // -+ // Arguments: -+ // c_rarg0: call wrapper address address -+ // c_rarg1: result address -+ // c_rarg2: result type BasicType -+ // c_rarg3: method Method* -+ // c_rarg4: (interpreter) entry point address -+ // c_rarg5: parameters intptr_t* -+ // c_rarg6: parameter size (in words) int -+ // c_rarg7: thread Thread* -+ // -+ // There is no return from the stub itself as any Java result -+ // is written to result -+ // -+ // we save x1 (ra) as the return PC at the base of the frame and -+ // link x8 (fp) below it as the frame pointer installing sp (x2) -+ // into fp. -+ // -+ // we save x10-x17, which accounts for all the c arguments. -+ // -+ // TODO: strictly do we need to save them all? they are treated as -+ // volatile by C so could we omit saving the ones we are going to -+ // place in global registers (thread? method?) or those we only use -+ // during setup of the Java call? -+ // -+ // we don't need to save x5 which C uses as an indirect result location -+ // return register. -+ // -+ // we don't need to save x6-x7 and x28-x31 which both C and Java treat as -+ // volatile -+ // -+ // we save x9, x18-x27, f8-f9, and f18-f27 which Java uses as temporary -+ // registers and C expects to be callee-save -+ // -+ // so the stub frame looks like this when we enter Java code -+ // -+ // [ return_from_Java ] <--- sp -+ // [ argument word n ] -+ // ... 
-+ // -34 [ argument word 1 ] -+ // -33 [ saved f27 ] <--- sp_after_call -+ // -32 [ saved f26 ] -+ // -31 [ saved f25 ] -+ // -30 [ saved f24 ] -+ // -29 [ saved f23 ] -+ // -28 [ saved f22 ] -+ // -27 [ saved f21 ] -+ // -26 [ saved f20 ] -+ // -25 [ saved f19 ] -+ // -24 [ saved f18 ] -+ // -23 [ saved f9 ] -+ // -22 [ saved f8 ] -+ // -21 [ saved x27 ] -+ // -20 [ saved x26 ] -+ // -19 [ saved x25 ] -+ // -18 [ saved x24 ] -+ // -17 [ saved x23 ] -+ // -16 [ saved x22 ] -+ // -15 [ saved x21 ] -+ // -14 [ saved x20 ] -+ // -13 [ saved x19 ] -+ // -12 [ saved x18 ] -+ // -11 [ saved x9 ] -+ // -10 [ call wrapper (x10) ] -+ // -9 [ result (x11) ] -+ // -8 [ result type (x12) ] -+ // -7 [ method (x13) ] -+ // -6 [ entry point (x14) ] -+ // -5 [ parameters (x15) ] -+ // -4 [ parameter size (x16) ] -+ // -3 [ thread (x17) ] -+ // -2 [ saved fp (x8) ] -+ // -1 [ saved ra (x1) ] -+ // 0 [ ] <--- fp == saved sp (x2) ++ // SLOW PATH safepoint ++ { ++ __ block_comment("safepoint {"); ++ __ bind(safepoint_in_progress); + -+ // Call stub stack layout word offsets from fp -+ enum call_stub_layout { -+ sp_after_call_off = -33, ++ // Don't use call_VM as it will see a possible pending exception and forward it ++ // and never return here preventing us from clearing _last_native_pc down below. ++ // ++ save_native_result(masm, ret_type, stack_slots); ++ __ mv(c_rarg0, xthread); ++#ifndef PRODUCT ++ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); ++#endif ++ int32_t offset = 0; ++ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)), offset); ++ __ jalr(x1, t0, offset); + -+ f27_off = -33, -+ f26_off = -32, -+ f25_off = -31, -+ f24_off = -30, -+ f23_off = -29, -+ f22_off = -28, -+ f21_off = -27, -+ f20_off = -26, -+ f19_off = -25, -+ f18_off = -24, -+ f9_off = -23, -+ f8_off = -22, ++ // Restore any method result value ++ restore_native_result(masm, ret_type, stack_slots); + -+ x27_off = -21, -+ x26_off = -20, -+ x25_off = -19, -+ x24_off = -18, -+ x23_off = -17, -+ x22_off = -16, -+ x21_off = -15, -+ x20_off = -14, -+ x19_off = -13, -+ x18_off = -12, -+ x9_off = -11, ++ __ j(safepoint_in_progress_done); ++ __ block_comment("} safepoint"); ++ } + -+ call_wrapper_off = -10, -+ result_off = -9, -+ result_type_off = -8, -+ method_off = -7, -+ entry_point_off = -6, -+ parameters_off = -5, -+ parameter_size_off = -4, -+ thread_off = -3, -+ fp_f = -2, -+ retaddr_off = -1, -+ }; ++ // SLOW PATH dtrace support ++ { ++ __ block_comment("dtrace entry {"); ++ __ bind(dtrace_method_entry); + -+ address generate_call_stub(address& return_address) { -+ assert((int)frame::entry_frame_after_call_words == -(int)sp_after_call_off + 1 && -+ (int)frame::entry_frame_call_wrapper_offset == (int)call_wrapper_off, -+ "adjust this code"); ++ // We have all of the arguments setup at this point. We must not touch any register ++ // argument registers at this point (what if we save/restore them there are no oop? 
+ -+ StubCodeMark mark(this, "StubRoutines", "call_stub"); -+ address start = __ pc(); ++ save_args(masm, total_c_args, c_arg, out_regs); ++ __ mov_metadata(c_rarg1, method()); ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), ++ xthread, c_rarg1); ++ restore_args(masm, total_c_args, c_arg, out_regs); ++ __ j(dtrace_method_entry_done); ++ __ block_comment("} dtrace entry"); ++ } + -+ const Address sp_after_call (fp, sp_after_call_off * wordSize); ++ { ++ __ block_comment("dtrace exit {"); ++ __ bind(dtrace_method_exit); ++ save_native_result(masm, ret_type, stack_slots); ++ __ mov_metadata(c_rarg1, method()); ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), ++ xthread, c_rarg1); ++ restore_native_result(masm, ret_type, stack_slots); ++ __ j(dtrace_method_exit_done); ++ __ block_comment("} dtrace exit"); ++ } + -+ const Address call_wrapper (fp, call_wrapper_off * wordSize); -+ const Address result (fp, result_off * wordSize); -+ const Address result_type (fp, result_type_off * wordSize); -+ const Address method (fp, method_off * wordSize); -+ const Address entry_point (fp, entry_point_off * wordSize); -+ const Address parameters (fp, parameters_off * wordSize); -+ const Address parameter_size(fp, parameter_size_off * wordSize); ++ __ flush(); + -+ const Address thread (fp, thread_off * wordSize); ++ nmethod *nm = nmethod::new_native_nmethod(method, ++ compile_id, ++ masm->code(), ++ vep_offset, ++ frame_complete, ++ stack_slots / VMRegImpl::slots_per_word, ++ (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)), ++ in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size), ++ oop_maps); ++ assert(nm != NULL, "create native nmethod fail!"); ++ return nm; ++} + -+ const Address f27_save (fp, f27_off * wordSize); -+ const Address f26_save (fp, f26_off * wordSize); -+ const Address f25_save (fp, f25_off * wordSize); -+ const Address f24_save (fp, f24_off * wordSize); -+ const Address f23_save (fp, f23_off * wordSize); -+ const Address f22_save (fp, f22_off * wordSize); -+ const Address f21_save (fp, f21_off * wordSize); -+ const Address f20_save (fp, f20_off * wordSize); -+ const Address f19_save (fp, f19_off * wordSize); -+ const Address f18_save (fp, f18_off * wordSize); -+ const Address f9_save (fp, f9_off * wordSize); -+ const Address f8_save (fp, f8_off * wordSize); ++// this function returns the adjust size (in number of words) to a c2i adapter ++// activation for use during deoptimization ++int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) { ++ assert(callee_locals >= callee_parameters, ++ "test and remove; got more parms than locals"); ++ if (callee_locals < callee_parameters) { ++ return 0; // No adjustment for negative locals ++ } ++ int diff = (callee_locals - callee_parameters) * Interpreter::stackElementWords; ++ // diff is counted in stack words ++ return align_up(diff, 2); ++} + -+ const Address x27_save (fp, x27_off * wordSize); -+ const Address x26_save (fp, x26_off * wordSize); -+ const Address x25_save (fp, x25_off * wordSize); -+ const Address x24_save (fp, x24_off * wordSize); -+ const Address x23_save (fp, x23_off * wordSize); -+ const Address x22_save (fp, x22_off * wordSize); -+ const Address x21_save (fp, x21_off * wordSize); -+ const Address x20_save (fp, x20_off * wordSize); -+ const Address x19_save (fp, x19_off * wordSize); -+ const Address x18_save (fp, x18_off * wordSize); 
++//------------------------------generate_deopt_blob---------------------------- ++void SharedRuntime::generate_deopt_blob() { ++ // Allocate space for the code ++ ResourceMark rm; ++ // Setup code generation tools ++ int pad = 0; ++ CodeBuffer buffer("deopt_blob", 2048 + pad, 1024); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ int frame_size_in_words = -1; ++ OopMap* map = NULL; ++ OopMapSet *oop_maps = new OopMapSet(); ++ assert_cond(masm != NULL && oop_maps != NULL); ++ RegisterSaver reg_saver(COMPILER2_OR_JVMCI != 0); + -+ const Address x9_save (fp, x9_off * wordSize); ++ // ------------- ++ // This code enters when returning to a de-optimized nmethod. A return ++ // address has been pushed on the the stack, and return values are in ++ // registers. ++ // If we are doing a normal deopt then we were called from the patched ++ // nmethod from the point we returned to the nmethod. So the return ++ // address on the stack is wrong by NativeCall::instruction_size ++ // We will adjust the value so it looks like we have the original return ++ // address on the stack (like when we eagerly deoptimized). ++ // In the case of an exception pending when deoptimizing, we enter ++ // with a return address on the stack that points after the call we patched ++ // into the exception handler. We have the following register state from, ++ // e.g., the forward exception stub (see stubGenerator_riscv.cpp). ++ // x10: exception oop ++ // x9: exception handler ++ // x13: throwing pc ++ // So in this case we simply jam x13 into the useless return address and ++ // the stack looks just like we want. ++ // ++ // At this point we need to de-opt. We save the argument return ++ // registers. We call the first C routine, fetch_unroll_info(). This ++ // routine captures the return values and returns a structure which ++ // describes the current frame size and the sizes of all replacement frames. ++ // The current frame is compiled code and may contain many inlined ++ // functions, each with their own JVM state. We pop the current frame, then ++ // push all the new frames. Then we call the C routine unpack_frames() to ++ // populate these frames. Finally unpack_frames() returns us the new target ++ // address. Notice that callee-save registers are BLOWN here; they have ++ // already been captured in the vframeArray at the time the return PC was ++ // patched. ++ address start = __ pc(); ++ Label cont; + -+ // stub code ++ // Prolog for non exception case! + -+ address riscv_entry = __ pc(); ++ // Save everything in sight. ++ map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words); + -+ // set up frame and move sp to end of save area -+ __ enter(); -+ __ addi(sp, fp, sp_after_call_off * wordSize); ++ // Normal deoptimization. Save exec mode for unpack_frames. ++ __ mvw(xcpool, Deoptimization::Unpack_deopt); // callee-saved ++ __ j(cont); + -+ // save register parameters and Java temporary/global registers -+ // n.b. 
we save thread even though it gets installed in -+ // xthread because we want to sanity check tp later -+ __ sd(c_rarg7, thread); -+ __ sw(c_rarg6, parameter_size); -+ __ sd(c_rarg5, parameters); -+ __ sd(c_rarg4, entry_point); -+ __ sd(c_rarg3, method); -+ __ sd(c_rarg2, result_type); -+ __ sd(c_rarg1, result); -+ __ sd(c_rarg0, call_wrapper); ++ int reexecute_offset = __ pc() - start; + -+ __ sd(x9, x9_save); ++ // Reexecute case ++ // return address is the pc describes what bci to do re-execute at + -+ __ sd(x18, x18_save); -+ __ sd(x19, x19_save); -+ __ sd(x20, x20_save); -+ __ sd(x21, x21_save); -+ __ sd(x22, x22_save); -+ __ sd(x23, x23_save); -+ __ sd(x24, x24_save); -+ __ sd(x25, x25_save); -+ __ sd(x26, x26_save); -+ __ sd(x27, x27_save); ++ // No need to update map as each call to save_live_registers will produce identical oopmap ++ (void) reg_saver.save_live_registers(masm, 0, &frame_size_in_words); + -+ __ fsd(f8, f8_save); -+ __ fsd(f9, f9_save); -+ __ fsd(f18, f18_save); -+ __ fsd(f19, f19_save); -+ __ fsd(f20, f20_save); -+ __ fsd(f21, f21_save); -+ __ fsd(f22, f22_save); -+ __ fsd(f23, f23_save); -+ __ fsd(f24, f24_save); -+ __ fsd(f25, f25_save); -+ __ fsd(f26, f26_save); -+ __ fsd(f27, f27_save); ++ __ mvw(xcpool, Deoptimization::Unpack_reexecute); // callee-saved ++ __ j(cont); + -+ // install Java thread in global register now we have saved -+ // whatever value it held -+ __ mv(xthread, c_rarg7); ++ int exception_offset = __ pc() - start; + -+ // And method -+ __ mv(xmethod, c_rarg3); ++ // Prolog for exception case + -+ // set up the heapbase register -+ __ reinit_heapbase(); ++ // all registers are dead at this entry point, except for x10, and ++ // x13 which contain the exception oop and exception pc ++ // respectively. Set them in TLS and fall thru to the ++ // unpack_with_exception_in_tls entry point. + -+#ifdef ASSERT -+ // make sure we have no pending exceptions -+ { -+ Label L; -+ __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset()))); -+ __ beqz(t0, L); -+ __ stop("StubRoutines::call_stub: entered with pending exception"); -+ __ BIND(L); -+ } -+#endif -+ // pass parameters if any -+ __ mv(esp, sp); -+ __ slli(t0, c_rarg6, LogBytesPerWord); -+ __ sub(t0, sp, t0); // Move SP out of the way -+ __ andi(sp, t0, -2 * wordSize); ++ __ sd(x13, Address(xthread, JavaThread::exception_pc_offset())); ++ __ sd(x10, Address(xthread, JavaThread::exception_oop_offset())); + -+ BLOCK_COMMENT("pass parameters if any"); -+ Label parameters_done; -+ // parameter count is still in c_rarg6 -+ // and parameter pointer identifying param 1 is in c_rarg5 -+ __ beqz(c_rarg6, parameters_done); ++ int exception_in_tls_offset = __ pc() - start; + -+ address loop = __ pc(); -+ __ ld(t0, c_rarg5, 0); -+ __ addi(c_rarg5, c_rarg5, wordSize); -+ __ addi(c_rarg6, c_rarg6, -1); -+ __ push_reg(t0); -+ __ bgtz(c_rarg6, loop); ++ // new implementation because exception oop is now passed in JavaThread + -+ __ BIND(parameters_done); ++ // Prolog for exception case ++ // All registers must be preserved because they might be used by LinearScan ++ // Exceptiop oop and throwing PC are passed in JavaThread ++ // tos: stack at point of call to method that threw the exception (i.e. 
only ++ // args are on the stack, no return address) + -+ // call Java entry -- passing methdoOop, and current sp -+ // xmethod: Method* -+ // x30: sender sp -+ BLOCK_COMMENT("call Java function"); -+ __ mv(x30, sp); -+ __ jalr(c_rarg4); ++ // The return address pushed by save_live_registers will be patched ++ // later with the throwing pc. The correct value is not available ++ // now because loading it from memory would destroy registers. + -+ // save current address for use by exception handling code ++ // NB: The SP at this point must be the SP of the method that is ++ // being deoptimized. Deoptimization assumes that the frame created ++ // here by save_live_registers is immediately below the method's SP. ++ // This is a somewhat fragile mechanism. + -+ return_address = __ pc(); ++ // Save everything in sight. ++ map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words); + -+ // store result depending on type (everything that is not -+ // T_OBJECT, T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT) -+ // n.b. this assumes Java returns an integral result in x10 -+ // and a floating result in j_farg0 -+ __ ld(j_rarg2, result); -+ Label is_long, is_float, is_double, exit; -+ __ ld(j_rarg1, result_type); -+ __ mv(t0, (u1)T_OBJECT); -+ __ beq(j_rarg1, t0, is_long); -+ __ mv(t0, (u1)T_LONG); -+ __ beq(j_rarg1, t0, is_long); -+ __ mv(t0, (u1)T_FLOAT); -+ __ beq(j_rarg1, t0, is_float); -+ __ mv(t0, (u1)T_DOUBLE); -+ __ beq(j_rarg1, t0, is_double); ++ // Now it is safe to overwrite any register + -+ // handle T_INT case -+ __ sw(x10, Address(j_rarg2)); ++ // Deopt during an exception. Save exec mode for unpack_frames. ++ __ li(xcpool, Deoptimization::Unpack_exception); // callee-saved + -+ __ BIND(exit); ++ // load throwing pc from JavaThread and patch it as the return address ++ // of the current frame. 
Then clear the field in JavaThread + -+ // pop parameters -+ __ addi(esp, fp, sp_after_call_off * wordSize); ++ __ ld(x13, Address(xthread, JavaThread::exception_pc_offset())); ++ __ sd(x13, Address(fp, frame::return_addr_offset * wordSize)); ++ __ sd(zr, Address(xthread, JavaThread::exception_pc_offset())); + +#ifdef ASSERT -+ // verify that threads correspond -+ { -+ Label L, S; -+ __ ld(t0, thread); -+ __ bne(xthread, t0, S); -+ __ get_thread(t0); -+ __ beq(xthread, t0, L); -+ __ BIND(S); -+ __ stop("StubRoutines::call_stub: threads must correspond"); -+ __ BIND(L); -+ } -+#endif -+ -+ // restore callee-save registers -+ __ fld(f27, f27_save); -+ __ fld(f26, f26_save); -+ __ fld(f25, f25_save); -+ __ fld(f24, f24_save); -+ __ fld(f23, f23_save); -+ __ fld(f22, f22_save); -+ __ fld(f21, f21_save); -+ __ fld(f20, f20_save); -+ __ fld(f19, f19_save); -+ __ fld(f18, f18_save); -+ __ fld(f9, f9_save); -+ __ fld(f8, f8_save); -+ -+ __ ld(x27, x27_save); -+ __ ld(x26, x26_save); -+ __ ld(x25, x25_save); -+ __ ld(x24, x24_save); -+ __ ld(x23, x23_save); -+ __ ld(x22, x22_save); -+ __ ld(x21, x21_save); -+ __ ld(x20, x20_save); -+ __ ld(x19, x19_save); -+ __ ld(x18, x18_save); ++ // verify that there is really an exception oop in JavaThread ++ __ ld(x10, Address(xthread, JavaThread::exception_oop_offset())); ++ __ verify_oop(x10); + -+ __ ld(x9, x9_save); ++ // verify that there is no pending exception ++ Label no_pending_exception; ++ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); ++ __ beqz(t0, no_pending_exception); ++ __ stop("must not have pending exception here"); ++ __ bind(no_pending_exception); ++#endif + -+ __ ld(c_rarg0, call_wrapper); -+ __ ld(c_rarg1, result); -+ __ ld(c_rarg2, result_type); -+ __ ld(c_rarg3, method); -+ __ ld(c_rarg4, entry_point); -+ __ ld(c_rarg5, parameters); -+ __ ld(c_rarg6, parameter_size); -+ __ ld(c_rarg7, thread); ++ __ bind(cont); + -+ // leave frame and return to caller -+ __ leave(); -+ __ ret(); ++ // Call C code. Need thread and this frame, but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. ++ // ++ // UnrollBlock* fetch_unroll_info(JavaThread* thread) + -+ // handle return types different from T_INT ++ // fetch_unroll_info needs to call last_java_frame(). + -+ __ BIND(is_long); -+ __ sd(x10, Address(j_rarg2, 0)); -+ __ j(exit); ++ Label retaddr; ++ __ set_last_Java_frame(sp, noreg, retaddr, t0); ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld(t0, Address(xthread, ++ JavaThread::last_Java_fp_offset())); ++ __ beqz(t0, L); ++ __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared"); ++ __ bind(L); ++ } ++#endif // ASSERT ++ __ mv(c_rarg0, xthread); ++ __ mv(c_rarg1, xcpool); ++ int32_t offset = 0; ++ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info)), offset); ++ __ jalr(x1, t0, offset); ++ __ bind(retaddr); + -+ __ BIND(is_float); -+ __ fsw(j_farg0, Address(j_rarg2, 0), t0); -+ __ j(exit); ++ // Need to have an oopmap that tells fetch_unroll_info where to ++ // find any register it might need. ++ oop_maps->add_gc_map(__ pc() - start, map); + -+ __ BIND(is_double); -+ __ fsd(j_farg0, Address(j_rarg2, 0), t0); -+ __ j(exit); ++ __ reset_last_Java_frame(false); + -+ return start; -+ } ++ // Load UnrollBlock* into x15 ++ __ mv(x15, x10); + -+ // Return point for a Java call if there's an exception thrown in -+ // Java code. 
The exception is caught and transformed into a -+ // pending exception stored in JavaThread that can be tested from -+ // within the VM. -+ // -+ // Note: Usually the parameters are removed by the callee. In case -+ // of an exception crossing an activation frame boundary, that is -+ // not the case if the callee is compiled code => need to setup the -+ // sp. -+ // -+ // x10: exception oop ++ __ lwu(xcpool, Address(x15, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes())); ++ Label noException; ++ __ li(t0, Deoptimization::Unpack_exception); ++ __ bne(xcpool, t0, noException); // Was exception pending? ++ __ ld(x10, Address(xthread, JavaThread::exception_oop_offset())); ++ __ ld(x13, Address(xthread, JavaThread::exception_pc_offset())); ++ __ sd(zr, Address(xthread, JavaThread::exception_oop_offset())); ++ __ sd(zr, Address(xthread, JavaThread::exception_pc_offset())); + -+ address generate_catch_exception() { -+ StubCodeMark mark(this, "StubRoutines", "catch_exception"); -+ address start = __ pc(); ++ __ verify_oop(x10); + -+ // same as in generate_call_stub(): -+ const Address thread(fp, thread_off * wordSize); ++ // Overwrite the result registers with the exception results. ++ __ sd(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10))); + -+#ifdef ASSERT -+ // verify that threads correspond -+ { -+ Label L, S; -+ __ ld(t0, thread); -+ __ bne(xthread, t0, S); -+ __ get_thread(t0); -+ __ beq(xthread, t0, L); -+ __ bind(S); -+ __ stop("StubRoutines::catch_exception: threads must correspond"); -+ __ bind(L); -+ } -+#endif ++ __ bind(noException); + -+ // set pending exception -+ __ verify_oop(x10); ++ // Only register save data is on the stack. ++ // Now restore the result registers. Everything else is either dead ++ // or captured in the vframeArray. + -+ __ sd(x10, Address(xthread, Thread::pending_exception_offset())); -+ __ mv(t0, (address)__FILE__); -+ __ sd(t0, Address(xthread, Thread::exception_file_offset())); -+ __ mv(t0, (int)__LINE__); -+ __ sw(t0, Address(xthread, Thread::exception_line_offset())); ++ // Restore fp result register ++ __ fld(f10, Address(sp, reg_saver.freg_offset_in_bytes(f10))); ++ // Restore integer result register ++ __ ld(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10))); + -+ // complete return to VM -+ assert(StubRoutines::_call_stub_return_address != NULL, -+ "_call_stub_return_address must have been generated before"); -+ __ j(StubRoutines::_call_stub_return_address); ++ // Pop all of the register save area off the stack ++ __ add(sp, sp, frame_size_in_words * wordSize); + -+ return start; -+ } ++ // All of the register save area has been popped of the stack. Only the ++ // return address remains. + -+ // Continuation point for runtime calls returning with a pending -+ // exception. The pending exception check happened in the runtime -+ // or native call stub. The pending exception in Thread is -+ // converted into a Java-level exception. ++ // Pop all the frames we must move/replace. + // -+ // Contract with Java-level exception handlers: -+ // x10: exception -+ // x13: throwing pc ++ // Frame picture (youngest to oldest) ++ // 1: self-frame (no frame link) ++ // 2: deopting frame (no frame link) ++ // 3: caller of deopting frame (could be compiled/interpreted). + // -+ // NOTE: At entry of this stub, exception-pc must be in RA !! 
-+ -+ // NOTE: this is always used as a jump target within generated code -+ // so it just needs to be generated code with no x86 prolog -+ -+ address generate_forward_exception() { -+ StubCodeMark mark(this, "StubRoutines", "forward exception"); -+ address start = __ pc(); ++ // Note: by leaving the return address of self-frame on the stack ++ // and using the size of frame 2 to adjust the stack ++ // when we are done the return to frame 3 will still be on the stack. + -+ // Upon entry, RA points to the return address returning into -+ // Java (interpreted or compiled) code; i.e., the return address -+ // becomes the throwing pc. -+ // -+ // Arguments pushed before the runtime call are still on the stack -+ // but the exception handler will reset the stack pointer -> -+ // ignore them. A potential result in registers can be ignored as -+ // well. ++ // Pop deoptimized frame ++ __ lwu(x12, Address(x15, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes())); ++ __ sub(x12, x12, 2 * wordSize); ++ __ add(sp, sp, x12); ++ __ ld(fp, Address(sp, 0)); ++ __ ld(ra, Address(sp, wordSize)); ++ __ addi(sp, sp, 2 * wordSize); ++ // RA should now be the return address to the caller (3) + +#ifdef ASSERT -+ // make sure this code is only executed if there is a pending exception -+ { -+ Label L; -+ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); -+ __ bnez(t0, L); -+ __ stop("StubRoutines::forward exception: no pending exception (1)"); -+ __ bind(L); -+ } ++ // Compilers generate code that bang the stack by as much as the ++ // interpreter would need. So this stack banging should never ++ // trigger a fault. Verify that it does not on non product builds. ++ __ lwu(x9, Address(x15, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes())); ++ __ bang_stack_size(x9, x12); +#endif ++ // Load address of array of frame pcs into x12 ++ __ ld(x12, Address(x15, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes())); + -+ // compute exception handler into x9 -+ -+ // call the VM to find the handler address associated with the -+ // caller address. pass thread in x10 and caller pc (ret address) -+ // in x11. n.b. the caller pc is in ra, unlike x86 where it is on -+ // the stack. -+ __ mv(c_rarg1, ra); -+ // ra will be trashed by the VM call so we move it to x9 -+ // (callee-saved) because we also need to pass it to the handler -+ // returned by this call. -+ __ mv(x9, ra); -+ BLOCK_COMMENT("call exception_handler_for_return_address"); -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, -+ SharedRuntime::exception_handler_for_return_address), -+ xthread, c_rarg1); -+ // we should not really care that ra is no longer the callee -+ // address. we saved the value the handler needs in x9 so we can -+ // just copy it to x13. however, the C2 handler will push its own -+ // frame and then calls into the VM and the VM code asserts that -+ // the PC for the frame above the handler belongs to a compiled -+ // Java method. So, we restore ra here to satisfy that assert. 
-+ __ mv(ra, x9); -+ // setup x10 & x13 & clear pending exception -+ __ mv(x13, x9); -+ __ mv(x9, x10); -+ __ ld(x10, Address(xthread, Thread::pending_exception_offset())); -+ __ sd(zr, Address(xthread, Thread::pending_exception_offset())); ++ // Load address of array of frame sizes into x14 ++ __ ld(x14, Address(x15, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes())); + -+#ifdef ASSERT -+ // make sure exception is set -+ { -+ Label L; -+ __ bnez(x10, L); -+ __ stop("StubRoutines::forward exception: no pending exception (2)"); -+ __ bind(L); -+ } -+#endif ++ // Load counter into x13 ++ __ lwu(x13, Address(x15, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes())); + -+ // continue at exception handler -+ // x10: exception -+ // x13: throwing pc -+ // x9: exception handler -+ __ verify_oop(x10); -+ __ jr(x9); ++ // Now adjust the caller's stack to make up for the extra locals ++ // but record the original sp so that we can save it in the skeletal interpreter ++ // frame and the stack walking of interpreter_sender will get the unextended sp ++ // value and not the "real" sp value. + -+ return start; -+ } ++ const Register sender_sp = x16; + -+ // Non-destructive plausibility checks for oops -+ // -+ // Arguments: -+ // x10: oop to verify -+ // t0: error message -+ // -+ // Stack after saving c_rarg3: -+ // [tos + 0]: saved c_rarg3 -+ // [tos + 1]: saved c_rarg2 -+ // [tos + 2]: saved ra -+ // [tos + 3]: saved t1 -+ // [tos + 4]: saved x10 -+ // [tos + 5]: saved t0 -+ address generate_verify_oop() { ++ __ mv(sender_sp, sp); ++ __ lwu(x9, Address(x15, ++ Deoptimization::UnrollBlock:: ++ caller_adjustment_offset_in_bytes())); ++ __ sub(sp, sp, x9); + -+ StubCodeMark mark(this, "StubRoutines", "verify_oop"); -+ address start = __ pc(); ++ // Push interpreter frames in a loop ++ __ li(t0, 0xDEADDEAD); // Make a recognizable pattern ++ __ mv(t1, t0); ++ Label loop; ++ __ bind(loop); ++ __ ld(x9, Address(x14, 0)); // Load frame size ++ __ addi(x14, x14, wordSize); ++ __ sub(x9, x9, 2 * wordSize); // We'll push pc and fp by hand ++ __ ld(ra, Address(x12, 0)); // Load pc ++ __ addi(x12, x12, wordSize); ++ __ enter(); // Save old & set new fp ++ __ sub(sp, sp, x9); // Prolog ++ // This value is corrected by layout_activation_impl ++ __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ __ sd(sender_sp, Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); // Make it walkable ++ __ mv(sender_sp, sp); // Pass sender_sp to next frame ++ __ addi(x13, x13, -1); // Decrement counter ++ __ bnez(x13, loop); + -+ Label exit, error; ++ // Re-push self-frame ++ __ ld(ra, Address(x12)); ++ __ enter(); + -+ __ push_reg(RegSet::of(c_rarg2, c_rarg3), sp); // save c_rarg2 and c_rarg3 ++ // Allocate a full sized register save area. We subtract 2 because ++ // enter() just pushed 2 words ++ __ sub(sp, sp, (frame_size_in_words - 2) * wordSize); + -+ __ la(c_rarg2, ExternalAddress((address) StubRoutines::verify_oop_count_addr())); -+ __ ld(c_rarg3, Address(c_rarg2)); -+ __ add(c_rarg3, c_rarg3, 1); -+ __ sd(c_rarg3, Address(c_rarg2)); ++ // Restore frame locals after moving the frame ++ __ fsd(f10, Address(sp, reg_saver.freg_offset_in_bytes(f10))); ++ __ sd(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10))); + -+ // object is in x10 -+ // make sure object is 'reasonable' -+ __ beqz(x10, exit); // if obj is NULL it is OK ++ // Call C code. Need thread but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. 
Call should ++ // restore return values to their stack-slots with the new SP. ++ // ++ // void Deoptimization::unpack_frames(JavaThread* thread, int exec_mode) + -+ // Check if the oop is in the right area of memory -+ __ mv(c_rarg3, (intptr_t) Universe::verify_oop_mask()); -+ __ andr(c_rarg2, x10, c_rarg3); -+ __ mv(c_rarg3, (intptr_t) Universe::verify_oop_bits()); ++ // Use fp because the frames look interpreted now ++ // Don't need the precise return PC here, just precise enough to point into this code blob. ++ address the_pc = __ pc(); ++ __ set_last_Java_frame(sp, fp, the_pc, t0); + -+ // Compare c_rarg2 and c_rarg3 -+ __ bne(c_rarg2, c_rarg3, error); ++ __ mv(c_rarg0, xthread); ++ __ mv(c_rarg1, xcpool); // second arg: exec_mode ++ offset = 0; ++ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)), offset); ++ __ jalr(x1, t0, offset); + -+ // make sure klass is 'reasonable', which is not zero. -+ __ load_klass(x10, x10); // get klass -+ __ beqz(x10, error); // if klass is NULL it is broken ++ // Set an oopmap for the call site ++ // Use the same PC we used for the last java frame ++ oop_maps->add_gc_map(the_pc - start, ++ new OopMap(frame_size_in_words, 0)); + -+ // return if everything seems ok -+ __ bind(exit); ++ // Clear fp AND pc ++ __ reset_last_Java_frame(true); + -+ __ pop_reg(RegSet::of(c_rarg2, c_rarg3), sp); // pop c_rarg2 and c_rarg3 -+ __ ret(); ++ // Collect return values ++ __ fld(f10, Address(sp, reg_saver.freg_offset_in_bytes(f10))); ++ __ ld(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10))); + -+ // handle errors -+ __ bind(error); -+ __ pop_reg(RegSet::of(c_rarg2, c_rarg3), sp); // pop c_rarg2 and c_rarg3 ++ // Pop self-frame. ++ __ leave(); // Epilog + -+ __ push_reg(RegSet::range(x0, x31), sp); -+ // prepare parameters for debug64, c_rarg0: address of error message, -+ // c_rarg1: return address, c_rarg2: address of regs on stack -+ __ mv(c_rarg0, t0); // pass address of error message -+ __ mv(c_rarg1, ra); // pass return address -+ __ mv(c_rarg2, sp); // pass address of regs on stack -+#ifndef PRODUCT -+ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); -+#endif -+ BLOCK_COMMENT("call MacroAssembler::debug"); -+ int32_t offset = 0; -+ __ movptr_with_offset(t0, CAST_FROM_FN_PTR(address, MacroAssembler::debug64), offset); -+ __ jalr(x1, t0, offset); ++ // Jump to interpreter ++ __ ret(); + -+ return start; -+ } ++ // Make sure all code is generated ++ masm->flush(); + -+ // The inner part of zero_words(). -+ // -+ // Inputs: -+ // x28: the HeapWord-aligned base address of an array to zero. -+ // x29: the count in HeapWords, x29 > 0. -+ // -+ // Returns x28 and x29, adjusted for the caller to clear. -+ // x28: the base address of the tail of words left to clear. -+ // x29: the number of words in the tail. -+ // x29 < MacroAssembler::zero_words_block_size. ++ _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words); ++ assert(_deopt_blob != NULL, "create deoptimization blob fail!"); ++ _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); ++} + -+ address generate_zero_blocks() { -+ Label done; ++// Number of stack slots between incoming argument block and the start of ++// a new frame. The PROLOG must add this many slots to the stack. The ++// EPILOG must remove this many slots. ++// RISCV needs two words for RA (return address) and FP (frame pointer). 
++uint SharedRuntime::in_preserve_stack_slots() { ++ return 2 * VMRegImpl::slots_per_word; ++} + -+ const Register base = x28, cnt = x29; ++uint SharedRuntime::out_preserve_stack_slots() { ++ return 0; ++} + -+ __ align(CodeEntryAlignment); -+ StubCodeMark mark(this, "StubRoutines", "zero_blocks"); -+ address start = __ pc(); ++#ifdef COMPILER2 ++//------------------------------generate_uncommon_trap_blob-------------------- ++void SharedRuntime::generate_uncommon_trap_blob() { ++ // Allocate space for the code ++ ResourceMark rm; ++ // Setup code generation tools ++ CodeBuffer buffer("uncommon_trap_blob", 2048, 1024); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ assert_cond(masm != NULL); + -+ { -+ // Clear the remaining blocks. -+ Label loop; -+ __ sub(cnt, cnt, MacroAssembler::zero_words_block_size); -+ __ bltz(cnt, done); -+ __ bind(loop); -+ for (int i = 0; i < MacroAssembler::zero_words_block_size; i++) { -+ __ sd(zr, Address(base, 0)); -+ __ add(base, base, 8); -+ } -+ __ sub(cnt, cnt, MacroAssembler::zero_words_block_size); -+ __ bgez(cnt, loop); -+ __ bind(done); -+ __ add(cnt, cnt, MacroAssembler::zero_words_block_size); -+ } ++ assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned"); + -+ __ ret(); ++ address start = __ pc(); + -+ return start; -+ } ++ // Push self-frame. We get here with a return address in RA ++ // and sp should be 16 byte aligned ++ // push fp and retaddr by hand ++ __ addi(sp, sp, -2 * wordSize); ++ __ sd(ra, Address(sp, wordSize)); ++ __ sd(fp, Address(sp, 0)); ++ // we don't expect an arg reg save area ++#ifndef PRODUCT ++ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); ++#endif ++ // compiler left unloaded_class_index in j_rarg0 move to where the ++ // runtime expects it. ++ __ addiw(c_rarg1, j_rarg0, 0); + -+ typedef void (MacroAssembler::*copy_insn)(Register R1, Register R2, const int32_t offset); ++ // we need to set the past SP to the stack pointer of the stub frame ++ // and the pc to the address where this runtime call will return ++ // although actually any pc in this code blob will do). ++ Label retaddr; ++ __ set_last_Java_frame(sp, noreg, retaddr, t0); + -+ void copy_by_step(RegSet tmp_regs, Register src, Register dst, -+ unsigned unroll_factor, int unit) { -+ unsigned char regs[32]; -+ int offset = unit < 0 ? unit : 0; ++ // Call C code. Need thread but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. Call should ++ // capture callee-saved registers as well as return values. ++ // ++ // UnrollBlock* uncommon_trap(JavaThread* thread, jint unloaded_class_index, jint exec_mode) ++ // ++ // n.b. 
3 gp args, 0 fp args, integral return type + -+ // Scan bitset to get tmp regs -+ unsigned int regsSize = 0; -+ unsigned bitset = tmp_regs.bits(); ++ __ mv(c_rarg0, xthread); ++ __ mvw(c_rarg2, (unsigned)Deoptimization::Unpack_uncommon_trap); ++ int32_t offset = 0; ++ __ la_patchable(t0, ++ RuntimeAddress(CAST_FROM_FN_PTR(address, ++ Deoptimization::uncommon_trap)), offset); ++ __ jalr(x1, t0, offset); ++ __ bind(retaddr); + -+ assert(((bitset & (1 << (src->encoding()))) == 0), "src should not in tmp regs"); -+ assert(((bitset & (1 << (dst->encoding()))) == 0), "dst should not in tmp regs"); ++ // Set an oopmap for the call site ++ OopMapSet* oop_maps = new OopMapSet(); ++ OopMap* map = new OopMap(SimpleRuntimeFrame::framesize, 0); ++ assert_cond(oop_maps != NULL && map != NULL); + -+ for (int reg = 31; reg >= 0; reg--) { -+ if ((1U << 31) & bitset) { -+ regs[regsSize++] = reg; -+ } -+ bitset <<= 1; -+ } ++ // location of fp is known implicitly by the frame sender code + -+ copy_insn ld_arr = NULL, st_arr = NULL; -+ switch (abs(unit)) { -+ case 1 : -+ ld_arr = (copy_insn)&MacroAssembler::lbu; -+ st_arr = (copy_insn)&MacroAssembler::sb; -+ break; -+ case BytesPerShort : -+ ld_arr = (copy_insn)&MacroAssembler::lhu; -+ st_arr = (copy_insn)&MacroAssembler::sh; -+ break; -+ case BytesPerInt : -+ ld_arr = (copy_insn)&MacroAssembler::lwu; -+ st_arr = (copy_insn)&MacroAssembler::sw; -+ break; -+ case BytesPerLong : -+ ld_arr = (copy_insn)&MacroAssembler::ld; -+ st_arr = (copy_insn)&MacroAssembler::sd; -+ break; -+ default : -+ ShouldNotReachHere(); -+ } ++ oop_maps->add_gc_map(__ pc() - start, map); + -+ for (unsigned i = 0; i < unroll_factor; i++) { -+ (_masm->*ld_arr)(as_Register(regs[0]), src, i * unit + offset); -+ (_masm->*st_arr)(as_Register(regs[0]), dst, i * unit + offset); -+ } ++ __ reset_last_Java_frame(false); + -+ __ addi(src, src, unroll_factor * unit); -+ __ addi(dst, dst, unroll_factor * unit); -+ } ++ // move UnrollBlock* into x14 ++ __ mv(x14, x10); + -+ void copy_tail(Register src, Register dst, Register count_in_bytes, Register tmp, -+ int ele_size, unsigned align_unit) { -+ bool is_backwards = ele_size < 0; -+ size_t granularity = uabs(ele_size); -+ for (unsigned unit = (align_unit >> 1); unit >= granularity; unit >>= 1) { -+ int offset = is_backwards ? (int)(-unit) : unit; -+ Label exit; -+ __ andi(tmp, count_in_bytes, unit); -+ __ beqz(tmp, exit); -+ copy_by_step(RegSet::of(tmp), src, dst, /* unroll_factor */ 1, offset); -+ __ bind(exit); -+ } ++#ifdef ASSERT ++ { Label L; ++ __ lwu(t0, Address(x14, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes())); ++ __ mvw(t1, Deoptimization::Unpack_uncommon_trap); ++ __ beq(t0, t1, L); ++ __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared"); ++ __ bind(L); + } ++#endif + -+ void copy_loop8(Register src, Register dst, Register count_in_bytes, Register tmp, -+ int step, Label *Lcopy_small, Register loopsize = noreg) { -+ size_t granularity = uabs(step); -+ RegSet tmp_regs = RegSet::range(x13, x16); -+ assert_different_registers(src, dst, count_in_bytes, tmp); -+ -+ Label loop, copy2, copy1, finish; -+ if (loopsize == noreg) { -+ loopsize = t1; -+ __ mv(loopsize, 8 * granularity); -+ } -+ -+ // Cyclic copy with 8*step. 
-+ __ bind(loop); -+ { -+ copy_by_step(tmp_regs, src, dst, /* unroll_factor */ 8, step); -+ __ sub(count_in_bytes, count_in_bytes, 8 * granularity); -+ __ bge(count_in_bytes, loopsize, loop); -+ } -+ -+ if (Lcopy_small != NULL) { -+ __ bind(*Lcopy_small); -+ } ++ // Pop all the frames we must move/replace. ++ // ++ // Frame picture (youngest to oldest) ++ // 1: self-frame (no frame link) ++ // 2: deopting frame (no frame link) ++ // 3: caller of deopting frame (could be compiled/interpreted). + -+ // copy memory smaller than step * 8 bytes -+ __ andi(tmp, count_in_bytes, granularity << 2); -+ __ beqz(tmp, copy2); -+ copy_by_step(tmp_regs, src, dst, /* unroll_factor */ 4, step); ++ __ add(sp, sp, (SimpleRuntimeFrame::framesize) << LogBytesPerInt); // Epilog! + -+ __ bind(copy2); -+ __ andi(tmp, count_in_bytes, granularity << 1); -+ __ beqz(tmp, copy1); -+ copy_by_step(tmp_regs, src, dst, /* unroll_factor */ 2, step); ++ // Pop deoptimized frame (int) ++ __ lwu(x12, Address(x14, ++ Deoptimization::UnrollBlock:: ++ size_of_deoptimized_frame_offset_in_bytes())); ++ __ sub(x12, x12, 2 * wordSize); ++ __ add(sp, sp, x12); ++ __ ld(fp, sp, 0); ++ __ ld(ra, sp, wordSize); ++ __ addi(sp, sp, 2 * wordSize); ++ // RA should now be the return address to the caller (3) frame + -+ __ bind(copy1); -+ __ andi(tmp, count_in_bytes, granularity); -+ __ beqz(tmp, finish); -+ copy_by_step(tmp_regs, src, dst, /* unroll_factor */ 1, step); ++#ifdef ASSERT ++ // Compilers generate code that bang the stack by as much as the ++ // interpreter would need. So this stack banging should never ++ // trigger a fault. Verify that it does not on non product builds. ++ __ lwu(x11, Address(x14, ++ Deoptimization::UnrollBlock:: ++ total_frame_sizes_offset_in_bytes())); ++ __ bang_stack_size(x11, x12); ++#endif + -+ __ bind(finish); -+ } ++ // Load address of array of frame pcs into x12 (address*) ++ __ ld(x12, Address(x14, ++ Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes())); + -+ // Cyclic copy with one step. -+ void copy_loop1(Register src, Register dst, Register count_in_bytes, int step, Register loopsize = noreg) { -+ size_t granularity = uabs(step); -+ Label loop1; -+ if (loopsize == noreg) { -+ loopsize = t0; -+ __ mv(loopsize, granularity); -+ } ++ // Load address of array of frame sizes into x15 (intptr_t*) ++ __ ld(x15, Address(x14, ++ Deoptimization::UnrollBlock:: ++ frame_sizes_offset_in_bytes())); + -+ __ bind(loop1); -+ { -+ copy_by_step(RegSet::of(x13), src, dst, /* unroll_factor */ 1, step); -+ __ sub(count_in_bytes, count_in_bytes, granularity); -+ __ bge(count_in_bytes, loopsize, loop1); -+ } -+ } -+ -+ void align_unit(Register src, Register dst, Register count_in_bytes, -+ unsigned unit, bool is_backwards) { -+ Label skip; -+ __ andi(t0, dst, unit); -+ __ beqz(t0, skip); -+ copy_by_step(RegSet::of(t0), src, dst, 1, is_backwards ? -unit : unit); -+ __ sub(count_in_bytes, count_in_bytes, unit); -+ __ bind(skip); -+ } -+ -+ void copy_memory(bool is_align, Register s, Register d, Register count_in_elements, -+ Register tmp, int ele_step) { -+ -+ bool is_backwards = ele_step < 0; -+ unsigned int granularity = uabs(ele_step); -+ Label Lcopy_small, Ldone, Lcopy_ele, Laligned; -+ const Register count_in_bytes = x31, src = x28, dst = x29; -+ assert_different_registers(src, dst, count_in_elements, count_in_bytes, tmp, t1); -+ __ slli(count_in_bytes, count_in_elements, exact_log2(granularity)); -+ __ add(src, s, is_backwards ? count_in_bytes : zr); -+ __ add(dst, d, is_backwards ? 
count_in_bytes : zr); -+ -+ // if count_in_elements < 8, copy_small -+ __ mv(t0, 8); -+ if (is_align && granularity < BytesPerLong) { -+ __ blt(count_in_bytes, t0, Lcopy_small); -+ } else { -+ __ blt(count_in_elements, t0, Lcopy_small); -+ } -+ -+ if (granularity < BytesPerLong) { -+ Label Lcopy_aligned[3]; -+ Label Lalign8; -+ if (!is_align) { -+ Label Lalign_and_copy; -+ __ mv(t0, EagerArrayCopyThreshold); -+ __ blt(count_in_bytes, t0, Lalign_and_copy); -+ // Align dst to 8. -+ for (unsigned unit = granularity; unit <= 4; unit <<= 1) { -+ align_unit(src, dst, count_in_bytes, unit, is_backwards); -+ } ++ // Counter ++ __ lwu(x13, Address(x14, ++ Deoptimization::UnrollBlock:: ++ number_of_frames_offset_in_bytes())); // (int) + -+ Register shr = x30, shl = x7, tmp1 = x13; ++ // Now adjust the caller's stack to make up for the extra locals but ++ // record the original sp so that we can save it in the skeletal ++ // interpreter frame and the stack walking of interpreter_sender ++ // will get the unextended sp value and not the "real" sp value. + -+ __ andi(shr, src, 0x7); -+ __ beqz(shr, Lalign8); -+ { -+ // calculaute the shift for store doubleword -+ __ slli(shr, shr, 3); -+ __ sub(shl, shr, 64); -+ __ sub(shl, zr, shl); -+ -+ // alsrc: previous position of src octal alignment -+ Register alsrc = t1; -+ __ andi(alsrc, src, -8); -+ -+ // move src to tail -+ __ andi(t0, count_in_bytes, -8); -+ if (is_backwards) { -+ __ sub(src, src, t0); -+ } else { -+ __ add(src, src, t0); -+ } ++ const Register sender_sp = t1; // temporary register + -+ // prepare for copy_dstaligned_loop -+ __ ld(tmp1, alsrc, 0); -+ dst_aligned_copy_32bytes_loop(alsrc, dst, shr, shl, count_in_bytes, is_backwards); -+ __ mv(x17, 8); -+ __ blt(count_in_bytes, x17, Lcopy_small); -+ dst_aligned_copy_8bytes_loop(alsrc, dst, shr, shl, count_in_bytes, x17, is_backwards); -+ __ j(Lcopy_small); -+ } -+ __ j(Ldone); -+ __ bind(Lalign_and_copy); -+ -+ // Check src and dst could be 8/4/2 algined at the same time. If could, align the -+ // memory and copy by 8/4/2. -+ __ xorr(t1, src, dst); -+ -+ for (unsigned alignment = granularity << 1; alignment <= 8; alignment <<= 1) { -+ Label skip; -+ unsigned int unit = alignment >> 1; -+ // Check src and dst could be aligned to checkbyte at the same time -+ // if copy from src to dst. If couldn't, jump to label not_aligned. -+ __ andi(t0, t1, alignment - 1); -+ __ bnez(t0, Lcopy_aligned[exact_log2(unit)]); -+ // Align src and dst to unit. -+ align_unit(src, dst, count_in_bytes, unit, is_backwards); -+ } -+ } -+ __ bind(Lalign8); -+ for (unsigned step_size = 8; step_size > granularity; step_size >>= 1) { -+ // Copy memory by steps, which has been aligned to step_size. -+ Label loop8, Ltail; -+ int step = is_backwards ? (-step_size) : step_size; -+ if (!(step_size == 8 && is_align)) { // which has load 8 to t0 before -+ // Check whether the memory size is smaller than step_size. -+ __ mv(t0, step_size); -+ __ blt(count_in_bytes, t0, Ltail); -+ } -+ const Register eight_step = t1; -+ __ mv(eight_step, step_size * 8); -+ __ bge(count_in_bytes, eight_step, loop8); -+ // If memory is less than 8*step_size bytes, loop by step. -+ copy_loop1(src, dst, count_in_bytes, step, t0); -+ copy_tail(src, dst, count_in_bytes, tmp, ele_step, step_size); -+ __ j(Ldone); -+ -+ __ bind(loop8); -+ // If memory is greater than or equal to 8*step_size bytes, loop by step*8. 
-+ copy_loop8(src, dst, count_in_bytes, tmp, step, NULL, eight_step); -+ __ bind(Ltail); -+ copy_tail(src, dst, count_in_bytes, tmp, ele_step, step_size); -+ __ j(Ldone); -+ -+ __ bind(Lcopy_aligned[exact_log2(step_size >> 1)]); -+ } -+ } -+ // If the ele_step is greater than 8, or the memory src and dst cannot -+ // be aligned with a number greater than the value of step. -+ // Cyclic copy with 8*ele_step. -+ copy_loop8(src, dst, count_in_bytes, tmp, ele_step, &Lcopy_small, noreg); ++ __ lwu(x11, Address(x14, ++ Deoptimization::UnrollBlock:: ++ caller_adjustment_offset_in_bytes())); // (int) ++ __ mv(sender_sp, sp); ++ __ sub(sp, sp, x11); + -+ __ bind(Ldone); -+ } ++ // Push interpreter frames in a loop ++ Label loop; ++ __ bind(loop); ++ __ ld(x11, Address(x15, 0)); // Load frame size ++ __ sub(x11, x11, 2 * wordSize); // We'll push pc and fp by hand ++ __ ld(ra, Address(x12, 0)); // Save return address ++ __ enter(); // and old fp & set new fp ++ __ sub(sp, sp, x11); // Prolog ++ __ sd(sender_sp, Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); // Make it walkable ++ // This value is corrected by layout_activation_impl ++ __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ __ mv(sender_sp, sp); // Pass sender_sp to next frame ++ __ add(x15, x15, wordSize); // Bump array pointer (sizes) ++ __ add(x12, x12, wordSize); // Bump array pointer (pcs) ++ __ subw(x13, x13, 1); // Decrement counter ++ __ bgtz(x13, loop); ++ __ ld(ra, Address(x12, 0)); // save final return address ++ // Re-push self-frame ++ __ enter(); // & old fp & set new fp + -+ void dst_aligned_copy_32bytes_loop(Register alsrc, Register dst, -+ Register shr, Register shl, -+ Register count_in_bytes, bool is_backwards) { -+ const Register tmp1 = x13, tmp2 = x14, tmp3 = x15, tmp4 = x16, thirty_two = x17; -+ const Register sll_reg1 = is_backwards ? tmp1 : tmp2, -+ srl_reg1 = is_backwards ? tmp2 : tmp1, -+ sll_reg2 = is_backwards ? tmp2 : tmp3, -+ srl_reg2 = is_backwards ? tmp3 : tmp2, -+ sll_reg3 = is_backwards ? tmp3 : tmp4, -+ srl_reg3 = is_backwards ? tmp4 : tmp3, -+ sll_reg4 = is_backwards ? tmp4 : tmp1, -+ srl_reg4 = is_backwards ? tmp1 : tmp4; -+ assert_different_registers(t0, thirty_two, alsrc, shr, shl); -+ int unit = is_backwards ? -wordSize : wordSize; -+ int offset = is_backwards ? -wordSize : 0; -+ Label loop, done; -+ -+ __ mv(thirty_two, 32); -+ __ blt(count_in_bytes, thirty_two, done); ++ // Use fp because the frames look interpreted now ++ // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP. ++ // Don't need the precise return PC here, just precise enough to point into this code blob. 
++ address the_pc = __ pc(); ++ __ set_last_Java_frame(sp, fp, the_pc, t0); + -+ __ bind(loop); -+ __ ld(tmp2, alsrc, unit); -+ __ sll(t0, sll_reg1, shl); -+ __ srl(tmp1, srl_reg1, shr); -+ __ orr(tmp1, tmp1, t0); -+ __ sd(tmp1, dst, offset); -+ -+ __ ld(tmp3, alsrc, unit * 2); -+ __ sll(t0, sll_reg2, shl); -+ __ srl(tmp2, srl_reg2, shr); -+ __ orr(tmp2, tmp2, t0); -+ __ sd(tmp2, dst, unit + offset); -+ -+ __ ld(tmp4, alsrc, unit * 3); -+ __ sll(t0, sll_reg3, shl); -+ __ srl(tmp3, srl_reg3, shr); -+ __ orr(tmp3, tmp3, t0); -+ __ sd(tmp3, dst, unit * 2 + offset); -+ -+ __ ld(tmp1, alsrc, unit * 4); -+ __ sll(t0, sll_reg4, shl); -+ __ srl(tmp4, srl_reg4, shr); -+ __ orr(tmp4, tmp4, t0); -+ __ sd(tmp4, dst, unit * 3 + offset); -+ -+ __ add(alsrc, alsrc, unit * 4); -+ __ add(dst, dst, unit * 4); -+ __ sub(count_in_bytes, count_in_bytes, 32); -+ __ bge(count_in_bytes, thirty_two, loop); ++ // Call C code. Need thread but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. Call should ++ // restore return values to their stack-slots with the new SP. ++ // ++ // BasicType unpack_frames(JavaThread* thread, int exec_mode) ++ // + -+ __ bind(done); -+ } ++ // n.b. 2 gp args, 0 fp args, integral return type + -+ void dst_aligned_copy_8bytes_loop(Register alsrc, Register dst, -+ Register shr, Register shl, -+ Register count_in_bytes, Register eight, -+ bool is_backwards) { -+ const Register tmp1 = x13, tmp2 = x14, tmp3 = x15, tmp4 = x16; -+ const Register sll_reg = is_backwards ? tmp1 : tmp2, -+ srl_reg = is_backwards ? tmp2 : tmp1; -+ assert_different_registers(t0, eight, alsrc, shr, shl); -+ Label loop; -+ int unit = is_backwards ? -wordSize : wordSize; ++ // sp should already be aligned ++ __ mv(c_rarg0, xthread); ++ __ mvw(c_rarg1, (unsigned)Deoptimization::Unpack_uncommon_trap); ++ offset = 0; ++ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)), offset); ++ __ jalr(x1, t0, offset); + -+ __ bind(loop); -+ __ ld(tmp2, alsrc, unit); -+ __ sll(t0, sll_reg, shl); -+ __ srl(tmp1, srl_reg, shr); -+ __ orr(t0, tmp1, t0); -+ __ sd(t0, dst, is_backwards ? unit : 0); -+ __ mv(tmp1, tmp2); -+ __ add(alsrc, alsrc, unit); -+ __ add(dst, dst, unit); -+ __ sub(count_in_bytes, count_in_bytes, 8); -+ __ bge(count_in_bytes, eight, loop); -+ } ++ // Set an oopmap for the call site ++ // Use the same PC we used for the last java frame ++ oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0)); + -+ // Scan over array at a for count oops, verifying each one. -+ // Preserves a and count, clobbers t0 and t1. -+ void verify_oop_array(int size, Register a, Register count, Register temp) { -+ Label loop, end; -+ __ mv(t1, zr); -+ __ slli(t0, count, exact_log2(size)); -+ __ bind(loop); -+ __ bgeu(t1, t0, end); ++ // Clear fp AND pc ++ __ reset_last_Java_frame(true); + -+ __ add(temp, a, t1); -+ if (size == wordSize) { -+ __ ld(temp, Address(temp, 0)); -+ __ verify_oop(temp); -+ } else { -+ __ lwu(temp, Address(temp, 0)); -+ __ decode_heap_oop(temp); // calls verify_oop -+ } -+ __ add(t1, t1, size); -+ __ j(loop); -+ __ bind(end); -+ } ++ // Pop self-frame. 
++ __ leave(); // Epilog + -+ // Arguments: -+ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary -+ // ignored -+ // is_oop - true => oop array, so generate store check code -+ // name - stub name string -+ // -+ // Inputs: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - element count, treated as ssize_t, can be zero -+ // -+ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let -+ // the hardware handle it. The two dwords within qwords that span -+ // cache line boundaries will still be loaded and stored atomically. -+ // -+ // Side Effects: -+ // disjoint_int_copy_entry is set to the no-overlap entry point -+ // used by generate_conjoint_int_oop_copy(). -+ // -+ address generate_disjoint_copy(size_t size, bool aligned, bool is_oop, address* entry, -+ const char* name, bool dest_uninitialized = false) { -+ const Register s = c_rarg0, d = c_rarg1, count = c_rarg2; -+ RegSet saved_reg = RegSet::of(s, d, count); -+ __ align(CodeEntryAlignment); -+ StubCodeMark mark(this, "StubRoutines", name); -+ address start = __ pc(); -+ __ enter(); ++ // Jump to interpreter ++ __ ret(); + -+ if (entry != NULL) { -+ *entry = __ pc(); -+ // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) -+ BLOCK_COMMENT("Entry:"); -+ } ++ // Make sure all code is generated ++ masm->flush(); + -+ DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT; -+ if (dest_uninitialized) { -+ decorators |= IS_DEST_UNINITIALIZED; -+ } -+ if (aligned) { -+ decorators |= ARRAYCOPY_ALIGNED; -+ } ++ _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps, ++ SimpleRuntimeFrame::framesize >> 1); ++} ++#endif // COMPILER2 + -+ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ bs->arraycopy_prologue(_masm, decorators, is_oop, s, d, count, saved_reg); ++//------------------------------generate_handler_blob------ ++// ++// Generate a special Compile2Runtime blob that saves all registers, ++// and setup oopmap. ++// ++SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) { ++ ResourceMark rm; ++ OopMapSet *oop_maps = new OopMapSet(); ++ assert_cond(oop_maps != NULL); ++ OopMap* map = NULL; + -+ if (is_oop) { -+ // save regs before copy_memory -+ __ push_reg(RegSet::of(d, count), sp); -+ } -+ copy_memory(aligned, s, d, count, t0, checked_cast(size)); ++ // Allocate space for the code. Setup code generation tools. ++ CodeBuffer buffer("handler_blob", 2048, 1024); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ assert_cond(masm != NULL); + -+ if (is_oop) { -+ __ pop_reg(RegSet::of(d, count), sp); -+ if (VerifyOops) { -+ verify_oop_array(checked_cast(size), d, count, t2); -+ } -+ } ++ address start = __ pc(); ++ address call_pc = NULL; ++ int frame_size_in_words = -1; ++ bool cause_return = (poll_type == POLL_AT_RETURN); ++ RegisterSaver reg_saver(poll_type == POLL_AT_VECTOR_LOOP /* save_vectors */); + -+ bs->arraycopy_epilogue(_masm, decorators, is_oop, d, count, t0, saved_reg); ++ // Save Integer and Float registers. ++ map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words); + -+ __ leave(); -+ __ mv(x10, zr); // return 0 -+ __ ret(); -+ return start; ++ // The following is basically a call_VM. However, we need the precise ++ // address of the call in order to generate an oopmap. Hence, we do all the ++ // work outselves. 
++ ++ Label retaddr; ++ __ set_last_Java_frame(sp, noreg, retaddr, t0); ++ ++ // The return address must always be correct so that frame constructor never ++ // sees an invalid pc. ++ ++ if (!cause_return) { ++ // overwrite the return address pushed by save_live_registers ++ // Additionally, x18 is a callee-saved register so we can look at ++ // it later to determine if someone changed the return address for ++ // us! ++ __ ld(x18, Address(xthread, JavaThread::saved_exception_pc_offset())); ++ __ sd(x18, Address(fp, frame::return_addr_offset * wordSize)); + } + -+ // Arguments: -+ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary -+ // ignored -+ // is_oop - true => oop array, so generate store check code -+ // name - stub name string -+ // -+ // Inputs: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - element count, treated as ssize_t, can be zero -+ // -+ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let -+ // the hardware handle it. The two dwords within qwords that span -+ // cache line boundaries will still be loaded and stored atomically. -+ // -+ address generate_conjoint_copy(int size, bool aligned, bool is_oop, address nooverlap_target, -+ address* entry, const char* name, -+ bool dest_uninitialized = false) { -+ const Register s = c_rarg0, d = c_rarg1, count = c_rarg2; -+ RegSet saved_regs = RegSet::of(s, d, count); -+ StubCodeMark mark(this, "StubRoutines", name); -+ address start = __ pc(); -+ __ enter(); ++ // Do the call ++ __ mv(c_rarg0, xthread); ++ int32_t offset = 0; ++ __ la_patchable(t0, RuntimeAddress(call_ptr), offset); ++ __ jalr(x1, t0, offset); ++ __ bind(retaddr); + -+ if (entry != NULL) { -+ *entry = __ pc(); -+ // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) -+ BLOCK_COMMENT("Entry:"); -+ } ++ // Set an oopmap for the call site. This oopmap will map all ++ // oop-registers and debug-info registers as callee-saved. This ++ // will allow deoptimization at this safepoint to find all possible ++ // debug-info recordings, as well as let GC find all oops. 
+ -+ // use fwd copy when (d-s) above_equal (count*size) -+ __ sub(t0, d, s); -+ __ slli(t1, count, exact_log2(size)); -+ __ bgeu(t0, t1, nooverlap_target); ++ oop_maps->add_gc_map( __ pc() - start, map); + -+ DecoratorSet decorators = IN_HEAP | IS_ARRAY; -+ if (dest_uninitialized) { -+ decorators |= IS_DEST_UNINITIALIZED; -+ } -+ if (aligned) { -+ decorators |= ARRAYCOPY_ALIGNED; -+ } ++ Label noException; + -+ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ bs->arraycopy_prologue(_masm, decorators, is_oop, s, d, count, saved_regs); ++ __ reset_last_Java_frame(false); + -+ if (is_oop) { -+ // save regs before copy_memory -+ __ push_reg(RegSet::of(d, count), sp); -+ } ++ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); + -+ copy_memory(aligned, s, d, count, t0, -size); -+ if (is_oop) { -+ __ pop_reg(RegSet::of(d, count), sp); -+ if (VerifyOops) { -+ verify_oop_array(size, d, count, t2); -+ } -+ } -+ bs->arraycopy_epilogue(_masm, decorators, is_oop, d, count, t0, saved_regs); -+ __ leave(); -+ __ mv(x10, zr); // return 0 -+ __ ret(); -+ return start; -+ } ++ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); ++ __ beqz(t0, noException); + -+ // Arguments: -+ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary -+ // ignored -+ // name - stub name string -+ // -+ // Inputs: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - element count, treated as ssize_t, can be zero -+ // -+ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, -+ // we let the hardware handle it. The one to eight bytes within words, -+ // dwords or qwords that span cache line boundaries will still be loaded -+ // and stored atomically. -+ // -+ // Side Effects: -+ // disjoint_byte_copy_entry is set to the no-overlap entry point // -+ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, -+ // we let the hardware handle it. The one to eight bytes within words, -+ // dwords or qwords that span cache line boundaries will still be loaded -+ // and stored atomically. -+ // -+ // Side Effects: -+ // disjoint_byte_copy_entry is set to the no-overlap entry point -+ // used by generate_conjoint_byte_copy(). -+ // -+ address generate_disjoint_byte_copy(bool aligned, address* entry, const char* name) { -+ const bool not_oop = false; -+ return generate_disjoint_copy(sizeof (jbyte), aligned, not_oop, entry, name); -+ } ++ // Exception pending + -+ // Arguments: -+ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary -+ // ignored -+ // name - stub name string -+ // -+ // Inputs: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - element count, treated as ssize_t, can be zero -+ // -+ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, -+ // we let the hardware handle it. The one to eight bytes within words, -+ // dwords or qwords that span cache line boundaries will still be loaded -+ // and stored atomically. 
-+ // -+ address generate_conjoint_byte_copy(bool aligned, address nooverlap_target, -+ address* entry, const char* name) { -+ const bool not_oop = false; -+ return generate_conjoint_copy(sizeof (jbyte), aligned, not_oop, nooverlap_target, entry, name); -+ } ++ reg_saver.restore_live_registers(masm); + -+ // Arguments: -+ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary -+ // ignored -+ // name - stub name string -+ // -+ // Inputs: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - element count, treated as ssize_t, can be zero -+ // -+ // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we -+ // let the hardware handle it. The two or four words within dwords -+ // or qwords that span cache line boundaries will still be loaded -+ // and stored atomically. -+ // -+ // Side Effects: -+ // disjoint_short_copy_entry is set to the no-overlap entry point -+ // used by generate_conjoint_short_copy(). -+ // -+ address generate_disjoint_short_copy(bool aligned, -+ address* entry, const char* name) { -+ const bool not_oop = false; -+ return generate_disjoint_copy(sizeof (jshort), aligned, not_oop, entry, name); -+ } ++ __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); + -+ // Arguments: -+ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary -+ // ignored -+ // name - stub name string -+ // -+ // Inputs: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - element count, treated as ssize_t, can be zero -+ // -+ // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we -+ // let the hardware handle it. The two or four words within dwords -+ // or qwords that span cache line boundaries will still be loaded -+ // and stored atomically. -+ // -+ address generate_conjoint_short_copy(bool aligned, address nooverlap_target, -+ address* entry, const char* name) { -+ const bool not_oop = false; -+ return generate_conjoint_copy(sizeof (jshort), aligned, not_oop, nooverlap_target, entry, name); -+ } ++ // No exception case ++ __ bind(noException); + -+ // Arguments: -+ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary -+ // ignored -+ // name - stub name string -+ // -+ // Inputs: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - element count, treated as ssize_t, can be zero -+ // -+ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let -+ // the hardware handle it. The two dwords within qwords that span -+ // cache line boundaries will still be loaded and stored atomically. -+ // -+ // Side Effects: -+ // disjoint_int_copy_entry is set to the no-overlap entry point -+ // used by generate_conjoint_int_oop_copy(). 
-+ // -+ address generate_disjoint_int_copy(bool aligned, address* entry, -+ const char* name, bool dest_uninitialized = false) { -+ const bool not_oop = false; -+ return generate_disjoint_copy(sizeof (jint), aligned, not_oop, entry, name); -+ } ++ Label no_adjust, bail; ++ if (!cause_return) { ++ // If our stashed return pc was modified by the runtime we avoid touching it ++ __ ld(t0, Address(fp, frame::return_addr_offset * wordSize)); ++ __ bne(x18, t0, no_adjust); + -+ // Arguments: -+ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary -+ // ignored -+ // name - stub name string -+ // -+ // Inputs: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - element count, treated as ssize_t, can be zero -+ // -+ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let -+ // the hardware handle it. The two dwords within qwords that span -+ // cache line boundaries will still be loaded and stored atomically. -+ // -+ address generate_conjoint_int_copy(bool aligned, address nooverlap_target, -+ address* entry, const char* name, -+ bool dest_uninitialized = false) { -+ const bool not_oop = false; -+ return generate_conjoint_copy(sizeof (jint), aligned, not_oop, nooverlap_target, entry, name); ++#ifdef ASSERT ++ // Verify the correct encoding of the poll we're about to skip. ++ // See NativeInstruction::is_lwu_to_zr() ++ __ lwu(t0, Address(x18)); ++ __ andi(t1, t0, 0b0000011); ++ __ mv(t2, 0b0000011); ++ __ bne(t1, t2, bail); // 0-6:0b0000011 ++ __ srli(t1, t0, 7); ++ __ andi(t1, t1, 0b00000); ++ __ bnez(t1, bail); // 7-11:0b00000 ++ __ srli(t1, t0, 12); ++ __ andi(t1, t1, 0b110); ++ __ mv(t2, 0b110); ++ __ bne(t1, t2, bail); // 12-14:0b110 ++#endif ++ // Adjust return pc forward to step over the safepoint poll instruction ++ __ add(x18, x18, NativeInstruction::instruction_size); ++ __ sd(x18, Address(fp, frame::return_addr_offset * wordSize)); + } + ++ __ bind(no_adjust); ++ // Normal exit, restore registers and exit. + -+ // Arguments: -+ // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes -+ // ignored -+ // name - stub name string -+ // -+ // Inputs: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - element count, treated as size_t, can be zero -+ // -+ // Side Effects: -+ // disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the -+ // no-overlap entry point used by generate_conjoint_long_oop_copy(). 
-+ // -+ address generate_disjoint_long_copy(bool aligned, address* entry, -+ const char* name, bool dest_uninitialized = false) { -+ const bool not_oop = false; -+ return generate_disjoint_copy(sizeof (jlong), aligned, not_oop, entry, name); -+ } ++ reg_saver.restore_live_registers(masm); ++ __ ret(); + -+ // Arguments: -+ // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes -+ // ignored -+ // name - stub name string -+ // -+ // Inputs: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - element count, treated as size_t, can be zero -+ // -+ address generate_conjoint_long_copy(bool aligned, -+ address nooverlap_target, address* entry, -+ const char* name, bool dest_uninitialized = false) { -+ const bool not_oop = false; -+ return generate_conjoint_copy(sizeof (jlong), aligned, not_oop, nooverlap_target, entry, name); -+ } ++#ifdef ASSERT ++ __ bind(bail); ++ __ stop("Attempting to adjust pc to skip safepoint poll but the return point is not what we expected"); ++#endif + -+ // Arguments: -+ // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes -+ // ignored -+ // name - stub name string -+ // -+ // Inputs: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - element count, treated as size_t, can be zero -+ // -+ // Side Effects: -+ // disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the -+ // no-overlap entry point used by generate_conjoint_long_oop_copy(). -+ // -+ address generate_disjoint_oop_copy(bool aligned, address* entry, -+ const char* name, bool dest_uninitialized) { -+ const bool is_oop = true; -+ const int size = UseCompressedOops ? sizeof (jint) : sizeof (jlong); -+ return generate_disjoint_copy(size, aligned, is_oop, entry, name, dest_uninitialized); -+ } ++ // Make sure all code is generated ++ masm->flush(); + -+ // Arguments: -+ // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes -+ // ignored -+ // name - stub name string -+ // -+ // Inputs: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - element count, treated as size_t, can be zero -+ // -+ address generate_conjoint_oop_copy(bool aligned, -+ address nooverlap_target, address* entry, -+ const char* name, bool dest_uninitialized) { -+ const bool is_oop = true; -+ const int size = UseCompressedOops ? sizeof (jint) : sizeof (jlong); -+ return generate_conjoint_copy(size, aligned, is_oop, nooverlap_target, entry, -+ name, dest_uninitialized); -+ } -+ -+ // Helper for generating a dynamic type check. -+ // Smashes t0, t1. -+ void generate_type_check(Register sub_klass, -+ Register super_check_offset, -+ Register super_klass, -+ Label& L_success) { -+ assert_different_registers(sub_klass, super_check_offset, super_klass); -+ -+ BLOCK_COMMENT("type_check:"); -+ -+ Label L_miss; -+ -+ __ check_klass_subtype_fast_path(sub_klass, super_klass, noreg, &L_success, &L_miss, NULL, super_check_offset); -+ __ check_klass_subtype_slow_path(sub_klass, super_klass, noreg, noreg, &L_success, NULL); -+ -+ // Fall through on failure! 
-+ __ BIND(L_miss); -+ } -+ -+ // -+ // Generate checkcasting array copy stub -+ // -+ // Input: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - element count, treated as ssize_t, can be zero -+ // c_rarg3 - size_t ckoff (super_check_offset) -+ // c_rarg4 - oop ckval (super_klass) -+ // -+ // Output: -+ // x10 == 0 - success -+ // x10 == -1^K - failure, where K is partial transfer count -+ // -+ address generate_checkcast_copy(const char* name, address* entry, -+ bool dest_uninitialized = false) { -+ Label L_load_element, L_store_element, L_do_card_marks, L_done, L_done_pop; -+ -+ // Input registers (after setup_arg_regs) -+ const Register from = c_rarg0; // source array address -+ const Register to = c_rarg1; // destination array address -+ const Register count = c_rarg2; // elementscount -+ const Register ckoff = c_rarg3; // super_check_offset -+ const Register ckval = c_rarg4; // super_klass ++ // Fill-out other meta info ++ return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words); ++} + -+ RegSet wb_pre_saved_regs = RegSet::range(c_rarg0, c_rarg4); -+ RegSet wb_post_saved_regs = RegSet::of(count); ++// ++// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss ++// ++// Generate a stub that calls into vm to find out the proper destination ++// of a java call. All the argument registers are live at this point ++// but since this is generic code we don't know what they are and the caller ++// must do any gc of the args. ++// ++RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) { ++ assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); + -+ // Registers used as temps (x7, x9, x18 are save-on-entry) -+ const Register count_save = x19; // orig elementscount -+ const Register start_to = x18; // destination array start address -+ const Register copied_oop = x7; // actual oop copied -+ const Register r9_klass = x9; // oop._klass ++ // allocate space for the code ++ ResourceMark rm; + -+ //--------------------------------------------------------------- -+ // Assembler stub will be used for this call to arraycopy -+ // if the two arrays are subtypes of Object[] but the -+ // destination array type is not equal to or a supertype -+ // of the source type. Each element must be separately -+ // checked. 
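The comment block above states when the checkcast copy path is taken (destination element type is not a supertype of the source element type, so every element is checked individually) and that the stub returns 0 on success or -1^K on a partial transfer. As a rough C-level model of that contract only (illustrative, not taken from the patch; the hypothetical can_store callback stands in for the subtype check the stub emits via generate_type_check):

#include <cstddef>

typedef bool (*can_store_fn)(const void* elem, const void* dst_elem_klass);

// Returns 0 on a full transfer, or ~K (bitwise NOT of the number of elements
// already copied) when element K fails the store check -- the same encoding
// the stub leaves in x10.
ptrdiff_t checkcast_copy_model(void* const* from, void** to, size_t count,
                               const void* dst_elem_klass, can_store_fn can_store) {
  for (size_t k = 0; k < count; k++) {
    void* elem = from[k];
    if (elem != NULL && !can_store(elem, dst_elem_klass)) {
      return ~(ptrdiff_t)k;   // K = k elements were copied before the failure
    }
    to[k] = elem;             // a NULL element always passes the store check
  }
  return 0;                   // full transfer
}

A caller that sees a negative result can recover the partial count as ~result, which matches the Java-level behaviour of System.arraycopy: the leading compatible elements are copied before ArrayStoreException is raised.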
++ CodeBuffer buffer(name, 1000, 512); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ assert_cond(masm != NULL); + -+ assert_different_registers(from, to, count, ckoff, ckval, start_to, -+ copied_oop, r9_klass, count_save); ++ int frame_size_in_words = -1; ++ RegisterSaver reg_saver(false /* save_vectors */); + -+ __ align(CodeEntryAlignment); -+ StubCodeMark mark(this, "StubRoutines", name); -+ address start = __ pc(); ++ OopMapSet *oop_maps = new OopMapSet(); ++ assert_cond(oop_maps != NULL); ++ OopMap* map = NULL; + -+ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ int start = __ offset(); + -+ // Caller of this entry point must set up the argument registers -+ if (entry != NULL) { -+ *entry = __ pc(); -+ BLOCK_COMMENT("Entry:"); -+ } ++ map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words); + -+ // Empty array: Nothing to do -+ __ beqz(count, L_done); ++ int frame_complete = __ offset(); + -+ __ push_reg(RegSet::of(x7, x9, x18, x19), sp); ++ { ++ Label retaddr; ++ __ set_last_Java_frame(sp, noreg, retaddr, t0); + -+#ifdef ASSERT -+ BLOCK_COMMENT("assert consistent ckoff/ckval"); -+ // The ckoff and ckval must be mutually consistent, -+ // even though caller generates both. -+ { Label L; -+ int sco_offset = in_bytes(Klass::super_check_offset_offset()); -+ __ lwu(start_to, Address(ckval, sco_offset)); -+ __ beq(ckoff, start_to, L); -+ __ stop("super_check_offset inconsistent"); -+ __ bind(L); -+ } -+#endif //ASSERT ++ __ mv(c_rarg0, xthread); ++ int32_t offset = 0; ++ __ la_patchable(t0, RuntimeAddress(destination), offset); ++ __ jalr(x1, t0, offset); ++ __ bind(retaddr); ++ } + -+ DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_CHECKCAST | ARRAYCOPY_DISJOINT; -+ bool is_oop = true; -+ if (dest_uninitialized) { -+ decorators |= IS_DEST_UNINITIALIZED; -+ } ++ // Set an oopmap for the call site. ++ // We need this not only for callee-saved registers, but also for volatile ++ // registers that the compiler might be keeping live across a safepoint. + -+ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ bs->arraycopy_prologue(_masm, decorators, is_oop, from, to, count, wb_pre_saved_regs); ++ oop_maps->add_gc_map( __ offset() - start, map); + -+ // save the original count -+ __ mv(count_save, count); ++ // x10 contains the address we are going to jump to assuming no exception got installed + -+ // Copy from low to high addresses -+ __ mv(start_to, to); // Save destination array start address -+ __ j(L_load_element); ++ // clear last_Java_sp ++ __ reset_last_Java_frame(false); ++ // check for pending exceptions ++ Label pending; ++ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); ++ __ bnez(t0, pending); + -+ // ======== begin loop ======== -+ // (Loop is rotated; its entry is L_load_element.) -+ // Loop control: -+ // for count to 0 do -+ // copied_oop = load_heap_oop(from++) -+ // ... generate_type_check ... -+ // store_heap_oop(to++, copied_oop) -+ // end ++ // get the returned Method* ++ __ get_vm_result_2(xmethod, xthread); ++ __ sd(xmethod, Address(sp, reg_saver.reg_offset_in_bytes(xmethod))); + -+ __ align(OptoLoopAlignment); ++ // x10 is where we want to jump, overwrite t0 which is saved and temporary ++ __ sd(x10, Address(sp, reg_saver.reg_offset_in_bytes(t0))); ++ reg_saver.restore_live_registers(masm); + -+ __ BIND(L_store_element); -+ __ store_heap_oop(Address(to, 0), copied_oop, noreg, noreg, noreg, AS_RAW); // store the oop -+ __ add(to, to, UseCompressedOops ? 
4 : 8); -+ __ sub(count, count, 1); -+ __ beqz(count, L_do_card_marks); ++ // We are back the the original state on entry and ready to go. + -+ // ======== loop entry is here ======== -+ __ BIND(L_load_element); -+ __ load_heap_oop(copied_oop, Address(from, 0), noreg, noreg, AS_RAW); // load the oop -+ __ add(from, from, UseCompressedOops ? 4 : 8); -+ __ beqz(copied_oop, L_store_element); ++ __ jr(t0); + -+ __ load_klass(r9_klass, copied_oop);// query the object klass -+ generate_type_check(r9_klass, ckoff, ckval, L_store_element); -+ // ======== end loop ======== ++ // Pending exception after the safepoint + -+ // It was a real error; we must depend on the caller to finish the job. -+ // Register count = remaining oops, count_orig = total oops. -+ // Emit GC store barriers for the oops we have copied and report -+ // their number to the caller. ++ __ bind(pending); + -+ __ sub(count, count_save, count); // K = partially copied oop count -+ __ xori(count, count, -1); // report (-1^K) to caller -+ __ beqz(count, L_done_pop); ++ reg_saver.restore_live_registers(masm); + -+ __ BIND(L_do_card_marks); -+ bs->arraycopy_epilogue(_masm, decorators, is_oop, start_to, count_save, t0, wb_post_saved_regs); ++ // exception pending => remove activation and forward to exception handler + -+ __ bind(L_done_pop); -+ __ pop_reg(RegSet::of(x7, x9, x18, x19), sp); -+ inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr); ++ __ sd(zr, Address(xthread, JavaThread::vm_result_offset())); + -+ __ bind(L_done); -+ __ mv(x10, count); -+ __ leave(); -+ __ ret(); ++ __ ld(x10, Address(xthread, Thread::pending_exception_offset())); ++ __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); + -+ return start; -+ } ++ // ------------- ++ // make sure all code is generated ++ masm->flush(); + -+ // Perform range checks on the proposed arraycopy. -+ // Kills temp, but nothing else. -+ // Also, clean the sign bits of src_pos and dst_pos. -+ void arraycopy_range_checks(Register src, // source array oop (c_rarg0) -+ Register src_pos, // source position (c_rarg1) -+ Register dst, // destination array oo (c_rarg2) -+ Register dst_pos, // destination position (c_rarg3) -+ Register length, -+ Register temp, -+ Label& L_failed) { -+ BLOCK_COMMENT("arraycopy_range_checks:"); ++ // return the blob ++ return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_words, oop_maps, true); ++} + -+ assert_different_registers(t0, temp); ++#ifdef COMPILER2 ++RuntimeStub* SharedRuntime::make_native_invoker(address call_target, ++ int shadow_space_bytes, ++ const GrowableArray& input_registers, ++ const GrowableArray& output_registers) { ++ Unimplemented(); ++ return nullptr; ++} + -+ // if [src_pos + length > arrayOop(src)->length()] then FAIL -+ __ lwu(t0, Address(src, arrayOopDesc::length_offset_in_bytes())); -+ __ addw(temp, length, src_pos); -+ __ bgtu(temp, t0, L_failed); ++//------------------------------generate_exception_blob--------------------------- ++// creates exception blob at the end ++// Using exception blob, this code is jumped from a compiled method. ++// (see emit_exception_handler in riscv.ad file) ++// ++// Given an exception pc at a call we call into the runtime for the ++// handler in this method. This handler might merely restore state ++// (i.e. callee save registers) unwind the frame and jump to the ++// exception handler for the nmethod if there is no Java level handler ++// for the nmethod. ++// ++// This code is entered with a jmp. 
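The runtime blobs above reach VM entry points with la_patchable plus jalr instead of an absolute jump. Assuming la_patchable emits the usual auipc-based pc-relative sequence (an assumption about its implementation, not something shown in this hunk), the pc-relative distance has to be split into a high 20-bit part for auipc and a signed low 12-bit part for the jalr immediate; a minimal sketch of that split:

#include <cassert>
#include <cstdint>
#include <cstdio>

// Split a pc-relative distance into the auipc (hi20) and jalr/addi (lo12)
// immediates. Rounding by +0x800 keeps lo12 inside the signed 12-bit range.
void split_pc_relative(int64_t distance, int32_t* hi20, int32_t* lo12) {
  assert(distance >= INT32_MIN && distance <= INT32_MAX - 0x800);
  *hi20 = (int32_t)((distance + 0x800) >> 12);            // goes into auipc
  *lo12 = (int32_t)(distance - ((int64_t)*hi20 << 12));   // goes into jalr
  assert(*lo12 >= -2048 && *lo12 <= 2047);
}

int main() {
  int32_t hi = 0, lo = 0;
  split_pc_relative(0x12345, &hi, &lo);
  printf("hi20=%d lo12=%d\n", hi, lo);  // recombines as (hi << 12) + lo
  return 0;
}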
++// ++// Arguments: ++// x10: exception oop ++// x13: exception pc ++// ++// Results: ++// x10: exception oop ++// x13: exception pc in caller ++// destination: exception handler of caller ++// ++// Note: the exception pc MUST be at a call (precise debug information) ++// Registers x10, x13, x12, x14, x15, t0 are not callee saved. ++// + -+ // if [dst_pos + length > arrayOop(dst)->length()] then FAIL -+ __ lwu(t0, Address(dst, arrayOopDesc::length_offset_in_bytes())); -+ __ addw(temp, length, dst_pos); -+ __ bgtu(temp, t0, L_failed); ++void OptoRuntime::generate_exception_blob() { ++ assert(!OptoRuntime::is_callee_saved_register(R13_num), ""); ++ assert(!OptoRuntime::is_callee_saved_register(R10_num), ""); ++ assert(!OptoRuntime::is_callee_saved_register(R12_num), ""); + -+ // Have to clean up high 32 bits of 'src_pos' and 'dst_pos'. -+ __ zero_extend(src_pos, src_pos, 32); -+ __ zero_extend(dst_pos, dst_pos, 32); ++ assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned"); + -+ BLOCK_COMMENT("arraycopy_range_checks done"); -+ } ++ // Allocate space for the code ++ ResourceMark rm; ++ // Setup code generation tools ++ CodeBuffer buffer("exception_blob", 2048, 1024); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ assert_cond(masm != NULL); + ++ // TODO check various assumptions made here + // -+ // Generate 'unsafe' array copy stub -+ // Though just as safe as the other stubs, it takes an unscaled -+ // size_t argument instead of an element count. -+ // -+ // Input: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - byte count, treated as ssize_t, can be zero -+ // -+ // Examines the alignment of the operands and dispatches -+ // to a long, int, short, or byte copy loop. -+ // -+ address generate_unsafe_copy(const char* name, -+ address byte_copy_entry, -+ address short_copy_entry, -+ address int_copy_entry, -+ address long_copy_entry) { -+ assert_cond(byte_copy_entry != NULL && short_copy_entry != NULL && -+ int_copy_entry != NULL && long_copy_entry != NULL); -+ Label L_long_aligned, L_int_aligned, L_short_aligned; -+ const Register s = c_rarg0, d = c_rarg1, count = c_rarg2; -+ -+ __ align(CodeEntryAlignment); -+ StubCodeMark mark(this, "StubRoutines", name); -+ address start = __ pc(); -+ __ enter(); // required for proper stackwalking of RuntimeStub frame -+ -+ // bump this on entry, not on exit: -+ inc_counter_np(SharedRuntime::_unsafe_array_copy_ctr); -+ -+ __ orr(t0, s, d); -+ __ orr(t0, t0, count); -+ -+ __ andi(t0, t0, BytesPerLong - 1); -+ __ beqz(t0, L_long_aligned); -+ __ andi(t0, t0, BytesPerInt - 1); -+ __ beqz(t0, L_int_aligned); -+ __ andi(t0, t0, 1); -+ __ beqz(t0, L_short_aligned); -+ __ j(RuntimeAddress(byte_copy_entry)); ++ // make sure we do so before running this + -+ __ BIND(L_short_aligned); -+ __ srli(count, count, LogBytesPerShort); // size => short_count -+ __ j(RuntimeAddress(short_copy_entry)); -+ __ BIND(L_int_aligned); -+ __ srli(count, count, LogBytesPerInt); // size => int_count -+ __ j(RuntimeAddress(int_copy_entry)); -+ __ BIND(L_long_aligned); -+ __ srli(count, count, LogBytesPerLong); // size => long_count -+ __ j(RuntimeAddress(long_copy_entry)); ++ address start = __ pc(); + -+ return start; -+ } ++ // push fp and retaddr by hand ++ // Exception pc is 'return address' for stack walker ++ __ addi(sp, sp, -2 * wordSize); ++ __ sd(ra, Address(sp, wordSize)); ++ __ sd(fp, Address(sp)); ++ // there are no callee save registers and we don't expect an ++ // arg reg save area ++#ifndef 
PRODUCT ++ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); ++#endif ++ // Store exception in Thread object. We cannot pass any arguments to the ++ // handle_exception call, since we do not want to make any assumption ++ // about the size of the frame where the exception happened in. ++ __ sd(x10, Address(xthread, JavaThread::exception_oop_offset())); ++ __ sd(x13, Address(xthread, JavaThread::exception_pc_offset())); + ++ // This call does all the hard work. It checks if an exception handler ++ // exists in the method. ++ // If so, it returns the handler address. ++ // If not, it prepares for stack-unwinding, restoring the callee-save ++ // registers of the frame being removed. + // -+ // Generate generic array copy stubs -+ // -+ // Input: -+ // c_rarg0 - src oop -+ // c_rarg1 - src_pos (32-bits) -+ // c_rarg2 - dst oop -+ // c_rarg3 - dst_pos (32-bits) -+ // c_rarg4 - element count (32-bits) -+ // -+ // Output: -+ // x10 == 0 - success -+ // x10 == -1^K - failure, where K is partial transfer count ++ // address OptoRuntime::handle_exception_C(JavaThread* thread) + // -+ address generate_generic_copy(const char* name, -+ address byte_copy_entry, address short_copy_entry, -+ address int_copy_entry, address oop_copy_entry, -+ address long_copy_entry, address checkcast_copy_entry) { -+ assert_cond(byte_copy_entry != NULL && short_copy_entry != NULL && -+ int_copy_entry != NULL && oop_copy_entry != NULL && -+ long_copy_entry != NULL && checkcast_copy_entry != NULL); -+ Label L_failed, L_failed_0, L_objArray; -+ Label L_copy_bytes, L_copy_shorts, L_copy_ints, L_copy_longs; -+ -+ // Input registers -+ const Register src = c_rarg0; // source array oop -+ const Register src_pos = c_rarg1; // source position -+ const Register dst = c_rarg2; // destination array oop -+ const Register dst_pos = c_rarg3; // destination position -+ const Register length = c_rarg4; -+ -+ __ align(CodeEntryAlignment); -+ -+ StubCodeMark mark(this, "StubRoutines", name); ++ // n.b. 1 gp arg, 0 fp args, integral return type + -+ // Registers used as temps -+ const Register dst_klass = c_rarg5; ++ // the stack should always be aligned ++ address the_pc = __ pc(); ++ __ set_last_Java_frame(sp, noreg, the_pc, t0); ++ __ mv(c_rarg0, xthread); ++ int32_t offset = 0; ++ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, OptoRuntime::handle_exception_C)), offset); ++ __ jalr(x1, t0, offset); + -+ address start = __ pc(); + -+ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ // handle_exception_C is a special VM call which does not require an explicit ++ // instruction sync afterwards. + -+ // bump this on entry, not on exit: -+ inc_counter_np(SharedRuntime::_generic_array_copy_ctr); ++ // Set an oopmap for the call site. This oopmap will only be used if we ++ // are unwinding the stack. Hence, all locations will be dead. ++ // Callee-saved registers will be the same as the frame above (i.e., ++ // handle_exception_stub), since they were restored when we got the ++ // exception. + -+ //----------------------------------------------------------------------- -+ // Assembler stub will be used for this call to arraycopy -+ // if the following conditions are met: -+ // -+ // (1) src and dst must not be null. -+ // (2) src_pos must not be negative. -+ // (3) dst_pos must not be negative. -+ // (4) length must not be negative. -+ // (5) src klass and dst klass should be the same and not NULL. -+ // (6) src and dst should be arrays. 
-+ // (7) src_pos + length must not exceed length of src. -+ // (8) dst_pos + length must not exceed length of dst. -+ // ++ OopMapSet* oop_maps = new OopMapSet(); ++ assert_cond(oop_maps != NULL); + -+ // if [src == NULL] then return -1 -+ __ beqz(src, L_failed); ++ oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0)); + -+ // if [src_pos < 0] then return -1 -+ // i.e. sign bit set -+ __ andi(t0, src_pos, 1UL << 31); -+ __ bnez(t0, L_failed); ++ __ reset_last_Java_frame(false); + -+ // if [dst == NULL] then return -1 -+ __ beqz(dst, L_failed); ++ // Restore callee-saved registers + -+ // if [dst_pos < 0] then return -1 -+ // i.e. sign bit set -+ __ andi(t0, dst_pos, 1UL << 31); -+ __ bnez(t0, L_failed); ++ // fp is an implicitly saved callee saved register (i.e. the calling ++ // convention will save restore it in prolog/epilog) Other than that ++ // there are no callee save registers now that adapter frames are gone. ++ // and we dont' expect an arg reg save area ++ __ ld(fp, Address(sp)); ++ __ ld(x13, Address(sp, wordSize)); ++ __ addi(sp, sp , 2 * wordSize); + -+ // registers used as temp -+ const Register scratch_length = x28; // elements count to copy -+ const Register scratch_src_klass = x29; // array klass -+ const Register lh = x30; // layout helper ++ // x10: exception handler + -+ // if [length < 0] then return -1 -+ __ addw(scratch_length, length, zr); // length (elements count, 32-bits value) -+ // i.e. sign bit set -+ __ andi(t0, scratch_length, 1UL << 31); -+ __ bnez(t0, L_failed); ++ // We have a handler in x10 (could be deopt blob). ++ __ mv(t0, x10); + -+ __ load_klass(scratch_src_klass, src); ++ // Get the exception oop ++ __ ld(x10, Address(xthread, JavaThread::exception_oop_offset())); ++ // Get the exception pc in case we are deoptimized ++ __ ld(x14, Address(xthread, JavaThread::exception_pc_offset())); +#ifdef ASSERT -+ { -+ BLOCK_COMMENT("assert klasses not null {"); -+ Label L1, L2; -+ __ bnez(scratch_src_klass, L2); // it is broken if klass is NULL -+ __ bind(L1); -+ __ stop("broken null klass"); -+ __ bind(L2); -+ __ load_klass(t0, dst); -+ __ beqz(t0, L1); // this would be broken also -+ BLOCK_COMMENT("} assert klasses not null done"); -+ } ++ __ sd(zr, Address(xthread, JavaThread::exception_handler_pc_offset())); ++ __ sd(zr, Address(xthread, JavaThread::exception_pc_offset())); +#endif ++ // Clear the exception oop so GC no longer processes it as a root. ++ __ sd(zr, Address(xthread, JavaThread::exception_oop_offset())); + -+ // Load layout helper (32-bits) -+ // -+ // |array_tag| | header_size | element_type | |log2_element_size| -+ // 32 30 24 16 8 2 0 -+ // -+ // array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0 -+ // -+ -+ const int lh_offset = in_bytes(Klass::layout_helper_offset()); -+ -+ // Handle objArrays completely differently... -+ const jint objArray_lh = Klass::array_layout_helper(T_OBJECT); -+ __ lw(lh, Address(scratch_src_klass, lh_offset)); -+ __ mvw(t0, objArray_lh); -+ __ beq(lh, t0, L_objArray); ++ // x10: exception oop ++ // t0: exception handler ++ // x14: exception pc ++ // Jump to handler + -+ // if [src->klass() != dst->klass()] then return -1 -+ __ load_klass(t1, dst); -+ __ bne(t1, scratch_src_klass, L_failed); ++ __ jr(t0); + -+ // if [src->is_Array() != NULL] then return -1 -+ // i.e. (lh >= 0) -+ __ andi(t0, lh, 1UL << 31); -+ __ beqz(t0, L_failed); ++ // Make sure all code is generated ++ masm->flush(); + -+ // At this point, it is known to be a typeArray (array_tag 0x3). 
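The old hunk above decodes the Klass layout helper with shift/mask arithmetic following the field diagram in its comment (array tag in the top two bits, then header size, element type, and log2 element size). A small stand-alone sketch of that decoding, using the positions from the diagram; the authoritative shifts and masks are the Klass::_lh_* constants, and the example value below is made up:

#include <cstdint>
#include <cstdio>

struct LayoutHelper {
  unsigned tag;                // 0x3 = typeArray, 0x2 = objArray, 0x0 = non-array
  unsigned header_size;        // array header size in bytes
  unsigned element_type;       // BasicType of the elements
  unsigned log2_element_size;  // 0..3 for 1/2/4/8-byte elements
};

LayoutHelper decode_layout_helper(int32_t lh) {
  LayoutHelper d;
  d.tag               = ((uint32_t)lh >> 30) & 0x3;
  d.header_size       = ((uint32_t)lh >> 16) & 0xFF;
  d.element_type      = ((uint32_t)lh >>  8) & 0xFF;
  d.log2_element_size = (uint32_t)lh & 0xFF;
  return d;
}

int main() {
  // Hypothetical typeArray helper: tag 0x3, 16-byte header, 4-byte elements.
  int32_t lh = (int32_t)((0x3u << 30) | (16u << 16) | 2u);
  LayoutHelper d = decode_layout_helper(lh);
  printf("tag=%u header=%u log2_elem=%u\n", d.tag, d.header_size, d.log2_element_size);
  return 0;
}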
-+#ifdef ASSERT -+ { -+ BLOCK_COMMENT("assert primitive array {"); -+ Label L; -+ __ mvw(t1, Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift); -+ __ bge(lh, t1, L); -+ __ stop("must be a primitive array"); -+ __ bind(L); -+ BLOCK_COMMENT("} assert primitive array done"); -+ } -+#endif ++ // Set exception blob ++ _exception_blob = ExceptionBlob::create(&buffer, oop_maps, SimpleRuntimeFrame::framesize >> 1); ++} ++#endif // COMPILER2 +diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp +new file mode 100644 +index 00000000000..b3fdd04db1b +--- /dev/null ++++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp +@@ -0,0 +1,3864 @@ ++/* ++ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length, -+ t1, L_failed); ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "compiler/oopMap.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/universe.hpp" ++#include "nativeInst_riscv.hpp" ++#include "oops/instanceOop.hpp" ++#include "oops/method.hpp" ++#include "oops/objArrayKlass.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++#include "utilities/align.hpp" ++#include "utilities/powerOfTwo.hpp" ++#ifdef COMPILER2 ++#include "opto/runtime.hpp" ++#endif ++#if INCLUDE_ZGC ++#include "gc/z/zThreadLocalData.hpp" ++#endif + -+ // TypeArrayKlass -+ // -+ // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize) -+ // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize) -+ // ++// Declaration and definition of StubGenerator (no .hpp file). 
++// For a more detailed description of the stub routine structure ++// see the comment in stubRoutines.hpp + -+ const Register t0_offset = t0; // array offset -+ const Register x22_elsize = lh; // element size ++#undef __ ++#define __ _masm-> + -+ // Get array_header_in_bytes() -+ int lh_header_size_width = exact_log2(Klass::_lh_header_size_mask + 1); -+ int lh_header_size_msb = Klass::_lh_header_size_shift + lh_header_size_width; -+ __ slli(t0_offset, lh, XLEN - lh_header_size_msb); // left shift to remove 24 ~ 32; -+ __ srli(t0_offset, t0_offset, XLEN - lh_header_size_width); // array_offset ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#else ++#define BLOCK_COMMENT(str) __ block_comment(str) ++#endif + -+ __ add(src, src, t0_offset); // src array offset -+ __ add(dst, dst, t0_offset); // dst array offset -+ BLOCK_COMMENT("choose copy loop based on element size"); ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") + -+ // next registers should be set before the jump to corresponding stub -+ const Register from = c_rarg0; // source array address -+ const Register to = c_rarg1; // destination array address -+ const Register count = c_rarg2; // elements count ++// Stub Code definitions + -+ // 'from', 'to', 'count' registers should be set in such order -+ // since they are the same as 'src', 'src_pos', 'dst'. ++class StubGenerator: public StubCodeGenerator { ++ private: + -+ assert(Klass::_lh_log2_element_size_shift == 0, "fix this code"); ++#ifdef PRODUCT ++#define inc_counter_np(counter) ((void)0) ++#else ++ void inc_counter_np_(int& counter) { ++ __ la(t1, ExternalAddress((address)&counter)); ++ __ lwu(t0, Address(t1, 0)); ++ __ addiw(t0, t0, 1); ++ __ sw(t0, Address(t1, 0)); ++ } ++#define inc_counter_np(counter) \ ++ BLOCK_COMMENT("inc_counter " #counter); \ ++ inc_counter_np_(counter); ++#endif + -+ // The possible values of elsize are 0-3, i.e. exact_log2(element -+ // size in bytes). We do a simple bitwise binary search. -+ __ BIND(L_copy_bytes); -+ __ andi(t0, x22_elsize, 2); -+ __ bnez(t0, L_copy_ints); -+ __ andi(t0, x22_elsize, 1); -+ __ bnez(t0, L_copy_shorts); -+ __ add(from, src, src_pos); // src_addr -+ __ add(to, dst, dst_pos); // dst_addr -+ __ addw(count, scratch_length, zr); // length -+ __ j(RuntimeAddress(byte_copy_entry)); ++ // Call stubs are used to call Java from C ++ // ++ // Arguments: ++ // c_rarg0: call wrapper address address ++ // c_rarg1: result address ++ // c_rarg2: result type BasicType ++ // c_rarg3: method Method* ++ // c_rarg4: (interpreter) entry point address ++ // c_rarg5: parameters intptr_t* ++ // c_rarg6: parameter size (in words) int ++ // c_rarg7: thread Thread* ++ // ++ // There is no return from the stub itself as any Java result ++ // is written to result ++ // ++ // we save x1 (ra) as the return PC at the base of the frame and ++ // link x8 (fp) below it as the frame pointer installing sp (x2) ++ // into fp. ++ // ++ // we save x10-x17, which accounts for all the c arguments. ++ // ++ // TODO: strictly do we need to save them all? they are treated as ++ // volatile by C so could we omit saving the ones we are going to ++ // place in global registers (thread? method?) or those we only use ++ // during setup of the Java call? ++ // ++ // we don't need to save x5 which C uses as an indirect result location ++ // return register. 
++ // ++ // we don't need to save x6-x7 and x28-x31 which both C and Java treat as ++ // volatile ++ // ++ // we save x18-x27 which Java uses as temporary registers and C ++ // expects to be callee-save ++ // ++ // so the stub frame looks like this when we enter Java code ++ // ++ // [ return_from_Java ] <--- sp ++ // [ argument word n ] ++ // ... ++ // -22 [ argument word 1 ] ++ // -21 [ saved x27 ] <--- sp_after_call ++ // -20 [ saved x26 ] ++ // -19 [ saved x25 ] ++ // -18 [ saved x24 ] ++ // -17 [ saved x23 ] ++ // -16 [ saved x22 ] ++ // -15 [ saved x21 ] ++ // -14 [ saved x20 ] ++ // -13 [ saved x19 ] ++ // -12 [ saved x18 ] ++ // -11 [ saved x9 ] ++ // -10 [ call wrapper (x10) ] ++ // -9 [ result (x11) ] ++ // -8 [ result type (x12) ] ++ // -7 [ method (x13) ] ++ // -6 [ entry point (x14) ] ++ // -5 [ parameters (x15) ] ++ // -4 [ parameter size (x16) ] ++ // -3 [ thread (x17) ] ++ // -2 [ saved fp (x8) ] ++ // -1 [ saved ra (x1) ] ++ // 0 [ ] <--- fp == saved sp (x2) + -+ __ BIND(L_copy_shorts); -+ __ shadd(from, src_pos, src, t0, 1); // src_addr -+ __ shadd(to, dst_pos, dst, t0, 1); // dst_addr -+ __ addw(count, scratch_length, zr); // length -+ __ j(RuntimeAddress(short_copy_entry)); ++ // Call stub stack layout word offsets from fp ++ enum call_stub_layout { ++ sp_after_call_off = -21, + -+ __ BIND(L_copy_ints); -+ __ andi(t0, x22_elsize, 1); -+ __ bnez(t0, L_copy_longs); -+ __ shadd(from, src_pos, src, t0, 2); // src_addr -+ __ shadd(to, dst_pos, dst, t0, 2); // dst_addr -+ __ addw(count, scratch_length, zr); // length -+ __ j(RuntimeAddress(int_copy_entry)); ++ x27_off = -21, ++ x26_off = -20, ++ x25_off = -19, ++ x24_off = -18, ++ x23_off = -17, ++ x22_off = -16, ++ x21_off = -15, ++ x20_off = -14, ++ x19_off = -13, ++ x18_off = -12, ++ x9_off = -11, ++ ++ call_wrapper_off = -10, ++ result_off = -9, ++ result_type_off = -8, ++ method_off = -7, ++ entry_point_off = -6, ++ parameters_off = -5, ++ parameter_size_off = -4, ++ thread_off = -3, ++ fp_f = -2, ++ retaddr_off = -1, ++ }; ++ ++ address generate_call_stub(address& return_address) { ++ assert((int)frame::entry_frame_after_call_words == -(int)sp_after_call_off + 1 && ++ (int)frame::entry_frame_call_wrapper_offset == (int)call_wrapper_off, ++ "adjust this code"); ++ ++ StubCodeMark mark(this, "StubRoutines", "call_stub"); ++ address start = __ pc(); ++ ++ const Address sp_after_call (fp, sp_after_call_off * wordSize); ++ ++ const Address call_wrapper (fp, call_wrapper_off * wordSize); ++ const Address result (fp, result_off * wordSize); ++ const Address result_type (fp, result_type_off * wordSize); ++ const Address method (fp, method_off * wordSize); ++ const Address entry_point (fp, entry_point_off * wordSize); ++ const Address parameters (fp, parameters_off * wordSize); ++ const Address parameter_size(fp, parameter_size_off * wordSize); ++ ++ const Address thread (fp, thread_off * wordSize); ++ ++ const Address x27_save (fp, x27_off * wordSize); ++ const Address x26_save (fp, x26_off * wordSize); ++ const Address x25_save (fp, x25_off * wordSize); ++ const Address x24_save (fp, x24_off * wordSize); ++ const Address x23_save (fp, x23_off * wordSize); ++ const Address x22_save (fp, x22_off * wordSize); ++ const Address x21_save (fp, x21_off * wordSize); ++ const Address x20_save (fp, x20_off * wordSize); ++ const Address x19_save (fp, x19_off * wordSize); ++ const Address x18_save (fp, x18_off * wordSize); ++ ++ const Address x9_save (fp, x9_off * wordSize); ++ ++ // stub code ++ ++ address riscv_entry = __ pc(); ++ ++ // 
set up frame and move sp to end of save area ++ __ enter(); ++ __ addi(sp, fp, sp_after_call_off * wordSize); ++ ++ // save register parameters and Java temporary/global registers ++ // n.b. we save thread even though it gets installed in ++ // xthread because we want to sanity check tp later ++ __ sd(c_rarg7, thread); ++ __ sw(c_rarg6, parameter_size); ++ __ sd(c_rarg5, parameters); ++ __ sd(c_rarg4, entry_point); ++ __ sd(c_rarg3, method); ++ __ sd(c_rarg2, result_type); ++ __ sd(c_rarg1, result); ++ __ sd(c_rarg0, call_wrapper); ++ ++ __ sd(x9, x9_save); ++ ++ __ sd(x18, x18_save); ++ __ sd(x19, x19_save); ++ __ sd(x20, x20_save); ++ __ sd(x21, x21_save); ++ __ sd(x22, x22_save); ++ __ sd(x23, x23_save); ++ __ sd(x24, x24_save); ++ __ sd(x25, x25_save); ++ __ sd(x26, x26_save); ++ __ sd(x27, x27_save); ++ ++ // install Java thread in global register now we have saved ++ // whatever value it held ++ __ mv(xthread, c_rarg7); ++ ++ // And method ++ __ mv(xmethod, c_rarg3); ++ ++ // set up the heapbase register ++ __ reinit_heapbase(); + -+ __ BIND(L_copy_longs); +#ifdef ASSERT ++ // make sure we have no pending exceptions + { -+ BLOCK_COMMENT("assert long copy {"); + Label L; -+ __ andi(lh, lh, Klass::_lh_log2_element_size_mask); // lh -> x22_elsize -+ __ addw(lh, lh, zr); -+ __ mvw(t0, LogBytesPerLong); -+ __ beq(x22_elsize, t0, L); -+ __ stop("must be long copy, but elsize is wrong"); -+ __ bind(L); -+ BLOCK_COMMENT("} assert long copy done"); ++ __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset()))); ++ __ beqz(t0, L); ++ __ stop("StubRoutines::call_stub: entered with pending exception"); ++ __ BIND(L); + } +#endif -+ __ shadd(from, src_pos, src, t0, 3); // src_addr -+ __ shadd(to, dst_pos, dst, t0, 3); // dst_addr -+ __ addw(count, scratch_length, zr); // length -+ __ j(RuntimeAddress(long_copy_entry)); -+ -+ // ObjArrayKlass -+ __ BIND(L_objArray); -+ // live at this point: scratch_src_klass, scratch_length, src[_pos], dst[_pos] ++ // pass parameters if any ++ __ mv(esp, sp); ++ __ slli(t0, c_rarg6, LogBytesPerWord); ++ __ sub(t0, sp, t0); // Move SP out of the way ++ __ andi(sp, t0, -2 * wordSize); + -+ Label L_plain_copy, L_checkcast_copy; -+ // test array classes for subtyping -+ __ load_klass(t2, dst); -+ __ bne(scratch_src_klass, t2, L_checkcast_copy); // usual case is exact equality ++ BLOCK_COMMENT("pass parameters if any"); ++ Label parameters_done; ++ // parameter count is still in c_rarg6 ++ // and parameter pointer identifying param 1 is in c_rarg5 ++ __ beqz(c_rarg6, parameters_done); + -+ // Identically typed arrays can be copied without element-wise checks. -+ arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length, -+ t1, L_failed); ++ address loop = __ pc(); ++ __ ld(t0, c_rarg5, 0); ++ __ addi(c_rarg5, c_rarg5, wordSize); ++ __ addi(c_rarg6, c_rarg6, -1); ++ __ push_reg(t0); ++ __ bgtz(c_rarg6, loop); + -+ __ shadd(from, src_pos, src, t0, LogBytesPerHeapOop); -+ __ add(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); -+ __ shadd(to, dst_pos, dst, t0, LogBytesPerHeapOop); -+ __ add(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); -+ __ addw(count, scratch_length, zr); // length -+ __ BIND(L_plain_copy); -+ __ j(RuntimeAddress(oop_copy_entry)); ++ __ BIND(parameters_done); + -+ __ BIND(L_checkcast_copy); -+ // live at this point: scratch_src_klass, scratch_length, t2 (dst_klass) -+ { -+ // Before looking at dst.length, make sure dst is also an objArray. 
-+ __ lwu(t0, Address(t2, lh_offset)); -+ __ mvw(t1, objArray_lh); -+ __ bne(t0, t1, L_failed); ++ // call Java entry -- passing methdoOop, and current sp ++ // xmethod: Method* ++ // x30: sender sp ++ BLOCK_COMMENT("call Java function"); ++ __ mv(x30, sp); ++ __ jalr(c_rarg4); + -+ // It is safe to examine both src.length and dst.length. -+ arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length, -+ t2, L_failed); ++ // save current address for use by exception handling code + -+ __ load_klass(dst_klass, dst); // reload ++ return_address = __ pc(); + -+ // Marshal the base address arguments now, freeing registers. -+ __ shadd(from, src_pos, src, t0, LogBytesPerHeapOop); -+ __ add(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); -+ __ shadd(to, dst_pos, dst, t0, LogBytesPerHeapOop); -+ __ add(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); -+ __ addw(count, length, zr); // length (reloaded) -+ const Register sco_temp = c_rarg3; // this register is free now -+ assert_different_registers(from, to, count, sco_temp, -+ dst_klass, scratch_src_klass); ++ // store result depending on type (everything that is not ++ // T_OBJECT, T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT) ++ // n.b. this assumes Java returns an integral result in x10 ++ // and a floating result in j_farg0 ++ __ ld(j_rarg2, result); ++ Label is_long, is_float, is_double, exit; ++ __ ld(j_rarg1, result_type); ++ __ li(t0, (u1)T_OBJECT); ++ __ beq(j_rarg1, t0, is_long); ++ __ li(t0, (u1)T_LONG); ++ __ beq(j_rarg1, t0, is_long); ++ __ li(t0, (u1)T_FLOAT); ++ __ beq(j_rarg1, t0, is_float); ++ __ li(t0, (u1)T_DOUBLE); ++ __ beq(j_rarg1, t0, is_double); + -+ // Generate the type check. -+ const int sco_offset = in_bytes(Klass::super_check_offset_offset()); -+ __ lwu(sco_temp, Address(dst_klass, sco_offset)); ++ // handle T_INT case ++ __ sw(x10, Address(j_rarg2)); + -+ // Smashes t0, t1 -+ generate_type_check(scratch_src_klass, sco_temp, dst_klass, L_plain_copy); ++ __ BIND(exit); + -+ // Fetch destination element klass from the ObjArrayKlass header. -+ int ek_offset = in_bytes(ObjArrayKlass::element_klass_offset()); -+ __ ld(dst_klass, Address(dst_klass, ek_offset)); -+ __ lwu(sco_temp, Address(dst_klass, sco_offset)); ++ // pop parameters ++ __ addi(esp, fp, sp_after_call_off * wordSize); + -+ // the checkcast_copy loop needs two extra arguments: -+ assert(c_rarg3 == sco_temp, "#3 already in place"); -+ // Set up arguments for checkcast_copy_entry. 
-+ __ mv(c_rarg4, dst_klass); // dst.klass.element_klass -+ __ j(RuntimeAddress(checkcast_copy_entry)); ++#ifdef ASSERT ++ // verify that threads correspond ++ { ++ Label L, S; ++ __ ld(t0, thread); ++ __ bne(xthread, t0, S); ++ __ get_thread(t0); ++ __ beq(xthread, t0, L); ++ __ BIND(S); ++ __ stop("StubRoutines::call_stub: threads must correspond"); ++ __ BIND(L); + } ++#endif + -+ __ BIND(L_failed); -+ __ mv(x10, -1); -+ __ leave(); // required for proper stackwalking of RuntimeStub frame ++ // restore callee-save registers ++ __ ld(x27, x27_save); ++ __ ld(x26, x26_save); ++ __ ld(x25, x25_save); ++ __ ld(x24, x24_save); ++ __ ld(x23, x23_save); ++ __ ld(x22, x22_save); ++ __ ld(x21, x21_save); ++ __ ld(x20, x20_save); ++ __ ld(x19, x19_save); ++ __ ld(x18, x18_save); ++ ++ __ ld(x9, x9_save); ++ ++ __ ld(c_rarg0, call_wrapper); ++ __ ld(c_rarg1, result); ++ __ ld(c_rarg2, result_type); ++ __ ld(c_rarg3, method); ++ __ ld(c_rarg4, entry_point); ++ __ ld(c_rarg5, parameters); ++ __ ld(c_rarg6, parameter_size); ++ __ ld(c_rarg7, thread); ++ ++ // leave frame and return to caller ++ __ leave(); + __ ret(); + ++ // handle return types different from T_INT ++ ++ __ BIND(is_long); ++ __ sd(x10, Address(j_rarg2, 0)); ++ __ j(exit); ++ ++ __ BIND(is_float); ++ __ fsw(j_farg0, Address(j_rarg2, 0), t0); ++ __ j(exit); ++ ++ __ BIND(is_double); ++ __ fsd(j_farg0, Address(j_rarg2, 0), t0); ++ __ j(exit); ++ + return start; + } + ++ // Return point for a Java call if there's an exception thrown in ++ // Java code. The exception is caught and transformed into a ++ // pending exception stored in JavaThread that can be tested from ++ // within the VM. + // -+ // Generate stub for array fill. If "aligned" is true, the -+ // "to" address is assumed to be heapword aligned. -+ // -+ // Arguments for generated stub: -+ // to: c_rarg0 -+ // value: c_rarg1 -+ // count: c_rarg2 treated as signed ++ // Note: Usually the parameters are removed by the callee. In case ++ // of an exception crossing an activation frame boundary, that is ++ // not the case if the callee is compiled code => need to setup the ++ // sp. 
+ // -+ address generate_fill(BasicType t, bool aligned, const char* name) { -+ __ align(CodeEntryAlignment); -+ StubCodeMark mark(this, "StubRoutines", name); ++ // x10: exception oop ++ ++ address generate_catch_exception() { ++ StubCodeMark mark(this, "StubRoutines", "catch_exception"); + address start = __ pc(); + -+ BLOCK_COMMENT("Entry:"); ++ // same as in generate_call_stub(): ++ const Address thread(fp, thread_off * wordSize); + -+ const Register to = c_rarg0; // source array address -+ const Register value = c_rarg1; // value -+ const Register count = c_rarg2; // elements count ++#ifdef ASSERT ++ // verify that threads correspond ++ { ++ Label L, S; ++ __ ld(t0, thread); ++ __ bne(xthread, t0, S); ++ __ get_thread(t0); ++ __ beq(xthread, t0, L); ++ __ bind(S); ++ __ stop("StubRoutines::catch_exception: threads must correspond"); ++ __ bind(L); ++ } ++#endif + -+ const Register bz_base = x28; // base for block_zero routine -+ const Register cnt_words = x29; // temp register -+ const Register tmp_reg = t1; ++ // set pending exception ++ __ verify_oop(x10); + -+ __ enter(); ++ __ sd(x10, Address(xthread, Thread::pending_exception_offset())); ++ __ mv(t0, (address)__FILE__); ++ __ sd(t0, Address(xthread, Thread::exception_file_offset())); ++ __ mv(t0, (int)__LINE__); ++ __ sw(t0, Address(xthread, Thread::exception_line_offset())); + -+ Label L_fill_elements, L_exit1; ++ // complete return to VM ++ assert(StubRoutines::_call_stub_return_address != NULL, ++ "_call_stub_return_address must have been generated before"); ++ __ j(StubRoutines::_call_stub_return_address); + -+ int shift = -1; -+ switch (t) { -+ case T_BYTE: -+ shift = 0; ++ return start; ++ } + -+ // Zero extend value -+ // 8 bit -> 16 bit -+ __ andi(value, value, 0xff); -+ __ mv(tmp_reg, value); -+ __ slli(tmp_reg, tmp_reg, 8); -+ __ orr(value, value, tmp_reg); -+ -+ // 16 bit -> 32 bit -+ __ mv(tmp_reg, value); -+ __ slli(tmp_reg, tmp_reg, 16); -+ __ orr(value, value, tmp_reg); ++ // Continuation point for runtime calls returning with a pending ++ // exception. The pending exception check happened in the runtime ++ // or native call stub. The pending exception in Thread is ++ // converted into a Java-level exception. ++ // ++ // Contract with Java-level exception handlers: ++ // x10: exception ++ // x13: throwing pc ++ // ++ // NOTE: At entry of this stub, exception-pc must be in RA !! + -+ __ mv(tmp_reg, 8 >> shift); // Short arrays (< 8 bytes) fill by element -+ __ bltu(count, tmp_reg, L_fill_elements); -+ break; -+ case T_SHORT: -+ shift = 1; -+ // Zero extend value -+ // 16 bit -> 32 bit -+ __ andi(value, value, 0xffff); -+ __ mv(tmp_reg, value); -+ __ slli(tmp_reg, tmp_reg, 16); -+ __ orr(value, value, tmp_reg); ++ // NOTE: this is always used as a jump target within generated code ++ // so it just needs to be generated code with no x86 prolog + -+ // Short arrays (< 8 bytes) fill by element -+ __ mv(tmp_reg, 8 >> shift); -+ __ bltu(count, tmp_reg, L_fill_elements); -+ break; -+ case T_INT: -+ shift = 2; ++ address generate_forward_exception() { ++ StubCodeMark mark(this, "StubRoutines", "forward exception"); ++ address start = __ pc(); + -+ // Short arrays (< 8 bytes) fill by element -+ __ mv(tmp_reg, 8 >> shift); -+ __ bltu(count, tmp_reg, L_fill_elements); -+ break; -+ default: ShouldNotReachHere(); -+ } ++ // Upon entry, RA points to the return address returning into ++ // Java (interpreted or compiled) code; i.e., the return address ++ // becomes the throwing pc. 
++ // ++ // Arguments pushed before the runtime call are still on the stack ++ // but the exception handler will reset the stack pointer -> ++ // ignore them. A potential result in registers can be ignored as ++ // well. + -+ // Align source address at 8 bytes address boundary. -+ Label L_skip_align1, L_skip_align2, L_skip_align4; -+ if (!aligned) { -+ switch (t) { -+ case T_BYTE: -+ // One byte misalignment happens only for byte arrays. -+ __ andi(t0, to, 1); -+ __ beqz(t0, L_skip_align1); -+ __ sb(value, Address(to, 0)); -+ __ addi(to, to, 1); -+ __ addiw(count, count, -1); -+ __ bind(L_skip_align1); -+ // Fallthrough -+ case T_SHORT: -+ // Two bytes misalignment happens only for byte and short (char) arrays. -+ __ andi(t0, to, 2); -+ __ beqz(t0, L_skip_align2); -+ __ sh(value, Address(to, 0)); -+ __ addi(to, to, 2); -+ __ addiw(count, count, -(2 >> shift)); -+ __ bind(L_skip_align2); -+ // Fallthrough -+ case T_INT: -+ // Align to 8 bytes, we know we are 4 byte aligned to start. -+ __ andi(t0, to, 4); -+ __ beqz(t0, L_skip_align4); -+ __ sw(value, Address(to, 0)); -+ __ addi(to, to, 4); -+ __ addiw(count, count, -(4 >> shift)); -+ __ bind(L_skip_align4); -+ break; -+ default: ShouldNotReachHere(); -+ } ++#ifdef ASSERT ++ // make sure this code is only executed if there is a pending exception ++ { ++ Label L; ++ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); ++ __ bnez(t0, L); ++ __ stop("StubRoutines::forward exception: no pending exception (1)"); ++ __ bind(L); + } ++#endif + -+ // -+ // Fill large chunks -+ // -+ __ srliw(cnt_words, count, 3 - shift); // number of words ++ // compute exception handler into x9 + -+ // 32 bit -> 64 bit -+ __ andi(value, value, 0xffffffff); -+ __ mv(tmp_reg, value); -+ __ slli(tmp_reg, tmp_reg, 32); -+ __ orr(value, value, tmp_reg); ++ // call the VM to find the handler address associated with the ++ // caller address. pass thread in x10 and caller pc (ret address) ++ // in x11. n.b. the caller pc is in ra, unlike x86 where it is on ++ // the stack. ++ __ mv(c_rarg1, ra); ++ // ra will be trashed by the VM call so we move it to x9 ++ // (callee-saved) because we also need to pass it to the handler ++ // returned by this call. ++ __ mv(x9, ra); ++ BLOCK_COMMENT("call exception_handler_for_return_address"); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ++ SharedRuntime::exception_handler_for_return_address), ++ xthread, c_rarg1); ++ // we should not really care that ra is no longer the callee ++ // address. we saved the value the handler needs in x9 so we can ++ // just copy it to x13. however, the C2 handler will push its own ++ // frame and then calls into the VM and the VM code asserts that ++ // the PC for the frame above the handler belongs to a compiled ++ // Java method. So, we restore ra here to satisfy that assert. ++ __ mv(ra, x9); ++ // setup x10 & x13 & clear pending exception ++ __ mv(x13, x9); ++ __ mv(x9, x10); ++ __ ld(x10, Address(xthread, Thread::pending_exception_offset())); ++ __ sd(zr, Address(xthread, Thread::pending_exception_offset())); + -+ __ slli(tmp_reg, cnt_words, 3 - shift); -+ __ subw(count, count, tmp_reg); ++#ifdef ASSERT ++ // make sure exception is set + { -+ __ fill_words(to, cnt_words, value); ++ Label L; ++ __ bnez(x10, L); ++ __ stop("StubRoutines::forward exception: no pending exception (2)"); ++ __ bind(L); + } ++#endif + -+ // Remaining count is less than 8 bytes. Fill it by a single store. -+ // Note that the total length is no less than 8 bytes. 
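The fill stub above widens the fill value by OR-ing shifted copies of itself (8 to 16, 16 to 32, 32 to 64 bits) so it can store a whole word per iteration. A minimal C-level sketch of that widening, mirroring the slli/orr sequences in the hunk (illustrative only):

#include <cstdint>

// Replicate a byte/short/int fill pattern across a 64-bit word.
// log2_element_size: 0 = byte, 1 = short, 2 = int.
uint64_t splat_fill_value(uint64_t value, int log2_element_size) {
  if (log2_element_size == 0) {   // byte: 8 -> 16 bits
    value &= 0xff;
    value |= value << 8;
  }
  if (log2_element_size <= 1) {   // byte/short: 16 -> 32 bits
    value &= 0xffff;
    value |= value << 16;
  }
  value &= 0xffffffff;            // all element sizes: 32 -> 64 bits
  value |= value << 32;
  return value;
}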
-+ if (t == T_BYTE || t == T_SHORT) { -+ __ beqz(count, L_exit1); -+ __ shadd(to, count, to, tmp_reg, shift); // points to the end -+ __ sd(value, Address(to, -8)); // overwrite some elements -+ __ bind(L_exit1); -+ __ leave(); -+ __ ret(); -+ } ++ // continue at exception handler ++ // x10: exception ++ // x13: throwing pc ++ // x9: exception handler ++ __ verify_oop(x10); ++ __ jr(x9); + -+ // Handle copies less than 8 bytes. -+ Label L_fill_2, L_fill_4, L_exit2; -+ __ bind(L_fill_elements); -+ switch (t) { -+ case T_BYTE: -+ __ andi(t0, count, 1); -+ __ beqz(t0, L_fill_2); -+ __ sb(value, Address(to, 0)); -+ __ addi(to, to, 1); -+ __ bind(L_fill_2); -+ __ andi(t0, count, 2); -+ __ beqz(t0, L_fill_4); -+ __ sh(value, Address(to, 0)); -+ __ addi(to, to, 2); -+ __ bind(L_fill_4); -+ __ andi(t0, count, 4); -+ __ beqz(t0, L_exit2); -+ __ sw(value, Address(to, 0)); -+ break; -+ case T_SHORT: -+ __ andi(t0, count, 1); -+ __ beqz(t0, L_fill_4); -+ __ sh(value, Address(to, 0)); -+ __ addi(to, to, 2); -+ __ bind(L_fill_4); -+ __ andi(t0, count, 2); -+ __ beqz(t0, L_exit2); -+ __ sw(value, Address(to, 0)); -+ break; -+ case T_INT: -+ __ beqz(count, L_exit2); -+ __ sw(value, Address(to, 0)); -+ break; -+ default: ShouldNotReachHere(); -+ } -+ __ bind(L_exit2); -+ __ leave(); -+ __ ret(); + return start; + } + -+ void generate_arraycopy_stubs() { -+ address entry = NULL; -+ address entry_jbyte_arraycopy = NULL; -+ address entry_jshort_arraycopy = NULL; -+ address entry_jint_arraycopy = NULL; -+ address entry_oop_arraycopy = NULL; -+ address entry_jlong_arraycopy = NULL; -+ address entry_checkcast_arraycopy = NULL; -+ -+ StubRoutines::riscv::_zero_blocks = generate_zero_blocks(); ++ // Non-destructive plausibility checks for oops ++ // ++ // Arguments: ++ // x10: oop to verify ++ // t0: error message ++ // ++ // Stack after saving c_rarg3: ++ // [tos + 0]: saved c_rarg3 ++ // [tos + 1]: saved c_rarg2 ++ // [tos + 2]: saved ra ++ // [tos + 3]: saved t1 ++ // [tos + 4]: saved x10 ++ // [tos + 5]: saved t0 ++ address generate_verify_oop() { + -+ //*** jbyte -+ // Always need aligned and unaligned versions -+ StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, &entry, -+ "jbyte_disjoint_arraycopy"); -+ StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, entry, -+ &entry_jbyte_arraycopy, -+ "jbyte_arraycopy"); -+ StubRoutines::_arrayof_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(true, &entry, -+ "arrayof_jbyte_disjoint_arraycopy"); -+ StubRoutines::_arrayof_jbyte_arraycopy = generate_conjoint_byte_copy(true, entry, NULL, -+ "arrayof_jbyte_arraycopy"); ++ StubCodeMark mark(this, "StubRoutines", "verify_oop"); ++ address start = __ pc(); + -+ //*** jshort -+ // Always need aligned and unaligned versions -+ StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, &entry, -+ "jshort_disjoint_arraycopy"); -+ StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, entry, -+ &entry_jshort_arraycopy, -+ "jshort_arraycopy"); -+ StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_copy(true, &entry, -+ "arrayof_jshort_disjoint_arraycopy"); -+ StubRoutines::_arrayof_jshort_arraycopy = generate_conjoint_short_copy(true, entry, NULL, -+ "arrayof_jshort_arraycopy"); ++ Label exit, error; + -+ //*** jint -+ // Aligned versions -+ StubRoutines::_arrayof_jint_disjoint_arraycopy = generate_disjoint_int_copy(true, &entry, -+ "arrayof_jint_disjoint_arraycopy"); -+ StubRoutines::_arrayof_jint_arraycopy = 
generate_conjoint_int_copy(true, entry, &entry_jint_arraycopy, -+ "arrayof_jint_arraycopy"); -+ // In 64 bit we need both aligned and unaligned versions of jint arraycopy. -+ // entry_jint_arraycopy always points to the unaligned version -+ StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_copy(false, &entry, -+ "jint_disjoint_arraycopy"); -+ StubRoutines::_jint_arraycopy = generate_conjoint_int_copy(false, entry, -+ &entry_jint_arraycopy, -+ "jint_arraycopy"); ++ __ push_reg(0x3000, sp); // save c_rarg2 and c_rarg3 + -+ //*** jlong -+ // It is always aligned -+ StubRoutines::_arrayof_jlong_disjoint_arraycopy = generate_disjoint_long_copy(true, &entry, -+ "arrayof_jlong_disjoint_arraycopy"); -+ StubRoutines::_arrayof_jlong_arraycopy = generate_conjoint_long_copy(true, entry, &entry_jlong_arraycopy, -+ "arrayof_jlong_arraycopy"); -+ StubRoutines::_jlong_disjoint_arraycopy = StubRoutines::_arrayof_jlong_disjoint_arraycopy; -+ StubRoutines::_jlong_arraycopy = StubRoutines::_arrayof_jlong_arraycopy; ++ __ la(c_rarg2, ExternalAddress((address) StubRoutines::verify_oop_count_addr())); ++ __ ld(c_rarg3, Address(c_rarg2)); ++ __ add(c_rarg3, c_rarg3, 1); ++ __ sd(c_rarg3, Address(c_rarg2)); + -+ //*** oops -+ { -+ // With compressed oops we need unaligned versions; notice that -+ // we overwrite entry_oop_arraycopy. -+ bool aligned = !UseCompressedOops; ++ // object is in x10 ++ // make sure object is 'reasonable' ++ __ beqz(x10, exit); // if obj is NULL it is OK + -+ StubRoutines::_arrayof_oop_disjoint_arraycopy -+ = generate_disjoint_oop_copy(aligned, &entry, "arrayof_oop_disjoint_arraycopy", -+ /*dest_uninitialized*/false); -+ StubRoutines::_arrayof_oop_arraycopy -+ = generate_conjoint_oop_copy(aligned, entry, &entry_oop_arraycopy, "arrayof_oop_arraycopy", -+ /*dest_uninitialized*/false); -+ // Aligned versions without pre-barriers -+ StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit -+ = generate_disjoint_oop_copy(aligned, &entry, "arrayof_oop_disjoint_arraycopy_uninit", -+ /*dest_uninitialized*/true); -+ StubRoutines::_arrayof_oop_arraycopy_uninit -+ = generate_conjoint_oop_copy(aligned, entry, NULL, "arrayof_oop_arraycopy_uninit", -+ /*dest_uninitialized*/true); ++#if INCLUDE_ZGC ++ if (UseZGC) { ++ // Check if mask is good. ++ // verifies that ZAddressBadMask & x10 == 0 ++ __ ld(c_rarg3, Address(xthread, ZThreadLocalData::address_bad_mask_offset())); ++ __ andr(c_rarg2, x10, c_rarg3); ++ __ bnez(c_rarg2, error); + } ++#endif + -+ StubRoutines::_oop_disjoint_arraycopy = StubRoutines::_arrayof_oop_disjoint_arraycopy; -+ StubRoutines::_oop_arraycopy = StubRoutines::_arrayof_oop_arraycopy; -+ StubRoutines::_oop_disjoint_arraycopy_uninit = StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit; -+ StubRoutines::_oop_arraycopy_uninit = StubRoutines::_arrayof_oop_arraycopy_uninit; ++ // Check if the oop is in the right area of memory ++ __ mv(c_rarg3, (intptr_t) Universe::verify_oop_mask()); ++ __ andr(c_rarg2, x10, c_rarg3); ++ __ mv(c_rarg3, (intptr_t) Universe::verify_oop_bits()); + -+ StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy); -+ StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", NULL, -+ /*dest_uninitialized*/true); ++ // Compare c_rarg2 and c_rarg3. ++ __ bne(c_rarg2, c_rarg3, error); + ++ // make sure klass is 'reasonable', which is not zero. 
++ __ load_klass(x10, x10); // get klass ++ __ beqz(x10, error); // if klass is NULL it is broken + -+ StubRoutines::_unsafe_arraycopy = generate_unsafe_copy("unsafe_arraycopy", -+ entry_jbyte_arraycopy, -+ entry_jshort_arraycopy, -+ entry_jint_arraycopy, -+ entry_jlong_arraycopy); ++ // return if everything seems ok ++ __ bind(exit); + -+ StubRoutines::_generic_arraycopy = generate_generic_copy("generic_arraycopy", -+ entry_jbyte_arraycopy, -+ entry_jshort_arraycopy, -+ entry_jint_arraycopy, -+ entry_oop_arraycopy, -+ entry_jlong_arraycopy, -+ entry_checkcast_arraycopy); ++ __ pop_reg(0x3000, sp); // pop c_rarg2 and c_rarg3 ++ __ ret(); + -+ StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill"); -+ StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill"); -+ StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill"); -+ StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill"); -+ StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill"); -+ StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill"); ++ // handle errors ++ __ bind(error); ++ __ pop_reg(0x3000, sp); // pop c_rarg2 and c_rarg3 ++ ++ __ pusha(); ++ // debug(char* msg, int64_t pc, int64_t regs[]) ++ __ mv(c_rarg0, t0); // pass address of error message ++ __ mv(c_rarg1, ra); // pass return address ++ __ mv(c_rarg2, sp); // pass address of regs on stack ++#ifndef PRODUCT ++ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); ++#endif ++ BLOCK_COMMENT("call MacroAssembler::debug"); ++ int32_t offset = 0; ++ __ movptr_with_offset(t0, CAST_FROM_FN_PTR(address, MacroAssembler::debug64), offset); ++ __ jalr(x1, t0, offset); ++ __ ebreak(); ++ ++ return start; + } + -+ // Safefetch stubs. -+ void generate_safefetch(const char* name, int size, address* entry, -+ address* fault_pc, address* continuation_pc) { -+ // safefetch signatures: -+ // int SafeFetch32(int* adr, int errValue) -+ // intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue) -+ // -+ // arguments: -+ // c_rarg0 = adr -+ // c_rarg1 = errValue -+ // -+ // result: -+ // PPC_RET = *adr or errValue -+ assert_cond(entry != NULL && fault_pc != NULL && continuation_pc != NULL); -+ StubCodeMark mark(this, "StubRoutines", name); ++ // The inner part of zero_words(). ++ // ++ // Inputs: ++ // x28: the HeapWord-aligned base address of an array to zero. ++ // x29: the count in HeapWords, x29 > 0. ++ // ++ // Returns x28 and x29, adjusted for the caller to clear. ++ // x28: the base address of the tail of words left to clear. ++ // x29: the number of words in the tail. ++ // x29 < MacroAssembler::zero_words_block_size. + -+ // Entry point, pc or function descriptor. -+ *entry = __ pc(); ++ address generate_zero_blocks() { ++ Label done; + -+ // Load *adr into c_rarg1, may fault. -+ *fault_pc = __ pc(); -+ switch (size) { -+ case 4: -+ // int32_t -+ __ lw(c_rarg1, Address(c_rarg0, 0)); -+ break; -+ case 8: -+ // int64_t -+ __ ld(c_rarg1, Address(c_rarg0, 0)); -+ break; -+ default: -+ ShouldNotReachHere(); ++ const Register base = x28, cnt = x29; ++ ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "zero_blocks"); ++ address start = __ pc(); ++ ++ { ++ // Clear the remaining blocks. 
++ Label loop; ++ __ sub(cnt, cnt, MacroAssembler::zero_words_block_size); ++ __ bltz(cnt, done); ++ __ bind(loop); ++ for (int i = 0; i < MacroAssembler::zero_words_block_size; i++) { ++ __ sd(zr, Address(base, 0)); ++ __ add(base, base, 8); ++ } ++ __ sub(cnt, cnt, MacroAssembler::zero_words_block_size); ++ __ bgez(cnt, loop); ++ __ bind(done); ++ __ add(cnt, cnt, MacroAssembler::zero_words_block_size); + } + -+ // return errValue or *adr -+ *continuation_pc = __ pc(); -+ __ mv(x10, c_rarg1); + __ ret(); -+ } + -+#ifdef COMPILER2 -+ // code for comparing 16 bytes of strings with same encoding -+ void compare_string_16_bytes_same(Label& DIFF1, Label& DIFF2) { -+ const Register result = x10, str1 = x11, cnt1 = x12, str2 = x13, tmp1 = x28, tmp2 = x29, tmp4 = x7, tmp5 = x31; -+ __ ld(tmp5, Address(str1)); -+ __ addi(str1, str1, wordSize); -+ __ xorr(tmp4, tmp1, tmp2); -+ __ ld(cnt1, Address(str2)); -+ __ addi(str2, str2, wordSize); -+ __ bnez(tmp4, DIFF1); -+ __ ld(tmp1, Address(str1)); -+ __ addi(str1, str1, wordSize); -+ __ xorr(tmp4, tmp5, cnt1); -+ __ ld(tmp2, Address(str2)); -+ __ addi(str2, str2, wordSize); -+ __ bnez(tmp4, DIFF2); ++ return start; + } + -+ // code for comparing 8 characters of strings with Latin1 and Utf16 encoding -+ void compare_string_8_x_LU(Register tmpL, Register tmpU, Register strL, Register strU, Label& DIFF) { -+ const Register tmp = x30; -+ __ ld(tmpL, Address(strL)); -+ __ addi(strL, strL, wordSize); -+ __ ld(tmpU, Address(strU)); -+ __ addi(strU, strU, wordSize); -+ __ inflate_lo32(tmp, tmpL); -+ __ mv(t0, tmp); -+ __ xorr(tmp, tmpU, t0); -+ __ bnez(tmp, DIFF); ++ typedef enum { ++ copy_forwards = 1, ++ copy_backwards = -1 ++ } copy_direction; + -+ __ ld(tmpU, Address(strU)); -+ __ addi(strU, strU, wordSize); -+ __ inflate_hi32(tmp, tmpL); -+ __ mv(t0, tmp); -+ __ xorr(tmp, tmpU, t0); -+ __ bnez(tmp, DIFF); -+ } ++ // Bulk copy of blocks of 8 words. ++ // ++ // count is a count of words. ++ // ++ // Precondition: count >= 8 ++ // ++ // Postconditions: ++ // ++ // The least significant bit of count contains the remaining count ++ // of words to copy. The rest of count is trash. ++ // ++ // s and d are adjusted to point to the remaining words to copy ++ // ++ void generate_copy_longs(Label &start, Register s, Register d, Register count, ++ copy_direction direction) { ++ int unit = wordSize * direction; ++ int bias = wordSize; + -+ // x10 = result -+ // x11 = str1 -+ // x12 = cnt1 -+ // x13 = str2 -+ // x14 = cnt2 -+ // x28 = tmp1 -+ // x29 = tmp2 -+ // x30 = tmp3 -+ address generate_compare_long_string_different_encoding(bool isLU) { ++ const Register tmp_reg0 = x13, tmp_reg1 = x14, tmp_reg2 = x15, tmp_reg3 = x16, ++ tmp_reg4 = x17, tmp_reg5 = x7, tmp_reg6 = x28, tmp_reg7 = x29; ++ ++ const Register stride = x30; ++ ++ assert_different_registers(t0, tmp_reg0, tmp_reg1, tmp_reg2, tmp_reg3, ++ tmp_reg4, tmp_reg5, tmp_reg6, tmp_reg7); ++ assert_different_registers(s, d, count, t0); ++ ++ Label again, drain; ++ const char* stub_name = NULL; ++ if (direction == copy_forwards) { ++ stub_name = "forward_copy_longs"; ++ } else { ++ stub_name = "backward_copy_longs"; ++ } ++ StubCodeMark mark(this, "StubRoutines", stub_name); + __ align(CodeEntryAlignment); -+ StubCodeMark mark(this, "StubRoutines", isLU ? 
"compare_long_string_different_encoding LU" : "compare_long_string_different_encoding UL"); -+ address entry = __ pc(); -+ Label SMALL_LOOP, TAIL, LOAD_LAST, DIFF, DONE, CALCULATE_DIFFERENCE; -+ const Register result = x10, str1 = x11, cnt1 = x12, str2 = x13, cnt2 = x14, -+ tmp1 = x28, tmp2 = x29, tmp3 = x30, tmp4 = x7, tmp5 = x31; -+ RegSet spilled_regs = RegSet::of(tmp4, tmp5); ++ __ bind(start); + -+ // cnt2 == amount of characters left to compare -+ // Check already loaded first 4 symbols -+ __ inflate_lo32(tmp3, isLU ? tmp1 : tmp2); -+ __ mv(isLU ? tmp1 : tmp2, tmp3); -+ __ addi(str1, str1, isLU ? wordSize / 2 : wordSize); -+ __ addi(str2, str2, isLU ? wordSize : wordSize / 2); -+ __ sub(cnt2, cnt2, wordSize / 2); // Already loaded 4 symbols. -+ __ push_reg(spilled_regs, sp); ++ if (direction == copy_forwards) { ++ __ sub(s, s, bias); ++ __ sub(d, d, bias); ++ } + -+ __ xorr(tmp3, tmp1, tmp2); -+ __ mv(tmp5, tmp2); -+ __ bnez(tmp3, CALCULATE_DIFFERENCE); ++#ifdef ASSERT ++ // Make sure we are never given < 8 words ++ { ++ Label L; + -+ Register strU = isLU ? str2 : str1, -+ strL = isLU ? str1 : str2, -+ tmpU = isLU ? tmp5 : tmp1, // where to keep U for comparison -+ tmpL = isLU ? tmp1 : tmp5; // where to keep L for comparison ++ __ li(t0, 8); ++ __ bge(count, t0, L); ++ __ stop("genrate_copy_longs called with < 8 words"); ++ __ bind(L); ++ } ++#endif + -+ // make sure main loop is byte-aligned, we should load another 4 bytes from strL -+ __ beqz(cnt2, DONE); // no characters left -+ __ lwu(tmpL, Address(strL)); -+ __ addi(strL, strL, wordSize / 2); -+ __ ld(tmpU, Address(strU)); -+ __ addi(strU, strU, wordSize); -+ __ inflate_lo32(tmp3, tmpL); -+ __ mv(tmpL, tmp3); -+ __ xorr(tmp3, tmpU, tmpL); -+ __ bnez(tmp3, CALCULATE_DIFFERENCE); -+ __ addi(cnt2, cnt2, -wordSize / 2); ++ __ ld(tmp_reg0, Address(s, 1 * unit)); ++ __ ld(tmp_reg1, Address(s, 2 * unit)); ++ __ ld(tmp_reg2, Address(s, 3 * unit)); ++ __ ld(tmp_reg3, Address(s, 4 * unit)); ++ __ ld(tmp_reg4, Address(s, 5 * unit)); ++ __ ld(tmp_reg5, Address(s, 6 * unit)); ++ __ ld(tmp_reg6, Address(s, 7 * unit)); ++ __ ld(tmp_reg7, Address(s, 8 * unit)); ++ __ addi(s, s, 8 * unit); ++ ++ __ sub(count, count, 16); ++ __ bltz(count, drain); ++ ++ __ bind(again); ++ ++ __ sd(tmp_reg0, Address(d, 1 * unit)); ++ __ sd(tmp_reg1, Address(d, 2 * unit)); ++ __ sd(tmp_reg2, Address(d, 3 * unit)); ++ __ sd(tmp_reg3, Address(d, 4 * unit)); ++ __ sd(tmp_reg4, Address(d, 5 * unit)); ++ __ sd(tmp_reg5, Address(d, 6 * unit)); ++ __ sd(tmp_reg6, Address(d, 7 * unit)); ++ __ sd(tmp_reg7, Address(d, 8 * unit)); ++ ++ __ ld(tmp_reg0, Address(s, 1 * unit)); ++ __ ld(tmp_reg1, Address(s, 2 * unit)); ++ __ ld(tmp_reg2, Address(s, 3 * unit)); ++ __ ld(tmp_reg3, Address(s, 4 * unit)); ++ __ ld(tmp_reg4, Address(s, 5 * unit)); ++ __ ld(tmp_reg5, Address(s, 6 * unit)); ++ __ ld(tmp_reg6, Address(s, 7 * unit)); ++ __ ld(tmp_reg7, Address(s, 8 * unit)); ++ ++ __ addi(s, s, 8 * unit); ++ __ addi(d, d, 8 * unit); ++ ++ __ sub(count, count, 8); ++ __ bgez(count, again); ++ ++ // Drain ++ __ bind(drain); ++ ++ __ sd(tmp_reg0, Address(d, 1 * unit)); ++ __ sd(tmp_reg1, Address(d, 2 * unit)); ++ __ sd(tmp_reg2, Address(d, 3 * unit)); ++ __ sd(tmp_reg3, Address(d, 4 * unit)); ++ __ sd(tmp_reg4, Address(d, 5 * unit)); ++ __ sd(tmp_reg5, Address(d, 6 * unit)); ++ __ sd(tmp_reg6, Address(d, 7 * unit)); ++ __ sd(tmp_reg7, Address(d, 8 * unit)); ++ __ addi(d, d, 8 * unit); + -+ __ beqz(cnt2, DONE); // no character left -+ __ sub(cnt2, cnt2, wordSize * 2); -+ __ bltz(cnt2, TAIL); 
-+ __ bind(SMALL_LOOP); // smaller loop -+ __ sub(cnt2, cnt2, wordSize * 2); -+ compare_string_8_x_LU(tmpL, tmpU, strL, strU, DIFF); -+ compare_string_8_x_LU(tmpL, tmpU, strL, strU, DIFF); -+ __ bgez(cnt2, SMALL_LOOP); -+ __ addi(t0, cnt2, wordSize * 2); -+ __ beqz(t0, DONE); -+ __ bind(TAIL); // 1..15 characters left -+ if (AvoidUnalignedAccesses) { -+ // Aligned access. Load bytes from byte-aligned address, -+ // which may contain invalid bytes in last load. -+ // Invalid bytes should be removed before comparison. -+ Label LOAD_LAST, WORD_CMP; -+ __ addi(t0, cnt2, wordSize); -+ __ bgtz(t0, LOAD_LAST); -+ // remaining characters is greater than or equals to 8, we can do one compare_string_8_x_LU -+ compare_string_8_x_LU(tmpL, tmpU, strL, strU, DIFF); -+ __ addi(cnt2, cnt2, wordSize); -+ __ beqz(cnt2, DONE); // no character left -+ __ bind(LOAD_LAST); // 1..7 characters left -+ __ lwu(tmpL, Address(strL)); -+ __ addi(strL, strL, wordSize / 2); -+ __ ld(tmpU, Address(strU)); -+ __ addi(strU, strU, wordSize); -+ __ inflate_lo32(tmp3, tmpL); -+ __ mv(tmpL, tmp3); -+ __ addi(t0, cnt2, wordSize / 2); -+ __ blez(t0, WORD_CMP); -+ __ slli(t0, t0, 1); // now in bytes -+ __ slli(t0, t0, LogBitsPerByte); -+ __ sll(tmpL, tmpL, t0); -+ __ sll(tmpU, tmpU, t0); -+ // remaining characters is greater than or equals to 4, we can do one full 4-byte comparison -+ __ bind(WORD_CMP); -+ __ xorr(tmp3, tmpU, tmpL); -+ __ bnez(tmp3, CALCULATE_DIFFERENCE); -+ __ addi(cnt2, cnt2, wordSize / 2); -+ __ bltz(cnt2, LOAD_LAST); // 1..3 characters left -+ __ j(DONE); // no character left ++ { ++ Label L1, L2; ++ __ andi(t0, count, 4); ++ __ beqz(t0, L1); ++ ++ __ ld(tmp_reg0, Address(s, 1 * unit)); ++ __ ld(tmp_reg1, Address(s, 2 * unit)); ++ __ ld(tmp_reg2, Address(s, 3 * unit)); ++ __ ld(tmp_reg3, Address(s, 4 * unit)); ++ __ addi(s, s, 4 * unit); ++ ++ __ sd(tmp_reg0, Address(d, 1 * unit)); ++ __ sd(tmp_reg1, Address(d, 2 * unit)); ++ __ sd(tmp_reg2, Address(d, 3 * unit)); ++ __ sd(tmp_reg3, Address(d, 4 * unit)); ++ __ addi(d, d, 4 * unit); ++ ++ __ bind(L1); ++ ++ if (direction == copy_forwards) { ++ __ addi(s, s, bias); ++ __ addi(d, d, bias); ++ } ++ ++ __ andi(t0, count, 2); ++ __ beqz(t0, L2); ++ if (direction == copy_backwards) { ++ __ addi(s, s, 2 * unit); ++ __ ld(tmp_reg0, Address(s)); ++ __ ld(tmp_reg1, Address(s, wordSize)); ++ __ addi(d, d, 2 * unit); ++ __ sd(tmp_reg0, Address(d)); ++ __ sd(tmp_reg1, Address(d, wordSize)); + } else { -+ // Unaligned accesses. Load from non-byte aligned address. -+ __ shadd(strU, cnt2, strU, t0, 1); // convert cnt2 into bytes and get Address of last 8 bytes in UTF-16 string -+ __ add(strL, strL, cnt2); // Address of last 16 bytes in Latin1 string -+ // last 16 characters -+ compare_string_8_x_LU(tmpL, tmpU, strL, strU, DIFF); -+ compare_string_8_x_LU(tmpL, tmpU, strL, strU, DIFF); -+ __ j(DONE); ++ __ ld(tmp_reg0, Address(s)); ++ __ ld(tmp_reg1, Address(s, wordSize)); ++ __ addi(s, s, 2 * unit); ++ __ sd(tmp_reg0, Address(d)); ++ __ sd(tmp_reg1, Address(d, wordSize)); ++ __ addi(d, d, 2 * unit); + } -+ __ bind(DIFF); -+ __ mv(tmpL, t0); -+ // Find the first different characters in the longwords and -+ // compute their difference. 
-+ __ bind(CALCULATE_DIFFERENCE); -+ __ ctzc_bit(tmp4, tmp3); -+ __ srl(tmp1, tmp1, tmp4); -+ __ srl(tmp5, tmp5, tmp4); -+ __ andi(tmp1, tmp1, 0xFFFF); -+ __ andi(tmp5, tmp5, 0xFFFF); -+ __ sub(result, tmp1, tmp5); -+ __ bind(DONE); -+ __ pop_reg(spilled_regs, sp); -+ __ ret(); -+ return entry; ++ __ bind(L2); ++ } ++ ++ __ ret(); + } + -+ // x10 = result -+ // x11 = str1 -+ // x12 = cnt1 -+ // x13 = str2 -+ // x14 = cnt2 -+ // x28 = tmp1 -+ // x29 = tmp2 -+ // x30 = tmp3 -+ // x31 = tmp4 -+ address generate_compare_long_string_same_encoding(bool isLL) { -+ __ align(CodeEntryAlignment); -+ StubCodeMark mark(this, "StubRoutines", isLL ? -+ "compare_long_string_same_encoding LL" : "compare_long_string_same_encoding UU"); -+ address entry = __ pc(); -+ Label SMALL_LOOP, CHECK_LAST, DIFF2, TAIL, -+ LENGTH_DIFF, DIFF, LAST_CHECK_AND_LENGTH_DIFF; -+ const Register result = x10, str1 = x11, cnt1 = x12, str2 = x13, cnt2 = x14, -+ tmp1 = x28, tmp2 = x29, tmp3 = x30, tmp4 = x7, tmp5 = x31; -+ RegSet spilled_regs = RegSet::of(tmp4, tmp5); ++ Label copy_f, copy_b; + -+ // cnt1/cnt2 contains amount of characters to compare. cnt1 can be re-used -+ // update cnt2 counter with already loaded 8 bytes -+ __ sub(cnt2, cnt2, wordSize / (isLL ? 1 : 2)); -+ // update pointers, because of previous read -+ __ add(str1, str1, wordSize); -+ __ add(str2, str2, wordSize); -+ // less than 16 bytes left? -+ __ sub(cnt2, cnt2, isLL ? 2 * wordSize : wordSize); -+ __ push_reg(spilled_regs, sp); -+ __ bltz(cnt2, TAIL); -+ __ bind(SMALL_LOOP); -+ compare_string_16_bytes_same(DIFF, DIFF2); -+ __ sub(cnt2, cnt2, isLL ? 2 * wordSize : wordSize); -+ __ bgez(cnt2, SMALL_LOOP); -+ __ bind(TAIL); -+ __ addi(cnt2, cnt2, isLL ? 2 * wordSize : wordSize); -+ __ beqz(cnt2, LAST_CHECK_AND_LENGTH_DIFF); -+ __ sub(cnt2, cnt2, isLL ? wordSize : wordSize / 2); -+ __ blez(cnt2, CHECK_LAST); -+ __ xorr(tmp4, tmp1, tmp2); -+ __ bnez(tmp4, DIFF); -+ __ ld(tmp1, Address(str1)); -+ __ addi(str1, str1, wordSize); -+ __ ld(tmp2, Address(str2)); -+ __ addi(str2, str2, wordSize); -+ __ sub(cnt2, cnt2, isLL ? wordSize : wordSize / 2); -+ __ bind(CHECK_LAST); -+ if (!isLL) { -+ __ add(cnt2, cnt2, cnt2); // now in bytes -+ } -+ __ xorr(tmp4, tmp1, tmp2); -+ __ bnez(tmp4, DIFF); -+ if (AvoidUnalignedAccesses) { -+ // Aligned access. Load bytes from byte-aligned address, -+ // which may contain invalid bytes in last load. -+ // Invalid bytes should be removed before comparison. -+ __ ld(tmp5, Address(str1)); -+ __ ld(cnt1, Address(str2)); -+ __ neg(cnt2, cnt2); -+ __ slli(cnt2, cnt2, LogBitsPerByte); -+ __ sll(tmp5, tmp5, cnt2); -+ __ sll(cnt1, cnt1, cnt2); -+ } else { -+ // Unaligned access. Load from non-byte aligned address. -+ __ add(str1, str1, cnt2); -+ __ ld(tmp5, Address(str1)); -+ __ add(str2, str2, cnt2); -+ __ ld(cnt1, Address(str2)); -+ } ++ // All-singing all-dancing memory copy. ++ // ++ // Copy count units of memory from s to d. The size of a unit is ++ // step, which can be positive or negative depending on the direction ++ // of copy. If is_aligned is false, we align the source address. 
++ // ++ /* ++ * if (is_aligned) { ++ * goto copy_8_bytes; ++ * } ++ * bool is_backwards = step < 0; ++ * int granularity = uabs(step); ++ * count = count * granularity; * count bytes ++ * ++ * if (is_backwards) { ++ * s += count; ++ * d += count; ++ * } ++ * ++ * count limit maybe greater than 16, for better performance ++ * if (count < 16) { ++ * goto copy_small; ++ * } ++ * ++ * if ((dst % 8) == (src % 8)) { ++ * aligned; ++ * goto copy8; ++ * } ++ * ++ * copy_small: ++ * load element one by one; ++ * done; ++ */ + -+ __ xorr(tmp4, tmp5, cnt1); -+ __ beqz(tmp4, LENGTH_DIFF); -+ // Find the first different characters in the longwords and -+ // compute their difference. -+ __ bind(DIFF2); -+ __ ctzc_bit(tmp3, tmp4, isLL); // count zero from lsb to msb -+ __ srl(tmp5, tmp5, tmp3); -+ __ srl(cnt1, cnt1, tmp3); -+ if (isLL) { -+ __ andi(tmp5, tmp5, 0xFF); -+ __ andi(cnt1, cnt1, 0xFF); -+ } else { -+ __ andi(tmp5, tmp5, 0xFFFF); -+ __ andi(cnt1, cnt1, 0xFFFF); -+ } -+ __ sub(result, tmp5, cnt1); -+ __ j(LENGTH_DIFF); -+ __ bind(DIFF); -+ __ ctzc_bit(tmp3, tmp4, isLL); // count zero from lsb to msb -+ __ srl(tmp1, tmp1, tmp3); -+ __ srl(tmp2, tmp2, tmp3); -+ if (isLL) { -+ __ andi(tmp1, tmp1, 0xFF); -+ __ andi(tmp2, tmp2, 0xFF); -+ } else { -+ __ andi(tmp1, tmp1, 0xFFFF); -+ __ andi(tmp2, tmp2, 0xFFFF); -+ } -+ __ sub(result, tmp1, tmp2); -+ __ j(LENGTH_DIFF); -+ __ bind(LAST_CHECK_AND_LENGTH_DIFF); -+ __ xorr(tmp4, tmp1, tmp2); -+ __ bnez(tmp4, DIFF); -+ __ bind(LENGTH_DIFF); -+ __ pop_reg(spilled_regs, sp); -+ __ ret(); -+ return entry; -+ } ++ typedef void (MacroAssembler::*copy_insn)(Register Rd, const Address &adr, Register temp); + -+ void generate_compare_long_strings() { -+ StubRoutines::riscv::_compare_long_string_LL = generate_compare_long_string_same_encoding(true); -+ StubRoutines::riscv::_compare_long_string_UU = generate_compare_long_string_same_encoding(false); -+ StubRoutines::riscv::_compare_long_string_LU = generate_compare_long_string_different_encoding(true); -+ StubRoutines::riscv::_compare_long_string_UL = generate_compare_long_string_different_encoding(false); -+ } ++ void copy_memory_v(Register s, Register d, Register count, Register tmp, int step) { ++ bool is_backward = step < 0; ++ int granularity = uabs(step); + -+ // x10 result -+ // x11 src -+ // x12 src count -+ // x13 pattern -+ // x14 pattern count -+ address generate_string_indexof_linear(bool needle_isL, bool haystack_isL) -+ { -+ const char* stubName = needle_isL -+ ? (haystack_isL ? "indexof_linear_ll" : "indexof_linear_ul") -+ : "indexof_linear_uu"; -+ __ align(CodeEntryAlignment); -+ StubCodeMark mark(this, "StubRoutines", stubName); -+ address entry = __ pc(); ++ const Register src = x30, dst = x31, vl = x14, cnt = x15, tmp1 = x16, tmp2 = x17; ++ assert_different_registers(s, d, cnt, vl, tmp, tmp1, tmp2); ++ Assembler::SEW sew = Assembler::elembytes_to_sew(granularity); ++ Label loop_forward, loop_backward, done; + -+ int needle_chr_size = needle_isL ? 1 : 2; -+ int haystack_chr_size = haystack_isL ? 1 : 2; -+ int needle_chr_shift = needle_isL ? 0 : 1; -+ int haystack_chr_shift = haystack_isL ? 
0 : 1; -+ bool isL = needle_isL && haystack_isL; -+ // parameters -+ Register result = x10, haystack = x11, haystack_len = x12, needle = x13, needle_len = x14; -+ // temporary registers -+ Register mask1 = x20, match_mask = x21, first = x22, trailing_zero = x23, mask2 = x24, tmp = x25; -+ // redefinitions -+ Register ch1 = x28, ch2 = x29; -+ RegSet spilled_regs = RegSet::range(x20, x25) + RegSet::range(x28, x29); ++ __ mv(dst, d); ++ __ mv(src, s); ++ __ mv(cnt, count); + -+ __ push_reg(spilled_regs, sp); ++ __ bind(loop_forward); ++ __ vsetvli(vl, cnt, sew, Assembler::m8); ++ if (is_backward) { ++ __ bne(vl, cnt, loop_backward); ++ } + -+ Label L_LOOP, L_LOOP_PROCEED, L_SMALL, L_HAS_ZERO, -+ L_HAS_ZERO_LOOP, L_CMP_LOOP, L_CMP_LOOP_NOMATCH, L_SMALL_PROCEED, -+ L_SMALL_HAS_ZERO_LOOP, L_SMALL_CMP_LOOP_NOMATCH, L_SMALL_CMP_LOOP, -+ L_POST_LOOP, L_CMP_LOOP_LAST_CMP, L_HAS_ZERO_LOOP_NOMATCH, -+ L_SMALL_CMP_LOOP_LAST_CMP, L_SMALL_CMP_LOOP_LAST_CMP2, -+ L_CMP_LOOP_LAST_CMP2, DONE, NOMATCH; ++ __ vlex_v(v0, src, sew); ++ __ sub(cnt, cnt, vl); ++ __ slli(vl, vl, (int)sew); ++ __ add(src, src, vl); + -+ __ ld(ch1, Address(needle)); -+ __ ld(ch2, Address(haystack)); -+ // src.length - pattern.length -+ __ sub(haystack_len, haystack_len, needle_len); ++ __ vsex_v(v0, dst, sew); ++ __ add(dst, dst, vl); ++ __ bnez(cnt, loop_forward); + -+ // first is needle[0] -+ __ andi(first, ch1, needle_isL ? 0xFF : 0xFFFF, first); -+ __ mv(mask1, haystack_isL ? 0x0101010101010101 : 0x0001000100010001); -+ __ mul(first, first, mask1); -+ __ mv(mask2, haystack_isL ? 0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff); -+ if (needle_isL != haystack_isL) { -+ __ mv(tmp, ch1); -+ } -+ __ sub(haystack_len, haystack_len, wordSize / haystack_chr_size - 1); -+ __ blez(haystack_len, L_SMALL); ++ if (is_backward) { ++ __ j(done); + -+ if (needle_isL != haystack_isL) { -+ __ inflate_lo32(ch1, tmp, match_mask, trailing_zero); ++ __ bind(loop_backward); ++ __ sub(tmp, cnt, vl); ++ __ slli(tmp, tmp, sew); ++ __ add(tmp1, s, tmp); ++ __ vlex_v(v0, tmp1, sew); ++ __ add(tmp2, d, tmp); ++ __ vsex_v(v0, tmp2, sew); ++ __ sub(cnt, cnt, vl); ++ __ bnez(cnt, loop_forward); ++ __ bind(done); + } -+ // xorr, sub, orr, notr, andr -+ // compare and set match_mask[i] with 0x80/0x8000 (Latin1/UTF16) if ch2[i] == first[i] -+ // eg: -+ // first: aa aa aa aa aa aa aa aa -+ // ch2: aa aa li nx jd ka aa aa -+ // match_mask: 80 80 00 00 00 00 80 80 -+ __ compute_match_mask(ch2, first, match_mask, mask1, mask2); ++ } + -+ // search first char of needle, if success, goto L_HAS_ZERO; -+ __ bnez(match_mask, L_HAS_ZERO); -+ __ sub(haystack_len, haystack_len, wordSize / haystack_chr_size); -+ __ add(result, result, wordSize / haystack_chr_size); -+ __ add(haystack, haystack, wordSize); -+ __ bltz(haystack_len, L_POST_LOOP); ++ void copy_memory(bool is_aligned, Register s, Register d, ++ Register count, Register tmp, int step) { ++ if (UseRVV) { ++ return copy_memory_v(s, d, count, tmp, step); ++ } + -+ __ bind(L_LOOP); -+ __ ld(ch2, Address(haystack)); -+ __ compute_match_mask(ch2, first, match_mask, mask1, mask2); -+ __ bnez(match_mask, L_HAS_ZERO); ++ bool is_backwards = step < 0; ++ int granularity = uabs(step); + -+ __ bind(L_LOOP_PROCEED); -+ __ sub(haystack_len, haystack_len, wordSize / haystack_chr_size); -+ __ add(haystack, haystack, wordSize); -+ __ add(result, result, wordSize / haystack_chr_size); -+ __ bgez(haystack_len, L_LOOP); ++ const Register src = x30, dst = x31, cnt = x15, tmp3 = x16, tmp4 = x17; + -+ __ bind(L_POST_LOOP); -+ __ mv(ch2, -wordSize 
/ haystack_chr_size); -+ __ ble(haystack_len, ch2, NOMATCH); // no extra characters to check -+ __ ld(ch2, Address(haystack)); -+ __ slli(haystack_len, haystack_len, LogBitsPerByte + haystack_chr_shift); -+ __ neg(haystack_len, haystack_len); -+ __ xorr(ch2, first, ch2); -+ __ sub(match_mask, ch2, mask1); -+ __ orr(ch2, ch2, mask2); -+ __ mv(trailing_zero, -1); // all bits set -+ __ j(L_SMALL_PROCEED); ++ Label same_aligned; ++ Label copy8, copy_small, done; + -+ __ align(OptoLoopAlignment); -+ __ bind(L_SMALL); -+ __ slli(haystack_len, haystack_len, LogBitsPerByte + haystack_chr_shift); -+ __ neg(haystack_len, haystack_len); -+ if (needle_isL != haystack_isL) { -+ __ inflate_lo32(ch1, tmp, match_mask, trailing_zero); ++ copy_insn ld_arr = NULL, st_arr = NULL; ++ switch (granularity) { ++ case 1 : ++ ld_arr = (copy_insn)&MacroAssembler::lbu; ++ st_arr = (copy_insn)&MacroAssembler::sb; ++ break; ++ case 2 : ++ ld_arr = (copy_insn)&MacroAssembler::lhu; ++ st_arr = (copy_insn)&MacroAssembler::sh; ++ break; ++ case 4 : ++ ld_arr = (copy_insn)&MacroAssembler::lwu; ++ st_arr = (copy_insn)&MacroAssembler::sw; ++ break; ++ case 8 : ++ ld_arr = (copy_insn)&MacroAssembler::ld; ++ st_arr = (copy_insn)&MacroAssembler::sd; ++ break; ++ default : ++ ShouldNotReachHere(); + } -+ __ xorr(ch2, first, ch2); -+ __ sub(match_mask, ch2, mask1); -+ __ orr(ch2, ch2, mask2); -+ __ mv(trailing_zero, -1); // all bits set + -+ __ bind(L_SMALL_PROCEED); -+ __ srl(trailing_zero, trailing_zero, haystack_len); // mask. zeroes on useless bits. -+ __ notr(ch2, ch2); -+ __ andr(match_mask, match_mask, ch2); -+ __ andr(match_mask, match_mask, trailing_zero); // clear useless bits and check -+ __ beqz(match_mask, NOMATCH); ++ __ beqz(count, done); ++ __ slli(cnt, count, exact_log2(granularity)); ++ if (is_backwards) { ++ __ add(src, s, cnt); ++ __ add(dst, d, cnt); ++ } else { ++ __ mv(src, s); ++ __ mv(dst, d); ++ } + -+ __ bind(L_SMALL_HAS_ZERO_LOOP); -+ __ ctzc_bit(trailing_zero, match_mask, haystack_isL, ch2, tmp); // count trailing zeros -+ __ addi(trailing_zero, trailing_zero, haystack_isL ? 7 : 15); -+ __ mv(ch2, wordSize / haystack_chr_size); -+ __ ble(needle_len, ch2, L_SMALL_CMP_LOOP_LAST_CMP2); -+ __ compute_index(haystack, trailing_zero, match_mask, result, ch2, tmp, haystack_isL); -+ __ mv(trailing_zero, wordSize / haystack_chr_size); -+ __ bne(ch1, ch2, L_SMALL_CMP_LOOP_NOMATCH); ++ if (is_aligned) { ++ __ addi(tmp, cnt, -8); ++ __ bgez(tmp, copy8); ++ __ j(copy_small); ++ } + -+ __ bind(L_SMALL_CMP_LOOP); -+ __ shadd(first, trailing_zero, needle, first, needle_chr_shift); -+ __ shadd(ch2, trailing_zero, haystack, ch2, haystack_chr_shift); -+ needle_isL ? __ lbu(first, Address(first)) : __ lhu(first, Address(first)); -+ haystack_isL ? __ lbu(ch2, Address(ch2)) : __ lhu(ch2, Address(ch2)); -+ __ add(trailing_zero, trailing_zero, 1); -+ __ bge(trailing_zero, needle_len, L_SMALL_CMP_LOOP_LAST_CMP); -+ __ beq(first, ch2, L_SMALL_CMP_LOOP); ++ __ mv(tmp, 16); ++ __ blt(cnt, tmp, copy_small); + -+ __ bind(L_SMALL_CMP_LOOP_NOMATCH); -+ __ beqz(match_mask, NOMATCH); -+ __ ctzc_bit(trailing_zero, match_mask, haystack_isL, tmp, ch2); -+ __ addi(trailing_zero, trailing_zero, haystack_isL ? 
7 : 15); -+ __ add(result, result, 1); -+ __ add(haystack, haystack, haystack_chr_size); -+ __ j(L_SMALL_HAS_ZERO_LOOP); ++ __ xorr(tmp, src, dst); ++ __ andi(tmp, tmp, 0b111); ++ __ bnez(tmp, copy_small); + -+ __ align(OptoLoopAlignment); -+ __ bind(L_SMALL_CMP_LOOP_LAST_CMP); -+ __ bne(first, ch2, L_SMALL_CMP_LOOP_NOMATCH); -+ __ j(DONE); ++ __ bind(same_aligned); ++ __ andi(tmp, src, 0b111); ++ __ beqz(tmp, copy8); ++ if (is_backwards) { ++ __ addi(src, src, step); ++ __ addi(dst, dst, step); ++ } ++ (_masm->*ld_arr)(tmp3, Address(src), t0); ++ (_masm->*st_arr)(tmp3, Address(dst), t0); ++ if (!is_backwards) { ++ __ addi(src, src, step); ++ __ addi(dst, dst, step); ++ } ++ __ addi(cnt, cnt, -granularity); ++ __ beqz(cnt, done); ++ __ j(same_aligned); + -+ __ align(OptoLoopAlignment); -+ __ bind(L_SMALL_CMP_LOOP_LAST_CMP2); -+ __ compute_index(haystack, trailing_zero, match_mask, result, ch2, tmp, haystack_isL); -+ __ bne(ch1, ch2, L_SMALL_CMP_LOOP_NOMATCH); -+ __ j(DONE); ++ __ bind(copy8); ++ if (is_backwards) { ++ __ addi(src, src, -wordSize); ++ __ addi(dst, dst, -wordSize); ++ } ++ __ ld(tmp3, Address(src)); ++ __ sd(tmp3, Address(dst)); ++ if (!is_backwards) { ++ __ addi(src, src, wordSize); ++ __ addi(dst, dst, wordSize); ++ } ++ __ addi(cnt, cnt, -wordSize); ++ __ addi(tmp4, cnt, -8); ++ __ bgez(tmp4, copy8); // cnt >= 8, do next loop + -+ __ align(OptoLoopAlignment); -+ __ bind(L_HAS_ZERO); -+ __ ctzc_bit(trailing_zero, match_mask, haystack_isL, tmp, ch2); -+ __ addi(trailing_zero, trailing_zero, haystack_isL ? 7 : 15); -+ __ slli(needle_len, needle_len, BitsPerByte * wordSize / 2); -+ __ orr(haystack_len, haystack_len, needle_len); // restore needle_len(32bits) -+ __ sub(result, result, 1); // array index from 0, so result -= 1 ++ __ beqz(cnt, done); + -+ __ bind(L_HAS_ZERO_LOOP); -+ __ mv(needle_len, wordSize / haystack_chr_size); -+ __ srli(ch2, haystack_len, BitsPerByte * wordSize / 2); -+ __ bge(needle_len, ch2, L_CMP_LOOP_LAST_CMP2); -+ // load next 8 bytes from haystack, and increase result index -+ __ compute_index(haystack, trailing_zero, match_mask, result, ch2, tmp, haystack_isL); -+ __ add(result, result, 1); -+ __ mv(trailing_zero, wordSize / haystack_chr_size); -+ __ bne(ch1, ch2, L_CMP_LOOP_NOMATCH); ++ __ bind(copy_small); ++ if (is_backwards) { ++ __ addi(src, src, step); ++ __ addi(dst, dst, step); ++ } ++ (_masm->*ld_arr)(tmp3, Address(src), t0); ++ (_masm->*st_arr)(tmp3, Address(dst), t0); ++ if (!is_backwards) { ++ __ addi(src, src, step); ++ __ addi(dst, dst, step); ++ } ++ __ addi(cnt, cnt, -granularity); ++ __ bgtz(cnt, copy_small); + -+ // compare one char -+ __ bind(L_CMP_LOOP); -+ __ shadd(needle_len, trailing_zero, needle, needle_len, needle_chr_shift); -+ needle_isL ? __ lbu(needle_len, Address(needle_len)) : __ lhu(needle_len, Address(needle_len)); -+ __ shadd(ch2, trailing_zero, haystack, ch2, haystack_chr_shift); -+ haystack_isL ? __ lbu(ch2, Address(ch2)) : __ lhu(ch2, Address(ch2)); -+ __ add(trailing_zero, trailing_zero, 1); // next char index -+ __ srli(tmp, haystack_len, BitsPerByte * wordSize / 2); -+ __ bge(trailing_zero, tmp, L_CMP_LOOP_LAST_CMP); -+ __ beq(needle_len, ch2, L_CMP_LOOP); ++ __ bind(done); ++ } + -+ __ bind(L_CMP_LOOP_NOMATCH); -+ __ beqz(match_mask, L_HAS_ZERO_LOOP_NOMATCH); -+ __ ctzc_bit(trailing_zero, match_mask, haystack_isL, needle_len, ch2); // find next "first" char index -+ __ addi(trailing_zero, trailing_zero, haystack_isL ? 
7 : 15); -+ __ add(haystack, haystack, haystack_chr_size); -+ __ j(L_HAS_ZERO_LOOP); ++ // Scan over array at a for count oops, verifying each one. ++ // Preserves a and count, clobbers t0 and t1. ++ void verify_oop_array(size_t size, Register a, Register count, Register temp) { ++ Label loop, end; ++ __ mv(t1, zr); ++ __ slli(t0, count, exact_log2(size)); ++ __ bind(loop); ++ __ bgeu(t1, t0, end); + -+ __ align(OptoLoopAlignment); -+ __ bind(L_CMP_LOOP_LAST_CMP); -+ __ bne(needle_len, ch2, L_CMP_LOOP_NOMATCH); -+ __ j(DONE); ++ __ add(temp, a, t1); ++ if (size == (size_t)wordSize) { ++ __ ld(temp, Address(temp, 0)); ++ __ verify_oop(temp); ++ } else { ++ __ lwu(temp, Address(temp, 0)); ++ __ decode_heap_oop(temp); // calls verify_oop ++ } ++ __ add(t1, t1, size); ++ __ j(loop); ++ __ bind(end); ++ } + -+ __ align(OptoLoopAlignment); -+ __ bind(L_CMP_LOOP_LAST_CMP2); -+ __ compute_index(haystack, trailing_zero, match_mask, result, ch2, tmp, haystack_isL); -+ __ add(result, result, 1); -+ __ bne(ch1, ch2, L_CMP_LOOP_NOMATCH); -+ __ j(DONE); ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ // Side Effects: ++ // disjoint_int_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_int_oop_copy(). ++ // ++ address generate_disjoint_copy(size_t size, bool aligned, bool is_oop, address* entry, ++ const char* name, bool dest_uninitialized = false) { ++ const Register s = c_rarg0, d = c_rarg1, count = c_rarg2; ++ RegSet saved_reg = RegSet::of(s, d, count); ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ __ enter(); + -+ __ align(OptoLoopAlignment); -+ __ bind(L_HAS_ZERO_LOOP_NOMATCH); -+ // 1) Restore "result" index. Index was wordSize/str2_chr_size * N until -+ // L_HAS_ZERO block. Byte octet was analyzed in L_HAS_ZERO_LOOP, -+ // so, result was increased at max by wordSize/str2_chr_size - 1, so, -+ // respective high bit wasn't changed. L_LOOP_PROCEED will increase -+ // result by analyzed characters value, so, we can just reset lower bits -+ // in result here. Clear 2 lower bits for UU/UL and 3 bits for LL -+ // 2) restore needle_len and haystack_len values from "compressed" haystack_len -+ // 3) advance haystack value to represent next haystack octet. result & 7/3 is -+ // index of last analyzed substring inside current octet. So, haystack in at -+ // respective start address. We need to advance it to next octet -+ __ andi(match_mask, result, wordSize / haystack_chr_size - 1); -+ __ srli(needle_len, haystack_len, BitsPerByte * wordSize / 2); -+ __ andi(result, result, haystack_isL ? 
-8 : -4); -+ __ slli(tmp, match_mask, haystack_chr_shift); -+ __ sub(haystack, haystack, tmp); -+ __ addw(haystack_len, haystack_len, zr); -+ __ j(L_LOOP_PROCEED); ++ if (entry != NULL) { ++ *entry = __ pc(); ++ // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) ++ BLOCK_COMMENT("Entry:"); ++ } + -+ __ align(OptoLoopAlignment); -+ __ bind(NOMATCH); -+ __ mv(result, -1); ++ DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT; ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } ++ if (aligned) { ++ decorators |= ARRAYCOPY_ALIGNED; ++ } + -+ __ bind(DONE); -+ __ pop_reg(spilled_regs, sp); -+ __ ret(); -+ return entry; -+ } ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->arraycopy_prologue(_masm, decorators, is_oop, s, d, count, saved_reg); + -+ void generate_string_indexof_stubs() -+ { -+ StubRoutines::riscv::_string_indexof_linear_ll = generate_string_indexof_linear(true, true); -+ StubRoutines::riscv::_string_indexof_linear_uu = generate_string_indexof_linear(false, false); -+ StubRoutines::riscv::_string_indexof_linear_ul = generate_string_indexof_linear(true, false); -+ } ++ if (is_oop) { ++ // save regs before copy_memory ++ __ push_reg(RegSet::of(d, count), sp); ++ } + -+ address generate_mulAdd() -+ { -+ __ align(CodeEntryAlignment); -+ StubCodeMark mark(this, "StubRoutines", "mulAdd"); ++ { ++ // UnsafeCopyMemory page error: continue after ucm ++ bool add_entry = !is_oop && (!aligned || sizeof(jlong) == size); ++ UnsafeCopyMemoryMark ucmm(this, add_entry, true); ++ copy_memory(aligned, s, d, count, t0, size); ++ } + -+ address start = __ pc(); ++ if (is_oop) { ++ __ pop_reg(RegSet::of(d, count), sp); ++ if (VerifyOops) { ++ verify_oop_array(size, d, count, t2); ++ } ++ } + -+ const Register out = x10; -+ const Register in = x11; -+ const Register offset = x12; -+ const Register len = x13; -+ const Register k = x14; -+ const Register tmp1 = x28; -+ const Register tmp2 = x29; ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, d, count, t0, RegSet()); + -+ BLOCK_COMMENT("Entry:"); -+ __ enter(); -+ __ mul_add(out, in, offset, len, k, tmp1, tmp2); + __ leave(); ++ __ mv(x10, zr); // return 0 + __ ret(); -+ + return start; + } + -+ /** -+ * Arguments: -+ * -+ * Input: -+ * c_rarg0 - x address -+ * c_rarg1 - x length -+ * c_rarg2 - y address -+ * c_rarg3 - y lenth -+ * c_rarg4 - z address -+ * c_rarg5 - z length -+ */ -+ address generate_multiplyToLen() -+ { -+ __ align(CodeEntryAlignment); -+ StubCodeMark mark(this, "StubRoutines", "multiplyToLen"); ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. 
++ // ++ address generate_conjoint_copy(size_t size, bool aligned, bool is_oop, address nooverlap_target, ++ address* entry, const char* name, ++ bool dest_uninitialized = false) { ++ const Register s = c_rarg0, d = c_rarg1, count = c_rarg2; ++ RegSet saved_regs = RegSet::of(s, d, count); ++ StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); ++ __ enter(); + -+ const Register x = x10; -+ const Register xlen = x11; -+ const Register y = x12; -+ const Register ylen = x13; -+ const Register z = x14; -+ const Register zlen = x15; -+ -+ const Register tmp1 = x16; -+ const Register tmp2 = x17; -+ const Register tmp3 = x7; -+ const Register tmp4 = x28; -+ const Register tmp5 = x29; -+ const Register tmp6 = x30; -+ const Register tmp7 = x31; ++ if (entry != NULL) { ++ *entry = __ pc(); ++ // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) ++ BLOCK_COMMENT("Entry:"); ++ } + -+ RegSet spilled_regs = RegSet::of(tmp1, tmp2); -+ BLOCK_COMMENT("Entry:"); -+ __ enter(); // required for proper stackwalking of RuntimeStub frame -+ __ push_reg(spilled_regs, sp); -+ __ multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7); -+ __ pop_reg(spilled_regs, sp); -+ __ leave(); // required for proper stackwalking of RuntimeStub frame -+ __ ret(); ++ // use fwd copy when (d-s) above_equal (count*size) ++ __ sub(t0, d, s); ++ __ slli(t1, count, exact_log2(size)); ++ __ bgeu(t0, t1, nooverlap_target); + -+ return start; -+ } ++ DecoratorSet decorators = IN_HEAP | IS_ARRAY; ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } ++ if (aligned) { ++ decorators |= ARRAYCOPY_ALIGNED; ++ } + -+ address generate_squareToLen() -+ { -+ // squareToLen algorithm for sizes 1..127 described in java code works -+ // faster than multiply_to_len on some CPUs and slower on others, but -+ // multiply_to_len shows a bit better overall results -+ __ align(CodeEntryAlignment); -+ StubCodeMark mark(this, "StubRoutines", "squareToLen"); -+ address start = __ pc(); ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->arraycopy_prologue(_masm, decorators, is_oop, s, d, count, saved_regs); + -+ const Register x = x10; -+ const Register xlen = x11; -+ const Register z = x12; -+ const Register zlen = x13; -+ const Register y = x14; // == x -+ const Register ylen = x15; // == xlen ++ if (is_oop) { ++ // save regs before copy_memory ++ __ push_reg(RegSet::of(d, count), sp); ++ } + -+ const Register tmp1 = x16; -+ const Register tmp2 = x17; -+ const Register tmp3 = x7; -+ const Register tmp4 = x28; -+ const Register tmp5 = x29; -+ const Register tmp6 = x30; -+ const Register tmp7 = x31; ++ { ++ // UnsafeCopyMemory page error: continue after ucm ++ bool add_entry = !is_oop && (!aligned || sizeof(jlong) == size); ++ UnsafeCopyMemoryMark ucmm(this, add_entry, true); ++ copy_memory(aligned, s, d, count, t0, -size); ++ } + -+ RegSet spilled_regs = RegSet::of(y, tmp2); -+ BLOCK_COMMENT("Entry:"); -+ __ enter(); -+ __ push_reg(spilled_regs, sp); -+ __ mv(y, x); -+ __ mv(ylen, xlen); -+ __ multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7); -+ __ pop_reg(spilled_regs, sp); ++ if (is_oop) { ++ __ pop_reg(RegSet::of(d, count), sp); ++ if (VerifyOops) { ++ verify_oop_array(size, d, count, t2); ++ } ++ } ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, d, count, t0, RegSet()); + __ leave(); ++ __ mv(x10, zr); // return 0 + __ ret(); -+ + return start; + } -+#endif // COMPILER2 + -+ // Continuation 
point for throwing of implicit exceptions that are -+ // not handled in the current activation. Fabricates an exception -+ // oop and initiates normal exception dispatching in this -+ // frame. Since we need to preserve callee-saved values (currently -+ // only for C2, but done for C1 as well) we need a callee-saved oop -+ // map and therefore have to make these stubs into RuntimeStubs -+ // rather than BufferBlobs. If the compiler needs all registers to -+ // be preserved between the fault point and the exception handler -+ // then it must assume responsibility for that in -+ // AbstractCompiler::continuation_for_implicit_null_exception or -+ // continuation_for_implicit_division_by_zero_exception. All other -+ // implicit exceptions (e.g., NullPointerException or -+ // AbstractMethodError on entry) are either at call sites or -+ // otherwise assume that stack unwinding will be initiated, so -+ // caller saved registers were assumed volatile in the compiler. ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, ++ // we let the hardware handle it. The one to eight bytes within words, ++ // dwords or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ // Side Effects: ++ // disjoint_byte_copy_entry is set to the no-overlap entry point // ++ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, ++ // we let the hardware handle it. The one to eight bytes within words, ++ // dwords or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ // Side Effects: ++ // disjoint_byte_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_byte_copy(). ++ // ++ address generate_disjoint_byte_copy(bool aligned, address* entry, const char* name) { ++ const bool not_oop = false; ++ return generate_disjoint_copy(sizeof (jbyte), aligned, not_oop, entry, name); ++ } + -+#undef __ -+#define __ masm-> ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, ++ // we let the hardware handle it. The one to eight bytes within words, ++ // dwords or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ address generate_conjoint_byte_copy(bool aligned, address nooverlap_target, ++ address* entry, const char* name) { ++ const bool not_oop = false; ++ return generate_conjoint_copy(sizeof (jbyte), aligned, not_oop, nooverlap_target, entry, name); ++ } + -+ address generate_throw_exception(const char* name, -+ address runtime_entry, -+ Register arg1 = noreg, -+ Register arg2 = noreg) { -+ // Information about frame layout at time of blocking runtime call. -+ // Note that we only have to preserve callee-saved registers since -+ // the compilers are responsible for supplying a continuation point -+ // if they expect all registers to be preserved. -+ // n.b. 
riscv asserts that frame::arg_reg_save_area_bytes == 0 -+ assert_cond(runtime_entry != NULL); -+ enum layout { -+ fp_off = 0, -+ fp_off2, -+ return_off, -+ return_off2, -+ framesize // inclusive of return address -+ }; ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we ++ // let the hardware handle it. The two or four words within dwords ++ // or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ // Side Effects: ++ // disjoint_short_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_short_copy(). ++ // ++ address generate_disjoint_short_copy(bool aligned, ++ address* entry, const char* name) { ++ const bool not_oop = false; ++ return generate_disjoint_copy(sizeof (jshort), aligned, not_oop, entry, name); ++ } + -+ const int insts_size = 512; -+ const int locs_size = 64; ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we ++ // let the hardware handle it. The two or four words within dwords ++ // or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ address generate_conjoint_short_copy(bool aligned, address nooverlap_target, ++ address* entry, const char* name) { ++ const bool not_oop = false; ++ return generate_conjoint_copy(sizeof (jshort), aligned, not_oop, nooverlap_target, entry, name); ++ } + -+ CodeBuffer code(name, insts_size, locs_size); -+ OopMapSet* oop_maps = new OopMapSet(); -+ MacroAssembler* masm = new MacroAssembler(&code); -+ assert_cond(oop_maps != NULL && masm != NULL); ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ // Side Effects: ++ // disjoint_int_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_int_oop_copy(). ++ // ++ address generate_disjoint_int_copy(bool aligned, address* entry, ++ const char* name, bool dest_uninitialized = false) { ++ const bool not_oop = false; ++ return generate_disjoint_copy(sizeof (jint), aligned, not_oop, entry, name); ++ } + -+ address start = __ pc(); ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. 
The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ address generate_conjoint_int_copy(bool aligned, address nooverlap_target, ++ address* entry, const char* name, ++ bool dest_uninitialized = false) { ++ const bool not_oop = false; ++ return generate_conjoint_copy(sizeof (jint), aligned, not_oop, nooverlap_target, entry, name); ++ } + -+ // This is an inlined and slightly modified version of call_VM -+ // which has the ability to fetch the return PC out of -+ // thread-local storage and also sets up last_Java_sp slightly -+ // differently than the real call_VM + -+ __ enter(); // Save FP and RA before call ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as size_t, can be zero ++ // ++ // Side Effects: ++ // disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the ++ // no-overlap entry point used by generate_conjoint_long_oop_copy(). ++ // ++ address generate_disjoint_long_copy(bool aligned, address* entry, ++ const char* name, bool dest_uninitialized = false) { ++ const bool not_oop = false; ++ return generate_disjoint_copy(sizeof (jlong), aligned, not_oop, entry, name); ++ } + -+ assert(is_even(framesize / 2), "sp not 16-byte aligned"); ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as size_t, can be zero ++ // ++ address generate_conjoint_long_copy(bool aligned, ++ address nooverlap_target, address* entry, ++ const char* name, bool dest_uninitialized = false) { ++ const bool not_oop = false; ++ return generate_conjoint_copy(sizeof (jlong), aligned, not_oop, nooverlap_target, entry, name); ++ } + -+ // ra and fp are already in place -+ __ addi(sp, fp, 0 - ((unsigned)framesize << LogBytesPerInt)); // prolog ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as size_t, can be zero ++ // ++ // Side Effects: ++ // disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the ++ // no-overlap entry point used by generate_conjoint_long_oop_copy(). ++ // ++ address generate_disjoint_oop_copy(bool aligned, address* entry, ++ const char* name, bool dest_uninitialized) { ++ const bool is_oop = true; ++ const size_t size = UseCompressedOops ? sizeof (jint) : sizeof (jlong); ++ return generate_disjoint_copy(size, aligned, is_oop, entry, name, dest_uninitialized); ++ } + -+ int frame_complete = __ pc() - start; ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as size_t, can be zero ++ // ++ address generate_conjoint_oop_copy(bool aligned, ++ address nooverlap_target, address* entry, ++ const char* name, bool dest_uninitialized) { ++ const bool is_oop = true; ++ const size_t size = UseCompressedOops ? 
sizeof (jint) : sizeof (jlong); ++ return generate_conjoint_copy(size, aligned, is_oop, nooverlap_target, entry, ++ name, dest_uninitialized); ++ } + -+ // Set up last_Java_sp and last_Java_fp -+ address the_pc = __ pc(); -+ __ set_last_Java_frame(sp, fp, the_pc, t0); ++ // Helper for generating a dynamic type check. ++ // Smashes t0, t1. ++ void generate_type_check(Register sub_klass, ++ Register super_check_offset, ++ Register super_klass, ++ Label& L_success) { ++ assert_different_registers(sub_klass, super_check_offset, super_klass); + -+ // Call runtime -+ if (arg1 != noreg) { -+ assert(arg2 != c_rarg1, "clobbered"); -+ __ mv(c_rarg1, arg1); -+ } -+ if (arg2 != noreg) { -+ __ mv(c_rarg2, arg2); -+ } -+ __ mv(c_rarg0, xthread); -+ BLOCK_COMMENT("call runtime_entry"); -+ int32_t offset = 0; -+ __ movptr_with_offset(t0, runtime_entry, offset); -+ __ jalr(x1, t0, offset); ++ BLOCK_COMMENT("type_check:"); + -+ // Generate oop map -+ OopMap* map = new OopMap(framesize, 0); -+ assert_cond(map != NULL); ++ Label L_miss; + -+ oop_maps->add_gc_map(the_pc - start, map); ++ __ check_klass_subtype_fast_path(sub_klass, super_klass, noreg, &L_success, &L_miss, NULL, super_check_offset); ++ __ check_klass_subtype_slow_path(sub_klass, super_klass, noreg, noreg, &L_success, NULL); + -+ __ reset_last_Java_frame(true); ++ // Fall through on failure! ++ __ BIND(L_miss); ++ } + -+ __ leave(); ++ // ++ // Generate checkcasting array copy stub ++ // ++ // Input: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // c_rarg3 - size_t ckoff (super_check_offset) ++ // c_rarg4 - oop ckval (super_klass) ++ // ++ // Output: ++ // x10 == 0 - success ++ // x10 == -1^K - failure, where K is partial transfer count ++ // ++ address generate_checkcast_copy(const char* name, address* entry, ++ bool dest_uninitialized = false) { ++ Label L_load_element, L_store_element, L_do_card_marks, L_done, L_done_pop; + -+ // check for pending exceptions -+#ifdef ASSERT -+ Label L; -+ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); -+ __ bnez(t0, L); -+ __ should_not_reach_here(); -+ __ bind(L); -+#endif // ASSERT -+ __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); ++ // Input registers (after setup_arg_regs) ++ const Register from = c_rarg0; // source array address ++ const Register to = c_rarg1; // destination array address ++ const Register count = c_rarg2; // elementscount ++ const Register ckoff = c_rarg3; // super_check_offset ++ const Register ckval = c_rarg4; // super_klass + ++ RegSet wb_pre_saved_regs = RegSet::range(c_rarg0, c_rarg4); ++ RegSet wb_post_saved_regs = RegSet::of(count); + -+ // codeBlob framesize is in words (not VMRegImpl::slot_size) -+ RuntimeStub* stub = -+ RuntimeStub::new_runtime_stub(name, -+ &code, -+ frame_complete, -+ (framesize >> (LogBytesPerWord - LogBytesPerInt)), -+ oop_maps, false); -+ assert(stub != NULL, "create runtime stub fail!"); -+ return stub->entry_point(); -+ } ++ // Registers used as temps (x7, x9, x18 are save-on-entry) ++ const Register count_save = x19; // orig elementscount ++ const Register start_to = x18; // destination array start address ++ const Register copied_oop = x7; // actual oop copied ++ const Register r9_klass = x9; // oop._klass + -+#ifdef COMPILER2 -+ class MontgomeryMultiplyGenerator : public MacroAssembler { ++ //--------------------------------------------------------------- ++ // Assembler stub will be used for this call to 
arraycopy ++ // if the two arrays are subtypes of Object[] but the ++ // destination array type is not equal to or a supertype ++ // of the source type. Each element must be separately ++ // checked. + -+ Register Pa_base, Pb_base, Pn_base, Pm_base, inv, Rlen, Ra, Rb, Rm, Rn, -+ Pa, Pb, Pn, Pm, Rhi_ab, Rlo_ab, Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2, Ri, Rj; ++ assert_different_registers(from, to, count, ckoff, ckval, start_to, ++ copied_oop, r9_klass, count_save); + -+ RegSet _toSave; -+ bool _squaring; ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); + -+ public: -+ MontgomeryMultiplyGenerator (Assembler *as, bool squaring) -+ : MacroAssembler(as->code()), _squaring(squaring) { ++ __ enter(); // required for proper stackwalking of RuntimeStub frame + -+ // Register allocation ++ // Caller of this entry point must set up the argument registers. ++ if (entry != NULL) { ++ *entry = __ pc(); ++ BLOCK_COMMENT("Entry:"); ++ } + -+ Register reg = c_rarg0; -+ Pa_base = reg; // Argument registers -+ if (squaring) { -+ Pb_base = Pa_base; -+ } else { -+ Pb_base = ++reg; -+ } -+ Pn_base = ++reg; -+ Rlen= ++reg; -+ inv = ++reg; -+ Pm_base = ++reg; ++ // Empty array: Nothing to do ++ __ beqz(count, L_done); + -+ // Working registers: -+ Ra = ++reg; // The current digit of a, b, n, and m. -+ Rb = ++reg; -+ Rm = ++reg; -+ Rn = ++reg; ++ __ push_reg(RegSet::of(x7, x9, x18, x19), sp); + -+ Pa = ++reg; // Pointers to the current/next digit of a, b, n, and m. -+ Pb = ++reg; -+ Pm = ++reg; -+ Pn = ++reg; ++#ifdef ASSERT ++ BLOCK_COMMENT("assert consistent ckoff/ckval"); ++ // The ckoff and ckval must be mutually consistent, ++ // even though caller generates both. ++ { Label L; ++ int sco_offset = in_bytes(Klass::super_check_offset_offset()); ++ __ lwu(start_to, Address(ckval, sco_offset)); ++ __ beq(ckoff, start_to, L); ++ __ stop("super_check_offset inconsistent"); ++ __ bind(L); ++ } ++#endif //ASSERT + -+ tmp0 = ++reg; // Three registers which form a -+ tmp1 = ++reg; // triple-precision accumuator. -+ tmp2 = ++reg; ++ DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_CHECKCAST | ARRAYCOPY_DISJOINT; ++ bool is_oop = true; ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } + -+ Ri = x6; // Inner and outer loop indexes. -+ Rj = x7; ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->arraycopy_prologue(_masm, decorators, is_oop, from, to, count, wb_pre_saved_regs); + -+ Rhi_ab = x28; // Product registers: low and high parts -+ Rlo_ab = x29; // of a*b and m*n. -+ Rhi_mn = x30; -+ Rlo_mn = x31; ++ // save the original count ++ __ mv(count_save, count); + -+ // x18 and up are callee-saved. -+ _toSave = RegSet::range(x18, reg) + Pm_base; -+ } ++ // Copy from low to high addresses ++ __ mv(start_to, to); // Save destination array start address ++ __ j(L_load_element); + -+ private: -+ void save_regs() { -+ push_reg(_toSave, sp); -+ } ++ // ======== begin loop ======== ++ // (Loop is rotated; its entry is L_load_element.) ++ // Loop control: ++ // for count to 0 do ++ // copied_oop = load_heap_oop(from++) ++ // ... generate_type_check ... 
++ // store_heap_oop(to++, copied_oop) ++ // end + -+ void restore_regs() { -+ pop_reg(_toSave, sp); -+ } ++ __ align(OptoLoopAlignment); + -+ template -+ void unroll_2(Register count, T block) { -+ Label loop, end, odd; -+ beqz(count, end); -+ andi(t0, count, 0x1); -+ bnez(t0, odd); -+ align(16); -+ bind(loop); -+ (this->*block)(); -+ bind(odd); -+ (this->*block)(); -+ addi(count, count, -2); -+ bgtz(count, loop); -+ bind(end); -+ } ++ __ BIND(L_store_element); ++ __ store_heap_oop(Address(to, 0), copied_oop, noreg, noreg, AS_RAW); // store the oop ++ __ add(to, to, UseCompressedOops ? 4 : 8); ++ __ sub(count, count, 1); ++ __ beqz(count, L_do_card_marks); + -+ template -+ void unroll_2(Register count, T block, Register d, Register s, Register tmp) { -+ Label loop, end, odd; -+ beqz(count, end); -+ andi(tmp, count, 0x1); -+ bnez(tmp, odd); -+ align(16); -+ bind(loop); -+ (this->*block)(d, s, tmp); -+ bind(odd); -+ (this->*block)(d, s, tmp); -+ addi(count, count, -2); -+ bgtz(count, loop); -+ bind(end); -+ } ++ // ======== loop entry is here ======== ++ __ BIND(L_load_element); ++ __ load_heap_oop(copied_oop, Address(from, 0), noreg, noreg, AS_RAW); // load the oop ++ __ add(from, from, UseCompressedOops ? 4 : 8); ++ __ beqz(copied_oop, L_store_element); + -+ void pre1(RegisterOrConstant i) { -+ block_comment("pre1"); -+ // Pa = Pa_base; -+ // Pb = Pb_base + i; -+ // Pm = Pm_base; -+ // Pn = Pn_base + i; -+ // Ra = *Pa; -+ // Rb = *Pb; -+ // Rm = *Pm; -+ // Rn = *Pn; -+ if (i.is_register()) { -+ slli(t0, i.as_register(), LogBytesPerWord); -+ } else { -+ mv(t0, i.as_constant()); -+ slli(t0, t0, LogBytesPerWord); -+ } ++ __ load_klass(r9_klass, copied_oop);// query the object klass ++ generate_type_check(r9_klass, ckoff, ckval, L_store_element); ++ // ======== end loop ======== + -+ mv(Pa, Pa_base); -+ add(Pb, Pb_base, t0); -+ mv(Pm, Pm_base); -+ add(Pn, Pn_base, t0); ++ // It was a real error; we must depend on the caller to finish the job. ++ // Register count = remaining oops, count_orig = total oops. ++ // Emit GC store barriers for the oops we have copied and report ++ // their number to the caller. + -+ ld(Ra, Address(Pa)); -+ ld(Rb, Address(Pb)); -+ ld(Rm, Address(Pm)); -+ ld(Rn, Address(Pn)); ++ __ sub(count, count_save, count); // K = partially copied oop count ++ __ xori(count, count, -1); // report (-1^K) to caller ++ __ beqz(count, L_done_pop); + -+ // Zero the m*n result. -+ mv(Rhi_mn, zr); -+ mv(Rlo_mn, zr); -+ } ++ __ BIND(L_do_card_marks); ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, start_to, count_save, t0, wb_post_saved_regs); + -+ // The core multiply-accumulate step of a Montgomery -+ // multiplication. The idea is to schedule operations as a -+ // pipeline so that instructions with long latencies (loads and -+ // multiplies) have time to complete before their results are -+ // used. This most benefits in-order implementations of the -+ // architecture but out-of-order ones also benefit. -+ void step() { -+ block_comment("step"); -+ // MACC(Ra, Rb, tmp0, tmp1, tmp2); -+ // Ra = *++Pa; -+ // Rb = *--Pb; -+ mulhu(Rhi_ab, Ra, Rb); -+ mul(Rlo_ab, Ra, Rb); -+ addi(Pa, Pa, wordSize); -+ ld(Ra, Address(Pa)); -+ addi(Pb, Pb, -wordSize); -+ ld(Rb, Address(Pb)); -+ acc(Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2); // The pending m*n from the -+ // previous iteration. 
-+ // MACC(Rm, Rn, tmp0, tmp1, tmp2); -+ // Rm = *++Pm; -+ // Rn = *--Pn; -+ mulhu(Rhi_mn, Rm, Rn); -+ mul(Rlo_mn, Rm, Rn); -+ addi(Pm, Pm, wordSize); -+ ld(Rm, Address(Pm)); -+ addi(Pn, Pn, -wordSize); -+ ld(Rn, Address(Pn)); -+ acc(Rhi_ab, Rlo_ab, tmp0, tmp1, tmp2); -+ } ++ __ bind(L_done_pop); ++ __ pop_reg(RegSet::of(x7, x9, x18, x19), sp); ++ inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr); + -+ void post1() { -+ block_comment("post1"); ++ __ bind(L_done); ++ __ mv(x10, count); ++ __ leave(); ++ __ ret(); + -+ // MACC(Ra, Rb, tmp0, tmp1, tmp2); -+ // Ra = *++Pa; -+ // Rb = *--Pb; -+ mulhu(Rhi_ab, Ra, Rb); -+ mul(Rlo_ab, Ra, Rb); -+ acc(Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2); // The pending m*n -+ acc(Rhi_ab, Rlo_ab, tmp0, tmp1, tmp2); ++ return start; ++ } + -+ // *Pm = Rm = tmp0 * inv; -+ mul(Rm, tmp0, inv); -+ sd(Rm, Address(Pm)); ++ // Perform range checks on the proposed arraycopy. ++ // Kills temp, but nothing else. ++ // Also, clean the sign bits of src_pos and dst_pos. ++ void arraycopy_range_checks(Register src, // source array oop (c_rarg0) ++ Register src_pos, // source position (c_rarg1) ++ Register dst, // destination array oo (c_rarg2) ++ Register dst_pos, // destination position (c_rarg3) ++ Register length, ++ Register temp, ++ Label& L_failed) { ++ BLOCK_COMMENT("arraycopy_range_checks:"); + -+ // MACC(Rm, Rn, tmp0, tmp1, tmp2); -+ // tmp0 = tmp1; tmp1 = tmp2; tmp2 = 0; -+ mulhu(Rhi_mn, Rm, Rn); ++ assert_different_registers(t0, temp); + -+#ifndef PRODUCT -+ // assert(m[i] * n[0] + tmp0 == 0, "broken Montgomery multiply"); -+ { -+ mul(Rlo_mn, Rm, Rn); -+ add(Rlo_mn, tmp0, Rlo_mn); -+ Label ok; -+ beqz(Rlo_mn, ok); -+ stop("broken Montgomery multiply"); -+ bind(ok); -+ } -+#endif -+ // We have very carefully set things up so that -+ // m[i]*n[0] + tmp0 == 0 (mod b), so we don't have to calculate -+ // the lower half of Rm * Rn because we know the result already: -+ // it must be -tmp0. tmp0 + (-tmp0) must generate a carry iff -+ // tmp0 != 0. So, rather than do a mul and an cad we just set -+ // the carry flag iff tmp0 is nonzero. -+ // -+ // mul(Rlo_mn, Rm, Rn); -+ // cad(zr, tmp0, Rlo_mn); -+ addi(t0, tmp0, -1); -+ sltu(t0, t0, tmp0); // Set carry iff tmp0 is nonzero -+ cadc(tmp0, tmp1, Rhi_mn, t0); -+ adc(tmp1, tmp2, zr, t0); -+ mv(tmp2, zr); -+ } ++ // if [src_pos + length > arrayOop(src)->length()] then FAIL ++ __ lwu(t0, Address(src, arrayOopDesc::length_offset_in_bytes())); ++ __ addw(temp, length, src_pos); ++ __ bgtu(temp, t0, L_failed); + -+ void pre2(Register i, Register len) { -+ block_comment("pre2"); -+ // Pa = Pa_base + i-len; -+ // Pb = Pb_base + len; -+ // Pm = Pm_base + i-len; -+ // Pn = Pn_base + len; ++ // if [dst_pos + length > arrayOop(dst)->length()] then FAIL ++ __ lwu(t0, Address(dst, arrayOopDesc::length_offset_in_bytes())); ++ __ addw(temp, length, dst_pos); ++ __ bgtu(temp, t0, L_failed); + -+ sub(Rj, i, len); -+ // Rj == i-len ++ // Have to clean up high 32 bits of 'src_pos' and 'dst_pos'. 
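The zero_extend calls just below implement this clean-up. As a host-side illustration (plain C++, not the stub's register-level code) of why a 32-bit position must be zero-extended before it is scaled into a 64-bit byte offset:

    #include <cassert>
    #include <cstdint>

    // The hardware register is 64 bits wide, but src_pos/dst_pos are jints;
    // any stale upper bits must be dropped before the shift, which is exactly
    // what zero_extend(reg, reg, 32) achieves.
    static uint64_t scaled_offset(uint64_t raw_reg, int log2_elem_size) {
      uint64_t pos = raw_reg & 0xffffffffULL;   // keep only the low 32 bits
      return pos << log2_elem_size;             // byte offset into the array body
    }

    int main() {
      uint64_t dirty = 0xdeadbeef00000004ULL;   // jint value 4, garbage above it
      assert(scaled_offset(dirty, 3) == (4u << 3));
      return 0;
    }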
++ __ zero_extend(src_pos, src_pos, 32); ++ __ zero_extend(dst_pos, dst_pos, 32); + -+ // Ra as temp register -+ shadd(Pa, Rj, Pa_base, Ra, LogBytesPerWord); -+ shadd(Pm, Rj, Pm_base, Ra, LogBytesPerWord); -+ shadd(Pb, len, Pb_base, Ra, LogBytesPerWord); -+ shadd(Pn, len, Pn_base, Ra, LogBytesPerWord); ++ BLOCK_COMMENT("arraycopy_range_checks done"); ++ } + -+ // Ra = *++Pa; -+ // Rb = *--Pb; -+ // Rm = *++Pm; -+ // Rn = *--Pn; -+ add(Pa, Pa, wordSize); -+ ld(Ra, Address(Pa)); -+ add(Pb, Pb, -wordSize); -+ ld(Rb, Address(Pb)); -+ add(Pm, Pm, wordSize); -+ ld(Rm, Address(Pm)); -+ add(Pn, Pn, -wordSize); -+ ld(Rn, Address(Pn)); ++ // ++ // Generate 'unsafe' array copy stub ++ // Though just as safe as the other stubs, it takes an unscaled ++ // size_t argument instead of an element count. ++ // ++ // Input: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - byte count, treated as ssize_t, can be zero ++ // ++ // Examines the alignment of the operands and dispatches ++ // to a long, int, short, or byte copy loop. ++ // ++ address generate_unsafe_copy(const char* name, ++ address byte_copy_entry, ++ address short_copy_entry, ++ address int_copy_entry, ++ address long_copy_entry) { ++ assert_cond(byte_copy_entry != NULL && short_copy_entry != NULL && ++ int_copy_entry != NULL && long_copy_entry != NULL); ++ Label L_long_aligned, L_int_aligned, L_short_aligned; ++ const Register s = c_rarg0, d = c_rarg1, count = c_rarg2; + -+ mv(Rhi_mn, zr); -+ mv(Rlo_mn, zr); -+ } ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ __ enter(); // required for proper stackwalking of RuntimeStub frame + -+ void post2(Register i, Register len) { -+ block_comment("post2"); -+ sub(Rj, i, len); ++ // bump this on entry, not on exit: ++ inc_counter_np(SharedRuntime::_unsafe_array_copy_ctr); + -+ cad(tmp0, tmp0, Rlo_mn, t0); // The pending m*n, low part ++ __ orr(t0, s, d); ++ __ orr(t0, t0, count); + -+ // As soon as we know the least significant digit of our result, -+ // store it. -+ // Pm_base[i-len] = tmp0; -+ // Rj as temp register -+ shadd(Rj, Rj, Pm_base, Rj, LogBytesPerWord); -+ sd(tmp0, Address(Rj)); ++ __ andi(t0, t0, BytesPerLong - 1); ++ __ beqz(t0, L_long_aligned); ++ __ andi(t0, t0, BytesPerInt - 1); ++ __ beqz(t0, L_int_aligned); ++ __ andi(t0, t0, 1); ++ __ beqz(t0, L_short_aligned); ++ __ j(RuntimeAddress(byte_copy_entry)); + -+ // tmp0 = tmp1; tmp1 = tmp2; tmp2 = 0; -+ cadc(tmp0, tmp1, Rhi_mn, t0); // The pending m*n, high part -+ adc(tmp1, tmp2, zr, t0); -+ mv(tmp2, zr); -+ } ++ __ BIND(L_short_aligned); ++ __ srli(count, count, LogBytesPerShort); // size => short_count ++ __ j(RuntimeAddress(short_copy_entry)); ++ __ BIND(L_int_aligned); ++ __ srli(count, count, LogBytesPerInt); // size => int_count ++ __ j(RuntimeAddress(int_copy_entry)); ++ __ BIND(L_long_aligned); ++ __ srli(count, count, LogBytesPerLong); // size => long_count ++ __ j(RuntimeAddress(long_copy_entry)); + -+ // A carry in tmp0 after Montgomery multiplication means that we -+ // should subtract multiples of n from our result in m. We'll -+ // keep doing that until there is no carry. 
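A host-side sketch of the carry-normalization just described (semantics only, multi-precision little-endian words; it does not model the register allocation of the normalize() routine that follows):

    #include <cstdint>
    #include <vector>

    // The true value is m + carry * B^len with B = 2^64. Repeatedly subtract
    // the modulus n; each borrow out of the top word consumes one unit of carry.
    static void normalize(std::vector<uint64_t>& m,
                          const std::vector<uint64_t>& n,
                          uint64_t& carry) {
      while (carry != 0) {
        unsigned borrow = 0;
        for (size_t i = 0; i < m.size(); i++) {
          uint64_t mi = m[i], ni = n[i];
          m[i] = mi - ni - borrow;
          borrow = ((mi < ni) || (mi == ni && borrow)) ? 1 : 0;
        }
        carry -= borrow;
      }
    }

    int main() {
      std::vector<uint64_t> m = {5, 0};   // low words of the result
      std::vector<uint64_t> n = {7, 0};   // modulus words, little-endian
      uint64_t carry = 1;                 // one unit of overflow to shed
      normalize(m, n, carry);
      return carry == 0 ? 0 : 1;
    }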
-+ void normalize(Register len) { -+ block_comment("normalize"); -+ // while (tmp0) -+ // tmp0 = sub(Pm_base, Pn_base, tmp0, len); -+ Label loop, post, again; -+ Register cnt = tmp1, i = tmp2; // Re-use registers; we're done with them now -+ beqz(tmp0, post); { -+ bind(again); { -+ mv(i, zr); -+ mv(cnt, len); -+ slli(Rn, i, LogBytesPerWord); -+ add(Rm, Pm_base, Rn); -+ ld(Rm, Address(Rm)); -+ add(Rn, Pn_base, Rn); -+ ld(Rn, Address(Rn)); -+ mv(t0, 1); // set carry flag, i.e. no borrow -+ align(16); -+ bind(loop); { -+ notr(Rn, Rn); -+ add(Rm, Rm, t0); -+ add(Rm, Rm, Rn); -+ sltu(t0, Rm, Rn); -+ shadd(Rn, i, Pm_base, Rn, LogBytesPerWord); // Rn as temp register -+ sd(Rm, Address(Rn)); -+ add(i, i, 1); -+ slli(Rn, i, LogBytesPerWord); -+ add(Rm, Pm_base, Rn); -+ ld(Rm, Address(Rm)); -+ add(Rn, Pn_base, Rn); -+ ld(Rn, Address(Rn)); -+ sub(cnt, cnt, 1); -+ } bnez(cnt, loop); -+ addi(tmp0, tmp0, -1); -+ add(tmp0, tmp0, t0); -+ } bnez(tmp0, again); -+ } bind(post); -+ } ++ return start; ++ } + -+ // Move memory at s to d, reversing words. -+ // Increments d to end of copied memory -+ // Destroys tmp1, tmp2 -+ // Preserves len -+ // Leaves s pointing to the address which was in d at start -+ void reverse(Register d, Register s, Register len, Register tmp1, Register tmp2) { -+ assert(tmp1 < x28 && tmp2 < x28, "register corruption"); ++ // ++ // Generate generic array copy stubs ++ // ++ // Input: ++ // c_rarg0 - src oop ++ // c_rarg1 - src_pos (32-bits) ++ // c_rarg2 - dst oop ++ // c_rarg3 - dst_pos (32-bits) ++ // c_rarg4 - element count (32-bits) ++ // ++ // Output: ++ // x10 == 0 - success ++ // x10 == -1^K - failure, where K is partial transfer count ++ // ++ address generate_generic_copy(const char* name, ++ address byte_copy_entry, address short_copy_entry, ++ address int_copy_entry, address oop_copy_entry, ++ address long_copy_entry, address checkcast_copy_entry) { ++ assert_cond(byte_copy_entry != NULL && short_copy_entry != NULL && ++ int_copy_entry != NULL && oop_copy_entry != NULL && ++ long_copy_entry != NULL && checkcast_copy_entry != NULL); ++ Label L_failed, L_failed_0, L_objArray; ++ Label L_copy_bytes, L_copy_shorts, L_copy_ints, L_copy_longs; + -+ shadd(s, len, s, tmp1, LogBytesPerWord); -+ mv(tmp1, len); -+ unroll_2(tmp1, &MontgomeryMultiplyGenerator::reverse1, d, s, tmp2); -+ slli(tmp1, len, LogBytesPerWord); -+ sub(s, d, tmp1); -+ } -+ // [63...0] -> [31...0][63...32] -+ void reverse1(Register d, Register s, Register tmp) { -+ addi(s, s, -wordSize); -+ ld(tmp, Address(s)); -+ ror_imm(tmp, tmp, 32, t0); -+ sd(tmp, Address(d)); -+ addi(d, d, wordSize); -+ } ++ // Input registers ++ const Register src = c_rarg0; // source array oop ++ const Register src_pos = c_rarg1; // source position ++ const Register dst = c_rarg2; // destination array oop ++ const Register dst_pos = c_rarg3; // destination position ++ const Register length = c_rarg4; + -+ void step_squaring() { -+ // An extra ACC -+ step(); -+ acc(Rhi_ab, Rlo_ab, tmp0, tmp1, tmp2); -+ } ++ // Registers used as temps ++ const Register dst_klass = c_rarg5; + -+ void last_squaring(Register i) { -+ Label dont; -+ // if ((i & 1) == 0) { -+ andi(t0, i, 0x1); -+ bnez(t0, dont); { -+ // MACC(Ra, Rb, tmp0, tmp1, tmp2); -+ // Ra = *++Pa; -+ // Rb = *--Pb; -+ mulhu(Rhi_ab, Ra, Rb); -+ mul(Rlo_ab, Ra, Rb); -+ acc(Rhi_ab, Rlo_ab, tmp0, tmp1, tmp2); -+ } bind(dont); -+ } ++ __ align(CodeEntryAlignment); + -+ void extra_step_squaring() { -+ acc(Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2); // The pending m*n ++ StubCodeMark mark(this, 
"StubRoutines", name); + -+ // MACC(Rm, Rn, tmp0, tmp1, tmp2); -+ // Rm = *++Pm; -+ // Rn = *--Pn; -+ mulhu(Rhi_mn, Rm, Rn); -+ mul(Rlo_mn, Rm, Rn); -+ addi(Pm, Pm, wordSize); -+ ld(Rm, Address(Pm)); -+ addi(Pn, Pn, -wordSize); -+ ld(Rn, Address(Pn)); -+ } ++ address start = __ pc(); + ++ __ enter(); // required for proper stackwalking of RuntimeStub frame + -+ void post1_squaring() { -+ acc(Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2); // The pending m*n ++ // bump this on entry, not on exit: ++ inc_counter_np(SharedRuntime::_generic_array_copy_ctr); + -+ // *Pm = Rm = tmp0 * inv; -+ mul(Rm, tmp0, inv); -+ sd(Rm, Address(Pm)); ++ //----------------------------------------------------------------------- ++ // Assembler stub will be used for this call to arraycopy ++ // if the following conditions are met: ++ // ++ // (1) src and dst must not be null. ++ // (2) src_pos must not be negative. ++ // (3) dst_pos must not be negative. ++ // (4) length must not be negative. ++ // (5) src klass and dst klass should be the same and not NULL. ++ // (6) src and dst should be arrays. ++ // (7) src_pos + length must not exceed length of src. ++ // (8) dst_pos + length must not exceed length of dst. ++ // + -+ // MACC(Rm, Rn, tmp0, tmp1, tmp2); -+ // tmp0 = tmp1; tmp1 = tmp2; tmp2 = 0; -+ mulhu(Rhi_mn, Rm, Rn); ++ // if [src == NULL] then return -1 ++ __ beqz(src, L_failed); + -+#ifndef PRODUCT -+ // assert(m[i] * n[0] + tmp0 == 0, "broken Montgomery multiply"); -+ { -+ mul(Rlo_mn, Rm, Rn); -+ add(Rlo_mn, tmp0, Rlo_mn); -+ Label ok; -+ beqz(Rlo_mn, ok); { -+ stop("broken Montgomery multiply"); -+ } bind(ok); -+ } -+#endif -+ // We have very carefully set things up so that -+ // m[i]*n[0] + tmp0 == 0 (mod b), so we don't have to calculate -+ // the lower half of Rm * Rn because we know the result already: -+ // it must be -tmp0. tmp0 + (-tmp0) must generate a carry iff -+ // tmp0 != 0. So, rather than do a mul and a cad we just set -+ // the carry flag iff tmp0 is nonzero. -+ // -+ // mul(Rlo_mn, Rm, Rn); -+ // cad(zr, tmp0, Rlo_mn); -+ addi(t0, tmp0, -1); -+ sltu(t0, t0, tmp0); // Set carry iff tmp0 is nonzero -+ cadc(tmp0, tmp1, Rhi_mn, t0); -+ adc(tmp1, tmp2, zr, t0); -+ mv(tmp2, zr); -+ } ++ // if [src_pos < 0] then return -1 ++ // i.e. sign bit set ++ __ andi(t0, src_pos, 1UL << 31); ++ __ bnez(t0, L_failed); + -+ // use t0 as carry -+ void acc(Register Rhi, Register Rlo, -+ Register tmp0, Register tmp1, Register tmp2) { -+ cad(tmp0, tmp0, Rlo, t0); -+ cadc(tmp1, tmp1, Rhi, t0); -+ adc(tmp2, tmp2, zr, t0); -+ } ++ // if [dst == NULL] then return -1 ++ __ beqz(dst, L_failed); + -+ public: -+ /** -+ * Fast Montgomery multiplication. The derivation of the -+ * algorithm is in A Cryptographic Library for the Motorola -+ * DSP56000, Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237. -+ * -+ * Arguments: -+ * -+ * Inputs for multiplication: -+ * c_rarg0 - int array elements a -+ * c_rarg1 - int array elements b -+ * c_rarg2 - int array elements n (the modulus) -+ * c_rarg3 - int length -+ * c_rarg4 - int inv -+ * c_rarg5 - int array elements m (the result) -+ * -+ * Inputs for squaring: -+ * c_rarg0 - int array elements a -+ * c_rarg1 - int array elements n (the modulus) -+ * c_rarg2 - int length -+ * c_rarg3 - int inv -+ * c_rarg4 - int array elements m (the result) -+ * -+ */ -+ address generate_multiply() { -+ Label argh, nothing; -+ bind(argh); -+ stop("MontgomeryMultiply total_allocation must be <= 8192"); ++ // if [dst_pos < 0] then return -1 ++ // i.e. 
sign bit set ++ __ andi(t0, dst_pos, 1UL << 31); ++ __ bnez(t0, L_failed); + -+ align(CodeEntryAlignment); -+ address entry = pc(); ++ // registers used as temp ++ const Register scratch_length = x28; // elements count to copy ++ const Register scratch_src_klass = x29; // array klass ++ const Register lh = x30; // layout helper + -+ beqz(Rlen, nothing); ++ // if [length < 0] then return -1 ++ __ addw(scratch_length, length, zr); // length (elements count, 32-bits value) ++ // i.e. sign bit set ++ __ andi(t0, scratch_length, 1UL << 31); ++ __ bnez(t0, L_failed); + -+ enter(); ++ __ load_klass(scratch_src_klass, src); ++#ifdef ASSERT ++ { ++ BLOCK_COMMENT("assert klasses not null {"); ++ Label L1, L2; ++ __ bnez(scratch_src_klass, L2); // it is broken if klass is NULL ++ __ bind(L1); ++ __ stop("broken null klass"); ++ __ bind(L2); ++ __ load_klass(t0, dst); ++ __ beqz(t0, L1); // this would be broken also ++ BLOCK_COMMENT("} assert klasses not null done"); ++ } ++#endif + -+ // Make room. -+ mv(Ra, 512); -+ bgt(Rlen, Ra, argh); -+ slli(Ra, Rlen, exact_log2(4 * sizeof(jint))); -+ sub(Ra, sp, Ra); -+ andi(sp, Ra, -2 * wordSize); ++ // Load layout helper (32-bits) ++ // ++ // |array_tag| | header_size | element_type | |log2_element_size| ++ // 32 30 24 16 8 2 0 ++ // ++ // array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0 ++ // + -+ srliw(Rlen, Rlen, 1); // length in longwords = len/2 ++ const int lh_offset = in_bytes(Klass::layout_helper_offset()); + -+ { -+ // Copy input args, reversing as we go. We use Ra as a -+ // temporary variable. -+ reverse(Ra, Pa_base, Rlen, Ri, Rj); -+ if (!_squaring) -+ reverse(Ra, Pb_base, Rlen, Ri, Rj); -+ reverse(Ra, Pn_base, Rlen, Ri, Rj); -+ } ++ // Handle objArrays completely differently... ++ const jint objArray_lh = Klass::array_layout_helper(T_OBJECT); ++ __ lw(lh, Address(scratch_src_klass, lh_offset)); ++ __ mvw(t0, objArray_lh); ++ __ beq(lh, t0, L_objArray); + -+ // Push all call-saved registers and also Pm_base which we'll need -+ // at the end. -+ save_regs(); ++ // if [src->klass() != dst->klass()] then return -1 ++ __ load_klass(t1, dst); ++ __ bne(t1, scratch_src_klass, L_failed); + -+#ifndef PRODUCT -+ // assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); -+ { -+ ld(Rn, Address(Pn_base)); -+ mul(Rlo_mn, Rn, inv); -+ mv(t0, -1); -+ Label ok; -+ beq(Rlo_mn, t0, ok); -+ stop("broken inverse in Montgomery multiply"); -+ bind(ok); -+ } -+#endif ++ // if [src->is_Array() != NULL] then return -1 ++ // i.e. (lh >= 0) ++ __ andi(t0, lh, 1UL << 31); ++ __ beqz(t0, L_failed); + -+ mv(Pm_base, Ra); ++ // At this point, it is known to be a typeArray (array_tag 0x3). 
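As a host-side illustration of the layout-helper fields sketched in the diagram earlier in this hunk (field positions follow that diagram; the sample values below are hypothetical, not taken from the patch):

    #include <cstdint>
    #include <cstdio>

    // Decode the 32-bit Klass layout helper for an array:
    // array_tag | header_size | element_type | log2_element_size.
    struct ArrayLayout {
      unsigned tag;                // 0x3 = typeArray, 0x2 = objArray
      unsigned header_size;        // array header size in bytes
      unsigned element_type;       // BasicType code of the elements
      unsigned log2_element_size;  // 0..3 for byte/short/int/long elements
    };

    static ArrayLayout decode_layout_helper(int32_t lh) {
      uint32_t u = (uint32_t)lh;
      ArrayLayout a;
      a.tag               = u >> 30;
      a.header_size       = (u >> 16) & 0xff;
      a.element_type      = (u >> 8)  & 0xff;
      a.log2_element_size =  u        & 0xff;   // only the low bits are used
      return a;
    }

    int main() {
      // Hypothetical helper for an int[]: typeArray tag, 16-byte header,
      // element type code 10 (T_INT), log2 element size 2.
      int32_t lh = (int32_t)((0x3u << 30) | (16u << 16) | (10u << 8) | 2u);
      ArrayLayout a = decode_layout_helper(lh);
      printf("tag=0x%x header=%u type=%u log2_elsize=%u\n",
             a.tag, a.header_size, a.element_type, a.log2_element_size);
      return 0;
    }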
++#ifdef ASSERT ++ { ++ BLOCK_COMMENT("assert primitive array {"); ++ Label L; ++ __ mvw(t1, Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift); ++ __ bge(lh, t1, L); ++ __ stop("must be a primitive array"); ++ __ bind(L); ++ BLOCK_COMMENT("} assert primitive array done"); ++ } ++#endif + -+ mv(tmp0, zr); -+ mv(tmp1, zr); -+ mv(tmp2, zr); ++ arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length, ++ t1, L_failed); + -+ block_comment("for (int i = 0; i < len; i++) {"); -+ mv(Ri, zr); { -+ Label loop, end; -+ bge(Ri, Rlen, end); ++ // TypeArrayKlass ++ // ++ // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize) ++ // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize) ++ // + -+ bind(loop); -+ pre1(Ri); ++ const Register t0_offset = t0; // array offset ++ const Register x22_elsize = lh; // element size + -+ block_comment(" for (j = i; j; j--) {"); { -+ mv(Rj, Ri); -+ unroll_2(Rj, &MontgomeryMultiplyGenerator::step); -+ } block_comment(" } // j"); -+ -+ post1(); -+ addw(Ri, Ri, 1); -+ blt(Ri, Rlen, loop); -+ bind(end); -+ block_comment("} // i"); -+ } -+ -+ block_comment("for (int i = len; i < 2*len; i++) {"); -+ mv(Ri, Rlen); { -+ Label loop, end; -+ slli(Rj, Rlen, 1); // Rj as temp register -+ bge(Ri, Rj, end); -+ -+ bind(loop); -+ pre2(Ri, Rlen); ++ // Get array_header_in_bytes() ++ int lh_header_size_width = exact_log2(Klass::_lh_header_size_mask + 1); ++ int lh_header_size_msb = Klass::_lh_header_size_shift + lh_header_size_width; ++ __ slli(t0_offset, lh, XLEN - lh_header_size_msb); // left shift to remove 24 ~ 32; ++ __ srli(t0_offset, t0_offset, XLEN - lh_header_size_width); // array_offset + -+ block_comment(" for (j = len*2-i-1; j; j--) {"); { -+ slliw(Rj, Rlen, 1); -+ subw(Rj, Rj, Ri); -+ subw(Rj, Rj, 1); -+ unroll_2(Rj, &MontgomeryMultiplyGenerator::step); -+ } block_comment(" } // j"); ++ __ add(src, src, t0_offset); // src array offset ++ __ add(dst, dst, t0_offset); // dst array offset ++ BLOCK_COMMENT("choose copy loop based on element size"); + -+ post2(Ri, Rlen); -+ addw(Ri, Ri, 1); -+ slli(Rj, Rlen, 1); -+ blt(Ri, Rj, loop); -+ bind(end); -+ } -+ block_comment("} // i"); ++ // next registers should be set before the jump to corresponding stub ++ const Register from = c_rarg0; // source array address ++ const Register to = c_rarg1; // destination array address ++ const Register count = c_rarg2; // elements count + ++ // 'from', 'to', 'count' registers should be set in such order ++ // since they are the same as 'src', 'src_pos', 'dst'. + -+ normalize(Rlen); ++ assert(Klass::_lh_log2_element_size_shift == 0, "fix this code"); + -+ mv(Ra, Pm_base); // Save Pm_base in Ra -+ restore_regs(); // Restore caller's Pm_base ++ // The possible values of elsize are 0-3, i.e. exact_log2(element ++ // size in bytes). We do a simple bitwise binary search. 
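The hunk that follows implements this dispatch with andi/bnez tests; as a plain C++ sketch of the same bitwise binary search over log2(element size):

    #include <cstdio>

    // elsize is exact_log2 of the element size in bytes: 0, 1, 2 or 3.
    // Bit 1 separates {byte, short} from {int, long}; bit 0 then picks
    // within each pair, mirroring the two andi/bnez tests in the stub.
    static const char* pick_copy_loop(int elsize) {
      if (elsize & 2) {
        return (elsize & 1) ? "long_copy" : "int_copy";
      }
      return (elsize & 1) ? "short_copy" : "byte_copy";
    }

    int main() {
      for (int e = 0; e <= 3; e++) {
        printf("elsize %d -> %s\n", e, pick_copy_loop(e));
      }
      return 0;
    }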
++ __ BIND(L_copy_bytes); ++ __ andi(t0, x22_elsize, 2); ++ __ bnez(t0, L_copy_ints); ++ __ andi(t0, x22_elsize, 1); ++ __ bnez(t0, L_copy_shorts); ++ __ add(from, src, src_pos); // src_addr ++ __ add(to, dst, dst_pos); // dst_addr ++ __ addw(count, scratch_length, zr); // length ++ __ j(RuntimeAddress(byte_copy_entry)); + -+ // Copy our result into caller's Pm_base -+ reverse(Pm_base, Ra, Rlen, Ri, Rj); ++ __ BIND(L_copy_shorts); ++ __ shadd(from, src_pos, src, t0, 1); // src_addr ++ __ shadd(to, dst_pos, dst, t0, 1); // dst_addr ++ __ addw(count, scratch_length, zr); // length ++ __ j(RuntimeAddress(short_copy_entry)); + -+ leave(); -+ bind(nothing); -+ ret(); ++ __ BIND(L_copy_ints); ++ __ andi(t0, x22_elsize, 1); ++ __ bnez(t0, L_copy_longs); ++ __ shadd(from, src_pos, src, t0, 2); // src_addr ++ __ shadd(to, dst_pos, dst, t0, 2); // dst_addr ++ __ addw(count, scratch_length, zr); // length ++ __ j(RuntimeAddress(int_copy_entry)); + -+ return entry; ++ __ BIND(L_copy_longs); ++#ifdef ASSERT ++ { ++ BLOCK_COMMENT("assert long copy {"); ++ Label L; ++ __ andi(lh, lh, Klass::_lh_log2_element_size_mask); // lh -> x22_elsize ++ __ addw(lh, lh, zr); ++ __ mvw(t0, LogBytesPerLong); ++ __ beq(x22_elsize, t0, L); ++ __ stop("must be long copy, but elsize is wrong"); ++ __ bind(L); ++ BLOCK_COMMENT("} assert long copy done"); + } ++#endif ++ __ shadd(from, src_pos, src, t0, 3); // src_addr ++ __ shadd(to, dst_pos, dst, t0, 3); // dst_addr ++ __ addw(count, scratch_length, zr); // length ++ __ j(RuntimeAddress(long_copy_entry)); + -+ /** -+ * -+ * Arguments: -+ * -+ * Inputs: -+ * c_rarg0 - int array elements a -+ * c_rarg1 - int array elements n (the modulus) -+ * c_rarg2 - int length -+ * c_rarg3 - int inv -+ * c_rarg4 - int array elements m (the result) -+ * -+ */ -+ address generate_square() { -+ Label argh; -+ bind(argh); -+ stop("MontgomeryMultiply total_allocation must be <= 8192"); -+ -+ align(CodeEntryAlignment); -+ address entry = pc(); -+ -+ enter(); ++ // ObjArrayKlass ++ __ BIND(L_objArray); ++ // live at this point: scratch_src_klass, scratch_length, src[_pos], dst[_pos] + -+ // Make room. -+ mv(Ra, 512); -+ bgt(Rlen, Ra, argh); -+ slli(Ra, Rlen, exact_log2(4 * sizeof(jint))); -+ sub(Ra, sp, Ra); -+ andi(sp, Ra, -2 * wordSize); ++ Label L_plain_copy, L_checkcast_copy; ++ // test array classes for subtyping ++ __ load_klass(t2, dst); ++ __ bne(scratch_src_klass, t2, L_checkcast_copy); // usual case is exact equality + -+ srliw(Rlen, Rlen, 1); // length in longwords = len/2 ++ // Identically typed arrays can be copied without element-wise checks. ++ arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length, ++ t1, L_failed); + -+ { -+ // Copy input args, reversing as we go. We use Ra as a -+ // temporary variable. -+ reverse(Ra, Pa_base, Rlen, Ri, Rj); -+ reverse(Ra, Pn_base, Rlen, Ri, Rj); -+ } ++ __ shadd(from, src_pos, src, t0, LogBytesPerHeapOop); ++ __ add(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); ++ __ shadd(to, dst_pos, dst, t0, LogBytesPerHeapOop); ++ __ add(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); ++ __ addw(count, scratch_length, zr); // length ++ __ BIND(L_plain_copy); ++ __ j(RuntimeAddress(oop_copy_entry)); + -+ // Push all call-saved registers and also Pm_base which we'll need -+ // at the end. -+ save_regs(); ++ __ BIND(L_checkcast_copy); ++ // live at this point: scratch_src_klass, scratch_length, t2 (dst_klass) ++ { ++ // Before looking at dst.length, make sure dst is also an objArray. 
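The lwu/bne that follow perform that objArray re-check; after it, every copied element still goes through generate_type_check. A greatly simplified semantic model of that per-element check (HotSpot really uses super_check_offset and secondary supers; walking the super chain here only conveys the meaning):

    #include <cstddef>

    // Every oop stored into a T[] must be an instance of T (or a subtype).
    struct KlassModel {
      const KlassModel* super;   // superclass, or NULL for java.lang.Object
    };

    static bool is_subtype_of(const KlassModel* k, const KlassModel* target) {
      for (; k != NULL; k = k->super) {
        if (k == target) {
          return true;
        }
      }
      return false;
    }

    int main() {
      KlassModel object  = { NULL };
      KlassModel number  = { &object };
      KlassModel integer = { &number };
      // Copying Integer elements into a Number[] succeeds; the reverse fails.
      return (is_subtype_of(&integer, &number) &&
              !is_subtype_of(&number, &integer)) ? 0 : 1;
    }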
++ __ lwu(t0, Address(t2, lh_offset)); ++ __ mvw(t1, objArray_lh); ++ __ bne(t0, t1, L_failed); + -+ mv(Pm_base, Ra); ++ // It is safe to examine both src.length and dst.length. ++ arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length, ++ t2, L_failed); + -+ mv(tmp0, zr); -+ mv(tmp1, zr); -+ mv(tmp2, zr); ++ __ load_klass(dst_klass, dst); // reload + -+ block_comment("for (int i = 0; i < len; i++) {"); -+ mv(Ri, zr); { -+ Label loop, end; -+ bind(loop); -+ bge(Ri, Rlen, end); ++ // Marshal the base address arguments now, freeing registers. ++ __ shadd(from, src_pos, src, t0, LogBytesPerHeapOop); ++ __ add(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); ++ __ shadd(to, dst_pos, dst, t0, LogBytesPerHeapOop); ++ __ add(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); ++ __ addw(count, length, zr); // length (reloaded) ++ const Register sco_temp = c_rarg3; // this register is free now ++ assert_different_registers(from, to, count, sco_temp, ++ dst_klass, scratch_src_klass); + -+ pre1(Ri); ++ // Generate the type check. ++ const int sco_offset = in_bytes(Klass::super_check_offset_offset()); ++ __ lwu(sco_temp, Address(dst_klass, sco_offset)); + -+ block_comment("for (j = (i+1)/2; j; j--) {"); { -+ addi(Rj, Ri, 1); -+ srliw(Rj, Rj, 1); -+ unroll_2(Rj, &MontgomeryMultiplyGenerator::step_squaring); -+ } block_comment(" } // j"); ++ // Smashes t0, t1 ++ generate_type_check(scratch_src_klass, sco_temp, dst_klass, L_plain_copy); + -+ last_squaring(Ri); ++ // Fetch destination element klass from the ObjArrayKlass header. ++ int ek_offset = in_bytes(ObjArrayKlass::element_klass_offset()); ++ __ ld(dst_klass, Address(dst_klass, ek_offset)); ++ __ lwu(sco_temp, Address(dst_klass, sco_offset)); + -+ block_comment(" for (j = i/2; j; j--) {"); { -+ srliw(Rj, Ri, 1); -+ unroll_2(Rj, &MontgomeryMultiplyGenerator::extra_step_squaring); -+ } block_comment(" } // j"); ++ // the checkcast_copy loop needs two extra arguments: ++ assert(c_rarg3 == sco_temp, "#3 already in place"); ++ // Set up arguments for checkcast_copy_entry. ++ __ mv(c_rarg4, dst_klass); // dst.klass.element_klass ++ __ j(RuntimeAddress(checkcast_copy_entry)); ++ } + -+ post1_squaring(); -+ addi(Ri, Ri, 1); -+ blt(Ri, Rlen, loop); ++ __ BIND(L_failed); ++ __ li(x10, -1); ++ __ leave(); // required for proper stackwalking of RuntimeStub frame ++ __ ret(); + -+ bind(end); -+ block_comment("} // i"); -+ } ++ return start; ++ } + -+ block_comment("for (int i = len; i < 2*len; i++) {"); -+ mv(Ri, Rlen); { -+ Label loop, end; -+ bind(loop); -+ slli(Rj, Rlen, 1); -+ bge(Ri, Rj, end); ++ // ++ // Generate stub for array fill. If "aligned" is true, the ++ // "to" address is assumed to be heapword aligned. 
++ // ++ // Arguments for generated stub: ++ // to: c_rarg0 ++ // value: c_rarg1 ++ // count: c_rarg2 treated as signed ++ // ++ address generate_fill(BasicType t, bool aligned, const char* name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); + -+ pre2(Ri, Rlen); ++ BLOCK_COMMENT("Entry:"); + -+ block_comment(" for (j = (2*len-i-1)/2; j; j--) {"); { -+ slli(Rj, Rlen, 1); -+ sub(Rj, Rj, Ri); -+ sub(Rj, Rj, 1); -+ srliw(Rj, Rj, 1); -+ unroll_2(Rj, &MontgomeryMultiplyGenerator::step_squaring); -+ } block_comment(" } // j"); ++ const Register to = c_rarg0; // source array address ++ const Register value = c_rarg1; // value ++ const Register count = c_rarg2; // elements count + -+ last_squaring(Ri); ++ const Register bz_base = x28; // base for block_zero routine ++ const Register cnt_words = x29; // temp register ++ const Register tmp_reg = t1; + -+ block_comment(" for (j = (2*len-i)/2; j; j--) {"); { -+ slli(Rj, Rlen, 1); -+ sub(Rj, Rj, Ri); -+ srliw(Rj, Rj, 1); -+ unroll_2(Rj, &MontgomeryMultiplyGenerator::extra_step_squaring); -+ } block_comment(" } // j"); ++ __ enter(); + -+ post2(Ri, Rlen); -+ addi(Ri, Ri, 1); -+ slli(t0, Rlen, 1); -+ blt(Ri, t0, loop); ++ Label L_fill_elements, L_exit1; + -+ bind(end); -+ block_comment("} // i"); -+ } ++ int shift = -1; ++ switch (t) { ++ case T_BYTE: ++ shift = 0; + -+ normalize(Rlen); ++ // Zero extend value ++ // 8 bit -> 16 bit ++ __ andi(value, value, 0xff); ++ __ mv(tmp_reg, value); ++ __ slli(tmp_reg, tmp_reg, 8); ++ __ orr(value, value, tmp_reg); + -+ mv(Ra, Pm_base); // Save Pm_base in Ra -+ restore_regs(); // Restore caller's Pm_base ++ // 16 bit -> 32 bit ++ __ mv(tmp_reg, value); ++ __ slli(tmp_reg, tmp_reg, 16); ++ __ orr(value, value, tmp_reg); + -+ // Copy our result into caller's Pm_base -+ reverse(Pm_base, Ra, Rlen, Ri, Rj); ++ __ mv(tmp_reg, 8 >> shift); // Short arrays (< 8 bytes) fill by element ++ __ bltu(count, tmp_reg, L_fill_elements); ++ break; ++ case T_SHORT: ++ shift = 1; ++ // Zero extend value ++ // 16 bit -> 32 bit ++ __ andi(value, value, 0xffff); ++ __ mv(tmp_reg, value); ++ __ slli(tmp_reg, tmp_reg, 16); ++ __ orr(value, value, tmp_reg); + -+ leave(); -+ ret(); ++ // Short arrays (< 8 bytes) fill by element ++ __ mv(tmp_reg, 8 >> shift); ++ __ bltu(count, tmp_reg, L_fill_elements); ++ break; ++ case T_INT: ++ shift = 2; + -+ return entry; ++ // Short arrays (< 8 bytes) fill by element ++ __ mv(tmp_reg, 8 >> shift); ++ __ bltu(count, tmp_reg, L_fill_elements); ++ break; ++ default: ShouldNotReachHere(); + } -+ }; -+#endif // COMPILER2 -+ -+ // Initialization -+ void generate_initial() { -+ // Generate initial stubs and initializes the entry points -+ -+ // entry points that exist in all platforms Note: This is code -+ // that could be shared among different platforms - however the -+ // benefit seems to be smaller than the disadvantage of having a -+ // much more complicated generator structure. See also comment in -+ // stubRoutines.hpp. -+ -+ StubRoutines::_forward_exception_entry = generate_forward_exception(); -+ -+ StubRoutines::_call_stub_entry = -+ generate_call_stub(StubRoutines::_call_stub_return_address); -+ -+ // is referenced by megamorphic call -+ StubRoutines::_catch_exception_entry = generate_catch_exception(); -+ -+ // Build this early so it's available for the interpreter. 
-+ StubRoutines::_throw_StackOverflowError_entry = -+ generate_throw_exception("StackOverflowError throw_exception", -+ CAST_FROM_FN_PTR(address, -+ SharedRuntime::throw_StackOverflowError)); -+ StubRoutines::_throw_delayed_StackOverflowError_entry = -+ generate_throw_exception("delayed StackOverflowError throw_exception", -+ CAST_FROM_FN_PTR(address, -+ SharedRuntime::throw_delayed_StackOverflowError)); -+ } + -+ void generate_all() { -+ // support for verify_oop (must happen after universe_init) -+ StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop(); -+ StubRoutines::_throw_AbstractMethodError_entry = -+ generate_throw_exception("AbstractMethodError throw_exception", -+ CAST_FROM_FN_PTR(address, -+ SharedRuntime:: -+ throw_AbstractMethodError)); ++ // Align source address at 8 bytes address boundary. ++ Label L_skip_align1, L_skip_align2, L_skip_align4; ++ if (!aligned) { ++ switch (t) { ++ case T_BYTE: ++ // One byte misalignment happens only for byte arrays. ++ __ andi(t0, to, 1); ++ __ beqz(t0, L_skip_align1); ++ __ sb(value, Address(to, 0)); ++ __ addi(to, to, 1); ++ __ addiw(count, count, -1); ++ __ bind(L_skip_align1); ++ // Fallthrough ++ case T_SHORT: ++ // Two bytes misalignment happens only for byte and short (char) arrays. ++ __ andi(t0, to, 2); ++ __ beqz(t0, L_skip_align2); ++ __ sh(value, Address(to, 0)); ++ __ addi(to, to, 2); ++ __ addiw(count, count, -(2 >> shift)); ++ __ bind(L_skip_align2); ++ // Fallthrough ++ case T_INT: ++ // Align to 8 bytes, we know we are 4 byte aligned to start. ++ __ andi(t0, to, 4); ++ __ beqz(t0, L_skip_align4); ++ __ sw(value, Address(to, 0)); ++ __ addi(to, to, 4); ++ __ addiw(count, count, -(4 >> shift)); ++ __ bind(L_skip_align4); ++ break; ++ default: ShouldNotReachHere(); ++ } ++ } + -+ StubRoutines::_throw_IncompatibleClassChangeError_entry = -+ generate_throw_exception("IncompatibleClassChangeError throw_exception", -+ CAST_FROM_FN_PTR(address, -+ SharedRuntime:: -+ throw_IncompatibleClassChangeError)); ++ // ++ // Fill large chunks ++ // ++ __ srliw(cnt_words, count, 3 - shift); // number of words + -+ StubRoutines::_throw_NullPointerException_at_call_entry = -+ generate_throw_exception("NullPointerException at call throw_exception", -+ CAST_FROM_FN_PTR(address, -+ SharedRuntime:: -+ throw_NullPointerException_at_call)); -+ // arraycopy stubs used by compilers -+ generate_arraycopy_stubs(); ++ // 32 bit -> 64 bit ++ __ andi(value, value, 0xffffffff); ++ __ mv(tmp_reg, value); ++ __ slli(tmp_reg, tmp_reg, 32); ++ __ orr(value, value, tmp_reg); + -+#ifdef COMPILER2 -+ if (UseMulAddIntrinsic) { -+ StubRoutines::_mulAdd = generate_mulAdd(); ++ __ slli(tmp_reg, cnt_words, 3 - shift); ++ __ subw(count, count, tmp_reg); ++ { ++ __ fill_words(to, cnt_words, value); + } + -+ if (UseMultiplyToLenIntrinsic) { -+ StubRoutines::_multiplyToLen = generate_multiplyToLen(); ++ // Remaining count is less than 8 bytes. Fill it by a single store. ++ // Note that the total length is no less than 8 bytes. ++ if (t == T_BYTE || t == T_SHORT) { ++ __ beqz(count, L_exit1); ++ __ shadd(to, count, to, tmp_reg, shift); // points to the end ++ __ sd(value, Address(to, -8)); // overwrite some elements ++ __ bind(L_exit1); ++ __ leave(); ++ __ ret(); + } + -+ if (UseSquareToLenIntrinsic) { -+ StubRoutines::_squareToLen = generate_squareToLen(); ++ // Handle copies less than 8 bytes. 
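Earlier in this hunk the fill value was widened to a full 64-bit pattern by repeated shift-and-or before fill_words; a host-side sketch of that widening (the sample values are hypothetical):

    #include <cassert>
    #include <cstdint>

    // Replicate an 8-, 16- or 32-bit fill value across all 64 bits so the
    // main loop can store whole double-words. log2_size: 0 = byte, 1 = short,
    // 2 = int, matching the 'shift' selected in generate_fill.
    static uint64_t splat(uint64_t value, int log2_size) {
      if (log2_size == 0) {
        value &= 0xff;
        value |= value << 8;        // 8 bit  -> 16 bit
        value |= value << 16;       // 16 bit -> 32 bit
      } else if (log2_size == 1) {
        value &= 0xffff;
        value |= value << 16;       // 16 bit -> 32 bit
      } else {
        value &= 0xffffffffULL;
      }
      value |= value << 32;         // 32 bit -> 64 bit, done for every type
      return value;
    }

    int main() {
      assert(splat(0xab, 0)       == 0xababababababababULL);
      assert(splat(0x1234, 1)     == 0x1234123412341234ULL);
      assert(splat(0xdeadbeef, 2) == 0xdeadbeefdeadbeefULL);
      return 0;
    }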
++ Label L_fill_2, L_fill_4, L_exit2; ++ __ bind(L_fill_elements); ++ switch (t) { ++ case T_BYTE: ++ __ andi(t0, count, 1); ++ __ beqz(t0, L_fill_2); ++ __ sb(value, Address(to, 0)); ++ __ addi(to, to, 1); ++ __ bind(L_fill_2); ++ __ andi(t0, count, 2); ++ __ beqz(t0, L_fill_4); ++ __ sh(value, Address(to, 0)); ++ __ addi(to, to, 2); ++ __ bind(L_fill_4); ++ __ andi(t0, count, 4); ++ __ beqz(t0, L_exit2); ++ __ sw(value, Address(to, 0)); ++ break; ++ case T_SHORT: ++ __ andi(t0, count, 1); ++ __ beqz(t0, L_fill_4); ++ __ sh(value, Address(to, 0)); ++ __ addi(to, to, 2); ++ __ bind(L_fill_4); ++ __ andi(t0, count, 2); ++ __ beqz(t0, L_exit2); ++ __ sw(value, Address(to, 0)); ++ break; ++ case T_INT: ++ __ beqz(count, L_exit2); ++ __ sw(value, Address(to, 0)); ++ break; ++ default: ShouldNotReachHere(); + } ++ __ bind(L_exit2); ++ __ leave(); ++ __ ret(); ++ return start; ++ } + -+ generate_compare_long_strings(); ++ void generate_arraycopy_stubs() { ++ address entry = NULL; ++ address entry_jbyte_arraycopy = NULL; ++ address entry_jshort_arraycopy = NULL; ++ address entry_jint_arraycopy = NULL; ++ address entry_oop_arraycopy = NULL; ++ address entry_jlong_arraycopy = NULL; ++ address entry_checkcast_arraycopy = NULL; + -+ generate_string_indexof_stubs(); ++ generate_copy_longs(copy_f, c_rarg0, c_rarg1, t1, copy_forwards); ++ generate_copy_longs(copy_b, c_rarg0, c_rarg1, t1, copy_backwards); + -+ if (UseMontgomeryMultiplyIntrinsic) { -+ StubCodeMark mark(this, "StubRoutines", "montgomeryMultiply"); -+ MontgomeryMultiplyGenerator g(_masm, /*squaring*/false); -+ StubRoutines::_montgomeryMultiply = g.generate_multiply(); -+ } ++ StubRoutines::riscv::_zero_blocks = generate_zero_blocks(); + -+ if (UseMontgomerySquareIntrinsic) { -+ StubCodeMark mark(this, "StubRoutines", "montgomerySquare"); -+ MontgomeryMultiplyGenerator g(_masm, /*squaring*/true); -+ StubRoutines::_montgomerySquare = g.generate_square(); -+ } -+#endif // COMPILER2 -+ // Safefetch stubs. 
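Both the removed and the new generators register SafeFetch stubs at this point (the new generate_safefetch appears further down with the same int SafeFetch32(int* adr, int errValue) signature). A conceptual host-side model of what these stubs provide; the real stub takes no flag argument, the signal handler makes the decision by resuming at continuation_pc:

    #include <cstdint>

    // SafeFetch32 / SafeFetchN semantics: return *adr, or errValue if reading
    // adr would fault. Used by the VM to probe possibly-unmapped memory.
    static int safe_fetch32_model(const int* adr, int errValue, bool access_would_fault) {
      return access_would_fault ? errValue : *adr;
    }

    int main() {
      int v = 42;
      bool ok = safe_fetch32_model(&v, -1, false) == 42 &&
                safe_fetch32_model(nullptr, -1, true) == -1;
      return ok ? 0 : 1;
    }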
-+ generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, -+ &StubRoutines::_safefetch32_fault_pc, -+ &StubRoutines::_safefetch32_continuation_pc); -+ generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry, -+ &StubRoutines::_safefetchN_fault_pc, -+ &StubRoutines::_safefetchN_continuation_pc); ++ //*** jbyte ++ // Always need aligned and unaligned versions ++ StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, &entry, ++ "jbyte_disjoint_arraycopy"); ++ StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, entry, ++ &entry_jbyte_arraycopy, ++ "jbyte_arraycopy"); ++ StubRoutines::_arrayof_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(true, &entry, ++ "arrayof_jbyte_disjoint_arraycopy"); ++ StubRoutines::_arrayof_jbyte_arraycopy = generate_conjoint_byte_copy(true, entry, NULL, ++ "arrayof_jbyte_arraycopy"); + -+ StubRoutines::riscv::set_completed(); -+ } ++ //*** jshort ++ // Always need aligned and unaligned versions ++ StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, &entry, ++ "jshort_disjoint_arraycopy"); ++ StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, entry, ++ &entry_jshort_arraycopy, ++ "jshort_arraycopy"); ++ StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_copy(true, &entry, ++ "arrayof_jshort_disjoint_arraycopy"); ++ StubRoutines::_arrayof_jshort_arraycopy = generate_conjoint_short_copy(true, entry, NULL, ++ "arrayof_jshort_arraycopy"); + -+ public: -+ StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) { -+ if (all) { -+ generate_all(); -+ } else { -+ generate_initial(); ++ //*** jint ++ // Aligned versions ++ StubRoutines::_arrayof_jint_disjoint_arraycopy = generate_disjoint_int_copy(true, &entry, ++ "arrayof_jint_disjoint_arraycopy"); ++ StubRoutines::_arrayof_jint_arraycopy = generate_conjoint_int_copy(true, entry, &entry_jint_arraycopy, ++ "arrayof_jint_arraycopy"); ++ // In 64 bit we need both aligned and unaligned versions of jint arraycopy. ++ // entry_jint_arraycopy always points to the unaligned version ++ StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_copy(false, &entry, ++ "jint_disjoint_arraycopy"); ++ StubRoutines::_jint_arraycopy = generate_conjoint_int_copy(false, entry, ++ &entry_jint_arraycopy, ++ "jint_arraycopy"); ++ ++ //*** jlong ++ // It is always aligned ++ StubRoutines::_arrayof_jlong_disjoint_arraycopy = generate_disjoint_long_copy(true, &entry, ++ "arrayof_jlong_disjoint_arraycopy"); ++ StubRoutines::_arrayof_jlong_arraycopy = generate_conjoint_long_copy(true, entry, &entry_jlong_arraycopy, ++ "arrayof_jlong_arraycopy"); ++ StubRoutines::_jlong_disjoint_arraycopy = StubRoutines::_arrayof_jlong_disjoint_arraycopy; ++ StubRoutines::_jlong_arraycopy = StubRoutines::_arrayof_jlong_arraycopy; ++ ++ //*** oops ++ { ++ // With compressed oops we need unaligned versions; notice that ++ // we overwrite entry_oop_arraycopy. 
++ bool aligned = !UseCompressedOops; ++ ++ StubRoutines::_arrayof_oop_disjoint_arraycopy ++ = generate_disjoint_oop_copy(aligned, &entry, "arrayof_oop_disjoint_arraycopy", ++ /*dest_uninitialized*/false); ++ StubRoutines::_arrayof_oop_arraycopy ++ = generate_conjoint_oop_copy(aligned, entry, &entry_oop_arraycopy, "arrayof_oop_arraycopy", ++ /*dest_uninitialized*/false); ++ // Aligned versions without pre-barriers ++ StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit ++ = generate_disjoint_oop_copy(aligned, &entry, "arrayof_oop_disjoint_arraycopy_uninit", ++ /*dest_uninitialized*/true); ++ StubRoutines::_arrayof_oop_arraycopy_uninit ++ = generate_conjoint_oop_copy(aligned, entry, NULL, "arrayof_oop_arraycopy_uninit", ++ /*dest_uninitialized*/true); + } -+ } + -+ ~StubGenerator() {} -+}; // end class declaration ++ StubRoutines::_oop_disjoint_arraycopy = StubRoutines::_arrayof_oop_disjoint_arraycopy; ++ StubRoutines::_oop_arraycopy = StubRoutines::_arrayof_oop_arraycopy; ++ StubRoutines::_oop_disjoint_arraycopy_uninit = StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit; ++ StubRoutines::_oop_arraycopy_uninit = StubRoutines::_arrayof_oop_arraycopy_uninit; + -+void StubGenerator_generate(CodeBuffer* code, bool all) { -+ StubGenerator g(code, all); -+} -diff --git a/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp b/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp -new file mode 100644 -index 000000000..633108b95 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp -@@ -0,0 +1,60 @@ -+/* -+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++ StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy); ++ StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", NULL, ++ /*dest_uninitialized*/true); + -+#include "precompiled.hpp" -+#include "runtime/deoptimization.hpp" -+#include "runtime/frame.inline.hpp" -+#include "runtime/stubRoutines.hpp" -+#include "runtime/thread.inline.hpp" -+#include "utilities/globalDefinitions.hpp" + -+// Implementation of the platform-specific part of StubRoutines - for -+// a description of how to extend it, see the stubRoutines.hpp file. 
++ StubRoutines::_unsafe_arraycopy = generate_unsafe_copy("unsafe_arraycopy", ++ entry_jbyte_arraycopy, ++ entry_jshort_arraycopy, ++ entry_jint_arraycopy, ++ entry_jlong_arraycopy); + -+address StubRoutines::riscv::_get_previous_fp_entry = NULL; -+address StubRoutines::riscv::_get_previous_sp_entry = NULL; ++ StubRoutines::_generic_arraycopy = generate_generic_copy("generic_arraycopy", ++ entry_jbyte_arraycopy, ++ entry_jshort_arraycopy, ++ entry_jint_arraycopy, ++ entry_oop_arraycopy, ++ entry_jlong_arraycopy, ++ entry_checkcast_arraycopy); + -+address StubRoutines::riscv::_f2i_fixup = NULL; -+address StubRoutines::riscv::_f2l_fixup = NULL; -+address StubRoutines::riscv::_d2i_fixup = NULL; -+address StubRoutines::riscv::_d2l_fixup = NULL; -+address StubRoutines::riscv::_float_sign_mask = NULL; -+address StubRoutines::riscv::_float_sign_flip = NULL; -+address StubRoutines::riscv::_double_sign_mask = NULL; -+address StubRoutines::riscv::_double_sign_flip = NULL; -+address StubRoutines::riscv::_zero_blocks = NULL; -+address StubRoutines::riscv::_has_negatives = NULL; -+address StubRoutines::riscv::_has_negatives_long = NULL; -+address StubRoutines::riscv::_compare_long_string_LL = NULL; -+address StubRoutines::riscv::_compare_long_string_UU = NULL; -+address StubRoutines::riscv::_compare_long_string_LU = NULL; -+address StubRoutines::riscv::_compare_long_string_UL = NULL; -+address StubRoutines::riscv::_string_indexof_linear_ll = NULL; -+address StubRoutines::riscv::_string_indexof_linear_uu = NULL; -+address StubRoutines::riscv::_string_indexof_linear_ul = NULL; -+address StubRoutines::riscv::_large_byte_array_inflate = NULL; ++ StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill"); ++ StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill"); ++ StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill"); ++ StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill"); ++ StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill"); ++ StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill"); ++ } + -+bool StubRoutines::riscv::_completed = false; -diff --git a/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp b/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp -new file mode 100644 -index 000000000..8aa81980e ---- /dev/null -+++ b/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp -@@ -0,0 +1,179 @@ -+/* -+ * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
-+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++ // Safefetch stubs. ++ void generate_safefetch(const char* name, int size, address* entry, ++ address* fault_pc, address* continuation_pc) { ++ // safefetch signatures: ++ // int SafeFetch32(int* adr, int errValue) ++ // intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue) ++ // ++ // arguments: ++ // c_rarg0 = adr ++ // c_rarg1 = errValue ++ // ++ // result: ++ // PPC_RET = *adr or errValue ++ assert_cond(entry != NULL && fault_pc != NULL && continuation_pc != NULL); ++ StubCodeMark mark(this, "StubRoutines", name); + -+#ifndef CPU_RISCV_STUBROUTINES_RISCV_HPP -+#define CPU_RISCV_STUBROUTINES_RISCV_HPP ++ // Entry point, pc or function descriptor. ++ *entry = __ pc(); + -+// This file holds the platform specific parts of the StubRoutines -+// definition. See stubRoutines.hpp for a description on how to -+// extend it. ++ // Load *adr into c_rarg1, may fault. ++ *fault_pc = __ pc(); ++ switch (size) { ++ case 4: ++ // int32_t ++ __ lw(c_rarg1, Address(c_rarg0, 0)); ++ break; ++ case 8: ++ // int64_t ++ __ ld(c_rarg1, Address(c_rarg0, 0)); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } + -+static bool returns_to_call_stub(address return_pc) { -+ return return_pc == _call_stub_return_address; -+} ++ // return errValue or *adr ++ *continuation_pc = __ pc(); ++ __ mv(x10, c_rarg1); ++ __ ret(); ++ } + -+enum platform_dependent_constants { -+ code_size1 = 19000, // simply increase if too small (assembler will crash if too small) -+ code_size2 = 36000 // simply increase if too small (assembler will crash if too small) -+}; ++ // code for comparing 16 bytes of strings with same encoding ++ void compare_string_16_bytes_same(Label &DIFF1, Label &DIFF2) { ++ const Register result = x10, str1 = x11, cnt1 = x12, str2 = x13, tmp1 = x28, tmp2 = x29, tmp4 = x7, tmp5 = x31; ++ __ ld(tmp5, Address(str1)); ++ __ addi(str1, str1, 8); ++ __ xorr(tmp4, tmp1, tmp2); ++ __ ld(cnt1, Address(str2)); ++ __ addi(str2, str2, 8); ++ __ bnez(tmp4, DIFF1); ++ __ ld(tmp1, Address(str1)); ++ __ addi(str1, str1, 8); ++ __ xorr(tmp4, tmp5, cnt1); ++ __ ld(tmp2, Address(str2)); ++ __ addi(str2, str2, 8); ++ __ bnez(tmp4, DIFF2); ++ } + -+class riscv { -+ friend class StubGenerator; ++ // code for comparing 8 characters of strings with Latin1 and Utf16 encoding ++ void compare_string_8_x_LU(Register tmpL, Register tmpU, Label &DIFF1, ++ Label &DIFF2) { ++ const Register strU = x12, curU = x7, strL = x29, tmp = x30; ++ __ ld(tmpL, Address(strL)); ++ __ addi(strL, strL, 8); ++ __ ld(tmpU, Address(strU)); ++ __ addi(strU, strU, 8); ++ __ inflate_lo32(tmp, tmpL); ++ __ mv(t0, tmp); ++ __ xorr(tmp, curU, t0); ++ __ bnez(tmp, DIFF2); + -+ private: -+ static address _get_previous_fp_entry; -+ static address _get_previous_sp_entry; ++ __ ld(curU, Address(strU)); ++ __ addi(strU, strU, 8); ++ __ inflate_hi32(tmp, tmpL); ++ __ mv(t0, tmp); ++ __ xorr(tmp, tmpU, t0); ++ __ bnez(tmp, DIFF1); ++ } + -+ static address _f2i_fixup; -+ static address _f2l_fixup; -+ static address _d2i_fixup; -+ static address _d2l_fixup; ++ // x10 = result ++ // x11 = str1 ++ // x12 = cnt1 ++ // x13 = str2 ++ // x14 = cnt2 ++ // x28 = tmp1 ++ // x29 = tmp2 ++ // x30 = tmp3 ++ address generate_compare_long_string_different_encoding(bool isLU) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", isLU ? 
"compare_long_string_different_encoding LU" : "compare_long_string_different_encoding UL"); ++ address entry = __ pc(); ++ Label SMALL_LOOP, TAIL, TAIL_LOAD_16, LOAD_LAST, DIFF1, DIFF2, ++ DONE, CALCULATE_DIFFERENCE; ++ const Register result = x10, str1 = x11, cnt1 = x12, str2 = x13, cnt2 = x14, ++ tmp1 = x28, tmp2 = x29, tmp3 = x30, tmp4 = x7, tmp5 = x31; ++ RegSet spilled_regs = RegSet::of(tmp4, tmp5); + -+ static address _float_sign_mask; -+ static address _float_sign_flip; -+ static address _double_sign_mask; -+ static address _double_sign_flip; ++ // cnt2 == amount of characters left to compare ++ // Check already loaded first 4 symbols ++ __ inflate_lo32(tmp3, isLU ? tmp1 : tmp2); ++ __ mv(isLU ? tmp1 : tmp2, tmp3); ++ __ addi(str1, str1, isLU ? wordSize / 2 : wordSize); ++ __ addi(str2, str2, isLU ? wordSize : wordSize / 2); ++ __ sub(cnt2, cnt2, 8); // Already loaded 4 symbols. Last 4 is special case. ++ __ push_reg(spilled_regs, sp); + -+ static address _zero_blocks; ++ if (isLU) { ++ __ add(str1, str1, cnt2); ++ __ shadd(str2, cnt2, str2, t0, 1); ++ } else { ++ __ shadd(str1, cnt2, str1, t0, 1); ++ __ add(str2, str2, cnt2); ++ } ++ __ xorr(tmp3, tmp1, tmp2); ++ __ mv(tmp5, tmp2); ++ __ bnez(tmp3, CALCULATE_DIFFERENCE); + -+ static address _has_negatives; -+ static address _has_negatives_long; -+ static address _compare_long_string_LL; -+ static address _compare_long_string_LU; -+ static address _compare_long_string_UL; -+ static address _compare_long_string_UU; -+ static address _string_indexof_linear_ll; -+ static address _string_indexof_linear_uu; -+ static address _string_indexof_linear_ul; -+ static address _large_byte_array_inflate; -+ static bool _completed; ++ Register strU = isLU ? str2 : str1, ++ strL = isLU ? str1 : str2, ++ tmpU = isLU ? tmp5 : tmp1, // where to keep U for comparison ++ tmpL = isLU ? tmp1 : tmp5; // where to keep L for comparison + -+ public: ++ __ sub(tmp2, strL, cnt2); // strL pointer to load from ++ __ slli(t0, cnt2, 1); ++ __ sub(cnt1, strU, t0); // strU pointer to load from + -+ static address get_previous_fp_entry() -+ { -+ return _get_previous_fp_entry; -+ } ++ __ ld(tmp4, Address(cnt1)); ++ __ addi(cnt1, cnt1, 8); ++ __ beqz(cnt2, LOAD_LAST); // no characters left except last load ++ __ sub(cnt2, cnt2, 16); ++ __ bltz(cnt2, TAIL); ++ __ bind(SMALL_LOOP); // smaller loop ++ __ sub(cnt2, cnt2, 16); ++ compare_string_8_x_LU(tmpL, tmpU, DIFF1, DIFF2); ++ compare_string_8_x_LU(tmpL, tmpU, DIFF1, DIFF2); ++ __ bgez(cnt2, SMALL_LOOP); ++ __ addi(t0, cnt2, 16); ++ __ beqz(t0, LOAD_LAST); ++ __ bind(TAIL); // 1..15 characters left until last load (last 4 characters) ++ // Address of 8 bytes before last 4 characters in UTF-16 string ++ __ shadd(cnt1, cnt2, cnt1, t0, 1); ++ // Address of 16 bytes before last 4 characters in Latin1 string ++ __ add(tmp2, tmp2, cnt2); ++ __ ld(tmp4, Address(cnt1, -8)); ++ // last 16 characters before last load ++ compare_string_8_x_LU(tmpL, tmpU, DIFF1, DIFF2); ++ compare_string_8_x_LU(tmpL, tmpU, DIFF1, DIFF2); ++ __ j(LOAD_LAST); ++ __ bind(DIFF2); ++ __ mv(tmpU, tmp4); ++ __ bind(DIFF1); ++ __ mv(tmpL, t0); ++ __ j(CALCULATE_DIFFERENCE); ++ __ bind(LOAD_LAST); ++ // Last 4 UTF-16 characters are already pre-loaded into tmp4 by compare_string_8_x_LU. 
++ // No need to load it again ++ __ mv(tmpU, tmp4); ++ __ ld(tmpL, Address(strL)); ++ __ inflate_lo32(tmp3, tmpL); ++ __ mv(tmpL, tmp3); ++ __ xorr(tmp3, tmpU, tmpL); ++ __ beqz(tmp3, DONE); + -+ static address get_previous_sp_entry() -+ { -+ return _get_previous_sp_entry; ++ // Find the first different characters in the longwords and ++ // compute their difference. ++ __ bind(CALCULATE_DIFFERENCE); ++ __ ctzc_bit(tmp4, tmp3); ++ __ srl(tmp1, tmp1, tmp4); ++ __ srl(tmp5, tmp5, tmp4); ++ __ andi(tmp1, tmp1, 0xFFFF); ++ __ andi(tmp5, tmp5, 0xFFFF); ++ __ sub(result, tmp1, tmp5); ++ __ bind(DONE); ++ __ pop_reg(spilled_regs, sp); ++ __ ret(); ++ return entry; + } + -+ static address f2i_fixup() -+ { -+ return _f2i_fixup; -+ } ++ address generate_method_entry_barrier() { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "nmethod_entry_barrier"); + -+ static address f2l_fixup() -+ { -+ return _f2l_fixup; -+ } ++ Label deoptimize_label; + -+ static address d2i_fixup() -+ { -+ return _d2i_fixup; -+ } ++ address start = __ pc(); + -+ static address d2l_fixup() -+ { -+ return _d2l_fixup; -+ } ++ __ set_last_Java_frame(sp, fp, ra, t0); + -+ static address float_sign_mask() -+ { -+ return _float_sign_mask; -+ } ++ __ enter(); ++ __ add(t1, sp, wordSize); + -+ static address float_sign_flip() -+ { -+ return _float_sign_flip; -+ } ++ __ sub(sp, sp, 4 * wordSize); + -+ static address double_sign_mask() -+ { -+ return _double_sign_mask; -+ } ++ __ push_call_clobbered_registers(); + -+ static address double_sign_flip() -+ { -+ return _double_sign_flip; -+ } ++ __ mv(c_rarg0, t1); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSetNMethod::nmethod_stub_entry_barrier), 1); + -+ static address zero_blocks() { -+ return _zero_blocks; -+ } ++ __ reset_last_Java_frame(true); + -+ static address has_negatives() { -+ return _has_negatives; -+ } ++ __ mv(t0, x10); + -+ static address has_negatives_long() { -+ return _has_negatives_long; -+ } ++ __ pop_call_clobbered_registers(); + -+ static address compare_long_string_LL() { -+ return _compare_long_string_LL; -+ } ++ __ bnez(t0, deoptimize_label); + -+ static address compare_long_string_LU() { -+ return _compare_long_string_LU; -+ } ++ __ leave(); ++ __ ret(); + -+ static address compare_long_string_UL() { -+ return _compare_long_string_UL; -+ } ++ __ BIND(deoptimize_label); + -+ static address compare_long_string_UU() { -+ return _compare_long_string_UU; -+ } ++ __ ld(t0, Address(sp, 0)); ++ __ ld(fp, Address(sp, wordSize)); ++ __ ld(ra, Address(sp, wordSize * 2)); ++ __ ld(t1, Address(sp, wordSize * 3)); + -+ static address string_indexof_linear_ul() { -+ return _string_indexof_linear_ul; -+ } ++ __ mv(sp, t0); ++ __ jr(t1); + -+ static address string_indexof_linear_ll() { -+ return _string_indexof_linear_ll; ++ return start; + } + -+ static address string_indexof_linear_uu() { -+ return _string_indexof_linear_uu; -+ } ++ // x10 = result ++ // x11 = str1 ++ // x12 = cnt1 ++ // x13 = str2 ++ // x14 = cnt2 ++ // x28 = tmp1 ++ // x29 = tmp2 ++ // x30 = tmp3 ++ // x31 = tmp4 ++ address generate_compare_long_string_same_encoding(bool isLL) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", isLL ? 
++ "compare_long_string_same_encoding LL" : "compare_long_string_same_encoding UU"); ++ address entry = __ pc(); ++ Label SMALL_LOOP, CHECK_LAST, DIFF2, TAIL, ++ LENGTH_DIFF, DIFF, LAST_CHECK_AND_LENGTH_DIFF; ++ const Register result = x10, str1 = x11, cnt1 = x12, str2 = x13, cnt2 = x14, ++ tmp1 = x28, tmp2 = x29, tmp3 = x30, tmp4 = x7, tmp5 = x31; ++ RegSet spilled_regs = RegSet::of(tmp4, tmp5); + -+ static address large_byte_array_inflate() { -+ return _large_byte_array_inflate; ++ // cnt1/cnt2 contains amount of characters to compare. cnt1 can be re-used ++ // update cnt2 counter with already loaded 8 bytes ++ __ sub(cnt2, cnt2, wordSize / (isLL ? 1 : 2)); ++ // update pointers, because of previous read ++ __ add(str1, str1, wordSize); ++ __ add(str2, str2, wordSize); ++ // less than 16 bytes left? ++ __ sub(cnt2, cnt2, isLL ? 16 : 8); ++ __ push_reg(spilled_regs, sp); ++ __ bltz(cnt2, TAIL); ++ __ bind(SMALL_LOOP); ++ compare_string_16_bytes_same(DIFF, DIFF2); ++ __ sub(cnt2, cnt2, isLL ? 16 : 8); ++ __ bgez(cnt2, SMALL_LOOP); ++ __ bind(TAIL); ++ __ addi(cnt2, cnt2, isLL ? 16 : 8); ++ __ beqz(cnt2, LAST_CHECK_AND_LENGTH_DIFF); ++ __ sub(cnt2, cnt2, isLL ? 8 : 4); ++ __ blez(cnt2, CHECK_LAST); ++ __ xorr(tmp4, tmp1, tmp2); ++ __ bnez(tmp4, DIFF); ++ __ ld(tmp1, Address(str1)); ++ __ addi(str1, str1, 8); ++ __ ld(tmp2, Address(str2)); ++ __ addi(str2, str2, 8); ++ __ sub(cnt2, cnt2, isLL ? 8 : 4); ++ __ bind(CHECK_LAST); ++ if (!isLL) { ++ __ add(cnt2, cnt2, cnt2); // now in bytes ++ } ++ __ xorr(tmp4, tmp1, tmp2); ++ __ bnez(tmp4, DIFF); ++ __ add(str1, str1, cnt2); ++ __ ld(tmp5, Address(str1)); ++ __ add(str2, str2, cnt2); ++ __ ld(cnt1, Address(str2)); ++ __ xorr(tmp4, tmp5, cnt1); ++ __ beqz(tmp4, LENGTH_DIFF); ++ // Find the first different characters in the longwords and ++ // compute their difference. ++ __ bind(DIFF2); ++ __ ctzc_bit(tmp3, tmp4, isLL); // count zero from lsb to msb ++ __ srl(tmp5, tmp5, tmp3); ++ __ srl(cnt1, cnt1, tmp3); ++ if (isLL) { ++ __ andi(tmp5, tmp5, 0xFF); ++ __ andi(cnt1, cnt1, 0xFF); ++ } else { ++ __ andi(tmp5, tmp5, 0xFFFF); ++ __ andi(cnt1, cnt1, 0xFFFF); ++ } ++ __ sub(result, tmp5, cnt1); ++ __ j(LENGTH_DIFF); ++ __ bind(DIFF); ++ __ ctzc_bit(tmp3, tmp4, isLL); // count zero from lsb to msb ++ __ srl(tmp1, tmp1, tmp3); ++ __ srl(tmp2, tmp2, tmp3); ++ if (isLL) { ++ __ andi(tmp1, tmp1, 0xFF); ++ __ andi(tmp2, tmp2, 0xFF); ++ } else { ++ __ andi(tmp1, tmp1, 0xFFFF); ++ __ andi(tmp2, tmp2, 0xFFFF); ++ } ++ __ sub(result, tmp1, tmp2); ++ __ j(LENGTH_DIFF); ++ __ bind(LAST_CHECK_AND_LENGTH_DIFF); ++ __ xorr(tmp4, tmp1, tmp2); ++ __ bnez(tmp4, DIFF); ++ __ bind(LENGTH_DIFF); ++ __ pop_reg(spilled_regs, sp); ++ __ ret(); ++ return entry; + } + -+ static bool complete() { -+ return _completed; ++ void generate_compare_long_strings() { ++ StubRoutines::riscv::_compare_long_string_LL = generate_compare_long_string_same_encoding(true); ++ StubRoutines::riscv::_compare_long_string_UU = generate_compare_long_string_same_encoding(false); ++ StubRoutines::riscv::_compare_long_string_LU = generate_compare_long_string_different_encoding(true); ++ StubRoutines::riscv::_compare_long_string_UL = generate_compare_long_string_different_encoding(false); + } + -+ static void set_completed() { -+ _completed = true; -+ } -+}; ++ // x10 result ++ // x11 src ++ // x12 src count ++ // x13 pattern ++ // x14 pattern count ++ address generate_string_indexof_linear(bool needle_isL, bool haystack_isL) ++ { ++ const char* stubName = needle_isL ++ ? (haystack_isL ? 
"indexof_linear_ll" : "indexof_linear_ul") ++ : "indexof_linear_uu"; ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", stubName); ++ address entry = __ pc(); + -+#endif // CPU_RISCV_STUBROUTINES_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -new file mode 100644 -index 000000000..f5e212204 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -@@ -0,0 +1,1841 @@ -+/* -+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2019, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++ int needle_chr_size = needle_isL ? 1 : 2; ++ int haystack_chr_size = haystack_isL ? 1 : 2; ++ int needle_chr_shift = needle_isL ? 0 : 1; ++ int haystack_chr_shift = haystack_isL ? 
0 : 1; ++ bool isL = needle_isL && haystack_isL; ++ // parameters ++ Register result = x10, haystack = x11, haystack_len = x12, needle = x13, needle_len = x14; ++ // temporary registers ++ Register mask1 = x20, match_mask = x21, first = x22, trailing_zeros = x23, mask2 = x24, tmp = x25; ++ // redefinitions ++ Register ch1 = x28, ch2 = x29; ++ RegSet spilled_regs = RegSet::range(x20, x25) + RegSet::range(x28, x29); + -+#include "precompiled.hpp" -+#include "asm/macroAssembler.inline.hpp" -+#include "classfile/javaClasses.hpp" -+#include "gc/shared/barrierSetAssembler.hpp" -+#include "interpreter/bytecodeHistogram.hpp" -+#include "interpreter/bytecodeTracer.hpp" -+#include "interpreter/interp_masm.hpp" -+#include "interpreter/interpreter.hpp" -+#include "interpreter/interpreterRuntime.hpp" -+#include "interpreter/templateInterpreterGenerator.hpp" -+#include "interpreter/templateTable.hpp" -+#include "memory/resourceArea.hpp" -+#include "oops/arrayOop.hpp" -+#include "oops/method.hpp" -+#include "oops/methodData.hpp" -+#include "oops/oop.inline.hpp" -+#include "prims/jvmtiExport.hpp" -+#include "prims/jvmtiThreadState.hpp" -+#include "runtime/arguments.hpp" -+#include "runtime/deoptimization.hpp" -+#include "runtime/frame.inline.hpp" -+#include "runtime/sharedRuntime.hpp" -+#include "runtime/stubRoutines.hpp" -+#include "runtime/synchronizer.hpp" -+#include "runtime/timer.hpp" -+#include "runtime/vframeArray.hpp" -+#include "utilities/debug.hpp" -+#include "utilities/macros.hpp" -+#include ++ __ push_reg(spilled_regs, sp); + -+#ifndef PRODUCT -+#include "oops/method.hpp" -+#endif // !PRODUCT ++ Label L_LOOP, L_LOOP_PROCEED, L_SMALL, L_HAS_ZERO, ++ L_HAS_ZERO_LOOP, L_CMP_LOOP, L_CMP_LOOP_NOMATCH, L_SMALL_PROCEED, ++ L_SMALL_HAS_ZERO_LOOP, L_SMALL_CMP_LOOP_NOMATCH, L_SMALL_CMP_LOOP, ++ L_POST_LOOP, L_CMP_LOOP_LAST_CMP, L_HAS_ZERO_LOOP_NOMATCH, ++ L_SMALL_CMP_LOOP_LAST_CMP, L_SMALL_CMP_LOOP_LAST_CMP2, ++ L_CMP_LOOP_LAST_CMP2, DONE, NOMATCH; + -+// Size of interpreter code. Increase if too small. Interpreter will -+// fail with a guarantee ("not enough space for interpreter generation"); -+// if too small. -+// Run with +PrintInterpreter to get the VM to print out the size. -+// Max size with JVMTI -+int TemplateInterpreter::InterpreterCodeSize = 256 * 1024; ++ __ ld(ch1, Address(needle)); ++ __ ld(ch2, Address(haystack)); ++ // src.length - pattern.length ++ __ sub(haystack_len, haystack_len, needle_len); + -+#define __ _masm-> ++ // first is needle[0] ++ __ andi(first, ch1, needle_isL ? 0xFF : 0xFFFF, first); ++ uint64_t mask0101 = UCONST64(0x0101010101010101); ++ uint64_t mask0001 = UCONST64(0x0001000100010001); ++ __ mv(mask1, haystack_isL ? mask0101 : mask0001); ++ __ mul(first, first, mask1); ++ uint64_t mask7f7f = UCONST64(0x7f7f7f7f7f7f7f7f); ++ uint64_t mask7fff = UCONST64(0x7fff7fff7fff7fff); ++ __ mv(mask2, haystack_isL ? 
mask7f7f : mask7fff); ++ if (needle_isL != haystack_isL) { ++ __ mv(tmp, ch1); ++ } ++ __ sub(haystack_len, haystack_len, wordSize / haystack_chr_size - 1); ++ __ blez(haystack_len, L_SMALL); + -+//----------------------------------------------------------------------------- ++ if (needle_isL != haystack_isL) { ++ __ inflate_lo32(ch1, tmp, match_mask, trailing_zeros); ++ } ++ // xorr, sub, orr, notr, andr ++ // compare and set match_mask[i] with 0x80/0x8000 (Latin1/UTF16) if ch2[i] == first[i] ++ // eg: ++ // first: aa aa aa aa aa aa aa aa ++ // ch2: aa aa li nx jd ka aa aa ++ // match_mask: 80 80 00 00 00 00 80 80 ++ __ compute_match_mask(ch2, first, match_mask, mask1, mask2); + -+address TemplateInterpreterGenerator::generate_slow_signature_handler() { -+ address entry = __ pc(); ++ // search first char of needle, if success, goto L_HAS_ZERO; ++ __ bnez(match_mask, L_HAS_ZERO); ++ __ sub(haystack_len, haystack_len, wordSize / haystack_chr_size); ++ __ add(result, result, wordSize / haystack_chr_size); ++ __ add(haystack, haystack, wordSize); ++ __ bltz(haystack_len, L_POST_LOOP); + -+ __ andi(esp, esp, -16); -+ __ mv(c_rarg3, esp); -+ // xmethod -+ // xlocals -+ // c_rarg3: first stack arg - wordSize -+ // adjust sp ++ __ bind(L_LOOP); ++ __ ld(ch2, Address(haystack)); ++ __ compute_match_mask(ch2, first, match_mask, mask1, mask2); ++ __ bnez(match_mask, L_HAS_ZERO); + -+ __ addi(sp, c_rarg3, -18 * wordSize); -+ __ addi(sp, sp, -2 * wordSize); -+ __ sd(ra, Address(sp, 0)); ++ __ bind(L_LOOP_PROCEED); ++ __ sub(haystack_len, haystack_len, wordSize / haystack_chr_size); ++ __ add(haystack, haystack, wordSize); ++ __ add(result, result, wordSize / haystack_chr_size); ++ __ bgez(haystack_len, L_LOOP); + -+ __ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::slow_signature_handler), -+ xmethod, xlocals, c_rarg3); ++ __ bind(L_POST_LOOP); ++ __ mv(ch2, -wordSize / haystack_chr_size); ++ __ ble(haystack_len, ch2, NOMATCH); // no extra characters to check ++ __ ld(ch2, Address(haystack)); ++ __ slli(haystack_len, haystack_len, LogBitsPerByte + haystack_chr_shift); ++ __ neg(haystack_len, haystack_len); ++ __ xorr(ch2, first, ch2); ++ __ sub(match_mask, ch2, mask1); ++ __ orr(ch2, ch2, mask2); ++ __ mv(trailing_zeros, -1); // all bits set ++ __ j(L_SMALL_PROCEED); + -+ // x10: result handler ++ __ align(OptoLoopAlignment); ++ __ bind(L_SMALL); ++ __ slli(haystack_len, haystack_len, LogBitsPerByte + haystack_chr_shift); ++ __ neg(haystack_len, haystack_len); ++ if (needle_isL != haystack_isL) { ++ __ inflate_lo32(ch1, tmp, match_mask, trailing_zeros); ++ } ++ __ xorr(ch2, first, ch2); ++ __ sub(match_mask, ch2, mask1); ++ __ orr(ch2, ch2, mask2); ++ __ mv(trailing_zeros, -1); // all bits set + -+ // Stack layout: -+ // sp: return address <- sp -+ // 1 garbage -+ // 8 integer args (if static first is unused) -+ // 1 float/double identifiers -+ // 8 double args -+ // stack args <- esp -+ // garbage -+ // expression stack bottom -+ // bcp (NULL) -+ // ... ++ __ bind(L_SMALL_PROCEED); ++ __ srl(trailing_zeros, trailing_zeros, haystack_len); // mask. zeroes on useless bits. 
++ __ notr(ch2, ch2); ++ __ andr(match_mask, match_mask, ch2); ++ __ andr(match_mask, match_mask, trailing_zeros); // clear useless bits and check ++ __ beqz(match_mask, NOMATCH); + -+ // Restore RA -+ __ ld(ra, Address(sp, 0)); -+ __ addi(sp, sp , 2 * wordSize); ++ __ bind(L_SMALL_HAS_ZERO_LOOP); ++ __ ctzc_bit(trailing_zeros, match_mask, haystack_isL, ch2, tmp); // count trailing zeros ++ __ addi(trailing_zeros, trailing_zeros, haystack_isL ? 7 : 15); ++ __ mv(ch2, wordSize / haystack_chr_size); ++ __ ble(needle_len, ch2, L_SMALL_CMP_LOOP_LAST_CMP2); ++ __ compute_index(haystack, trailing_zeros, match_mask, result, ch2, tmp, haystack_isL); ++ __ mv(trailing_zeros, wordSize / haystack_chr_size); ++ __ bne(ch1, ch2, L_SMALL_CMP_LOOP_NOMATCH); + -+ // Do FP first so we can use c_rarg3 as temp -+ __ lwu(c_rarg3, Address(sp, 9 * wordSize)); // float/double identifiers ++ __ bind(L_SMALL_CMP_LOOP); ++ __ shadd(first, trailing_zeros, needle, first, needle_chr_shift); ++ __ shadd(ch2, trailing_zeros, haystack, ch2, haystack_chr_shift); ++ needle_isL ? __ lbu(first, Address(first)) : __ lhu(first, Address(first)); ++ haystack_isL ? __ lbu(ch2, Address(ch2)) : __ lhu(ch2, Address(ch2)); ++ __ add(trailing_zeros, trailing_zeros, 1); ++ __ bge(trailing_zeros, needle_len, L_SMALL_CMP_LOOP_LAST_CMP); ++ __ beq(first, ch2, L_SMALL_CMP_LOOP); + -+ for (int i = 0; i < Argument::n_float_register_parameters_c; i++) { -+ const FloatRegister r = g_FPArgReg[i]; -+ Label d, done; ++ __ bind(L_SMALL_CMP_LOOP_NOMATCH); ++ __ beqz(match_mask, NOMATCH); ++ __ ctzc_bit(trailing_zeros, match_mask, haystack_isL, tmp, ch2); ++ __ addi(trailing_zeros, trailing_zeros, haystack_isL ? 7 : 15); ++ __ add(result, result, 1); ++ __ add(haystack, haystack, haystack_chr_size); ++ __ j(L_SMALL_HAS_ZERO_LOOP); + -+ __ andi(t0, c_rarg3, 1UL << i); -+ __ bnez(t0, d); -+ __ flw(r, Address(sp, (10 + i) * wordSize)); -+ __ j(done); -+ __ bind(d); -+ __ fld(r, Address(sp, (10 + i) * wordSize)); -+ __ bind(done); -+ } ++ __ align(OptoLoopAlignment); ++ __ bind(L_SMALL_CMP_LOOP_LAST_CMP); ++ __ bne(first, ch2, L_SMALL_CMP_LOOP_NOMATCH); ++ __ j(DONE); + -+ // c_rarg0 contains the result from the call of -+ // InterpreterRuntime::slow_signature_handler so we don't touch it -+ // here. It will be loaded with the JNIEnv* later. -+ for (int i = 1; i < Argument::n_int_register_parameters_c; i++) { -+ const Register rm = g_INTArgReg[i]; -+ __ ld(rm, Address(sp, i * wordSize)); -+ } ++ __ align(OptoLoopAlignment); ++ __ bind(L_SMALL_CMP_LOOP_LAST_CMP2); ++ __ compute_index(haystack, trailing_zeros, match_mask, result, ch2, tmp, haystack_isL); ++ __ bne(ch1, ch2, L_SMALL_CMP_LOOP_NOMATCH); ++ __ j(DONE); + -+ __ addi(sp, sp, 18 * wordSize); -+ __ ret(); ++ __ align(OptoLoopAlignment); ++ __ bind(L_HAS_ZERO); ++ __ ctzc_bit(trailing_zeros, match_mask, haystack_isL, tmp, ch2); ++ __ addi(trailing_zeros, trailing_zeros, haystack_isL ? 
7 : 15); ++ __ slli(needle_len, needle_len, BitsPerByte * wordSize / 2); ++ __ orr(haystack_len, haystack_len, needle_len); // restore needle_len(32bits) ++ __ sub(result, result, 1); // array index from 0, so result -= 1 + -+ return entry; -+} ++ __ bind(L_HAS_ZERO_LOOP); ++ __ mv(needle_len, wordSize / haystack_chr_size); ++ __ srli(ch2, haystack_len, BitsPerByte * wordSize / 2); ++ __ bge(needle_len, ch2, L_CMP_LOOP_LAST_CMP2); ++ // load next 8 bytes from haystack, and increase result index ++ __ compute_index(haystack, trailing_zeros, match_mask, result, ch2, tmp, haystack_isL); ++ __ add(result, result, 1); ++ __ mv(trailing_zeros, wordSize / haystack_chr_size); ++ __ bne(ch1, ch2, L_CMP_LOOP_NOMATCH); + -+// Various method entries -+address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) { -+ // xmethod: Method* -+ // x30: sender sp -+ // esp: args ++ // compare one char ++ __ bind(L_CMP_LOOP); ++ __ shadd(needle_len, trailing_zeros, needle, needle_len, needle_chr_shift); ++ needle_isL ? __ lbu(needle_len, Address(needle_len)) : __ lhu(needle_len, Address(needle_len)); ++ __ shadd(ch2, trailing_zeros, haystack, ch2, haystack_chr_shift); ++ haystack_isL ? __ lbu(ch2, Address(ch2)) : __ lhu(ch2, Address(ch2)); ++ __ add(trailing_zeros, trailing_zeros, 1); // next char index ++ __ srli(tmp, haystack_len, BitsPerByte * wordSize / 2); ++ __ bge(trailing_zeros, tmp, L_CMP_LOOP_LAST_CMP); ++ __ beq(needle_len, ch2, L_CMP_LOOP); + -+ if (!InlineIntrinsics) { -+ return NULL; // Generate a vanilla entry -+ } ++ __ bind(L_CMP_LOOP_NOMATCH); ++ __ beqz(match_mask, L_HAS_ZERO_LOOP_NOMATCH); ++ __ ctzc_bit(trailing_zeros, match_mask, haystack_isL, needle_len, ch2); // find next "first" char index ++ __ addi(trailing_zeros, trailing_zeros, haystack_isL ? 7 : 15); ++ __ add(haystack, haystack, haystack_chr_size); ++ __ j(L_HAS_ZERO_LOOP); + -+ // These don't need a safepoint check because they aren't virtually -+ // callable. We won't enter these intrinsics from compiled code. -+ // If in the future we added an intrinsic which was virtually callable -+ // we'd have to worry about how to safepoint so that this code is used. 
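As an aside for readers tracing the indexof stub: compute_match_mask is the standard SWAR "broadcast the first character, then find the zero byte" trick sketched in the aa/80 example comment above. A standalone scalar equivalent for the Latin-1 lane width (illustrative only, not part of the patch; the helper name is made up):

#include <cstdint>

// Flags every byte lane of 'chunk' that equals 'ch' with 0x80, using the same
// mask1/mask2 constants as the stub's Latin-1 path. The lowest set 0x80 is
// always a real match; lanes above it can be spurious, which is harmless here
// because the stub re-checks each candidate position against the needle anyway.
static inline uint64_t match_mask_latin1(uint64_t chunk, uint8_t ch) {
  const uint64_t mask1 = 0x0101010101010101ULL;
  const uint64_t mask2 = 0x7f7f7f7f7f7f7f7fULL;
  uint64_t first = (uint64_t)ch * mask1;   // broadcast ch into all 8 lanes
  uint64_t x = chunk ^ first;              // a lane becomes zero iff it matched
  return (x - mask1) & ~(x | mask2);       // the xorr/sub/orr/notr/andr sequence
}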
++ __ align(OptoLoopAlignment); ++ __ bind(L_CMP_LOOP_LAST_CMP); ++ __ bne(needle_len, ch2, L_CMP_LOOP_NOMATCH); ++ __ j(DONE); + -+ // mathematical functions inlined by compiler -+ // (interpreter must provide identical implementation -+ // in order to avoid monotonicity bugs when switching -+ // from interpreter to compiler in the middle of some -+ // computation) -+ // -+ // stack: -+ // [ arg ] <-- esp -+ // [ arg ] -+ // retaddr in ra ++ __ align(OptoLoopAlignment); ++ __ bind(L_CMP_LOOP_LAST_CMP2); ++ __ compute_index(haystack, trailing_zeros, match_mask, result, ch2, tmp, haystack_isL); ++ __ add(result, result, 1); ++ __ bne(ch1, ch2, L_CMP_LOOP_NOMATCH); ++ __ j(DONE); + -+ address fn = NULL; -+ address entry_point = NULL; -+ Register continuation = ra; -+ switch (kind) { -+ case Interpreter::java_lang_math_abs: -+ entry_point = __ pc(); -+ __ fld(f10, Address(esp)); -+ __ fabs_d(f10, f10); -+ __ mv(sp, x30); // Restore caller's SP -+ break; -+ case Interpreter::java_lang_math_sqrt: -+ entry_point = __ pc(); -+ __ fld(f10, Address(esp)); -+ __ fsqrt_d(f10, f10); -+ __ mv(sp, x30); -+ break; -+ case Interpreter::java_lang_math_sin : -+ entry_point = __ pc(); -+ __ fld(f10, Address(esp)); -+ __ mv(sp, x30); -+ __ mv(x9, ra); -+ continuation = x9; // The first callee-saved register -+ if (StubRoutines::dsin() == NULL) { -+ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dsin); -+ } else { -+ fn = CAST_FROM_FN_PTR(address, StubRoutines::dsin()); -+ } -+ __ mv(t0, fn); -+ __ jalr(t0); -+ break; -+ case Interpreter::java_lang_math_cos : -+ entry_point = __ pc(); -+ __ fld(f10, Address(esp)); -+ __ mv(sp, x30); -+ __ mv(x9, ra); -+ continuation = x9; // The first callee-saved register -+ if (StubRoutines::dcos() == NULL) { -+ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dcos); -+ } else { -+ fn = CAST_FROM_FN_PTR(address, StubRoutines::dcos()); -+ } -+ __ mv(t0, fn); -+ __ jalr(t0); -+ break; -+ case Interpreter::java_lang_math_tan : -+ entry_point = __ pc(); -+ __ fld(f10, Address(esp)); -+ __ mv(sp, x30); -+ __ mv(x9, ra); -+ continuation = x9; // The first callee-saved register -+ if (StubRoutines::dtan() == NULL) { -+ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dtan); -+ } else { -+ fn = CAST_FROM_FN_PTR(address, StubRoutines::dtan()); -+ } -+ __ mv(t0, fn); -+ __ jalr(t0); -+ break; -+ case Interpreter::java_lang_math_log : -+ entry_point = __ pc(); -+ __ fld(f10, Address(esp)); -+ __ mv(sp, x30); -+ __ mv(x9, ra); -+ continuation = x9; // The first callee-saved register -+ if (StubRoutines::dlog() == NULL) { -+ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dlog); -+ } else { -+ fn = CAST_FROM_FN_PTR(address, StubRoutines::dlog()); -+ } -+ __ mv(t0, fn); -+ __ jalr(t0); -+ break; -+ case Interpreter::java_lang_math_log10 : -+ entry_point = __ pc(); -+ __ fld(f10, Address(esp)); -+ __ mv(sp, x30); -+ __ mv(x9, ra); -+ continuation = x9; // The first callee-saved register -+ if (StubRoutines::dlog10() == NULL) { -+ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dlog10); -+ } else { -+ fn = CAST_FROM_FN_PTR(address, StubRoutines::dlog10()); -+ } -+ __ mv(t0, fn); -+ __ jalr(t0); -+ break; -+ case Interpreter::java_lang_math_exp : -+ entry_point = __ pc(); -+ __ fld(f10, Address(esp)); -+ __ mv(sp, x30); -+ __ mv(x9, ra); -+ continuation = x9; // The first callee-saved register -+ if (StubRoutines::dexp() == NULL) { -+ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dexp); -+ } else { -+ fn = CAST_FROM_FN_PTR(address, StubRoutines::dexp()); -+ } -+ __ mv(t0, fn); -+ __ jalr(t0); -+ break; 
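In the same spirit, the DIFF / CALCULATE_DIFFERENCE blocks of the compare_long_string stubs generated earlier reduce "first differing character" to a count-trailing-zeros over the XOR of two 8-byte chunks. A scalar sketch of that reduction (illustrative only; the explicit alignment below mirrors what the stub's ctzc_bit count is assumed to provide, and __builtin_ctzll is a GCC/Clang builtin):

#include <cstdint>

// Given two 8-byte chunks known to differ, return the signed difference of the
// first differing element, 1-byte (Latin-1) or 2-byte (UTF-16) wide, exactly as
// the srl/andi/sub tail of the stub does.
static int first_difference(uint64_t a, uint64_t b, bool latin1) {
  uint64_t x = a ^ b;                                     // nonzero by precondition
  int shift = __builtin_ctzll(x) & (latin1 ? ~7 : ~15);   // align to the element boundary
  uint64_t mask = latin1 ? 0xFF : 0xFFFF;
  return (int)((a >> shift) & mask) - (int)((b >> shift) & mask);
}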
-+ case Interpreter::java_lang_math_pow : -+ entry_point = __ pc(); -+ __ mv(x9, ra); -+ continuation = x9; -+ __ fld(f10, Address(esp, 2 * Interpreter::stackElementSize)); -+ __ fld(f11, Address(esp)); -+ __ mv(sp, x30); -+ if (StubRoutines::dpow() == NULL) { -+ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dpow); -+ } else { -+ fn = CAST_FROM_FN_PTR(address, StubRoutines::dpow()); -+ } -+ __ mv(t0, fn); -+ __ jalr(t0); -+ break; -+ case Interpreter::java_lang_math_fmaD : -+ if (UseFMA) { -+ entry_point = __ pc(); -+ __ fld(f10, Address(esp, 4 * Interpreter::stackElementSize)); -+ __ fld(f11, Address(esp, 2 * Interpreter::stackElementSize)); -+ __ fld(f12, Address(esp)); -+ __ fmadd_d(f10, f10, f11, f12); -+ __ mv(sp, x30); // Restore caller's SP -+ } -+ break; -+ case Interpreter::java_lang_math_fmaF : -+ if (UseFMA) { -+ entry_point = __ pc(); -+ __ flw(f10, Address(esp, 2 * Interpreter::stackElementSize)); -+ __ flw(f11, Address(esp, Interpreter::stackElementSize)); -+ __ flw(f12, Address(esp)); -+ __ fmadd_s(f10, f10, f11, f12); -+ __ mv(sp, x30); // Restore caller's SP -+ } -+ break; -+ default: -+ ; ++ __ align(OptoLoopAlignment); ++ __ bind(L_HAS_ZERO_LOOP_NOMATCH); ++ // 1) Restore "result" index. Index was wordSize/str2_chr_size * N until ++ // L_HAS_ZERO block. Byte octet was analyzed in L_HAS_ZERO_LOOP, ++ // so, result was increased at max by wordSize/str2_chr_size - 1, so, ++ // respective high bit wasn't changed. L_LOOP_PROCEED will increase ++ // result by analyzed characters value, so, we can just reset lower bits ++ // in result here. Clear 2 lower bits for UU/UL and 3 bits for LL ++ // 2) restore needle_len and haystack_len values from "compressed" haystack_len ++ // 3) advance haystack value to represent next haystack octet. result & 7/3 is ++ // index of last analyzed substring inside current octet. So, haystack in at ++ // respective start address. We need to advance it to next octet ++ __ andi(match_mask, result, wordSize / haystack_chr_size - 1); ++ __ srli(needle_len, haystack_len, BitsPerByte * wordSize / 2); ++ __ andi(result, result, haystack_isL ? -8 : -4); ++ __ slli(tmp, match_mask, haystack_chr_shift); ++ __ sub(haystack, haystack, tmp); ++ __ addw(haystack_len, haystack_len, zr); ++ __ j(L_LOOP_PROCEED); ++ ++ __ align(OptoLoopAlignment); ++ __ bind(NOMATCH); ++ __ mv(result, -1); ++ ++ __ bind(DONE); ++ __ pop_reg(spilled_regs, sp); ++ __ ret(); ++ return entry; + } -+ if (entry_point != NULL) { -+ __ jr(continuation); ++ ++ void generate_string_indexof_stubs() ++ { ++ StubRoutines::riscv::_string_indexof_linear_ll = generate_string_indexof_linear(true, true); ++ StubRoutines::riscv::_string_indexof_linear_uu = generate_string_indexof_linear(false, false); ++ StubRoutines::riscv::_string_indexof_linear_ul = generate_string_indexof_linear(true, false); + } + -+ return entry_point; -+} ++#ifdef COMPILER2 ++ address generate_mulAdd() ++ { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "mulAdd"); + -+// Abstract method entry -+// Attempt to execute abstract method. 
Throw exception -+address TemplateInterpreterGenerator::generate_abstract_entry(void) { -+ // xmethod: Method* -+ // x30: sender SP ++ address entry = __ pc(); + -+ address entry_point = __ pc(); ++ const Register out = x10; ++ const Register in = x11; ++ const Register offset = x12; ++ const Register len = x13; ++ const Register k = x14; ++ const Register tmp = x28; + -+ // abstract method entry ++ BLOCK_COMMENT("Entry:"); ++ __ enter(); ++ __ mul_add(out, in, offset, len, k, tmp); ++ __ leave(); ++ __ ret(); + -+ // pop return address, reset last_sp to NULL -+ __ empty_expression_stack(); -+ __ restore_bcp(); // bcp must be correct for exception handler (was destroyed) -+ __ restore_locals(); // make sure locals pointer is correct as well (was destroyed) ++ return entry; ++ } + -+ // throw exception -+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::throw_AbstractMethodErrorWithMethod), -+ xmethod); -+ // the call_VM checks for exception, so we should never return here. -+ __ should_not_reach_here(); ++ /** ++ * Arguments: ++ * ++ * Input: ++ * c_rarg0 - x address ++ * c_rarg1 - x length ++ * c_rarg2 - y address ++ * c_rarg3 - y length ++ * c_rarg4 - z address ++ * c_rarg5 - z length ++ */ ++ address generate_multiplyToLen() ++ { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "multiplyToLen"); ++ address entry = __ pc(); + -+ return entry_point; -+} ++ const Register x = x10; ++ const Register xlen = x11; ++ const Register y = x12; ++ const Register ylen = x13; ++ const Register z = x14; ++ const Register zlen = x15; + -+address TemplateInterpreterGenerator::generate_StackOverflowError_handler() { -+ address entry = __ pc(); ++ const Register tmp1 = x16; ++ const Register tmp2 = x17; ++ const Register tmp3 = x7; ++ const Register tmp4 = x28; ++ const Register tmp5 = x29; ++ const Register tmp6 = x30; ++ const Register tmp7 = x31; + -+#ifdef ASSERT -+ { -+ Label L; -+ __ ld(t0, Address(fp, frame::interpreter_frame_monitor_block_top_offset * wordSize)); -+ __ mv(t1, sp); -+ // maximal sp for current fp (stack grows negative) -+ // check if frame is complete -+ __ bge(t0, t1, L); -+ __ stop ("interpreter frame not set up"); -+ __ bind(L); -+ } -+#endif // ASSERT -+ // Restore bcp under the assumption that the current frame is still -+ // interpreted -+ __ restore_bcp(); ++ BLOCK_COMMENT("Entry:"); ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ __ multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7); ++ __ leave(); // required for proper stackwalking of RuntimeStub frame ++ __ ret(); + -+ // expression stack must be empty before entering the VM if an -+ // exception happened -+ __ empty_expression_stack(); -+ // throw exception -+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_StackOverflowError)); -+ return entry; -+} ++ return entry; ++ } + -+address TemplateInterpreterGenerator::generate_ArrayIndexOutOfBounds_handler() { -+ address entry = __ pc(); -+ // expression stack must be empty before entering the VM if an -+ // exception happened -+ __ empty_expression_stack(); -+ // setup parameters ++ address generate_squareToLen() ++ { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "squareToLen"); ++ address entry = __ pc(); + -+ // convention: expect aberrant index in register x11 -+ __ zero_extend(c_rarg2, x11, 32); -+ // convention: expect array in register x13 -+ __ mv(c_rarg1, x13); -+ __ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, 
-+ InterpreterRuntime:: -+ throw_ArrayIndexOutOfBoundsException), -+ c_rarg1, c_rarg2); -+ return entry; -+} ++ const Register x = x10; ++ const Register xlen = x11; ++ const Register z = x12; ++ const Register zlen = x13; ++ const Register y = x14; // == x ++ const Register ylen = x15; // == xlen + -+address TemplateInterpreterGenerator::generate_ClassCastException_handler() { -+ address entry = __ pc(); ++ const Register tmp1 = x16; ++ const Register tmp2 = x17; ++ const Register tmp3 = x7; ++ const Register tmp4 = x28; ++ const Register tmp5 = x29; ++ const Register tmp6 = x30; ++ const Register tmp7 = x31; + -+ // object is at TOS -+ __ pop_reg(c_rarg1); ++ BLOCK_COMMENT("Entry:"); ++ __ enter(); ++ __ mv(y, x); ++ __ mv(ylen, xlen); ++ __ multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7); ++ __ leave(); ++ __ ret(); + -+ // expression stack must be empty before entering the VM if an -+ // exception happened -+ __ empty_expression_stack(); ++ return entry; ++ } + -+ __ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, -+ InterpreterRuntime:: -+ throw_ClassCastException), -+ c_rarg1); -+ return entry; -+} ++ // Arguments: ++ // ++ // Input: ++ // c_rarg0 - newArr address ++ // c_rarg1 - oldArr address ++ // c_rarg2 - newIdx ++ // c_rarg3 - shiftCount ++ // c_rarg4 - numIter ++ // ++ address generate_bigIntegerLeftShift() { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "bigIntegerLeftShiftWorker"); ++ address entry = __ pc(); + -+address TemplateInterpreterGenerator::generate_exception_handler_common( -+ const char* name, const char* message, bool pass_oop) { -+ assert(!pass_oop || message == NULL, "either oop or message but not both"); -+ address entry = __ pc(); -+ if (pass_oop) { -+ // object is at TOS -+ __ pop_reg(c_rarg2); -+ } -+ // expression stack must be empty before entering the VM if an -+ // exception happened -+ __ empty_expression_stack(); -+ // setup parameters -+ __ la(c_rarg1, Address((address)name)); -+ if (pass_oop) { -+ __ call_VM(x10, CAST_FROM_FN_PTR(address, -+ InterpreterRuntime:: -+ create_klass_exception), -+ c_rarg1, c_rarg2); -+ } else { -+ // kind of lame ExternalAddress can't take NULL because -+ // external_word_Relocation will assert. 
-+ if (message != NULL) { -+ __ la(c_rarg2, Address((address)message)); -+ } else { -+ __ mv(c_rarg2, NULL_WORD); -+ } -+ __ call_VM(x10, -+ CAST_FROM_FN_PTR(address, InterpreterRuntime::create_exception), -+ c_rarg1, c_rarg2); -+ } -+ // throw exception -+ __ j(address(Interpreter::throw_exception_entry())); -+ return entry; -+} ++ Label loop, exit; + -+address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, int step, size_t index_size) { -+ address entry = __ pc(); ++ Register newArr = c_rarg0; ++ Register oldArr = c_rarg1; ++ Register newIdx = c_rarg2; ++ Register shiftCount = c_rarg3; ++ Register numIter = c_rarg4; + -+ // Restore stack bottom in case i2c adjusted stack -+ __ ld(esp, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); -+ // and NULL it as marker that esp is now tos until next java call -+ __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); -+ __ restore_bcp(); -+ __ restore_locals(); -+ __ restore_constant_pool_cache(); -+ __ get_method(xmethod); ++ Register shiftRevCount = c_rarg5; ++ Register oldArrNext = t1; + -+ if (state == atos) { -+ Register obj = x10; -+ Register mdp = x11; -+ Register tmp = x12; -+ __ ld(mdp, Address(xmethod, Method::method_data_offset())); -+ __ profile_return_type(mdp, obj, tmp); -+ } ++ __ beqz(numIter, exit); ++ __ shadd(newArr, newIdx, newArr, t0, 2); + -+ // Pop N words from the stack -+ __ get_cache_and_index_at_bcp(x11, x12, 1, index_size); -+ __ ld(x11, Address(x11, ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); -+ __ andi(x11, x11, ConstantPoolCacheEntry::parameter_size_mask); ++ __ li(shiftRevCount, 32); ++ __ sub(shiftRevCount, shiftRevCount, shiftCount); + -+ __ shadd(esp, x11, esp, t0, 3); ++ __ bind(loop); ++ __ addi(oldArrNext, oldArr, 4); ++ __ vsetvli(t0, numIter, Assembler::e32, Assembler::m4); ++ __ vle32_v(v0, oldArr); ++ __ vle32_v(v4, oldArrNext); ++ __ vsll_vx(v0, v0, shiftCount); ++ __ vsrl_vx(v4, v4, shiftRevCount); ++ __ vor_vv(v0, v0, v4); ++ __ vse32_v(v0, newArr); ++ __ sub(numIter, numIter, t0); ++ __ shadd(oldArr, t0, oldArr, t1, 2); ++ __ shadd(newArr, t0, newArr, t1, 2); ++ __ bnez(numIter, loop); + -+ // Restore machine SP -+ __ ld(t0, Address(xmethod, Method::const_offset())); -+ __ lhu(t0, Address(t0, ConstMethod::max_stack_offset())); -+ __ addi(t0, t0, frame::interpreter_frame_monitor_size() + 2); -+ __ ld(t1, -+ Address(fp, frame::interpreter_frame_initial_sp_offset * wordSize)); -+ __ slli(t0, t0, 3); -+ __ sub(t0, t1, t0); -+ __ andi(sp, t0, -16); ++ __ bind(exit); ++ __ ret(); + -+ __ check_and_handle_popframe(xthread); -+ __ check_and_handle_earlyret(xthread); ++ return entry; ++ } + -+ __ get_dispatch(); -+ __ dispatch_next(state, step); ++ // Arguments: ++ // ++ // Input: ++ // c_rarg0 - newArr address ++ // c_rarg1 - oldArr address ++ // c_rarg2 - newIdx ++ // c_rarg3 - shiftCount ++ // c_rarg4 - numIter ++ // ++ address generate_bigIntegerRightShift() { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "bigIntegerRightShiftWorker"); ++ address entry = __ pc(); + -+ return entry; -+} ++ Label loop, exit; + -+address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state, -+ int step, -+ address continuation) { -+ address entry = __ pc(); -+ __ restore_bcp(); -+ __ restore_locals(); -+ __ restore_constant_pool_cache(); -+ __ get_method(xmethod); -+ __ get_dispatch(); ++ Register newArr = c_rarg0; ++ Register oldArr = c_rarg1; ++ Register newIdx = c_rarg2; ++ Register 
shiftCount = c_rarg3; ++ Register numIter = c_rarg4; ++ Register idx = numIter; + -+ // Calculate stack limit -+ __ ld(t0, Address(xmethod, Method::const_offset())); -+ __ lhu(t0, Address(t0, ConstMethod::max_stack_offset())); -+ __ addi(t0, t0, frame::interpreter_frame_monitor_size() + 2); -+ __ ld(t1, Address(fp, frame::interpreter_frame_initial_sp_offset * wordSize)); -+ __ slli(t0, t0, 3); -+ __ sub(t0, t1, t0); -+ __ andi(sp, t0, -16); ++ Register shiftRevCount = c_rarg5; ++ Register oldArrNext = c_rarg6; ++ Register newArrCur = t0; ++ Register oldArrCur = t1; + -+ // Restore expression stack pointer -+ __ ld(esp, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); -+ // NULL last_sp until next java call -+ __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ __ beqz(idx, exit); ++ __ shadd(newArr, newIdx, newArr, t0, 2); + -+ // handle exceptions -+ { -+ Label L; -+ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); -+ __ beqz(t0, L); -+ __ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_pending_exception)); -+ __ should_not_reach_here(); -+ __ bind(L); -+ } ++ __ li(shiftRevCount, 32); ++ __ sub(shiftRevCount, shiftRevCount, shiftCount); + -+ if (continuation == NULL) { -+ __ dispatch_next(state, step); -+ } else { -+ __ jump_to_entry(continuation); -+ } -+ return entry; -+} ++ __ bind(loop); ++ __ vsetvli(t0, idx, Assembler::e32, Assembler::m4); ++ __ sub(idx, idx, t0); ++ __ shadd(oldArrNext, idx, oldArr, t1, 2); ++ __ shadd(newArrCur, idx, newArr, t1, 2); ++ __ addi(oldArrCur, oldArrNext, 4); ++ __ vle32_v(v0, oldArrCur); ++ __ vle32_v(v4, oldArrNext); ++ __ vsrl_vx(v0, v0, shiftCount); ++ __ vsll_vx(v4, v4, shiftRevCount); ++ __ vor_vv(v0, v0, v4); ++ __ vse32_v(v0, newArrCur); ++ __ bnez(idx, loop); + -+address TemplateInterpreterGenerator::generate_result_handler_for(BasicType type) { -+ address entry = __ pc(); -+ if (type == T_OBJECT) { -+ // retrieve result from frame -+ __ ld(x10, Address(fp, frame::interpreter_frame_oop_temp_offset * wordSize)); -+ // and verify it -+ __ verify_oop(x10); -+ } else { -+ __ cast_primitive_type(type, x10); ++ __ bind(exit); ++ __ ret(); ++ ++ return entry; + } ++#endif + -+ __ ret(); // return from result handler -+ return entry; -+} ++#ifdef COMPILER2 ++ class MontgomeryMultiplyGenerator : public MacroAssembler { + -+address TemplateInterpreterGenerator::generate_safept_entry_for(TosState state, -+ address runtime_entry) { -+ assert_cond(runtime_entry != NULL); -+ address entry = __ pc(); -+ __ push(state); -+ __ call_VM(noreg, runtime_entry); -+ __ membar(MacroAssembler::AnyAny); -+ __ dispatch_via(vtos, Interpreter::_normal_table.table_for(vtos)); -+ return entry; -+} ++ Register Pa_base, Pb_base, Pn_base, Pm_base, inv, Rlen, Ra, Rb, Rm, Rn, ++ Pa, Pb, Pn, Pm, Rhi_ab, Rlo_ab, Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2, Ri, Rj; + -+// Helpers for commoning out cases in the various type of method entries. 
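For reference, the RVV loops in the two BigInteger shift workers above compute the same limb combination as this scalar form (illustrative only; the Java worker these intrinsify reads one limb past numIter, so the extra oldArr element is assumed readable, and 0 < shift < 32):

#include <cstdint>
#include <cstddef>

// Scalar equivalent of bigIntegerLeftShiftWorker's vector loop: each 32-bit
// limb is merged with its right neighbour so the magnitude shifts left by
// 'shift' bits. The right-shift worker is the mirror image, walking downward.
static void big_integer_left_shift(uint32_t* newArr, const uint32_t* oldArr,
                                   size_t newIdx, unsigned shift, size_t numIter) {
  unsigned shiftRev = 32 - shift;
  for (size_t i = 0; i < numIter; i++) {
    newArr[newIdx + i] = (oldArr[i] << shift) | (oldArr[i + 1] >> shiftRev);
  }
}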
-+// ++ RegSet _toSave; ++ bool _squaring; + ++ public: ++ MontgomeryMultiplyGenerator (Assembler *as, bool squaring) ++ : MacroAssembler(as->code()), _squaring(squaring) { + -+// increment invocation count & check for overflow -+// -+// Note: checking for negative value instead of overflow -+// so we have a 'sticky' overflow test -+// -+// xmethod: method -+// -+void TemplateInterpreterGenerator::generate_counter_incr(Label* overflow, -+ Label* profile_method, -+ Label* profile_method_continue) { -+ Label done; -+ // Note: In tiered we increment either counters in Method* or in MDO depending if we're profiling or not. -+ if (TieredCompilation) { -+ int increment = InvocationCounter::count_increment; -+ Label no_mdo; -+ if (ProfileInterpreter) { -+ // Are we profiling? -+ __ ld(x10, Address(xmethod, Method::method_data_offset())); -+ __ beqz(x10, no_mdo); -+ // Increment counter in the MDO -+ const Address mdo_invocation_counter(x10, in_bytes(MethodData::invocation_counter_offset()) + -+ in_bytes(InvocationCounter::counter_offset())); -+ const Address mask(x10, in_bytes(MethodData::invoke_mask_offset())); -+ __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, t0, t1, false, overflow); -+ __ j(done); -+ } -+ __ bind(no_mdo); -+ // Increment counter in MethodCounters -+ const Address invocation_counter(t1, -+ MethodCounters::invocation_counter_offset() + -+ InvocationCounter::counter_offset()); -+ __ get_method_counters(xmethod, t1, done); -+ const Address mask(t1, in_bytes(MethodCounters::invoke_mask_offset())); -+ __ increment_mask_and_jump(invocation_counter, increment, mask, t0, x11, false, overflow); -+ __ bind(done); -+ } else { // not TieredCompilation -+ const Address backedge_counter(t1, -+ MethodCounters::backedge_counter_offset() + -+ InvocationCounter::counter_offset()); -+ const Address invocation_counter(t1, -+ MethodCounters::invocation_counter_offset() + -+ InvocationCounter::counter_offset()); ++ // Register allocation + -+ __ get_method_counters(xmethod, t1, done); ++ Register reg = c_rarg0; ++ Pa_base = reg; // Argument registers ++ if (squaring) { ++ Pb_base = Pa_base; ++ } else { ++ Pb_base = ++reg; ++ } ++ Pn_base = ++reg; ++ Rlen= ++reg; ++ inv = ++reg; ++ Pm_base = ++reg; + -+ if (ProfileInterpreter) { // %%% Merge this into MethodData* -+ __ lwu(x11, Address(t1, MethodCounters::interpreter_invocation_counter_offset())); -+ __ addw(x11, x11, 1); -+ __ sw(x11, Address(t1, MethodCounters::interpreter_invocation_counter_offset())); -+ } -+ // Update standard invocation counters -+ __ lwu(x11, invocation_counter); -+ __ lwu(x10, backedge_counter); ++ // Working registers: ++ Ra = ++reg; // The current digit of a, b, n, and m. ++ Rb = ++reg; ++ Rm = ++reg; ++ Rn = ++reg; + -+ __ addw(x11, x11, InvocationCounter::count_increment); -+ __ andi(x10, x10, InvocationCounter::count_mask_value); ++ Pa = ++reg; // Pointers to the current/next digit of a, b, n, and m. ++ Pb = ++reg; ++ Pm = ++reg; ++ Pn = ++reg; + -+ __ sw(x11, invocation_counter); -+ __ addw(x10, x10, x11); // add both counters ++ tmp0 = ++reg; // Three registers which form a ++ tmp1 = ++reg; // triple-precision accumuator. ++ tmp2 = ++reg; + -+ // profile_method is non-null only for interpreted method so -+ // profile_method != NULL == !native_call ++ Ri = x6; // Inner and outer loop indexes. 
++ Rj = x7; + -+ if (ProfileInterpreter && profile_method != NULL) { -+ // Test to see if we should create a method data oop -+ __ ld(t1, Address(xmethod, Method::method_counters_offset())); -+ __ lwu(t1, Address(t1, in_bytes(MethodCounters::interpreter_profile_limit_offset()))); -+ __ blt(x10, t1, *profile_method_continue); ++ Rhi_ab = x28; // Product registers: low and high parts ++ Rlo_ab = x29; // of a*b and m*n. ++ Rhi_mn = x30; ++ Rlo_mn = x31; + -+ // if no method data exists, go to profile_method -+ __ test_method_data_pointer(t1, *profile_method); ++ // x18 and up are callee-saved. ++ _toSave = RegSet::range(x18, reg) + Pm_base; + } + -+ { -+ __ ld(t1, Address(xmethod, Method::method_counters_offset())); -+ __ lwu(t1, Address(t1, in_bytes(MethodCounters::interpreter_invocation_limit_offset()))); -+ __ bltu(x10, t1, done); -+ __ j(*overflow); // offset is too large so we have to use j instead of bgeu here ++ private: ++ void save_regs() { ++ push_reg(_toSave, sp); + } -+ __ bind(done); -+ } -+} + -+void TemplateInterpreterGenerator::generate_counter_overflow(Label& do_continue) { -+ __ mv(c_rarg1, zr); -+ __ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, InterpreterRuntime::frequency_counter_overflow), c_rarg1); -+ __ j(do_continue); -+} ++ void restore_regs() { ++ pop_reg(_toSave, sp); ++ } + -+// See if we've got enough room on the stack for locals plus overhead -+// below JavaThread::stack_overflow_limit(). If not, throw a StackOverflowError -+// without going through the signal handler, i.e., reserved and yellow zones -+// will not be made usable. The shadow zone must suffice to handle the -+// overflow. -+// The expression stack grows down incrementally, so the normal guard -+// page mechanism will work for that. -+// -+// NOTE: Since the additional locals are also always pushed (wasn't -+// obvious in generate_method_entry) so the guard should work for them -+// too. -+// -+// Args: -+// x13: number of additional locals this frame needs (what we must check) -+// xmethod: Method* -+// -+// Kills: -+// x10 -+void TemplateInterpreterGenerator::generate_stack_overflow_check(void) { ++ template ++ void unroll_2(Register count, T block) { ++ Label loop, end, odd; ++ beqz(count, end); ++ andi(t0, count, 0x1); ++ bnez(t0, odd); ++ align(16); ++ bind(loop); ++ (this->*block)(); ++ bind(odd); ++ (this->*block)(); ++ addi(count, count, -2); ++ bgtz(count, loop); ++ bind(end); ++ } + -+ // monitor entry size: see picture of stack set -+ // (generate_method_entry) and frame_amd64.hpp -+ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ template ++ void unroll_2(Register count, T block, Register d, Register s, Register tmp) { ++ Label loop, end, odd; ++ beqz(count, end); ++ andi(tmp, count, 0x1); ++ bnez(tmp, odd); ++ align(16); ++ bind(loop); ++ (this->*block)(d, s, tmp); ++ bind(odd); ++ (this->*block)(d, s, tmp); ++ addi(count, count, -2); ++ bgtz(count, loop); ++ bind(end); ++ } + -+ // total overhead size: entry_size + (saved fp through expr stack -+ // bottom). 
be sure to change this if you add/subtract anything -+ // to/from the overhead area -+ const int overhead_size = -+ -(frame::interpreter_frame_initial_sp_offset * wordSize) + entry_size; ++ void pre1(RegisterOrConstant i) { ++ block_comment("pre1"); ++ // Pa = Pa_base; ++ // Pb = Pb_base + i; ++ // Pm = Pm_base; ++ // Pn = Pn_base + i; ++ // Ra = *Pa; ++ // Rb = *Pb; ++ // Rm = *Pm; ++ // Rn = *Pn; ++ if (i.is_register()) { ++ slli(t0, i.as_register(), LogBytesPerWord); ++ } else { ++ mv(t0, i.as_constant()); ++ slli(t0, t0, LogBytesPerWord); ++ } + -+ const int page_size = os::vm_page_size(); ++ mv(Pa, Pa_base); ++ add(Pb, Pb_base, t0); ++ mv(Pm, Pm_base); ++ add(Pn, Pn_base, t0); + -+ Label after_frame_check; ++ ld(Ra, Address(Pa)); ++ ld(Rb, Address(Pb)); ++ ld(Rm, Address(Pm)); ++ ld(Rn, Address(Pn)); + -+ // see if the frame is greater than one page in size. If so, -+ // then we need to verify there is enough stack space remaining -+ // for the additional locals. -+ __ mv(t0, (page_size - overhead_size) / Interpreter::stackElementSize); -+ __ bleu(x13, t0, after_frame_check); ++ // Zero the m*n result. ++ mv(Rhi_mn, zr); ++ mv(Rlo_mn, zr); ++ } + -+ // compute sp as if this were going to be the last frame on -+ // the stack before the red zone ++ // The core multiply-accumulate step of a Montgomery ++ // multiplication. The idea is to schedule operations as a ++ // pipeline so that instructions with long latencies (loads and ++ // multiplies) have time to complete before their results are ++ // used. This most benefits in-order implementations of the ++ // architecture but out-of-order ones also benefit. ++ void step() { ++ block_comment("step"); ++ // MACC(Ra, Rb, tmp0, tmp1, tmp2); ++ // Ra = *++Pa; ++ // Rb = *--Pb; ++ mulhu(Rhi_ab, Ra, Rb); ++ mul(Rlo_ab, Ra, Rb); ++ addi(Pa, Pa, wordSize); ++ ld(Ra, Address(Pa)); ++ addi(Pb, Pb, -wordSize); ++ ld(Rb, Address(Pb)); ++ acc(Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2); // The pending m*n from the ++ // previous iteration. ++ // MACC(Rm, Rn, tmp0, tmp1, tmp2); ++ // Rm = *++Pm; ++ // Rn = *--Pn; ++ mulhu(Rhi_mn, Rm, Rn); ++ mul(Rlo_mn, Rm, Rn); ++ addi(Pm, Pm, wordSize); ++ ld(Rm, Address(Pm)); ++ addi(Pn, Pn, -wordSize); ++ ld(Rn, Address(Pn)); ++ acc(Rhi_ab, Rlo_ab, tmp0, tmp1, tmp2); ++ } + -+ // locals + overhead, in bytes -+ __ mv(x10, overhead_size); -+ __ shadd(x10, x13, x10, t0, Interpreter::logStackElementSize); // 2 slots per parameter. ++ void post1() { ++ block_comment("post1"); + -+ const Address stack_limit(xthread, JavaThread::stack_overflow_limit_offset()); -+ __ ld(t0, stack_limit); ++ // MACC(Ra, Rb, tmp0, tmp1, tmp2); ++ // Ra = *++Pa; ++ // Rb = *--Pb; ++ mulhu(Rhi_ab, Ra, Rb); ++ mul(Rlo_ab, Ra, Rb); ++ acc(Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2); // The pending m*n ++ acc(Rhi_ab, Rlo_ab, tmp0, tmp1, tmp2); + -+#ifdef ASSERT -+ Label limit_okay; -+ // Verify that thread stack limit is non-zero. -+ __ bnez(t0, limit_okay); -+ __ stop("stack overflow limit is zero"); -+ __ bind(limit_okay); -+#endif ++ // *Pm = Rm = tmp0 * inv; ++ mul(Rm, tmp0, inv); ++ sd(Rm, Address(Pm)); + -+ // Add stack limit to locals. -+ __ add(x10, x10, t0); ++ // MACC(Rm, Rn, tmp0, tmp1, tmp2); ++ // tmp0 = tmp1; tmp1 = tmp2; tmp2 = 0; ++ mulhu(Rhi_mn, Rm, Rn); + -+ // Check against the current stack bottom. 
-+ __ bgtu(sp, x10, after_frame_check); ++#ifndef PRODUCT ++ // assert(m[i] * n[0] + tmp0 == 0, "broken Montgomery multiply"); ++ { ++ mul(Rlo_mn, Rm, Rn); ++ add(Rlo_mn, tmp0, Rlo_mn); ++ Label ok; ++ beqz(Rlo_mn, ok); ++ stop("broken Montgomery multiply"); ++ bind(ok); ++ } ++#endif ++ // We have very carefully set things up so that ++ // m[i]*n[0] + tmp0 == 0 (mod b), so we don't have to calculate ++ // the lower half of Rm * Rn because we know the result already: ++ // it must be -tmp0. tmp0 + (-tmp0) must generate a carry iff ++ // tmp0 != 0. So, rather than do a mul and an cad we just set ++ // the carry flag iff tmp0 is nonzero. ++ // ++ // mul(Rlo_mn, Rm, Rn); ++ // cad(zr, tmp0, Rlo_mn); ++ addi(t0, tmp0, -1); ++ sltu(t0, t0, tmp0); // Set carry iff tmp0 is nonzero ++ cadc(tmp0, tmp1, Rhi_mn, t0); ++ adc(tmp1, tmp2, zr, t0); ++ mv(tmp2, zr); ++ } + -+ // Remove the incoming args, peeling the machine SP back to where it -+ // was in the caller. This is not strictly necessary, but unless we -+ // do so the stack frame may have a garbage FP; this ensures a -+ // correct call stack that we can always unwind. The ANDI should be -+ // unnecessary because the sender SP in x30 is always aligned, but -+ // it doesn't hurt. -+ __ andi(sp, x30, -16); ++ void pre2(Register i, Register len) { ++ block_comment("pre2"); ++ // Pa = Pa_base + i-len; ++ // Pb = Pb_base + len; ++ // Pm = Pm_base + i-len; ++ // Pn = Pn_base + len; + -+ // Note: the restored frame is not necessarily interpreted. -+ // Use the shared runtime version of the StackOverflowError. -+ assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "stub not yet generated"); -+ __ far_jump(RuntimeAddress(StubRoutines::throw_StackOverflowError_entry())); -+ -+ // all done with frame size check -+ __ bind(after_frame_check); -+} -+ -+// Allocate monitor and lock method (asm interpreter) -+// -+// Args: -+// xmethod: Method* -+// xlocals: locals -+// -+// Kills: -+// x10 -+// c_rarg0, c_rarg1, c_rarg2, c_rarg3, ...(param regs) -+// t0, t1 (temporary regs) -+void TemplateInterpreterGenerator::lock_method() { -+ // synchronize method -+ const Address access_flags(xmethod, Method::access_flags_offset()); -+ const Address monitor_block_top(fp, frame::interpreter_frame_monitor_block_top_offset * wordSize); -+ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ sub(Rj, i, len); ++ // Rj == i-len + -+#ifdef ASSERT -+ __ lwu(x10, access_flags); -+ __ verify_access_flags(x10, JVM_ACC_SYNCHRONIZED, "method doesn't need synchronization", false); -+#endif // ASSERT ++ // Ra as temp register ++ slli(Ra, Rj, LogBytesPerWord); ++ add(Pa, Pa_base, Ra); ++ add(Pm, Pm_base, Ra); ++ slli(Ra, len, LogBytesPerWord); ++ add(Pb, Pb_base, Ra); ++ add(Pn, Pn_base, Ra); + -+ // get synchronization object -+ { -+ Label done; -+ __ lwu(x10, access_flags); -+ __ andi(t0, x10, JVM_ACC_STATIC); -+ // get receiver (assume this is frequent case) -+ __ ld(x10, Address(xlocals, Interpreter::local_offset_in_bytes(0))); -+ __ beqz(t0, done); -+ __ load_mirror(x10, xmethod); ++ // Ra = *++Pa; ++ // Rb = *--Pb; ++ // Rm = *++Pm; ++ // Rn = *--Pn; ++ add(Pa, Pa, wordSize); ++ ld(Ra, Address(Pa)); ++ add(Pb, Pb, -wordSize); ++ ld(Rb, Address(Pb)); ++ add(Pm, Pm, wordSize); ++ ld(Rm, Address(Pm)); ++ add(Pn, Pn, -wordSize); ++ ld(Rn, Address(Pn)); + -+#ifdef ASSERT -+ { -+ Label L; -+ __ bnez(x10, L); -+ __ stop("synchronization object is NULL"); -+ __ bind(L); ++ mv(Rhi_mn, zr); ++ mv(Rlo_mn, zr); + } -+#endif // ASSERT + -+ __ bind(done); -+ } 
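A compiler-level view of the MACC step that the generator pipelines above: one 64x64->128 product folded into the triple-word accumulator kept in tmp0/tmp1/tmp2. Illustrative only; unsigned __int128 is a GCC/Clang extension, and the stub gets the same effect from mul/mulhu plus the cad/cadc/adc carry helpers:

#include <cstdint>

// acc(t2:t1:t0) += a * b, carried out the same way the mulhu/mul + acc() pair does it.
static inline void macc(uint64_t a, uint64_t b,
                        uint64_t& t0, uint64_t& t1, uint64_t& t2) {
  unsigned __int128 p  = (unsigned __int128)a * b;            // full 128-bit product
  unsigned __int128 lo = (unsigned __int128)t0 + (uint64_t)p; // low word, keep the carry
  t0 = (uint64_t)lo;
  unsigned __int128 hi = (unsigned __int128)t1 + (uint64_t)(p >> 64) + (uint64_t)(lo >> 64);
  t1 = (uint64_t)hi;
  t2 += (uint64_t)(hi >> 64);                                 // final carry into the top word
}

post1() then chooses m = tmp0 * inv mod 2^64, which makes tmp0 + m*n[0] a multiple of 2^64; that is why, as the comment in the generated code notes, only the high half of that product ever needs to be added back.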
++ void post2(Register i, Register len) { ++ block_comment("post2"); ++ sub(Rj, i, len); + -+ // add space for monitor & lock -+ __ add(sp, sp, - entry_size); // add space for a monitor entry -+ __ add(esp, esp, - entry_size); -+ __ mv(t0, esp); -+ __ sd(t0, monitor_block_top); // set new monitor block top -+ // store object -+ __ sd(x10, Address(esp, BasicObjectLock::obj_offset_in_bytes())); -+ __ mv(c_rarg1, esp); // object address -+ __ lock_object(c_rarg1); -+} ++ cad(tmp0, tmp0, Rlo_mn, t0); // The pending m*n, low part + -+// Generate a fixed interpreter frame. This is identical setup for -+// interpreted methods and for native methods hence the shared code. -+// -+// Args: -+// ra: return address -+// xmethod: Method* -+// xlocals: pointer to locals -+// xcpool: cp cache -+// stack_pointer: previous sp -+void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { -+ // initialize fixed part of activation frame -+ if (native_call) { -+ __ add(esp, sp, - 14 * wordSize); -+ __ mv(xbcp, zr); -+ __ add(sp, sp, - 14 * wordSize); -+ // add 2 zero-initialized slots for native calls -+ __ sd(zr, Address(sp, 13 * wordSize)); -+ __ sd(zr, Address(sp, 12 * wordSize)); -+ } else { -+ __ add(esp, sp, - 12 * wordSize); -+ __ ld(t0, Address(xmethod, Method::const_offset())); // get ConstMethod -+ __ add(xbcp, t0, in_bytes(ConstMethod::codes_offset())); // get codebase -+ __ add(sp, sp, - 12 * wordSize); -+ } -+ __ sd(xbcp, Address(sp, wordSize)); -+ __ sd(esp, Address(sp, 0)); ++ // As soon as we know the least significant digit of our result, ++ // store it. ++ // Pm_base[i-len] = tmp0; ++ // Rj as temp register ++ slli(Rj, Rj, LogBytesPerWord); ++ add(Rj, Pm_base, Rj); ++ sd(tmp0, Address(Rj)); + -+ if (ProfileInterpreter) { -+ Label method_data_continue; -+ __ ld(t0, Address(xmethod, Method::method_data_offset())); -+ __ beqz(t0, method_data_continue); -+ __ la(t0, Address(t0, in_bytes(MethodData::data_offset()))); -+ __ bind(method_data_continue); -+ } ++ // tmp0 = tmp1; tmp1 = tmp2; tmp2 = 0; ++ cadc(tmp0, tmp1, Rhi_mn, t0); // The pending m*n, high part ++ adc(tmp1, tmp2, zr, t0); ++ mv(tmp2, zr); ++ } + -+ __ sd(xmethod, Address(sp, 7 * wordSize)); -+ __ sd(ProfileInterpreter ? t0 : zr, Address(sp, 6 * wordSize)); ++ // A carry in tmp0 after Montgomery multiplication means that we ++ // should subtract multiples of n from our result in m. We'll ++ // keep doing that until there is no carry. ++ void normalize(Register len) { ++ block_comment("normalize"); ++ // while (tmp0) ++ // tmp0 = sub(Pm_base, Pn_base, tmp0, len); ++ Label loop, post, again; ++ Register cnt = tmp1, i = tmp2; // Re-use registers; we're done with them now ++ beqz(tmp0, post); { ++ bind(again); { ++ mv(i, zr); ++ mv(cnt, len); ++ slli(Rn, i, LogBytesPerWord); ++ add(Rm, Pm_base, Rn); ++ ld(Rm, Address(Rm)); ++ add(Rn, Pn_base, Rn); ++ ld(Rn, Address(Rn)); ++ li(t0, 1); // set carry flag, i.e. 
no borrow ++ align(16); ++ bind(loop); { ++ notr(Rn, Rn); ++ add(Rm, Rm, t0); ++ add(Rm, Rm, Rn); ++ sltu(t0, Rm, Rn); ++ slli(Rn, i, LogBytesPerWord); // Rn as temp register ++ add(Rn, Pm_base, Rn); ++ sd(Rm, Address(Rn)); ++ add(i, i, 1); ++ slli(Rn, i, LogBytesPerWord); ++ add(Rm, Pm_base, Rn); ++ ld(Rm, Address(Rm)); ++ add(Rn, Pn_base, Rn); ++ ld(Rn, Address(Rn)); ++ sub(cnt, cnt, 1); ++ } bnez(cnt, loop); ++ addi(tmp0, tmp0, -1); ++ add(tmp0, tmp0, t0); ++ } bnez(tmp0, again); ++ } bind(post); ++ } + -+ // Get mirror and store it in the frame as GC root for this Method* -+#if INCLUDE_SHENANDOAHGC -+ if (UseShenandoahGC) { -+ __ load_mirror(x28, xmethod); -+ __ sd(x28, Address(sp, 4 * wordSize)); -+ } else -+#endif -+ { -+ __ load_mirror(t0, xmethod); -+ __ sd(t0, Address(sp, 4 * wordSize)); -+ } -+ __ sd(zr, Address(sp, 5 * wordSize)); ++ // Move memory at s to d, reversing words. ++ // Increments d to end of copied memory ++ // Destroys tmp1, tmp2 ++ // Preserves len ++ // Leaves s pointing to the address which was in d at start ++ void reverse(Register d, Register s, Register len, Register tmp1, Register tmp2) { ++ assert(tmp1 < x28 && tmp2 < x28, "register corruption"); + -+ __ load_constant_pool_cache(xcpool, xmethod); -+ __ sd(xcpool, Address(sp, 3 * wordSize)); -+ __ sd(xlocals, Address(sp, 2 * wordSize)); ++ slli(tmp1, len, LogBytesPerWord); ++ add(s, s, tmp1); ++ mv(tmp1, len); ++ unroll_2(tmp1, &MontgomeryMultiplyGenerator::reverse1, d, s, tmp2); ++ slli(tmp1, len, LogBytesPerWord); ++ sub(s, d, tmp1); ++ } ++ // [63...0] -> [31...0][63...32] ++ void reverse1(Register d, Register s, Register tmp) { ++ addi(s, s, -wordSize); ++ ld(tmp, Address(s)); ++ ror_imm(tmp, tmp, 32, t0); ++ sd(tmp, Address(d)); ++ addi(d, d, wordSize); ++ } + -+ __ sd(ra, Address(sp, 11 * wordSize)); -+ __ sd(fp, Address(sp, 10 * wordSize)); -+ __ la(fp, Address(sp, 12 * wordSize)); // include ra & fp ++ void step_squaring() { ++ // An extra ACC ++ step(); ++ acc(Rhi_ab, Rlo_ab, tmp0, tmp1, tmp2); ++ } + -+ // set sender sp -+ // leave last_sp as null -+ __ sd(x30, Address(sp, 9 * wordSize)); -+ __ sd(zr, Address(sp, 8 * wordSize)); ++ void last_squaring(Register i) { ++ Label dont; ++ // if ((i & 1) == 0) { ++ andi(t0, i, 0x1); ++ bnez(t0, dont); { ++ // MACC(Ra, Rb, tmp0, tmp1, tmp2); ++ // Ra = *++Pa; ++ // Rb = *--Pb; ++ mulhu(Rhi_ab, Ra, Rb); ++ mul(Rlo_ab, Ra, Rb); ++ acc(Rhi_ab, Rlo_ab, tmp0, tmp1, tmp2); ++ } bind(dont); ++ } + -+ // Move SP out of the way -+ if (!native_call) { -+ __ load_max_stack(t0, xmethod); -+ __ add(t0, t0, frame::interpreter_frame_monitor_size() + 2); -+ __ slli(t0, t0, 3); -+ __ sub(t0, sp, t0); -+ __ andi(sp, t0, -16); -+ } -+} ++ void extra_step_squaring() { ++ acc(Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2); // The pending m*n + -+// End of helpers ++ // MACC(Rm, Rn, tmp0, tmp1, tmp2); ++ // Rm = *++Pm; ++ // Rn = *--Pn; ++ mulhu(Rhi_mn, Rm, Rn); ++ mul(Rlo_mn, Rm, Rn); ++ addi(Pm, Pm, wordSize); ++ ld(Rm, Address(Pm)); ++ addi(Pn, Pn, -wordSize); ++ ld(Rn, Address(Pn)); ++ } + -+// Various method entries -+//------------------------------------------------------------------------------------------------------------------------ -+// -+// ++ void post1_squaring() { ++ acc(Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2); // The pending m*n + -+// Method entry for java.lang.ref.Reference.get. 
-+address TemplateInterpreterGenerator::generate_Reference_get_entry(void) { -+ // Code: _aload_0, _getfield, _areturn -+ // parameter size = 1 -+ // -+ // The code that gets generated by this routine is split into 2 parts: -+ // 1. The "intrinsified" code for G1 (or any SATB based GC), -+ // 2. The slow path - which is an expansion of the regular method entry. -+ // -+ // Notes:- -+ // * In the G1 code we do not check whether we need to block for -+ // a safepoint. If G1 is enabled then we must execute the specialized -+ // code for Reference.get (except when the Reference object is null) -+ // so that we can log the value in the referent field with an SATB -+ // update buffer. -+ // If the code for the getfield template is modified so that the -+ // G1 pre-barrier code is executed when the current method is -+ // Reference.get() then going through the normal method entry -+ // will be fine. -+ // * The G1 code can, however, check the receiver object (the instance -+ // of java.lang.Reference) and jump to the slow path if null. If the -+ // Reference object is null then we obviously cannot fetch the referent -+ // and so we don't need to call the G1 pre-barrier. Thus we can use the -+ // regular method entry code to generate the NPE. -+ // -+ // This code is based on generate_accessor_entry. -+ // -+ // xmethod: Method* -+ // x30: senderSP must preserve for slow path, set SP to it on fast path ++ // *Pm = Rm = tmp0 * inv; ++ mul(Rm, tmp0, inv); ++ sd(Rm, Address(Pm)); + -+ // RA is live. It must be saved around calls. ++ // MACC(Rm, Rn, tmp0, tmp1, tmp2); ++ // tmp0 = tmp1; tmp1 = tmp2; tmp2 = 0; ++ mulhu(Rhi_mn, Rm, Rn); + -+ address entry = __ pc(); ++#ifndef PRODUCT ++ // assert(m[i] * n[0] + tmp0 == 0, "broken Montgomery multiply"); ++ { ++ mul(Rlo_mn, Rm, Rn); ++ add(Rlo_mn, tmp0, Rlo_mn); ++ Label ok; ++ beqz(Rlo_mn, ok); { ++ stop("broken Montgomery multiply"); ++ } bind(ok); ++ } ++#endif ++ // We have very carefully set things up so that ++ // m[i]*n[0] + tmp0 == 0 (mod b), so we don't have to calculate ++ // the lower half of Rm * Rn because we know the result already: ++ // it must be -tmp0. tmp0 + (-tmp0) must generate a carry iff ++ // tmp0 != 0. So, rather than do a mul and a cad we just set ++ // the carry flag iff tmp0 is nonzero. ++ // ++ // mul(Rlo_mn, Rm, Rn); ++ // cad(zr, tmp, Rlo_mn); ++ addi(t0, tmp0, -1); ++ sltu(t0, t0, tmp0); // Set carry iff tmp0 is nonzero ++ cadc(tmp0, tmp1, Rhi_mn, t0); ++ adc(tmp1, tmp2, zr, t0); ++ mv(tmp2, zr); ++ } + -+ const int referent_offset = java_lang_ref_Reference::referent_offset; -+ guarantee(referent_offset > 0, "referent offset not initialized"); ++ // use t0 as carry ++ void acc(Register Rhi, Register Rlo, ++ Register tmp0, Register tmp1, Register tmp2) { ++ cad(tmp0, tmp0, Rlo, t0); ++ cadc(tmp1, tmp1, Rhi, t0); ++ adc(tmp2, tmp2, zr, t0); ++ } + -+ Label slow_path; -+ const Register local_0 = c_rarg0; -+ // Check if local 0 != NULL -+ // If the receiver is null then it is OK to jump to the slow path. -+ __ ld(local_0, Address(esp, 0)); -+ __ beqz(local_0, slow_path); ++ public: ++ /** ++ * Fast Montgomery multiplication. The derivation of the ++ * algorithm is in A Cryptographic Library for the Motorola ++ * DSP56000, Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237. 
++ * ++ * Arguments: ++ * ++ * Inputs for multiplication: ++ * c_rarg0 - int array elements a ++ * c_rarg1 - int array elements b ++ * c_rarg2 - int array elements n (the modulus) ++ * c_rarg3 - int length ++ * c_rarg4 - int inv ++ * c_rarg5 - int array elements m (the result) ++ * ++ * Inputs for squaring: ++ * c_rarg0 - int array elements a ++ * c_rarg1 - int array elements n (the modulus) ++ * c_rarg2 - int length ++ * c_rarg3 - int inv ++ * c_rarg4 - int array elements m (the result) ++ * ++ */ ++ address generate_multiply() { ++ Label argh, nothing; ++ bind(argh); ++ stop("MontgomeryMultiply total_allocation must be <= 8192"); + -+ __ mv(x9, x30); // Move senderSP to a callee-saved register ++ align(CodeEntryAlignment); ++ address entry = pc(); + -+ // Load the value of the referent field. -+ const Address field_address(local_0, referent_offset); -+ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ bs->load_at(_masm, IN_HEAP | ON_WEAK_OOP_REF, T_OBJECT, local_0, field_address, /*tmp1*/ t1, /*tmp2*/ t0); ++ beqz(Rlen, nothing); + -+ // areturn -+ __ andi(sp, x9, -16); // done with stack -+ __ ret(); ++ enter(); + -+ // generate a vanilla interpreter entry as the slow path -+ __ bind(slow_path); -+ __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals)); -+ return entry; -+} ++ // Make room. ++ li(Ra, 512); ++ bgt(Rlen, Ra, argh); ++ slli(Ra, Rlen, exact_log2(4 * sizeof(jint))); ++ sub(Ra, sp, Ra); ++ andi(sp, Ra, -2 * wordSize); + -+/** -+ * Method entry for static native methods: -+ * int java.util.zip.CRC32.update(int crc, int b) -+ */ -+address TemplateInterpreterGenerator::generate_CRC32_update_entry() { -+ // TODO: Unimplemented generate_CRC32_update_entry -+ return 0; -+} ++ srliw(Rlen, Rlen, 1); // length in longwords = len/2 + -+/** -+ * Method entry for static native methods: -+ * int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len) -+ * int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len) -+ */ -+address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { -+ // TODO: Unimplemented generate_CRC32_updateBytes_entry -+ return 0; -+} ++ { ++ // Copy input args, reversing as we go. We use Ra as a ++ // temporary variable. ++ reverse(Ra, Pa_base, Rlen, Ri, Rj); ++ if (!_squaring) ++ reverse(Ra, Pb_base, Rlen, Ri, Rj); ++ reverse(Ra, Pn_base, Rlen, Ri, Rj); ++ } + -+/** -+ * Method entry for intrinsic-candidate (non-native) methods: -+ * int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end) -+ * int java.util.zip.CRC32C.updateDirectByteBuffer(int crc, long buf, int off, int end) -+ * Unlike CRC32, CRC32C does not have any methods marked as native -+ * CRC32C also uses an "end" variable instead of the length variable CRC32 uses -+ */ -+address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { -+ // TODO: Unimplemented generate_CRC32C_updateBytes_entry -+ return 0; -+} ++ // Push all call-saved registers and also Pm_base which we'll need ++ // at the end. ++ save_regs(); + -+void TemplateInterpreterGenerator::bang_stack_shadow_pages(bool native_call) { -+ // Bang each page in the shadow zone. We can't assume it's been done for -+ // an interpreter frame with greater than a page of locals, so each page -+ // needs to be checked. Only true for non-native. 
-+ if (UseStackBanging) { -+ const int n_shadow_pages = checked_cast(JavaThread::stack_shadow_zone_size()) / os::vm_page_size(); -+ const int start_page = native_call ? n_shadow_pages : 1; -+ const int page_size = os::vm_page_size(); -+ for (int pages = start_page; pages <= n_shadow_pages ; pages++) { -+ __ sub(t1, sp, pages * page_size); -+ __ sd(zr, Address(t1)); -+ } -+ } -+} ++#ifndef PRODUCT ++ // assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); ++ { ++ ld(Rn, Address(Pn_base)); ++ mul(Rlo_mn, Rn, inv); ++ li(t0, -1); ++ Label ok; ++ beq(Rlo_mn, t0, ok); ++ stop("broken inverse in Montgomery multiply"); ++ bind(ok); ++ } ++#endif + -+// Interpreter stub for calling a native method. (asm interpreter) -+// This sets up a somewhat different looking stack for calling the -+// native method than the typical interpreter frame setup. -+address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { -+ // determine code generation flags -+ bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; ++ mv(Pm_base, Ra); + -+ // x11: Method* -+ // x30: sender sp ++ mv(tmp0, zr); ++ mv(tmp1, zr); ++ mv(tmp2, zr); + -+ address entry_point = __ pc(); ++ block_comment("for (int i = 0; i < len; i++) {"); ++ mv(Ri, zr); { ++ Label loop, end; ++ bge(Ri, Rlen, end); + -+ const Address constMethod (xmethod, Method::const_offset()); -+ const Address access_flags (xmethod, Method::access_flags_offset()); -+ const Address size_of_parameters(x12, ConstMethod:: -+ size_of_parameters_offset()); ++ bind(loop); ++ pre1(Ri); + -+ // get parameter size (always needed) -+ __ ld(x12, constMethod); -+ __ load_unsigned_short(x12, size_of_parameters); ++ block_comment(" for (j = i; j; j--) {"); { ++ mv(Rj, Ri); ++ unroll_2(Rj, &MontgomeryMultiplyGenerator::step); ++ } block_comment(" } // j"); + -+ // Native calls don't need the stack size check since they have no -+ // expression stack and the arguments are already on the stack and -+ // we only add a handful of words to the stack. 
++ post1(); ++ addw(Ri, Ri, 1); ++ blt(Ri, Rlen, loop); ++ bind(end); ++ block_comment("} // i"); ++ } + -+ // xmethod: Method* -+ // x12: size of parameters -+ // x30: sender sp ++ block_comment("for (int i = len; i < 2*len; i++) {"); ++ mv(Ri, Rlen); { ++ Label loop, end; ++ slli(t0, Rlen, 1); ++ bge(Ri, t0, end); + -+ // for natives the size of locals is zero ++ bind(loop); ++ pre2(Ri, Rlen); + -+ // compute beginning of parameters (xlocals) -+ __ shadd(xlocals, x12, esp, xlocals, 3); -+ __ addi(xlocals, xlocals, -wordSize); ++ block_comment(" for (j = len*2-i-1; j; j--) {"); { ++ slliw(Rj, Rlen, 1); ++ subw(Rj, Rj, Ri); ++ subw(Rj, Rj, 1); ++ unroll_2(Rj, &MontgomeryMultiplyGenerator::step); ++ } block_comment(" } // j"); + -+ // Pull SP back to minimum size: this avoids holes in the stack -+ __ andi(sp, esp, -16); ++ post2(Ri, Rlen); ++ addw(Ri, Ri, 1); ++ slli(t0, Rlen, 1); ++ blt(Ri, t0, loop); ++ bind(end); ++ } ++ block_comment("} // i"); + -+ // initialize fixed part of activation frame -+ generate_fixed_frame(true); ++ normalize(Rlen); + -+ // make sure method is native & not abstract -+#ifdef ASSERT -+ __ lwu(x10, access_flags); -+ __ verify_access_flags(x10, JVM_ACC_NATIVE, "tried to execute non-native method as native", false); -+ __ verify_access_flags(x10, JVM_ACC_ABSTRACT, "tried to execute abstract method in interpreter"); -+#endif ++ mv(Ra, Pm_base); // Save Pm_base in Ra ++ restore_regs(); // Restore caller's Pm_base + -+ // Since at this point in the method invocation the exception -+ // handler would try to exit the monitor of synchronized methods -+ // which hasn't been entered yet, we set the thread local variable -+ // _do_not_unlock_if_synchronized to true. The remove_activation -+ // will check this flag. ++ // Copy our result into caller's Pm_base ++ reverse(Pm_base, Ra, Rlen, Ri, Rj); + -+ const Address do_not_unlock_if_synchronized(xthread, -+ in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); -+ __ mv(t1, true); -+ __ sb(t1, do_not_unlock_if_synchronized); ++ leave(); ++ bind(nothing); ++ ret(); + -+ // increment invocation count & check for overflow -+ Label invocation_counter_overflow; -+ if (inc_counter) { -+ generate_counter_incr(&invocation_counter_overflow, NULL, NULL); -+ } ++ return entry; ++ } + -+ Label continue_after_compile; -+ __ bind(continue_after_compile); ++ /** ++ * ++ * Arguments: ++ * ++ * Inputs: ++ * c_rarg0 - int array elements a ++ * c_rarg1 - int array elements n (the modulus) ++ * c_rarg2 - int length ++ * c_rarg3 - int inv ++ * c_rarg4 - int array elements m (the result) ++ * ++ */ ++ address generate_square() { ++ Label argh; ++ bind(argh); ++ stop("MontgomeryMultiply total_allocation must be <= 8192"); + -+ bang_stack_shadow_pages(true); ++ align(CodeEntryAlignment); ++ address entry = pc(); + -+ // reset the _do_not_unlock_if_synchronized flag -+ __ sb(zr, do_not_unlock_if_synchronized); ++ enter(); + -+ // check for synchronized methods -+ // Must happen AFTER invocation_counter check and stack overflow check, -+ // so method is not locked if overflows. -+ if (synchronized) { -+ lock_method(); -+ } else { -+ // no synchronization necessary -+#ifdef ASSERT -+ __ lwu(x10, access_flags); -+ __ verify_access_flags(x10, JVM_ACC_SYNCHRONIZED, "method needs synchronization"); -+#endif -+ } ++ // Make room. 
++ li(Ra, 512); ++ bgt(Rlen, Ra, argh); ++ slli(Ra, Rlen, exact_log2(4 * sizeof(jint))); ++ sub(Ra, sp, Ra); ++ andi(sp, Ra, -2 * wordSize); + -+ // start execution -+#ifdef ASSERT -+ __ verify_frame_setup(); -+#endif ++ srliw(Rlen, Rlen, 1); // length in longwords = len/2 + -+ // jvmti support -+ __ notify_method_entry(); ++ { ++ // Copy input args, reversing as we go. We use Ra as a ++ // temporary variable. ++ reverse(Ra, Pa_base, Rlen, Ri, Rj); ++ reverse(Ra, Pn_base, Rlen, Ri, Rj); ++ } + -+ // work registers -+ const Register t = x18; -+ const Register result_handler = x19; ++ // Push all call-saved registers and also Pm_base which we'll need ++ // at the end. ++ save_regs(); + -+ // allocate space for parameters -+ __ ld(t, Address(xmethod, Method::const_offset())); -+ __ load_unsigned_short(t, Address(t, ConstMethod::size_of_parameters_offset())); ++ mv(Pm_base, Ra); + -+ __ slli(t, t, Interpreter::logStackElementSize); -+ __ sub(x30, esp, t); -+ __ andi(sp, x30, -16); -+ __ mv(esp, x30); ++ mv(tmp0, zr); ++ mv(tmp1, zr); ++ mv(tmp2, zr); + -+ // get signature handler -+ { -+ Label L; -+ __ ld(t, Address(xmethod, Method::signature_handler_offset())); -+ __ bnez(t, L); -+ __ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::prepare_native_call), -+ xmethod); -+ __ ld(t, Address(xmethod, Method::signature_handler_offset())); -+ __ bind(L); -+ } ++ block_comment("for (int i = 0; i < len; i++) {"); ++ mv(Ri, zr); { ++ Label loop, end; ++ bind(loop); ++ bge(Ri, Rlen, end); + -+ // call signature handler -+ assert(InterpreterRuntime::SignatureHandlerGenerator::from() == xlocals, -+ "adjust this code"); -+ assert(InterpreterRuntime::SignatureHandlerGenerator::to() == sp, -+ "adjust this code"); -+ assert(InterpreterRuntime::SignatureHandlerGenerator::temp() == t0, -+ "adjust this code"); ++ pre1(Ri); + -+ // The generated handlers do not touch xmethod (the method). -+ // However, large signatures cannot be cached and are generated -+ // each time here. The slow-path generator can do a GC on return, -+ // so we must reload it after the call. 
-+ __ jalr(t); -+ __ get_method(xmethod); // slow path can do a GC, reload xmethod ++ block_comment("for (j = (i+1)/2; j; j--) {"); { ++ addi(Rj, Ri, 1); ++ srliw(Rj, Rj, 1); ++ unroll_2(Rj, &MontgomeryMultiplyGenerator::step_squaring); ++ } block_comment(" } // j"); + ++ last_squaring(Ri); + -+ // result handler is in x10 -+ // set result handler -+ __ mv(result_handler, x10); -+ // pass mirror handle if static call -+ { -+ Label L; -+ __ lwu(t, Address(xmethod, Method::access_flags_offset())); -+ __ andi(t0, t, JVM_ACC_STATIC); -+ __ beqz(t0, L); -+ // get mirror -+ __ load_mirror(t, xmethod); -+ // copy mirror into activation frame -+ __ sd(t, Address(fp, frame::interpreter_frame_oop_temp_offset * wordSize)); -+ // pass handle to mirror -+ __ addi(c_rarg1, fp, frame::interpreter_frame_oop_temp_offset * wordSize); -+ __ bind(L); -+ } ++ block_comment(" for (j = i/2; j; j--) {"); { ++ srliw(Rj, Ri, 1); ++ unroll_2(Rj, &MontgomeryMultiplyGenerator::extra_step_squaring); ++ } block_comment(" } // j"); + -+ // get native function entry point in x28 -+ { -+ Label L; -+ __ ld(x28, Address(xmethod, Method::native_function_offset())); -+ address unsatisfied = (SharedRuntime::native_method_throw_unsatisfied_link_error_entry()); -+ __ mv(t1, unsatisfied); -+ __ ld(t1, t1); -+ __ bne(x28, t1, L); -+ __ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::prepare_native_call), -+ xmethod); -+ __ get_method(xmethod); -+ __ ld(x28, Address(xmethod, Method::native_function_offset())); -+ __ bind(L); -+ } ++ post1_squaring(); ++ addi(Ri, Ri, 1); ++ blt(Ri, Rlen, loop); + -+ // pass JNIEnv -+ __ add(c_rarg0, xthread, in_bytes(JavaThread::jni_environment_offset())); ++ bind(end); ++ block_comment("} // i"); ++ } + -+ // It is enough that the pc() points into the right code -+ // segment. It does not have to be the correct return pc. -+ Label native_return; -+ __ set_last_Java_frame(esp, fp, native_return, x30); ++ block_comment("for (int i = len; i < 2*len; i++) {"); ++ mv(Ri, Rlen); { ++ Label loop, end; ++ bind(loop); ++ slli(t0, Rlen, 1); ++ bge(Ri, t0, end); + -+ // change thread state -+#ifdef ASSERT -+ { -+ Label L; -+ __ lwu(t, Address(xthread, JavaThread::thread_state_offset())); -+ __ addi(t0, zr, (u1)_thread_in_Java); -+ __ beq(t, t0, L); -+ __ stop("Wrong thread state in native stub"); -+ __ bind(L); -+ } -+#endif ++ pre2(Ri, Rlen); + -+ // Change state to native -+ __ la(t1, Address(xthread, JavaThread::thread_state_offset())); -+ __ mv(t0, _thread_in_native); -+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); -+ __ sw(t0, Address(t1)); ++ block_comment(" for (j = (2*len-i-1)/2; j; j--) {"); { ++ slli(Rj, Rlen, 1); ++ sub(Rj, Rj, Ri); ++ sub(Rj, Rj, 1); ++ srliw(Rj, Rj, 1); ++ unroll_2(Rj, &MontgomeryMultiplyGenerator::step_squaring); ++ } block_comment(" } // j"); + -+ // Call the native method. -+ __ jalr(x28); -+ __ bind(native_return); -+ __ get_method(xmethod); -+ // result potentially in x10 or f10 ++ last_squaring(Ri); + -+ // make room for the pushes we're about to do -+ __ sub(t0, esp, 4 * wordSize); -+ __ andi(sp, t0, -16); ++ block_comment(" for (j = (2*len-i)/2; j; j--) {"); { ++ slli(Rj, Rlen, 1); ++ sub(Rj, Rj, Ri); ++ srliw(Rj, Rj, 1); ++ unroll_2(Rj, &MontgomeryMultiplyGenerator::extra_step_squaring); ++ } block_comment(" } // j"); + -+ // NOTE: The order of these pushes is known to frame::interpreter_frame_result -+ // in order to extract the result of a method call. 
If the order of these -+ // pushes change or anything else is added to the stack then the code in -+ // interpreter_frame_result must also change. -+ __ push(dtos); -+ __ push(ltos); ++ post2(Ri, Rlen); ++ addi(Ri, Ri, 1); ++ slli(t0, Rlen, 1); ++ blt(Ri, t0, loop); + -+ // change thread state -+ // Force all preceding writes to be observed prior to thread state change -+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); ++ bind(end); ++ block_comment("} // i"); ++ } + -+ __ mv(t0, _thread_in_native_trans); -+ __ sw(t0, Address(xthread, JavaThread::thread_state_offset())); ++ normalize(Rlen); + -+ if (os::is_MP()) { -+ if (UseMembar) { -+ // Force this write out before the read below -+ __ membar(MacroAssembler::AnyAny); -+ } else { -+ // Write serialization page so VM thread can do a pseudo remote membar. -+ // We use the current thread pointer to calculate a thread specific -+ // offset to write to within the page. This minimizes bus traffic -+ // due to cache line collision. -+ __ serialize_memory(xthread, t0, t1); ++ mv(Ra, Pm_base); // Save Pm_base in Ra ++ restore_regs(); // Restore caller's Pm_base ++ ++ // Copy our result into caller's Pm_base ++ reverse(Pm_base, Ra, Rlen, Ri, Rj); ++ ++ leave(); ++ ret(); ++ ++ return entry; + } -+ } ++ }; ++#endif // COMPILER2 + -+ // check for safepoint operation in progress and/or pending suspend requests -+ { -+ Label L, Continue; -+ __ safepoint_poll_acquire(L); -+ __ lwu(t1, Address(xthread, JavaThread::suspend_flags_offset())); -+ __ beqz(t1, Continue); -+ __ bind(L); ++ // Continuation point for throwing of implicit exceptions that are ++ // not handled in the current activation. Fabricates an exception ++ // oop and initiates normal exception dispatching in this ++ // frame. Since we need to preserve callee-saved values (currently ++ // only for C2, but done for C1 as well) we need a callee-saved oop ++ // map and therefore have to make these stubs into RuntimeStubs ++ // rather than BufferBlobs. If the compiler needs all registers to ++ // be preserved between the fault point and the exception handler ++ // then it must assume responsibility for that in ++ // AbstractCompiler::continuation_for_implicit_null_exception or ++ // continuation_for_implicit_division_by_zero_exception. All other ++ // implicit exceptions (e.g., NullPointerException or ++ // AbstractMethodError on entry) are either at call sites or ++ // otherwise assume that stack unwinding will be initiated, so ++ // caller saved registers were assumed volatile in the compiler. + -+ // Don't use call_VM as it will see a possible pending exception -+ // and forward it and never return here preventing us from -+ // clearing _last_native_pc down below. So we do a runtime call by -+ // hand. -+ // -+ __ mv(c_rarg0, xthread); -+ __ mv(t1, CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)); -+ __ jalr(t1); -+ __ get_method(xmethod); -+ __ reinit_heapbase(); -+ __ bind(Continue); -+ } ++#undef __ ++#define __ masm-> + -+ // change thread state -+ // Force all preceding writes to be observed prior to thread state change -+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); ++ address generate_throw_exception(const char* name, ++ address runtime_entry, ++ Register arg1 = noreg, ++ Register arg2 = noreg) { ++ // Information about frame layout at time of blocking runtime call. 
++ // Note that we only have to preserve callee-saved registers since ++ // the compilers are responsible for supplying a continuation point ++ // if they expect all registers to be preserved. ++ // n.b. riscv asserts that frame::arg_reg_save_area_bytes == 0 ++ assert_cond(runtime_entry != NULL); ++ enum layout { ++ fp_off = 0, ++ fp_off2, ++ return_off, ++ return_off2, ++ framesize // inclusive of return address ++ }; + -+ __ mv(t0, _thread_in_Java); -+ __ sw(t0, Address(xthread, JavaThread::thread_state_offset())); ++ const int insts_size = 512; ++ const int locs_size = 64; + -+ // reset_last_Java_frame -+ __ reset_last_Java_frame(true); ++ CodeBuffer code(name, insts_size, locs_size); ++ OopMapSet* oop_maps = new OopMapSet(); ++ MacroAssembler* masm = new MacroAssembler(&code); ++ assert_cond(oop_maps != NULL && masm != NULL); + -+ if (CheckJNICalls) { -+ // clear_pending_jni_exception_check -+ __ sd(zr, Address(xthread, JavaThread::pending_jni_exception_check_fn_offset())); -+ } ++ address start = __ pc(); + -+ // reset handle block -+ __ ld(t, Address(xthread, JavaThread::active_handles_offset())); -+ __ sd(zr, Address(t, JNIHandleBlock::top_offset_in_bytes())); ++ // This is an inlined and slightly modified version of call_VM ++ // which has the ability to fetch the return PC out of ++ // thread-local storage and also sets up last_Java_sp slightly ++ // differently than the real call_VM + -+ // If result is an oop unbox and store it in frame where gc will see it -+ // and result handler will pick it up ++ __ enter(); // Save FP and RA before call + -+ { -+ Label no_oop, not_weak, store_result; -+ __ la(t, ExternalAddress(AbstractInterpreter::result_handler(T_OBJECT))); -+ __ bne(t, result_handler, no_oop); -+ // Unbox oop result, e.g. JNIHandles::resolve result. -+ __ pop(ltos); -+ __ resolve_jobject(x10, xthread, t); -+ __ sd(x10, Address(fp, frame::interpreter_frame_oop_temp_offset * wordSize)); -+ // keep stack depth as expected by pushing oop which will eventually be discarded -+ __ push(ltos); -+ __ bind(no_oop); -+ } ++ assert(is_even(framesize / 2), "sp not 16-byte aligned"); + -+ { -+ Label no_reguard; -+ __ lwu(t0, Address(xthread, in_bytes(JavaThread::stack_guard_state_offset()))); -+ __ addi(t1, zr, JavaThread::stack_guard_yellow_reserved_disabled); -+ __ bne(t0, t1, no_reguard); ++ // ra and fp are already in place ++ __ addi(sp, fp, 0 - ((unsigned)framesize << LogBytesPerInt)); // prolog + -+ __ push_call_clobbered_registers(); ++ int frame_complete = __ pc() - start; ++ ++ // Set up last_Java_sp and last_Java_fp ++ address the_pc = __ pc(); ++ __ set_last_Java_frame(sp, fp, the_pc, t0); ++ ++ // Call runtime ++ if (arg1 != noreg) { ++ assert(arg2 != c_rarg1, "clobbered"); ++ __ mv(c_rarg1, arg1); ++ } ++ if (arg2 != noreg) { ++ __ mv(c_rarg2, arg2); ++ } + __ mv(c_rarg0, xthread); -+ __ mv(t1, CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)); -+ __ jalr(t1); -+ __ pop_call_clobbered_registers(); -+ __ bind(no_reguard); -+ } ++ BLOCK_COMMENT("call runtime_entry"); ++ int32_t offset = 0; ++ __ movptr_with_offset(t0, runtime_entry, offset); ++ __ jalr(x1, t0, offset); + -+ // The method register is junk from after the thread_in_native transition -+ // until here. Also can't call_VM until the bcp has been -+ // restored. Need bcp for throwing exception below so get it now. 
-+ __ get_method(xmethod); ++ // Generate oop map ++ OopMap* map = new OopMap(framesize, 0); ++ assert_cond(map != NULL); + -+ // restore bcp to have legal interpreter frame, i.e., bci == 0 <=> -+ // xbcp == code_base() -+ __ ld(xbcp, Address(xmethod, Method::const_offset())); // get ConstMethod* -+ __ add(xbcp, xbcp, in_bytes(ConstMethod::codes_offset())); // get codebase -+ // handle exceptions (exception handling will handle unlocking!) -+ { ++ oop_maps->add_gc_map(the_pc - start, map); ++ ++ __ reset_last_Java_frame(true); ++ ++ __ leave(); ++ ++ // check for pending exceptions ++#ifdef ASSERT + Label L; + __ ld(t0, Address(xthread, Thread::pending_exception_offset())); -+ __ beqz(t0, L); -+ // Note: At some point we may want to unify this with the code -+ // used in call_VM_base(); i.e., we should use the -+ // StubRoutines::forward_exception code. For now this doesn't work -+ // here because the sp is not correctly set at this point. -+ __ MacroAssembler::call_VM(noreg, -+ CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::throw_pending_exception)); ++ __ bnez(t0, L); + __ should_not_reach_here(); + __ bind(L); -+ } -+ -+ // do unlocking if necessary -+ { -+ Label L; -+ __ lwu(t, Address(xmethod, Method::access_flags_offset())); -+ __ andi(t0, t, JVM_ACC_SYNCHRONIZED); -+ __ beqz(t0, L); -+ // the code below should be shared with interpreter macro -+ // assembler implementation -+ { -+ Label unlock; -+ // BasicObjectLock will be first in list, since this is a -+ // synchronized method. However, need to check that the object -+ // has not been unlocked by an explicit monitorexit bytecode. -+ -+ // monitor expect in c_rarg1 for slow unlock path -+ __ la(c_rarg1, Address(fp, // address of first monitor -+ (intptr_t)(frame::interpreter_frame_initial_sp_offset * -+ wordSize - sizeof(BasicObjectLock)))); -+ -+ __ ld(t, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes())); -+ __ bnez(t, unlock); ++#endif // ASSERT ++ __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); + -+ // Entry already unlocked, need to throw exception -+ __ MacroAssembler::call_VM(noreg, -+ CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::throw_illegal_monitor_state_exception)); -+ __ should_not_reach_here(); + -+ __ bind(unlock); -+ __ unlock_object(c_rarg1); -+ } -+ __ bind(L); ++ // codeBlob framesize is in words (not VMRegImpl::slot_size) ++ RuntimeStub* stub = ++ RuntimeStub::new_runtime_stub(name, ++ &code, ++ frame_complete, ++ (framesize >> (LogBytesPerWord - LogBytesPerInt)), ++ oop_maps, false); ++ assert(stub != NULL, "create runtime stub fail!"); ++ return stub->entry_point(); + } + -+ // jvmti support -+ // Note: This must happen _after_ handling/throwing any exceptions since -+ // the exception handler code notifies the runtime of method exits -+ // too. If this happens before, method entry/exit notifications are -+ // not properly paired (was bug - gri 11/22/99). -+ __ notify_method_exit(vtos, InterpreterMacroAssembler::NotifyJVMTI); -+ -+ __ pop(ltos); -+ __ pop(dtos); ++ // Initialization ++ void generate_initial() { ++ // Generate initial stubs and initializes the entry points + -+ __ jalr(result_handler); ++ // entry points that exist in all platforms Note: This is code ++ // that could be shared among different platforms - however the ++ // benefit seems to be smaller than the disadvantage of having a ++ // much more complicated generator structure. See also comment in ++ // stubRoutines.hpp. 
+ -+ // remove activation -+ __ ld(esp, Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); // get sender sp -+ // remove frame anchor -+ __ leave(); ++ StubRoutines::_forward_exception_entry = generate_forward_exception(); + -+ // restore sender sp -+ __ mv(sp, esp); ++ StubRoutines::_call_stub_entry = ++ generate_call_stub(StubRoutines::_call_stub_return_address); + -+ __ ret(); ++ // is referenced by megamorphic call ++ StubRoutines::_catch_exception_entry = generate_catch_exception(); + -+ if (inc_counter) { -+ // Handle overflow of counter and compile method -+ __ bind(invocation_counter_overflow); -+ generate_counter_overflow(continue_after_compile); ++ // Build this early so it's available for the interpreter. ++ StubRoutines::_throw_StackOverflowError_entry = ++ generate_throw_exception("StackOverflowError throw_exception", ++ CAST_FROM_FN_PTR(address, ++ SharedRuntime::throw_StackOverflowError)); ++ StubRoutines::_throw_delayed_StackOverflowError_entry = ++ generate_throw_exception("delayed StackOverflowError throw_exception", ++ CAST_FROM_FN_PTR(address, ++ SharedRuntime::throw_delayed_StackOverflowError)); ++ // Safefetch stubs. ++ generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, ++ &StubRoutines::_safefetch32_fault_pc, ++ &StubRoutines::_safefetch32_continuation_pc); ++ generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry, ++ &StubRoutines::_safefetchN_fault_pc, ++ &StubRoutines::_safefetchN_continuation_pc); + } + -+ return entry_point; -+} ++ void generate_all() { ++ // support for verify_oop (must happen after universe_init) ++ StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop(); ++ StubRoutines::_throw_AbstractMethodError_entry = ++ generate_throw_exception("AbstractMethodError throw_exception", ++ CAST_FROM_FN_PTR(address, ++ SharedRuntime:: ++ throw_AbstractMethodError)); + -+// -+// Generic interpreted method entry to (asm) interpreter -+// -+address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) { ++ StubRoutines::_throw_IncompatibleClassChangeError_entry = ++ generate_throw_exception("IncompatibleClassChangeError throw_exception", ++ CAST_FROM_FN_PTR(address, ++ SharedRuntime:: ++ throw_IncompatibleClassChangeError)); + -+ // determine code generation flags -+ const bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; ++ StubRoutines::_throw_NullPointerException_at_call_entry = ++ generate_throw_exception("NullPointerException at call throw_exception", ++ CAST_FROM_FN_PTR(address, ++ SharedRuntime:: ++ throw_NullPointerException_at_call)); ++ // arraycopy stubs used by compilers ++ generate_arraycopy_stubs(); + -+ // t0: sender sp -+ address entry_point = __ pc(); ++#ifdef COMPILER2 ++ if (UseMulAddIntrinsic) { ++ StubRoutines::_mulAdd = generate_mulAdd(); ++ } + -+ const Address constMethod(xmethod, Method::const_offset()); -+ const Address access_flags(xmethod, Method::access_flags_offset()); -+ const Address size_of_parameters(x13, -+ ConstMethod::size_of_parameters_offset()); -+ const Address size_of_locals(x13, ConstMethod::size_of_locals_offset()); ++ if (UseMultiplyToLenIntrinsic) { ++ StubRoutines::_multiplyToLen = generate_multiplyToLen(); ++ } + -+ // get parameter size (always needed) -+ // need to load the const method first -+ __ ld(x13, constMethod); -+ __ load_unsigned_short(x12, size_of_parameters); ++ if (UseSquareToLenIntrinsic) { ++ StubRoutines::_squareToLen = generate_squareToLen(); ++ } + -+ // x12: size of 
parameters ++ if (UseMontgomeryMultiplyIntrinsic) { ++ StubCodeMark mark(this, "StubRoutines", "montgomeryMultiply"); ++ MontgomeryMultiplyGenerator g(_masm, /*squaring*/false); ++ StubRoutines::_montgomeryMultiply = g.generate_multiply(); ++ } + -+ __ load_unsigned_short(x13, size_of_locals); // get size of locals in words -+ __ sub(x13, x13, x12); // x13 = no. of additional locals ++ if (UseMontgomerySquareIntrinsic) { ++ StubCodeMark mark(this, "StubRoutines", "montgomerySquare"); ++ MontgomeryMultiplyGenerator g(_masm, /*squaring*/true); ++ StubRoutines::_montgomerySquare = g.generate_square(); ++ } + -+ // see if we've got enough room on the stack for locals plus overhead. -+ generate_stack_overflow_check(); ++ if (UseRVVForBigIntegerShiftIntrinsics) { ++ StubRoutines::_bigIntegerLeftShiftWorker = generate_bigIntegerLeftShift(); ++ StubRoutines::_bigIntegerRightShiftWorker = generate_bigIntegerRightShift(); ++ } ++#endif + -+ // compute beginning of parameters (xlocals) -+ __ shadd(xlocals, x12, esp, t1, 3); -+ __ add(xlocals, xlocals, -wordSize); ++ generate_compare_long_strings(); + -+ // Make room for additional locals -+ __ slli(t1, x13, 3); -+ __ sub(t0, esp, t1); ++ generate_string_indexof_stubs(); + -+ // Padding between locals and fixed part of activation frame to ensure -+ // SP is always 16-byte aligned. -+ __ andi(sp, t0, -16); ++ BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod(); ++ if (bs_nm != NULL) { ++ StubRoutines::riscv::_method_entry_barrier = generate_method_entry_barrier(); ++ } + -+ // x13 - # of additional locals -+ // allocate space for locals -+ // explicitly initialize locals -+ { -+ Label exit, loop; -+ __ blez(x13, exit); // do nothing if x13 <= 0 -+ __ bind(loop); -+ __ sd(zr, Address(t0)); -+ __ add(t0, t0, wordSize); -+ __ add(x13, x13, -1); // until everything initialized -+ __ bnez(x13, loop); -+ __ bind(exit); ++ StubRoutines::riscv::set_completed(); + } + -+ // And the base dispatch table -+ __ get_dispatch(); -+ -+ // initialize fixed part of activation frame -+ generate_fixed_frame(false); -+ -+ // make sure method is not native & not abstract -+#ifdef ASSERT -+ __ lwu(x10, access_flags); -+ __ verify_access_flags(x10, JVM_ACC_NATIVE, "tried to execute native method as non-native"); -+ __ verify_access_flags(x10, JVM_ACC_ABSTRACT, "tried to execute abstract method in interpreter"); -+#endif -+ -+ // Since at this point in the method invocation the exception -+ // handler would try to exit the monitor of synchronized methods -+ // which hasn't been entered yet, we set the thread local variable -+ // _do_not_unlock_if_synchronized to true. The remove_activation -+ // will check this flag. 
-+ -+ const Address do_not_unlock_if_synchronized(xthread, -+ in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); -+ __ mv(t1, true); -+ __ sb(t1, do_not_unlock_if_synchronized); ++ public: ++ StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) { ++ if (all) { ++ generate_all(); ++ } else { ++ generate_initial(); ++ } ++ } + -+ Label no_mdp; -+ const Register mdp = x13; -+ __ ld(mdp, Address(xmethod, Method::method_data_offset())); -+ __ beqz(mdp, no_mdp); -+ __ add(mdp, mdp, in_bytes(MethodData::data_offset())); -+ __ profile_parameters_type(mdp, x11, x12, x14); // use x11, x12, x14 as tmp registers -+ __ bind(no_mdp); ++ ~StubGenerator() {} ++}; // end class declaration + -+ // increment invocation count & check for overflow -+ Label invocation_counter_overflow; -+ Label profile_method; -+ Label profile_method_continue; -+ if (inc_counter) { -+ generate_counter_incr(&invocation_counter_overflow, -+ &profile_method, -+ &profile_method_continue); -+ if (ProfileInterpreter) { -+ __ bind(profile_method_continue); -+ } ++#define UCM_TABLE_MAX_ENTRIES 8 ++void StubGenerator_generate(CodeBuffer* code, bool all) { ++ if (UnsafeCopyMemory::_table == NULL) { ++ UnsafeCopyMemory::create_table(UCM_TABLE_MAX_ENTRIES); + } + -+ Label continue_after_compile; -+ __ bind(continue_after_compile); ++ StubGenerator g(code, all); ++} +diff --git a/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp b/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp +new file mode 100644 +index 00000000000..395a2d338e4 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp +@@ -0,0 +1,58 @@ ++/* ++ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ bang_stack_shadow_pages(false); ++#include "precompiled.hpp" ++#include "runtime/deoptimization.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++#include "utilities/globalDefinitions.hpp" + -+ // reset the _do_not_unlock_if_synchronized flag -+ __ sb(zr, do_not_unlock_if_synchronized); ++// Implementation of the platform-specific part of StubRoutines - for ++// a description of how to extend it, see the stubRoutines.hpp file. + -+ // check for synchronized methods -+ // Must happen AFTER invocation_counter check and stack overflow check, -+ // so method is not locked if overflows. 
-+ if (synchronized) { -+ // Allocate monitor and lock method -+ lock_method(); -+ } else { -+ // no synchronization necessary -+#ifdef ASSERT -+ __ lwu(x10, access_flags); -+ __ verify_access_flags(x10, JVM_ACC_SYNCHRONIZED, "method needs synchronization"); -+#endif -+ } ++address StubRoutines::riscv::_get_previous_sp_entry = NULL; + -+ // start execution -+#ifdef ASSERT -+ __ verify_frame_setup(); -+#endif ++address StubRoutines::riscv::_f2i_fixup = NULL; ++address StubRoutines::riscv::_f2l_fixup = NULL; ++address StubRoutines::riscv::_d2i_fixup = NULL; ++address StubRoutines::riscv::_d2l_fixup = NULL; ++address StubRoutines::riscv::_float_sign_mask = NULL; ++address StubRoutines::riscv::_float_sign_flip = NULL; ++address StubRoutines::riscv::_double_sign_mask = NULL; ++address StubRoutines::riscv::_double_sign_flip = NULL; ++address StubRoutines::riscv::_zero_blocks = NULL; ++address StubRoutines::riscv::_compare_long_string_LL = NULL; ++address StubRoutines::riscv::_compare_long_string_UU = NULL; ++address StubRoutines::riscv::_compare_long_string_LU = NULL; ++address StubRoutines::riscv::_compare_long_string_UL = NULL; ++address StubRoutines::riscv::_string_indexof_linear_ll = NULL; ++address StubRoutines::riscv::_string_indexof_linear_uu = NULL; ++address StubRoutines::riscv::_string_indexof_linear_ul = NULL; ++address StubRoutines::riscv::_large_byte_array_inflate = NULL; ++address StubRoutines::riscv::_method_entry_barrier = NULL; + -+ // jvmti support -+ __ notify_method_entry(); ++bool StubRoutines::riscv::_completed = false; +diff --git a/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp b/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp +new file mode 100644 +index 00000000000..51f07819c33 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp +@@ -0,0 +1,161 @@ ++/* ++ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ + -+ __ dispatch_next(vtos); ++#ifndef CPU_RISCV_STUBROUTINES_RISCV_HPP ++#define CPU_RISCV_STUBROUTINES_RISCV_HPP + -+ // invocation counter overflow -+ if (inc_counter) { -+ if (ProfileInterpreter) { -+ // We have decided to profile this method in the interpreter -+ __ bind(profile_method); -+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method)); -+ __ set_method_data_pointer_for_bcp(); -+ // don't think we need this -+ __ get_method(x11); -+ __ jal(profile_method_continue); -+ } -+ // Handle overflow of counter and compile method -+ __ bind(invocation_counter_overflow); -+ generate_counter_overflow(continue_after_compile); -+ } ++// This file holds the platform specific parts of the StubRoutines ++// definition. See stubRoutines.hpp for a description on how to ++// extend it. + -+ return entry_point; ++static bool returns_to_call_stub(address return_pc) { ++ return return_pc == _call_stub_return_address; +} + -+//----------------------------------------------------------------------------- -+// Exceptions ++enum platform_dependent_constants { ++ code_size1 = 19000, // simply increase if too small (assembler will crash if too small) ++ code_size2 = 28000 // simply increase if too small (assembler will crash if too small) ++}; + -+void TemplateInterpreterGenerator::generate_throw_exception() { -+ // Entry point in previous activation (i.e., if the caller was -+ // interpreted) -+ Interpreter::_rethrow_exception_entry = __ pc(); -+ // Restore sp to interpreter_frame_last_sp even though we are going -+ // to empty the expression stack for the exception processing. -+ __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); -+ // x10: exception -+ // x13: return address/pc that threw exception -+ __ restore_bcp(); // xbcp points to call/send -+ __ restore_locals(); -+ __ restore_constant_pool_cache(); -+ __ reinit_heapbase(); // restore xheapbase as heapbase. -+ __ get_dispatch(); ++class riscv { ++ friend class StubGenerator; + -+ // Entry point for exceptions thrown within interpreter code -+ Interpreter::_throw_exception_entry = __ pc(); -+ // If we came here via a NullPointerException on the receiver of a -+ // method, xthread may be corrupt. 
-+ __ get_method(xmethod); -+ // expression stack is undefined here -+ // x10: exception -+ // xbcp: exception bcp -+ __ verify_oop(x10); -+ __ mv(c_rarg1, x10); ++ private: ++ static address _get_previous_sp_entry; + -+ // expression stack must be empty before entering the VM in case of -+ // an exception -+ __ empty_expression_stack(); -+ // find exception handler address and preserve exception oop -+ __ call_VM(x13, -+ CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::exception_handler_for_exception), -+ c_rarg1); ++ static address _f2i_fixup; ++ static address _f2l_fixup; ++ static address _d2i_fixup; ++ static address _d2l_fixup; + -+ // Calculate stack limit -+ __ ld(t0, Address(xmethod, Method::const_offset())); -+ __ lhu(t0, Address(t0, ConstMethod::max_stack_offset())); -+ __ add(t0, t0, frame::interpreter_frame_monitor_size() + 4); -+ __ ld(t1, Address(fp, frame::interpreter_frame_initial_sp_offset * wordSize)); -+ __ slli(t0, t0, 3); -+ __ sub(t0, t1, t0); -+ __ andi(sp, t0, -16); ++ static address _float_sign_mask; ++ static address _float_sign_flip; ++ static address _double_sign_mask; ++ static address _double_sign_flip; + -+ // x10: exception handler entry point -+ // x13: preserved exception oop -+ // xbcp: bcp for exception handler -+ __ push_ptr(x13); // push exception which is now the only value on the stack -+ __ jr(x10); // jump to exception handler (may be _remove_activation_entry!) ++ static address _zero_blocks; + -+ // If the exception is not handled in the current frame the frame is -+ // removed and the exception is rethrown (i.e. exception -+ // continuation is _rethrow_exception). -+ // -+ // Note: At this point the bci is still the bxi for the instruction -+ // which caused the exception and the expression stack is -+ // empty. Thus, for any VM calls at this point, GC will find a legal -+ // oop map (with empty expression stack). ++ static address _compare_long_string_LL; ++ static address _compare_long_string_LU; ++ static address _compare_long_string_UL; ++ static address _compare_long_string_UU; ++ static address _string_indexof_linear_ll; ++ static address _string_indexof_linear_uu; ++ static address _string_indexof_linear_ul; ++ static address _large_byte_array_inflate; + -+ // -+ // JVMTI PopFrame support -+ // ++ static address _method_entry_barrier; + -+ Interpreter::_remove_activation_preserving_args_entry = __ pc(); -+ __ empty_expression_stack(); -+ // Set the popframe_processing bit in pending_popframe_condition -+ // indicating that we are currently handling popframe, so that -+ // call_VMs that may happen later do not trigger new popframe -+ // handling cycles. -+ __ lwu(x13, Address(xthread, JavaThread::popframe_condition_offset())); -+ __ ori(x13, x13, JavaThread::popframe_processing_bit); -+ __ sw(x13, Address(xthread, JavaThread::popframe_condition_offset())); ++ static bool _completed; + -+ { -+ // Check to see whether we are returning to a deoptimized frame. -+ // (The PopFrame call ensures that the caller of the popped frame is -+ // either interpreted or compiled and deoptimizes it if compiled.) -+ // In this case, we can't call dispatch_next() after the frame is -+ // popped, but instead must save the incoming arguments and restore -+ // them after deoptimization has occurred. -+ // -+ // Note that we don't compare the return PC against the -+ // deoptimization blob's unpack entry because of the presence of -+ // adapter frames in C2. 
-+ Label caller_not_deoptimized; -+ __ ld(c_rarg1, Address(fp, frame::return_addr_offset * wordSize)); -+ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::interpreter_contains), c_rarg1); -+ __ bnez(x10, caller_not_deoptimized); ++ public: + -+ // Compute size of arguments for saving when returning to -+ // deoptimized caller -+ __ get_method(x10); -+ __ ld(x10, Address(x10, Method::const_offset())); -+ __ load_unsigned_short(x10, Address(x10, in_bytes(ConstMethod:: -+ size_of_parameters_offset()))); -+ __ slli(x10, x10, Interpreter::logStackElementSize); -+ __ restore_locals(); -+ __ sub(xlocals, xlocals, x10); -+ __ add(xlocals, xlocals, wordSize); -+ // Save these arguments -+ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, -+ Deoptimization:: -+ popframe_preserve_args), -+ xthread, x10, xlocals); ++ static address get_previous_sp_entry() { ++ return _get_previous_sp_entry; ++ } + -+ __ remove_activation(vtos, -+ /* throw_monitor_exception */ false, -+ /* install_monitor_exception */ false, -+ /* notify_jvmdi */ false); ++ static address f2i_fixup() { ++ return _f2i_fixup; ++ } + -+ // Inform deoptimization that it is responsible for restoring -+ // these arguments -+ __ mv(t0, JavaThread::popframe_force_deopt_reexecution_bit); -+ __ sw(t0, Address(xthread, JavaThread::popframe_condition_offset())); ++ static address f2l_fixup() { ++ return _f2l_fixup; ++ } + -+ // Continue in deoptimization handler -+ __ ret(); ++ static address d2i_fixup() { ++ return _d2i_fixup; ++ } + -+ __ bind(caller_not_deoptimized); ++ static address d2l_fixup() { ++ return _d2l_fixup; + } + -+ __ remove_activation(vtos, -+ /* throw_monitor_exception */ false, -+ /* install_monitor_exception */ false, -+ /* notify_jvmdi */ false); ++ static address float_sign_mask() { ++ return _float_sign_mask; ++ } + -+ // Restore the last_sp and null it out -+ __ ld(esp, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); -+ __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ static address float_sign_flip() { ++ return _float_sign_flip; ++ } + -+ __ restore_bcp(); -+ __ restore_locals(); -+ __ restore_constant_pool_cache(); -+ __ get_method(xmethod); -+ __ get_dispatch(); ++ static address double_sign_mask() { ++ return _double_sign_mask; ++ } + -+ // The method data pointer was incremented already during -+ // call profiling. We have to restore the mdp for the current bcp. -+ if (ProfileInterpreter) { -+ __ set_method_data_pointer_for_bcp(); ++ static address double_sign_flip() { ++ return _double_sign_flip; + } + -+ // Clear the popframe condition flag -+ __ sw(zr, Address(xthread, JavaThread::popframe_condition_offset())); -+ assert(JavaThread::popframe_inactive == 0, "fix popframe_inactive"); ++ static address zero_blocks() { ++ return _zero_blocks; ++ } + -+#if INCLUDE_JVMTI -+ { -+ Label L_done; ++ static address compare_long_string_LL() { ++ return _compare_long_string_LL; ++ } + -+ __ lbu(t0, Address(xbcp, 0)); -+ __ mv(t1, Bytecodes::_invokestatic); -+ __ bne(t1, t0, L_done); ++ static address compare_long_string_LU() { ++ return _compare_long_string_LU; ++ } + -+ // The member name argument must be restored if _invokestatic is re-executed after a PopFrame call. -+ // Detect such a case in the InterpreterRuntime function and return the member name argument,or NULL. 
++ static address compare_long_string_UL() { ++ return _compare_long_string_UL; ++ } + -+ __ ld(c_rarg0, Address(xlocals, 0)); -+ __ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::member_name_arg_or_null),c_rarg0, xmethod, xbcp); ++ static address compare_long_string_UU() { ++ return _compare_long_string_UU; ++ } + -+ __ beqz(x10, L_done); ++ static address string_indexof_linear_ul() { ++ return _string_indexof_linear_ul; ++ } + -+ __ sd(x10, Address(esp, 0)); -+ __ bind(L_done); ++ static address string_indexof_linear_ll() { ++ return _string_indexof_linear_ll; + } -+#endif // INCLUDE_JVMTI + -+ // Restore machine SP -+ __ ld(t0, Address(xmethod, Method::const_offset())); -+ __ lhu(t0, Address(t0, ConstMethod::max_stack_offset())); -+ __ add(t0, t0, frame::interpreter_frame_monitor_size() + 4); -+ __ ld(t1, Address(fp, frame::interpreter_frame_initial_sp_offset * wordSize)); -+ __ slliw(t0, t0, 3); -+ __ sub(t0, t1, t0); -+ __ andi(sp, t0, -16); ++ static address string_indexof_linear_uu() { ++ return _string_indexof_linear_uu; ++ } + -+ __ dispatch_next(vtos); -+ // end of PopFrame support ++ static address large_byte_array_inflate() { ++ return _large_byte_array_inflate; ++ } + -+ Interpreter::_remove_activation_entry = __ pc(); ++ static address method_entry_barrier() { ++ return _method_entry_barrier; ++ } + -+ // preserve exception over this code sequence -+ __ pop_ptr(x10); -+ __ sd(x10, Address(xthread, JavaThread::vm_result_offset())); -+ // remove the activation (without doing throws on illegalMonitorExceptions) -+ __ remove_activation(vtos, false, true, false); -+ // restore exception -+ __ get_vm_result(x10, xthread); ++ static bool complete() { ++ return _completed; ++ } + -+ // In between activations - previous activation type unknown yet -+ // compute continuation point - the continuation point expects the -+ // following registers set up: -+ // -+ // x10: exception -+ // ra: return address/pc that threw exception -+ // sp: expression stack of caller -+ // fp: fp of caller -+ // FIXME: There's no point saving RA here because VM calls don't trash it -+ __ sub(sp, sp, 2 * wordSize); -+ __ sd(x10, Address(sp, 0)); // save exception -+ __ sd(ra, Address(sp, wordSize)); // save return address -+ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, -+ SharedRuntime::exception_handler_for_return_address), -+ xthread, ra); -+ __ mv(x11, x10); // save exception handler -+ __ ld(x10, Address(sp, 0)); // restore exception -+ __ ld(ra, Address(sp, wordSize)); // restore return address -+ __ add(sp, sp, 2 * wordSize); -+ // We might be returning to a deopt handler that expects x13 to -+ // contain the exception pc -+ __ mv(x13, ra); -+ // Note that an "issuing PC" is actually the next PC after the call -+ __ jr(x11); // jump to exception -+ // handler of caller -+} -+ -+// -+// JVMTI ForceEarlyReturn support -+// -+address TemplateInterpreterGenerator::generate_earlyret_entry_for(TosState state) { -+ address entry = __ pc(); -+ -+ __ restore_bcp(); -+ __ restore_locals(); -+ __ empty_expression_stack(); -+ __ load_earlyret_value(state); -+ -+ __ ld(t0, Address(xthread, JavaThread::jvmti_thread_state_offset())); -+ Address cond_addr(t0, JvmtiThreadState::earlyret_state_offset()); -+ -+ // Clear the earlyret state -+ assert(JvmtiThreadState::earlyret_inactive == 0, "should be"); -+ __ sd(zr, cond_addr); -+ -+ __ remove_activation(state, -+ false, /* throw_monitor_exception */ -+ false, /* install_monitor_exception */ -+ true); /* notify_jvmdi */ -+ __ ret(); -+ -+ return entry; -+} 
-+// end of ForceEarlyReturn support -+ -+//----------------------------------------------------------------------------- -+// Helper for vtos entry point generation -+ -+void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t, -+ address& bep, -+ address& cep, -+ address& sep, -+ address& aep, -+ address& iep, -+ address& lep, -+ address& fep, -+ address& dep, -+ address& vep) { -+ assert(t != NULL && t->is_valid() && t->tos_in() == vtos, "illegal template"); -+ Label L; -+ aep = __ pc(); __ push_ptr(); __ j(L); -+ fep = __ pc(); __ push_f(); __ j(L); -+ dep = __ pc(); __ push_d(); __ j(L); -+ lep = __ pc(); __ push_l(); __ j(L); -+ bep = cep = sep = -+ iep = __ pc(); __ push_i(); -+ vep = __ pc(); -+ __ bind(L); -+ generate_and_dispatch(t); -+} -+ -+//----------------------------------------------------------------------------- -+ -+// Non-product code -+#ifndef PRODUCT -+address TemplateInterpreterGenerator::generate_trace_code(TosState state) { -+ address entry = __ pc(); -+ -+ __ push_reg(ra); -+ __ push(state); -+ __ push_reg(RegSet::range(x10, x17) + RegSet::range(x5, x7) + RegSet::range(x28, x31), sp); -+ __ mv(c_rarg2, x10); // Pass itos -+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::trace_bytecode), c_rarg1, c_rarg2, c_rarg3); -+ __ pop_reg(RegSet::range(x10, x17) + RegSet::range(x5, x7) + RegSet::range(x28, x31), sp); -+ __ pop(state); -+ __ pop_reg(ra); -+ __ ret(); // return from result handler -+ -+ return entry; -+} -+ -+void TemplateInterpreterGenerator::count_bytecode() { -+ __ push_reg(t0); -+ __ push_reg(x10); -+ __ mv(x10, (address) &BytecodeCounter::_counter_value); -+ __ mv(t0, 1); -+ __ amoadd_d(zr, x10, t0, Assembler::aqrl); -+ __ pop_reg(x10); -+ __ pop_reg(t0); -+} -+ -+void TemplateInterpreterGenerator::histogram_bytecode(Template* t) { ; } -+ -+void TemplateInterpreterGenerator::histogram_bytecode_pair(Template* t) { ; } -+ -+void TemplateInterpreterGenerator::trace_bytecode(Template* t) { -+ // Call a little run-time stub to avoid blow-up for each bytecode. -+ // The run-time runtime saves the right registers, depending on -+ // the tosca in-state for the given template. -+ -+ assert(Interpreter::trace_code(t->tos_in()) != NULL, "entry must have been generated"); -+ __ jal(Interpreter::trace_code(t->tos_in())); -+ __ reinit_heapbase(); -+} -+ -+void TemplateInterpreterGenerator::stop_interpreter_at() { -+ Label L; -+ __ push_reg(t0); -+ __ mv(t0, (address) &BytecodeCounter::_counter_value); -+ __ ld(t0, Address(t0)); -+ __ mv(t1, StopInterpreterAt); -+ __ bne(t0, t1, L); -+ __ ebreak(); -+ __ bind(L); -+ __ pop_reg(t0); -+} ++ static void set_completed() { ++ _completed = true; ++ } ++}; + -+#endif // !PRODUCT -diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp ++#endif // CPU_RISCV_STUBROUTINES_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp new file mode 100644 -index 000000000..8e6e7dee5 +index 00000000000..6537b2dbd94 --- /dev/null -+++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -@@ -0,0 +1,4028 @@ ++++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp +@@ -0,0 +1,1794 @@ +/* -+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. 
All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -49225,6468 +49961,5737 @@ index 000000000..8e6e7dee5 +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "gc/shared/barrierSetAssembler.hpp" ++#include "interpreter/bytecodeHistogram.hpp" ++#include "interpreter/bytecodeTracer.hpp" +#include "interpreter/interp_masm.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" ++#include "interpreter/templateInterpreterGenerator.hpp" +#include "interpreter/templateTable.hpp" -+#include "memory/universe.hpp" ++#include "memory/resourceArea.hpp" ++#include "oops/arrayOop.hpp" +#include "oops/method.hpp" +#include "oops/methodData.hpp" -+#include "oops/objArrayKlass.hpp" +#include "oops/oop.inline.hpp" -+#include "prims/methodHandles.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/jvmtiThreadState.hpp" ++#include "runtime/arguments.hpp" ++#include "runtime/deoptimization.hpp" +#include "runtime/frame.inline.hpp" ++#include "runtime/jniHandles.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/synchronizer.hpp" ++#include "runtime/timer.hpp" ++#include "runtime/vframeArray.hpp" ++#include "utilities/debug.hpp" ++#include "utilities/powerOfTwo.hpp" ++#include + -+#define __ _masm-> -+ -+// Platform-dependent initialization -+ -+void TemplateTable::pd_initialize() { -+ // No riscv specific initialization -+} -+ -+// Address computation: local variables ++#ifndef PRODUCT ++#include "oops/method.hpp" ++#endif // !PRODUCT + -+static inline Address iaddress(int n) { -+ return Address(xlocals, Interpreter::local_offset_in_bytes(n)); -+} ++// Size of interpreter code. Increase if too small. Interpreter will ++// fail with a guarantee ("not enough space for interpreter generation"); ++// if too small. ++// Run with +PrintInterpreter to get the VM to print out the size. 
++// Max size with JVMTI ++int TemplateInterpreter::InterpreterCodeSize = 256 * 1024; + -+static inline Address laddress(int n) { -+ return iaddress(n + 1); -+} ++#define __ _masm-> + -+static inline Address faddress(int n) { -+ return iaddress(n); -+} ++//----------------------------------------------------------------------------- + -+static inline Address daddress(int n) { -+ return laddress(n); -+} ++address TemplateInterpreterGenerator::generate_slow_signature_handler() { ++ address entry = __ pc(); + -+static inline Address aaddress(int n) { -+ return iaddress(n); -+} ++ __ andi(esp, esp, -16); ++ __ mv(c_rarg3, esp); ++ // xmethod ++ // xlocals ++ // c_rarg3: first stack arg - wordSize ++ // adjust sp + -+static inline Address iaddress(Register r, Register temp, InterpreterMacroAssembler* _masm) { -+ _masm->shadd(temp, r, xlocals, temp, 3); -+ return Address(temp, 0); -+} ++ __ addi(sp, c_rarg3, -18 * wordSize); ++ __ addi(sp, sp, -2 * wordSize); ++ __ sd(ra, Address(sp, 0)); + -+static inline Address laddress(Register r, Register temp, InterpreterMacroAssembler* _masm) { -+ _masm->shadd(temp, r, xlocals, temp, 3); -+ return Address(temp, Interpreter::local_offset_in_bytes(1));; -+} ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::slow_signature_handler), ++ xmethod, xlocals, c_rarg3); + -+static inline Address faddress(Register r, Register temp, InterpreterMacroAssembler* _masm) { -+ return iaddress(r, temp, _masm); -+} ++ // x10: result handler + -+static inline Address daddress(Register r, Register temp, InterpreterMacroAssembler* _masm) { -+ return laddress(r, temp, _masm); -+} ++ // Stack layout: ++ // sp: return address <- sp ++ // 1 garbage ++ // 8 integer args (if static first is unused) ++ // 1 float/double identifiers ++ // 8 double args ++ // stack args <- esp ++ // garbage ++ // expression stack bottom ++ // bcp (NULL) ++ // ... + -+static inline Address aaddress(Register r, Register temp, InterpreterMacroAssembler* _masm) { -+ return iaddress(r, temp, _masm); -+} ++ // Restore ra ++ __ ld(ra, Address(sp, 0)); ++ __ addi(sp, sp , 2 * wordSize); + -+// At top of Java expression stack which may be different than esp(). It -+// isn't for category 1 objects. -+static inline Address at_tos () { -+ return Address(esp, Interpreter::expr_offset_in_bytes(0)); -+} ++ // Do FP first so we can use c_rarg3 as temp ++ __ lwu(c_rarg3, Address(sp, 9 * wordSize)); // float/double identifiers + -+static inline Address at_tos_p1() { -+ return Address(esp, Interpreter::expr_offset_in_bytes(1)); -+} ++ for (int i = 0; i < Argument::n_float_register_parameters_c; i++) { ++ const FloatRegister r = g_FPArgReg[i]; ++ Label d, done; + -+static inline Address at_tos_p2() { -+ return Address(esp, Interpreter::expr_offset_in_bytes(2)); -+} ++ __ andi(t0, c_rarg3, 1UL << i); ++ __ bnez(t0, d); ++ __ flw(r, Address(sp, (10 + i) * wordSize)); ++ __ j(done); ++ __ bind(d); ++ __ fld(r, Address(sp, (10 + i) * wordSize)); ++ __ bind(done); ++ } + -+static inline Address at_tos_p3() { -+ return Address(esp, Interpreter::expr_offset_in_bytes(3)); -+} ++ // c_rarg0 contains the result from the call of ++ // InterpreterRuntime::slow_signature_handler so we don't touch it ++ // here. It will be loaded with the JNIEnv* later. 
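The float/double identifier word tested in the loop above is a plain bitmask: bit i set means C floating-point argument register i carries a double (reload with fld), clear means a float (reload with flw). A standalone C++ sketch of that width selection follows (hypothetical helper names, not HotSpot code); the loop after it in the generated handler then reloads the integer argument registers from their 8-byte slots, starting at 1 because c_rarg0 already holds the result handler.

#include <cstdint>
#include <cstddef>

// Width, in bytes, of the load used for FP argument register i, given the
// identifier word written by the slow signature handler:
//   bit i == 1  ->  argument i is a double (fld, 8 bytes)
//   bit i == 0  ->  argument i is a float  (flw, 4 bytes)
// Illustrative helper only, not part of the patch.
inline size_t fp_load_width(uint32_t identifiers, int i) {
  return (identifiers & (1u << i)) ? 8 : 4;
}

// Example: identifiers == 0b0101 -> args 0 and 2 are doubles, args 1 and 3 are floats,
// so fp_load_width(0b0101, 0) == 8 and fp_load_width(0b0101, 1) == 4.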
++ for (int i = 1; i < Argument::n_int_register_parameters_c; i++) { ++ const Register rm = g_INTArgReg[i]; ++ __ ld(rm, Address(sp, i * wordSize)); ++ } + -+static inline Address at_tos_p4() { -+ return Address(esp, Interpreter::expr_offset_in_bytes(4)); -+} ++ __ addi(sp, sp, 18 * wordSize); ++ __ ret(); + -+static inline Address at_tos_p5() { -+ return Address(esp, Interpreter::expr_offset_in_bytes(5)); ++ return entry; +} + -+// Miscelaneous helper routines -+// Store an oop (or NULL) at the Address described by obj. -+// If val == noreg this means store a NULL -+static void do_oop_store(InterpreterMacroAssembler* _masm, -+ Address dst, -+ Register val, -+ DecoratorSet decorators) { -+ assert(val == noreg || val == x10, "parameter is just for looks"); -+ __ store_heap_oop(dst, val, x29, x11, x13, decorators); -+} ++// Various method entries ++address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) { ++ // xmethod: Method* ++ // x30: sender sp ++ // esp: args + -+static void do_oop_load(InterpreterMacroAssembler* _masm, -+ Address src, -+ Register dst, -+ DecoratorSet decorators) { -+ __ load_heap_oop(dst, src, x7, x11, decorators); -+} ++ if (!InlineIntrinsics) { ++ return NULL; // Generate a vanilla entry ++ } + -+Address TemplateTable::at_bcp(int offset) { -+ assert(_desc->uses_bcp(), "inconsistent uses_bcp information"); -+ return Address(xbcp, offset); -+} ++ // These don't need a safepoint check because they aren't virtually ++ // callable. We won't enter these intrinsics from compiled code. ++ // If in the future we added an intrinsic which was virtually callable ++ // we'd have to worry about how to safepoint so that this code is used. + -+void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg, -+ Register temp_reg, bool load_bc_into_bc_reg/*=true*/, -+ int byte_no) -+{ -+ if (!RewriteBytecodes) { return; } -+ Label L_patch_done; ++ // mathematical functions inlined by compiler ++ // (interpreter must provide identical implementation ++ // in order to avoid monotonicity bugs when switching ++ // from interpreter to compiler in the middle of some ++ // computation) ++ // ++ // stack: ++ // [ arg ] <-- esp ++ // [ arg ] ++ // retaddr in ra + -+ switch (bc) { -+ case Bytecodes::_fast_aputfield: // fall through -+ case Bytecodes::_fast_bputfield: // fall through -+ case Bytecodes::_fast_zputfield: // fall through -+ case Bytecodes::_fast_cputfield: // fall through -+ case Bytecodes::_fast_dputfield: // fall through -+ case Bytecodes::_fast_fputfield: // fall through -+ case Bytecodes::_fast_iputfield: // fall through -+ case Bytecodes::_fast_lputfield: // fall through -+ case Bytecodes::_fast_sputfield: { -+ // We skip bytecode quickening for putfield instructions when -+ // the put_code written to the constant pool cache is zero. -+ // This is required so that every execution of this instruction -+ // calls out to InterpreterRuntime::resolve_get_put to do -+ // additional, required work. 
-+ assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); -+ assert(load_bc_into_bc_reg, "we use bc_reg as temp"); -+ __ get_cache_and_index_and_bytecode_at_bcp(temp_reg, bc_reg, temp_reg, byte_no, 1); -+ __ mv(bc_reg, bc); -+ __ beqz(temp_reg, L_patch_done); ++ address fn = NULL; ++ address entry_point = NULL; ++ Register continuation = ra; ++ switch (kind) { ++ case Interpreter::java_lang_math_abs: ++ entry_point = __ pc(); ++ __ fld(f10, Address(esp)); ++ __ fabs_d(f10, f10); ++ __ mv(sp, x30); // Restore caller's SP + break; -+ } -+ default: -+ assert(byte_no == -1, "sanity"); -+ // the pair bytecodes have already done the load. -+ if (load_bc_into_bc_reg) { -+ __ mv(bc_reg, bc); ++ case Interpreter::java_lang_math_sqrt: ++ entry_point = __ pc(); ++ __ fld(f10, Address(esp)); ++ __ fsqrt_d(f10, f10); ++ __ mv(sp, x30); ++ break; ++ case Interpreter::java_lang_math_sin : ++ entry_point = __ pc(); ++ __ fld(f10, Address(esp)); ++ __ mv(sp, x30); ++ __ mv(x9, ra); ++ continuation = x9; // The first callee-saved register ++ if (StubRoutines::dsin() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dsin); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dsin()); ++ } ++ __ mv(t0, fn); ++ __ jalr(t0); ++ break; ++ case Interpreter::java_lang_math_cos : ++ entry_point = __ pc(); ++ __ fld(f10, Address(esp)); ++ __ mv(sp, x30); ++ __ mv(x9, ra); ++ continuation = x9; // The first callee-saved register ++ if (StubRoutines::dcos() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dcos); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dcos()); ++ } ++ __ mv(t0, fn); ++ __ jalr(t0); ++ break; ++ case Interpreter::java_lang_math_tan : ++ entry_point = __ pc(); ++ __ fld(f10, Address(esp)); ++ __ mv(sp, x30); ++ __ mv(x9, ra); ++ continuation = x9; // The first callee-saved register ++ if (StubRoutines::dtan() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dtan); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dtan()); ++ } ++ __ mv(t0, fn); ++ __ jalr(t0); ++ break; ++ case Interpreter::java_lang_math_log : ++ entry_point = __ pc(); ++ __ fld(f10, Address(esp)); ++ __ mv(sp, x30); ++ __ mv(x9, ra); ++ continuation = x9; // The first callee-saved register ++ if (StubRoutines::dlog() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dlog); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dlog()); ++ } ++ __ mv(t0, fn); ++ __ jalr(t0); ++ break; ++ case Interpreter::java_lang_math_log10 : ++ entry_point = __ pc(); ++ __ fld(f10, Address(esp)); ++ __ mv(sp, x30); ++ __ mv(x9, ra); ++ continuation = x9; // The first callee-saved register ++ if (StubRoutines::dlog10() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dlog10); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dlog10()); ++ } ++ __ mv(t0, fn); ++ __ jalr(t0); ++ break; ++ case Interpreter::java_lang_math_exp : ++ entry_point = __ pc(); ++ __ fld(f10, Address(esp)); ++ __ mv(sp, x30); ++ __ mv(x9, ra); ++ continuation = x9; // The first callee-saved register ++ if (StubRoutines::dexp() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dexp); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dexp()); ++ } ++ __ mv(t0, fn); ++ __ jalr(t0); ++ break; ++ case Interpreter::java_lang_math_pow : ++ entry_point = __ pc(); ++ __ mv(x9, ra); ++ continuation = x9; ++ __ fld(f10, Address(esp, 2 * Interpreter::stackElementSize)); ++ __ fld(f11, Address(esp)); ++ __ mv(sp, x30); ++ if (StubRoutines::dpow() == 
NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dpow); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dpow()); ++ } ++ __ mv(t0, fn); ++ __ jalr(t0); ++ break; ++ case Interpreter::java_lang_math_fmaD : ++ if (UseFMA) { ++ entry_point = __ pc(); ++ __ fld(f10, Address(esp, 4 * Interpreter::stackElementSize)); ++ __ fld(f11, Address(esp, 2 * Interpreter::stackElementSize)); ++ __ fld(f12, Address(esp)); ++ __ fmadd_d(f10, f10, f11, f12); ++ __ mv(sp, x30); // Restore caller's SP ++ } ++ break; ++ case Interpreter::java_lang_math_fmaF : ++ if (UseFMA) { ++ entry_point = __ pc(); ++ __ flw(f10, Address(esp, 2 * Interpreter::stackElementSize)); ++ __ flw(f11, Address(esp, Interpreter::stackElementSize)); ++ __ flw(f12, Address(esp)); ++ __ fmadd_s(f10, f10, f11, f12); ++ __ mv(sp, x30); // Restore caller's SP + } ++ break; ++ default: ++ ; + } -+ -+ if (JvmtiExport::can_post_breakpoint()) { -+ Label L_fast_patch; -+ // if a breakpoint is present we can't rewrite the stream directly -+ __ load_unsigned_byte(temp_reg, at_bcp(0)); -+ __ addi(temp_reg, temp_reg, -Bytecodes::_breakpoint); // temp_reg is temporary register. -+ __ bnez(temp_reg, L_fast_patch); -+ // Let breakpoint table handling rewrite to quicker bytecode -+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::set_original_bytecode_at), xmethod, xbcp, bc_reg); -+ __ j(L_patch_done); -+ __ bind(L_fast_patch); ++ if (entry_point != NULL) { ++ __ jr(continuation); + } + -+#ifdef ASSERT -+ Label L_okay; -+ __ load_unsigned_byte(temp_reg, at_bcp(0)); -+ __ beq(temp_reg, bc_reg, L_okay); -+ __ addi(temp_reg, temp_reg, -(int) Bytecodes::java_code(bc)); -+ __ beqz(temp_reg, L_okay); -+ __ stop("patching the wrong bytecode"); -+ __ bind(L_okay); -+#endif -+ -+ // patch bytecode -+ __ sb(bc_reg, at_bcp(0)); -+ __ bind(L_patch_done); ++ return entry_point; +} + -+// Individual instructions ++// Abstract method entry ++// Attempt to execute abstract method. Throw exception ++address TemplateInterpreterGenerator::generate_abstract_entry(void) { ++ // xmethod: Method* ++ // x30: sender SP + -+void TemplateTable::nop() { -+ transition(vtos, vtos); -+ // nothing to do -+} ++ address entry_point = __ pc(); + -+void TemplateTable::shouldnotreachhere() { -+ transition(vtos, vtos); -+ __ stop("should not reach here bytecode"); -+} ++ // abstract method entry + -+void TemplateTable::aconst_null() -+{ -+ transition(vtos, atos); -+ __ mv(x10, zr); -+} ++ // pop return address, reset last_sp to NULL ++ __ empty_expression_stack(); ++ __ restore_bcp(); // bcp must be correct for exception handler (was destroyed) ++ __ restore_locals(); // make sure locals pointer is correct as well (was destroyed) + -+void TemplateTable::iconst(int value) -+{ -+ transition(vtos, itos); -+ __ mv(x10, value); -+} ++ // throw exception ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_AbstractMethodErrorWithMethod), ++ xmethod); ++ // the call_VM checks for exception, so we should never return here. 
++ __ should_not_reach_here(); + -+void TemplateTable::lconst(int value) -+{ -+ transition(vtos, ltos); -+ __ mv(x10, value); ++ return entry_point; +} + -+void TemplateTable::fconst(int value) -+{ -+ transition(vtos, ftos); -+ static float fBuf[2] = {1.0, 2.0}; -+ __ mv(t0, (intptr_t)fBuf); -+ switch (value) { -+ case 0: -+ __ fmv_w_x(f10, zr); -+ break; -+ case 1: -+ __ flw(f10, t0, 0); -+ break; -+ case 2: -+ __ flw(f10, t0, sizeof(float)); -+ break; -+ default: -+ ShouldNotReachHere(); -+ } -+} ++address TemplateInterpreterGenerator::generate_StackOverflowError_handler() { ++ address entry = __ pc(); + -+void TemplateTable::dconst(int value) -+{ -+ transition(vtos, dtos); -+ static double dBuf[2] = {1.0, 2.0}; -+ __ mv(t0, (intptr_t)dBuf); -+ switch (value) { -+ case 0: -+ __ fmv_d_x(f10, zr); -+ break; -+ case 1: -+ __ fld(f10, t0, 0); -+ break; -+ case 2: -+ __ fld(f10, t0, sizeof(double)); -+ break; -+ default: -+ ShouldNotReachHere(); ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld(t0, Address(fp, frame::interpreter_frame_monitor_block_top_offset * wordSize)); ++ __ mv(t1, sp); ++ // maximal sp for current fp (stack grows negative) ++ // check if frame is complete ++ __ bge(t0, t1, L); ++ __ stop ("interpreter frame not set up"); ++ __ bind(L); + } -+} ++#endif // ASSERT ++ // Restore bcp under the assumption that the current frame is still ++ // interpreted ++ __ restore_bcp(); + -+void TemplateTable::bipush() -+{ -+ transition(vtos, itos); -+ __ load_signed_byte(x10, at_bcp(1)); ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ // throw exception ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_StackOverflowError)); ++ return entry; +} + -+void TemplateTable::sipush() -+{ -+ transition(vtos, itos); -+ __ load_unsigned_short(x10, at_bcp(1)); -+ __ revb_w_w(x10, x10); -+ __ sraiw(x10, x10, 16); -+} ++address TemplateInterpreterGenerator::generate_ArrayIndexOutOfBounds_handler() { ++ address entry = __ pc(); ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ // setup parameters + -+void TemplateTable::ldc(bool wide) -+{ -+ transition(vtos, vtos); -+ Label call_ldc, notFloat, notClass, notInt, Done; ++ // convention: expect aberrant index in register x11 ++ __ zero_extend(c_rarg2, x11, 32); ++ // convention: expect array in register x13 ++ __ mv(c_rarg1, x13); ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime:: ++ throw_ArrayIndexOutOfBoundsException), ++ c_rarg1, c_rarg2); ++ return entry; ++} + -+ if (wide) { -+ __ get_unsigned_2_byte_index_at_bcp(x11, 1); -+ } else { -+ __ load_unsigned_byte(x11, at_bcp(1)); -+ } -+ __ get_cpool_and_tags(x12, x10); ++address TemplateInterpreterGenerator::generate_ClassCastException_handler() { ++ address entry = __ pc(); + -+ const int base_offset = ConstantPool::header_size() * wordSize; -+ const int tags_offset = Array::base_offset_in_bytes(); ++ // object is at TOS ++ __ pop_reg(c_rarg1); + -+ // get type -+ __ addi(x13, x11, tags_offset); -+ __ add(x13, x10, x13); -+ __ membar(MacroAssembler::AnyAny); -+ __ lbu(x13, Address(x13, 0)); -+ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); + -+ // unresolved class - get the resolved class -+ __ mv(t1, (u1)JVM_CONSTANT_UnresolvedClass); -+ __ beq(x13, t1, call_ldc); ++ __ 
call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime:: ++ throw_ClassCastException), ++ c_rarg1); ++ return entry; ++} + -+ // unresolved class in error state - call into runtime to throw the error -+ // from the first resolution attempt -+ __ mv(t1, (u1)JVM_CONSTANT_UnresolvedClassInError); -+ __ beq(x13, t1, call_ldc); ++address TemplateInterpreterGenerator::generate_exception_handler_common( ++ const char* name, const char* message, bool pass_oop) { ++ assert(!pass_oop || message == NULL, "either oop or message but not both"); ++ address entry = __ pc(); ++ if (pass_oop) { ++ // object is at TOS ++ __ pop_reg(c_rarg2); ++ } ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ // setup parameters ++ __ la(c_rarg1, Address((address)name)); ++ if (pass_oop) { ++ __ call_VM(x10, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime:: ++ create_klass_exception), ++ c_rarg1, c_rarg2); ++ } else { ++ // kind of lame ExternalAddress can't take NULL because ++ // external_word_Relocation will assert. ++ if (message != NULL) { ++ __ la(c_rarg2, Address((address)message)); ++ } else { ++ __ mv(c_rarg2, NULL_WORD); ++ } ++ __ call_VM(x10, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::create_exception), ++ c_rarg1, c_rarg2); ++ } ++ // throw exception ++ __ j(address(Interpreter::throw_exception_entry())); ++ return entry; ++} + -+ // resolved class - need to call vm to get java mirror of the class -+ __ mv(t1, (u1)JVM_CONSTANT_Class); -+ __ bne(x13, t1, notClass); ++address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, int step, size_t index_size) { ++ address entry = __ pc(); + -+ __ bind(call_ldc); -+ __ mv(c_rarg1, wide); -+ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), c_rarg1); -+ __ push_ptr(x10); -+ __ verify_oop(x10); -+ __ j(Done); ++ // Restore stack bottom in case i2c adjusted stack ++ __ ld(esp, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ // and NULL it as marker that esp is now tos until next java call ++ __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ __ restore_bcp(); ++ __ restore_locals(); ++ __ restore_constant_pool_cache(); ++ __ get_method(xmethod); + -+ __ bind(notClass); -+ __ mv(t1, (u1)JVM_CONSTANT_Float); -+ __ bne(x13, t1, notFloat); ++ if (state == atos) { ++ Register obj = x10; ++ Register mdp = x11; ++ Register tmp = x12; ++ __ ld(mdp, Address(xmethod, Method::method_data_offset())); ++ __ profile_return_type(mdp, obj, tmp); ++ } + -+ // ftos -+ __ shadd(x11, x11, x12, x11, 3); -+ __ flw(f10, Address(x11, base_offset)); -+ __ push_f(f10); -+ __ j(Done); ++ // Pop N words from the stack ++ __ get_cache_and_index_at_bcp(x11, x12, 1, index_size); ++ __ ld(x11, Address(x11, ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); ++ __ andi(x11, x11, ConstantPoolCacheEntry::parameter_size_mask); + -+ __ bind(notFloat); ++ __ shadd(esp, x11, esp, t0, 3); + -+ __ mv(t1, (u1)JVM_CONSTANT_Integer); -+ __ bne(x13, t1, notInt); ++ // Restore machine SP ++ __ ld(t0, Address(xmethod, Method::const_offset())); ++ __ lhu(t0, Address(t0, ConstMethod::max_stack_offset())); ++ __ addi(t0, t0, frame::interpreter_frame_monitor_size() + 2); ++ __ ld(t1, ++ Address(fp, frame::interpreter_frame_initial_sp_offset * wordSize)); ++ __ slli(t0, t0, 3); ++ __ sub(t0, t1, t0); ++ __ andi(sp, t0, -16); + -+ // itos -+ __ shadd(x11, x11, x12, x11, 3); -+ __ lw(x10, Address(x11, base_offset)); -+ __ 
push_i(x10); -+ __ j(Done); ++ __ check_and_handle_popframe(xthread); ++ __ check_and_handle_earlyret(xthread); + -+ __ bind(notInt); -+ condy_helper(Done); ++ __ get_dispatch(); ++ __ dispatch_next(state, step); + -+ __ bind(Done); ++ return entry; +} + -+// Fast path for caching oop constants. -+void TemplateTable::fast_aldc(bool wide) -+{ -+ transition(vtos, atos); ++address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state, ++ int step, ++ address continuation) { ++ address entry = __ pc(); ++ __ restore_bcp(); ++ __ restore_locals(); ++ __ restore_constant_pool_cache(); ++ __ get_method(xmethod); ++ __ get_dispatch(); + -+ const Register result = x10; -+ const Register tmp = x11; -+ const Register rarg = x12; ++ // Calculate stack limit ++ __ ld(t0, Address(xmethod, Method::const_offset())); ++ __ lhu(t0, Address(t0, ConstMethod::max_stack_offset())); ++ __ addi(t0, t0, frame::interpreter_frame_monitor_size() + 2); ++ __ ld(t1, Address(fp, frame::interpreter_frame_initial_sp_offset * wordSize)); ++ __ slli(t0, t0, 3); ++ __ sub(t0, t1, t0); ++ __ andi(sp, t0, -16); + -+ const int index_size = wide ? sizeof(u2) : sizeof(u1); ++ // Restore expression stack pointer ++ __ ld(esp, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ // NULL last_sp until next java call ++ __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); + -+ Label resolved; ++ // handle exceptions ++ { ++ Label L; ++ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); ++ __ beqz(t0, L); ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_pending_exception)); ++ __ should_not_reach_here(); ++ __ bind(L); ++ } + -+ // We are resolved if the resolved reference cache entry contains a -+ // non-null object (String, MethodType, etc.) -+ assert_different_registers(result, tmp); -+ __ get_cache_index_at_bcp(tmp, 1, index_size); -+ __ load_resolved_reference_at_index(result, tmp); -+ __ bnez(result, resolved); ++ if (continuation == NULL) { ++ __ dispatch_next(state, step); ++ } else { ++ __ jump_to_entry(continuation); ++ } ++ return entry; ++} + -+ const address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc); ++address TemplateInterpreterGenerator::generate_result_handler_for(BasicType type) { ++ address entry = __ pc(); ++ if (type == T_OBJECT) { ++ // retrieve result from frame ++ __ ld(x10, Address(fp, frame::interpreter_frame_oop_temp_offset * wordSize)); ++ // and verify it ++ __ verify_oop(x10); ++ } else { ++ __ cast_primitive_type(type, x10); ++ } + -+ // first time invocation - must resolve first -+ __ mv(rarg, (int)bytecode()); -+ __ call_VM(result, entry, rarg); ++ __ ret(); // return from result handler ++ return entry; ++} + -+ __ bind(resolved); ++address TemplateInterpreterGenerator::generate_safept_entry_for(TosState state, ++ address runtime_entry) { ++ assert_cond(runtime_entry != NULL); ++ address entry = __ pc(); ++ __ push(state); ++ __ call_VM(noreg, runtime_entry); ++ __ fence(0xf, 0xf); ++ __ dispatch_via(vtos, Interpreter::_normal_table.table_for(vtos)); ++ return entry; ++} + -+ { // Check for the null sentinel. -+ // If we just called the VM, it already did the mapping for us, -+ // but it's harmless to retry. -+ Label notNull; ++// Helpers for commoning out cases in the various type of method entries. 
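As an aside on generate_return_entry_for above: the invoke's ConstantPoolCacheEntry flags word encodes the callee's parameter size in its low bits, and the return entry bumps the expression stack pointer by that many 8-byte slots, which is what the andi/shadd pair does. A small C++ sketch of that arithmetic; the mask value here is an assumption for illustration, not taken from the real headers.

#include <cstdint>

// Assumed encoding: the low 8 bits of the cpCache entry flags hold the parameter
// size, standing in for ConstantPoolCacheEntry::parameter_size_mask.
static const uint64_t kParameterSizeMask = 0xff;  // assumption for illustration
static const int      kLogBytesPerSlot   = 3;     // 8-byte expression stack slots

// New expression stack pointer after popping the callee's arguments
// (the expression stack grows down, so popping means adding).
inline uint64_t pop_parameters(uint64_t esp, uint64_t cp_cache_flags) {
  uint64_t param_size = cp_cache_flags & kParameterSizeMask;
  return esp + (param_size << kLogBytesPerSlot);
}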
++// + -+ // Stash null_sentinel address to get its value later -+ int32_t offset = 0; -+ __ movptr_with_offset(rarg, Universe::the_null_sentinel_addr(), offset); -+ __ ld(tmp, Address(rarg, offset)); -+ __ bne(result, tmp, notNull); -+ __ mv(result, zr); // NULL object reference -+ __ bind(notNull); -+ } + -+ if (VerifyOops) { -+ // Safe to call with 0 result -+ __ verify_oop(result); ++// increment invocation count & check for overflow ++// ++// Note: checking for negative value instead of overflow ++// so we have a 'sticky' overflow test ++// ++// xmethod: method ++// ++void TemplateInterpreterGenerator::generate_counter_incr(Label* overflow) { ++ Label done; ++ // Note: In tiered we increment either counters in Method* or in MDO depending if we're profiling or not. ++ int increment = InvocationCounter::count_increment; ++ Label no_mdo; ++ if (ProfileInterpreter) { ++ // Are we profiling? ++ __ ld(x10, Address(xmethod, Method::method_data_offset())); ++ __ beqz(x10, no_mdo); ++ // Increment counter in the MDO ++ const Address mdo_invocation_counter(x10, in_bytes(MethodData::invocation_counter_offset()) + ++ in_bytes(InvocationCounter::counter_offset())); ++ const Address mask(x10, in_bytes(MethodData::invoke_mask_offset())); ++ __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, t0, t1, false, overflow); ++ __ j(done); + } ++ __ bind(no_mdo); ++ // Increment counter in MethodCounters ++ const Address invocation_counter(t1, ++ MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset()); ++ __ get_method_counters(xmethod, t1, done); ++ const Address mask(t1, in_bytes(MethodCounters::invoke_mask_offset())); ++ __ increment_mask_and_jump(invocation_counter, increment, mask, t0, x11, false, overflow); ++ __ bind(done); +} + -+void TemplateTable::ldc2_w() -+{ -+ transition(vtos, vtos); -+ Label notDouble, notLong, Done; -+ __ get_unsigned_2_byte_index_at_bcp(x10, 1); ++void TemplateInterpreterGenerator::generate_counter_overflow(Label& do_continue) { ++ __ mv(c_rarg1, zr); ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::frequency_counter_overflow), c_rarg1); ++ __ j(do_continue); ++} + -+ __ get_cpool_and_tags(x11, x12); -+ const int base_offset = ConstantPool::header_size() * wordSize; -+ const int tags_offset = Array::base_offset_in_bytes(); ++// See if we've got enough room on the stack for locals plus overhead ++// below JavaThread::stack_overflow_limit(). If not, throw a StackOverflowError ++// without going through the signal handler, i.e., reserved and yellow zones ++// will not be made usable. The shadow zone must suffice to handle the ++// overflow. ++// The expression stack grows down incrementally, so the normal guard ++// page mechanism will work for that. ++// ++// NOTE: Since the additional locals are also always pushed (wasn't ++// obvious in generate_method_entry) so the guard should work for them ++// too. 
++// ++// Args: ++// x13: number of additional locals this frame needs (what we must check) ++// xmethod: Method* ++// ++// Kills: ++// x10 ++void TemplateInterpreterGenerator::generate_stack_overflow_check(void) { + -+ // get type -+ __ add(x12, x12, x10); -+ __ load_unsigned_byte(x12, Address(x12, tags_offset)); -+ __ mv(t1, JVM_CONSTANT_Double); -+ __ bne(x12, t1, notDouble); ++ // monitor entry size: see picture of stack set ++ // (generate_method_entry) and frame_amd64.hpp ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; + -+ // dtos -+ __ shadd(x12, x10, x11, x12, 3); -+ __ fld(f10, Address(x12, base_offset)); -+ __ push_d(f10); -+ __ j(Done); ++ // total overhead size: entry_size + (saved fp through expr stack ++ // bottom). be sure to change this if you add/subtract anything ++ // to/from the overhead area ++ const int overhead_size = ++ -(frame::interpreter_frame_initial_sp_offset * wordSize) + entry_size; + -+ __ bind(notDouble); -+ __ mv(t1, (int)JVM_CONSTANT_Long); -+ __ bne(x12, t1, notLong); ++ const int page_size = os::vm_page_size(); + -+ // ltos -+ __ shadd(x10, x10, x11, x10, 3); -+ __ ld(x10, Address(x10, base_offset)); -+ __ push_l(x10); -+ __ j(Done); ++ Label after_frame_check; + -+ __ bind(notLong); -+ condy_helper(Done); -+ __ bind(Done); ++ // see if the frame is greater than one page in size. If so, ++ // then we need to verify there is enough stack space remaining ++ // for the additional locals. ++ __ mv(t0, (page_size - overhead_size) / Interpreter::stackElementSize); ++ __ bleu(x13, t0, after_frame_check); + -+} ++ // compute sp as if this were going to be the last frame on ++ // the stack before the red zone + -+void TemplateTable::condy_helper(Label& Done) -+{ -+ const Register obj = x10; -+ const Register rarg = x11; -+ const Register flags = x12; -+ const Register off = x13; ++ // locals + overhead, in bytes ++ __ mv(x10, overhead_size); ++ __ shadd(x10, x13, x10, t0, Interpreter::logStackElementSize); // 2 slots per parameter. + -+ const address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc); ++ const Address stack_limit(xthread, JavaThread::stack_overflow_limit_offset()); ++ __ ld(t0, stack_limit); + -+ __ mv(rarg, (int) bytecode()); -+ __ call_VM(obj, entry, rarg); ++#ifdef ASSERT ++ Label limit_okay; ++ // Verify that thread stack limit is non-zero. ++ __ bnez(t0, limit_okay); ++ __ stop("stack overflow limit is zero"); ++ __ bind(limit_okay); ++#endif + -+ __ get_vm_result_2(flags, xthread); ++ // Add stack limit to locals. ++ __ add(x10, x10, t0); + -+ // VMr = obj = base address to find primitive value to push -+ // VMr2 = flags = (tos, off) using format of CPCE::_flags -+ __ mv(off, flags); -+ __ mv(t0, ConstantPoolCacheEntry::field_index_mask); -+ __ andrw(off, off, t0); ++ // Check against the current stack bottom. ++ __ bgtu(sp, x10, after_frame_check); + -+ __ add(off, obj, off); -+ const Address field(off, 0); // base + R---->base + offset ++ // Remove the incoming args, peeling the machine SP back to where it ++ // was in the caller. This is not strictly necessary, but unless we ++ // do so the stack frame may have a garbage FP; this ensures a ++ // correct call stack that we can always unwind. The ANDI should be ++ // unnecessary because the sender SP in x30 is always aligned, but ++ // it doesn't hurt. 
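The frame-size check sketched in the comments above reduces to a single comparison: the new frame fits only if sp still lies above the thread's stack_overflow_limit plus the space needed for the additional locals and the fixed frame overhead (the stack grows toward lower addresses). A minimal C++ sketch of that predicate, with illustrative names and types; the code that follows peels sp back to the aligned sender SP before jumping to the shared StackOverflowError stub when the check fails.

#include <cstdint>
#include <cstddef>

// true  -> the frame fits, continue with the normal method entry
// false -> it would cross the overflow limit, so throw StackOverflowError
inline bool frame_fits(uintptr_t sp,                 // current stack pointer
                       uintptr_t overflow_limit,     // JavaThread::stack_overflow_limit()
                       size_t    extra_locals_bytes, // additional locals * wordSize
                       size_t    overhead_bytes) {   // monitor entry + fixed frame overhead
  // Stack grows down, so sp must stay strictly above limit + required space
  // (this mirrors the unsigned bgtu comparison in the generated code).
  return sp > overflow_limit + extra_locals_bytes + overhead_bytes;
}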
++ __ andi(sp, x30, -16); + -+ __ slli(flags, flags, XLEN - (ConstantPoolCacheEntry::tos_state_shift + ConstantPoolCacheEntry::tos_state_bits)); -+ __ srli(flags, flags, XLEN - ConstantPoolCacheEntry::tos_state_bits); // (1 << 5) - 4 --> 28~31==> flags:0~3 ++ // Note: the restored frame is not necessarily interpreted. ++ // Use the shared runtime version of the StackOverflowError. ++ assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "stub not yet generated"); ++ __ far_jump(RuntimeAddress(StubRoutines::throw_StackOverflowError_entry())); + -+ switch (bytecode()) { -+ case Bytecodes::_ldc: // fall through -+ case Bytecodes::_ldc_w: { -+ // tos in (itos, ftos, stos, btos, ctos, ztos) -+ Label notInt, notFloat, notShort, notByte, notChar, notBool; -+ __ mv(t1, itos); -+ __ bne(flags, t1, notInt); -+ // itos -+ __ lw(x10, field); -+ __ push(itos); -+ __ j(Done); ++ // all done with frame size check ++ __ bind(after_frame_check); ++} + -+ __ bind(notInt); -+ __ mv(t1, ftos); -+ __ bne(flags, t1, notFloat); -+ // ftos -+ __ load_float(field); -+ __ push(ftos); -+ __ j(Done); ++// Allocate monitor and lock method (asm interpreter) ++// ++// Args: ++// xmethod: Method* ++// xlocals: locals ++// ++// Kills: ++// x10 ++// c_rarg0, c_rarg1, c_rarg2, c_rarg3, ...(param regs) ++// t0, t1 (temporary regs) ++void TemplateInterpreterGenerator::lock_method() { ++ // synchronize method ++ const Address access_flags(xmethod, Method::access_flags_offset()); ++ const Address monitor_block_top(fp, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; + -+ __ bind(notFloat); -+ __ mv(t1, stos); -+ __ bne(flags, t1, notShort); -+ // stos -+ __ load_signed_short(x10, field); -+ __ push(stos); -+ __ j(Done); -+ -+ __ bind(notShort); -+ __ mv(t1, btos); -+ __ bne(flags, t1, notByte); -+ // btos -+ __ load_signed_byte(x10, field); -+ __ push(btos); -+ __ j(Done); -+ -+ __ bind(notByte); -+ __ mv(t1, ctos); -+ __ bne(flags, t1, notChar); -+ // ctos -+ __ load_unsigned_short(x10, field); -+ __ push(ctos); -+ __ j(Done); ++#ifdef ASSERT ++ __ lwu(x10, access_flags); ++ __ verify_access_flags(x10, JVM_ACC_SYNCHRONIZED, "method doesn't need synchronization", false); ++#endif // ASSERT + -+ __ bind(notChar); -+ __ mv(t1, ztos); -+ __ bne(flags, t1, notBool); -+ // ztos -+ __ load_signed_byte(x10, field); -+ __ push(ztos); -+ __ j(Done); ++ // get synchronization object ++ { ++ Label done; ++ __ lwu(x10, access_flags); ++ __ andi(t0, x10, JVM_ACC_STATIC); ++ // get receiver (assume this is frequent case) ++ __ ld(x10, Address(xlocals, Interpreter::local_offset_in_bytes(0))); ++ __ beqz(t0, done); ++ __ load_mirror(x10, xmethod); + -+ __ bind(notBool); -+ break; ++#ifdef ASSERT ++ { ++ Label L; ++ __ bnez(x10, L); ++ __ stop("synchronization object is NULL"); ++ __ bind(L); + } ++#endif // ASSERT + -+ case Bytecodes::_ldc2_w: { -+ Label notLong, notDouble; -+ __ mv(t1, ltos); -+ __ bne(flags, t1, notLong); -+ // ltos -+ __ ld(x10, field); -+ __ push(ltos); -+ __ j(Done); ++ __ bind(done); ++ } + -+ __ bind(notLong); -+ __ mv(t1, dtos); -+ __ bne(flags, t1, notDouble); -+ // dtos -+ __ load_double(field); -+ __ push(dtos); -+ __ j(Done); ++ // add space for monitor & lock ++ __ add(sp, sp, - entry_size); // add space for a monitor entry ++ __ add(esp, esp, - entry_size); ++ __ mv(t0, esp); ++ __ sd(t0, monitor_block_top); // set new monitor block top ++ // store object ++ __ sd(x10, Address(esp, 
BasicObjectLock::obj_offset_in_bytes())); ++ __ mv(c_rarg1, esp); // object address ++ __ lock_object(c_rarg1); ++} + -+ __ bind(notDouble); -+ break; -+ } ++// Generate a fixed interpreter frame. This is identical setup for ++// interpreted methods and for native methods hence the shared code. ++// ++// Args: ++// ra: return address ++// xmethod: Method* ++// xlocals: pointer to locals ++// xcpool: cp cache ++// stack_pointer: previous sp ++void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { ++ // initialize fixed part of activation frame ++ if (native_call) { ++ __ add(esp, sp, - 14 * wordSize); ++ __ mv(xbcp, zr); ++ __ add(sp, sp, - 14 * wordSize); ++ // add 2 zero-initialized slots for native calls ++ __ sd(zr, Address(sp, 13 * wordSize)); ++ __ sd(zr, Address(sp, 12 * wordSize)); ++ } else { ++ __ add(esp, sp, - 12 * wordSize); ++ __ ld(t0, Address(xmethod, Method::const_offset())); // get ConstMethod ++ __ add(xbcp, t0, in_bytes(ConstMethod::codes_offset())); // get codebase ++ __ add(sp, sp, - 12 * wordSize); ++ } ++ __ sd(xbcp, Address(sp, wordSize)); ++ __ sd(esp, Address(sp, 0)); + -+ default: -+ ShouldNotReachHere(); ++ if (ProfileInterpreter) { ++ Label method_data_continue; ++ __ ld(t0, Address(xmethod, Method::method_data_offset())); ++ __ beqz(t0, method_data_continue); ++ __ la(t0, Address(t0, in_bytes(MethodData::data_offset()))); ++ __ bind(method_data_continue); + } + -+ __ stop("bad ldc/condy"); -+} ++ __ sd(xmethod, Address(sp, 7 * wordSize)); ++ __ sd(ProfileInterpreter ? t0 : zr, Address(sp, 6 * wordSize)); + -+void TemplateTable::locals_index(Register reg, int offset) -+{ -+ __ lbu(reg, at_bcp(offset)); -+ __ neg(reg, reg); -+} ++ // Get mirror and store it in the frame as GC root for this Method* ++ __ load_mirror(t2, xmethod); ++ __ sd(zr, Address(sp, 5 * wordSize)); ++ __ sd(t2, Address(sp, 4 * wordSize)); + -+void TemplateTable::iload() { -+ iload_internal(); -+} ++ __ ld(xcpool, Address(xmethod, Method::const_offset())); ++ __ ld(xcpool, Address(xcpool, ConstMethod::constants_offset())); ++ __ ld(xcpool, Address(xcpool, ConstantPool::cache_offset_in_bytes())); ++ __ sd(xcpool, Address(sp, 3 * wordSize)); ++ __ sd(xlocals, Address(sp, 2 * wordSize)); + -+void TemplateTable::nofast_iload() { -+ iload_internal(may_not_rewrite); -+} ++ __ sd(ra, Address(sp, 11 * wordSize)); ++ __ sd(fp, Address(sp, 10 * wordSize)); ++ __ la(fp, Address(sp, 12 * wordSize)); // include ra & fp + -+void TemplateTable::iload_internal(RewriteControl rc) { -+ transition(vtos, itos); -+ if (RewriteFrequentPairs && rc == may_rewrite) { -+ Label rewrite, done; -+ const Register bc = x14; ++ // set sender sp ++ // leave last_sp as null ++ __ sd(x30, Address(sp, 9 * wordSize)); ++ __ sd(zr, Address(sp, 8 * wordSize)); + -+ // get next bytecode -+ __ load_unsigned_byte(x11, at_bcp(Bytecodes::length_for(Bytecodes::_iload))); ++ // Move SP out of the way ++ if (!native_call) { ++ __ ld(t0, Address(xmethod, Method::const_offset())); ++ __ lhu(t0, Address(t0, ConstMethod::max_stack_offset())); ++ __ add(t0, t0, frame::interpreter_frame_monitor_size() + 2); ++ __ slli(t0, t0, 3); ++ __ sub(t0, sp, t0); ++ __ andi(sp, t0, -16); ++ } ++} + -+ // if _iload, wait to rewrite to iload2. We only want to rewrite the -+ // last two iloads in a pair. Comparing against fast_iload means that -+ // the next bytecode is neither an iload or a caload, and therefore -+ // an iload pair. 
-+ __ mv(t1, Bytecodes::_iload); -+ __ beq(x11, t1, done); ++// End of helpers + -+ // if _fast_iload rewrite to _fast_iload2 -+ __ mv(t1, Bytecodes::_fast_iload); -+ __ mv(bc, Bytecodes::_fast_iload2); -+ __ beq(x11, t1, rewrite); ++// Various method entries ++//------------------------------------------------------------------------------------------------------------------------ ++// ++// + -+ // if _caload rewrite to _fast_icaload -+ __ mv(t1, Bytecodes::_caload); -+ __ mv(bc, Bytecodes::_fast_icaload); -+ __ beq(x11, t1, rewrite); ++// Method entry for java.lang.ref.Reference.get. ++address TemplateInterpreterGenerator::generate_Reference_get_entry(void) { ++ // Code: _aload_0, _getfield, _areturn ++ // parameter size = 1 ++ // ++ // The code that gets generated by this routine is split into 2 parts: ++ // 1. The "intrinsified" code for G1 (or any SATB based GC), ++ // 2. The slow path - which is an expansion of the regular method entry. ++ // ++ // Notes:- ++ // * In the G1 code we do not check whether we need to block for ++ // a safepoint. If G1 is enabled then we must execute the specialized ++ // code for Reference.get (except when the Reference object is null) ++ // so that we can log the value in the referent field with an SATB ++ // update buffer. ++ // If the code for the getfield template is modified so that the ++ // G1 pre-barrier code is executed when the current method is ++ // Reference.get() then going through the normal method entry ++ // will be fine. ++ // * The G1 code can, however, check the receiver object (the instance ++ // of java.lang.Reference) and jump to the slow path if null. If the ++ // Reference object is null then we obviously cannot fetch the referent ++ // and so we don't need to call the G1 pre-barrier. Thus we can use the ++ // regular method entry code to generate the NPE. ++ // ++ // This code is based on generate_accessor_entry. ++ // ++ // xmethod: Method* ++ // x30: senderSP must preserve for slow path, set SP to it on fast path + -+ // else rewrite to _fast_iload -+ __ mv(bc, Bytecodes::_fast_iload); ++ // ra is live. It must be saved around calls. + -+ // rewrite -+ // bc: new bytecode -+ __ bind(rewrite); -+ patch_bytecode(Bytecodes::_iload, bc, x11, false); -+ __ bind(done); ++ address entry = __ pc(); + -+ } ++ const int referent_offset = java_lang_ref_Reference::referent_offset(); ++ guarantee(referent_offset > 0, "referent offset not initialized"); + -+ // do iload, get the local value into tos -+ locals_index(x11); -+ __ lw(x10, iaddress(x11, x10, _masm)); -+} ++ Label slow_path; ++ const Register local_0 = c_rarg0; ++ // Check if local 0 != NULL ++ // If the receiver is null then it is OK to jump to the slow path. ++ __ ld(local_0, Address(esp, 0)); ++ __ beqz(local_0, slow_path); + -+void TemplateTable::fast_iload2() -+{ -+ transition(vtos, itos); -+ locals_index(x11); -+ __ lw(x10, iaddress(x11, x10, _masm)); -+ __ push(itos); -+ locals_index(x11, 3); -+ __ lw(x10, iaddress(x11, x10, _masm)); -+} ++ __ mv(x9, x30); // Move senderSP to a callee-saved register + -+void TemplateTable::fast_iload() -+{ -+ transition(vtos, itos); -+ locals_index(x11); -+ __ lw(x10, iaddress(x11, x10, _masm)); -+} ++ // Load the value of the referent field. 
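In outline, the Reference.get fast path being generated here is the pseudo-C++ below: bail out to the regular entry when the receiver is null, otherwise load the referent through the GC access barrier so an SATB collector can log it, then restore the caller's SP and return. The barrier helper is a hypothetical stand-in for the BarrierSetAssembler::load_at call in the code that follows, not real HotSpot API.

// Illustrative outline only.
typedef void* oop;

// Stand-in for a load decorated with IN_HEAP | ON_WEAK_OOP_REF; a real barrier
// may also record the value for a SATB collector before handing it back.
static oop load_oop_with_weak_barrier(oop base, int offset) {
  return *reinterpret_cast<oop*>(reinterpret_cast<char*>(base) + offset);
}

oop reference_get_fast_path(oop receiver, int referent_offset, bool* take_slow_path) {
  if (receiver == nullptr) {      // null receiver: let the normal entry raise the NPE
    *take_slow_path = true;
    return nullptr;
  }
  *take_slow_path = false;
  return load_oop_with_weak_barrier(receiver, referent_offset);
}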
++ const Address field_address(local_0, referent_offset); ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->load_at(_masm, IN_HEAP | ON_WEAK_OOP_REF, T_OBJECT, local_0, field_address, /*tmp1*/ t1, /*tmp2*/ t0); + -+void TemplateTable::lload() -+{ -+ transition(vtos, ltos); -+ __ lbu(x11, at_bcp(1)); -+ __ slli(x11, x11, LogBytesPerWord); -+ __ sub(x11, xlocals, x11); -+ __ ld(x10, Address(x11, Interpreter::local_offset_in_bytes(1))); -+} ++ // areturn ++ __ andi(sp, x9, -16); // done with stack ++ __ ret(); + -+void TemplateTable::fload() -+{ -+ transition(vtos, ftos); -+ locals_index(x11); -+ __ flw(f10, faddress(x11, t0, _masm)); ++ // generate a vanilla interpreter entry as the slow path ++ __ bind(slow_path); ++ __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals)); ++ return entry; +} + -+void TemplateTable::dload() -+{ -+ transition(vtos, dtos); -+ __ lbu(x11, at_bcp(1)); -+ __ slli(x11, x11, LogBytesPerWord); -+ __ sub(x11, xlocals, x11); -+ __ fld(f10, Address(x11, Interpreter::local_offset_in_bytes(1))); ++/** ++ * Method entry for static native methods: ++ * int java.util.zip.CRC32.update(int crc, int b) ++ */ ++address TemplateInterpreterGenerator::generate_CRC32_update_entry() { ++ // TODO: Unimplemented generate_CRC32_update_entry ++ return 0; +} + -+void TemplateTable::aload() -+{ -+ transition(vtos, atos); -+ locals_index(x11); -+ __ ld(x10, iaddress(x11, x10, _masm)); -+ ++/** ++ * Method entry for static native methods: ++ * int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len) ++ * int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len) ++ */ ++address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { ++ // TODO: Unimplemented generate_CRC32_updateBytes_entry ++ return 0; +} + -+void TemplateTable::locals_index_wide(Register reg) { -+ __ lhu(reg, at_bcp(2)); -+ __ revb_h_h_u(reg, reg); // reverse bytes in half-word and zero-extend -+ __ neg(reg, reg); ++/** ++ * Method entry for intrinsic-candidate (non-native) methods: ++ * int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end) ++ * int java.util.zip.CRC32C.updateDirectByteBuffer(int crc, long buf, int off, int end) ++ * Unlike CRC32, CRC32C does not have any methods marked as native ++ * CRC32C also uses an "end" variable instead of the length variable CRC32 uses ++ */ ++address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { ++ // TODO: Unimplemented generate_CRC32C_updateBytes_entry ++ return 0; +} + -+void TemplateTable::wide_iload() { -+ transition(vtos, itos); -+ locals_index_wide(x11); -+ __ lw(x10, iaddress(x11, t0, _masm)); -+} ++void TemplateInterpreterGenerator::bang_stack_shadow_pages(bool native_call) { ++ // See more discussion in stackOverflow.hpp. 
+ -+void TemplateTable::wide_lload() -+{ -+ transition(vtos, ltos); -+ __ lhu(x11, at_bcp(2)); -+ __ revb_h_h_u(x11, x11); // reverse bytes in half-word and zero-extend -+ __ slli(x11, x11, LogBytesPerWord); -+ __ sub(x11, xlocals, x11); -+ __ ld(x10, Address(x11, Interpreter::local_offset_in_bytes(1))); -+} ++ const int shadow_zone_size = checked_cast(StackOverflow::stack_shadow_zone_size()); ++ const int page_size = os::vm_page_size(); ++ const int n_shadow_pages = shadow_zone_size / page_size; + -+void TemplateTable::wide_fload() -+{ -+ transition(vtos, ftos); -+ locals_index_wide(x11); -+ __ flw(f10, faddress(x11, t0, _masm)); -+} ++#ifdef ASSERT ++ Label L_good_limit; ++ __ ld(t0, Address(xthread, JavaThread::shadow_zone_safe_limit())); ++ __ bnez(t0, L_good_limit); ++ __ stop("shadow zone safe limit is not initialized"); ++ __ bind(L_good_limit); ++ ++ Label L_good_watermark; ++ __ ld(t0, Address(xthread, JavaThread::shadow_zone_growth_watermark())); ++ __ bnez(t0, L_good_watermark); ++ __ stop("shadow zone growth watermark is not initialized"); ++ __ bind(L_good_watermark); ++#endif + -+void TemplateTable::wide_dload() -+{ -+ transition(vtos, dtos); -+ __ lhu(x11, at_bcp(2)); -+ __ revb_h_h_u(x11, x11); // reverse bytes in half-word and zero-extend -+ __ slli(x11, x11, LogBytesPerWord); -+ __ sub(x11, xlocals, x11); -+ __ fld(f10, Address(x11, Interpreter::local_offset_in_bytes(1))); -+} ++ Label L_done; + -+void TemplateTable::wide_aload() -+{ -+ transition(vtos, atos); -+ locals_index_wide(x11); -+ __ ld(x10, aaddress(x11, t0, _masm)); -+} ++ __ ld(t0, Address(xthread, JavaThread::shadow_zone_growth_watermark())); ++ __ bgtu(sp, t0, L_done); + -+void TemplateTable::index_check(Register array, Register index) -+{ -+ // destroys x11, t0 -+ // check array -+ __ null_check(array, arrayOopDesc::length_offset_in_bytes()); -+ // sign extend index for use by indexed load -+ // check index -+ const Register length = t0; -+ __ lwu(length, Address(array, arrayOopDesc::length_offset_in_bytes())); -+ if (index != x11) { -+ assert(x11 != array, "different registers"); -+ __ mv(x11, index); ++ for (int p = 1; p <= n_shadow_pages; p++) { ++ __ bang_stack_with_offset(p * page_size); + } -+ Label ok; -+ __ addw(index, index, zr); -+ __ bltu(index, length, ok); -+ __ mv(x13, array); -+ __ mv(t0, Interpreter::_throw_ArrayIndexOutOfBoundsException_entry); -+ __ jr(t0); -+ __ bind(ok); -+} -+ -+void TemplateTable::iaload() -+{ -+ transition(itos, itos); -+ __ mv(x11, x10); -+ __ pop_ptr(x10); -+ // x10: array -+ // x11: index -+ index_check(x10, x11); // leaves index in x11 -+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_INT) >> 2); -+ __ shadd(t0, x11, x10, t0, 2); -+ __ access_load_at(T_INT, IN_HEAP | IS_ARRAY, x10, Address(t0), noreg, noreg); -+ __ addw(x10, x10, zr); // signed extended -+} + -+void TemplateTable::laload() -+{ -+ transition(itos, ltos); -+ __ mv(x11, x10); -+ __ pop_ptr(x10); -+ // x10: array -+ // x11: index -+ index_check(x10, x11); // leaves index in x11 -+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_LONG) >> 3); -+ __ shadd(t0, x11, x10, t0, 3); -+ __ access_load_at(T_LONG, IN_HEAP | IS_ARRAY, x10, Address(t0), noreg, noreg); -+} ++ // Record the new watermark, but only if the update is above the safe limit. ++ // Otherwise, the next time around the check above would pass the safe limit. 
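A compact C++ sketch of the shadow-zone banging above: touch one word in each shadow page below sp unless sp is already above the recorded growth watermark, then advance the watermark, but never at or below the safe limit, so later calls to the same depth skip the banging. Names and the struct are illustrative; the real code keeps the watermark and safe limit per JavaThread, and the banged pages are guard pages on a mapped thread stack.

#include <cstdint>
#include <cstddef>

struct ShadowZoneState {              // per-thread state, illustrative only
  uintptr_t growth_watermark;         // lowest sp for which pages were already banged
  uintptr_t safe_limit;               // never record a watermark at or below this
};

inline void touch_page(uintptr_t addr) {
  *reinterpret_cast<volatile char*>(addr) = 0;   // models bang_stack_with_offset
}

void bang_shadow_pages(uintptr_t sp, ShadowZoneState* zone,
                       size_t page_size, int n_shadow_pages) {
  if (sp > zone->growth_watermark) {  // already banged at least this deep
    return;
  }
  for (int p = 1; p <= n_shadow_pages; p++) {
    touch_page(sp - p * page_size);
  }
  if (sp > zone->safe_limit) {        // only move the watermark while above the safe limit
    zone->growth_watermark = sp;
  }
}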
++ __ ld(t0, Address(xthread, JavaThread::shadow_zone_safe_limit())); ++ __ bleu(sp, t0, L_done); ++ __ sd(sp, Address(xthread, JavaThread::shadow_zone_growth_watermark())); + -+void TemplateTable::faload() -+{ -+ transition(itos, ftos); -+ __ mv(x11, x10); -+ __ pop_ptr(x10); -+ // x10: array -+ // x11: index -+ index_check(x10, x11); // leaves index in x11 -+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_FLOAT) >> 2); -+ __ shadd(t0, x11, x10, t0, 2); -+ __ access_load_at(T_FLOAT, IN_HEAP | IS_ARRAY, x10, Address(t0), noreg, noreg); ++ __ bind(L_done); +} + -+void TemplateTable::daload() -+{ -+ transition(itos, dtos); -+ __ mv(x11, x10); -+ __ pop_ptr(x10); -+ // x10: array -+ // x11: index -+ index_check(x10, x11); // leaves index in x11 -+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) >> 3); -+ __ shadd(t0, x11, x10, t0, 3); -+ __ access_load_at(T_DOUBLE, IN_HEAP | IS_ARRAY, x10, Address(t0), noreg, noreg); -+} ++// Interpreter stub for calling a native method. (asm interpreter) ++// This sets up a somewhat different looking stack for calling the ++// native method than the typical interpreter frame setup. ++address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { ++ // determine code generation flags ++ bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; + -+void TemplateTable::aaload() -+{ -+ transition(itos, atos); -+ __ mv(x11, x10); -+ __ pop_ptr(x10); -+ // x10: array -+ // x11: index -+ index_check(x10, x11); // leaves index in x11 -+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); -+ __ shadd(t0, x11, x10, t0, LogBytesPerHeapOop); -+ do_oop_load(_masm, -+ Address(t0), -+ x10, -+ IS_ARRAY); -+} ++ // x11: Method* ++ // x30: sender sp + -+void TemplateTable::baload() -+{ -+ transition(itos, itos); -+ __ mv(x11, x10); -+ __ pop_ptr(x10); -+ // x10: array -+ // x11: index -+ index_check(x10, x11); // leaves index in x11 -+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_BYTE) >> 0); -+ __ shadd(t0, x11, x10, t0, 0); -+ __ access_load_at(T_BYTE, IN_HEAP | IS_ARRAY, x10, Address(t0), noreg, noreg); -+} ++ address entry_point = __ pc(); + -+void TemplateTable::caload() -+{ -+ transition(itos, itos); -+ __ mv(x11, x10); -+ __ pop_ptr(x10); -+ // x10: array -+ // x11: index -+ index_check(x10, x11); // leaves index in x11 -+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_CHAR) >> 1); -+ __ shadd(t0, x11, x10, t0, 1); -+ __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, x10, Address(t0), noreg, noreg); -+} ++ const Address constMethod (xmethod, Method::const_offset()); ++ const Address access_flags (xmethod, Method::access_flags_offset()); ++ const Address size_of_parameters(x12, ConstMethod:: ++ size_of_parameters_offset()); + -+// iload followed by caload frequent pair -+void TemplateTable::fast_icaload() -+{ -+ transition(vtos, itos); -+ // load index out of locals -+ locals_index(x12); -+ __ lw(x11, iaddress(x12, x11, _masm)); -+ __ pop_ptr(x10); ++ // get parameter size (always needed) ++ __ ld(x12, constMethod); ++ __ load_unsigned_short(x12, size_of_parameters); + -+ // x10: array -+ // x11: index -+ index_check(x10, x11); // leaves index in x11, kills t0 -+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_CHAR) >> 1); // addi, max imm is 2^11 -+ __ shadd(t0, x11, x10, t0, 1); -+ __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, x10, Address(t0), noreg, noreg); -+} ++ // Native calls don't need the stack size check since they have no ++ // expression stack and the 
arguments are already on the stack and ++ // we only add a handful of words to the stack. + -+void TemplateTable::saload() -+{ -+ transition(itos, itos); -+ __ mv(x11, x10); -+ __ pop_ptr(x10); -+ // x10: array -+ // x11: index -+ index_check(x10, x11); // leaves index in x11, kills t0 -+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_SHORT) >> 1); -+ __ shadd(t0, x11, x10, t0, 1); -+ __ access_load_at(T_SHORT, IN_HEAP | IS_ARRAY, x10, Address(t0), noreg, noreg); -+} ++ // xmethod: Method* ++ // x12: size of parameters ++ // x30: sender sp + -+void TemplateTable::iload(int n) -+{ -+ transition(vtos, itos); -+ __ lw(x10, iaddress(n)); -+} ++ // for natives the size of locals is zero + -+void TemplateTable::lload(int n) -+{ -+ transition(vtos, ltos); -+ __ ld(x10, laddress(n)); -+} ++ // compute beginning of parameters (xlocals) ++ __ shadd(xlocals, x12, esp, xlocals, 3); ++ __ addi(xlocals, xlocals, -wordSize); + -+void TemplateTable::fload(int n) -+{ -+ transition(vtos, ftos); -+ __ flw(f10, faddress(n)); -+} ++ // Pull SP back to minimum size: this avoids holes in the stack ++ __ andi(sp, esp, -16); + -+void TemplateTable::dload(int n) -+{ -+ transition(vtos, dtos); -+ __ fld(f10, daddress(n)); -+} ++ // initialize fixed part of activation frame ++ generate_fixed_frame(true); + -+void TemplateTable::aload(int n) -+{ -+ transition(vtos, atos); -+ __ ld(x10, iaddress(n)); -+} ++ // make sure method is native & not abstract ++#ifdef ASSERT ++ __ lwu(x10, access_flags); ++ __ verify_access_flags(x10, JVM_ACC_NATIVE, "tried to execute non-native method as native", false); ++ __ verify_access_flags(x10, JVM_ACC_ABSTRACT, "tried to execute abstract method in interpreter"); ++#endif + -+void TemplateTable::aload_0() { -+ aload_0_internal(); -+} ++ // Since at this point in the method invocation the exception ++ // handler would try to exit the monitor of synchronized methods ++ // which hasn't been entered yet, we set the thread local variable ++ // _do_not_unlock_if_synchronized to true. The remove_activation ++ // will check this flag. + -+void TemplateTable::nofast_aload_0() { -+ aload_0_internal(may_not_rewrite); -+} ++ const Address do_not_unlock_if_synchronized(xthread, ++ in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ __ mv(t1, true); ++ __ sb(t1, do_not_unlock_if_synchronized); + -+void TemplateTable::aload_0_internal(RewriteControl rc) { -+ // According to bytecode histograms, the pairs: -+ // -+ // _aload_0, _fast_igetfield -+ // _aload_0, _fast_agetfield -+ // _aload_0, _fast_fgetfield -+ // -+ // occur frequently. If RewriteFrequentPairs is set, the (slow) -+ // _aload_0 bytecode checks if the next bytecode is either -+ // _fast_igetfield, _fast_agetfield or _fast_fgetfield and then -+ // rewrites the current bytecode into a pair bytecode; otherwise it -+ // rewrites the current bytecode into _fast_aload_0 that doesn't do -+ // the pair check anymore. -+ // -+ // Note: If the next bytecode is _getfield, the rewrite must be -+ // delayed, otherwise we may miss an opportunity for a pair. 
-+ // -+ // Also rewrite frequent pairs -+ // aload_0, aload_1 -+ // aload_0, iload_1 -+ // These bytecodes with a small amount of code are most profitable -+ // to rewrite -+ if (RewriteFrequentPairs && rc == may_rewrite) { -+ Label rewrite, done; -+ const Register bc = x14; ++ // increment invocation count & check for overflow ++ Label invocation_counter_overflow; ++ if (inc_counter) { ++ generate_counter_incr(&invocation_counter_overflow); ++ } + -+ // get next bytecode -+ __ load_unsigned_byte(x11, at_bcp(Bytecodes::length_for(Bytecodes::_aload_0))); ++ Label continue_after_compile; ++ __ bind(continue_after_compile); + -+ // if _getfield then wait with rewrite -+ __ mv(t1, Bytecodes::Bytecodes::_getfield); -+ __ beq(x11, t1, done); ++ bang_stack_shadow_pages(true); + -+ // if _igetfield then rewrite to _fast_iaccess_0 -+ assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) == Bytecodes::_aload_0, "fix bytecode definition"); -+ __ mv(t1, Bytecodes::_fast_igetfield); -+ __ mv(bc, Bytecodes::_fast_iaccess_0); -+ __ beq(x11, t1, rewrite); ++ // reset the _do_not_unlock_if_synchronized flag ++ __ sb(zr, do_not_unlock_if_synchronized); + -+ // if _agetfield then rewrite to _fast_aaccess_0 -+ assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) == Bytecodes::_aload_0, "fix bytecode definition"); -+ __ mv(t1, Bytecodes::_fast_agetfield); -+ __ mv(bc, Bytecodes::_fast_aaccess_0); -+ __ beq(x11, t1, rewrite); ++ // check for synchronized methods ++ // Must happen AFTER invocation_counter check and stack overflow check, ++ // so method is not locked if overflows. ++ if (synchronized) { ++ lock_method(); ++ } else { ++ // no synchronization necessary ++#ifdef ASSERT ++ __ lwu(x10, access_flags); ++ __ verify_access_flags(x10, JVM_ACC_SYNCHRONIZED, "method needs synchronization"); ++#endif ++ } + -+ // if _fgetfield then rewrite to _fast_faccess_0 -+ assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) == Bytecodes::_aload_0, "fix bytecode definition"); -+ __ mv(t1, Bytecodes::_fast_fgetfield); -+ __ mv(bc, Bytecodes::_fast_faccess_0); -+ __ beq(x11, t1, rewrite); ++ // start execution ++#ifdef ASSERT ++ __ verify_frame_setup(); ++#endif + -+ // else rewrite to _fast_aload0 -+ assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) == Bytecodes::_aload_0, "fix bytecode definition"); -+ __ mv(bc, Bytecodes::Bytecodes::_fast_aload_0); ++ // jvmti support ++ __ notify_method_entry(); + -+ // rewrite -+ // bc: new bytecode -+ __ bind(rewrite); -+ patch_bytecode(Bytecodes::_aload_0, bc, x11, false); ++ // work registers ++ const Register t = x18; ++ const Register result_handler = x19; + -+ __ bind(done); -+ } ++ // allocate space for parameters ++ __ ld(t, Address(xmethod, Method::const_offset())); ++ __ load_unsigned_short(t, Address(t, ConstMethod::size_of_parameters_offset())); + -+ // Do actual aload_0 (must do this after patch_bytecode which might call VM and GC might change oop). 
-+ aload(0); -+} ++ __ slli(t, t, Interpreter::logStackElementSize); ++ __ sub(x30, esp, t); ++ __ andi(sp, x30, -16); ++ __ mv(esp, x30); + -+void TemplateTable::istore() -+{ -+ transition(itos, vtos); -+ locals_index(x11); -+ __ sw(x10, iaddress(x11, t0, _masm)); -+} ++ // get signature handler ++ { ++ Label L; ++ __ ld(t, Address(xmethod, Method::signature_handler_offset())); ++ __ bnez(t, L); ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::prepare_native_call), ++ xmethod); ++ __ ld(t, Address(xmethod, Method::signature_handler_offset())); ++ __ bind(L); ++ } + -+void TemplateTable::lstore() -+{ -+ transition(ltos, vtos); -+ locals_index(x11); -+ __ sd(x10, laddress(x11, t0, _masm)); -+} ++ // call signature handler ++ assert(InterpreterRuntime::SignatureHandlerGenerator::from() == xlocals, ++ "adjust this code"); ++ assert(InterpreterRuntime::SignatureHandlerGenerator::to() == sp, ++ "adjust this code"); ++ assert(InterpreterRuntime::SignatureHandlerGenerator::temp() == t0, ++ "adjust this code"); + -+void TemplateTable::fstore() { -+ transition(ftos, vtos); -+ locals_index(x11); -+ __ fsw(f10, iaddress(x11, t0, _masm)); -+} ++ // The generated handlers do not touch xmethod (the method). ++ // However, large signatures cannot be cached and are generated ++ // each time here. The slow-path generator can do a GC on return, ++ // so we must reload it after the call. ++ __ jalr(t); ++ __ get_method(xmethod); // slow path can do a GC, reload xmethod + -+void TemplateTable::dstore() { -+ transition(dtos, vtos); -+ locals_index(x11); -+ __ fsd(f10, daddress(x11, t0, _masm)); -+} + -+void TemplateTable::astore() -+{ -+ transition(vtos, vtos); -+ __ pop_ptr(x10); -+ locals_index(x11); -+ __ sd(x10, aaddress(x11, t0, _masm)); -+} ++ // result handler is in x10 ++ // set result handler ++ __ mv(result_handler, x10); ++ // pass mirror handle if static call ++ { ++ Label L; ++ __ lwu(t, Address(xmethod, Method::access_flags_offset())); ++ __ andi(t0, t, JVM_ACC_STATIC); ++ __ beqz(t0, L); ++ // get mirror ++ __ load_mirror(t, xmethod); ++ // copy mirror into activation frame ++ __ sd(t, Address(fp, frame::interpreter_frame_oop_temp_offset * wordSize)); ++ // pass handle to mirror ++ __ addi(c_rarg1, fp, frame::interpreter_frame_oop_temp_offset * wordSize); ++ __ bind(L); ++ } + -+void TemplateTable::wide_istore() { -+ transition(vtos, vtos); -+ __ pop_i(); -+ locals_index_wide(x11); -+ __ sw(x10, iaddress(x11, t0, _masm)); -+} ++ // get native function entry point in x28 ++ { ++ Label L; ++ __ ld(x28, Address(xmethod, Method::native_function_offset())); ++ address unsatisfied = (SharedRuntime::native_method_throw_unsatisfied_link_error_entry()); ++ __ mv(t1, unsatisfied); ++ __ ld(t1, t1); ++ __ bne(x28, t1, L); ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::prepare_native_call), ++ xmethod); ++ __ get_method(xmethod); ++ __ ld(x28, Address(xmethod, Method::native_function_offset())); ++ __ bind(L); ++ } + -+void TemplateTable::wide_lstore() { -+ transition(vtos, vtos); -+ __ pop_l(); -+ locals_index_wide(x11); -+ __ sd(x10, laddress(x11, t0, _masm)); -+} ++ // pass JNIEnv ++ __ add(c_rarg0, xthread, in_bytes(JavaThread::jni_environment_offset())); + -+void TemplateTable::wide_fstore() { -+ transition(vtos, vtos); -+ __ pop_f(); -+ locals_index_wide(x11); -+ __ fsw(f10, faddress(x11, t0, _masm)); -+} ++ // It is enough that the pc() points into the right code ++ // segment. It does not have to be the correct return pc. 
++ Label native_return; ++ __ set_last_Java_frame(esp, fp, native_return, x30); + -+void TemplateTable::wide_dstore() { -+ transition(vtos, vtos); -+ __ pop_d(); -+ locals_index_wide(x11); -+ __ fsd(f10, daddress(x11, t0, _masm)); -+} ++ // change thread state ++#ifdef ASSERT ++ { ++ Label L; ++ __ lwu(t, Address(xthread, JavaThread::thread_state_offset())); ++ __ addi(t0, zr, (u1)_thread_in_Java); ++ __ beq(t, t0, L); ++ __ stop("Wrong thread state in native stub"); ++ __ bind(L); ++ } ++#endif + -+void TemplateTable::wide_astore() { -+ transition(vtos, vtos); -+ __ pop_ptr(x10); -+ locals_index_wide(x11); -+ __ sd(x10, aaddress(x11, t0, _masm)); -+} ++ // Change state to native ++ __ la(t1, Address(xthread, JavaThread::thread_state_offset())); ++ __ mv(t0, _thread_in_native); ++ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); ++ __ sw(t0, Address(t1)); + -+void TemplateTable::iastore() { -+ transition(itos, vtos); -+ __ pop_i(x11); -+ __ pop_ptr(x13); -+ // x10: value -+ // x11: index -+ // x13: array -+ index_check(x13, x11); // prefer index in x11 -+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_INT) >> 2); -+ __ shadd(t0, x11, x13, t0, 2); -+ __ access_store_at(T_INT, IN_HEAP | IS_ARRAY, Address(t0, 0), x10, noreg, noreg, noreg); -+} ++ // Call the native method. ++ __ jalr(x28); ++ __ bind(native_return); ++ __ get_method(xmethod); ++ // result potentially in x10 or f10 + -+void TemplateTable::lastore() { -+ transition(ltos, vtos); -+ __ pop_i(x11); -+ __ pop_ptr(x13); -+ // x10: value -+ // x11: index -+ // x13: array -+ index_check(x13, x11); // prefer index in x11 -+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_LONG) >> 3); -+ __ shadd(t0, x11, x13, t0, 3); -+ __ access_store_at(T_LONG, IN_HEAP | IS_ARRAY, Address(t0, 0), x10, noreg, noreg, noreg); -+} ++ // make room for the pushes we're about to do ++ __ sub(t0, esp, 4 * wordSize); ++ __ andi(sp, t0, -16); + -+void TemplateTable::fastore() { -+ transition(ftos, vtos); -+ __ pop_i(x11); -+ __ pop_ptr(x13); -+ // f10: value -+ // x11: index -+ // x13: array -+ index_check(x13, x11); // prefer index in x11 -+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_FLOAT) >> 2); -+ __ shadd(t0, x11, x13, t0, 2); -+ __ access_store_at(T_FLOAT, IN_HEAP | IS_ARRAY, Address(t0, 0), noreg /* ftos */, noreg, noreg, noreg); -+} ++ // NOTE: The order of these pushes is known to frame::interpreter_frame_result ++ // in order to extract the result of a method call. If the order of these ++ // pushes change or anything else is added to the stack then the code in ++ // interpreter_frame_result must also change. 
++ __ push(dtos); ++ __ push(ltos); + -+void TemplateTable::dastore() { -+ transition(dtos, vtos); -+ __ pop_i(x11); -+ __ pop_ptr(x13); -+ // f10: value -+ // x11: index -+ // x13: array -+ index_check(x13, x11); // prefer index in x11 -+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) >> 3); -+ __ shadd(t0, x11, x13, t0, 3); -+ __ access_store_at(T_DOUBLE, IN_HEAP | IS_ARRAY, Address(t0, 0), noreg /* dtos */, noreg, noreg, noreg); -+} ++ // change thread state ++ // Force all preceding writes to be observed prior to thread state change ++ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); + -+void TemplateTable::aastore() { -+ Label is_null, ok_is_subtype, done; -+ transition(vtos, vtos); -+ // stack: ..., array, index, value -+ __ ld(x10, at_tos()); // value -+ __ ld(x12, at_tos_p1()); // index -+ __ ld(x13, at_tos_p2()); // array ++ __ mv(t0, _thread_in_native_trans); ++ __ sw(t0, Address(xthread, JavaThread::thread_state_offset())); + -+ index_check(x13, x12); // kills x11 -+ __ add(x14, x12, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); -+ __ shadd(x14, x14, x13, x14, LogBytesPerHeapOop); ++ // Force this write out before the read below ++ __ membar(MacroAssembler::AnyAny); + -+ Address element_address(x14, 0); ++ // check for safepoint operation in progress and/or pending suspend requests ++ { ++ Label L, Continue; + -+ // do array store check - check for NULL value first -+ __ beqz(x10, is_null); ++ // We need an acquire here to ensure that any subsequent load of the ++ // global SafepointSynchronize::_state flag is ordered after this load ++ // of the thread-local polling word. We don't want this poll to ++ // return false (i.e. not safepointing) and a later poll of the global ++ // SafepointSynchronize::_state spuriously to return true. ++ // ++ // This is to avoid a race when we're in a native->Java transition ++ // racing the code which wakes up from a safepoint. ++ __ safepoint_poll(L, true /* at_return */, true /* acquire */, false /* in_nmethod */); ++ __ lwu(t1, Address(xthread, JavaThread::suspend_flags_offset())); ++ __ beqz(t1, Continue); ++ __ bind(L); + -+ // Move subklass into x11 -+ __ load_klass(x11, x10); -+ // Move superklass into x10 -+ __ load_klass(x10, x13); -+ __ ld(x10, Address(x10, -+ ObjArrayKlass::element_klass_offset())); -+ // Compress array + index * oopSize + 12 into a single register. Frees x12. ++ // Don't use call_VM as it will see a possible pending exception ++ // and forward it and never return here preventing us from ++ // clearing _last_native_pc down below. So we do a runtime call by ++ // hand. ++ // ++ __ mv(c_rarg0, xthread); ++ __ mv(t1, CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)); ++ __ jalr(t1); ++ __ get_method(xmethod); ++ __ reinit_heapbase(); ++ __ bind(Continue); ++ } + -+ // Generate subtype check. Blows x12, x15 -+ // Superklass in x10. Subklass in x11. 
-+ __ gen_subtype_check(x11, ok_is_subtype); //todo ++ // change thread state ++ // Force all preceding writes to be observed prior to thread state change ++ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); + -+ // Come here on failure -+ // object is at TOS -+ __ j(Interpreter::_throw_ArrayStoreException_entry); ++ __ mv(t0, _thread_in_Java); ++ __ sw(t0, Address(xthread, JavaThread::thread_state_offset())); + -+ // Come here on success -+ __ bind(ok_is_subtype); ++ // reset_last_Java_frame ++ __ reset_last_Java_frame(true); + -+ // Get the value we will store -+ __ ld(x10, at_tos()); -+ // Now store using the appropriate barrier -+ do_oop_store(_masm, element_address, x10, IS_ARRAY); -+ __ j(done); ++ if (CheckJNICalls) { ++ // clear_pending_jni_exception_check ++ __ sd(zr, Address(xthread, JavaThread::pending_jni_exception_check_fn_offset())); ++ } + -+ // Have a NULL in x10, x13=array, x12=index. Store NULL at ary[idx] -+ __ bind(is_null); -+ __ profile_null_seen(x12); ++ // reset handle block ++ __ ld(t, Address(xthread, JavaThread::active_handles_offset())); ++ __ sd(zr, Address(t, JNIHandleBlock::top_offset_in_bytes())); + -+ // Store a NULL -+ do_oop_store(_masm, element_address, noreg, IS_ARRAY); ++ // If result is an oop unbox and store it in frame where gc will see it ++ // and result handler will pick it up + -+ // Pop stack arguments -+ __ bind(done); -+ __ add(esp, esp, 3 * Interpreter::stackElementSize); ++ { ++ Label no_oop; ++ __ la(t, ExternalAddress(AbstractInterpreter::result_handler(T_OBJECT))); ++ __ bne(t, result_handler, no_oop); ++ // Unbox oop result, e.g. JNIHandles::resolve result. ++ __ pop(ltos); ++ __ resolve_jobject(x10, xthread, t); ++ __ sd(x10, Address(fp, frame::interpreter_frame_oop_temp_offset * wordSize)); ++ // keep stack depth as expected by pushing oop which will eventually be discarded ++ __ push(ltos); ++ __ bind(no_oop); ++ } + -+} ++ { ++ Label no_reguard; ++ __ lwu(t0, Address(xthread, in_bytes(JavaThread::stack_guard_state_offset()))); ++ __ addi(t1, zr, (u1)StackOverflow::stack_guard_yellow_reserved_disabled); ++ __ bne(t0, t1, no_reguard); + -+void TemplateTable::bastore() -+{ -+ transition(itos, vtos); -+ __ pop_i(x11); -+ __ pop_ptr(x13); -+ // x10: value -+ // x11: index -+ // x13: array -+ index_check(x13, x11); // prefer index in x11 ++ __ pusha(); // only save smashed registers ++ __ mv(c_rarg0, xthread); ++ __ mv(t1, CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)); ++ __ jalr(t1); ++ __ popa(); // only restore smashed registers ++ __ bind(no_reguard); ++ } + -+ // Need to check whether array is boolean or byte -+ // since both types share the bastore bytecode. -+ __ load_klass(x12, x13); -+ __ lwu(x12, Address(x12, Klass::layout_helper_offset())); -+ Label L_skip; -+ __ andi(t0, x12, Klass::layout_helper_boolean_diffbit()); -+ __ beqz(t0, L_skip); -+ __ andi(x10, x10, 1); // if it is a T_BOOLEAN array, mask the stored value to 0/1 -+ __ bind(L_skip); ++ // The method register is junk from after the thread_in_native transition ++ // until here. Also can't call_VM until the bcp has been ++ // restored. Need bcp for throwing exception below so get it now. 
++ __ get_method(xmethod); + -+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_BYTE) >> 0); ++ // restore bcp to have legal interpreter frame, i.e., bci == 0 <=> ++ // xbcp == code_base() ++ __ ld(xbcp, Address(xmethod, Method::const_offset())); // get ConstMethod* ++ __ add(xbcp, xbcp, in_bytes(ConstMethod::codes_offset())); // get codebase ++ // handle exceptions (exception handling will handle unlocking!) ++ { ++ Label L; ++ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); ++ __ beqz(t0, L); ++ // Note: At some point we may want to unify this with the code ++ // used in call_VM_base(); i.e., we should use the ++ // StubRoutines::forward_exception code. For now this doesn't work ++ // here because the sp is not correctly set at this point. ++ __ MacroAssembler::call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_pending_exception)); ++ __ should_not_reach_here(); ++ __ bind(L); ++ } + -+ __ add(x11, x13, x11); -+ __ access_store_at(T_BYTE, IN_HEAP | IS_ARRAY, Address(x11, 0), x10, noreg, noreg, noreg); -+} ++ // do unlocking if necessary ++ { ++ Label L; ++ __ lwu(t, Address(xmethod, Method::access_flags_offset())); ++ __ andi(t0, t, JVM_ACC_SYNCHRONIZED); ++ __ beqz(t0, L); ++ // the code below should be shared with interpreter macro ++ // assembler implementation ++ { ++ Label unlock; ++ // BasicObjectLock will be first in list, since this is a ++ // synchronized method. However, need to check that the object ++ // has not been unlocked by an explicit monitorexit bytecode. + -+void TemplateTable::castore() -+{ -+ transition(itos, vtos); -+ __ pop_i(x11); -+ __ pop_ptr(x13); -+ // x10: value -+ // x11: index -+ // x13: array -+ index_check(x13, x11); // prefer index in x11 -+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_CHAR) >> 1); -+ __ shadd(t0, x11, x13, t0, 1); -+ __ access_store_at(T_CHAR, IN_HEAP | IS_ARRAY, Address(t0, 0), x10, noreg, noreg, noreg); -+} ++ // monitor expect in c_rarg1 for slow unlock path ++ __ la(c_rarg1, Address(fp, // address of first monitor ++ (intptr_t)(frame::interpreter_frame_initial_sp_offset * ++ wordSize - sizeof(BasicObjectLock)))); + -+void TemplateTable::sastore() -+{ -+ castore(); -+} ++ __ ld(t, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes())); ++ __ bnez(t, unlock); + -+void TemplateTable::istore(int n) -+{ -+ transition(itos, vtos); -+ __ sd(x10, iaddress(n)); -+} ++ // Entry already unlocked, need to throw exception ++ __ MacroAssembler::call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ __ should_not_reach_here(); + -+void TemplateTable::lstore(int n) -+{ -+ transition(ltos, vtos); -+ __ sd(x10, laddress(n)); -+} ++ __ bind(unlock); ++ __ unlock_object(c_rarg1); ++ } ++ __ bind(L); ++ } + -+void TemplateTable::fstore(int n) -+{ -+ transition(ftos, vtos); -+ __ fsw(f10, faddress(n)); -+} ++ // jvmti support ++ // Note: This must happen _after_ handling/throwing any exceptions since ++ // the exception handler code notifies the runtime of method exits ++ // too. If this happens before, method entry/exit notifications are ++ // not properly paired (was bug - gri 11/22/99). 
++ __ notify_method_exit(vtos, InterpreterMacroAssembler::NotifyJVMTI); + -+void TemplateTable::dstore(int n) -+{ -+ transition(dtos, vtos); -+ __ fsd(f10, daddress(n)); -+} ++ __ pop(ltos); ++ __ pop(dtos); + -+void TemplateTable::astore(int n) -+{ -+ transition(vtos, vtos); -+ __ pop_ptr(x10); -+ __ sd(x10, iaddress(n)); -+} ++ __ jalr(result_handler); + -+void TemplateTable::pop() -+{ -+ transition(vtos, vtos); -+ __ addi(esp, esp, Interpreter::stackElementSize); -+} ++ // remove activation ++ __ ld(esp, Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); // get sender sp ++ // remove frame anchor ++ __ leave(); + -+void TemplateTable::pop2() -+{ -+ transition(vtos, vtos); -+ __ addi(esp, esp, 2 * Interpreter::stackElementSize); -+} ++ // restore sender sp ++ __ mv(sp, esp); + -+void TemplateTable::dup() -+{ -+ transition(vtos, vtos); -+ __ ld(x10, Address(esp, 0)); -+ __ push_reg(x10); -+ // stack: ..., a, a -+} ++ __ ret(); + -+void TemplateTable::dup_x1() -+{ -+ transition(vtos, vtos); -+ // stack: ..., a, b -+ __ ld(x10, at_tos()); // load b -+ __ ld(x12, at_tos_p1()); // load a -+ __ sd(x10, at_tos_p1()); // store b -+ __ sd(x12, at_tos()); // store a -+ __ push_reg(x10); // push b -+ // stack: ..., b, a, b -+} ++ if (inc_counter) { ++ // Handle overflow of counter and compile method ++ __ bind(invocation_counter_overflow); ++ generate_counter_overflow(continue_after_compile); ++ } + -+void TemplateTable::dup_x2() -+{ -+ transition(vtos, vtos); -+ // stack: ..., a, b, c -+ __ ld(x10, at_tos()); // load c -+ __ ld(x12, at_tos_p2()); // load a -+ __ sd(x10, at_tos_p2()); // store c in a -+ __ push_reg(x10); // push c -+ // stack: ..., c, b, c, c -+ __ ld(x10, at_tos_p2()); // load b -+ __ sd(x12, at_tos_p2()); // store a in b -+ // stack: ..., c, a, c, c -+ __ sd(x10, at_tos_p1()); // store b in c -+ // stack: ..., c, a, b, c ++ return entry_point; +} + -+void TemplateTable::dup2() -+{ -+ transition(vtos, vtos); -+ // stack: ..., a, b -+ __ ld(x10, at_tos_p1()); // load a -+ __ push_reg(x10); // push a -+ __ ld(x10, at_tos_p1()); // load b -+ __ push_reg(x10); // push b -+ // stack: ..., a, b, a, b -+} ++// ++// Generic interpreted method entry to (asm) interpreter ++// ++address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) { + -+void TemplateTable::dup2_x1() -+{ -+ transition(vtos, vtos); -+ // stack: ..., a, b, c -+ __ ld(x12, at_tos()); // load c -+ __ ld(x10, at_tos_p1()); // load b -+ __ push_reg(x10); // push b -+ __ push_reg(x12); // push c -+ // stack: ..., a, b, c, b, c -+ __ sd(x12, at_tos_p3()); // store c in b -+ // stack: ..., a, c, c, b, c -+ __ ld(x12, at_tos_p4()); // load a -+ __ sd(x12, at_tos_p2()); // store a in 2nd c -+ // stack: ..., a, c, a, b, c -+ __ sd(x10, at_tos_p4()); // store b in a -+ // stack: ..., b, c, a, b, c -+} ++ // determine code generation flags ++ const bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; + -+void TemplateTable::dup2_x2() -+{ -+ transition(vtos, vtos); -+ // stack: ..., a, b, c, d -+ __ ld(x12, at_tos()); // load d -+ __ ld(x10, at_tos_p1()); // load c -+ __ push_reg(x10); // push c -+ __ push_reg(x12); // push d -+ // stack: ..., a, b, c, d, c, d -+ __ ld(x10, at_tos_p4()); // load b -+ __ sd(x10, at_tos_p2()); // store b in d -+ __ sd(x12, at_tos_p4()); // store d in b -+ // stack: ..., a, d, c, b, c, d -+ __ ld(x12, at_tos_p5()); // load a -+ __ ld(x10, at_tos_p3()); // load c -+ __ sd(x12, at_tos_p3()); // store a in c -+ __ sd(x10, at_tos_p5()); // store c in 
a -+ // stack: ..., c, d, a, b, c, d -+} ++ // t0: sender sp ++ address entry_point = __ pc(); + -+void TemplateTable::swap() -+{ -+ transition(vtos, vtos); -+ // stack: ..., a, b -+ __ ld(x12, at_tos_p1()); // load a -+ __ ld(x10, at_tos()); // load b -+ __ sd(x12, at_tos()); // store a in b -+ __ sd(x10, at_tos_p1()); // store b in a -+ // stack: ..., b, a -+} ++ const Address constMethod(xmethod, Method::const_offset()); ++ const Address access_flags(xmethod, Method::access_flags_offset()); ++ const Address size_of_parameters(x13, ++ ConstMethod::size_of_parameters_offset()); ++ const Address size_of_locals(x13, ConstMethod::size_of_locals_offset()); + -+void TemplateTable::iop2(Operation op) -+{ -+ transition(itos, itos); -+ // x10 <== x11 op x10 -+ __ pop_i(x11); -+ switch (op) { -+ case add : __ addw(x10, x11, x10); break; -+ case sub : __ subw(x10, x11, x10); break; -+ case mul : __ mulw(x10, x11, x10); break; -+ case _and : __ andrw(x10, x11, x10); break; -+ case _or : __ orrw(x10, x11, x10); break; -+ case _xor : __ xorrw(x10, x11, x10); break; -+ case shl : __ sllw(x10, x11, x10); break; -+ case shr : __ sraw(x10, x11, x10); break; -+ case ushr : __ srlw(x10, x11, x10); break; -+ default : ShouldNotReachHere(); -+ } -+} ++ // get parameter size (always needed) ++ // need to load the const method first ++ __ ld(x13, constMethod); ++ __ load_unsigned_short(x12, size_of_parameters); + -+void TemplateTable::lop2(Operation op) -+{ -+ transition(ltos, ltos); -+ // x10 <== x11 op x10 -+ __ pop_l(x11); -+ switch (op) { -+ case add : __ add(x10, x11, x10); break; -+ case sub : __ sub(x10, x11, x10); break; -+ case mul : __ mul(x10, x11, x10); break; -+ case _and : __ andr(x10, x11, x10); break; -+ case _or : __ orr(x10, x11, x10); break; -+ case _xor : __ xorr(x10, x11, x10); break; -+ default : ShouldNotReachHere(); ++ // x12: size of parameters ++ ++ __ load_unsigned_short(x13, size_of_locals); // get size of locals in words ++ __ sub(x13, x13, x12); // x13 = no. of additional locals ++ ++ // see if we've got enough room on the stack for locals plus overhead. ++ generate_stack_overflow_check(); ++ ++ // compute beginning of parameters (xlocals) ++ __ shadd(xlocals, x12, esp, t1, 3); ++ __ add(xlocals, xlocals, -wordSize); ++ ++ // Make room for additional locals ++ __ slli(t1, x13, 3); ++ __ sub(t0, esp, t1); ++ ++ // Padding between locals and fixed part of activation frame to ensure ++ // SP is always 16-byte aligned. 
++ __ andi(sp, t0, -16); ++ ++ // x13 - # of additional locals ++ // allocate space for locals ++ // explicitly initialize locals ++ { ++ Label exit, loop; ++ __ blez(x13, exit); // do nothing if x13 <= 0 ++ __ bind(loop); ++ __ sd(zr, Address(t0)); ++ __ add(t0, t0, wordSize); ++ __ add(x13, x13, -1); // until everything initialized ++ __ bnez(x13, loop); ++ __ bind(exit); + } -+} + -+void TemplateTable::idiv() -+{ -+ transition(itos, itos); -+ // explicitly check for div0 -+ Label no_div0; -+ __ bnez(x10, no_div0); -+ __ mv(t0, Interpreter::_throw_ArithmeticException_entry); -+ __ jr(t0); -+ __ bind(no_div0); -+ __ pop_i(x11); -+ // x10 <== x11 idiv x10 -+ __ corrected_idivl(x10, x11, x10, /* want_remainder */ false); -+} ++ // And the base dispatch table ++ __ get_dispatch(); + -+void TemplateTable::irem() -+{ -+ transition(itos, itos); -+ // explicitly check for div0 -+ Label no_div0; -+ __ bnez(x10, no_div0); -+ __ mv(t0, Interpreter::_throw_ArithmeticException_entry); -+ __ jr(t0); -+ __ bind(no_div0); -+ __ pop_i(x11); -+ // x10 <== x11 irem x10 -+ __ corrected_idivl(x10, x11, x10, /* want_remainder */ true); -+} ++ // initialize fixed part of activation frame ++ generate_fixed_frame(false); + -+void TemplateTable::lmul() -+{ -+ transition(ltos, ltos); -+ __ pop_l(x11); -+ __ mul(x10, x10, x11); -+} ++ // make sure method is not native & not abstract ++#ifdef ASSERT ++ __ lwu(x10, access_flags); ++ __ verify_access_flags(x10, JVM_ACC_NATIVE, "tried to execute native method as non-native"); ++ __ verify_access_flags(x10, JVM_ACC_ABSTRACT, "tried to execute abstract method in interpreter"); ++#endif + -+void TemplateTable::ldiv() -+{ -+ transition(ltos, ltos); -+ // explicitly check for div0 -+ Label no_div0; -+ __ bnez(x10, no_div0); -+ __ mv(t0, Interpreter::_throw_ArithmeticException_entry); -+ __ jr(t0); -+ __ bind(no_div0); -+ __ pop_l(x11); -+ // x10 <== x11 ldiv x10 -+ __ corrected_idivq(x10, x11, x10, /* want_remainder */ false); -+} ++ // Since at this point in the method invocation the exception ++ // handler would try to exit the monitor of synchronized methods ++ // which hasn't been entered yet, we set the thread local variable ++ // _do_not_unlock_if_synchronized to true. The remove_activation ++ // will check this flag. 
+ -+void TemplateTable::lrem() -+{ -+ transition(ltos, ltos); -+ // explicitly check for div0 -+ Label no_div0; -+ __ bnez(x10, no_div0); -+ __ mv(t0, Interpreter::_throw_ArithmeticException_entry); -+ __ jr(t0); -+ __ bind(no_div0); -+ __ pop_l(x11); -+ // x10 <== x11 lrem x10 -+ __ corrected_idivq(x10, x11, x10, /* want_remainder */ true); -+} ++ const Address do_not_unlock_if_synchronized(xthread, ++ in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ __ mv(t1, true); ++ __ sb(t1, do_not_unlock_if_synchronized); + -+void TemplateTable::lshl() -+{ -+ transition(itos, ltos); -+ // shift count is in x10 -+ __ pop_l(x11); -+ __ sll(x10, x11, x10); -+} ++ Label no_mdp; ++ const Register mdp = x13; ++ __ ld(mdp, Address(xmethod, Method::method_data_offset())); ++ __ beqz(mdp, no_mdp); ++ __ add(mdp, mdp, in_bytes(MethodData::data_offset())); ++ __ profile_parameters_type(mdp, x11, x12, x14); // use x11, x12, x14 as tmp registers ++ __ bind(no_mdp); + -+void TemplateTable::lshr() -+{ -+ transition(itos, ltos); -+ // shift count is in x10 -+ __ pop_l(x11); -+ __ sra(x10, x11, x10); -+} ++ // increment invocation count & check for overflow ++ Label invocation_counter_overflow; ++ if (inc_counter) { ++ generate_counter_incr(&invocation_counter_overflow); ++ } + -+void TemplateTable::lushr() -+{ -+ transition(itos, ltos); -+ // shift count is in x10 -+ __ pop_l(x11); -+ __ srl(x10, x11, x10); -+} ++ Label continue_after_compile; ++ __ bind(continue_after_compile); + -+void TemplateTable::fop2(Operation op) -+{ -+ transition(ftos, ftos); -+ switch (op) { -+ case add: -+ __ pop_f(f11); -+ __ fadd_s(f10, f11, f10); -+ break; -+ case sub: -+ __ pop_f(f11); -+ __ fsub_s(f10, f11, f10); -+ break; -+ case mul: -+ __ pop_f(f11); -+ __ fmul_s(f10, f11, f10); -+ break; -+ case div: -+ __ pop_f(f11); -+ __ fdiv_s(f10, f11, f10); -+ break; -+ case rem: -+ __ fmv_s(f11, f10); -+ __ pop_f(f10); -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem)); -+ break; -+ default: -+ ShouldNotReachHere(); ++ bang_stack_shadow_pages(false); ++ ++ // reset the _do_not_unlock_if_synchronized flag ++ __ sb(zr, do_not_unlock_if_synchronized); ++ ++ // check for synchronized methods ++ // Must happen AFTER invocation_counter check and stack overflow check, ++ // so method is not locked if overflows. 
++ if (synchronized) { ++ // Allocate monitor and lock method ++ lock_method(); ++ } else { ++ // no synchronization necessary ++#ifdef ASSERT ++ __ lwu(x10, access_flags); ++ __ verify_access_flags(x10, JVM_ACC_SYNCHRONIZED, "method needs synchronization"); ++#endif + } -+} + -+void TemplateTable::dop2(Operation op) -+{ -+ transition(dtos, dtos); -+ switch (op) { -+ case add: -+ __ pop_d(f11); -+ __ fadd_d(f10, f11, f10); -+ break; -+ case sub: -+ __ pop_d(f11); -+ __ fsub_d(f10, f11, f10); -+ break; -+ case mul: -+ __ pop_d(f11); -+ __ fmul_d(f10, f11, f10); -+ break; -+ case div: -+ __ pop_d(f11); -+ __ fdiv_d(f10, f11, f10); -+ break; -+ case rem: -+ __ fmv_d(f11, f10); -+ __ pop_d(f10); -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem)); -+ break; -+ default: -+ ShouldNotReachHere(); ++ // start execution ++#ifdef ASSERT ++ __ verify_frame_setup(); ++#endif ++ ++ // jvmti support ++ __ notify_method_entry(); ++ ++ __ dispatch_next(vtos); ++ ++ // invocation counter overflow ++ if (inc_counter) { ++ // Handle overflow of counter and compile method ++ __ bind(invocation_counter_overflow); ++ generate_counter_overflow(continue_after_compile); + } -+} + -+void TemplateTable::ineg() -+{ -+ transition(itos, itos); -+ __ negw(x10, x10); ++ return entry_point; +} + -+void TemplateTable::lneg() -+{ -+ transition(ltos, ltos); -+ __ neg(x10, x10); -+} ++//----------------------------------------------------------------------------- ++// Exceptions + -+void TemplateTable::fneg() -+{ -+ transition(ftos, ftos); -+ __ fneg_s(f10, f10); -+} ++void TemplateInterpreterGenerator::generate_throw_exception() { ++ // Entry point in previous activation (i.e., if the caller was ++ // interpreted) ++ Interpreter::_rethrow_exception_entry = __ pc(); ++ // Restore sp to interpreter_frame_last_sp even though we are going ++ // to empty the expression stack for the exception processing. ++ __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ // x10: exception ++ // x13: return address/pc that threw exception ++ __ restore_bcp(); // xbcp points to call/send ++ __ restore_locals(); ++ __ restore_constant_pool_cache(); ++ __ reinit_heapbase(); // restore xheapbase as heapbase. ++ __ get_dispatch(); + -+void TemplateTable::dneg() -+{ -+ transition(dtos, dtos); -+ __ fneg_d(f10, f10); -+} ++ // Entry point for exceptions thrown within interpreter code ++ Interpreter::_throw_exception_entry = __ pc(); ++ // If we came here via a NullPointerException on the receiver of a ++ // method, xthread may be corrupt. 
++ __ get_method(xmethod); ++ // expression stack is undefined here ++ // x10: exception ++ // xbcp: exception bcp ++ __ verify_oop(x10); ++ __ mv(c_rarg1, x10); + -+void TemplateTable::iinc() -+{ -+ transition(vtos, vtos); -+ __ load_signed_byte(x11, at_bcp(2)); // get constant -+ locals_index(x12); -+ __ ld(x10, iaddress(x12, x10, _masm)); -+ __ addw(x10, x10, x11); -+ __ sd(x10, iaddress(x12, t0, _masm)); -+} ++ // expression stack must be empty before entering the VM in case of ++ // an exception ++ __ empty_expression_stack(); ++ // find exception handler address and preserve exception oop ++ __ call_VM(x13, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::exception_handler_for_exception), ++ c_rarg1); + -+void TemplateTable::wide_iinc() -+{ -+ transition(vtos, vtos); -+ __ lwu(x11, at_bcp(2)); // get constant and index -+ __ revb_h_w_u(x11, x11); // reverse bytes in half-word (32bit) and zero-extend -+ __ zero_extend(x12, x11, 16); -+ __ neg(x12, x12); -+ __ slli(x11, x11, 32); -+ __ srai(x11, x11, 48); -+ __ ld(x10, iaddress(x12, t0, _masm)); -+ __ addw(x10, x10, x11); -+ __ sd(x10, iaddress(x12, t0, _masm)); -+} ++ // Calculate stack limit ++ __ ld(t0, Address(xmethod, Method::const_offset())); ++ __ lhu(t0, Address(t0, ConstMethod::max_stack_offset())); ++ __ add(t0, t0, frame::interpreter_frame_monitor_size() + 4); ++ __ ld(t1, Address(fp, frame::interpreter_frame_initial_sp_offset * wordSize)); ++ __ slli(t0, t0, 3); ++ __ sub(t0, t1, t0); ++ __ andi(sp, t0, -16); + -+void TemplateTable::convert() -+{ -+ // Checking -+#ifdef ASSERT -+ { -+ TosState tos_in = ilgl; -+ TosState tos_out = ilgl; -+ switch (bytecode()) { -+ case Bytecodes::_i2l: // fall through -+ case Bytecodes::_i2f: // fall through -+ case Bytecodes::_i2d: // fall through -+ case Bytecodes::_i2b: // fall through -+ case Bytecodes::_i2c: // fall through -+ case Bytecodes::_i2s: tos_in = itos; break; -+ case Bytecodes::_l2i: // fall through -+ case Bytecodes::_l2f: // fall through -+ case Bytecodes::_l2d: tos_in = ltos; break; -+ case Bytecodes::_f2i: // fall through -+ case Bytecodes::_f2l: // fall through -+ case Bytecodes::_f2d: tos_in = ftos; break; -+ case Bytecodes::_d2i: // fall through -+ case Bytecodes::_d2l: // fall through -+ case Bytecodes::_d2f: tos_in = dtos; break; -+ default : ShouldNotReachHere(); -+ } -+ switch (bytecode()) { -+ case Bytecodes::_l2i: // fall through -+ case Bytecodes::_f2i: // fall through -+ case Bytecodes::_d2i: // fall through -+ case Bytecodes::_i2b: // fall through -+ case Bytecodes::_i2c: // fall through -+ case Bytecodes::_i2s: tos_out = itos; break; -+ case Bytecodes::_i2l: // fall through -+ case Bytecodes::_f2l: // fall through -+ case Bytecodes::_d2l: tos_out = ltos; break; -+ case Bytecodes::_i2f: // fall through -+ case Bytecodes::_l2f: // fall through -+ case Bytecodes::_d2f: tos_out = ftos; break; -+ case Bytecodes::_i2d: // fall through -+ case Bytecodes::_l2d: // fall through -+ case Bytecodes::_f2d: tos_out = dtos; break; -+ default : ShouldNotReachHere(); -+ } -+ transition(tos_in, tos_out); -+ } -+#endif // ASSERT ++ // x10: exception handler entry point ++ // x13: preserved exception oop ++ // xbcp: bcp for exception handler ++ __ push_ptr(x13); // push exception which is now the only value on the stack ++ __ jr(x10); // jump to exception handler (may be _remove_activation_entry!) 
+ -+ // Conversion -+ switch (bytecode()) { -+ case Bytecodes::_i2l: -+ __ sign_extend(x10, x10, 32); -+ break; -+ case Bytecodes::_i2f: -+ __ fcvt_s_w(f10, x10); -+ break; -+ case Bytecodes::_i2d: -+ __ fcvt_d_w(f10, x10); -+ break; -+ case Bytecodes::_i2b: -+ __ sign_extend(x10, x10, 8); -+ break; -+ case Bytecodes::_i2c: -+ __ zero_extend(x10, x10, 16); -+ break; -+ case Bytecodes::_i2s: -+ __ sign_extend(x10, x10, 16); -+ break; -+ case Bytecodes::_l2i: -+ __ addw(x10, x10, zr); -+ break; -+ case Bytecodes::_l2f: -+ __ fcvt_s_l(f10, x10); -+ break; -+ case Bytecodes::_l2d: -+ __ fcvt_d_l(f10, x10); -+ break; -+ case Bytecodes::_f2i: -+ __ fcvt_w_s_safe(x10, f10); -+ break; -+ case Bytecodes::_f2l: -+ __ fcvt_l_s_safe(x10, f10); -+ break; -+ case Bytecodes::_f2d: -+ __ fcvt_d_s(f10, f10); -+ break; -+ case Bytecodes::_d2i: -+ __ fcvt_w_d_safe(x10, f10); -+ break; -+ case Bytecodes::_d2l: -+ __ fcvt_l_d_safe(x10, f10); -+ break; -+ case Bytecodes::_d2f: -+ __ fcvt_s_d(f10, f10); -+ break; -+ default: -+ ShouldNotReachHere(); -+ } -+} ++ // If the exception is not handled in the current frame the frame is ++ // removed and the exception is rethrown (i.e. exception ++ // continuation is _rethrow_exception). ++ // ++ // Note: At this point the bci is still the bxi for the instruction ++ // which caused the exception and the expression stack is ++ // empty. Thus, for any VM calls at this point, GC will find a legal ++ // oop map (with empty expression stack). + -+void TemplateTable::lcmp() -+{ -+ transition(ltos, itos); -+ __ pop_l(x11); -+ __ cmp_l2i(t0, x11, x10); -+ __ mv(x10, t0); -+} ++ // ++ // JVMTI PopFrame support ++ // + -+void TemplateTable::float_cmp(bool is_float, int unordered_result) -+{ -+ // For instruction feq, flt and fle, the result is 0 if either operand is NaN -+ if (is_float) { -+ __ pop_f(f11); -+ // if unordered_result < 0: -+ // we want -1 for unordered or less than, 0 for equal and 1 for -+ // greater than. -+ // else: -+ // we want -1 for less than, 0 for equal and 1 for unordered or -+ // greater than. -+ // f11 primary, f10 secondary -+ __ float_compare(x10, f11, f10, unordered_result); -+ } else { -+ __ pop_d(f11); -+ // if unordered_result < 0: -+ // we want -1 for unordered or less than, 0 for equal and 1 for -+ // greater than. -+ // else: -+ // we want -1 for less than, 0 for equal and 1 for unordered or -+ // greater than. -+ // f11 primary, f10 secondary -+ __ double_compare(x10, f11, f10, unordered_result); -+ } -+} ++ Interpreter::_remove_activation_preserving_args_entry = __ pc(); ++ __ empty_expression_stack(); ++ // Set the popframe_processing bit in pending_popframe_condition ++ // indicating that we are currently handling popframe, so that ++ // call_VMs that may happen later do not trigger new popframe ++ // handling cycles. ++ __ lwu(x13, Address(xthread, JavaThread::popframe_condition_offset())); ++ __ ori(x13, x13, JavaThread::popframe_processing_bit); ++ __ sw(x13, Address(xthread, JavaThread::popframe_condition_offset())); + -+void TemplateTable::branch(bool is_jsr, bool is_wide) -+{ -+ // We might be moving to a safepoint. The thread which calls -+ // Interpreter::notice_safepoints() will effectively flush its cache -+ // when it makes a system call, but we need to do something to -+ // ensure that we see the changed dispatch table. -+ __ membar(MacroAssembler::LoadLoad); ++ { ++ // Check to see whether we are returning to a deoptimized frame. 
++ // (The PopFrame call ensures that the caller of the popped frame is ++ // either interpreted or compiled and deoptimizes it if compiled.) ++ // In this case, we can't call dispatch_next() after the frame is ++ // popped, but instead must save the incoming arguments and restore ++ // them after deoptimization has occurred. ++ // ++ // Note that we don't compare the return PC against the ++ // deoptimization blob's unpack entry because of the presence of ++ // adapter frames in C2. ++ Label caller_not_deoptimized; ++ __ ld(c_rarg1, Address(fp, frame::return_addr_offset * wordSize)); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::interpreter_contains), c_rarg1); ++ __ bnez(x10, caller_not_deoptimized); + -+ __ profile_taken_branch(x10, x11); -+ const ByteSize be_offset = MethodCounters::backedge_counter_offset() + -+ InvocationCounter::counter_offset(); -+ const ByteSize inv_offset = MethodCounters::invocation_counter_offset() + -+ InvocationCounter::counter_offset(); ++ // Compute size of arguments for saving when returning to ++ // deoptimized caller ++ __ get_method(x10); ++ __ ld(x10, Address(x10, Method::const_offset())); ++ __ load_unsigned_short(x10, Address(x10, in_bytes(ConstMethod:: ++ size_of_parameters_offset()))); ++ __ slli(x10, x10, Interpreter::logStackElementSize); ++ __ restore_locals(); ++ __ sub(xlocals, xlocals, x10); ++ __ add(xlocals, xlocals, wordSize); ++ // Save these arguments ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ++ Deoptimization:: ++ popframe_preserve_args), ++ xthread, x10, xlocals); + -+ // load branch displacement -+ if (!is_wide) { -+ __ lhu(x12, at_bcp(1)); -+ __ revb_h_h(x12, x12); // reverse bytes in half-word and sign-extend -+ } else { -+ __ lwu(x12, at_bcp(1)); -+ __ revb_w_w(x12, x12); // reverse bytes in word and sign-extend -+ } ++ __ remove_activation(vtos, ++ /* throw_monitor_exception */ false, ++ /* install_monitor_exception */ false, ++ /* notify_jvmdi */ false); + -+ // Handle all the JSR stuff here, then exit. -+ // It's much shorter and cleaner than intermingling with the non-JSR -+ // normal-branch stuff occurring below. ++ // Inform deoptimization that it is responsible for restoring ++ // these arguments ++ __ mv(t0, JavaThread::popframe_force_deopt_reexecution_bit); ++ __ sw(t0, Address(xthread, JavaThread::popframe_condition_offset())); + -+ if (is_jsr) { -+ // compute return address as bci -+ __ ld(t1, Address(xmethod, Method::const_offset())); -+ __ add(t1, t1, -+ in_bytes(ConstMethod::codes_offset()) - (is_wide ? 
5 : 3)); -+ __ sub(x11, xbcp, t1); -+ __ push_i(x11); -+ // Adjust the bcp by the 16-bit displacement in x12 -+ __ add(xbcp, xbcp, x12); -+ __ load_unsigned_byte(t0, Address(xbcp, 0)); -+ // load the next target bytecode into t0, it is the argument of dispatch_only -+ __ dispatch_only(vtos, /*generate_poll*/true); -+ return; ++ // Continue in deoptimization handler ++ __ ret(); ++ ++ __ bind(caller_not_deoptimized); + } + -+ // Normal (non-jsr) branch handling ++ __ remove_activation(vtos, ++ /* throw_monitor_exception */ false, ++ /* install_monitor_exception */ false, ++ /* notify_jvmdi */ false); + -+ // Adjust the bcp by the displacement in x12 -+ __ add(xbcp, xbcp, x12); ++ // Restore the last_sp and null it out ++ __ ld(esp, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); + -+ assert(UseLoopCounter || !UseOnStackReplacement, -+ "on-stack-replacement requires loop counters"); -+ Label backedge_counter_overflow; -+ Label profile_method; -+ Label dispatch; -+ if (UseLoopCounter) { -+ // increment backedge counter for backward branches -+ // x10: MDO -+ // x11: MDO bumped taken-count -+ // x12: target offset -+ __ bgtz(x12, dispatch); // count only if backward branch ++ __ restore_bcp(); ++ __ restore_locals(); ++ __ restore_constant_pool_cache(); ++ __ get_method(xmethod); ++ __ get_dispatch(); + -+ // check if MethodCounters exists -+ Label has_counters; -+ __ ld(t0, Address(xmethod, Method::method_counters_offset())); -+ __ bnez(t0, has_counters); -+ __ push_reg(x10); -+ __ push_reg(x11); -+ __ push_reg(x12); -+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::build_method_counters), xmethod); -+ __ pop_reg(x12); -+ __ pop_reg(x11); -+ __ pop_reg(x10); -+ __ ld(t0, Address(xmethod, Method::method_counters_offset())); -+ __ beqz(t0, dispatch); // No MethodCounters allocated, OutOfMemory -+ __ bind(has_counters); ++ // The method data pointer was incremented already during ++ // call profiling. We have to restore the mdp for the current bcp. ++ if (ProfileInterpreter) { ++ __ set_method_data_pointer_for_bcp(); ++ } + -+ if (TieredCompilation) { -+ Label no_mdo; -+ int increment = InvocationCounter::count_increment; -+ if (ProfileInterpreter) { -+ // Are we profiling? -+ __ ld(x11, Address(xmethod, in_bytes(Method::method_data_offset()))); -+ __ beqz(x11, no_mdo); -+ // Increment the MDO backedge counter -+ const Address mdo_backedge_counter(x11, in_bytes(MethodData::backedge_counter_offset()) + -+ in_bytes(InvocationCounter::counter_offset())); -+ const Address mask(x11, in_bytes(MethodData::backedge_mask_offset())); -+ __ increment_mask_and_jump(mdo_backedge_counter, increment, mask, -+ x10, t0, false, -+ UseOnStackReplacement ? &backedge_counter_overflow : &dispatch); -+ __ j(dispatch); -+ } -+ __ bind(no_mdo); -+ // Increment backedge counter in MethodCounters* -+ __ ld(t0, Address(xmethod, Method::method_counters_offset())); -+ const Address mask(t0, in_bytes(MethodCounters::backedge_mask_offset())); -+ __ increment_mask_and_jump(Address(t0, be_offset), increment, mask, -+ x10, t1, false, -+ UseOnStackReplacement ? 
&backedge_counter_overflow : &dispatch); -+ } else { // not TieredCompilation -+ // increment counter -+ __ ld(t1, Address(xmethod, Method::method_counters_offset())); -+ __ lwu(x10, Address(t1, be_offset)); // load backedge counter -+ __ addw(t0, x10, InvocationCounter::count_increment); // increment counter -+ __ sw(t0, Address(t1, be_offset)); // store counter ++ // Clear the popframe condition flag ++ __ sw(zr, Address(xthread, JavaThread::popframe_condition_offset())); ++ assert(JavaThread::popframe_inactive == 0, "fix popframe_inactive"); + -+ __ lwu(x10, Address(t1, inv_offset)); // load invocation counter -+ __ andi(x10, x10, (unsigned)InvocationCounter::count_mask_value, x13); // and the status bits -+ __ addw(x10, x10, t0); // add both counters ++#if INCLUDE_JVMTI ++ { ++ Label L_done; + -+ if (ProfileInterpreter) { -+ // Test to see if we should create a method data oop -+ __ lwu(t0, Address(t1, in_bytes(MethodCounters::interpreter_profile_limit_offset()))); -+ __ blt(x10, t0, dispatch); ++ __ lbu(t0, Address(xbcp, 0)); ++ __ li(t1, Bytecodes::_invokestatic); ++ __ bne(t1, t0, L_done); + -+ // if no method data exists, go to profile method -+ __ test_method_data_pointer(x10, profile_method); ++ // The member name argument must be restored if _invokestatic is re-executed after a PopFrame call. ++ // Detect such a case in the InterpreterRuntime function and return the member name argument,or NULL. + -+ if (UseOnStackReplacement) { -+ // check for overflow against x11 which is the MDO taken count -+ __ lwu(t0, Address(t1, in_bytes(MethodCounters::interpreter_backward_branch_limit_offset()))); -+ __ bltu(x11, t0, dispatch); // Intel == Assembler::below, lo:unsigned lower ++ __ ld(c_rarg0, Address(xlocals, 0)); ++ __ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::member_name_arg_or_null),c_rarg0, xmethod, xbcp); + -+ // When ProfileInterpreter is on, the backedge_count comes -+ // from the MethodData*, which value does not get reset on -+ // the call to frequency_counter_overflow(). To avoid -+ // excessive calls to the overflow routine while the method is -+ // being compiled, add a second test to make sure the overflow -+ // function is called only once every overflow_frequency. 
-+ const int overflow_frequency = 1024; -+ __ andi(x11, x11, overflow_frequency - 1); -+ __ beqz(x11, backedge_counter_overflow); ++ __ beqz(x10, L_done); + -+ } -+ } else { -+ if (UseOnStackReplacement) { -+ // check for overflow against x10, which is the sum of the -+ // counters -+ __ lwu(t0, Address(t1, in_bytes(MethodCounters::interpreter_backward_branch_limit_offset()))); -+ __ bgeu(x10, t0, backedge_counter_overflow); // Intel == Assembler::aboveEqual -+ } -+ } -+ } -+ __ bind(dispatch); ++ __ sd(x10, Address(esp, 0)); ++ __ bind(L_done); + } ++#endif // INCLUDE_JVMTI + -+ // Pre-load the next target bytecode into t0 -+ __ load_unsigned_byte(t0, Address(xbcp, 0)); ++ // Restore machine SP ++ __ ld(t0, Address(xmethod, Method::const_offset())); ++ __ lhu(t0, Address(t0, ConstMethod::max_stack_offset())); ++ __ add(t0, t0, frame::interpreter_frame_monitor_size() + 4); ++ __ ld(t1, Address(fp, frame::interpreter_frame_initial_sp_offset * wordSize)); ++ __ slliw(t0, t0, 3); ++ __ sub(t0, t1, t0); ++ __ andi(sp, t0, -16); + -+ // continue with the bytecode @ target -+ // t0: target bytecode -+ // xbcp: target bcp -+ __ dispatch_only(vtos, /*generate_poll*/true); ++ __ dispatch_next(vtos); ++ // end of PopFrame support + -+ if (UseLoopCounter) { -+ if (ProfileInterpreter && !TieredCompilation) { -+ // Out-of-line code to allocate method data oop. -+ __ bind(profile_method); -+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method)); -+ __ load_unsigned_byte(x11, Address(xbcp, 0)); // restore target bytecode -+ __ set_method_data_pointer_for_bcp(); -+ __ j(dispatch); -+ } ++ Interpreter::_remove_activation_entry = __ pc(); + -+ if (UseOnStackReplacement) { -+ // invocation counter overflow -+ __ bind(backedge_counter_overflow); -+ __ neg(x12, x12); -+ __ add(x12, x12, xbcp); // branch xbcp -+ // IcoResult frequency_counter_overflow([JavaThread*], address branch_bcp) -+ __ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::frequency_counter_overflow), -+ x12); -+ __ load_unsigned_byte(x11, Address(xbcp, 0)); // restore target bytecode ++ // preserve exception over this code sequence ++ __ pop_ptr(x10); ++ __ sd(x10, Address(xthread, JavaThread::vm_result_offset())); ++ // remove the activation (without doing throws on illegalMonitorExceptions) ++ __ remove_activation(vtos, false, true, false); ++ // restore exception ++ __ get_vm_result(x10, xthread); + -+ // x10: osr nmethod (osr ok) or NULL (osr not possible) -+ // w11: target bytecode -+ // x12: temporary -+ __ beqz(x10, dispatch); // test result -- no osr if null -+ // nmethod may have been invalidated (VM may block upon call_VM return) -+ __ lbu(x12, Address(x10, nmethod::state_offset())); -+ if (nmethod::in_use != 0) { -+ __ sub(x12, x12, nmethod::in_use); -+ } -+ __ bnez(x12, dispatch); ++ // In between activations - previous activation type unknown yet ++ // compute continuation point - the continuation point expects the ++ // following registers set up: ++ // ++ // x10: exception ++ // ra: return address/pc that threw exception ++ // sp: expression stack of caller ++ // fp: fp of caller ++ // FIXME: There's no point saving ra here because VM calls don't trash it ++ __ sub(sp, sp, 2 * wordSize); ++ __ sd(x10, Address(sp, 0)); // save exception ++ __ sd(ra, Address(sp, wordSize)); // save return address ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ++ SharedRuntime::exception_handler_for_return_address), ++ xthread, ra); ++ __ mv(x11, x10); // save exception handler ++ __ ld(x10, 
Address(sp, 0)); // restore exception ++ __ ld(ra, Address(sp, wordSize)); // restore return address ++ __ add(sp, sp, 2 * wordSize); ++ // We might be returning to a deopt handler that expects x13 to ++ // contain the exception pc ++ __ mv(x13, ra); ++ // Note that an "issuing PC" is actually the next PC after the call ++ __ jr(x11); // jump to exception ++ // handler of caller ++} + -+ // We have the address of an on stack replacement routine in x10 -+ // We need to prepare to execute the OSR method. First we must -+ // migrate the locals and monitors off of the stack. ++// ++// JVMTI ForceEarlyReturn support ++// ++address TemplateInterpreterGenerator::generate_earlyret_entry_for(TosState state) { ++ address entry = __ pc(); + -+ __ mv(x9, x10); // save the nmethod ++ __ restore_bcp(); ++ __ restore_locals(); ++ __ empty_expression_stack(); ++ __ load_earlyret_value(state); + -+ call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin)); ++ __ ld(t0, Address(xthread, JavaThread::jvmti_thread_state_offset())); ++ Address cond_addr(t0, JvmtiThreadState::earlyret_state_offset()); + -+ // x10 is OSR buffer, move it to expected parameter location -+ __ mv(j_rarg0, x10); ++ // Clear the earlyret state ++ assert(JvmtiThreadState::earlyret_inactive == 0, "should be"); ++ __ sd(zr, cond_addr); + -+ // remove activation -+ // get sender esp -+ __ ld(esp, -+ Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); -+ // remove frame anchor -+ __ leave(); -+ // Ensure compiled code always sees stack at proper alignment -+ __ andi(sp, esp, -16); ++ __ remove_activation(state, ++ false, /* throw_monitor_exception */ ++ false, /* install_monitor_exception */ ++ true); /* notify_jvmdi */ ++ __ ret(); + -+ // and begin the OSR nmethod -+ __ ld(t0, Address(x9, nmethod::osr_entry_point_offset())); -+ __ jr(t0); -+ } -+ } ++ return entry; ++} ++// end of ForceEarlyReturn support ++ ++//----------------------------------------------------------------------------- ++// Helper for vtos entry point generation + ++void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t, ++ address& bep, ++ address& cep, ++ address& sep, ++ address& aep, ++ address& iep, ++ address& lep, ++ address& fep, ++ address& dep, ++ address& vep) { ++ assert(t != NULL && t->is_valid() && t->tos_in() == vtos, "illegal template"); ++ Label L; ++ aep = __ pc(); __ push_ptr(); __ j(L); ++ fep = __ pc(); __ push_f(); __ j(L); ++ dep = __ pc(); __ push_d(); __ j(L); ++ lep = __ pc(); __ push_l(); __ j(L); ++ bep = cep = sep = ++ iep = __ pc(); __ push_i(); ++ vep = __ pc(); ++ __ bind(L); ++ generate_and_dispatch(t); +} + -+void TemplateTable::if_0cmp(Condition cc) -+{ -+ transition(itos, vtos); -+ // assume branch is more often taken than not (loops use backward branches) -+ Label not_taken; ++//----------------------------------------------------------------------------- + -+ __ addw(x10, x10, zr); -+ switch (cc) { -+ case equal: -+ __ bnez(x10, not_taken); -+ break; -+ case not_equal: -+ __ beqz(x10, not_taken); -+ break; -+ case less: -+ __ bgez(x10, not_taken); -+ break; -+ case less_equal: -+ __ bgtz(x10, not_taken); -+ break; -+ case greater: -+ __ blez(x10, not_taken); -+ break; -+ case greater_equal: -+ __ bltz(x10, not_taken); -+ break; -+ default: -+ break; -+ } -+ -+ branch(false, false); -+ __ bind(not_taken); -+ __ profile_not_taken_branch(x10); -+} ++// Non-product code ++#ifndef PRODUCT ++address TemplateInterpreterGenerator::generate_trace_code(TosState state) { ++ address entry = __ 
pc(); + -+void TemplateTable::if_icmp(Condition cc) -+{ -+ transition(itos, vtos); -+ // assume branch is more often taken than not (loops use backward branches) -+ Label not_taken; -+ __ pop_i(x11); -+ __ addw(x10, x10, zr); -+ switch (cc) { -+ case equal: -+ __ bne(x11, x10, not_taken); -+ break; -+ case not_equal: -+ __ beq(x11, x10, not_taken); -+ break; -+ case less: -+ __ bge(x11, x10, not_taken); -+ break; -+ case less_equal: -+ __ bgt(x11, x10, not_taken); -+ break; -+ case greater: -+ __ ble(x11, x10, not_taken); -+ break; -+ case greater_equal: -+ __ blt(x11, x10, not_taken); -+ break; -+ default: -+ break; -+ } ++ __ push_reg(ra); ++ __ push(state); ++ __ push_reg(RegSet::range(x10, x17) + RegSet::range(x5, x7) + RegSet::range(x28, x31), sp); ++ __ mv(c_rarg2, x10); // Pass itos ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::trace_bytecode), c_rarg1, c_rarg2, c_rarg3); ++ __ pop_reg(RegSet::range(x10, x17) + RegSet::range(x5, x7) + RegSet::range(x28, x31), sp); ++ __ pop(state); ++ __ pop_reg(ra); ++ __ ret(); // return from result handler + -+ branch(false, false); -+ __ bind(not_taken); -+ __ profile_not_taken_branch(x10); ++ return entry; +} + -+void TemplateTable::if_nullcmp(Condition cc) -+{ -+ transition(atos, vtos); -+ // assume branch is more often taken than not (loops use backward branches) -+ Label not_taken; -+ if (cc == equal) { -+ __ bnez(x10, not_taken); -+ } else { -+ __ beqz(x10, not_taken); -+ } -+ branch(false, false); -+ __ bind(not_taken); -+ __ profile_not_taken_branch(x10); ++void TemplateInterpreterGenerator::count_bytecode() { ++ __ push_reg(t0); ++ __ push_reg(x10); ++ __ mv(x10, (address) &BytecodeCounter::_counter_value); ++ __ li(t0, 1); ++ __ amoadd_d(zr, x10, t0, Assembler::aqrl); ++ __ pop_reg(x10); ++ __ pop_reg(t0); +} + -+void TemplateTable::if_acmp(Condition cc) -+{ -+ transition(atos, vtos); -+ // assume branch is more often taken than not (loops use backward branches) -+ Label not_taken; -+ __ pop_ptr(x11); ++void TemplateInterpreterGenerator::histogram_bytecode(Template* t) { ; } + -+ if (cc == equal) { -+ __ oop_nequal(x11, x10, not_taken); -+ } else if (cc == not_equal) { -+ __ oop_equal(x11, x10, not_taken); -+ } -+ branch(false, false); -+ __ bind(not_taken); -+ __ profile_not_taken_branch(x10); -+} ++void TemplateInterpreterGenerator::histogram_bytecode_pair(Template* t) { ; } + -+void TemplateTable::ret() { -+ transition(vtos, vtos); -+ // We might be moving to a safepoint. The thread which calls -+ // Interpreter::notice_safepoints() will effectively flush its cache -+ // when it makes a system call, but we need to do something to -+ // ensure that we see the changed dispatch table. -+ __ membar(MacroAssembler::LoadLoad); ++void TemplateInterpreterGenerator::trace_bytecode(Template* t) { ++ // Call a little run-time stub to avoid blow-up for each bytecode. ++ // The run-time runtime saves the right registers, depending on ++ // the tosca in-state for the given template. 
+ -+ locals_index(x11); -+ __ ld(x11, aaddress(x11, t1, _masm)); // get return bci, compute return bcp -+ __ profile_ret(x11, x12); -+ __ ld(xbcp, Address(xmethod, Method::const_offset())); -+ __ add(xbcp, xbcp, x11); -+ __ addi(xbcp, xbcp, in_bytes(ConstMethod::codes_offset())); -+ __ dispatch_next(vtos, 0, /*generate_poll*/true); ++ assert(Interpreter::trace_code(t->tos_in()) != NULL, "entry must have been generated"); ++ __ jal(Interpreter::trace_code(t->tos_in())); ++ __ reinit_heapbase(); +} + -+void TemplateTable::wide_ret() { -+ transition(vtos, vtos); -+ locals_index_wide(x11); -+ __ ld(x11, aaddress(x11, t0, _masm)); // get return bci, compute return bcp -+ __ profile_ret(x11, x12); -+ __ ld(xbcp, Address(xmethod, Method::const_offset())); -+ __ add(xbcp, xbcp, x11); -+ __ add(xbcp, xbcp, in_bytes(ConstMethod::codes_offset())); -+ __ dispatch_next(vtos, 0, /*generate_poll*/true); ++void TemplateInterpreterGenerator::stop_interpreter_at() { ++ Label L; ++ __ push_reg(t0); ++ __ mv(t0, (address) &BytecodeCounter::_counter_value); ++ __ ld(t0, Address(t0)); ++ __ mv(t1, StopInterpreterAt); ++ __ bne(t0, t1, L); ++ __ ebreak(); ++ __ bind(L); ++ __ pop_reg(t0); +} + -+void TemplateTable::tableswitch() { -+ Label default_case, continue_execution; -+ transition(itos, vtos); -+ // align xbcp -+ __ la(x11, at_bcp(BytesPerInt)); -+ __ andi(x11, x11, -BytesPerInt); -+ // load lo & hi -+ __ lwu(x12, Address(x11, BytesPerInt)); -+ __ lwu(x13, Address(x11, 2 * BytesPerInt)); -+ __ revb_w_w(x12, x12); // reverse bytes in word (32bit) and sign-extend -+ __ revb_w_w(x13, x13); // reverse bytes in word (32bit) and sign-extend -+ // check against lo & hi -+ __ blt(x10, x12, default_case); -+ __ bgt(x10, x13, default_case); -+ // lookup dispatch offset -+ __ subw(x10, x10, x12); -+ __ shadd(x13, x10, x11, t0, 2); -+ __ lwu(x13, Address(x13, 3 * BytesPerInt)); -+ __ profile_switch_case(x10, x11, x12); -+ // continue execution -+ __ bind(continue_execution); -+ __ revb_w_w(x13, x13); // reverse bytes in word (32bit) and sign-extend -+ __ add(xbcp, xbcp, x13); -+ __ load_unsigned_byte(t0, Address(xbcp)); -+ __ dispatch_only(vtos, /*generate_poll*/true); -+ // handle default -+ __ bind(default_case); -+ __ profile_switch_default(x10); -+ __ lwu(x13, Address(x11, 0)); -+ __ j(continue_execution); ++#endif // !PRODUCT +diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp +new file mode 100644 +index 00000000000..d2a301c6e74 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp +@@ -0,0 +1,3951 @@ ++/* ++ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "gc/shared/tlab_globals.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "interpreter/templateTable.hpp" ++#include "memory/universe.hpp" ++#include "oops/method.hpp" ++#include "oops/methodData.hpp" ++#include "oops/objArrayKlass.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/synchronizer.hpp" ++#include "utilities/powerOfTwo.hpp" ++ ++#define __ _masm-> ++ ++// Address computation: local variables ++ ++static inline Address iaddress(int n) { ++ return Address(xlocals, Interpreter::local_offset_in_bytes(n)); +} + -+void TemplateTable::lookupswitch() { -+ transition(itos, itos); -+ __ stop("lookupswitch bytecode should have been rewritten"); ++static inline Address laddress(int n) { ++ return iaddress(n + 1); +} + -+void TemplateTable::fast_linearswitch() { -+ transition(itos, vtos); -+ Label loop_entry, loop, found, continue_execution; -+ // bswap x10 so we can avoid bswapping the table entries -+ __ revb_w_w(x10, x10); // reverse bytes in word (32bit) and sign-extend -+ // align xbcp -+ __ la(x9, at_bcp(BytesPerInt)); // btw: should be able to get rid of -+ // this instruction (change offsets -+ // below) -+ __ andi(x9, x9, -BytesPerInt); -+ // set counter -+ __ lwu(x11, Address(x9, BytesPerInt)); -+ __ revb_w(x11, x11); -+ __ j(loop_entry); -+ // table search -+ __ bind(loop); -+ __ shadd(t0, x11, x9, t0, 3); -+ __ lw(t0, Address(t0, 2 * BytesPerInt)); -+ __ beq(x10, t0, found); -+ __ bind(loop_entry); -+ __ addi(x11, x11, -1); -+ __ bgez(x11, loop); -+ // default case -+ __ profile_switch_default(x10); -+ __ lwu(x13, Address(x9, 0)); -+ __ j(continue_execution); -+ // entry found -> get offset -+ __ bind(found); -+ __ shadd(t0, x11, x9, t0, 3); -+ __ lwu(x13, Address(t0, 3 * BytesPerInt)); -+ __ profile_switch_case(x11, x10, x9); -+ // continue execution -+ __ bind(continue_execution); -+ __ revb_w_w(x13, x13); // reverse bytes in word (32bit) and sign-extend -+ __ add(xbcp, xbcp, x13); -+ __ lbu(t0, Address(xbcp, 0)); -+ __ dispatch_only(vtos, /*generate_poll*/true); ++static inline Address faddress(int n) { ++ return iaddress(n); +} + -+void TemplateTable::fast_binaryswitch() { -+ transition(itos, vtos); -+ // Implementation using the following core algorithm: -+ // -+ // int binary_search(int key, LookupswitchPair* array, int n) -+ // binary_search start: -+ // #Binary search according to "Methodik des Programmierens" by -+ // # Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985. 
-+ // int i = 0; -+ // int j = n; -+ // while (i + 1 < j) do -+ // # invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q) -+ // # with Q: for all i: 0 <= i < n: key < a[i] -+ // # where a stands for the array and assuming that the (inexisting) -+ // # element a[n] is infinitely big. -+ // int h = (i + j) >> 1 -+ // # i < h < j -+ // if (key < array[h].fast_match()) -+ // then [j = h] -+ // else [i = h] -+ // end -+ // # R: a[i] <= key < a[i+1] or Q -+ // # (i.e., if key is within array, i is the correct index) -+ // return i -+ // binary_search end ++static inline Address daddress(int n) { ++ return laddress(n); ++} + ++static inline Address aaddress(int n) { ++ return iaddress(n); ++} + -+ // Register allocation -+ const Register key = x10; // already set (tosca) -+ const Register array = x11; -+ const Register i = x12; -+ const Register j = x13; -+ const Register h = x14; -+ const Register temp = x15; ++static inline Address iaddress(Register r, Register temp, InterpreterMacroAssembler* _masm) { ++ assert_cond(_masm != NULL); ++ _masm->shadd(temp, r, xlocals, temp, 3); ++ return Address(temp, 0); ++} + -+ // Find array start -+ __ la(array, at_bcp(3 * BytesPerInt)); // btw: should be able to -+ // get rid of this -+ // instruction (change -+ // offsets below) -+ __ andi(array, array, -BytesPerInt); ++static inline Address laddress(Register r, Register temp, ++ InterpreterMacroAssembler* _masm) { ++ assert_cond(_masm != NULL); ++ _masm->shadd(temp, r, xlocals, temp, 3); ++ return Address(temp, Interpreter::local_offset_in_bytes(1));; ++} + -+ // Initialize i & j -+ __ mv(i, zr); // i = 0 -+ __ lwu(j, Address(array, -BytesPerInt)); // j = length(array) ++static inline Address faddress(Register r, Register temp, InterpreterMacroAssembler* _masm) { ++ return iaddress(r, temp, _masm); ++} + -+ // Convert j into native byteordering -+ __ revb_w(j, j); ++static inline Address daddress(Register r, Register temp, ++ InterpreterMacroAssembler* _masm) { ++ return laddress(r, temp, _masm); ++} + -+ // And start -+ Label entry; -+ __ j(entry); ++static inline Address aaddress(Register r, Register temp, InterpreterMacroAssembler* _masm) { ++ return iaddress(r, temp, _masm); ++} + -+ // binary search loop -+ { -+ Label loop; -+ __ bind(loop); -+ __ addw(h, i, j); // h = i + j -+ __ srliw(h, h, 1); // h = (i + j) >> 1 -+ // if [key < array[h].fast_match()] -+ // then [j = h] -+ // else [i = h] -+ // Convert array[h].match to native byte-ordering before compare -+ __ shadd(temp, h, array, temp, 3); -+ __ ld(temp, Address(temp, 0)); -+ __ revb_w_w(temp, temp); // reverse bytes in word (32bit) and sign-extend ++static inline Address at_rsp() { ++ return Address(esp, 0); ++} + -+ Label L_done, L_greater; -+ __ bge(key, temp, L_greater); -+ // if [key < array[h].fast_match()] then j = h -+ __ mv(j, h); -+ __ j(L_done); -+ __ bind(L_greater); -+ // if [key >= array[h].fast_match()] then i = h -+ __ mv(i, h); -+ __ bind(L_done); ++// At top of Java expression stack which may be different than esp(). It ++// isn't for category 1 objects. ++static inline Address at_tos () { ++ return Address(esp, Interpreter::expr_offset_in_bytes(0)); ++} + -+ // while [i + 1 < j] -+ __ bind(entry); -+ __ addiw(h, i, 1); // i + 1 -+ __ blt(h, j, loop); // i + 1 < j -+ } ++static inline Address at_tos_p1() { ++ return Address(esp, Interpreter::expr_offset_in_bytes(1)); ++} + -+ // end of binary search, result index is i (must check again!) 
-+ Label default_case; -+ // Convert array[i].match to native byte-ordering before compare -+ __ shadd(temp, i, array, temp, 3); -+ __ ld(temp, Address(temp, 0)); -+ __ revb_w_w(temp, temp); // reverse bytes in word (32bit) and sign-extend -+ __ bne(key, temp, default_case); ++static inline Address at_tos_p2() { ++ return Address(esp, Interpreter::expr_offset_in_bytes(2)); ++} + -+ // entry found -> j = offset -+ __ shadd(temp, i, array, temp, 3); -+ __ lwu(j, Address(temp, BytesPerInt)); -+ __ profile_switch_case(i, key, array); -+ __ revb_w_w(j, j); // reverse bytes in word (32bit) and sign-extend ++static inline Address at_tos_p3() { ++ return Address(esp, Interpreter::expr_offset_in_bytes(3)); ++} + -+ __ add(temp, xbcp, j); -+ __ load_unsigned_byte(t0, Address(temp, 0)); ++static inline Address at_tos_p4() { ++ return Address(esp, Interpreter::expr_offset_in_bytes(4)); ++} + -+ __ add(xbcp, xbcp, j); -+ __ la(xbcp, Address(xbcp, 0)); -+ __ dispatch_only(vtos, /*generate_poll*/true); ++static inline Address at_tos_p5() { ++ return Address(esp, Interpreter::expr_offset_in_bytes(5)); ++} + -+ // default case -> j = default offset -+ __ bind(default_case); -+ __ profile_switch_default(i); -+ __ lwu(j, Address(array, -2 * BytesPerInt)); -+ __ revb_w_w(j, j); // reverse bytes in word (32bit) and sign-extend ++// Miscelaneous helper routines ++// Store an oop (or NULL) at the Address described by obj. ++// If val == noreg this means store a NULL ++static void do_oop_store(InterpreterMacroAssembler* _masm, ++ Address dst, ++ Register val, ++ DecoratorSet decorators) { ++ assert(val == noreg || val == x10, "parameter is just for looks"); ++ assert_cond(_masm != NULL); ++ __ store_heap_oop(dst, val, x29, x11, decorators); ++} + -+ __ add(temp, xbcp, j); -+ __ load_unsigned_byte(t0, Address(temp, 0)); ++static void do_oop_load(InterpreterMacroAssembler* _masm, ++ Address src, ++ Register dst, ++ DecoratorSet decorators) { ++ assert_cond(_masm != NULL); ++ __ load_heap_oop(dst, src, x7, x11, decorators); ++} + -+ __ add(xbcp, xbcp, j); -+ __ la(xbcp, Address(xbcp, 0)); -+ __ dispatch_only(vtos, /*generate_poll*/true); ++Address TemplateTable::at_bcp(int offset) { ++ assert(_desc->uses_bcp(), "inconsistent uses_bcp information"); ++ return Address(xbcp, offset); +} + -+void TemplateTable::_return(TosState state) ++void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg, ++ Register temp_reg, bool load_bc_into_bc_reg/*=true*/, ++ int byte_no) +{ -+ transition(state, state); -+ assert(_desc->calls_vm(), -+ "inconsistent calls_vm information"); // call in remove_activation -+ -+ if (_desc->bytecode() == Bytecodes::_return_register_finalizer) { -+ assert(state == vtos, "only valid state"); -+ -+ __ ld(c_rarg1, aaddress(0)); -+ __ load_klass(x13, c_rarg1); -+ __ lwu(x13, Address(x13, Klass::access_flags_offset())); -+ Label skip_register_finalizer; -+ __ andi(t0, x13, JVM_ACC_HAS_FINALIZER); -+ __ beqz(t0, skip_register_finalizer); -+ -+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::register_finalizer), c_rarg1); ++ if (!RewriteBytecodes) { return; } ++ Label L_patch_done; + -+ __ bind(skip_register_finalizer); ++ switch (bc) { ++ case Bytecodes::_fast_aputfield: // fall through ++ case Bytecodes::_fast_bputfield: // fall through ++ case Bytecodes::_fast_zputfield: // fall through ++ case Bytecodes::_fast_cputfield: // fall through ++ case Bytecodes::_fast_dputfield: // fall through ++ case Bytecodes::_fast_fputfield: // fall through ++ case Bytecodes::_fast_iputfield: 
// fall through ++ case Bytecodes::_fast_lputfield: // fall through ++ case Bytecodes::_fast_sputfield: { ++ // We skip bytecode quickening for putfield instructions when ++ // the put_code written to the constant pool cache is zero. ++ // This is required so that every execution of this instruction ++ // calls out to InterpreterRuntime::resolve_get_put to do ++ // additional, required work. ++ assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); ++ assert(load_bc_into_bc_reg, "we use bc_reg as temp"); ++ __ get_cache_and_index_and_bytecode_at_bcp(temp_reg, bc_reg, temp_reg, byte_no, 1); ++ __ mv(bc_reg, bc); ++ __ beqz(temp_reg, L_patch_done); ++ break; ++ } ++ default: ++ assert(byte_no == -1, "sanity"); ++ // the pair bytecodes have already done the load. ++ if (load_bc_into_bc_reg) { ++ __ mv(bc_reg, bc); ++ } + } + -+ // Issue a StoreStore barrier after all stores but before return -+ // from any constructor for any class with a final field. We don't -+ // know if this is a finalizer, so we always do so. -+ if (_desc->bytecode() == Bytecodes::_return) { -+ __ membar(MacroAssembler::StoreStore); ++ if (JvmtiExport::can_post_breakpoint()) { ++ Label L_fast_patch; ++ // if a breakpoint is present we can't rewrite the stream directly ++ __ load_unsigned_byte(temp_reg, at_bcp(0)); ++ __ addi(temp_reg, temp_reg, -Bytecodes::_breakpoint); // temp_reg is temporary register. ++ __ bnez(temp_reg, L_fast_patch); ++ // Let breakpoint table handling rewrite to quicker bytecode ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::set_original_bytecode_at), xmethod, xbcp, bc_reg); ++ __ j(L_patch_done); ++ __ bind(L_fast_patch); + } + -+ // Narrow result if state is itos but result type is smaller. -+ // Need to narrow in the return bytecode rather than in generate_return_entry -+ // since compiled code callers expect the result to already be narrowed. -+ if (state == itos) { -+ __ narrow(x10); -+ } ++#ifdef ASSERT ++ Label L_okay; ++ __ load_unsigned_byte(temp_reg, at_bcp(0)); ++ __ beq(temp_reg, bc_reg, L_okay); ++ __ addi(temp_reg, temp_reg, -(int) Bytecodes::java_code(bc)); ++ __ beqz(temp_reg, L_okay); ++ __ stop("patching the wrong bytecode"); ++ __ bind(L_okay); ++#endif + -+ __ remove_activation(state); -+ __ ret(); ++ // patch bytecode ++ __ sb(bc_reg, at_bcp(0)); ++ __ bind(L_patch_done); +} + ++// Individual instructions + -+// ---------------------------------------------------------------------------- -+// Volatile variables demand their effects be made known to all CPU's -+// in order. Store buffers on most chips allow reads & writes to -+// reorder; the JMM's ReadAfterWrite.java test fails in -Xint mode -+// without some kind of memory barrier (i.e., it's not sufficient that -+// the interpreter does not reorder volatile references, the hardware -+// also must not reorder them). -+// -+// According to the new Java Memory Model (JMM): -+// (1) All volatiles are serialized wrt to each other. ALSO reads & -+// writes act as aquire & release, so: -+// (2) A read cannot let unrelated NON-volatile memory refs that -+// happen after the read float up to before the read. It's OK for -+// non-volatile memory refs that happen before the volatile read to -+// float down below it. -+// (3) Similar a volatile write cannot let unrelated NON-volatile -+// memory refs that happen BEFORE the write float down to after the -+// write. It's OK for non-volatile memory refs that happen after the -+// volatile write to float up before it. 
-+// -+// We only put in barriers around volatile refs (they are expensive), -+// not _between_ memory refs (that would require us to track the -+// flavor of the previous memory refs). Requirements (2) and (3) -+// require some barriers before volatile stores and after volatile -+// loads. These nearly cover requirement (1) but miss the -+// volatile-store-volatile-load case. This final case is placed after -+// volatile-stores although it could just as well go before -+// volatile-loads. -+ -+void TemplateTable::resolve_cache_and_index(int byte_no, -+ Register Rcache, -+ Register index, -+ size_t index_size) { -+ const Register temp = x9; -+ assert_different_registers(Rcache, index, temp); -+ -+ Label resolved; -+ -+ Bytecodes::Code code = bytecode(); -+ switch (code) { -+ case Bytecodes::_nofast_getfield: code = Bytecodes::_getfield; break; -+ case Bytecodes::_nofast_putfield: code = Bytecodes::_putfield; break; -+ default: break; -+ } -+ -+ assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); -+ __ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, temp, byte_no, 1, index_size); -+ __ mv(t0, (int) code); -+ __ beq(temp, t0, resolved); -+ -+ address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache); -+ __ mv(temp, (int) code); -+ __ call_VM(noreg, entry, temp); -+ -+ // Update registers with resolved info -+ __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size); -+ // n.b. unlike x86 Rcache is now rcpool plus the indexed offset -+ // so all clients ofthis method must be modified accordingly -+ __ bind(resolved); ++void TemplateTable::nop() { ++ transition(vtos, vtos); ++ // nothing to do +} + -+// The Rcache and index registers must be set before call -+// n.b unlike x86 cache already includes the index offset -+void TemplateTable::load_field_cp_cache_entry(Register obj, -+ Register cache, -+ Register index, -+ Register off, -+ Register flags, -+ bool is_static = false) { -+ assert_different_registers(cache, index, flags, off); -+ -+ ByteSize cp_base_offset = ConstantPoolCache::base_offset(); -+ // Field offset -+ __ ld(off, Address(cache, in_bytes(cp_base_offset + -+ ConstantPoolCacheEntry::f2_offset()))); -+ // Flags -+ __ lwu(flags, Address(cache, in_bytes(cp_base_offset + -+ ConstantPoolCacheEntry::flags_offset()))); ++void TemplateTable::shouldnotreachhere() { ++ transition(vtos, vtos); ++ __ stop("should not reach here bytecode"); ++} + -+ // klass overwrite register -+ if (is_static) { -+ __ ld(obj, Address(cache, in_bytes(cp_base_offset + -+ ConstantPoolCacheEntry::f1_offset()))); -+ const int mirror_offset = in_bytes(Klass::java_mirror_offset()); -+ __ ld(obj, Address(obj, mirror_offset)); -+ __ resolve_oop_handle(obj); -+ } ++void TemplateTable::aconst_null() ++{ ++ transition(vtos, atos); ++ __ mv(x10, zr); +} + -+void TemplateTable::load_invoke_cp_cache_entry(int byte_no, -+ Register method, -+ Register itable_index, -+ Register flags, -+ bool is_invokevirtual, -+ bool is_invokevfinal, /*unused*/ -+ bool is_invokedynamic) { -+ // setup registers -+ const Register cache = t1; -+ const Register index = x14; -+ assert_different_registers(method, flags); -+ assert_different_registers(method, cache, index); -+ assert_different_registers(itable_index, flags); -+ assert_different_registers(itable_index, cache, index); -+ // determine constant pool cache field offsets -+ assert(is_invokevirtual == (byte_no == f2_byte), "is_invokevirtual flag redundant"); -+ const int method_offset = in_bytes(ConstantPoolCache::base_offset() + -+ 
(is_invokevirtual ? -+ ConstantPoolCacheEntry::f2_offset() : -+ ConstantPoolCacheEntry::f1_offset())); -+ const int flags_offset = in_bytes(ConstantPoolCache::base_offset() + -+ ConstantPoolCacheEntry::flags_offset()); -+ // access constant pool cache fields -+ const int index_offset = in_bytes(ConstantPoolCache::base_offset() + -+ ConstantPoolCacheEntry::f2_offset()); ++void TemplateTable::iconst(int value) ++{ ++ transition(vtos, itos); ++ __ li(x10, value); ++} + -+ const size_t index_size = (is_invokedynamic ? sizeof(u4) : sizeof(u2)); -+ resolve_cache_and_index(byte_no, cache, index, index_size); -+ __ ld(method, Address(cache, method_offset)); ++void TemplateTable::lconst(int value) ++{ ++ transition(vtos, ltos); ++ __ li(x10, value); ++} + -+ if (itable_index != noreg) { -+ __ ld(itable_index, Address(cache, index_offset)); ++void TemplateTable::fconst(int value) ++{ ++ transition(vtos, ftos); ++ static float fBuf[2] = {1.0, 2.0}; ++ __ mv(t0, (intptr_t)fBuf); ++ switch (value) { ++ case 0: ++ __ fmv_w_x(f10, zr); ++ break; ++ case 1: ++ __ flw(f10, t0, 0); ++ break; ++ case 2: ++ __ flw(f10, t0, sizeof(float)); ++ break; ++ default: ++ ShouldNotReachHere(); + } -+ __ lwu(flags, Address(cache, flags_offset)); +} + -+// The registers cache and index expected to be set before call. -+// Correct values of the cache and index registers are preserved. -+void TemplateTable::jvmti_post_field_access(Register cache, Register index, -+ bool is_static, bool has_tos) { -+ // do the JVMTI work here to avoid disturbing the register state below -+ // We use c_rarg registers here beacause we want to use the register used in -+ // the call to the VM -+ if (JvmtiExport::can_post_field_access()) { -+ // Check to see if a field access watch has been set before we -+ // take the time to call into the VM. -+ Label L1; -+ assert_different_registers(cache, index, x10); -+ int32_t offset = 0; -+ __ la_patchable(t0, ExternalAddress((address) JvmtiExport::get_field_access_count_addr()), offset); -+ __ lwu(x10, Address(t0, offset)); -+ -+ __ beqz(x10, L1); -+ -+ __ get_cache_and_index_at_bcp(c_rarg2, c_rarg3, 1); -+ __ la(c_rarg2, Address(c_rarg2, in_bytes(ConstantPoolCache::base_offset()))); -+ -+ if (is_static) { -+ __ mv(c_rarg1, zr); // NULL object reference -+ } else { -+ __ ld(c_rarg1, at_tos()); // get object pointer without popping it -+ __ verify_oop(c_rarg1); -+ } -+ // c_rarg1: object pointer or NULL -+ // c_rarg2: cache entry pointer -+ // c_rarg3: jvalue object on the stack -+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::post_field_access), -+ c_rarg1, c_rarg2, c_rarg3); -+ __ get_cache_and_index_at_bcp(cache, index, 1); -+ __ bind(L1); ++void TemplateTable::dconst(int value) ++{ ++ transition(vtos, dtos); ++ static double dBuf[2] = {1.0, 2.0}; ++ __ mv(t0, (intptr_t)dBuf); ++ switch (value) { ++ case 0: ++ __ fmv_d_x(f10, zr); ++ break; ++ case 1: ++ __ fld(f10, t0, 0); ++ break; ++ case 2: ++ __ fld(f10, t0, sizeof(double)); ++ break; ++ default: ++ ShouldNotReachHere(); + } +} + -+void TemplateTable::pop_and_check_object(Register r) ++void TemplateTable::bipush() +{ -+ __ pop_ptr(r); -+ __ null_check(r); // for field access must check obj. 
-+ __ verify_oop(r); ++ transition(vtos, itos); ++ __ load_signed_byte(x10, at_bcp(1)); +} + -+void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteControl rc) ++void TemplateTable::sipush() +{ -+ const Register cache = x12; -+ const Register index = x13; -+ const Register obj = x14; -+ const Register off = x9; -+ const Register flags = x10; -+ const Register raw_flags = x16; -+ const Register bc = x14; // uses same reg as obj, so don't mix them -+ -+ resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); -+ jvmti_post_field_access(cache, index, is_static, false); -+ load_field_cp_cache_entry(obj, cache, index, off, raw_flags, is_static); ++ transition(vtos, itos); ++ __ load_unsigned_short(x10, at_bcp(1)); ++ __ revb_w_w(x10, x10); ++ __ sraiw(x10, x10, 16); ++} + -+ if (!is_static) { -+ // obj is on the stack -+ pop_and_check_object(obj); -+ } ++void TemplateTable::ldc(bool wide) ++{ ++ transition(vtos, vtos); ++ Label call_ldc, notFloat, notClass, notInt, Done; + -+ if (!UseBarriersForVolatile) { -+ Label notVolatile; -+ __ andi(t0, raw_flags, 1UL << ConstantPoolCacheEntry::is_volatile_shift); -+ __ beqz(t0, notVolatile); -+ __ membar(MacroAssembler::AnyAny); -+ __ bind(notVolatile); ++ if (wide) { ++ __ get_unsigned_2_byte_index_at_bcp(x11, 1); ++ } else { ++ __ load_unsigned_byte(x11, at_bcp(1)); + } ++ __ get_cpool_and_tags(x12, x10); + -+ __ add(off, obj, off); -+ const Address field(off); ++ const int base_offset = ConstantPool::header_size() * wordSize; ++ const int tags_offset = Array::base_offset_in_bytes(); + -+ Label Done, notByte, notBool, notInt, notShort, notChar, -+ notLong, notFloat, notObj, notDouble; ++ // get type ++ __ addi(x13, x11, tags_offset); ++ __ add(x13, x10, x13); ++ __ membar(MacroAssembler::AnyAny); ++ __ lbu(x13, Address(x13, 0)); ++ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); + -+ __ slli(flags, raw_flags, XLEN - (ConstantPoolCacheEntry::tos_state_shift + -+ ConstantPoolCacheEntry::tos_state_bits)); -+ __ srli(flags, flags, XLEN - ConstantPoolCacheEntry::tos_state_bits); ++ // unresolved class - get the resolved class ++ __ mv(t1, (u1)JVM_CONSTANT_UnresolvedClass); ++ __ beq(x13, t1, call_ldc); + -+ assert(btos == 0, "change code, btos != 0"); -+ __ bnez(flags, notByte); ++ // unresolved class in error state - call into runtime to throw the error ++ // from the first resolution attempt ++ __ mv(t1, (u1)JVM_CONSTANT_UnresolvedClassInError); ++ __ beq(x13, t1, call_ldc); + -+ // Dont't rewrite getstatic, only getfield -+ if (is_static) { -+ rc = may_not_rewrite; -+ } ++ // resolved class - need to call vm to get java mirror of the class ++ __ mv(t1, (u1)JVM_CONSTANT_Class); ++ __ bne(x13, t1, notClass); + -+ // btos -+ __ access_load_at(T_BYTE, IN_HEAP, x10, field, noreg, noreg); -+ __ push(btos); -+ // Rewrite bytecode to be faster -+ if (rc == may_rewrite) { -+ patch_bytecode(Bytecodes::_fast_bgetfield, bc, x11); -+ } ++ __ bind(call_ldc); ++ __ mv(c_rarg1, wide); ++ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), c_rarg1); ++ __ push_ptr(x10); ++ __ verify_oop(x10); + __ j(Done); + -+ __ bind(notByte); -+ __ sub(t0, flags, (u1)ztos); -+ __ bnez(t0, notBool); ++ __ bind(notClass); ++ __ mv(t1, (u1)JVM_CONSTANT_Float); ++ __ bne(x13, t1, notFloat); + -+ // ztos (same code as btos) -+ __ access_load_at(T_BOOLEAN, IN_HEAP, x10, field, noreg, noreg); -+ __ push(ztos); -+ // Rewirte bytecode to be faster -+ if (rc == may_rewrite) { -+ // uses btos rewriting, no truncating to t/f bit is needed for 
getfield -+ patch_bytecode(Bytecodes::_fast_bgetfield, bc, x11); -+ } ++ // ftos ++ __ shadd(x11, x11, x12, x11, 3); ++ __ flw(f10, Address(x11, base_offset)); ++ __ push_f(f10); + __ j(Done); + -+ __ bind(notBool); -+ __ sub(t0, flags, (u1)atos); -+ __ bnez(t0, notObj); -+ // atos -+ do_oop_load(_masm, field, x10, IN_HEAP); -+ __ push(atos); -+ if (rc == may_rewrite) { -+ patch_bytecode(Bytecodes::_fast_agetfield, bc, x11); -+ } -+ __ j(Done); ++ __ bind(notFloat); ++ ++ __ mv(t1, (u1)JVM_CONSTANT_Integer); ++ __ bne(x13, t1, notInt); + -+ __ bind(notObj); -+ __ sub(t0, flags, (u1)itos); -+ __ bnez(t0, notInt); + // itos -+ __ access_load_at(T_INT, IN_HEAP, x10, field, noreg, noreg); -+ __ addw(x10, x10, zr); // signed extended -+ __ push(itos); -+ // Rewrite bytecode to be faster -+ if (rc == may_rewrite) { -+ patch_bytecode(Bytecodes::_fast_igetfield, bc, x11); -+ } ++ __ shadd(x11, x11, x12, x11, 3); ++ __ lw(x10, Address(x11, base_offset)); ++ __ push_i(x10); + __ j(Done); + + __ bind(notInt); -+ __ sub(t0, flags, (u1)ctos); -+ __ bnez(t0, notChar); -+ // ctos -+ __ access_load_at(T_CHAR, IN_HEAP, x10, field, noreg, noreg); -+ __ push(ctos); -+ // Rewrite bytecode to be faster -+ if (rc == may_rewrite) { -+ patch_bytecode(Bytecodes::_fast_cgetfield, bc, x11); -+ } -+ __ j(Done); ++ condy_helper(Done); + -+ __ bind(notChar); -+ __ sub(t0, flags, (u1)stos); -+ __ bnez(t0, notShort); -+ // stos -+ __ access_load_at(T_SHORT, IN_HEAP, x10, field, noreg, noreg); -+ __ push(stos); -+ // Rewrite bytecode to be faster -+ if (rc == may_rewrite) { -+ patch_bytecode(Bytecodes::_fast_sgetfield, bc, x11); -+ } -+ __ j(Done); ++ __ bind(Done); ++} + -+ __ bind(notShort); -+ __ sub(t0, flags, (u1)ltos); -+ __ bnez(t0, notLong); -+ // ltos -+ __ access_load_at(T_LONG, IN_HEAP, x10, field, noreg, noreg); -+ __ push(ltos); -+ // Rewrite bytecode to be faster -+ if (rc == may_rewrite) { -+ patch_bytecode(Bytecodes::_fast_lgetfield, bc, x11); -+ } -+ __ j(Done); ++// Fast path for caching oop constants. ++void TemplateTable::fast_aldc(bool wide) ++{ ++ transition(vtos, atos); + -+ __ bind(notLong); -+ __ sub(t0, flags, (u1)ftos); -+ __ bnez(t0, notFloat); -+ // ftos -+ __ access_load_at(T_FLOAT, IN_HEAP, noreg /* ftos */, field, noreg, noreg); -+ __ push(ftos); -+ // Rewrite bytecode to be faster -+ if (rc == may_rewrite) { -+ patch_bytecode(Bytecodes::_fast_fgetfield, bc, x11); -+ } -+ __ j(Done); ++ const Register result = x10; ++ const Register tmp = x11; ++ const Register rarg = x12; + -+ __ bind(notFloat); -+#ifdef ASSERT -+ __ sub(t0, flags, (u1)dtos); -+ __ bnez(t0, notDouble); -+#endif -+ // dtos -+ __ access_load_at(T_DOUBLE, IN_HEAP, noreg /* ftos */, field, noreg, noreg); -+ __ push(dtos); -+ // Rewrite bytecode to be faster -+ if (rc == may_rewrite) { -+ patch_bytecode(Bytecodes::_fast_dgetfield, bc, x11); -+ } -+#ifdef ASSERT -+ __ j(Done); ++ const int index_size = wide ? sizeof(u2) : sizeof(u1); + -+ __ bind(notDouble); -+ __ stop("Bad state"); -+#endif ++ Label resolved; + -+ __ bind(Done); ++ // We are resolved if the resolved reference cache entry contains a ++ // non-null object (String, MethodType, etc.) 
++ assert_different_registers(result, tmp); ++ __ get_cache_index_at_bcp(tmp, 1, index_size); ++ __ load_resolved_reference_at_index(result, tmp); ++ __ bnez(result, resolved); + -+ Label notVolatile; -+ __ andi(t0, raw_flags, 1UL << ConstantPoolCacheEntry::is_volatile_shift); -+ __ beqz(t0, notVolatile); -+ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); -+ __ bind(notVolatile); ++ const address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc); ++ ++ // first time invocation - must resolve first ++ __ mv(rarg, (int)bytecode()); ++ __ call_VM(result, entry, rarg); ++ ++ __ bind(resolved); ++ ++ { // Check for the null sentinel. ++ // If we just called the VM, it already did the mapping for us, ++ // but it's harmless to retry. ++ Label notNull; ++ ++ // Stash null_sentinel address to get its value later ++ int32_t offset = 0; ++ __ movptr_with_offset(rarg, Universe::the_null_sentinel_addr(), offset); ++ __ ld(tmp, Address(rarg, offset)); ++ __ resolve_oop_handle(tmp); ++ __ bne(result, tmp, notNull); ++ __ mv(result, zr); // NULL object reference ++ __ bind(notNull); ++ } ++ ++ if (VerifyOops) { ++ // Safe to call with 0 result ++ __ verify_oop(result); ++ } +} + -+void TemplateTable::getfield(int byte_no) ++void TemplateTable::ldc2_w() +{ -+ getfield_or_static(byte_no, false); -+} ++ transition(vtos, vtos); ++ Label notDouble, notLong, Done; ++ __ get_unsigned_2_byte_index_at_bcp(x10, 1); + -+void TemplateTable::nofast_getfield(int byte_no) { -+ getfield_or_static(byte_no, false, may_not_rewrite); ++ __ get_cpool_and_tags(x11, x12); ++ const int base_offset = ConstantPool::header_size() * wordSize; ++ const int tags_offset = Array::base_offset_in_bytes(); ++ ++ // get type ++ __ add(x12, x12, x10); ++ __ load_unsigned_byte(x12, Address(x12, tags_offset)); ++ __ mv(t1, JVM_CONSTANT_Double); ++ __ bne(x12, t1, notDouble); ++ ++ // dtos ++ __ shadd(x12, x10, x11, x12, 3); ++ __ fld(f10, Address(x12, base_offset)); ++ __ push_d(f10); ++ __ j(Done); ++ ++ __ bind(notDouble); ++ __ mv(t1, (int)JVM_CONSTANT_Long); ++ __ bne(x12, t1, notLong); ++ ++ // ltos ++ __ shadd(x10, x10, x11, x10, 3); ++ __ ld(x10, Address(x10, base_offset)); ++ __ push_l(x10); ++ __ j(Done); ++ ++ __ bind(notLong); ++ condy_helper(Done); ++ __ bind(Done); +} + -+void TemplateTable::getstatic(int byte_no) ++void TemplateTable::condy_helper(Label& Done) +{ -+ getfield_or_static(byte_no, true); -+} ++ const Register obj = x10; ++ const Register rarg = x11; ++ const Register flags = x12; ++ const Register off = x13; + -+// The registers cache and index expected to be set before call. -+// The function may destroy various registers, just not the cache and index registers. -+void TemplateTable::jvmti_post_field_mod(Register cache, Register index, bool is_static) { -+ transition(vtos, vtos); ++ const address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc); + -+ ByteSize cp_base_offset = ConstantPoolCache::base_offset(); ++ __ mv(rarg, (int) bytecode()); ++ __ call_VM(obj, entry, rarg); + -+ if (JvmtiExport::can_post_field_modification()) { -+ // Check to see if a field modification watch has been set before -+ // we take the time to call into the VM. 
-+ Label L1; -+ assert_different_registers(cache, index, x10); -+ int32_t offset = 0; -+ __ la_patchable(t0, ExternalAddress((address)JvmtiExport::get_field_modification_count_addr()), offset); -+ __ lwu(x10, Address(t0, offset)); -+ __ beqz(x10, L1); ++ __ get_vm_result_2(flags, xthread); + -+ __ get_cache_and_index_at_bcp(c_rarg2, t0, 1); ++ // VMr = obj = base address to find primitive value to push ++ // VMr2 = flags = (tos, off) using format of CPCE::_flags ++ __ mv(off, flags); ++ __ mv(t0, ConstantPoolCacheEntry::field_index_mask); ++ __ andrw(off, off, t0); + -+ if (is_static) { -+ // Life is simple. Null out the object pointer. -+ __ mv(c_rarg1, zr); -+ } else { -+ // Life is harder. The stack holds the value on top, followed by -+ // the object. We don't know the size of the value, though; it -+ // could be one or two words depending on its type. As a result, -+ // we must find the type to determine where the object is. -+ __ lwu(c_rarg3, Address(c_rarg2, -+ in_bytes(cp_base_offset + -+ ConstantPoolCacheEntry::flags_offset()))); -+ __ srli(c_rarg3, c_rarg3, ConstantPoolCacheEntry::tos_state_shift); -+ ConstantPoolCacheEntry::verify_tos_state_shift(); -+ Label nope2, done, ok; -+ __ ld(c_rarg1, at_tos_p1()); // initially assume a one word jvalue -+ __ sub(t0, c_rarg3, ltos); -+ __ beqz(t0, ok); -+ __ sub(t0, c_rarg3, dtos); -+ __ bnez(t0, nope2); -+ __ bind(ok); -+ __ ld(c_rarg1, at_tos_p2()); // ltos (two word jvalue); -+ __ bind(nope2); -+ } -+ // cache entry pointer -+ __ add(c_rarg2, c_rarg2, in_bytes(cp_base_offset)); -+ // object (tos) -+ __ mv(c_rarg3, esp); -+ // c_rarg1: object pointer set up above (NULL if static) -+ // c_rarg2: cache entry pointer -+ // c_rarg3: jvalue object on the stack -+ __ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::post_field_modification), -+ c_rarg1, c_rarg2, c_rarg3); -+ __ get_cache_and_index_at_bcp(cache, index, 1); -+ __ bind(L1); -+ } -+} -+ -+void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteControl rc) { -+ transition(vtos, vtos); -+ -+ const Register cache = x12; -+ const Register index = x13; -+ const Register obj = x12; -+ const Register off = x9; -+ const Register flags = x10; -+ const Register bc = x14; -+ -+ resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); -+ jvmti_post_field_mod(cache, index, is_static); -+ load_field_cp_cache_entry(obj, cache, index, off, flags, is_static); -+ -+ Label Done; -+ __ mv(x15, flags); ++ __ add(off, obj, off); ++ const Address field(off, 0); // base + R---->base + offset + -+ { -+ Label notVolatile; -+ __ andi(t0, x15, 1UL << ConstantPoolCacheEntry::is_volatile_shift); -+ __ beqz(t0, notVolatile); -+ __ membar(MacroAssembler::StoreStore | MacroAssembler::LoadStore); -+ __ bind(notVolatile); -+ } ++ __ slli(flags, flags, XLEN - (ConstantPoolCacheEntry::tos_state_shift + ConstantPoolCacheEntry::tos_state_bits)); ++ __ srli(flags, flags, XLEN - ConstantPoolCacheEntry::tos_state_bits); // (1 << 5) - 4 --> 28~31==> flags:0~3 + -+ Label notByte, notBool, notInt, notShort, notChar, -+ notLong, notFloat, notObj, notDouble; ++ switch (bytecode()) { ++ case Bytecodes::_ldc: // fall through ++ case Bytecodes::_ldc_w: { ++ // tos in (itos, ftos, stos, btos, ctos, ztos) ++ Label notInt, notFloat, notShort, notByte, notChar, notBool; ++ __ mv(t1, itos); ++ __ bne(flags, t1, notInt); ++ // itos ++ __ lw(x10, field); ++ __ push(itos); ++ __ j(Done); + -+ __ slli(flags, flags, XLEN - (ConstantPoolCacheEntry::tos_state_shift + -+ 
ConstantPoolCacheEntry::tos_state_bits)); -+ __ srli(flags, flags, XLEN - ConstantPoolCacheEntry::tos_state_bits); ++ __ bind(notInt); ++ __ mv(t1, ftos); ++ __ bne(flags, t1, notFloat); ++ // ftos ++ __ load_float(field); ++ __ push(ftos); ++ __ j(Done); + -+ assert(btos == 0, "change code, btos != 0"); -+ __ bnez(flags, notByte); ++ __ bind(notFloat); ++ __ mv(t1, stos); ++ __ bne(flags, t1, notShort); ++ // stos ++ __ load_signed_short(x10, field); ++ __ push(stos); ++ __ j(Done); + -+ // Don't rewrite putstatic, only putfield -+ if (is_static) { -+ rc = may_not_rewrite; -+ } ++ __ bind(notShort); ++ __ mv(t1, btos); ++ __ bne(flags, t1, notByte); ++ // btos ++ __ load_signed_byte(x10, field); ++ __ push(btos); ++ __ j(Done); + -+ // btos -+ { -+ __ pop(btos); -+ // field address -+ if (!is_static) { -+ pop_and_check_object(obj); -+ } -+ __ add(off, obj, off); // if static, obj from cache, else obj from stack. -+ const Address field(off, 0); // off register as temparator register. -+ __ access_store_at(T_BYTE, IN_HEAP, field, x10, noreg, noreg, noreg); -+ if (rc == may_rewrite) { -+ patch_bytecode(Bytecodes::_fast_bputfield, bc, x11, true, byte_no); -+ } -+ __ j(Done); -+ } ++ __ bind(notByte); ++ __ mv(t1, ctos); ++ __ bne(flags, t1, notChar); ++ // ctos ++ __ load_unsigned_short(x10, field); ++ __ push(ctos); ++ __ j(Done); + -+ __ bind(notByte); -+ __ sub(t0, flags, (u1)ztos); -+ __ bnez(t0, notBool); ++ __ bind(notChar); ++ __ mv(t1, ztos); ++ __ bne(flags, t1, notBool); ++ // ztos ++ __ load_signed_byte(x10, field); ++ __ push(ztos); ++ __ j(Done); + -+ // ztos -+ { -+ __ pop(ztos); -+ // field address -+ if (!is_static) { -+ pop_and_check_object(obj); -+ } -+ __ add(off, obj, off); // if static, obj from cache, else obj from stack. -+ const Address field(off, 0); -+ __ access_store_at(T_BOOLEAN, IN_HEAP, field, x10, noreg, noreg, noreg); -+ if (rc == may_rewrite) { -+ patch_bytecode(Bytecodes::_fast_zputfield, bc, x11, true, byte_no); ++ __ bind(notBool); ++ break; + } -+ __ j(Done); -+ } -+ -+ __ bind(notBool); -+ __ sub(t0, flags, (u1)atos); -+ __ bnez(t0, notObj); + -+ // atos -+ { -+ __ pop(atos); -+ // field address -+ if (!is_static) { -+ pop_and_check_object(obj); -+ } -+ __ add(off, obj, off); // if static, obj from cache, else obj from stack. -+ const Address field(off, 0); -+ // Store into the field -+ do_oop_store(_masm, field, x10, IN_HEAP); -+ if (rc == may_rewrite) { -+ patch_bytecode(Bytecodes::_fast_aputfield, bc, x11, true, byte_no); -+ } -+ __ j(Done); -+ } ++ case Bytecodes::_ldc2_w: { ++ Label notLong, notDouble; ++ __ mv(t1, ltos); ++ __ bne(flags, t1, notLong); ++ // ltos ++ __ ld(x10, field); ++ __ push(ltos); ++ __ j(Done); + -+ __ bind(notObj); -+ __ sub(t0, flags, (u1)itos); -+ __ bnez(t0, notInt); ++ __ bind(notLong); ++ __ mv(t1, dtos); ++ __ bne(flags, t1, notDouble); ++ // dtos ++ __ load_double(field); ++ __ push(dtos); ++ __ j(Done); + -+ // itos -+ { -+ __ pop(itos); -+ // field address -+ if (!is_static) { -+ pop_and_check_object(obj); -+ } -+ __ add(off, obj, off); // if static, obj from cache, else obj from stack. 
-+ const Address field(off, 0); -+ __ access_store_at(T_INT, IN_HEAP, field, x10, noreg, noreg, noreg); -+ if (rc == may_rewrite) { -+ patch_bytecode(Bytecodes::_fast_iputfield, bc, x11, true, byte_no); ++ __ bind(notDouble); ++ break; + } -+ __ j(Done); -+ } + -+ __ bind(notInt); -+ __ sub(t0, flags, (u1)ctos); -+ __ bnez(t0, notChar); -+ -+ // ctos -+ { -+ __ pop(ctos); -+ // field address -+ if (!is_static) { -+ pop_and_check_object(obj); -+ } -+ __ add(off, obj, off); // if static, obj from cache, else obj from stack. -+ const Address field(off, 0); -+ __ access_store_at(T_CHAR, IN_HEAP, field, x10, noreg, noreg, noreg); -+ if (rc == may_rewrite) { -+ patch_bytecode(Bytecodes::_fast_cputfield, bc, x11, true, byte_no); -+ } -+ __ j(Done); ++ default: ++ ShouldNotReachHere(); + } + -+ __ bind(notChar); -+ __ sub(t0, flags, (u1)stos); -+ __ bnez(t0, notShort); ++ __ stop("bad ldc/condy"); ++} + -+ // stos -+ { -+ __ pop(stos); -+ // field address -+ if (!is_static) { -+ pop_and_check_object(obj); -+ } -+ __ add(off, obj, off); // if static, obj from cache, else obj from stack. -+ const Address field(off, 0); -+ __ access_store_at(T_SHORT, IN_HEAP, field, x10, noreg, noreg, noreg); -+ if (rc == may_rewrite) { -+ patch_bytecode(Bytecodes::_fast_sputfield, bc, x11, true, byte_no); -+ } -+ __ j(Done); -+ } ++void TemplateTable::locals_index(Register reg, int offset) ++{ ++ __ lbu(reg, at_bcp(offset)); ++ __ neg(reg, reg); ++} + -+ __ bind(notShort); -+ __ sub(t0, flags, (u1)ltos); -+ __ bnez(t0, notLong); ++void TemplateTable::iload() { ++ iload_internal(); ++} + -+ // ltos -+ { -+ __ pop(ltos); -+ // field address -+ if (!is_static) { -+ pop_and_check_object(obj); -+ } -+ __ add(off, obj, off); // if static, obj from cache, else obj from stack. -+ const Address field(off, 0); -+ __ access_store_at(T_LONG, IN_HEAP, field, x10, noreg, noreg, noreg); -+ if (rc == may_rewrite) { -+ patch_bytecode(Bytecodes::_fast_lputfield, bc, x11, true, byte_no); -+ } -+ __ j(Done); -+ } ++void TemplateTable::nofast_iload() { ++ iload_internal(may_not_rewrite); ++} + -+ __ bind(notLong); -+ __ sub(t0, flags, (u1)ftos); -+ __ bnez(t0, notFloat); ++void TemplateTable::iload_internal(RewriteControl rc) { ++ transition(vtos, itos); ++ if (RewriteFrequentPairs && rc == may_rewrite) { ++ Label rewrite, done; ++ const Register bc = x14; + -+ // ftos -+ { -+ __ pop(ftos); -+ // field address -+ if (!is_static) { -+ pop_and_check_object(obj); -+ } -+ __ add(off, obj, off); // if static, obj from cache, else obj from stack. -+ const Address field(off, 0); -+ __ access_store_at(T_FLOAT, IN_HEAP, field, noreg /* ftos */, noreg, noreg, noreg); -+ if (rc == may_rewrite) { -+ patch_bytecode(Bytecodes::_fast_fputfield, bc, x11, true, byte_no); -+ } -+ __ j(Done); -+ } ++ // get next bytecode ++ __ load_unsigned_byte(x11, at_bcp(Bytecodes::length_for(Bytecodes::_iload))); + -+ __ bind(notFloat); -+#ifdef ASSERT -+ __ sub(t0, flags, (u1)dtos); -+ __ bnez(t0, notDouble); -+#endif ++ // if _iload, wait to rewrite to iload2. We only want to rewrite the ++ // last two iloads in a pair. Comparing against fast_iload means that ++ // the next bytecode is neither an iload or a caload, and therefore ++ // an iload pair. ++ __ mv(t1, Bytecodes::_iload); ++ __ beq(x11, t1, done); + -+ // dtos -+ { -+ __ pop(dtos); -+ // field address -+ if (!is_static) { -+ pop_and_check_object(obj); -+ } -+ __ add(off, obj, off); // if static, obj from cache, else obj from stack. 
-+ const Address field(off, 0); -+ __ access_store_at(T_DOUBLE, IN_HEAP, field, noreg /* dtos */, noreg, noreg, noreg); -+ if (rc == may_rewrite) { -+ patch_bytecode(Bytecodes::_fast_dputfield, bc, x11, true, byte_no); -+ } -+ } ++ // if _fast_iload rewrite to _fast_iload2 ++ __ mv(t1, Bytecodes::_fast_iload); ++ __ mv(bc, Bytecodes::_fast_iload2); ++ __ beq(x11, t1, rewrite); + -+#ifdef ASSERT -+ __ j(Done); ++ // if _caload rewrite to _fast_icaload ++ __ mv(t1, Bytecodes::_caload); ++ __ mv(bc, Bytecodes::_fast_icaload); ++ __ beq(x11, t1, rewrite); + -+ __ bind(notDouble); -+ __ stop("Bad state"); -+#endif ++ // else rewrite to _fast_iload ++ __ mv(bc, Bytecodes::_fast_iload); + -+ __ bind(Done); ++ // rewrite ++ // bc: new bytecode ++ __ bind(rewrite); ++ patch_bytecode(Bytecodes::_iload, bc, x11, false); ++ __ bind(done); + -+ { -+ Label notVolatile; -+ __ andi(t0, x15, 1UL << ConstantPoolCacheEntry::is_volatile_shift); -+ __ beqz(t0, notVolatile); -+ __ membar(MacroAssembler::StoreLoad | MacroAssembler::StoreStore); -+ __ bind(notVolatile); + } ++ ++ // do iload, get the local value into tos ++ locals_index(x11); ++ __ lw(x10, iaddress(x11, x10, _masm)); +} + -+void TemplateTable::putfield(int byte_no) ++void TemplateTable::fast_iload2() +{ -+ putfield_or_static(byte_no, false); ++ transition(vtos, itos); ++ locals_index(x11); ++ __ lw(x10, iaddress(x11, x10, _masm)); ++ __ push(itos); ++ locals_index(x11, 3); ++ __ lw(x10, iaddress(x11, x10, _masm)); +} + -+void TemplateTable::nofast_putfield(int byte_no) { -+ putfield_or_static(byte_no, false, may_not_rewrite); ++void TemplateTable::fast_iload() ++{ ++ transition(vtos, itos); ++ locals_index(x11); ++ __ lw(x10, iaddress(x11, x10, _masm)); +} + -+void TemplateTable::putstatic(int byte_no) { -+ putfield_or_static(byte_no, true); ++void TemplateTable::lload() ++{ ++ transition(vtos, ltos); ++ __ lbu(x11, at_bcp(1)); ++ __ slli(x11, x11, LogBytesPerWord); ++ __ sub(x11, xlocals, x11); ++ __ ld(x10, Address(x11, Interpreter::local_offset_in_bytes(1))); +} + -+void TemplateTable::jvmti_post_fast_field_mod() ++void TemplateTable::fload() +{ -+ if (JvmtiExport::can_post_field_modification()) { -+ // Check to see if a field modification watch has been set before -+ // we take the time to call into the VM. -+ Label L2; -+ int32_t offset = 0; -+ __ la_patchable(t0, ExternalAddress((address)JvmtiExport::get_field_modification_count_addr()), offset); -+ __ lwu(c_rarg3, Address(t0, offset)); -+ __ beqz(c_rarg3, L2); -+ __ pop_ptr(x9); // copy the object pointer from tos -+ __ verify_oop(x9); -+ __ push_ptr(x9); // put the object pointer back on tos -+ // Save tos values before call_VM() clobbers them. Since we have -+ // to do it for every data type, we use the saved values as the -+ // jvalue object. 
-+ switch (bytecode()) { // load values into the jvalue object -+ case Bytecodes::_fast_aputfield: __ push_ptr(x10); break; -+ case Bytecodes::_fast_bputfield: // fall through -+ case Bytecodes::_fast_zputfield: // fall through -+ case Bytecodes::_fast_sputfield: // fall through -+ case Bytecodes::_fast_cputfield: // fall through -+ case Bytecodes::_fast_iputfield: __ push_i(x10); break; -+ case Bytecodes::_fast_dputfield: __ push_d(); break; -+ case Bytecodes::_fast_fputfield: __ push_f(); break; -+ case Bytecodes::_fast_lputfield: __ push_l(x10); break; -+ -+ default: -+ ShouldNotReachHere(); -+ } -+ __ mv(c_rarg3, esp); // points to jvalue on the stack -+ // access constant pool cache entry -+ __ get_cache_entry_pointer_at_bcp(c_rarg2, x10, 1); -+ __ verify_oop(x9); -+ // x9: object pointer copied above -+ // c_rarg2: cache entry pointer -+ // c_rarg3: jvalue object on the stack -+ __ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::post_field_modification), -+ x9, c_rarg2, c_rarg3); -+ -+ switch (bytecode()) { // restore tos values -+ case Bytecodes::_fast_aputfield: __ pop_ptr(x10); break; -+ case Bytecodes::_fast_bputfield: // fall through -+ case Bytecodes::_fast_zputfield: // fall through -+ case Bytecodes::_fast_sputfield: // fall through -+ case Bytecodes::_fast_cputfield: // fall through -+ case Bytecodes::_fast_iputfield: __ pop_i(x10); break; -+ case Bytecodes::_fast_dputfield: __ pop_d(); break; -+ case Bytecodes::_fast_fputfield: __ pop_f(); break; -+ case Bytecodes::_fast_lputfield: __ pop_l(x10); break; -+ default: break; -+ } -+ __ bind(L2); -+ } ++ transition(vtos, ftos); ++ locals_index(x11); ++ __ flw(f10, faddress(x11, t0, _masm)); +} + -+void TemplateTable::fast_storefield(TosState state) ++void TemplateTable::dload() +{ -+ transition(state, vtos); ++ transition(vtos, dtos); ++ __ lbu(x11, at_bcp(1)); ++ __ slli(x11, x11, LogBytesPerWord); ++ __ sub(x11, xlocals, x11); ++ __ fld(f10, Address(x11, Interpreter::local_offset_in_bytes(1))); ++} + -+ ByteSize base = ConstantPoolCache::base_offset(); ++void TemplateTable::aload() ++{ ++ transition(vtos, atos); ++ locals_index(x11); ++ __ ld(x10, iaddress(x11, x10, _masm)); + -+ jvmti_post_fast_field_mod(); ++} + -+ // access constant pool cache -+ __ get_cache_and_index_at_bcp(x12, x11, 1); ++void TemplateTable::locals_index_wide(Register reg) { ++ __ lhu(reg, at_bcp(2)); ++ __ revb_h_h_u(reg, reg); // reverse bytes in half-word and zero-extend ++ __ neg(reg, reg); ++} + -+ // Must prevent reordering of the following cp cache loads with bytecode load -+ __ membar(MacroAssembler::LoadLoad); ++void TemplateTable::wide_iload() { ++ transition(vtos, itos); ++ locals_index_wide(x11); ++ __ lw(x10, iaddress(x11, t0, _masm)); ++} + -+ // test for volatile with x13 -+ __ lwu(x13, Address(x12, in_bytes(base + -+ ConstantPoolCacheEntry::flags_offset()))); ++void TemplateTable::wide_lload() ++{ ++ transition(vtos, ltos); ++ __ lhu(x11, at_bcp(2)); ++ __ revb_h_h_u(x11, x11); // reverse bytes in half-word and zero-extend ++ __ slli(x11, x11, LogBytesPerWord); ++ __ sub(x11, xlocals, x11); ++ __ ld(x10, Address(x11, Interpreter::local_offset_in_bytes(1))); ++} + -+ // replace index with field offset from cache entry -+ __ ld(x11, Address(x12, in_bytes(base + ConstantPoolCacheEntry::f2_offset()))); ++void TemplateTable::wide_fload() ++{ ++ transition(vtos, ftos); ++ locals_index_wide(x11); ++ __ flw(f10, faddress(x11, t0, _masm)); ++} + -+ { -+ Label notVolatile; -+ __ andi(t0, x13, 1UL << 
ConstantPoolCacheEntry::is_volatile_shift); -+ __ beqz(t0, notVolatile); -+ __ membar(MacroAssembler::StoreStore | MacroAssembler::LoadStore); -+ __ bind(notVolatile); -+ } ++void TemplateTable::wide_dload() ++{ ++ transition(vtos, dtos); ++ __ lhu(x11, at_bcp(2)); ++ __ revb_h_h_u(x11, x11); // reverse bytes in half-word and zero-extend ++ __ slli(x11, x11, LogBytesPerWord); ++ __ sub(x11, xlocals, x11); ++ __ fld(f10, Address(x11, Interpreter::local_offset_in_bytes(1))); ++} + -+ // Get object from stack -+ pop_and_check_object(x12); ++void TemplateTable::wide_aload() ++{ ++ transition(vtos, atos); ++ locals_index_wide(x11); ++ __ ld(x10, aaddress(x11, t0, _masm)); ++} + -+ // field address -+ __ add(x11, x12, x11); -+ const Address field(x11, 0); -+ -+ // access field -+ switch (bytecode()) { -+ case Bytecodes::_fast_aputfield: -+ do_oop_store(_masm, field, x10, IN_HEAP); -+ break; -+ case Bytecodes::_fast_lputfield: -+ __ access_store_at(T_LONG, IN_HEAP, field, x10, noreg, noreg, noreg); -+ break; -+ case Bytecodes::_fast_iputfield: -+ __ access_store_at(T_INT, IN_HEAP, field, x10, noreg, noreg, noreg); -+ break; -+ case Bytecodes::_fast_zputfield: -+ __ access_store_at(T_BOOLEAN, IN_HEAP, field, x10, noreg, noreg, noreg); -+ break; -+ case Bytecodes::_fast_bputfield: -+ __ access_store_at(T_BYTE, IN_HEAP, field, x10, noreg, noreg, noreg); -+ break; -+ case Bytecodes::_fast_sputfield: -+ __ access_store_at(T_SHORT, IN_HEAP, field, x10, noreg, noreg, noreg); -+ break; -+ case Bytecodes::_fast_cputfield: -+ __ access_store_at(T_CHAR, IN_HEAP, field, x10, noreg, noreg, noreg); -+ break; -+ case Bytecodes::_fast_fputfield: -+ __ access_store_at(T_FLOAT, IN_HEAP, field, noreg /* ftos */, noreg, noreg, noreg); -+ break; -+ case Bytecodes::_fast_dputfield: -+ __ access_store_at(T_DOUBLE, IN_HEAP, field, noreg /* dtos */, noreg, noreg, noreg); -+ break; -+ default: -+ ShouldNotReachHere(); -+ } -+ -+ { -+ Label notVolatile; -+ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_volatile_shift); -+ __ beqz(t0, notVolatile); -+ __ membar(MacroAssembler::StoreLoad | MacroAssembler::StoreStore); -+ __ bind(notVolatile); ++void TemplateTable::index_check(Register array, Register index) ++{ ++ // destroys x11, t0 ++ // check array ++ __ null_check(array, arrayOopDesc::length_offset_in_bytes()); ++ // sign extend index for use by indexed load ++ // check index ++ const Register length = t0; ++ __ lwu(length, Address(array, arrayOopDesc::length_offset_in_bytes())); ++ if (index != x11) { ++ assert(x11 != array, "different registers"); ++ __ mv(x11, index); + } ++ Label ok; ++ __ addw(index, index, zr); ++ __ bltu(index, length, ok); ++ __ mv(x13, array); ++ __ mv(t0, Interpreter::_throw_ArrayIndexOutOfBoundsException_entry); ++ __ jr(t0); ++ __ bind(ok); +} + -+void TemplateTable::fast_accessfield(TosState state) ++void TemplateTable::iaload() +{ -+ transition(atos, state); -+ // Do the JVMTI work here to avoid disturbing the register state below -+ if (JvmtiExport::can_post_field_access()) { -+ // Check to see if a field access watch has been set before we -+ // take the time to call into the VM. 
-+ Label L1; -+ int32_t offset = 0; -+ __ la_patchable(t0, ExternalAddress((address)JvmtiExport::get_field_access_count_addr()), offset); -+ __ lwu(x12, Address(t0, offset)); -+ __ beqz(x12, L1); -+ // access constant pool cache entry -+ __ get_cache_entry_pointer_at_bcp(c_rarg2, t1, 1); -+ __ verify_oop(x10); -+ __ push_ptr(x10); // save object pointer before call_VM() clobbers it -+ __ mv(c_rarg1, x10); -+ // c_rarg1: object pointer copied above -+ // c_rarg2: cache entry pointer -+ __ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::post_field_access), -+ c_rarg1, c_rarg2); -+ __ pop_ptr(x10); // restore object pointer -+ __ bind(L1); -+ } -+ -+ // access constant pool cache -+ __ get_cache_and_index_at_bcp(x12, x11, 1); -+ -+ // Must prevent reordering of the following cp cache loads with bytecode load -+ __ membar(MacroAssembler::LoadLoad); -+ -+ __ ld(x11, Address(x12, in_bytes(ConstantPoolCache::base_offset() + -+ ConstantPoolCacheEntry::f2_offset()))); -+ __ lwu(x13, Address(x12, in_bytes(ConstantPoolCache::base_offset() + -+ ConstantPoolCacheEntry::flags_offset()))); -+ -+ // x10: object -+ __ verify_oop(x10); -+ __ null_check(x10); -+ __ add(x11, x10, x11); -+ const Address field(x11, 0); ++ transition(itos, itos); ++ __ mv(x11, x10); ++ __ pop_ptr(x10); ++ // x10: array ++ // x11: index ++ index_check(x10, x11); // leaves index in x11 ++ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_INT) >> 2); ++ __ shadd(x10, x11, x10, t0, 2); ++ __ access_load_at(T_INT, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); ++ __ addw(x10, x10, zr); // signed extended ++} + -+ if (!UseBarriersForVolatile) { -+ Label notVolatile; -+ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_volatile_shift); -+ __ beqz(t0, notVolatile); -+ __ membar(MacroAssembler::AnyAny); -+ __ bind(notVolatile); -+ } ++void TemplateTable::laload() ++{ ++ transition(itos, ltos); ++ __ mv(x11, x10); ++ __ pop_ptr(x10); ++ // x10: array ++ // x11: index ++ index_check(x10, x11); // leaves index in x11 ++ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_LONG) >> 3); ++ __ shadd(x10, x11, x10, t0, 3); ++ __ access_load_at(T_LONG, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); ++} + -+ // access field -+ switch (bytecode()) { -+ case Bytecodes::_fast_agetfield: -+ do_oop_load(_masm, field, x10, IN_HEAP); -+ __ verify_oop(x10); -+ break; -+ case Bytecodes::_fast_lgetfield: -+ __ access_load_at(T_LONG, IN_HEAP, x10, field, noreg, noreg); -+ break; -+ case Bytecodes::_fast_igetfield: -+ __ access_load_at(T_INT, IN_HEAP, x10, field, noreg, noreg); -+ __ addw(x10, x10, zr); // signed extended -+ break; -+ case Bytecodes::_fast_bgetfield: -+ __ access_load_at(T_BYTE, IN_HEAP, x10, field, noreg, noreg); -+ break; -+ case Bytecodes::_fast_sgetfield: -+ __ access_load_at(T_SHORT, IN_HEAP, x10, field, noreg, noreg); -+ break; -+ case Bytecodes::_fast_cgetfield: -+ __ access_load_at(T_CHAR, IN_HEAP, x10, field, noreg, noreg); -+ break; -+ case Bytecodes::_fast_fgetfield: -+ __ access_load_at(T_FLOAT, IN_HEAP, noreg /* ftos */, field, noreg, noreg); -+ break; -+ case Bytecodes::_fast_dgetfield: -+ __ access_load_at(T_DOUBLE, IN_HEAP, noreg /* dtos */, field, noreg, noreg); -+ break; -+ default: -+ ShouldNotReachHere(); -+ } -+ { -+ Label notVolatile; -+ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_volatile_shift); -+ __ beqz(t0, notVolatile); -+ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); -+ __ bind(notVolatile); -+ } ++void TemplateTable::faload() ++{ ++ 
transition(itos, ftos); ++ __ mv(x11, x10); ++ __ pop_ptr(x10); ++ // x10: array ++ // x11: index ++ index_check(x10, x11); // leaves index in x11 ++ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_FLOAT) >> 2); ++ __ shadd(x10, x11, x10, t0, 2); ++ __ access_load_at(T_FLOAT, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); +} + -+void TemplateTable::fast_xaccess(TosState state) ++void TemplateTable::daload() +{ -+ transition(vtos, state); ++ transition(itos, dtos); ++ __ mv(x11, x10); ++ __ pop_ptr(x10); ++ // x10: array ++ // x11: index ++ index_check(x10, x11); // leaves index in x11 ++ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) >> 3); ++ __ shadd(x10, x11, x10, t0, 3); ++ __ access_load_at(T_DOUBLE, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); ++} + -+ // get receiver -+ __ ld(x10, aaddress(0)); -+ // access constant pool cache -+ __ get_cache_and_index_at_bcp(x12, x13, 2); -+ __ ld(x11, Address(x12, in_bytes(ConstantPoolCache::base_offset() + -+ ConstantPoolCacheEntry::f2_offset()))); ++void TemplateTable::aaload() ++{ ++ transition(itos, atos); ++ __ mv(x11, x10); ++ __ pop_ptr(x10); ++ // x10: array ++ // x11: index ++ index_check(x10, x11); // leaves index in x11 ++ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); ++ __ shadd(x10, x11, x10, t0, LogBytesPerHeapOop); ++ do_oop_load(_masm, ++ Address(x10), ++ x10, ++ IS_ARRAY); ++} + -+ if (!UseBarriersForVolatile) { -+ Label notVolatile; -+ __ lwu(x13, Address(x12, in_bytes(ConstantPoolCache::base_offset() + -+ ConstantPoolCacheEntry::flags_offset()))); -+ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_volatile_shift); -+ __ beqz(t0, notVolatile); -+ __ membar(MacroAssembler::AnyAny); -+ __ bind(notVolatile); -+ } ++void TemplateTable::baload() ++{ ++ transition(itos, itos); ++ __ mv(x11, x10); ++ __ pop_ptr(x10); ++ // x10: array ++ // x11: index ++ index_check(x10, x11); // leaves index in x11 ++ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_BYTE) >> 0); ++ __ shadd(x10, x11, x10, t0, 0); ++ __ access_load_at(T_BYTE, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); ++} + -+ // make sure exception is reported in correct bcp range (getfield is -+ // next instruction) -+ __ addi(xbcp, xbcp, 1); -+ __ null_check(x10); -+ switch (state) { -+ case itos: -+ __ add(x10, x10, x11); -+ __ access_load_at(T_INT, IN_HEAP, x10, Address(x10, 0), noreg, noreg); -+ __ addw(x10, x10, zr); // signed extended -+ break; -+ case atos: -+ __ add(x10, x10, x11); -+ do_oop_load(_masm, Address(x10, 0), x10, IN_HEAP); -+ __ verify_oop(x10); -+ break; -+ case ftos: -+ __ add(t0, x10, x11); -+ __ access_load_at(T_FLOAT, IN_HEAP, noreg /* ftos */, Address(t0), noreg, noreg); -+ break; -+ default: -+ ShouldNotReachHere(); -+ } ++void TemplateTable::caload() ++{ ++ transition(itos, itos); ++ __ mv(x11, x10); ++ __ pop_ptr(x10); ++ // x10: array ++ // x11: index ++ index_check(x10, x11); // leaves index in x11 ++ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_CHAR) >> 1); ++ __ shadd(x10, x11, x10, t0, 1); ++ __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); ++} + -+ { -+ Label notVolatile; -+ __ lwu(x13, Address(x12, in_bytes(ConstantPoolCache::base_offset() + -+ ConstantPoolCacheEntry::flags_offset()))); -+ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_volatile_shift); -+ __ beqz(t0, notVolatile); -+ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); -+ __ bind(notVolatile); -+ } ++// iload followed by caload 
frequent pair ++void TemplateTable::fast_icaload() ++{ ++ transition(vtos, itos); ++ // load index out of locals ++ locals_index(x12); ++ __ lw(x11, iaddress(x12, x11, _masm)); ++ __ pop_ptr(x10); + -+ __ sub(xbcp, xbcp, 1); ++ // x10: array ++ // x11: index ++ index_check(x10, x11); // leaves index in x11, kills t0 ++ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_CHAR) >> 1); // addi, max imm is 2^11 ++ __ shadd(x10, x11, x10, t0, 1); ++ __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); +} + -+//----------------------------------------------------------------------------- -+// Calls -+ -+void TemplateTable::count_calls(Register method, Register temp) ++void TemplateTable::saload() +{ -+ __ call_Unimplemented(); ++ transition(itos, itos); ++ __ mv(x11, x10); ++ __ pop_ptr(x10); ++ // x10: array ++ // x11: index ++ index_check(x10, x11); // leaves index in x11, kills t0 ++ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_SHORT) >> 1); ++ __ shadd(x10, x11, x10, t0, 1); ++ __ access_load_at(T_SHORT, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); +} + -+void TemplateTable::prepare_invoke(int byte_no, -+ Register method, // linked method (or i-klass) -+ Register index, // itable index, MethodType, etc. -+ Register recv, // if caller wants to see it -+ Register flags // if caller wants to test it -+ ) { -+ // determine flags -+ const Bytecodes::Code code = bytecode(); -+ const bool is_invokeinterface = code == Bytecodes::_invokeinterface; -+ const bool is_invokedynamic = code == Bytecodes::_invokedynamic; -+ const bool is_invokehandle = code == Bytecodes::_invokehandle; -+ const bool is_invokevirtual = code == Bytecodes::_invokevirtual; -+ const bool is_invokespecial = code == Bytecodes::_invokespecial; -+ const bool load_receiver = (recv != noreg); -+ const bool save_flags = (flags != noreg); -+ assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic), ""); -+ assert(save_flags == (is_invokeinterface || is_invokevirtual), "need flags for vfinal"); -+ assert(flags == noreg || flags == x13, ""); -+ assert(recv == noreg || recv == x12, ""); -+ -+ // setup registers & access constant pool cache -+ if (recv == noreg) { -+ recv = x12; -+ } -+ if (flags == noreg) { -+ flags = x13; -+ } -+ assert_different_registers(method, index, recv, flags); ++void TemplateTable::iload(int n) ++{ ++ transition(vtos, itos); ++ __ lw(x10, iaddress(n)); ++} + -+ // save 'interpreter return address' -+ __ save_bcp(); ++void TemplateTable::lload(int n) ++{ ++ transition(vtos, ltos); ++ __ ld(x10, laddress(n)); ++} + -+ load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic); ++void TemplateTable::fload(int n) ++{ ++ transition(vtos, ftos); ++ __ flw(f10, faddress(n)); ++} + -+ // maybe push appendix to arguments (just before return address) -+ if (is_invokedynamic || is_invokehandle) { -+ Label L_no_push; -+ __ andi(t0, flags, 1UL << ConstantPoolCacheEntry::has_appendix_shift); -+ __ beqz(t0, L_no_push); -+ // Push the appendix as a trailing parameter. -+ // This must be done before we get the receiver, -+ // since the parameter_size includes it. -+ __ push_reg(x9); -+ __ mv(x9, index); -+ assert(ConstantPoolCacheEntry::_indy_resolved_references_appendix_offset == 0, "appendix expected at index+0"); -+ __ load_resolved_reference_at_index(index, x9); -+ __ pop_reg(x9); -+ __ push_reg(index); // push appendix (MethodType, CallSite, etc.) 
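
The array-load templates above (iaload through fast_icaload and saload) fold the array base offset into the scaled index: an add puts base_offset_in_bytes(T) >> shift into the index register, then shadd forms array + (index << shift), which equals array + base_offset + index * element_size as long as the base offset is a multiple of the element size. A standalone C++ sketch of that address arithmetic, with illustrative names and constants rather than HotSpot code:

    #include <cstdint>
    #include <cstdio>

    // Mimics "add index, index, base_offset >> shift" followed by
    // "shadd dest, index, array, tmp, shift" (dest = array + (index << shift)).
    // Valid because base_offset is a multiple of the element size (1 << shift).
    uint64_t element_address(uint64_t array, uint64_t index,
                             uint64_t base_offset, unsigned shift) {
        index += base_offset >> shift;   // fold the base offset into the index
        return array + (index << shift); // scale the index and add the array pointer
    }

    int main() {
        // e.g. an int[] whose elements are assumed to start 16 bytes into the object
        uint64_t addr = element_address(0x1000, 3, 16, 2);
        std::printf("0x%llx\n", (unsigned long long) addr); // 0x1000 + 16 + 3*4 = 0x101c
        return 0;
    }
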
-+ __ bind(L_no_push); -+ } ++void TemplateTable::dload(int n) ++{ ++ transition(vtos, dtos); ++ __ fld(f10, daddress(n)); ++} + -+ // load receiver if needed (note: no return address pushed yet) -+ if (load_receiver) { -+ __ andi(recv, flags, ConstantPoolCacheEntry::parameter_size_mask); // parameter_size_mask = 1 << 8 -+ __ shadd(t0, recv, esp, t0, 3); -+ __ ld(recv, Address(t0, -Interpreter::expr_offset_in_bytes(1))); -+ __ verify_oop(recv); -+ } ++void TemplateTable::aload(int n) ++{ ++ transition(vtos, atos); ++ __ ld(x10, iaddress(n)); ++} + -+ // compute return type -+ __ slli(t1, flags, XLEN - (ConstantPoolCacheEntry::tos_state_shift + ConstantPoolCacheEntry::tos_state_bits)); -+ __ srli(t1, t1, XLEN - ConstantPoolCacheEntry::tos_state_bits); // (1 << 5) - 4 --> 28~31==> t1:0~3 ++void TemplateTable::aload_0() { ++ aload_0_internal(); ++} + -+ // load return address -+ { -+ const address table_addr = (address) Interpreter::invoke_return_entry_table_for(code); -+ __ mv(t0, table_addr); -+ __ shadd(t0, t1, t0, t1, 3); -+ __ ld(ra, Address(t0, 0)); -+ } ++void TemplateTable::nofast_aload_0() { ++ aload_0_internal(may_not_rewrite); +} + -+void TemplateTable::invokevirtual_helper(Register index, -+ Register recv, -+ Register flags) -+{ -+ // Uses temporary registers x10, x13 -+ assert_different_registers(index, recv, x10, x13); -+ // Test for an invoke of a final method -+ Label notFinal; -+ __ andi(t0, flags, 1UL << ConstantPoolCacheEntry::is_vfinal_shift); -+ __ beqz(t0, notFinal); ++void TemplateTable::aload_0_internal(RewriteControl rc) { ++ // According to bytecode histograms, the pairs: ++ // ++ // _aload_0, _fast_igetfield ++ // _aload_0, _fast_agetfield ++ // _aload_0, _fast_fgetfield ++ // ++ // occur frequently. If RewriteFrequentPairs is set, the (slow) ++ // _aload_0 bytecode checks if the next bytecode is either ++ // _fast_igetfield, _fast_agetfield or _fast_fgetfield and then ++ // rewrites the current bytecode into a pair bytecode; otherwise it ++ // rewrites the current bytecode into _fast_aload_0 that doesn't do ++ // the pair check anymore. ++ // ++ // Note: If the next bytecode is _getfield, the rewrite must be ++ // delayed, otherwise we may miss an opportunity for a pair. ++ // ++ // Also rewrite frequent pairs ++ // aload_0, aload_1 ++ // aload_0, iload_1 ++ // These bytecodes with a small amount of code are most profitable ++ // to rewrite ++ if (RewriteFrequentPairs && rc == may_rewrite) { ++ Label rewrite, done; ++ const Register bc = x14; + -+ const Register method = index; // method must be xmethod -+ assert(method == xmethod, "methodOop must be xmethod for interpreter calling convention"); ++ // get next bytecode ++ __ load_unsigned_byte(x11, at_bcp(Bytecodes::length_for(Bytecodes::_aload_0))); + -+ // do the call - the index is actually the method to call -+ // that is, f2 is a vtable index if !is_vfinal, else f2 is a Method* ++ // if _getfield then wait with rewrite ++ __ mv(t1, Bytecodes::Bytecodes::_getfield); ++ __ beq(x11, t1, done); + -+ // It's final, need a null check here! 
-+ __ null_check(recv); ++ // if _igetfield then rewrite to _fast_iaccess_0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) == Bytecodes::_aload_0, "fix bytecode definition"); ++ __ mv(t1, Bytecodes::_fast_igetfield); ++ __ mv(bc, Bytecodes::_fast_iaccess_0); ++ __ beq(x11, t1, rewrite); + -+ // profile this call -+ __ profile_final_call(x10); -+ __ profile_arguments_type(x10, method, x14, true); ++ // if _agetfield then rewrite to _fast_aaccess_0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) == Bytecodes::_aload_0, "fix bytecode definition"); ++ __ mv(t1, Bytecodes::_fast_agetfield); ++ __ mv(bc, Bytecodes::_fast_aaccess_0); ++ __ beq(x11, t1, rewrite); + -+ __ jump_from_interpreted(method); ++ // if _fgetfield then rewrite to _fast_faccess_0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) == Bytecodes::_aload_0, "fix bytecode definition"); ++ __ mv(t1, Bytecodes::_fast_fgetfield); ++ __ mv(bc, Bytecodes::_fast_faccess_0); ++ __ beq(x11, t1, rewrite); + -+ __ bind(notFinal); ++ // else rewrite to _fast_aload0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) == Bytecodes::_aload_0, "fix bytecode definition"); ++ __ mv(bc, Bytecodes::Bytecodes::_fast_aload_0); + -+ // get receiver klass -+ __ null_check(recv, oopDesc::klass_offset_in_bytes()); -+ __ load_klass(x10, recv); ++ // rewrite ++ // bc: new bytecode ++ __ bind(rewrite); ++ patch_bytecode(Bytecodes::_aload_0, bc, x11, false); + -+ // profile this call -+ __ profile_virtual_call(x10, xlocals, x13); ++ __ bind(done); ++ } + -+ // get target methodOop & entry point -+ __ lookup_virtual_method(x10, index, method); -+ __ profile_arguments_type(x13, method, x14, true); -+ __ jump_from_interpreted(method); ++ // Do actual aload_0 (must do this after patch_bytecode which might call VM and GC might change oop). 
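
aload_0_internal above chooses a replacement bytecode based on what follows aload_0: a fast getfield form fuses into the matching fast_xaccess_0, a plain getfield delays the rewrite, and anything else becomes fast_aload_0. A standalone C++ sketch of just that decision; the enum values are placeholders, not HotSpot's Bytecodes numbering:

    #include <cstdio>

    // Placeholder names for the bytecodes involved in the rewrite.
    enum Bc { bc_getfield, bc_fast_igetfield, bc_fast_agetfield, bc_fast_fgetfield,
              bc_fast_iaccess_0, bc_fast_aaccess_0, bc_fast_faccess_0,
              bc_fast_aload_0, bc_no_rewrite };

    // Mirrors the choice made above: fuse with a fast getfield, wait on a plain
    // getfield (a pair may still form once it is resolved), otherwise use fast_aload_0.
    Bc rewrite_for_next(Bc next) {
        switch (next) {
        case bc_getfield:       return bc_no_rewrite;
        case bc_fast_igetfield: return bc_fast_iaccess_0;
        case bc_fast_agetfield: return bc_fast_aaccess_0;
        case bc_fast_fgetfield: return bc_fast_faccess_0;
        default:                return bc_fast_aload_0;
        }
    }

    int main() {
        std::printf("%d %d\n", rewrite_for_next(bc_fast_igetfield), rewrite_for_next(bc_getfield));
        return 0;
    }
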
++ aload(0); +} + -+void TemplateTable::invokevirtual(int byte_no) ++void TemplateTable::istore() +{ -+ transition(vtos, vtos); -+ assert(byte_no == f2_byte, "use this argument"); ++ transition(itos, vtos); ++ locals_index(x11); ++ __ sw(x10, iaddress(x11, t0, _masm)); ++} + -+ prepare_invoke(byte_no, xmethod, noreg, x12, x13); ++void TemplateTable::lstore() ++{ ++ transition(ltos, vtos); ++ locals_index(x11); ++ __ sd(x10, laddress(x11, t0, _masm)); ++} + -+ // xmethod: index (actually a Method*) -+ // x12: receiver -+ // x13: flags ++void TemplateTable::fstore() { ++ transition(ftos, vtos); ++ locals_index(x11); ++ __ fsw(f10, iaddress(x11, t0, _masm)); ++} + -+ invokevirtual_helper(xmethod, x12, x13); ++void TemplateTable::dstore() { ++ transition(dtos, vtos); ++ locals_index(x11); ++ __ fsd(f10, daddress(x11, t0, _masm)); +} + -+void TemplateTable::invokespecial(int byte_no) ++void TemplateTable::astore() +{ + transition(vtos, vtos); -+ assert(byte_no == f1_byte, "use this argument"); -+ -+ prepare_invoke(byte_no, xmethod, noreg, // get f1 Method* -+ x12); // get receiver also for null check -+ __ verify_oop(x12); -+ __ null_check(x12); -+ // do the call -+ __ profile_call(x10); -+ __ profile_arguments_type(x10, xmethod, xbcp, false); -+ __ jump_from_interpreted(xmethod); ++ __ pop_ptr(x10); ++ locals_index(x11); ++ __ sd(x10, aaddress(x11, t0, _masm)); +} + -+void TemplateTable::invokestatic(int byte_no) -+{ ++void TemplateTable::wide_istore() { + transition(vtos, vtos); -+ assert(byte_no == f1_byte, "use this arugment"); -+ -+ prepare_invoke(byte_no, xmethod); // get f1 Method* -+ // do the call -+ __ profile_call(x10); -+ __ profile_arguments_type(x10, xmethod, x14, false); -+ __ jump_from_interpreted(xmethod); ++ __ pop_i(); ++ locals_index_wide(x11); ++ __ sw(x10, iaddress(x11, t0, _masm)); +} + -+void TemplateTable::fast_invokevfinal(int byte_no) -+{ -+ __ call_Unimplemented(); ++void TemplateTable::wide_lstore() { ++ transition(vtos, vtos); ++ __ pop_l(); ++ locals_index_wide(x11); ++ __ sd(x10, laddress(x11, t0, _masm)); +} + -+void TemplateTable::invokeinterface(int byte_no) { ++void TemplateTable::wide_fstore() { + transition(vtos, vtos); -+ assert(byte_no == f1_byte, "use this argument"); ++ __ pop_f(); ++ locals_index_wide(x11); ++ __ fsw(f10, faddress(x11, t0, _masm)); ++} + -+ prepare_invoke(byte_no, x10, xmethod, // get f1 Klass*, f2 Method* -+ x12, x13); // recv, flags ++void TemplateTable::wide_dstore() { ++ transition(vtos, vtos); ++ __ pop_d(); ++ locals_index_wide(x11); ++ __ fsd(f10, daddress(x11, t0, _masm)); ++} + -+ // x10: interface klass (from f1) -+ // xmethod: method (from f2) -+ // x12: receiver -+ // x13: flags ++void TemplateTable::wide_astore() { ++ transition(vtos, vtos); ++ __ pop_ptr(x10); ++ locals_index_wide(x11); ++ __ sd(x10, aaddress(x11, t0, _masm)); ++} + -+ // First check for Object case, then private interface method, -+ // then regular interface method. ++void TemplateTable::iastore() { ++ transition(itos, vtos); ++ __ pop_i(x11); ++ __ pop_ptr(x13); ++ // x10: value ++ // x11: index ++ // x13: array ++ index_check(x13, x11); // prefer index in x11 ++ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_INT) >> 2); ++ __ shadd(t0, x11, x13, t0, 2); ++ __ access_store_at(T_INT, IN_HEAP | IS_ARRAY, Address(t0, 0), x10, noreg, noreg); ++} + -+ // Special case of invokeinterface called for virtual method of -+ // java.lang.Object. 
See cpCache.cpp for details -+ Label notObjectMethod; -+ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_forced_virtual_shift); -+ __ beqz(t0, notObjectMethod); ++void TemplateTable::lastore() { ++ transition(ltos, vtos); ++ __ pop_i(x11); ++ __ pop_ptr(x13); ++ // x10: value ++ // x11: index ++ // x13: array ++ index_check(x13, x11); // prefer index in x11 ++ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_LONG) >> 3); ++ __ shadd(t0, x11, x13, t0, 3); ++ __ access_store_at(T_LONG, IN_HEAP | IS_ARRAY, Address(t0, 0), x10, noreg, noreg); ++} + -+ invokevirtual_helper(xmethod, x12, x13); -+ __ bind(notObjectMethod); ++void TemplateTable::fastore() { ++ transition(ftos, vtos); ++ __ pop_i(x11); ++ __ pop_ptr(x13); ++ // f10: value ++ // x11: index ++ // x13: array ++ index_check(x13, x11); // prefer index in x11 ++ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_FLOAT) >> 2); ++ __ shadd(t0, x11, x13, t0, 2); ++ __ access_store_at(T_FLOAT, IN_HEAP | IS_ARRAY, Address(t0, 0), noreg /* ftos */, noreg, noreg); ++} + -+ Label no_such_interface; ++void TemplateTable::dastore() { ++ transition(dtos, vtos); ++ __ pop_i(x11); ++ __ pop_ptr(x13); ++ // f10: value ++ // x11: index ++ // x13: array ++ index_check(x13, x11); // prefer index in x11 ++ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) >> 3); ++ __ shadd(t0, x11, x13, t0, 3); ++ __ access_store_at(T_DOUBLE, IN_HEAP | IS_ARRAY, Address(t0, 0), noreg /* dtos */, noreg, noreg); ++} + -+ // Check for private method invocation - indicated by vfinal -+ Label notVFinal; -+ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_vfinal_shift); -+ __ beqz(t0, notVFinal); ++void TemplateTable::aastore() { ++ Label is_null, ok_is_subtype, done; ++ transition(vtos, vtos); ++ // stack: ..., array, index, value ++ __ ld(x10, at_tos()); // value ++ __ ld(x12, at_tos_p1()); // index ++ __ ld(x13, at_tos_p2()); // array + -+ // Check receiver klass into x13 - also a null check -+ __ null_check(x12, oopDesc::klass_offset_in_bytes()); -+ __ load_klass(x13, x12); ++ index_check(x13, x12); // kills x11 ++ __ add(x14, x12, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); ++ __ shadd(x14, x14, x13, x14, LogBytesPerHeapOop); + -+ Label subtype; -+ __ check_klass_subtype(x13, x10, x14, subtype); -+ // If we get here the typecheck failed -+ __ j(no_such_interface); -+ __ bind(subtype); ++ Address element_address(x14, 0); + -+ __ profile_final_call(x10); -+ __ profile_arguments_type(x10, xmethod, x14, true); -+ __ jump_from_interpreted(xmethod); ++ // do array store check - check for NULL value first ++ __ beqz(x10, is_null); + -+ __ bind(notVFinal); ++ // Move subklass into x11 ++ __ load_klass(x11, x10); ++ // Move superklass into x10 ++ __ load_klass(x10, x13); ++ __ ld(x10, Address(x10, ++ ObjArrayKlass::element_klass_offset())); ++ // Compress array + index * oopSize + 12 into a single register. Frees x12. + -+ // Get receiver klass into x13 - also a null check -+ __ restore_locals(); -+ __ null_check(x12, oopDesc::klass_offset_in_bytes()); -+ __ load_klass(x13, x12); ++ // Generate subtype check. Blows x12, x15 ++ // Superklass in x10. Subklass in x11. ++ __ gen_subtype_check(x11, ok_is_subtype); //todo + -+ Label no_such_method; ++ // Come here on failure ++ // object is at TOS ++ __ j(Interpreter::_throw_ArrayStoreException_entry); + -+ // Preserve method for the throw_AbstractMethodErrorVerbose. -+ __ mv(x28, xmethod); -+ // Receiver subtype check against REFC. -+ // Superklass in x10. Subklass in x13. 
Blows t1, x30 -+ __ lookup_interface_method(// inputs: rec. class, interface, itable index -+ x13, x10, noreg, -+ // outputs: scan temp. reg, scan temp. reg -+ t1, x30, -+ no_such_interface, -+ /*return_method=*/false); ++ // Come here on success ++ __ bind(ok_is_subtype); + -+ // profile this call -+ __ profile_virtual_call(x13, x30, x9); ++ // Get the value we will store ++ __ ld(x10, at_tos()); ++ // Now store using the appropriate barrier ++ do_oop_store(_masm, element_address, x10, IS_ARRAY); ++ __ j(done); + -+ // Get declaring interface class from method, and itable index -+ __ ld(x10, Address(xmethod, Method::const_offset())); -+ __ ld(x10, Address(x10, ConstMethod::constants_offset())); -+ __ ld(x10, Address(x10, ConstantPool::pool_holder_offset_in_bytes())); -+ __ lwu(xmethod, Address(xmethod, Method::itable_index_offset())); -+ __ subw(xmethod, xmethod, Method::itable_index_max); -+ __ negw(xmethod, xmethod); ++ // Have a NULL in x10, x13=array, x12=index. Store NULL at ary[idx] ++ __ bind(is_null); ++ __ profile_null_seen(x12); + -+ // Preserve recvKlass for throw_AbstractMethodErrorVerbose -+ __ mv(xlocals, x13); -+ __ lookup_interface_method(// inputs: rec. class, interface, itable index -+ xlocals, x10, xmethod, -+ // outputs: method, scan temp. reg -+ xmethod, x30, -+ no_such_interface); ++ // Store a NULL ++ do_oop_store(_masm, element_address, noreg, IS_ARRAY); + -+ // xmethod: methodOop to call -+ // x12: receiver -+ // Check for abstract method error -+ // Note: This should be done more efficiently via a throw_abstract_method_error -+ // interpreter entry point and a conditional jump to it in case of a null -+ // method. -+ __ beqz(xmethod, no_such_method); ++ // Pop stack arguments ++ __ bind(done); ++ __ add(esp, esp, 3 * Interpreter::stackElementSize); + -+ __ profile_arguments_type(x13, xmethod, x30, true); ++} + -+ // do the call -+ // x12: receiver -+ // xmethod,: methodOop -+ __ jump_from_interpreted(xmethod); -+ __ should_not_reach_here(); ++void TemplateTable::bastore() ++{ ++ transition(itos, vtos); ++ __ pop_i(x11); ++ __ pop_ptr(x13); ++ // x10: value ++ // x11: index ++ // x13: array ++ index_check(x13, x11); // prefer index in x11 + -+ // exception handling code follows ... -+ // note: must restore interpreter registers to canonical -+ // state for exception handling to work correctly! ++ // Need to check whether array is boolean or byte ++ // since both types share the bastore bytecode. ++ __ load_klass(x12, x13); ++ __ lwu(x12, Address(x12, Klass::layout_helper_offset())); ++ Label L_skip; ++ __ andi(t0, x12, Klass::layout_helper_boolean_diffbit()); ++ __ beqz(t0, L_skip); ++ __ andi(x10, x10, 1); // if it is a T_BOOLEAN array, mask the stored value to 0/1 ++ __ bind(L_skip); + -+ __ bind(no_such_method); -+ // throw exception -+ __ restore_bcp(); // bcp must be correct for exception handler (was destroyed) -+ __ restore_locals(); // make sure locals pointer is correct as well (was destroyed) -+ // Pass arguments for generating a verbose error message. -+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodErrorVerbose), x13, x28); -+ // the call_VM checks for exception, so we should never return here. 
-+ __ should_not_reach_here(); ++ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_BYTE) >> 0); + -+ __ bind(no_such_interface); -+ // throw exceptiong -+ __ restore_bcp(); // bcp must be correct for exception handler (was destroyed) -+ __ restore_locals(); // make sure locals pointer is correct as well (was destroyed) -+ // Pass arguments for generating a verbose error message. -+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::throw_IncompatibleClassChangeErrorVerbose), x13, x10); -+ // the call_VM checks for exception, so we should never return here. -+ __ should_not_reach_here(); -+ return; ++ __ add(x11, x13, x11); ++ __ access_store_at(T_BYTE, IN_HEAP | IS_ARRAY, Address(x11, 0), x10, noreg, noreg); +} + -+void TemplateTable::invokehandle(int byte_no) { -+ transition(vtos, vtos); -+ assert(byte_no == f1_byte, "use this argument"); ++void TemplateTable::castore() ++{ ++ transition(itos, vtos); ++ __ pop_i(x11); ++ __ pop_ptr(x13); ++ // x10: value ++ // x11: index ++ // x13: array ++ index_check(x13, x11); // prefer index in x11 ++ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_CHAR) >> 1); ++ __ shadd(t0, x11, x13, t0, 1); ++ __ access_store_at(T_CHAR, IN_HEAP | IS_ARRAY, Address(t0, 0), x10, noreg, noreg); ++} + -+ prepare_invoke(byte_no, xmethod, x10, x12); -+ __ verify_method_ptr(x12); -+ __ verify_oop(x12); -+ __ null_check(x12); ++void TemplateTable::sastore() ++{ ++ castore(); ++} + -+ // FIXME: profile the LambdaForm also ++void TemplateTable::istore(int n) ++{ ++ transition(itos, vtos); ++ __ sd(x10, iaddress(n)); ++} + -+ // x30 is safe to use here as a temp reg because it is about to -+ // be clobbered by jump_from_interpreted(). -+ __ profile_final_call(x30); -+ __ profile_arguments_type(x30, xmethod, x14, true); ++void TemplateTable::lstore(int n) ++{ ++ transition(ltos, vtos); ++ __ sd(x10, laddress(n)); ++} + -+ __ jump_from_interpreted(xmethod); ++void TemplateTable::fstore(int n) ++{ ++ transition(ftos, vtos); ++ __ fsw(f10, faddress(n)); +} + -+void TemplateTable::invokedynamic(int byte_no) { -+ transition(vtos, vtos); -+ assert(byte_no == f1_byte, "use this argument"); ++void TemplateTable::dstore(int n) ++{ ++ transition(dtos, vtos); ++ __ fsd(f10, daddress(n)); ++} + -+ prepare_invoke(byte_no, xmethod, x10); ++void TemplateTable::astore(int n) ++{ ++ transition(vtos, vtos); ++ __ pop_ptr(x10); ++ __ sd(x10, iaddress(n)); ++} + -+ // x10: CallSite object (from cpool->resolved_references[]) -+ // xmethod: MH.linkToCallSite method (from f2) ++void TemplateTable::pop() ++{ ++ transition(vtos, vtos); ++ __ addi(esp, esp, Interpreter::stackElementSize); ++} + -+ // Note: x10_callsite is already pushed by prepare_invoke ++void TemplateTable::pop2() ++{ ++ transition(vtos, vtos); ++ __ addi(esp, esp, 2 * Interpreter::stackElementSize); ++} + -+ // %%% should make a type profile for any invokedynamic that takes a ref argument -+ // profile this call -+ __ profile_call(xbcp); -+ __ profile_arguments_type(x13, xmethod, x30, false); ++void TemplateTable::dup() ++{ ++ transition(vtos, vtos); ++ __ ld(x10, Address(esp, 0)); ++ __ push_reg(x10); ++ // stack: ..., a, a ++} + -+ __ verify_oop(x10); ++void TemplateTable::dup_x1() ++{ ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ __ ld(x10, at_tos()); // load b ++ __ ld(x12, at_tos_p1()); // load a ++ __ sd(x10, at_tos_p1()); // store b ++ __ sd(x12, at_tos()); // store a ++ __ push_reg(x10); // push b ++ // stack: ..., b, a, b ++} + -+ __ jump_from_interpreted(xmethod); ++void 
TemplateTable::dup_x2() ++{ ++ transition(vtos, vtos); ++ // stack: ..., a, b, c ++ __ ld(x10, at_tos()); // load c ++ __ ld(x12, at_tos_p2()); // load a ++ __ sd(x10, at_tos_p2()); // store c in a ++ __ push_reg(x10); // push c ++ // stack: ..., c, b, c, c ++ __ ld(x10, at_tos_p2()); // load b ++ __ sd(x12, at_tos_p2()); // store a in b ++ // stack: ..., c, a, c, c ++ __ sd(x10, at_tos_p1()); // store b in c ++ // stack: ..., c, a, b, c +} + -+//----------------------------------------------------------------------------- -+// Allocation ++void TemplateTable::dup2() ++{ ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ __ ld(x10, at_tos_p1()); // load a ++ __ push_reg(x10); // push a ++ __ ld(x10, at_tos_p1()); // load b ++ __ push_reg(x10); // push b ++ // stack: ..., a, b, a, b ++} + -+void TemplateTable::_new() { -+ transition(vtos, atos); ++void TemplateTable::dup2_x1() ++{ ++ transition(vtos, vtos); ++ // stack: ..., a, b, c ++ __ ld(x12, at_tos()); // load c ++ __ ld(x10, at_tos_p1()); // load b ++ __ push_reg(x10); // push b ++ __ push_reg(x12); // push c ++ // stack: ..., a, b, c, b, c ++ __ sd(x12, at_tos_p3()); // store c in b ++ // stack: ..., a, c, c, b, c ++ __ ld(x12, at_tos_p4()); // load a ++ __ sd(x12, at_tos_p2()); // store a in 2nd c ++ // stack: ..., a, c, a, b, c ++ __ sd(x10, at_tos_p4()); // store b in a ++ // stack: ..., b, c, a, b, c ++} + -+ __ get_unsigned_2_byte_index_at_bcp(x13, 1); -+ Label slow_case; -+ Label done; -+ Label initialize_header; -+ Label initialize_object; // including clearing the fields ++void TemplateTable::dup2_x2() ++{ ++ transition(vtos, vtos); ++ // stack: ..., a, b, c, d ++ __ ld(x12, at_tos()); // load d ++ __ ld(x10, at_tos_p1()); // load c ++ __ push_reg(x10); // push c ++ __ push_reg(x12); // push d ++ // stack: ..., a, b, c, d, c, d ++ __ ld(x10, at_tos_p4()); // load b ++ __ sd(x10, at_tos_p2()); // store b in d ++ __ sd(x12, at_tos_p4()); // store d in b ++ // stack: ..., a, d, c, b, c, d ++ __ ld(x12, at_tos_p5()); // load a ++ __ ld(x10, at_tos_p3()); // load c ++ __ sd(x12, at_tos_p3()); // store a in c ++ __ sd(x10, at_tos_p5()); // store c in a ++ // stack: ..., c, d, a, b, c, d ++} + -+ __ get_cpool_and_tags(x14, x10); -+ // Make sure the class we're about to instantiate has been resolved. 
-+ // This is done before loading InstanceKlass to be consistent with the order -+ // how Constant Pool is update (see ConstantPool::klass_at_put) -+ const int tags_offset = Array::base_offset_in_bytes(); -+ __ add(t0, x10, x13); -+ __ la(t0, Address(t0, tags_offset)); -+ __ membar(MacroAssembler::AnyAny); -+ __ lbu(t0, t0); -+ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); -+ __ sub(t1, t0, (u1)JVM_CONSTANT_Class); -+ __ bnez(t1, slow_case); ++void TemplateTable::swap() ++{ ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ __ ld(x12, at_tos_p1()); // load a ++ __ ld(x10, at_tos()); // load b ++ __ sd(x12, at_tos()); // store a in b ++ __ sd(x10, at_tos_p1()); // store b in a ++ // stack: ..., b, a ++} + -+ // get InstanceKlass -+ __ load_resolved_klass_at_offset(x14, x13, x14, t0); ++void TemplateTable::iop2(Operation op) ++{ ++ transition(itos, itos); ++ // x10 <== x11 op x10 ++ __ pop_i(x11); ++ switch (op) { ++ case add : __ addw(x10, x11, x10); break; ++ case sub : __ subw(x10, x11, x10); break; ++ case mul : __ mulw(x10, x11, x10); break; ++ case _and : __ andrw(x10, x11, x10); break; ++ case _or : __ orrw(x10, x11, x10); break; ++ case _xor : __ xorrw(x10, x11, x10); break; ++ case shl : __ sllw(x10, x11, x10); break; ++ case shr : __ sraw(x10, x11, x10); break; ++ case ushr : __ srlw(x10, x11, x10); break; ++ default : ShouldNotReachHere(); ++ } ++} + -+ // make sure klass is initialized & doesn't have finalizer -+ // make sure klass is fully initialized -+ __ lbu(t0, Address(x14, InstanceKlass::init_state_offset())); -+ __ sub(t1, t0, (u1)InstanceKlass::fully_initialized); -+ __ bnez(t1, slow_case); ++void TemplateTable::lop2(Operation op) ++{ ++ transition(ltos, ltos); ++ // x10 <== x11 op x10 ++ __ pop_l(x11); ++ switch (op) { ++ case add : __ add(x10, x11, x10); break; ++ case sub : __ sub(x10, x11, x10); break; ++ case mul : __ mul(x10, x11, x10); break; ++ case _and : __ andr(x10, x11, x10); break; ++ case _or : __ orr(x10, x11, x10); break; ++ case _xor : __ xorr(x10, x11, x10); break; ++ default : ShouldNotReachHere(); ++ } ++} + -+ // get instance_size in InstanceKlass (scaled to a count of bytes) -+ __ lwu(x13, Address(x14, Klass::layout_helper_offset())); -+ // test to see if it has a finalizer or is malformed in some way -+ __ andi(t0, x13, Klass::_lh_instance_slow_path_bit); -+ __ bnez(t0, slow_case); ++void TemplateTable::idiv() ++{ ++ transition(itos, itos); ++ // explicitly check for div0 ++ Label no_div0; ++ __ bnez(x10, no_div0); ++ __ mv(t0, Interpreter::_throw_ArithmeticException_entry); ++ __ jr(t0); ++ __ bind(no_div0); ++ __ pop_i(x11); ++ // x10 <== x11 idiv x10 ++ __ corrected_idivl(x10, x11, x10, /* want_remainder */ false); ++} + -+ // Allocate the instance: -+ // If TLAB is enabled: -+ // Try to allocate in the TLAB. -+ // If fails, go to the slow path. -+ // Else If inline contiguous allocations are enabled: -+ // Try to allocate in eden. -+ // If fails due to heap end, go to slow path -+ // -+ // If TLAB is enabled OR inline contiguous is enabled: -+ // Initialize the allocation. -+ // Exit. -+ // Go to slow path. 
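
The idiv and irem templates added above test the divisor explicitly because RISC-V integer division does not trap: DIV by zero yields all ones and REM by zero yields the dividend, while Java requires an ArithmeticException; the corrected_idivl helper is assumed to provide the remaining Java result semantics. A standalone sketch of the results the bytecodes must produce (illustrative, not the HotSpot helper):

    #include <climits>
    #include <cstdio>
    #include <stdexcept>

    // Java idiv/irem semantics: zero divisor throws, and INT_MIN / -1 must yield
    // INT_MIN with remainder 0 (guarded here to avoid C++ overflow UB).
    int java_idiv(int a, int b) {
        if (b == 0) throw std::runtime_error("ArithmeticException: / by zero");
        if (a == INT_MIN && b == -1) return INT_MIN;
        return a / b;
    }

    int java_irem(int a, int b) {
        if (b == 0) throw std::runtime_error("ArithmeticException: / by zero");
        if (a == INT_MIN && b == -1) return 0;
        return a % b;
    }

    int main() {
        std::printf("%d %d\n", java_idiv(INT_MIN, -1), java_irem(7, -3)); // -2147483648 1
        return 0;
    }
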
-+ const bool allow_shared_alloc = Universe::heap()->supports_inline_contig_alloc(); ++void TemplateTable::irem() ++{ ++ transition(itos, itos); ++ // explicitly check for div0 ++ Label no_div0; ++ __ bnez(x10, no_div0); ++ __ mv(t0, Interpreter::_throw_ArithmeticException_entry); ++ __ jr(t0); ++ __ bind(no_div0); ++ __ pop_i(x11); ++ // x10 <== x11 irem x10 ++ __ corrected_idivl(x10, x11, x10, /* want_remainder */ true); ++} + -+ if (UseTLAB) { -+ __ tlab_allocate(x10, x13, 0, noreg, x11, slow_case); ++void TemplateTable::lmul() ++{ ++ transition(ltos, ltos); ++ __ pop_l(x11); ++ __ mul(x10, x10, x11); ++} + -+ if (ZeroTLAB) { -+ // the fields have been already cleared -+ __ j(initialize_header); -+ } else { -+ // initialize both the header and fields -+ __ j(initialize_object); -+ } -+ } else { -+ // Allocation in the shared Eden, if allowed. -+ // -+ // x13: instance size in bytes -+ if (allow_shared_alloc) { -+ __ eden_allocate(x10, x13, 0, x28, slow_case); -+ } -+ } -+ -+ // If USETLAB or allow_shared_alloc are true, the object is created above and -+ // there is an initialized need. Otherwise, skip and go to the slow path. -+ if (UseTLAB || allow_shared_alloc) { -+ // The object is initialized before the header. If the object size is -+ // zero, go directly to the header initialization. -+ __ bind(initialize_object); -+ __ sub(x13, x13, sizeof(oopDesc)); -+ __ beqz(x13, initialize_header); -+ -+ // Initialize obejct fields -+ { -+ __ add(x12, x10, sizeof(oopDesc)); -+ Label loop; -+ __ bind(loop); -+ __ sd(zr, Address(x12)); -+ __ add(x12, x12, BytesPerLong); -+ __ sub(x13, x13, BytesPerLong); -+ __ bnez(x13, loop); -+ } ++void TemplateTable::ldiv() ++{ ++ transition(ltos, ltos); ++ // explicitly check for div0 ++ Label no_div0; ++ __ bnez(x10, no_div0); ++ __ mv(t0, Interpreter::_throw_ArithmeticException_entry); ++ __ jr(t0); ++ __ bind(no_div0); ++ __ pop_l(x11); ++ // x10 <== x11 ldiv x10 ++ __ corrected_idivq(x10, x11, x10, /* want_remainder */ false); ++} + -+ // initialize object hader only. 
-+ __ bind(initialize_header); -+ if (UseBiasedLocking) { -+ __ ld(t0, Address(x14, Klass::prototype_header_offset())); -+ } else { -+ __ mv(t0, (intptr_t)markOopDesc::prototype()); -+ } -+ __ sd(t0, Address(x10, oopDesc::mark_offset_in_bytes())); -+ __ store_klass_gap(x10, zr); // zero klass gap for compressed oops -+ __ store_klass(x10, x14); // store klass last ++void TemplateTable::lrem() ++{ ++ transition(ltos, ltos); ++ // explicitly check for div0 ++ Label no_div0; ++ __ bnez(x10, no_div0); ++ __ mv(t0, Interpreter::_throw_ArithmeticException_entry); ++ __ jr(t0); ++ __ bind(no_div0); ++ __ pop_l(x11); ++ // x10 <== x11 lrem x10 ++ __ corrected_idivq(x10, x11, x10, /* want_remainder */ true); ++} + -+ { -+ SkipIfEqual skip(_masm, &DTraceAllocProbes, false); -+ // Trigger dtrace event for fastpath -+ __ push(atos); // save the return value -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), x10); -+ __ pop(atos); // restore the return value -+ } -+ __ j(done); -+ } ++void TemplateTable::lshl() ++{ ++ transition(itos, ltos); ++ // shift count is in x10 ++ __ pop_l(x11); ++ __ sll(x10, x11, x10); ++} + -+ // slow case -+ __ bind(slow_case); -+ __ get_constant_pool(c_rarg1); -+ __ get_unsigned_2_byte_index_at_bcp(c_rarg2, 1); -+ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), c_rarg1, c_rarg2); -+ __ verify_oop(x10); ++void TemplateTable::lshr() ++{ ++ transition(itos, ltos); ++ // shift count is in x10 ++ __ pop_l(x11); ++ __ sra(x10, x11, x10); ++} + -+ // continue -+ __ bind(done); -+ // Must prevent reordering of stores for object initialization with stores that publish the new object. -+ __ membar(MacroAssembler::StoreStore); ++void TemplateTable::lushr() ++{ ++ transition(itos, ltos); ++ // shift count is in x10 ++ __ pop_l(x11); ++ __ srl(x10, x11, x10); +} + -+void TemplateTable::newarray() { -+ transition(itos, atos); -+ __ load_unsigned_byte(c_rarg1, at_bcp(1)); -+ __ mv(c_rarg2, x10); -+ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray), -+ c_rarg1, c_rarg2); -+ // Must prevent reordering of stores for object initialization with stores that publish the new object. -+ __ membar(MacroAssembler::StoreStore); ++void TemplateTable::fop2(Operation op) ++{ ++ transition(ftos, ftos); ++ switch (op) { ++ case add: ++ __ pop_f(f11); ++ __ fadd_s(f10, f11, f10); ++ break; ++ case sub: ++ __ pop_f(f11); ++ __ fsub_s(f10, f11, f10); ++ break; ++ case mul: ++ __ pop_f(f11); ++ __ fmul_s(f10, f11, f10); ++ break; ++ case div: ++ __ pop_f(f11); ++ __ fdiv_s(f10, f11, f10); ++ break; ++ case rem: ++ __ fmv_s(f11, f10); ++ __ pop_f(f10); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem)); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } +} + -+void TemplateTable::anewarray() { -+ transition(itos, atos); -+ __ get_unsigned_2_byte_index_at_bcp(c_rarg2, 1); -+ __ get_constant_pool(c_rarg1); -+ __ mv(c_rarg3, x10); -+ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray), -+ c_rarg1, c_rarg2, c_rarg3); -+ // Must prevent reordering of stores for object initialization with stores that publish the new object. 
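
In the fop2 template above (and dop2 just below), the rem case calls SharedRuntime::frem/drem because the RISC-V F and D extensions have no floating-point remainder instruction; note that Java's % on float/double is the truncating remainder, i.e. C's fmod, not the IEEE-754 remainder. A standalone sketch of the difference:

    #include <cmath>
    #include <cstdio>

    int main() {
        double x = 5.5, y = 3.0;
        // Java's frem/drem result has the sign of the dividend, like fmod.
        std::printf("fmod      = %g\n", std::fmod(x, y));      // 2.5, what Java's % yields
        std::printf("remainder = %g\n", std::remainder(x, y)); // -0.5, IEEE round-to-nearest form
        return 0;
    }
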
-+ __ membar(MacroAssembler::StoreStore); ++void TemplateTable::dop2(Operation op) ++{ ++ transition(dtos, dtos); ++ switch (op) { ++ case add: ++ __ pop_d(f11); ++ __ fadd_d(f10, f11, f10); ++ break; ++ case sub: ++ __ pop_d(f11); ++ __ fsub_d(f10, f11, f10); ++ break; ++ case mul: ++ __ pop_d(f11); ++ __ fmul_d(f10, f11, f10); ++ break; ++ case div: ++ __ pop_d(f11); ++ __ fdiv_d(f10, f11, f10); ++ break; ++ case rem: ++ __ fmv_d(f11, f10); ++ __ pop_d(f10); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem)); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } +} + -+void TemplateTable::arraylength() { -+ transition(atos, itos); -+ __ null_check(x10, arrayOopDesc::length_offset_in_bytes()); -+ __ lwu(x10, Address(x10, arrayOopDesc::length_offset_in_bytes())); ++void TemplateTable::ineg() ++{ ++ transition(itos, itos); ++ __ negw(x10, x10); +} + -+void TemplateTable::checkcast() ++void TemplateTable::lneg() +{ -+ transition(atos, atos); -+ Label done, is_null, ok_is_subtype, quicked, resolved; -+ __ beqz(x10, is_null); ++ transition(ltos, ltos); ++ __ neg(x10, x10); ++} + -+ // Get cpool & tags index -+ __ get_cpool_and_tags(x12, x13); // x12=cpool, x13=tags array -+ __ get_unsigned_2_byte_index_at_bcp(x9, 1); // x9=index -+ // See if bytecode has already been quicked -+ __ add(t0, x13, Array::base_offset_in_bytes()); -+ __ add(x11, t0, x9); -+ __ membar(MacroAssembler::AnyAny); -+ __ lbu(x11, x11); -+ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); -+ __ sub(t0, x11, (u1)JVM_CONSTANT_Class); -+ __ beqz(t0, quicked); ++void TemplateTable::fneg() ++{ ++ transition(ftos, ftos); ++ __ fneg_s(f10, f10); ++} + -+ __ push(atos); // save receiver for result, and for GC -+ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); -+ // vm_result_2 has metadata result -+ __ get_vm_result_2(x10, xthread); -+ __ pop_reg(x13); // restore receiver -+ __ j(resolved); ++void TemplateTable::dneg() ++{ ++ transition(dtos, dtos); ++ __ fneg_d(f10, f10); ++} + -+ // Get superklass in x10 and subklass in x13 -+ __ bind(quicked); -+ __ mv(x13, x10); // Save object in x13; x10 needed for subtype check -+ __ load_resolved_klass_at_offset(x12, x9, x10, t0); // x10 = klass ++void TemplateTable::iinc() ++{ ++ transition(vtos, vtos); ++ __ load_signed_byte(x11, at_bcp(2)); // get constant ++ locals_index(x12); ++ __ ld(x10, iaddress(x12, x10, _masm)); ++ __ addw(x10, x10, x11); ++ __ sd(x10, iaddress(x12, t0, _masm)); ++} + -+ __ bind(resolved); -+ __ load_klass(x9, x13); ++void TemplateTable::wide_iinc() ++{ ++ transition(vtos, vtos); ++ __ lwu(x11, at_bcp(2)); // get constant and index ++ __ revb_h_w_u(x11, x11); // reverse bytes in half-word (32bit) and zero-extend ++ __ zero_extend(x12, x11, 16); ++ __ neg(x12, x12); ++ __ slli(x11, x11, 32); ++ __ srai(x11, x11, 48); ++ __ ld(x10, iaddress(x12, t0, _masm)); ++ __ addw(x10, x10, x11); ++ __ sd(x10, iaddress(x12, t0, _masm)); ++} + -+ // Generate subtype check. Blows x12, x15. Object in x13. -+ // Superklass in x10. Subklass in x9. 
-+ __ gen_subtype_check(x9, ok_is_subtype); ++void TemplateTable::convert() ++{ ++ // Checking ++#ifdef ASSERT ++ { ++ TosState tos_in = ilgl; ++ TosState tos_out = ilgl; ++ switch (bytecode()) { ++ case Bytecodes::_i2l: // fall through ++ case Bytecodes::_i2f: // fall through ++ case Bytecodes::_i2d: // fall through ++ case Bytecodes::_i2b: // fall through ++ case Bytecodes::_i2c: // fall through ++ case Bytecodes::_i2s: tos_in = itos; break; ++ case Bytecodes::_l2i: // fall through ++ case Bytecodes::_l2f: // fall through ++ case Bytecodes::_l2d: tos_in = ltos; break; ++ case Bytecodes::_f2i: // fall through ++ case Bytecodes::_f2l: // fall through ++ case Bytecodes::_f2d: tos_in = ftos; break; ++ case Bytecodes::_d2i: // fall through ++ case Bytecodes::_d2l: // fall through ++ case Bytecodes::_d2f: tos_in = dtos; break; ++ default : ShouldNotReachHere(); ++ } ++ switch (bytecode()) { ++ case Bytecodes::_l2i: // fall through ++ case Bytecodes::_f2i: // fall through ++ case Bytecodes::_d2i: // fall through ++ case Bytecodes::_i2b: // fall through ++ case Bytecodes::_i2c: // fall through ++ case Bytecodes::_i2s: tos_out = itos; break; ++ case Bytecodes::_i2l: // fall through ++ case Bytecodes::_f2l: // fall through ++ case Bytecodes::_d2l: tos_out = ltos; break; ++ case Bytecodes::_i2f: // fall through ++ case Bytecodes::_l2f: // fall through ++ case Bytecodes::_d2f: tos_out = ftos; break; ++ case Bytecodes::_i2d: // fall through ++ case Bytecodes::_l2d: // fall through ++ case Bytecodes::_f2d: tos_out = dtos; break; ++ default : ShouldNotReachHere(); ++ } ++ transition(tos_in, tos_out); ++ } ++#endif // ASSERT + -+ // Come here on failure -+ __ push_reg(x13); -+ // object is at TOS -+ __ j(Interpreter::_throw_ClassCastException_entry); ++ // Conversion ++ switch (bytecode()) { ++ case Bytecodes::_i2l: ++ __ sign_extend(x10, x10, 32); ++ break; ++ case Bytecodes::_i2f: ++ __ fcvt_s_w(f10, x10); ++ break; ++ case Bytecodes::_i2d: ++ __ fcvt_d_w(f10, x10); ++ break; ++ case Bytecodes::_i2b: ++ __ sign_extend(x10, x10, 8); ++ break; ++ case Bytecodes::_i2c: ++ __ zero_extend(x10, x10, 16); ++ break; ++ case Bytecodes::_i2s: ++ __ sign_extend(x10, x10, 16); ++ break; ++ case Bytecodes::_l2i: ++ __ addw(x10, x10, zr); ++ break; ++ case Bytecodes::_l2f: ++ __ fcvt_s_l(f10, x10); ++ break; ++ case Bytecodes::_l2d: ++ __ fcvt_d_l(f10, x10); ++ break; ++ case Bytecodes::_f2i: ++ __ fcvt_w_s_safe(x10, f10); ++ break; ++ case Bytecodes::_f2l: ++ __ fcvt_l_s_safe(x10, f10); ++ break; ++ case Bytecodes::_f2d: ++ __ fcvt_d_s(f10, f10); ++ break; ++ case Bytecodes::_d2i: ++ __ fcvt_w_d_safe(x10, f10); ++ break; ++ case Bytecodes::_d2l: ++ __ fcvt_l_d_safe(x10, f10); ++ break; ++ case Bytecodes::_d2f: ++ __ fcvt_s_d(f10, f10); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} + -+ // Come here on success -+ __ bind(ok_is_subtype); -+ __ mv(x10, x13); // Restore object in x13 ++void TemplateTable::lcmp() ++{ ++ transition(ltos, itos); ++ __ pop_l(x11); ++ __ cmp_l2i(t0, x11, x10); ++ __ mv(x10, t0); ++} + -+ // Collect counts on whether this test sees NULLs a lot or not. -+ if (ProfileInterpreter) { -+ __ j(done); -+ __ bind(is_null); -+ __ profile_null_seen(x12); ++void TemplateTable::float_cmp(bool is_float, int unordered_result) ++{ ++ // For instruction feq, flt and fle, the result is 0 if either operand is NaN ++ if (is_float) { ++ __ pop_f(f11); ++ // if unordered_result < 0: ++ // we want -1 for unordered or less than, 0 for equal and 1 for ++ // greater than. 
++ // else: ++ // we want -1 for less than, 0 for equal and 1 for unordered or ++ // greater than. ++ // f11 primary, f10 secondary ++ __ float_compare(x10, f11, f10, unordered_result); + } else { -+ __ bind(is_null); // same as 'done' ++ __ pop_d(f11); ++ // if unordered_result < 0: ++ // we want -1 for unordered or less than, 0 for equal and 1 for ++ // greater than. ++ // else: ++ // we want -1 for less than, 0 for equal and 1 for unordered or ++ // greater than. ++ // f11 primary, f10 secondary ++ __ double_compare(x10, f11, f10, unordered_result); + } -+ __ bind(done); +} + -+void TemplateTable::instanceof() { -+ transition(atos, itos); -+ Label done, is_null, ok_is_subtype, quicked, resolved; -+ __ beqz(x10, is_null); -+ -+ // Get cpool & tags index -+ __ get_cpool_and_tags(x12, x13); // x12=cpool, x13=tags array -+ __ get_unsigned_2_byte_index_at_bcp(x9, 1); // x9=index -+ // See if bytecode has already been quicked -+ __ add(t0, x13, Array::base_offset_in_bytes()); -+ __ add(x11, t0, x9); -+ __ membar(MacroAssembler::AnyAny); -+ __ lbu(x11, x11); -+ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); -+ __ sub(t0, x11, (u1)JVM_CONSTANT_Class); -+ __ beqz(t0, quicked); -+ -+ __ push(atos); // save receiver for result, and for GC -+ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); -+ // vm_result_2 has metadata result -+ __ get_vm_result_2(x10, xthread); -+ __ pop_reg(x13); // restore receiver -+ __ verify_oop(x13); -+ __ load_klass(x13, x13); -+ __ j(resolved); -+ -+ // Get superklass in x10 and subklass in x13 -+ __ bind(quicked); -+ __ load_klass(x13, x10); -+ __ load_resolved_klass_at_offset(x12, x9, x10, t0); -+ -+ __ bind(resolved); -+ -+ // Generate subtype check. Blows x12, x15 -+ // Superklass in x10. Subklass in x13. -+ __ gen_subtype_check(x13, ok_is_subtype); ++void TemplateTable::branch(bool is_jsr, bool is_wide) ++{ ++ // We might be moving to a safepoint. The thread which calls ++ // Interpreter::notice_safepoints() will effectively flush its cache ++ // when it makes a system call, but we need to do something to ++ // ensure that we see the changed dispatch table. ++ __ membar(MacroAssembler::LoadLoad); + -+ // Come here on failure -+ __ mv(x10, zr); -+ __ j(done); -+ // Come here on success -+ __ bind(ok_is_subtype); -+ __ mv(x10, 1); ++ __ profile_taken_branch(x10, x11); ++ const ByteSize be_offset = MethodCounters::backedge_counter_offset() + ++ InvocationCounter::counter_offset(); ++ const ByteSize inv_offset = MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset(); + -+ // Collect counts on whether this test sees NULLs a lot or not. -+ if (ProfileInterpreter) { -+ __ j(done); -+ __ bind(is_null); -+ __ profile_null_seen(x12); ++ // load branch displacement ++ if (!is_wide) { ++ __ lhu(x12, at_bcp(1)); ++ __ revb_h_h(x12, x12); // reverse bytes in half-word and sign-extend + } else { -+ __ bind(is_null); // same as 'done' ++ __ lwu(x12, at_bcp(1)); ++ __ revb_w_w(x12, x12); // reverse bytes in word and sign-extend + } -+ __ bind(done); -+ // x10 = 0: obj == NULL or obj is not an instanceof the specified klass -+ // x10 = 1: obj != NULL and obj is an instanceof the specified klass -+} + -+//----------------------------------------------------------------------------- -+// Breakpoints -+void TemplateTable::_breakpoint() { -+ // Note: We get here even if we are single stepping.. -+ // jbug inists on setting breakpoints at every bytecode -+ // even if we are in single step mode. 
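
The float_cmp template above takes an unordered_result so that one stub serves both the l and g forms of the compare bytecodes: fcmpl/dcmpl must produce -1 when either operand is NaN, fcmpg/dcmpg must produce +1, and otherwise the result is -1, 0 or 1. A standalone sketch of the expected value (illustrative, not the float_compare macro):

    #include <cmath>
    #include <cstdio>

    // unordered_result is -1 for fcmpl/dcmpl and +1 for fcmpg/dcmpg.
    int fcmp(float a, float b, int unordered_result) {
        if (std::isnan(a) || std::isnan(b)) return unordered_result;
        return (a < b) ? -1 : (a > b) ? 1 : 0;
    }

    int main() {
        std::printf("%d %d %d\n", fcmp(1.0f, NAN, -1), fcmp(1.0f, NAN, +1), fcmp(2.0f, 1.0f, -1)); // -1 1 1
        return 0;
    }
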
++ // Handle all the JSR stuff here, then exit. ++ // It's much shorter and cleaner than intermingling with the non-JSR ++ // normal-branch stuff occurring below. + -+ transition(vtos, vtos); ++ if (is_jsr) { ++ // compute return address as bci ++ __ ld(t1, Address(xmethod, Method::const_offset())); ++ __ add(t1, t1, ++ in_bytes(ConstMethod::codes_offset()) - (is_wide ? 5 : 3)); ++ __ sub(x11, xbcp, t1); ++ __ push_i(x11); ++ // Adjust the bcp by the 16-bit displacement in x12 ++ __ add(xbcp, xbcp, x12); ++ __ load_unsigned_byte(t0, Address(xbcp, 0)); ++ // load the next target bytecode into t0, it is the argument of dispatch_only ++ __ dispatch_only(vtos, /*generate_poll*/true); ++ return; ++ } + -+ // get the unpatched byte code -+ __ get_method(c_rarg1); -+ __ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::get_original_bytecode_at), -+ c_rarg1, xbcp); -+ __ mv(x9, x10); ++ // Normal (non-jsr) branch handling + -+ // post the breakpoint event -+ __ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint), -+ xmethod, xbcp); ++ // Adjust the bcp by the displacement in x12 ++ __ add(xbcp, xbcp, x12); + -+ // complete the execution of original bytecode -+ __ mv(t0, x9); -+ __ dispatch_only_normal(vtos); -+} ++ assert(UseLoopCounter || !UseOnStackReplacement, ++ "on-stack-replacement requires loop counters"); ++ Label backedge_counter_overflow; ++ Label dispatch; ++ if (UseLoopCounter) { ++ // increment backedge counter for backward branches ++ // x10: MDO ++ // x11: MDO bumped taken-count ++ // x12: target offset ++ __ bgtz(x12, dispatch); // count only if backward branch + -+//----------------------------------------------------------------------------- -+// Exceptions -+ -+void TemplateTable::athrow() { -+ transition(atos, vtos); -+ __ null_check(x10); -+ __ j(Interpreter::throw_exception_entry()); -+} ++ // check if MethodCounters exists ++ Label has_counters; ++ __ ld(t0, Address(xmethod, Method::method_counters_offset())); ++ __ bnez(t0, has_counters); ++ __ push_reg(x10); ++ __ push_reg(x11); ++ __ push_reg(x12); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::build_method_counters), xmethod); ++ __ pop_reg(x12); ++ __ pop_reg(x11); ++ __ pop_reg(x10); ++ __ ld(t0, Address(xmethod, Method::method_counters_offset())); ++ __ beqz(t0, dispatch); // No MethodCounters allocated, OutOfMemory ++ __ bind(has_counters); + -+//----------------------------------------------------------------------------- -+// Synchronization -+// -+// Note: monitorenter & exit are symmetric routines; which is reflected -+// in the assembly code structure as well -+// -+// Stack layout: -+// -+// [expressions ] <--- esp = expression stack top -+// .. -+// [expressions ] -+// [monitor entry] <--- monitor block top = expression stack bot -+// .. -+// [monitor entry] -+// [frame data ] <--- monitor block bot -+// ... -+// [saved fp ] <--- fp -+void TemplateTable::monitorenter() -+{ -+ transition(atos, vtos); ++ Label no_mdo; ++ int increment = InvocationCounter::count_increment; ++ if (ProfileInterpreter) { ++ // Are we profiling? 
++ __ ld(x11, Address(xmethod, in_bytes(Method::method_data_offset()))); ++ __ beqz(x11, no_mdo); ++ // Increment the MDO backedge counter ++ const Address mdo_backedge_counter(x11, in_bytes(MethodData::backedge_counter_offset()) + ++ in_bytes(InvocationCounter::counter_offset())); ++ const Address mask(x11, in_bytes(MethodData::backedge_mask_offset())); ++ __ increment_mask_and_jump(mdo_backedge_counter, increment, mask, ++ x10, t0, false, ++ UseOnStackReplacement ? &backedge_counter_overflow : &dispatch); ++ __ j(dispatch); ++ } ++ __ bind(no_mdo); ++ // Increment backedge counter in MethodCounters* ++ __ ld(t0, Address(xmethod, Method::method_counters_offset())); ++ const Address mask(t0, in_bytes(MethodCounters::backedge_mask_offset())); ++ __ increment_mask_and_jump(Address(t0, be_offset), increment, mask, ++ x10, t1, false, ++ UseOnStackReplacement ? &backedge_counter_overflow : &dispatch); ++ __ bind(dispatch); ++ } + -+ // check for NULL object -+ __ null_check(x10); ++ // Pre-load the next target bytecode into t0 ++ __ load_unsigned_byte(t0, Address(xbcp, 0)); + -+ const Address monitor_block_top( -+ fp, frame::interpreter_frame_monitor_block_top_offset * wordSize); -+ const Address monitor_block_bot( -+ fp, frame::interpreter_frame_initial_sp_offset * wordSize); -+ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ // continue with the bytecode @ target ++ // t0: target bytecode ++ // xbcp: target bcp ++ __ dispatch_only(vtos, /*generate_poll*/true); + -+ Label allocated; ++ if (UseLoopCounter && UseOnStackReplacement) { ++ // invocation counter overflow ++ __ bind(backedge_counter_overflow); ++ __ neg(x12, x12); ++ __ add(x12, x12, xbcp); // branch xbcp ++ // IcoResult frequency_counter_overflow([JavaThread*], address branch_bcp) ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::frequency_counter_overflow), ++ x12); ++ __ load_unsigned_byte(x11, Address(xbcp, 0)); // restore target bytecode + -+ // initialize entry pointer -+ __ mv(c_rarg1, zr); // points to free slot or NULL ++ // x10: osr nmethod (osr ok) or NULL (osr not possible) ++ // w11: target bytecode ++ // x12: temporary ++ __ beqz(x10, dispatch); // test result -- no osr if null ++ // nmethod may have been invalidated (VM may block upon call_VM return) ++ __ lbu(x12, Address(x10, nmethod::state_offset())); ++ if (nmethod::in_use != 0) { ++ __ sub(x12, x12, nmethod::in_use); ++ } ++ __ bnez(x12, dispatch); + -+ // find a free slot in the monitor block (result in c_rarg1) -+ { -+ Label entry, loop, exit, notUsed; -+ __ ld(c_rarg3, monitor_block_top); // points to current entry, -+ // starting with top-most entry -+ __ la(c_rarg2, monitor_block_bot); // points to word before bottom ++ // We have the address of an on stack replacement routine in x10 ++ // We need to prepare to execute the OSR method. First we must ++ // migrate the locals and monitors off of the stack. 
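
The backedge profiling above relies on increment_mask_and_jump: bump the counter by a fixed increment, mask the updated value, and take the overflow path (here backedge_counter_overflow, which requests OSR compilation) when the masked value becomes zero. A standalone model of that pattern; the constants are made up for the demo, not HotSpot's defaults:

    #include <cstdint>
    #include <cstdio>

    // Increment-and-mask test: returns true when the overflow path should be taken.
    static bool bump_and_check(uint32_t& counter, uint32_t increment, uint32_t mask) {
        counter += increment;
        return (counter & mask) == 0;
    }

    int main() {
        uint32_t counter = 0;
        const uint32_t increment = 8, mask = 0x3f8; // placeholder values
        int backedges = 1;
        while (!bump_and_check(counter, increment, mask)) {
            ++backedges;
        }
        std::printf("overflow after %d backedges\n", backedges); // 128 with these constants
        return 0;
    }
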
+ -+ __ j(entry); ++ __ mv(x9, x10); // save the nmethod + -+ __ bind(loop); -+ // check if current entry is used -+ // if not used then remember entry in c_rarg1 -+ __ ld(t0, Address(c_rarg3, BasicObjectLock::obj_offset_in_bytes())); -+ __ bnez(t0, notUsed); -+ __ mv(c_rarg1, c_rarg3); -+ __ bind(notUsed); -+ // check if current entry is for same object -+ // if same object then stop searching -+ __ beq(x10, t0, exit); -+ // otherwise advance to next entry -+ __ add(c_rarg3, c_rarg3, entry_size); -+ __ bind(entry); -+ // check if bottom reached -+ // if not at bottom then check this entry -+ __ bne(c_rarg3, c_rarg2, loop); -+ __ bind(exit); -+ } ++ call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin)); + -+ __ bnez(c_rarg1, allocated); // check if a slot has been found and -+ // if found, continue with that on ++ // x10 is OSR buffer, move it to expected parameter location ++ __ mv(j_rarg0, x10); + -+ // allocate one if there's no free slot -+ { -+ Label entry, loop; -+ // 1. compute new pointers // esp: old expression stack top -+ __ ld(c_rarg1, monitor_block_bot); // c_rarg1: old expression stack bottom -+ __ sub(esp, esp, entry_size); // move expression stack top -+ __ sub(c_rarg1, c_rarg1, entry_size); // move expression stack bottom -+ __ mv(c_rarg3, esp); // set start value for copy loop -+ __ sd(c_rarg1, monitor_block_bot); // set new monitor block bottom -+ __ sub(sp, sp, entry_size); // make room for the monitor ++ // remove activation ++ // get sender esp ++ __ ld(esp, ++ Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); ++ // remove frame anchor ++ __ leave(); ++ // Ensure compiled code always sees stack at proper alignment ++ __ andi(sp, esp, -16); + -+ __ j(entry); -+ // 2. move expression stack contents -+ __ bind(loop); -+ __ ld(c_rarg2, Address(c_rarg3, entry_size)); // load expression stack -+ // word from old location -+ __ sd(c_rarg2, Address(c_rarg3, 0)); // and store it at new location -+ __ add(c_rarg3, c_rarg3, wordSize); // advance to next word -+ __ bind(entry); -+ __ bne(c_rarg3, c_rarg1, loop); // check if bottom reached.if not at bottom -+ // then copy next word -+ } ++ // and begin the OSR nmethod ++ __ ld(t0, Address(x9, nmethod::osr_entry_point_offset())); ++ __ jr(t0); ++ } ++} + -+ // call run-time routine -+ // c_rarg1: points to monitor entry -+ __ bind(allocated); ++void TemplateTable::if_0cmp(Condition cc) ++{ ++ transition(itos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; + -+ // Increment bcp to point to the next bytecode, so exception -+ // handling for async. exceptions work correctly. -+ // The object has already been poped from the stack, so the -+ // expression stack looks correct. 
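
The if_0cmp and if_icmp templates above normalize the int on top of stack with addw x10, x10, zr: on RV64, addw performs a 32-bit add and sign-extends the result into the 64-bit register, so the full-width branch compares that follow see a canonical value (the same idiom appears in iaload and l2i). Equivalent C++ for the sign extension, as an illustration:

    #include <cstdint>
    #include <cstdio>

    // addw rd, rs, zr: keep the low 32 bits and sign-extend them to 64 bits.
    int64_t sign_extend_w(int64_t x) {
        return (int64_t)(int32_t) x;
    }

    int main() {
        std::printf("%lld\n", (long long) sign_extend_w(0xFFFFFFFFLL)); // -1
        return 0;
    }
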
-+ __ addi(xbcp, xbcp, 1); ++ __ addw(x10, x10, zr); ++ switch (cc) { ++ case equal: ++ __ bnez(x10, not_taken); ++ break; ++ case not_equal: ++ __ beqz(x10, not_taken); ++ break; ++ case less: ++ __ bgez(x10, not_taken); ++ break; ++ case less_equal: ++ __ bgtz(x10, not_taken); ++ break; ++ case greater: ++ __ blez(x10, not_taken); ++ break; ++ case greater_equal: ++ __ bltz(x10, not_taken); ++ break; ++ default: ++ break; ++ } + -+ // store object -+ __ sd(x10, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes())); -+ __ lock_object(c_rarg1); ++ branch(false, false); ++ __ bind(not_taken); ++ __ profile_not_taken_branch(x10); ++} + -+ // check to make sure this monitor doesn't cause stack overflow after locking -+ __ save_bcp(); // in case of exception -+ __ generate_stack_overflow_check(0); ++void TemplateTable::if_icmp(Condition cc) ++{ ++ transition(itos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ __ pop_i(x11); ++ __ addw(x10, x10, zr); ++ switch (cc) { ++ case equal: ++ __ bne(x11, x10, not_taken); ++ break; ++ case not_equal: ++ __ beq(x11, x10, not_taken); ++ break; ++ case less: ++ __ bge(x11, x10, not_taken); ++ break; ++ case less_equal: ++ __ bgt(x11, x10, not_taken); ++ break; ++ case greater: ++ __ ble(x11, x10, not_taken); ++ break; ++ case greater_equal: ++ __ blt(x11, x10, not_taken); ++ break; ++ default: ++ break; ++ } + -+ // The bcp has already been incremented. Just need to dispatch to -+ // next instruction. -+ __ dispatch_next(vtos); ++ branch(false, false); ++ __ bind(not_taken); ++ __ profile_not_taken_branch(x10); +} + -+void TemplateTable::monitorexit() ++void TemplateTable::if_nullcmp(Condition cc) +{ + transition(atos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ if (cc == equal) { ++ __ bnez(x10, not_taken); ++ } else { ++ __ beqz(x10, not_taken); ++ } ++ branch(false, false); ++ __ bind(not_taken); ++ __ profile_not_taken_branch(x10); ++} + -+ // check for NULL object -+ __ null_check(x10); -+ -+ const Address monitor_block_top( -+ fp, frame::interpreter_frame_monitor_block_top_offset * wordSize); -+ const Address monitor_block_bot( -+ fp, frame::interpreter_frame_initial_sp_offset * wordSize); -+ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; -+ -+ Label found; -+ -+ // find matching slot -+ { -+ Label entry, loop; -+ __ ld(c_rarg1, monitor_block_top); // points to current entry, -+ // starting with top-most entry -+ __ la(c_rarg2, monitor_block_bot); // points to word before bottom -+ // of monitor block -+ __ j(entry); ++void TemplateTable::if_acmp(Condition cc) ++{ ++ transition(atos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ __ pop_ptr(x11); + -+ __ bind(loop); -+ // check if current entry is for same object -+ __ ld(t0, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes())); -+ // if same object then stop searching -+ __ beq(x10, t0, found); -+ // otherwise advance to next entry -+ __ add(c_rarg1, c_rarg1, entry_size); -+ __ bind(entry); -+ // check if bottom reached -+ // if not at bottom then check this entry -+ __ bne(c_rarg1, c_rarg2, loop); ++ if (cc == equal) { ++ __ bne(x11, x10, not_taken); ++ } else if (cc == not_equal) { ++ __ beq(x11, x10, not_taken); + } ++ branch(false, false); ++ __ bind(not_taken); ++ __ profile_not_taken_branch(x10); ++} + -+ // error handling. 
Unlocking was not block-structured -+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::throw_illegal_monitor_state_exception)); -+ __ should_not_reach_here(); ++void TemplateTable::ret() { ++ transition(vtos, vtos); ++ // We might be moving to a safepoint. The thread which calls ++ // Interpreter::notice_safepoints() will effectively flush its cache ++ // when it makes a system call, but we need to do something to ++ // ensure that we see the changed dispatch table. ++ __ membar(MacroAssembler::LoadLoad); + -+ // call run-time routine -+ __ bind(found); -+ __ push_ptr(x10); // make sure object is on stack (contract with oopMaps) -+ __ unlock_object(c_rarg1); -+ __ pop_ptr(x10); // discard object ++ locals_index(x11); ++ __ ld(x11, aaddress(x11, t1, _masm)); // get return bci, compute return bcp ++ __ profile_ret(x11, x12); ++ __ ld(xbcp, Address(xmethod, Method::const_offset())); ++ __ add(xbcp, xbcp, x11); ++ __ addi(xbcp, xbcp, in_bytes(ConstMethod::codes_offset())); ++ __ dispatch_next(vtos, 0, /*generate_poll*/true); +} + -+// Wide instructions -+void TemplateTable::wide() -+{ -+ __ load_unsigned_byte(x9, at_bcp(1)); -+ __ mv(t0, (address)Interpreter::_wentry_point); -+ __ shadd(t0, x9, t0, t1, 3); -+ __ ld(t0, Address(t0)); -+ __ jr(t0); ++void TemplateTable::wide_ret() { ++ transition(vtos, vtos); ++ locals_index_wide(x11); ++ __ ld(x11, aaddress(x11, t0, _masm)); // get return bci, compute return bcp ++ __ profile_ret(x11, x12); ++ __ ld(xbcp, Address(xmethod, Method::const_offset())); ++ __ add(xbcp, xbcp, x11); ++ __ add(xbcp, xbcp, in_bytes(ConstMethod::codes_offset())); ++ __ dispatch_next(vtos, 0, /*generate_poll*/true); +} + -+// Multi arrays -+void TemplateTable::multianewarray() { -+ transition(vtos, atos); -+ __ load_unsigned_byte(x10, at_bcp(3)); // get number of dimensions -+ // last dim is on top of stack; we want address of first one: -+ // first_addr = last_addr + (ndims - 1) * wordSize -+ __ shadd(c_rarg1, x10, esp, c_rarg1, 3); -+ __ sub(c_rarg1, c_rarg1, wordSize); -+ call_VM(x10, -+ CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray), -+ c_rarg1); -+ __ load_unsigned_byte(x11, at_bcp(3)); -+ __ shadd(esp, x11, esp, t0, 3); ++void TemplateTable::tableswitch() { ++ Label default_case, continue_execution; ++ transition(itos, vtos); ++ // align xbcp ++ __ la(x11, at_bcp(BytesPerInt)); ++ __ andi(x11, x11, -BytesPerInt); ++ // load lo & hi ++ __ lwu(x12, Address(x11, BytesPerInt)); ++ __ lwu(x13, Address(x11, 2 * BytesPerInt)); ++ __ revb_w_w(x12, x12); // reverse bytes in word (32bit) and sign-extend ++ __ revb_w_w(x13, x13); // reverse bytes in word (32bit) and sign-extend ++ // check against lo & hi ++ __ blt(x10, x12, default_case); ++ __ bgt(x10, x13, default_case); ++ // lookup dispatch offset ++ __ subw(x10, x10, x12); ++ __ shadd(x13, x10, x11, t0, 2); ++ __ lwu(x13, Address(x13, 3 * BytesPerInt)); ++ __ profile_switch_case(x10, x11, x12); ++ // continue execution ++ __ bind(continue_execution); ++ __ revb_w_w(x13, x13); // reverse bytes in word (32bit) and sign-extend ++ __ add(xbcp, xbcp, x13); ++ __ load_unsigned_byte(t0, Address(xbcp)); ++ __ dispatch_only(vtos, /*generate_poll*/true); ++ // handle default ++ __ bind(default_case); ++ __ profile_switch_default(x10); ++ __ lwu(x13, Address(x11, 0)); ++ __ j(continue_execution); +} -diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.hpp b/src/hotspot/cpu/riscv/templateTable_riscv.hpp -new file mode 100644 -index 000000000..b437c8f4c ---- /dev/null -+++ 
b/src/hotspot/cpu/riscv/templateTable_riscv.hpp -@@ -0,0 +1,42 @@ -+/* -+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ + -+#ifndef CPU_RISCV_TEMPLATETABLE_RISCV_HPP -+#define CPU_RISCV_TEMPLATETABLE_RISCV_HPP ++void TemplateTable::lookupswitch() { ++ transition(itos, itos); ++ __ stop("lookupswitch bytecode should have been rewritten"); ++} + -+static void prepare_invoke(int byte_no, -+ Register method, // linked method (or i-klass) -+ Register index = noreg, // itable index, MethodType, etc. -+ Register recv = noreg, // if caller wants to see it -+ Register flags = noreg // if caller wants to test it -+ ); -+static void invokevirtual_helper(Register index, Register recv, -+ Register flags); ++void TemplateTable::fast_linearswitch() { ++ transition(itos, vtos); ++ Label loop_entry, loop, found, continue_execution; ++ // bswap x10 so we can avoid bswapping the table entries ++ __ revb_w_w(x10, x10); // reverse bytes in word (32bit) and sign-extend ++ // align xbcp ++ __ la(x9, at_bcp(BytesPerInt)); // btw: should be able to get rid of ++ // this instruction (change offsets ++ // below) ++ __ andi(x9, x9, -BytesPerInt); ++ // set counter ++ __ lwu(x11, Address(x9, BytesPerInt)); ++ __ revb_w(x11, x11); ++ __ j(loop_entry); ++ // table search ++ __ bind(loop); ++ __ shadd(t0, x11, x9, t0, 3); ++ __ lw(t0, Address(t0, 2 * BytesPerInt)); ++ __ beq(x10, t0, found); ++ __ bind(loop_entry); ++ __ addi(x11, x11, -1); ++ __ bgez(x11, loop); ++ // default case ++ __ profile_switch_default(x10); ++ __ lwu(x13, Address(x9, 0)); ++ __ j(continue_execution); ++ // entry found -> get offset ++ __ bind(found); ++ __ shadd(t0, x11, x9, t0, 3); ++ __ lwu(x13, Address(t0, 3 * BytesPerInt)); ++ __ profile_switch_case(x11, x10, x9); ++ // continue execution ++ __ bind(continue_execution); ++ __ revb_w_w(x13, x13); // reverse bytes in word (32bit) and sign-extend ++ __ add(xbcp, xbcp, x13); ++ __ lbu(t0, Address(xbcp, 0)); ++ __ dispatch_only(vtos, /*generate_poll*/true); ++} + -+// Helpers -+static void index_check(Register array, Register index); ++void TemplateTable::fast_binaryswitch() { ++ transition(itos, vtos); ++ // Implementation using the following core algorithm: ++ // ++ // int binary_search(int key, LookupswitchPair* array, int n) ++ // binary_search start: ++ // #Binary search according to "Methodik des Programmierens" by ++ // 
# Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985. ++ // int i = 0; ++ // int j = n; ++ // while (i + 1 < j) do ++ // # invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q) ++ // # with Q: for all i: 0 <= i < n: key < a[i] ++ // # where a stands for the array and assuming that the (inexisting) ++ // # element a[n] is infinitely big. ++ // int h = (i + j) >> 1 ++ // # i < h < j ++ // if (key < array[h].fast_match()) ++ // then [j = h] ++ // else [i = h] ++ // end ++ // # R: a[i] <= key < a[i+1] or Q ++ // # (i.e., if key is within array, i is the correct index) ++ // return i ++ // binary_search end + -+#endif // CPU_RISCV_TEMPLATETABLE_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/vmStructs_riscv.hpp b/src/hotspot/cpu/riscv/vmStructs_riscv.hpp -new file mode 100644 -index 000000000..03079aec0 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/vmStructs_riscv.hpp -@@ -0,0 +1,43 @@ -+/* -+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ + -+#ifndef CPU_RISCV_VMSTRUCTS_RISCV_HPP -+#define CPU_RISCV_VMSTRUCTS_RISCV_HPP ++ // Register allocation ++ const Register key = x10; // already set (tosca) ++ const Register array = x11; ++ const Register i = x12; ++ const Register j = x13; ++ const Register h = x14; ++ const Register temp = x15; + -+// These are the CPU-specific fields, types and integer -+// constants required by the Serviceability Agent. This file is -+// referenced by vmStructs.cpp. 
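For reference, the invariant-based search described in the comment above can be written out in plain C++. This is only an illustration of what fast_binaryswitch implements in assembly; the LookupswitchPair layout (a 32-bit match value followed by a 32-bit branch offset, both already in native byte order) is an assumption of the sketch, not the exact HotSpot type.

#include <cstdint>

struct LookupswitchPair {          // assumed layout: entries sorted by 'match'
  int32_t match;                   // the case value
  int32_t offset;                  // branch offset relative to the lookupswitch bcp
};

// Returns i such that a[i].match <= key < a[i+1].match, or 0 when key is below
// every entry; the caller re-checks a[i].match == key, exactly as the generated
// code re-compares before taking the matched offset.
static int binary_search(int32_t key, const LookupswitchPair* a, int n) {
  int i = 0;
  int j = n;
  while (i + 1 < j) {
    const int h = (i + j) >> 1;    // i < h < j
    if (key < a[h].match) {
      j = h;                       // key < a[h]  => keep searching the lower half
    } else {
      i = h;                       // a[h] <= key => keep searching the upper half
    }
  }
  return i;
}

Note that the generated loop keeps the table in class-file (big-endian) order and byte-swaps only the probed entry, which is why revb_w_w appears inside the loop rather than once up front.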
++ // Find array start ++ __ la(array, at_bcp(3 * BytesPerInt)); // btw: should be able to ++ // get rid of this ++ // instruction (change ++ // offsets below) ++ __ andi(array, array, -BytesPerInt); + -+#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ -+ volatile_nonstatic_field(JavaFrameAnchor, _last_Java_fp, intptr_t*) ++ // Initialize i & j ++ __ mv(i, zr); // i = 0 ++ __ lwu(j, Address(array, -BytesPerInt)); // j = length(array) + -+#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) ++ // Convert j into native byteordering ++ __ revb_w(j, j); + -+#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++ // And start ++ Label entry; ++ __ j(entry); + -+#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++ // binary search loop ++ { ++ Label loop; ++ __ bind(loop); ++ __ addw(h, i, j); // h = i + j ++ __ srliw(h, h, 1); // h = (i + j) >> 1 ++ // if [key < array[h].fast_match()] ++ // then [j = h] ++ // else [i = h] ++ // Convert array[h].match to native byte-ordering before compare ++ __ shadd(temp, h, array, temp, 3); ++ __ ld(temp, Address(temp, 0)); ++ __ revb_w_w(temp, temp); // reverse bytes in word (32bit) and sign-extend + -+#endif // CPU_RISCV_VMSTRUCTS_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp -new file mode 100644 -index 000000000..dd4f5c9ae ---- /dev/null -+++ b/src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp -@@ -0,0 +1,91 @@ -+/* -+ * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
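The repeated revb_w_w / revb_w comments above all come down to the same issue: tableswitch and lookupswitch operands are stored big-endian in the class file while RISC-V is little-endian. A hedged C++ equivalent of "reverse the bytes of a 32-bit word, then sign-extend" (the helper name only mirrors the assembler routine; it is otherwise illustrative):

#include <cstdint>

// Rough C++ equivalent of the revb_w_w helper used above: byte-swap the low
// 32 bits of a value and sign-extend the result to 64 bits.
static int64_t reverse_word_and_sign_extend(uint32_t v) {
  const uint32_t swapped = ((v & 0x000000FFu) << 24) |
                           ((v & 0x0000FF00u) <<  8) |
                           ((v & 0x00FF0000u) >>  8) |
                           ((v & 0xFF000000u) >> 24);
  return (int64_t)(int32_t)swapped;  // the table offset is a signed 32-bit quantity
}

lwu loads the raw word zero-extended; the swap restores native order before the value is used as a signed branch offset added to xbcp.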
-+ * -+ */ ++ Label L_done, L_greater; ++ __ bge(key, temp, L_greater); ++ // if [key < array[h].fast_match()] then j = h ++ __ mv(j, h); ++ __ j(L_done); ++ __ bind(L_greater); ++ // if [key >= array[h].fast_match()] then i = h ++ __ mv(i, h); ++ __ bind(L_done); + -+#include "memory/allocation.hpp" -+#include "memory/allocation.inline.hpp" -+#include "runtime/os.inline.hpp" -+#include "vm_version_ext_riscv.hpp" ++ // while [i + 1 < j] ++ __ bind(entry); ++ __ addiw(h, i, 1); // i + 1 ++ __ blt(h, j, loop); // i + 1 < j ++ } + -+// VM_Version_Ext statics -+int VM_Version_Ext::_no_of_threads = 0; -+int VM_Version_Ext::_no_of_cores = 0; -+int VM_Version_Ext::_no_of_sockets = 0; -+bool VM_Version_Ext::_initialized = false; -+char VM_Version_Ext::_cpu_name[CPU_TYPE_DESC_BUF_SIZE] = {0}; -+char VM_Version_Ext::_cpu_desc[CPU_DETAILED_DESC_BUF_SIZE] = {0}; ++ // end of binary search, result index is i (must check again!) ++ Label default_case; ++ // Convert array[i].match to native byte-ordering before compare ++ __ shadd(temp, i, array, temp, 3); ++ __ ld(temp, Address(temp, 0)); ++ __ revb_w_w(temp, temp); // reverse bytes in word (32bit) and sign-extend ++ __ bne(key, temp, default_case); + -+void VM_Version_Ext::initialize_cpu_information(void) { -+ // do nothing if cpu info has been initialized -+ if (_initialized) { -+ return; -+ } ++ // entry found -> j = offset ++ __ shadd(temp, i, array, temp, 3); ++ __ lwu(j, Address(temp, BytesPerInt)); ++ __ profile_switch_case(i, key, array); ++ __ revb_w_w(j, j); // reverse bytes in word (32bit) and sign-extend + -+ int core_id = -1; -+ int chip_id = -1; -+ int len = 0; -+ char* src_string = NULL; ++ __ add(temp, xbcp, j); ++ __ load_unsigned_byte(t0, Address(temp, 0)); + -+ _no_of_cores = os::processor_count(); -+ _no_of_threads = _no_of_cores; -+ _no_of_sockets = _no_of_cores; -+ snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "RISCV64"); -+ snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "RISCV64 %s", _features_string); -+ _initialized = true; -+} ++ __ add(xbcp, xbcp, j); ++ __ la(xbcp, Address(xbcp, 0)); ++ __ dispatch_only(vtos, /*generate_poll*/true); + -+int VM_Version_Ext::number_of_threads(void) { -+ initialize_cpu_information(); -+ return _no_of_threads; -+} ++ // default case -> j = default offset ++ __ bind(default_case); ++ __ profile_switch_default(i); ++ __ lwu(j, Address(array, -2 * BytesPerInt)); ++ __ revb_w_w(j, j); // reverse bytes in word (32bit) and sign-extend + -+int VM_Version_Ext::number_of_cores(void) { -+ initialize_cpu_information(); -+ return _no_of_cores; -+} ++ __ add(temp, xbcp, j); ++ __ load_unsigned_byte(t0, Address(temp, 0)); + -+int VM_Version_Ext::number_of_sockets(void) { -+ initialize_cpu_information(); -+ return _no_of_sockets; ++ __ add(xbcp, xbcp, j); ++ __ la(xbcp, Address(xbcp, 0)); ++ __ dispatch_only(vtos, /*generate_poll*/true); +} + -+const char* VM_Version_Ext::cpu_name(void) { -+ initialize_cpu_information(); -+ char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_TYPE_DESC_BUF_SIZE, mtTracing); -+ if (NULL == tmp) { -+ return NULL; -+ } -+ strncpy(tmp, _cpu_name, CPU_TYPE_DESC_BUF_SIZE); -+ return tmp; -+} ++void TemplateTable::_return(TosState state) ++{ ++ transition(state, state); ++ assert(_desc->calls_vm(), ++ "inconsistent calls_vm information"); // call in remove_activation + -+const char* VM_Version_Ext::cpu_description(void) { -+ initialize_cpu_information(); -+ char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_DETAILED_DESC_BUF_SIZE, mtTracing); -+ if (NULL == tmp) { -+ return NULL; ++ 
if (_desc->bytecode() == Bytecodes::_return_register_finalizer) { ++ assert(state == vtos, "only valid state"); ++ ++ __ ld(c_rarg1, aaddress(0)); ++ __ load_klass(x13, c_rarg1); ++ __ lwu(x13, Address(x13, Klass::access_flags_offset())); ++ Label skip_register_finalizer; ++ __ andi(t0, x13, JVM_ACC_HAS_FINALIZER); ++ __ beqz(t0, skip_register_finalizer); ++ ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::register_finalizer), c_rarg1); ++ ++ __ bind(skip_register_finalizer); + } -+ strncpy(tmp, _cpu_desc, CPU_DETAILED_DESC_BUF_SIZE); -+ return tmp; -+} -diff --git a/src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp b/src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp -new file mode 100644 -index 000000000..0982b6668 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp -@@ -0,0 +1,55 @@ -+/* -+ * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ + -+#ifndef CPU_RISCV_VM_VERSION_EXT_RISCV_HPP -+#define CPU_RISCV_VM_VERSION_EXT_RISCV_HPP ++ // Issue a StoreStore barrier after all stores but before return ++ // from any constructor for any class with a final field. We don't ++ // know if this is a finalizer, so we always do so. ++ if (_desc->bytecode() == Bytecodes::_return) { ++ __ membar(MacroAssembler::StoreStore); ++ } + -+#include "runtime/vm_version.hpp" -+#include "utilities/macros.hpp" ++ // Narrow result if state is itos but result type is smaller. ++ // Need to narrow in the return bytecode rather than in generate_return_entry ++ // since compiled code callers expect the result to already be narrowed. ++ if (state == itos) { ++ __ narrow(x10); ++ } + -+class VM_Version_Ext : public VM_Version { -+ private: -+ static const size_t CPU_TYPE_DESC_BUF_SIZE = 256; -+ static const size_t CPU_DETAILED_DESC_BUF_SIZE = 4096; ++ __ remove_activation(state); ++ __ ret(); ++} + -+ static int _no_of_threads; -+ static int _no_of_cores; -+ static int _no_of_sockets; -+ static bool _initialized; -+ static char _cpu_name[CPU_TYPE_DESC_BUF_SIZE]; -+ static char _cpu_desc[CPU_DETAILED_DESC_BUF_SIZE]; + -+ public: -+ static int number_of_threads(void); -+ static int number_of_cores(void); -+ static int number_of_sockets(void); ++// ---------------------------------------------------------------------------- ++// Volatile variables demand their effects be made known to all CPU's ++// in order. 
Store buffers on most chips allow reads & writes to ++// reorder; the JMM's ReadAfterWrite.java test fails in -Xint mode ++// without some kind of memory barrier (i.e., it's not sufficient that ++// the interpreter does not reorder volatile references, the hardware ++// also must not reorder them). ++// ++// According to the new Java Memory Model (JMM): ++// (1) All volatiles are serialized wrt to each other. ALSO reads & ++// writes act as aquire & release, so: ++// (2) A read cannot let unrelated NON-volatile memory refs that ++// happen after the read float up to before the read. It's OK for ++// non-volatile memory refs that happen before the volatile read to ++// float down below it. ++// (3) Similar a volatile write cannot let unrelated NON-volatile ++// memory refs that happen BEFORE the write float down to after the ++// write. It's OK for non-volatile memory refs that happen after the ++// volatile write to float up before it. ++// ++// We only put in barriers around volatile refs (they are expensive), ++// not _between_ memory refs (that would require us to track the ++// flavor of the previous memory refs). Requirements (2) and (3) ++// require some barriers before volatile stores and after volatile ++// loads. These nearly cover requirement (1) but miss the ++// volatile-store-volatile-load case. This final case is placed after ++// volatile-stores although it could just as well go before ++// volatile-loads. + -+ static const char* cpu_name(void); -+ static const char* cpu_description(void); -+ static void initialize_cpu_information(void); ++void TemplateTable::resolve_cache_and_index(int byte_no, ++ Register Rcache, ++ Register index, ++ size_t index_size) { ++ const Register temp = x9; ++ assert_different_registers(Rcache, index, temp); + -+}; ++ Label resolved, clinit_barrier_slow; + -+#endif // CPU_RISCV_VM_VERSION_EXT_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp -new file mode 100644 -index 000000000..31d5bb5f4 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp -@@ -0,0 +1,190 @@ -+/* -+ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
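Summarising the rules above, the interpreter's barrier placement for volatile fields can be sketched as follows. This is illustrative only; membar() and the bit constants stand in for the MacroAssembler barrier interface used in the generated code, not a real fence implementation.

enum MembarBits { LoadLoad = 1, LoadStore = 2, StoreLoad = 4, StoreStore = 8 };

static void membar(int /*bits*/) { /* a hardware fence is emitted here in the real code */ }

static void volatile_field_load(/* ... */) {
  // ... load the field ...
  membar(LoadLoad | LoadStore);    // later accesses may not float above the load
}

static void volatile_field_store(/* ... */) {
  membar(StoreStore | LoadStore);  // earlier accesses may not float below the store
  // ... store the field ...
  membar(StoreLoad | StoreStore);  // orders this store against a following volatile load
}

The StoreLoad after a volatile store is the expensive one; it covers the volatile-store-then-volatile-load case singled out at the end of the comment above.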
-+ * -+ */ ++ Bytecodes::Code code = bytecode(); ++ switch (code) { ++ case Bytecodes::_nofast_getfield: code = Bytecodes::_getfield; break; ++ case Bytecodes::_nofast_putfield: code = Bytecodes::_putfield; break; ++ default: break; ++ } + -+#include "precompiled.hpp" -+#include "runtime/java.hpp" -+#include "runtime/vm_version.hpp" -+#include "utilities/macros.hpp" -+#include "utilities/formatBuffer.hpp" ++ assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); ++ __ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, temp, byte_no, 1, index_size); ++ __ mv(t0, (int) code); ++ __ beq(temp, t0, resolved); + -+#include OS_HEADER_INLINE(os) ++ // resolve first time through ++ // Class initialization barrier slow path lands here as well. ++ __ bind(clinit_barrier_slow); + -+const char* VM_Version::_uarch = ""; -+uint32_t VM_Version::_initial_vector_length = 0; ++ address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache); ++ __ mv(temp, (int) code); ++ __ call_VM(noreg, entry, temp); + -+void VM_Version::initialize() { -+ get_os_cpu_info(); ++ // Update registers with resolved info ++ __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size); ++ // n.b. unlike x86 Rcache is now rcpool plus the indexed offset ++ // so all clients ofthis method must be modified accordingly ++ __ bind(resolved); + -+ if (FLAG_IS_DEFAULT(UseFMA)) { -+ FLAG_SET_DEFAULT(UseFMA, true); -+ } -+ if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) { -+ FLAG_SET_DEFAULT(AllocatePrefetchDistance, 0); ++ // Class initialization barrier for static methods ++ if (VM_Version::supports_fast_class_init_checks() && bytecode() == Bytecodes::_invokestatic) { ++ __ load_resolved_method_at_index(byte_no, temp, Rcache); ++ __ load_method_holder(temp, temp); ++ __ clinit_barrier(temp, t0, NULL, &clinit_barrier_slow); + } ++} + -+ if (UseAES || UseAESIntrinsics) { -+ if (UseAES && !FLAG_IS_DEFAULT(UseAES)) { -+ warning("AES instructions are not available on this CPU"); -+ FLAG_SET_DEFAULT(UseAES, false); -+ } -+ if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) { -+ warning("AES intrinsics are not available on this CPU"); -+ FLAG_SET_DEFAULT(UseAESIntrinsics, false); -+ } -+ } ++// The Rcache and index registers must be set before call ++// n.b unlike x86 cache already includes the index offset ++void TemplateTable::load_field_cp_cache_entry(Register obj, ++ Register cache, ++ Register index, ++ Register off, ++ Register flags, ++ bool is_static = false) { ++ assert_different_registers(cache, index, flags, off); + -+ if (UseAESCTRIntrinsics) { -+ warning("AES/CTR intrinsics are not available on this CPU"); -+ FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false); -+ } ++ ByteSize cp_base_offset = ConstantPoolCache::base_offset(); ++ // Field offset ++ __ ld(off, Address(cache, in_bytes(cp_base_offset + ++ ConstantPoolCacheEntry::f2_offset()))); ++ // Flags ++ __ lwu(flags, Address(cache, in_bytes(cp_base_offset + ++ ConstantPoolCacheEntry::flags_offset()))); + -+ if (UseSHA) { -+ warning("SHA instructions are not available on this CPU"); -+ FLAG_SET_DEFAULT(UseSHA, false); ++ // klass overwrite register ++ if (is_static) { ++ __ ld(obj, Address(cache, in_bytes(cp_base_offset + ++ ConstantPoolCacheEntry::f1_offset()))); ++ const int mirror_offset = in_bytes(Klass::java_mirror_offset()); ++ __ ld(obj, Address(obj, mirror_offset)); ++ __ resolve_oop_handle(obj); + } ++} + -+ if (UseSHA1Intrinsics) { -+ warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU."); -+ 
FLAG_SET_DEFAULT(UseSHA1Intrinsics, false); -+ } ++void TemplateTable::load_invoke_cp_cache_entry(int byte_no, ++ Register method, ++ Register itable_index, ++ Register flags, ++ bool is_invokevirtual, ++ bool is_invokevfinal, /*unused*/ ++ bool is_invokedynamic) { ++ // setup registers ++ const Register cache = t1; ++ const Register index = x14; ++ assert_different_registers(method, flags); ++ assert_different_registers(method, cache, index); ++ assert_different_registers(itable_index, flags); ++ assert_different_registers(itable_index, cache, index); ++ // determine constant pool cache field offsets ++ assert(is_invokevirtual == (byte_no == f2_byte), "is_invokevirtual flag redundant"); ++ const int method_offset = in_bytes(ConstantPoolCache::base_offset() + ++ (is_invokevirtual ? ++ ConstantPoolCacheEntry::f2_offset() : ++ ConstantPoolCacheEntry::f1_offset())); ++ const int flags_offset = in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::flags_offset()); ++ // access constant pool cache fields ++ const int index_offset = in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::f2_offset()); + -+ if (UseSHA256Intrinsics) { -+ warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU."); -+ FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); -+ } ++ const size_t index_size = (is_invokedynamic ? sizeof(u4) : sizeof(u2)); ++ resolve_cache_and_index(byte_no, cache, index, index_size); ++ __ ld(method, Address(cache, method_offset)); + -+ if (UseSHA512Intrinsics) { -+ warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU."); -+ FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); ++ if (itable_index != noreg) { ++ __ ld(itable_index, Address(cache, index_offset)); + } ++ __ lwu(flags, Address(cache, flags_offset)); ++} + -+ if (UseCRC32Intrinsics) { -+ warning("CRC32Intrinsics instructions are not available on this CPU."); -+ FLAG_SET_DEFAULT(UseCRC32Intrinsics, false); -+ } ++// The registers cache and index expected to be set before call. ++// Correct values of the cache and index registers are preserved. ++void TemplateTable::jvmti_post_field_access(Register cache, Register index, ++ bool is_static, bool has_tos) { ++ // do the JVMTI work here to avoid disturbing the register state below ++ // We use c_rarg registers here beacause we want to use the register used in ++ // the call to the VM ++ if (JvmtiExport::can_post_field_access()) { ++ // Check to see if a field access watch has been set before we ++ // take the time to call into the VM. 
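The JVMTI posts in this file are all gated the same way: load a global watch counter and take the VM call only when an agent has actually armed a watch, so the common case costs one load and a branch. A minimal sketch of that pattern (the counter and slow-path names are placeholders, not the real JvmtiExport interface):

#include <atomic>

// Stand-in for the counter behind JvmtiExport::get_field_access_count_addr().
static std::atomic<int> g_field_access_watch_count{0};

static void post_field_access_to_vm() { /* slow path: transition into the runtime */ }

// Fast path executed for every interpreted field access: a single load of the
// counter, and the expensive VM call only when a watch is actually armed.
static inline void maybe_post_field_access() {
  if (g_field_access_watch_count.load(std::memory_order_relaxed) != 0) {
    post_field_access_to_vm();
  }
}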
++ Label L1; ++ assert_different_registers(cache, index, x10); ++ int32_t offset = 0; ++ __ la_patchable(t0, ExternalAddress((address) JvmtiExport::get_field_access_count_addr()), offset); ++ __ lwu(x10, Address(t0, offset)); + -+ if (UseCRC32CIntrinsics) { -+ warning("CRC32CIntrinsics instructions are not available on this CPU."); -+ FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false); -+ } ++ __ beqz(x10, L1); + -+ if (UseRVV) { -+ if (!(_features & CPU_V)) { -+ warning("RVV is not supported on this CPU"); -+ FLAG_SET_DEFAULT(UseRVV, false); ++ __ get_cache_and_index_at_bcp(c_rarg2, c_rarg3, 1); ++ __ la(c_rarg2, Address(c_rarg2, in_bytes(ConstantPoolCache::base_offset()))); ++ ++ if (is_static) { ++ __ mv(c_rarg1, zr); // NULL object reference + } else { -+ // read vector length from vector CSR vlenb -+ _initial_vector_length = get_current_vector_length(); ++ __ ld(c_rarg1, at_tos()); // get object pointer without popping it ++ __ verify_oop(c_rarg1); + } ++ // c_rarg1: object pointer or NULL ++ // c_rarg2: cache entry pointer ++ // c_rarg3: jvalue object on the stack ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_access), ++ c_rarg1, c_rarg2, c_rarg3); ++ __ get_cache_and_index_at_bcp(cache, index, 1); ++ __ bind(L1); + } ++} + -+ if (FLAG_IS_DEFAULT(AvoidUnalignedAccesses)) { -+ FLAG_SET_DEFAULT(AvoidUnalignedAccesses, true); -+ } ++void TemplateTable::pop_and_check_object(Register r) ++{ ++ __ pop_ptr(r); ++ __ null_check(r); // for field access must check obj. ++ __ verify_oop(r); ++} + -+ if (UseZbb) { -+ if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { -+ FLAG_SET_DEFAULT(UsePopCountInstruction, true); -+ } -+ } else { -+ FLAG_SET_DEFAULT(UsePopCountInstruction, false); ++void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteControl rc) ++{ ++ const Register cache = x12; ++ const Register index = x13; ++ const Register obj = x14; ++ const Register off = x9; ++ const Register flags = x10; ++ const Register raw_flags = x16; ++ const Register bc = x14; // uses same reg as obj, so don't mix them ++ ++ resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); ++ jvmti_post_field_access(cache, index, is_static, false); ++ load_field_cp_cache_entry(obj, cache, index, off, raw_flags, is_static); ++ ++ if (!is_static) { ++ // obj is on the stack ++ pop_and_check_object(obj); + } + -+ char buf[512]; -+ buf[0] = '\0'; -+ if (_uarch != NULL && strcmp(_uarch, "") != 0) snprintf(buf, sizeof(buf), "%s,", _uarch); -+ strcat(buf, "rv64"); -+#define ADD_FEATURE_IF_SUPPORTED(id, name, bit) if (_features & CPU_##id) strcat(buf, name); -+ CPU_FEATURE_FLAGS(ADD_FEATURE_IF_SUPPORTED) -+#undef ADD_FEATURE_IF_SUPPORTED ++ __ add(off, obj, off); ++ const Address field(off); + -+ _features_string = os::strdup(buf); ++ Label Done, notByte, notBool, notInt, notShort, notChar, ++ notLong, notFloat, notObj, notDouble; + -+#ifdef COMPILER2 -+ initialize_c2(); -+#endif // COMPILER2 -+} ++ __ slli(flags, raw_flags, XLEN - (ConstantPoolCacheEntry::tos_state_shift + ++ ConstantPoolCacheEntry::tos_state_bits)); ++ __ srli(flags, flags, XLEN - ConstantPoolCacheEntry::tos_state_bits); + -+#ifdef COMPILER2 -+void VM_Version::initialize_c2() { -+ // lack of cmove in riscv -+ if (UseCMoveUnconditionally) { -+ FLAG_SET_DEFAULT(UseCMoveUnconditionally, false); -+ } -+ if (ConditionalMoveLimit > 0) { -+ FLAG_SET_DEFAULT(ConditionalMoveLimit, 0); ++ assert(btos == 0, "change code, btos != 0"); ++ __ bnez(flags, notByte); ++ ++ // Dont't rewrite getstatic, only getfield ++ if 
(is_static) { ++ rc = may_not_rewrite; + } + -+ if (!UseRVV) { -+ FLAG_SET_DEFAULT(SpecialEncodeISOArray, false); ++ // btos ++ __ access_load_at(T_BYTE, IN_HEAP, x10, field, noreg, noreg); ++ __ push(btos); ++ // Rewrite bytecode to be faster ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_bgetfield, bc, x11); + } ++ __ j(Done); + -+ if (!UseRVV && MaxVectorSize) { -+ FLAG_SET_DEFAULT(MaxVectorSize, 0); -+ } ++ __ bind(notByte); ++ __ sub(t0, flags, (u1)ztos); ++ __ bnez(t0, notBool); + -+ if (UseRVV) { -+ if (FLAG_IS_DEFAULT(MaxVectorSize)) { -+ MaxVectorSize = _initial_vector_length; -+ } else if (MaxVectorSize < 16) { -+ warning("RVV does not support vector length less than 16 bytes. Disabling RVV."); -+ UseRVV = false; -+ } else if (is_power_of_2(MaxVectorSize)) { -+ if (MaxVectorSize > _initial_vector_length) { -+ warning("Current system only supports max RVV vector length %d. Set MaxVectorSize to %d", -+ _initial_vector_length, _initial_vector_length); -+ } -+ MaxVectorSize = _initial_vector_length; -+ } else { -+ vm_exit_during_initialization(err_msg("Unsupported MaxVectorSize: %d", (int)MaxVectorSize)); -+ } ++ // ztos (same code as btos) ++ __ access_load_at(T_BOOLEAN, IN_HEAP, x10, field, noreg, noreg); ++ __ push(ztos); ++ // Rewirte bytecode to be faster ++ if (rc == may_rewrite) { ++ // uses btos rewriting, no truncating to t/f bit is needed for getfield ++ patch_bytecode(Bytecodes::_fast_bgetfield, bc, x11); + } ++ __ j(Done); + -+ // disable prefetch -+ if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { -+ FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0); ++ __ bind(notBool); ++ __ sub(t0, flags, (u1)atos); ++ __ bnez(t0, notObj); ++ // atos ++ do_oop_load(_masm, field, x10, IN_HEAP); ++ __ push(atos); ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_agetfield, bc, x11); + } ++ __ j(Done); + -+ if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) { -+ FLAG_SET_DEFAULT(UseMulAddIntrinsic, true); ++ __ bind(notObj); ++ __ sub(t0, flags, (u1)itos); ++ __ bnez(t0, notInt); ++ // itos ++ __ access_load_at(T_INT, IN_HEAP, x10, field, noreg, noreg); ++ __ addw(x10, x10, zr); // signed extended ++ __ push(itos); ++ // Rewrite bytecode to be faster ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_igetfield, bc, x11); + } ++ __ j(Done); + -+ if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { -+ FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, true); ++ __ bind(notInt); ++ __ sub(t0, flags, (u1)ctos); ++ __ bnez(t0, notChar); ++ // ctos ++ __ access_load_at(T_CHAR, IN_HEAP, x10, field, noreg, noreg); ++ __ push(ctos); ++ // Rewrite bytecode to be faster ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_cgetfield, bc, x11); + } ++ __ j(Done); + -+ if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { -+ FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, true); ++ __ bind(notChar); ++ __ sub(t0, flags, (u1)stos); ++ __ bnez(t0, notShort); ++ // stos ++ __ access_load_at(T_SHORT, IN_HEAP, x10, field, noreg, noreg); ++ __ push(stos); ++ // Rewrite bytecode to be faster ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_sgetfield, bc, x11); + } ++ __ j(Done); + -+ if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) { -+ FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, true); ++ __ bind(notShort); ++ __ sub(t0, flags, (u1)ltos); ++ __ bnez(t0, notLong); ++ // ltos ++ __ access_load_at(T_LONG, IN_HEAP, x10, field, noreg, noreg); ++ __ push(ltos); ++ // Rewrite bytecode to be faster ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_lgetfield, 
bc, x11); + } ++ __ j(Done); + -+ if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) { -+ FLAG_SET_DEFAULT(UseSquareToLenIntrinsic, true); ++ __ bind(notLong); ++ __ sub(t0, flags, (u1)ftos); ++ __ bnez(t0, notFloat); ++ // ftos ++ __ access_load_at(T_FLOAT, IN_HEAP, noreg /* ftos */, field, noreg, noreg); ++ __ push(ftos); ++ // Rewrite bytecode to be faster ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_fgetfield, bc, x11); + } -+} -+#endif // COMPILER2 -diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.hpp b/src/hotspot/cpu/riscv/vm_version_riscv.hpp -new file mode 100644 -index 000000000..0178e6d75 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/vm_version_riscv.hpp -@@ -0,0 +1,65 @@ -+/* -+ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
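The slli/srli pair used in getfield_or_static above is the standard RV64 idiom for extracting an unsigned bitfield: shift the field up to the top of the register, then shift it back down logically. A small C++ illustration with assumed shift and width values (the real constants are defined by ConstantPoolCacheEntry):

#include <cstdint>

// Assumed field position for illustration; the real values come from
// ConstantPoolCacheEntry::tos_state_shift / tos_state_bits.
static const int XLEN            = 64;
static const int tos_state_shift = 28;
static const int tos_state_bits  = 4;

// Mirrors:  slli(flags, raw_flags, XLEN - (shift + bits));
//           srli(flags, flags,     XLEN - bits);
static inline uint64_t extract_tos_state(uint64_t raw_flags) {
  const uint64_t up = raw_flags << (XLEN - (tos_state_shift + tos_state_bits));
  return up >> (XLEN - tos_state_bits);   // == (raw_flags >> shift) & ((1 << bits) - 1)
}

The case chain that follows relies on btos being 0, so the first test is a plain bnez and each later case subtracts the next tos value before testing for zero.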
-+ * -+ */ ++ __ j(Done); + -+#ifndef CPU_RISCV_VM_VERSION_RISCV_HPP -+#define CPU_RISCV_VM_VERSION_RISCV_HPP ++ __ bind(notFloat); ++#ifdef ASSERT ++ __ sub(t0, flags, (u1)dtos); ++ __ bnez(t0, notDouble); ++#endif ++ // dtos ++ __ access_load_at(T_DOUBLE, IN_HEAP, noreg /* ftos */, field, noreg, noreg); ++ __ push(dtos); ++ // Rewrite bytecode to be faster ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_dgetfield, bc, x11); ++ } ++#ifdef ASSERT ++ __ j(Done); + -+#include "runtime/abstract_vm_version.hpp" -+#include "runtime/globals_extension.hpp" -+#include "utilities/sizes.hpp" ++ __ bind(notDouble); ++ __ stop("Bad state"); ++#endif + -+class VM_Version : public Abstract_VM_Version { -+public: -+ // Initialization -+ static void initialize(); ++ __ bind(Done); + -+ enum Feature_Flag { -+#define CPU_FEATURE_FLAGS(decl) \ -+ decl(I, "i", 8) \ -+ decl(M, "m", 12) \ -+ decl(A, "a", 0) \ -+ decl(F, "f", 5) \ -+ decl(D, "d", 3) \ -+ decl(C, "c", 2) \ -+ decl(V, "v", 21) ++ Label notVolatile; ++ __ andi(t0, raw_flags, 1UL << ConstantPoolCacheEntry::is_volatile_shift); ++ __ beqz(t0, notVolatile); ++ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); ++ __ bind(notVolatile); ++} + -+#define DECLARE_CPU_FEATURE_FLAG(id, name, bit) CPU_##id = (1 << bit), -+ CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_FLAG) -+#undef DECLARE_CPU_FEATURE_FLAG -+ }; ++void TemplateTable::getfield(int byte_no) ++{ ++ getfield_or_static(byte_no, false); ++} + -+protected: -+ static const char* _uarch; -+ static uint32_t _initial_vector_length; -+ static void get_os_cpu_info(); -+ static uint32_t get_current_vector_length(); ++void TemplateTable::nofast_getfield(int byte_no) { ++ getfield_or_static(byte_no, false, may_not_rewrite); ++} + -+#ifdef COMPILER2 -+private: -+ static void initialize_c2(); -+#endif // COMPILER2 -+}; ++void TemplateTable::getstatic(int byte_no) ++{ ++ getfield_or_static(byte_no, true); ++} + -+#endif // CPU_RISCV_VM_VERSION_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.cpp b/src/hotspot/cpu/riscv/vmreg_riscv.cpp -new file mode 100644 -index 000000000..6572d9334 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/vmreg_riscv.cpp -@@ -0,0 +1,60 @@ -+/* -+ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++// The registers cache and index expected to be set before call. 
++// The function may destroy various registers, just not the cache and index registers. ++void TemplateTable::jvmti_post_field_mod(Register cache, Register index, bool is_static) { ++ transition(vtos, vtos); + -+#include "precompiled.hpp" -+#include "asm/assembler.hpp" -+#include "code/vmreg.hpp" ++ ByteSize cp_base_offset = ConstantPoolCache::base_offset(); + -+void VMRegImpl::set_regName() { -+ Register reg = ::as_Register(0); -+ int i = 0; -+ for ( ; i < ConcreteRegisterImpl::max_gpr ; ) { -+ for (int j = 0; j < RegisterImpl::max_slots_per_register; j++) { -+ regName[i++] = reg->name(); -+ } -+ reg = reg->successor(); -+ } ++ if (JvmtiExport::can_post_field_modification()) { ++ // Check to see if a field modification watch has been set before ++ // we take the time to call into the VM. ++ Label L1; ++ assert_different_registers(cache, index, x10); ++ int32_t offset = 0; ++ __ la_patchable(t0, ExternalAddress((address)JvmtiExport::get_field_modification_count_addr()), offset); ++ __ lwu(x10, Address(t0, offset)); ++ __ beqz(x10, L1); + -+ FloatRegister freg = ::as_FloatRegister(0); -+ for ( ; i < ConcreteRegisterImpl::max_fpr ; ) { -+ for (int j = 0; j < FloatRegisterImpl::max_slots_per_register; j++) { -+ regName[i++] = freg->name(); -+ } -+ freg = freg->successor(); -+ } ++ __ get_cache_and_index_at_bcp(c_rarg2, t0, 1); + -+ VectorRegister vreg = ::as_VectorRegister(0); -+ for ( ; i < ConcreteRegisterImpl::max_vpr ; ) { -+ for (int j = 0; j < VectorRegisterImpl::max_slots_per_register; j++) { -+ regName[i++] = vreg->name(); ++ if (is_static) { ++ // Life is simple. Null out the object pointer. ++ __ mv(c_rarg1, zr); ++ } else { ++ // Life is harder. The stack holds the value on top, followed by ++ // the object. We don't know the size of the value, though; it ++ // could be one or two words depending on its type. As a result, ++ // we must find the type to determine where the object is. ++ __ lwu(c_rarg3, Address(c_rarg2, ++ in_bytes(cp_base_offset + ++ ConstantPoolCacheEntry::flags_offset()))); ++ __ srli(c_rarg3, c_rarg3, ConstantPoolCacheEntry::tos_state_shift); ++ ConstantPoolCacheEntry::verify_tos_state_shift(); ++ Label nope2, done, ok; ++ __ ld(c_rarg1, at_tos_p1()); // initially assume a one word jvalue ++ __ sub(t0, c_rarg3, ltos); ++ __ beqz(t0, ok); ++ __ sub(t0, c_rarg3, dtos); ++ __ bnez(t0, nope2); ++ __ bind(ok); ++ __ ld(c_rarg1, at_tos_p2()); // ltos (two word jvalue); ++ __ bind(nope2); + } -+ vreg = vreg->successor(); -+ } -+ -+ for ( ; i < ConcreteRegisterImpl::number_of_registers; i++) { -+ regName[i] = "NON-GPR-FPR-VPR"; ++ // cache entry pointer ++ __ add(c_rarg2, c_rarg2, in_bytes(cp_base_offset)); ++ // object (tos) ++ __ mv(c_rarg3, esp); ++ // c_rarg1: object pointer set up above (NULL if static) ++ // c_rarg2: cache entry pointer ++ // c_rarg3: jvalue object on the stack ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_modification), ++ c_rarg1, c_rarg2, c_rarg3); ++ __ get_cache_and_index_at_bcp(cache, index, 1); ++ __ bind(L1); + } +} -diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.hpp b/src/hotspot/cpu/riscv/vmreg_riscv.hpp -new file mode 100644 -index 000000000..ec76a1db1 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/vmreg_riscv.hpp -@@ -0,0 +1,64 @@ -+/* -+ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. 
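jvmti_post_field_mod above has to find the receiver underneath a value whose size it does not know in advance: long and double occupy two expression-stack slots, everything else one, which is what the ltos/dtos comparison decides. A hedged sketch of that calculation, assuming a word-sized, downward-growing expression stack as implied by the at_tos_p1()/at_tos_p2() addressing:

#include <cstdint>

enum TosState { btos, ztos, ctos, stos, itos, ltos, ftos, dtos, atos, vtos };

// The value to store sits on top of the expression stack with the receiver
// below it; long and double take two slots, everything else one.
static inline intptr_t* receiver_slot(intptr_t* esp, TosState tos) {
  const int value_slots = (tos == ltos || tos == dtos) ? 2 : 1;
  return esp + value_slots;        // at_tos_p1() or at_tos_p2() in the code above
}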
-+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ + -+#ifndef CPU_RISCV_VMREG_RISCV_HPP -+#define CPU_RISCV_VMREG_RISCV_HPP ++void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteControl rc) { ++ transition(vtos, vtos); + -+inline bool is_Register() { -+ return (unsigned int) value() < (unsigned int) ConcreteRegisterImpl::max_gpr; -+} ++ const Register cache = x12; ++ const Register index = x13; ++ const Register obj = x12; ++ const Register off = x9; ++ const Register flags = x10; ++ const Register bc = x14; + -+inline bool is_FloatRegister() { -+ return value() >= ConcreteRegisterImpl::max_gpr && value() < ConcreteRegisterImpl::max_fpr; -+} ++ resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); ++ jvmti_post_field_mod(cache, index, is_static); ++ load_field_cp_cache_entry(obj, cache, index, off, flags, is_static); + -+inline bool is_VectorRegister() { -+ return value() >= ConcreteRegisterImpl::max_fpr && value() < ConcreteRegisterImpl::max_vpr; -+} ++ Label Done; ++ __ mv(x15, flags); + -+inline Register as_Register() { -+ assert( is_Register(), "must be"); -+ return ::as_Register(value() / RegisterImpl::max_slots_per_register); -+} ++ { ++ Label notVolatile; ++ __ andi(t0, x15, 1UL << ConstantPoolCacheEntry::is_volatile_shift); ++ __ beqz(t0, notVolatile); ++ __ membar(MacroAssembler::StoreStore | MacroAssembler::LoadStore); ++ __ bind(notVolatile); ++ } + -+inline FloatRegister as_FloatRegister() { -+ assert( is_FloatRegister() && is_even(value()), "must be" ); -+ return ::as_FloatRegister((value() - ConcreteRegisterImpl::max_gpr) / -+ FloatRegisterImpl::max_slots_per_register); -+} ++ Label notByte, notBool, notInt, notShort, notChar, ++ notLong, notFloat, notObj, notDouble; + -+inline VectorRegister as_VectorRegister() { -+ assert( is_VectorRegister() && ((value() & (VectorRegisterImpl::max_slots_per_register - 1)) == 0), "must be" ); -+ return ::as_VectorRegister((value() - ConcreteRegisterImpl::max_fpr) / -+ VectorRegisterImpl::max_slots_per_register); -+} ++ __ slli(flags, flags, XLEN - (ConstantPoolCacheEntry::tos_state_shift + ++ ConstantPoolCacheEntry::tos_state_bits)); ++ __ srli(flags, flags, XLEN - ConstantPoolCacheEntry::tos_state_bits); + -+inline bool is_concrete() { -+ assert(is_reg(), "must be"); -+ return is_even(value()); -+} ++ assert(btos == 0, "change code, btos != 0"); ++ __ bnez(flags, notByte); + -+#endif // CPU_RISCV_VMREG_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp b/src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp -new file mode 100644 -index 
000000000..9605e59f4 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp -@@ -0,0 +1,47 @@ -+/* -+ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++ // Don't rewrite putstatic, only putfield ++ if (is_static) { ++ rc = may_not_rewrite; ++ } + -+#ifndef CPU_RISCV_VM_VMREG_RISCV_INLINE_HPP -+#define CPU_RISCV_VM_VMREG_RISCV_INLINE_HPP ++ // btos ++ { ++ __ pop(btos); ++ // field address ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add(off, obj, off); // if static, obj from cache, else obj from stack. ++ const Address field(off, 0); // off register as temparator register. ++ __ access_store_at(T_BYTE, IN_HEAP, field, x10, noreg, noreg); ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_bputfield, bc, x11, true, byte_no); ++ } ++ __ j(Done); ++ } + -+inline VMReg RegisterImpl::as_VMReg() { -+ if( this == noreg ) { -+ return VMRegImpl::Bad(); ++ __ bind(notByte); ++ __ sub(t0, flags, (u1)ztos); ++ __ bnez(t0, notBool); ++ ++ // ztos ++ { ++ __ pop(ztos); ++ // field address ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add(off, obj, off); // if static, obj from cache, else obj from stack. ++ const Address field(off, 0); ++ __ access_store_at(T_BOOLEAN, IN_HEAP, field, x10, noreg, noreg); ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_zputfield, bc, x11, true, byte_no); ++ } ++ __ j(Done); + } -+ return VMRegImpl::as_VMReg(encoding() * RegisterImpl::max_slots_per_register); -+} + -+inline VMReg FloatRegisterImpl::as_VMReg() { -+ return VMRegImpl::as_VMReg((encoding() * FloatRegisterImpl::max_slots_per_register) + -+ ConcreteRegisterImpl::max_gpr); -+} ++ __ bind(notBool); ++ __ sub(t0, flags, (u1)atos); ++ __ bnez(t0, notObj); + -+inline VMReg VectorRegisterImpl::as_VMReg() { -+ return VMRegImpl::as_VMReg((encoding() * VectorRegisterImpl::max_slots_per_register) + -+ ConcreteRegisterImpl::max_fpr); -+} ++ // atos ++ { ++ __ pop(atos); ++ // field address ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add(off, obj, off); // if static, obj from cache, else obj from stack. 
++ const Address field(off, 0); ++ // Store into the field ++ do_oop_store(_masm, field, x10, IN_HEAP); ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_aputfield, bc, x11, true, byte_no); ++ } ++ __ j(Done); ++ } + -+#endif // CPU_RISCV_VM_VMREG_RISCV_INLINE_HPP -diff --git a/src/hotspot/cpu/riscv/vtableStubs_riscv.cpp b/src/hotspot/cpu/riscv/vtableStubs_riscv.cpp -new file mode 100644 -index 000000000..b2aa87ab8 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/vtableStubs_riscv.cpp -@@ -0,0 +1,260 @@ -+/* -+ * Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++ __ bind(notObj); ++ __ sub(t0, flags, (u1)itos); ++ __ bnez(t0, notInt); + -+#include "precompiled.hpp" -+#include "asm/macroAssembler.inline.hpp" -+#include "assembler_riscv.inline.hpp" -+#include "code/vtableStubs.hpp" -+#include "interp_masm_riscv.hpp" -+#include "memory/resourceArea.hpp" -+#include "oops/compiledICHolder.hpp" -+#include "oops/instanceKlass.hpp" -+#include "oops/klassVtable.hpp" -+#include "runtime/sharedRuntime.hpp" -+#include "vmreg_riscv.inline.hpp" -+#ifdef COMPILER2 -+#include "opto/runtime.hpp" -+#endif ++ // itos ++ { ++ __ pop(itos); ++ // field address ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add(off, obj, off); // if static, obj from cache, else obj from stack. ++ const Address field(off, 0); ++ __ access_store_at(T_INT, IN_HEAP, field, x10, noreg, noreg); ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_iputfield, bc, x11, true, byte_no); ++ } ++ __ j(Done); ++ } + -+// machine-dependent part of VtableStubs: create VtableStub of correct size and -+// initialize its code ++ __ bind(notInt); ++ __ sub(t0, flags, (u1)ctos); ++ __ bnez(t0, notChar); + -+#define __ masm-> ++ // ctos ++ { ++ __ pop(ctos); ++ // field address ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add(off, obj, off); // if static, obj from cache, else obj from stack. 
++ const Address field(off, 0); ++ __ access_store_at(T_CHAR, IN_HEAP, field, x10, noreg, noreg); ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_cputfield, bc, x11, true, byte_no); ++ } ++ __ j(Done); ++ } + -+#ifndef PRODUCT -+extern "C" void bad_compiled_vtable_index(JavaThread* thread, oop receiver, int index); -+#endif ++ __ bind(notChar); ++ __ sub(t0, flags, (u1)stos); ++ __ bnez(t0, notShort); + -+VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { -+ // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. -+ const int stub_code_length = code_size_limit(true); -+ VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index); -+ // Can be NULL if there is no free space in the code cache. -+ if (s == NULL) { -+ return NULL; ++ // stos ++ { ++ __ pop(stos); ++ // field address ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add(off, obj, off); // if static, obj from cache, else obj from stack. ++ const Address field(off, 0); ++ __ access_store_at(T_SHORT, IN_HEAP, field, x10, noreg, noreg); ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_sputfield, bc, x11, true, byte_no); ++ } ++ __ j(Done); + } + -+ // Count unused bytes in instruction sequences of variable size. -+ // We add them to the computed buffer size in order to avoid -+ // overflow in subsequently generated stubs. -+ address start_pc = NULL; -+ int slop_bytes = 0; -+ int slop_delta = 0; -+ -+ ResourceMark rm; -+ CodeBuffer cb(s->entry_point(), stub_code_length); -+ MacroAssembler* masm = new MacroAssembler(&cb); -+ assert_cond(masm != NULL); ++ __ bind(notShort); ++ __ sub(t0, flags, (u1)ltos); ++ __ bnez(t0, notLong); + -+#if (!defined(PRODUCT) && defined(COMPILER2)) -+ if (CountCompiledCalls) { -+ __ la(t2, ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr())); -+ __ increment(Address(t2)); ++ // ltos ++ { ++ __ pop(ltos); ++ // field address ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add(off, obj, off); // if static, obj from cache, else obj from stack. ++ const Address field(off, 0); ++ __ access_store_at(T_LONG, IN_HEAP, field, x10, noreg, noreg); ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_lputfield, bc, x11, true, byte_no); ++ } ++ __ j(Done); + } -+#endif + -+ // get receiver (need to skip return address on top of stack) -+ assert(VtableStub::receiver_location() == j_rarg0->as_VMReg(), "receiver expected in j_rarg0"); ++ __ bind(notLong); ++ __ sub(t0, flags, (u1)ftos); ++ __ bnez(t0, notFloat); + -+ // get receiver klass -+ address npe_addr = __ pc(); -+ __ load_klass(t2, j_rarg0); ++ // ftos ++ { ++ __ pop(ftos); ++ // field address ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add(off, obj, off); // if static, obj from cache, else obj from stack. 
++ const Address field(off, 0); ++ __ access_store_at(T_FLOAT, IN_HEAP, field, noreg /* ftos */, noreg, noreg); ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_fputfield, bc, x11, true, byte_no); ++ } ++ __ j(Done); ++ } + -+#ifndef PRODUCT -+ if (DebugVtables) { -+ Label L; -+ start_pc = __ pc(); ++ __ bind(notFloat); ++#ifdef ASSERT ++ __ sub(t0, flags, (u1)dtos); ++ __ bnez(t0, notDouble); ++#endif + -+ // check offset vs vtable length -+ __ lwu(t0, Address(t2, Klass::vtable_length_offset())); -+ __ mvw(t1, vtable_index * vtableEntry::size()); -+ __ bgt(t0, t1, L); -+ __ enter(); -+ __ mv(x12, vtable_index); ++ // dtos ++ { ++ __ pop(dtos); ++ // field address ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add(off, obj, off); // if static, obj from cache, else obj from stack. ++ const Address field(off, 0); ++ __ access_store_at(T_DOUBLE, IN_HEAP, field, noreg /* dtos */, noreg, noreg); ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_dputfield, bc, x11, true, byte_no); ++ } ++ } + -+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), j_rarg0, x12); -+ const ptrdiff_t estimate = 256; -+ const ptrdiff_t codesize = __ pc() - start_pc; -+ slop_delta = estimate - codesize; // call_VM varies in length, depending on data -+ slop_bytes += slop_delta; -+ assert(slop_delta >= 0, "vtable #%d: Code size estimate (%d) for DebugVtables too small, required: %d", vtable_index, (int)estimate, (int)codesize); ++#ifdef ASSERT ++ __ j(Done); + -+ __ leave(); -+ __ bind(L); -+ } -+#endif // PRODUCT ++ __ bind(notDouble); ++ __ stop("Bad state"); ++#endif + -+ start_pc = __ pc(); -+ __ lookup_virtual_method(t2, vtable_index, xmethod); -+ // lookup_virtual_method generates -+ // 4 instructions (maximum value encountered in normal case):li(lui + addiw) + add + ld -+ // 1 instruction (best case):ld * 1 -+ slop_delta = 16 - (int)(__ pc() - start_pc); -+ slop_bytes += slop_delta; -+ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ __ bind(Done); + -+#ifndef PRODUCT -+ if (DebugVtables) { -+ Label L; -+ __ beqz(xmethod, L); -+ __ ld(t0, Address(xmethod, Method::from_compiled_offset())); -+ __ bnez(t0, L); -+ __ stop("Vtable entry is NULL"); -+ __ bind(L); ++ { ++ Label notVolatile; ++ __ andi(t0, x15, 1UL << ConstantPoolCacheEntry::is_volatile_shift); ++ __ beqz(t0, notVolatile); ++ __ membar(MacroAssembler::StoreLoad | MacroAssembler::StoreStore); ++ __ bind(notVolatile); + } -+#endif // PRODUCT ++} + -+ // x10: receiver klass -+ // xmethod: Method* -+ // x12: receiver -+ address ame_addr = __ pc(); -+ __ ld(t0, Address(xmethod, Method::from_compiled_offset())); -+ __ jr(t0); ++void TemplateTable::putfield(int byte_no) ++{ ++ putfield_or_static(byte_no, false); ++} + -+ masm->flush(); -+ bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, 0); ++void TemplateTable::nofast_putfield(int byte_no) { ++ putfield_or_static(byte_no, false, may_not_rewrite); ++} + -+ return s; ++void TemplateTable::putstatic(int byte_no) { ++ putfield_or_static(byte_no, true); +} + -+VtableStub* VtableStubs::create_itable_stub(int itable_index) { -+ // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. -+ const int stub_code_length = code_size_limit(false); -+ VtableStub* s = new(stub_code_length) VtableStub(false, itable_index); -+ // Can be NULL if there is no free space in the code cache. 
-+ if (s == NULL) { -+ return NULL; -+ } -+ // Count unused bytes in instruction sequences of variable size. -+ // We add them to the computed buffer size in order to avoid -+ // overflow in subsequently generated stubs. -+ address start_pc = NULL; -+ int slop_bytes = 0; -+ int slop_delta = 0; ++void TemplateTable::jvmti_post_fast_field_mod() ++{ ++ if (JvmtiExport::can_post_field_modification()) { ++ // Check to see if a field modification watch has been set before ++ // we take the time to call into the VM. ++ Label L2; ++ int32_t offset = 0; ++ __ la_patchable(t0, ExternalAddress((address)JvmtiExport::get_field_modification_count_addr()), offset); ++ __ lwu(c_rarg3, Address(t0, offset)); ++ __ beqz(c_rarg3, L2); ++ __ pop_ptr(x9); // copy the object pointer from tos ++ __ verify_oop(x9); ++ __ push_ptr(x9); // put the object pointer back on tos ++ // Save tos values before call_VM() clobbers them. Since we have ++ // to do it for every data type, we use the saved values as the ++ // jvalue object. ++ switch (bytecode()) { // load values into the jvalue object ++ case Bytecodes::_fast_aputfield: __ push_ptr(x10); break; ++ case Bytecodes::_fast_bputfield: // fall through ++ case Bytecodes::_fast_zputfield: // fall through ++ case Bytecodes::_fast_sputfield: // fall through ++ case Bytecodes::_fast_cputfield: // fall through ++ case Bytecodes::_fast_iputfield: __ push_i(x10); break; ++ case Bytecodes::_fast_dputfield: __ push_d(); break; ++ case Bytecodes::_fast_fputfield: __ push_f(); break; ++ case Bytecodes::_fast_lputfield: __ push_l(x10); break; + -+ ResourceMark rm; -+ CodeBuffer cb(s->entry_point(), stub_code_length); -+ MacroAssembler* masm = new MacroAssembler(&cb); -+ assert_cond(masm != NULL); ++ default: ++ ShouldNotReachHere(); ++ } ++ __ mv(c_rarg3, esp); // points to jvalue on the stack ++ // access constant pool cache entry ++ __ get_cache_entry_pointer_at_bcp(c_rarg2, x10, 1); ++ __ verify_oop(x9); ++ // x9: object pointer copied above ++ // c_rarg2: cache entry pointer ++ // c_rarg3: jvalue object on the stack ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_modification), ++ x9, c_rarg2, c_rarg3); + -+#if (!defined(PRODUCT) && defined(COMPILER2)) -+ if (CountCompiledCalls) { -+ __ la(x18, ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr())); -+ __ increment(Address(x18)); ++ switch (bytecode()) { // restore tos values ++ case Bytecodes::_fast_aputfield: __ pop_ptr(x10); break; ++ case Bytecodes::_fast_bputfield: // fall through ++ case Bytecodes::_fast_zputfield: // fall through ++ case Bytecodes::_fast_sputfield: // fall through ++ case Bytecodes::_fast_cputfield: // fall through ++ case Bytecodes::_fast_iputfield: __ pop_i(x10); break; ++ case Bytecodes::_fast_dputfield: __ pop_d(); break; ++ case Bytecodes::_fast_fputfield: __ pop_f(); break; ++ case Bytecodes::_fast_lputfield: __ pop_l(x10); break; ++ default: break; ++ } ++ __ bind(L2); + } -+#endif ++} + -+ // get receiver (need to skip return address on top of stack) -+ assert(VtableStub::receiver_location() == j_rarg0->as_VMReg(), "receiver expected in j_rarg0"); ++void TemplateTable::fast_storefield(TosState state) ++{ ++ transition(state, vtos); + -+ // Entry arguments: -+ // t2: CompiledICHolder -+ // j_rarg0: Receiver ++ ByteSize base = ConstantPoolCache::base_offset(); + -+ // This stub is called from compiled code which has no callee-saved registers, -+ // so all registers except arguments are free at this point. 
-+ const Register recv_klass_reg = x18; -+ const Register holder_klass_reg = x19; // declaring interface klass (DECC) -+ const Register resolved_klass_reg = xmethod; // resolved interface klass (REFC) -+ const Register temp_reg = x28; -+ const Register temp_reg2 = x29; -+ const Register icholder_reg = t1; ++ jvmti_post_fast_field_mod(); + -+ Label L_no_such_interface; ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(x12, x11, 1); + -+ __ ld(resolved_klass_reg, Address(icholder_reg, CompiledICHolder::holder_klass_offset())); -+ __ ld(holder_klass_reg, Address(icholder_reg, CompiledICHolder::holder_metadata_offset())); ++ // Must prevent reordering of the following cp cache loads with bytecode load ++ __ membar(MacroAssembler::LoadLoad); + -+ start_pc = __ pc(); ++ // test for volatile with x13 ++ __ lwu(x13, Address(x12, in_bytes(base + ++ ConstantPoolCacheEntry::flags_offset()))); + -+ // get receiver klass (also an implicit null-check) -+ address npe_addr = __ pc(); -+ __ load_klass(recv_klass_reg, j_rarg0); ++ // replace index with field offset from cache entry ++ __ ld(x11, Address(x12, in_bytes(base + ConstantPoolCacheEntry::f2_offset()))); + -+ // Receiver subtype check against REFC. -+ __ lookup_interface_method(// inputs: rec. class, interface -+ recv_klass_reg, resolved_klass_reg, noreg, -+ // outputs: scan temp. reg1, scan temp. reg2 -+ temp_reg2, temp_reg, -+ L_no_such_interface, -+ /*return_method=*/false); ++ { ++ Label notVolatile; ++ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_volatile_shift); ++ __ beqz(t0, notVolatile); ++ __ membar(MacroAssembler::StoreStore | MacroAssembler::LoadStore); ++ __ bind(notVolatile); ++ } + -+ const ptrdiff_t typecheckSize = __ pc() - start_pc; -+ start_pc = __ pc(); ++ // Get object from stack ++ pop_and_check_object(x12); + -+ // Get selected method from declaring class and itable index -+ __ lookup_interface_method(// inputs: rec. class, interface, itable index -+ recv_klass_reg, holder_klass_reg, itable_index, -+ // outputs: method, scan temp. reg -+ xmethod, temp_reg, -+ L_no_such_interface); ++ // field address ++ __ add(x11, x12, x11); ++ const Address field(x11, 0); + -+ const ptrdiff_t lookupSize = __ pc() - start_pc; ++ // access field ++ switch (bytecode()) { ++ case Bytecodes::_fast_aputfield: ++ do_oop_store(_masm, field, x10, IN_HEAP); ++ break; ++ case Bytecodes::_fast_lputfield: ++ __ access_store_at(T_LONG, IN_HEAP, field, x10, noreg, noreg); ++ break; ++ case Bytecodes::_fast_iputfield: ++ __ access_store_at(T_INT, IN_HEAP, field, x10, noreg, noreg); ++ break; ++ case Bytecodes::_fast_zputfield: ++ __ access_store_at(T_BOOLEAN, IN_HEAP, field, x10, noreg, noreg); ++ break; ++ case Bytecodes::_fast_bputfield: ++ __ access_store_at(T_BYTE, IN_HEAP, field, x10, noreg, noreg); ++ break; ++ case Bytecodes::_fast_sputfield: ++ __ access_store_at(T_SHORT, IN_HEAP, field, x10, noreg, noreg); ++ break; ++ case Bytecodes::_fast_cputfield: ++ __ access_store_at(T_CHAR, IN_HEAP, field, x10, noreg, noreg); ++ break; ++ case Bytecodes::_fast_fputfield: ++ __ access_store_at(T_FLOAT, IN_HEAP, field, noreg /* ftos */, noreg, noreg); ++ break; ++ case Bytecodes::_fast_dputfield: ++ __ access_store_at(T_DOUBLE, IN_HEAP, field, noreg /* dtos */, noreg, noreg); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } + -+ // Reduce "estimate" such that "padding" does not drop below 8. 
-+ const ptrdiff_t estimate = 256; -+ const ptrdiff_t codesize = typecheckSize + lookupSize; -+ slop_delta = (int)(estimate - codesize); -+ slop_bytes += slop_delta; -+ assert(slop_delta >= 0, "itable #%d: Code size estimate (%d) for lookup_interface_method too small, required: %d", itable_index, (int)estimate, (int)codesize); ++ { ++ Label notVolatile; ++ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_volatile_shift); ++ __ beqz(t0, notVolatile); ++ __ membar(MacroAssembler::StoreLoad | MacroAssembler::StoreStore); ++ __ bind(notVolatile); ++ } ++} + -+#ifdef ASSERT -+ if (DebugVtables) { -+ Label L2; -+ __ beqz(xmethod, L2); -+ __ ld(t0, Address(xmethod, Method::from_compiled_offset())); -+ __ bnez(t0, L2); -+ __ stop("compiler entrypoint is null"); -+ __ bind(L2); ++void TemplateTable::fast_accessfield(TosState state) ++{ ++ transition(atos, state); ++ // Do the JVMTI work here to avoid disturbing the register state below ++ if (JvmtiExport::can_post_field_access()) { ++ // Check to see if a field access watch has been set before we ++ // take the time to call into the VM. ++ Label L1; ++ int32_t offset = 0; ++ __ la_patchable(t0, ExternalAddress((address)JvmtiExport::get_field_access_count_addr()), offset); ++ __ lwu(x12, Address(t0, offset)); ++ __ beqz(x12, L1); ++ // access constant pool cache entry ++ __ get_cache_entry_pointer_at_bcp(c_rarg2, t1, 1); ++ __ verify_oop(x10); ++ __ push_ptr(x10); // save object pointer before call_VM() clobbers it ++ __ mv(c_rarg1, x10); ++ // c_rarg1: object pointer copied above ++ // c_rarg2: cache entry pointer ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_access), ++ c_rarg1, c_rarg2); ++ __ pop_ptr(x10); // restore object pointer ++ __ bind(L1); + } -+#endif // ASSERT + -+ // xmethod: Method* -+ // j_rarg0: receiver -+ address ame_addr = __ pc(); -+ __ ld(t0, Address(xmethod, Method::from_compiled_offset())); -+ __ jr(t0); ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(x12, x11, 1); + -+ __ bind(L_no_such_interface); -+ // Handle IncompatibleClassChangeError in itable stubs. -+ // More detailed error message. -+ // We force resolving of the call site by jumping to the "handle -+ // wrong method" stub, and so let the interpreter runtime do all the -+ // dirty work. -+ assert(SharedRuntime::get_handle_wrong_method_stub() != NULL, "check initialization order"); -+ __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); ++ // Must prevent reordering of the following cp cache loads with bytecode load ++ __ membar(MacroAssembler::LoadLoad); + -+ masm->flush(); -+ bookkeeping(masm, tty, s, npe_addr, ame_addr, false, itable_index, slop_bytes, 0); ++ __ ld(x11, Address(x12, in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::f2_offset()))); ++ __ lwu(x13, Address(x12, in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::flags_offset()))); + -+ return s; -+} ++ // x10: object ++ __ verify_oop(x10); ++ __ null_check(x10); ++ __ add(x11, x10, x11); ++ const Address field(x11, 0); + -+int VtableStub::pd_code_alignment() { -+ // riscv cache line size is 64 bytes, but we want to limit alignment loss. 
-+ const unsigned int icache_line_size = wordSize; -+ return icache_line_size; ++ // access field ++ switch (bytecode()) { ++ case Bytecodes::_fast_agetfield: ++ do_oop_load(_masm, field, x10, IN_HEAP); ++ __ verify_oop(x10); ++ break; ++ case Bytecodes::_fast_lgetfield: ++ __ access_load_at(T_LONG, IN_HEAP, x10, field, noreg, noreg); ++ break; ++ case Bytecodes::_fast_igetfield: ++ __ access_load_at(T_INT, IN_HEAP, x10, field, noreg, noreg); ++ __ addw(x10, x10, zr); // signed extended ++ break; ++ case Bytecodes::_fast_bgetfield: ++ __ access_load_at(T_BYTE, IN_HEAP, x10, field, noreg, noreg); ++ break; ++ case Bytecodes::_fast_sgetfield: ++ __ access_load_at(T_SHORT, IN_HEAP, x10, field, noreg, noreg); ++ break; ++ case Bytecodes::_fast_cgetfield: ++ __ access_load_at(T_CHAR, IN_HEAP, x10, field, noreg, noreg); ++ break; ++ case Bytecodes::_fast_fgetfield: ++ __ access_load_at(T_FLOAT, IN_HEAP, noreg /* ftos */, field, noreg, noreg); ++ break; ++ case Bytecodes::_fast_dgetfield: ++ __ access_load_at(T_DOUBLE, IN_HEAP, noreg /* dtos */, field, noreg, noreg); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ { ++ Label notVolatile; ++ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_volatile_shift); ++ __ beqz(t0, notVolatile); ++ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); ++ __ bind(notVolatile); ++ } +} -diff --git a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp -index 897be2209..3b836fe6b 100644 ---- a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp -+++ b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp -@@ -1447,7 +1447,10 @@ void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Op - } - - // result = condition ? opr1 : opr2 --void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { -+void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type, -+ LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) { -+ assert(cmp_opr1 == LIR_OprFact::illegalOpr || cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp operands on s390"); + - Assembler::branch_condition acond = Assembler::bcondEqual, ncond = Assembler::bcondNotEqual; - switch (condition) { - case lir_cond_equal: acond = Assembler::bcondEqual; ncond = Assembler::bcondNotEqual; break; -diff --git a/src/hotspot/cpu/s390/s390.ad b/src/hotspot/cpu/s390/s390.ad -index e335f473d..53ad912cb 100644 ---- a/src/hotspot/cpu/s390/s390.ad -+++ b/src/hotspot/cpu/s390/s390.ad -@@ -1522,14 +1522,16 @@ const bool Matcher::match_rule_supported(int opcode) { - // BUT: make sure match rule is not disabled by a false predicate! - } - --const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { -+const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { - // TODO - // Identify extra cases that we might want to provide match rules for - // e.g. Op_ vector nodes and other intrinsics while guarding with vlen. -- bool ret_value = match_rule_supported(opcode); -+ if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) { -+ return false; -+ } - // Add rules here. - -- return ret_value; // Per default match rules are supported. -+ return true; // Per default match rules are supported. 
- } - - int Matcher::regnum_to_fpu_offset(int regnum) { -@@ -1578,6 +1580,14 @@ const uint Matcher::vector_shift_count_ideal_reg(int size) { - return Node::NotAMachineReg; - } - -+const bool Matcher::supports_scalable_vector() { -+ return false; -+} -+ -+const int Matcher::scalable_vector_reg_size(const BasicType bt) { -+ return -1; -+} -+ - // z/Architecture does support misaligned store/load at minimal extra cost. - const bool Matcher::misaligned_vectors_ok() { - return true; -diff --git a/src/hotspot/cpu/sparc/sparc.ad b/src/hotspot/cpu/sparc/sparc.ad -index 7a2798a51..7d9b17b44 100644 ---- a/src/hotspot/cpu/sparc/sparc.ad -+++ b/src/hotspot/cpu/sparc/sparc.ad -@@ -1710,7 +1710,7 @@ const bool Matcher::match_rule_supported(int opcode) { - return true; // Per default match rules are supported. - } - --const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { -+const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { - - // TODO - // identify extra cases that we might want to provide match rules for -@@ -1764,6 +1764,14 @@ const int Matcher::min_vector_size(const BasicType bt) { - return max_vector_size(bt); // Same as max. - } - -+const bool Matcher::supports_scalable_vector() { -+ return false; -+} -+ -+const int Matcher::scalable_vector_reg_size(const BasicType bt) { -+ return -1; -+} ++void TemplateTable::fast_xaccess(TosState state) ++{ ++ transition(vtos, state); + - // SPARC doesn't support misaligned vectors store/load. - const bool Matcher::misaligned_vectors_ok() { - return false; -diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp -index cee3140f4..d38c63600 100644 ---- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp -+++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp -@@ -1970,7 +1970,10 @@ void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { - } - } - --void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { -+void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type, -+ LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) { -+ assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp operands on x86"); ++ // get receiver ++ __ ld(x10, aaddress(0)); ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(x12, x13, 2); ++ __ ld(x11, Address(x12, in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::f2_offset()))); + - Assembler::Condition acond, ncond; - switch (condition) { - case lir_cond_equal: acond = Assembler::equal; ncond = Assembler::notEqual; break; -diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.cpp b/src/hotspot/cpu/x86/macroAssembler_x86.cpp -index 82fd8522b..8016d328a 100644 ---- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp -+++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp -@@ -6606,6 +6606,99 @@ void MacroAssembler::string_indexof_char(Register str1, Register cnt1, Register - bind(DONE_LABEL); - } // string_indexof_char - -+void MacroAssembler::stringL_indexof_char(Register str1, Register cnt1, Register ch, Register result, -+ XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp) { -+ ShortBranchVerifier sbv(this); -+ assert(UseSSE42Intrinsics, "SSE4.2 intrinsics are required"); -+ -+ int stride = 16; -+ -+ Label FOUND_CHAR, SCAN_TO_CHAR_INIT, SCAN_TO_CHAR_LOOP, -+ SCAN_TO_16_CHAR, SCAN_TO_16_CHAR_LOOP, SCAN_TO_32_CHAR_LOOP, -+ RET_NOT_FOUND, SCAN_TO_16_CHAR_INIT, -+ 
FOUND_SEQ_CHAR, DONE_LABEL; -+ -+ movptr(result, str1); -+ if (UseAVX >= 2) { -+ cmpl(cnt1, stride); -+ jcc(Assembler::less, SCAN_TO_CHAR_INIT); -+ cmpl(cnt1, stride*2); -+ jcc(Assembler::less, SCAN_TO_16_CHAR_INIT); -+ movdl(vec1, ch); -+ vpbroadcastb(vec1, vec1, Assembler::AVX_256bit); -+ vpxor(vec2, vec2); -+ movl(tmp, cnt1); -+ andl(tmp, 0xFFFFFFE0); //vector count (in chars) -+ andl(cnt1,0x0000001F); //tail count (in chars) -+ -+ bind(SCAN_TO_32_CHAR_LOOP); -+ vmovdqu(vec3, Address(result, 0)); -+ vpcmpeqb(vec3, vec3, vec1, Assembler::AVX_256bit); -+ vptest(vec2, vec3); -+ jcc(Assembler::carryClear, FOUND_CHAR); -+ addptr(result, 32); -+ subl(tmp, stride*2); -+ jcc(Assembler::notZero, SCAN_TO_32_CHAR_LOOP); -+ jmp(SCAN_TO_16_CHAR); -+ -+ bind(SCAN_TO_16_CHAR_INIT); -+ movdl(vec1, ch); -+ pxor(vec2, vec2); -+ pshufb(vec1, vec2); -+ } -+ -+ bind(SCAN_TO_16_CHAR); -+ cmpl(cnt1, stride); -+ jcc(Assembler::less, SCAN_TO_CHAR_INIT);//less than 16 entires left -+ if (UseAVX < 2) { -+ movdl(vec1, ch); -+ pxor(vec2, vec2); -+ pshufb(vec1, vec2); -+ } -+ movl(tmp, cnt1); -+ andl(tmp, 0xFFFFFFF0); //vector count (in bytes) -+ andl(cnt1,0x0000000F); //tail count (in bytes) -+ -+ bind(SCAN_TO_16_CHAR_LOOP); -+ movdqu(vec3, Address(result, 0)); -+ pcmpeqb(vec3, vec1); -+ ptest(vec2, vec3); -+ jcc(Assembler::carryClear, FOUND_CHAR); -+ addptr(result, 16); -+ subl(tmp, stride); -+ jcc(Assembler::notZero, SCAN_TO_16_CHAR_LOOP);//last 16 items... -+ -+ bind(SCAN_TO_CHAR_INIT); -+ testl(cnt1, cnt1); -+ jcc(Assembler::zero, RET_NOT_FOUND); -+ bind(SCAN_TO_CHAR_LOOP); -+ load_unsigned_byte(tmp, Address(result, 0)); -+ cmpl(ch, tmp); -+ jccb(Assembler::equal, FOUND_SEQ_CHAR); -+ addptr(result, 1); -+ subl(cnt1, 1); -+ jccb(Assembler::zero, RET_NOT_FOUND); -+ jmp(SCAN_TO_CHAR_LOOP); -+ -+ bind(RET_NOT_FOUND); -+ movl(result, -1); -+ jmpb(DONE_LABEL); -+ -+ bind(FOUND_CHAR); -+ if (UseAVX >= 2) { -+ vpmovmskb(tmp, vec3); -+ } else { -+ pmovmskb(tmp, vec3); ++ // make sure exception is reported in correct bcp range (getfield is ++ // next instruction) ++ __ addi(xbcp, xbcp, 1); ++ __ null_check(x10); ++ switch (state) { ++ case itos: ++ __ add(x10, x10, x11); ++ __ access_load_at(T_INT, IN_HEAP, x10, Address(x10, 0), noreg, noreg); ++ __ addw(x10, x10, zr); // signed extended ++ break; ++ case atos: ++ __ add(x10, x10, x11); ++ do_oop_load(_masm, Address(x10, 0), x10, IN_HEAP); ++ __ verify_oop(x10); ++ break; ++ case ftos: ++ __ add(x10, x10, x11); ++ __ access_load_at(T_FLOAT, IN_HEAP, noreg /* ftos */, Address(x10), noreg, noreg); ++ break; ++ default: ++ ShouldNotReachHere(); + } -+ bsfl(ch, tmp); -+ addptr(result, ch); -+ -+ bind(FOUND_SEQ_CHAR); -+ subptr(result, str1); + -+ bind(DONE_LABEL); -+} // stringL_indexof_char -+ - // helper function for string_compare - void MacroAssembler::load_next_elements(Register elem1, Register elem2, Register str1, Register str2, - Address::ScaleFactor scale, Address::ScaleFactor scale1, -diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.hpp b/src/hotspot/cpu/x86/macroAssembler_x86.hpp -index 1bed0cce9..47a062c11 100644 ---- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp -+++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp -@@ -1659,6 +1659,8 @@ public: - #ifdef COMPILER2 - void string_indexof_char(Register str1, Register cnt1, Register ch, Register result, - XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp); -+ void stringL_indexof_char(Register str1, Register cnt1, Register ch, Register result, -+ XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, 
Register tmp); - - // IndexOf strings. - // Small strings are loaded through stack if they cross page boundary. -diff --git a/src/hotspot/cpu/x86/x86.ad b/src/hotspot/cpu/x86/x86.ad -index baa7cc774..238d8729b 100644 ---- a/src/hotspot/cpu/x86/x86.ad -+++ b/src/hotspot/cpu/x86/x86.ad -@@ -1511,10 +1511,13 @@ const bool Matcher::match_rule_supported(int opcode) { - return ret_value; // Per default match rules are supported. - } - --const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { -+const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { - // identify extra cases that we might want to provide match rules for - // e.g. Op_ vector nodes and other intrinsics while guarding with vlen - bool ret_value = match_rule_supported(opcode); -+ if (!vector_size_supported(bt, vlen)) { -+ ret_value = false; -+ } - if (ret_value) { - switch (opcode) { - case Op_AbsVB: -@@ -1642,6 +1645,15 @@ const int Matcher::min_vector_size(const BasicType bt) { - return MIN2(size,max_size); - } - -+const bool Matcher::supports_scalable_vector() { -+ return false; -+} ++ { ++ Label notVolatile; ++ __ lwu(x13, Address(x12, in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::flags_offset()))); ++ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_volatile_shift); ++ __ beqz(t0, notVolatile); ++ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); ++ __ bind(notVolatile); ++ } + -+const int Matcher::scalable_vector_reg_size(const BasicType bt) { -+ return -1; ++ __ sub(xbcp, xbcp, 1); +} + ++//----------------------------------------------------------------------------- ++// Calls + - // Vector ideal reg corresponding to specified size in bytes - const uint Matcher::vector_ideal_reg(int size) { - assert(MaxVectorSize >= size, ""); -diff --git a/src/hotspot/cpu/x86/x86_32.ad b/src/hotspot/cpu/x86/x86_32.ad -index bc9947327..bbe49bd62 100644 ---- a/src/hotspot/cpu/x86/x86_32.ad -+++ b/src/hotspot/cpu/x86/x86_32.ad -@@ -11909,12 +11909,12 @@ instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2 - ins_pipe( pipe_slow ); - %} - --instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch, -+instruct string_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch, - eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{ -- predicate(UseSSE42Intrinsics); -+ predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n) -> encoding() == StrIntrinsicNode::U)); - match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); - effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr); -- format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %} -+ format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %} - ins_encode %{ - __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register, - $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register); -@@ -11922,6 +11922,19 @@ instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch, - ins_pipe( pipe_slow ); - %} - -+instruct stringL_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch, -+ eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{ -+ predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L)); -+ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); -+ effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL 
cr); -+ format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %} -+ ins_encode %{ -+ __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register, -+ $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register); -+ %} -+ ins_pipe( pipe_slow ); -+%} -+ - // fast array equals - instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result, - regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr) -diff --git a/src/hotspot/cpu/x86/x86_64.ad b/src/hotspot/cpu/x86/x86_64.ad -index 7e6739ffe..53f887ea6 100644 ---- a/src/hotspot/cpu/x86/x86_64.ad -+++ b/src/hotspot/cpu/x86/x86_64.ad -@@ -2975,7 +2975,7 @@ frame - RAX_H_num // Op_RegL - }; - // Excluded flags and vector registers. -- assert(ARRAY_SIZE(hi) == _last_machine_leaf - 6, "missing type"); -+ assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type"); - return OptoRegPair(hi[ideal_reg], lo[ideal_reg]); - %} - %} -@@ -11509,13 +11509,13 @@ instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI - ins_pipe( pipe_slow ); - %} - --instruct string_indexofU_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch, -- rbx_RegI result, legVecS vec1, legVecS vec2, legVecS vec3, rcx_RegI tmp, rFlagsReg cr) -+instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch, -+ rbx_RegI result, legVecS vec1, legVecS vec2, legVecS vec3, rcx_RegI tmp, rFlagsReg cr) - %{ -- predicate(UseSSE42Intrinsics); -+ predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U)); - match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); - effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr); -- format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %} -+ format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %} - ins_encode %{ - __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register, - $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register); -@@ -11523,6 +11523,20 @@ instruct string_indexofU_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch, - ins_pipe( pipe_slow ); - %} - -+instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch, -+ rbx_RegI result, legVecS tmp_vec1, legVecS tmp_vec2, legVecS tmp_vec3, rcx_RegI tmp, rFlagsReg cr) -+%{ -+ predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L)); -+ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); -+ effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr); -+ format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %} -+ ins_encode %{ -+ __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register, -+ $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register); -+ %} -+ ins_pipe( pipe_slow ); -+%} -+ - // fast string equals - instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result, - legVecS tmp1, legVecS tmp2, rbx_RegI tmp3, rFlagsReg cr) -diff --git a/src/hotspot/os/linux/os_linux.cpp b/src/hotspot/os/linux/os_linux.cpp -index 74945999e..6c79d20a4 100644 ---- a/src/hotspot/os/linux/os_linux.cpp -+++ b/src/hotspot/os/linux/os_linux.cpp -@@ -1903,7 +1903,11 @@ void * os::dll_load(const char *filename, char *ebuf, int ebuflen) { - {EM_PARISC, EM_PARISC, ELFCLASS32, ELFDATA2MSB, (char*)"PARISC"}, - {EM_68K, 
EM_68K, ELFCLASS32, ELFDATA2MSB, (char*)"M68k"}, - {EM_AARCH64, EM_AARCH64, ELFCLASS64, ELFDATA2LSB, (char*)"AARCH64"}, -- {EM_RISCV, EM_RISCV, ELFCLASS64, ELFDATA2LSB, (char*)"RISC-V"}, -+#ifdef _LP64 -+ {EM_RISCV, EM_RISCV, ELFCLASS64, ELFDATA2LSB, (char*)"RISC-V64"}, -+#else -+ {EM_RISCV, EM_RISCV, ELFCLASS32, ELFDATA2LSB, (char*)"RISC-V32"}, -+#endif - {EM_LOONGARCH, EM_LOONGARCH, ELFCLASS64, ELFDATA2LSB, (char*)"LoongArch"}, - }; - -@@ -2735,6 +2739,8 @@ void os::get_summary_cpu_info(char* cpuinfo, size_t length) { - strncpy(cpuinfo, "IA64", length); - #elif defined(PPC) - strncpy(cpuinfo, "PPC64", length); -+#elif defined(RISCV) -+ strncpy(cpuinfo, LP64_ONLY("RISCV64") NOT_LP64("RISCV32"), length); - #elif defined(S390) - strncpy(cpuinfo, "S390", length); - #elif defined(SPARC) -@@ -3966,7 +3972,8 @@ size_t os::Linux::find_large_page_size() { - IA64_ONLY(256 * M) - PPC_ONLY(4 * M) - S390_ONLY(1 * M) -- SPARC_ONLY(4 * M); -+ SPARC_ONLY(4 * M) -+ RISCV64_ONLY(2 * M); - #endif // ZERO - - FILE *fp = fopen("/proc/meminfo", "r"); -diff --git a/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp -new file mode 100644 -index 000000000..961fff011 ---- /dev/null -+++ b/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp -@@ -0,0 +1,113 @@ -+/* -+ * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ -+ -+#ifndef OS_CPU_LINUX_RISCV_ATOMIC_LINUX_RISCV_HPP -+#define OS_CPU_LINUX_RISCV_ATOMIC_LINUX_RISCV_HPP ++void TemplateTable::prepare_invoke(int byte_no, ++ Register method, // linked method (or i-klass) ++ Register index, // itable index, MethodType, etc. 
++ Register recv, // if caller wants to see it ++ Register flags // if caller wants to test it ++ ) { ++ // determine flags ++ const Bytecodes::Code code = bytecode(); ++ const bool is_invokeinterface = code == Bytecodes::_invokeinterface; ++ const bool is_invokedynamic = code == Bytecodes::_invokedynamic; ++ const bool is_invokehandle = code == Bytecodes::_invokehandle; ++ const bool is_invokevirtual = code == Bytecodes::_invokevirtual; ++ const bool is_invokespecial = code == Bytecodes::_invokespecial; ++ const bool load_receiver = (recv != noreg); ++ const bool save_flags = (flags != noreg); ++ assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic), ""); ++ assert(save_flags == (is_invokeinterface || is_invokevirtual), "need flags for vfinal"); ++ assert(flags == noreg || flags == x13, ""); ++ assert(recv == noreg || recv == x12, ""); + -+#include "vm_version_riscv.hpp" ++ // setup registers & access constant pool cache ++ if (recv == noreg) { ++ recv = x12; ++ } ++ if (flags == noreg) { ++ flags = x13; ++ } ++ assert_different_registers(method, index, recv, flags); + -+// Implementation of class atomic -+// Note that memory_order_conservative requires a full barrier after atomic stores. -+// See https://patchwork.kernel.org/patch/3575821/ ++ // save 'interpreter return address' ++ __ save_bcp(); + -+#define FULL_MEM_BARRIER __sync_synchronize() -+#define READ_MEM_BARRIER __atomic_thread_fence(__ATOMIC_ACQUIRE); -+#define WRITE_MEM_BARRIER __atomic_thread_fence(__ATOMIC_RELEASE); ++ load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic); + -+template -+struct Atomic::PlatformAdd -+ : public Atomic::AddAndFetch > -+{ -+ template -+ D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const { -+ D res = __atomic_add_fetch(dest, add_value, __ATOMIC_RELEASE); -+ FULL_MEM_BARRIER; -+ return res; ++ // maybe push appendix to arguments (just before return address) ++ if (is_invokedynamic || is_invokehandle) { ++ Label L_no_push; ++ __ andi(t0, flags, 1UL << ConstantPoolCacheEntry::has_appendix_shift); ++ __ beqz(t0, L_no_push); ++ // Push the appendix as a trailing parameter. ++ // This must be done before we get the receiver, ++ // since the parameter_size includes it. ++ __ push_reg(x9); ++ __ mv(x9, index); ++ __ load_resolved_reference_at_index(index, x9); ++ __ pop_reg(x9); ++ __ push_reg(index); // push appendix (MethodType, CallSite, etc.) ++ __ bind(L_no_push); + } -+}; -+ -+template -+template -+inline T Atomic::PlatformXchg::operator()(T exchange_value, -+ T volatile* dest, -+ atomic_memory_order order) const { -+ STATIC_ASSERT(byte_size == sizeof(T)); -+ T res = __atomic_exchange_n(dest, exchange_value, __ATOMIC_RELEASE); -+ FULL_MEM_BARRIER; -+ return res; -+} + -+// No direct support for cmpxchg of bytes; emulate using int. 
-+template -+template -+inline T Atomic::PlatformCmpxchg::operator()(T exchange_value, -+ T volatile* dest, -+ T compare_value, -+ atomic_memory_order order) const { -+ STATIC_ASSERT(byte_size == sizeof(T)); -+ T value = compare_value; -+ if (order != memory_order_relaxed) { -+ FULL_MEM_BARRIER; ++ // load receiver if needed (note: no return address pushed yet) ++ if (load_receiver) { ++ __ andi(recv, flags, ConstantPoolCacheEntry::parameter_size_mask); // parameter_size_mask = 1 << 8 ++ __ shadd(t0, recv, esp, t0, 3); ++ __ ld(recv, Address(t0, -Interpreter::expr_offset_in_bytes(1))); ++ __ verify_oop(recv); + } + -+ __atomic_compare_exchange(dest, &value, &exchange_value, /* weak */ false, -+ __ATOMIC_RELAXED, __ATOMIC_RELAXED); ++ // compute return type ++ __ slli(t1, flags, XLEN - (ConstantPoolCacheEntry::tos_state_shift + ConstantPoolCacheEntry::tos_state_bits)); ++ __ srli(t1, t1, XLEN - ConstantPoolCacheEntry::tos_state_bits); // (1 << 5) - 4 --> 28~31==> t1:0~3 + -+ if (order != memory_order_relaxed) { -+ FULL_MEM_BARRIER; ++ // load return address ++ { ++ const address table_addr = (address) Interpreter::invoke_return_entry_table_for(code); ++ __ mv(t0, table_addr); ++ __ shadd(t0, t1, t0, t1, 3); ++ __ ld(ra, Address(t0, 0)); + } -+ return value; +} + -+template<> -+template -+inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value, -+ T volatile* dest, -+ T compare_value, -+ atomic_memory_order order) const { -+ STATIC_ASSERT(4 == sizeof(T)); -+ if (order != memory_order_relaxed) { -+ FULL_MEM_BARRIER; -+ } -+ T rv; -+ int tmp; -+ __asm volatile( -+ "1:\n\t" -+ " addiw %[tmp], %[cv], 0\n\t" // make sure compare_value signed_extend -+ " lr.w.aq %[rv], (%[dest])\n\t" -+ " bne %[rv], %[tmp], 2f\n\t" -+ " sc.w.rl %[tmp], %[ev], (%[dest])\n\t" -+ " bnez %[tmp], 1b\n\t" -+ "2:\n\t" -+ : [rv] "=&r" (rv), [tmp] "=&r" (tmp) -+ : [ev] "r" (exchange_value), [dest] "r" (dest), [cv] "r" (compare_value) -+ : "memory"); -+ if (order != memory_order_relaxed) { -+ FULL_MEM_BARRIER; -+ } -+ return rv; -+} ++void TemplateTable::invokevirtual_helper(Register index, ++ Register recv, ++ Register flags) ++{ ++ // Uses temporary registers x10, x13 ++ assert_different_registers(index, recv, x10, x13); ++ // Test for an invoke of a final method ++ Label notFinal; ++ __ andi(t0, flags, 1UL << ConstantPoolCacheEntry::is_vfinal_shift); ++ __ beqz(t0, notFinal); + -+#endif // OS_CPU_LINUX_RISCV_ATOMIC_LINUX_RISCV_HPP -diff --git a/src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.inline.hpp b/src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.inline.hpp -new file mode 100644 -index 000000000..44f04d1a9 ---- /dev/null -+++ b/src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.inline.hpp -@@ -0,0 +1,44 @@ -+/* -+ * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). 
-+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ */ ++ const Register method = index; // method must be xmethod ++ assert(method == xmethod, "Method must be xmethod for interpreter calling convention"); + -+#ifndef OS_CPU_LINUX_RISCV_BYTES_LINUX_RISCV_INLINE_HPP -+#define OS_CPU_LINUX_RISCV_BYTES_LINUX_RISCV_INLINE_HPP ++ // do the call - the index is actually the method to call ++ // that is, f2 is a vtable index if !is_vfinal, else f2 is a Method* + -+#include ++ // It's final, need a null check here! ++ __ null_check(recv); + -+// Efficient swapping of data bytes from Java byte -+// ordering to native byte ordering and vice versa. -+inline u2 Bytes::swap_u2(u2 x) { -+ return bswap_16(x); -+} ++ // profile this call ++ __ profile_final_call(x10); ++ __ profile_arguments_type(x10, method, x14, true); + -+inline u4 Bytes::swap_u4(u4 x) { -+ return bswap_32(x); -+} ++ __ jump_from_interpreted(method); + -+inline u8 Bytes::swap_u8(u8 x) { -+ return bswap_64(x); -+} ++ __ bind(notFinal); + -+#endif // OS_CPU_LINUX_RISCV_BYTES_LINUX_RISCV_INLINE_HPP -diff --git a/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp b/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp -new file mode 100644 -index 000000000..645b40a7c ---- /dev/null -+++ b/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp -@@ -0,0 +1,116 @@ -+/* -+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ ++ // get receiver klass ++ __ null_check(recv, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(x10, recv); + -+#ifndef OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_INLINE_HPP -+#define OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_INLINE_HPP ++ // profile this call ++ __ profile_virtual_call(x10, xlocals, x13); + -+static void pd_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { -+ (void)memmove(to, from, count * HeapWordSize); ++ // get target Method & entry point ++ __ lookup_virtual_method(x10, index, method); ++ __ profile_arguments_type(x13, method, x14, true); ++ __ jump_from_interpreted(method); +} + -+static inline void pd_disjoint_words_helper(const HeapWord* from, HeapWord* to, size_t count, bool is_atomic) { -+ switch (count) { -+ case 8: to[7] = from[7]; // fall through -+ case 7: to[6] = from[6]; // fall through -+ case 6: to[5] = from[5]; // fall through -+ case 5: to[4] = from[4]; // fall through -+ case 4: to[3] = from[3]; // fall through -+ case 3: to[2] = from[2]; // fall through -+ case 2: to[1] = from[1]; // fall through -+ case 1: to[0] = from[0]; // fall through -+ case 0: break; -+ default: -+ if(is_atomic) { -+ while (count-- > 0) { *to++ = *from++; } -+ } else { -+ memcpy(to, from, count * HeapWordSize); -+ } -+ } -+} ++void TemplateTable::invokevirtual(int byte_no) ++{ ++ transition(vtos, vtos); ++ assert(byte_no == f2_byte, "use this argument"); + -+static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { -+ pd_disjoint_words_helper(from, to, count, false); -+} ++ prepare_invoke(byte_no, xmethod, noreg, x12, x13); + -+static void pd_disjoint_words_atomic(const HeapWord* from, HeapWord* to, size_t count) { -+ pd_disjoint_words_helper(from, to, count, true); -+} ++ // xmethod: index (actually a Method*) ++ // x12: receiver ++ // x13: flags + -+static void pd_aligned_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { -+ pd_conjoint_words(from, to, count); ++ invokevirtual_helper(xmethod, x12, x13); +} + -+static void pd_aligned_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { -+ pd_disjoint_words(from, to, count); -+} ++void TemplateTable::invokespecial(int byte_no) ++{ ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); + -+static void pd_conjoint_bytes(const void* from, void* to, size_t count) { -+ (void)memmove(to, from, count); ++ prepare_invoke(byte_no, xmethod, noreg, // get f1 Method* ++ x12); // get receiver also for null check ++ __ verify_oop(x12); ++ __ null_check(x12); ++ // do the call ++ __ profile_call(x10); ++ __ profile_arguments_type(x10, xmethod, xbcp, false); ++ __ jump_from_interpreted(xmethod); +} + -+static void pd_conjoint_bytes_atomic(const void* from, void* to, size_t count) { -+ pd_conjoint_bytes(from, to, count); -+} ++void TemplateTable::invokestatic(int byte_no) ++{ ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this arugment"); + -+static void pd_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) { -+ _Copy_conjoint_jshorts_atomic(from, to, count); ++ prepare_invoke(byte_no, xmethod); // get f1 Method* ++ // do the call ++ __ profile_call(x10); ++ __ profile_arguments_type(x10, xmethod, x14, false); ++ __ jump_from_interpreted(xmethod); +} + -+static void pd_conjoint_jints_atomic(const jint* from, jint* to, size_t count) { -+ _Copy_conjoint_jints_atomic(from, to, count); ++void TemplateTable::fast_invokevfinal(int byte_no) ++{ ++ __ call_Unimplemented(); +} + -+static void 
pd_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) { -+ _Copy_conjoint_jlongs_atomic(from, to, count); -+} ++void TemplateTable::invokeinterface(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); + -+static void pd_conjoint_oops_atomic(const oop* from, oop* to, size_t count) { -+ assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size."); -+ _Copy_conjoint_jlongs_atomic((const jlong*)from, (jlong*)to, count); -+} ++ prepare_invoke(byte_no, x10, xmethod, // get f1 Klass*, f2 Method* ++ x12, x13); // recv, flags + -+static void pd_arrayof_conjoint_bytes(const HeapWord* from, HeapWord* to, size_t count) { -+ _Copy_arrayof_conjoint_bytes(from, to, count); -+} ++ // x10: interface klass (from f1) ++ // xmethod: method (from f2) ++ // x12: receiver ++ // x13: flags + -+static void pd_arrayof_conjoint_jshorts(const HeapWord* from, HeapWord* to, size_t count) { -+ _Copy_arrayof_conjoint_jshorts(from, to, count); -+} ++ // First check for Object case, then private interface method, ++ // then regular interface method. + -+static void pd_arrayof_conjoint_jints(const HeapWord* from, HeapWord* to, size_t count) { -+ _Copy_arrayof_conjoint_jints(from, to, count); -+} ++ // Special case of invokeinterface called for virtual method of ++ // java.lang.Object. See cpCache.cpp for details ++ Label notObjectMethod; ++ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_forced_virtual_shift); ++ __ beqz(t0, notObjectMethod); + -+static void pd_arrayof_conjoint_jlongs(const HeapWord* from, HeapWord* to, size_t count) { -+ _Copy_arrayof_conjoint_jlongs(from, to, count); -+} ++ invokevirtual_helper(xmethod, x12, x13); ++ __ bind(notObjectMethod); + -+static void pd_arrayof_conjoint_oops(const HeapWord* from, HeapWord* to, size_t count) { -+ assert(!UseCompressedOops, "foo!"); -+ assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size"); -+ _Copy_arrayof_conjoint_jlongs(from, to, count); -+} ++ Label no_such_interface; + -+#endif // OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_INLINE_HPP -diff --git a/src/hotspot/os_cpu/linux_riscv/globals_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/globals_linux_riscv.hpp -new file mode 100644 -index 000000000..041cdf4ff ---- /dev/null -+++ b/src/hotspot/os_cpu/linux_riscv/globals_linux_riscv.hpp -@@ -0,0 +1,43 @@ -+/* -+ * Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ ++ // Check for private method invocation - indicated by vfinal ++ Label notVFinal; ++ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_vfinal_shift); ++ __ beqz(t0, notVFinal); + -+#ifndef OS_CPU_LINUX_RISCV_VM_GLOBALS_LINUX_RISCV_HPP -+#define OS_CPU_LINUX_RISCV_VM_GLOBALS_LINUX_RISCV_HPP ++ // Check receiver klass into x13 - also a null check ++ __ null_check(x12, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(x13, x12); + -+// Sets the default values for platform dependent flags used by the runtime system. -+// (see globals.hpp) ++ Label subtype; ++ __ check_klass_subtype(x13, x10, x14, subtype); ++ // If we get here the typecheck failed ++ __ j(no_such_interface); ++ __ bind(subtype); + -+define_pd_global(bool, DontYieldALot, false); -+define_pd_global(intx, ThreadStackSize, 2048); // 0 => use system default -+define_pd_global(intx, VMThreadStackSize, 2048); ++ __ profile_final_call(x10); ++ __ profile_arguments_type(x10, xmethod, x14, true); ++ __ jump_from_interpreted(xmethod); + -+define_pd_global(intx, CompilerThreadStackSize, 2048); ++ __ bind(notVFinal); + -+define_pd_global(uintx, JVMInvokeMethodSlack, 8192); ++ // Get receiver klass into x13 - also a null check ++ __ restore_locals(); ++ __ null_check(x12, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(x13, x12); + -+// Used on 64 bit platforms for UseCompressedOops base address -+define_pd_global(uintx, HeapBaseMinAddress, 2 * G); ++ Label no_such_method; + -+#endif // OS_CPU_LINUX_RISCV_VM_GLOBALS_LINUX_RISCV_HPP -diff --git a/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp -new file mode 100644 -index 000000000..842aa51e0 ---- /dev/null -+++ b/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp -@@ -0,0 +1,73 @@ -+/* -+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++ // Preserve method for the throw_AbstractMethodErrorVerbose. ++ __ mv(x28, xmethod); ++ // Receiver subtype check against REFC. ++ // Superklass in x10. Subklass in x13. Blows t1, x30 ++ __ lookup_interface_method(// inputs: rec. class, interface, itable index ++ x13, x10, noreg, ++ // outputs: scan temp. reg, scan temp. 
reg ++ t1, x30, ++ no_such_interface, ++ /*return_method=*/false); + -+#ifndef OS_CPU_LINUX_RISCV_ORDERACCESS_LINUX_RISCV_HPP -+#define OS_CPU_LINUX_RISCV_ORDERACCESS_LINUX_RISCV_HPP ++ // profile this call ++ __ profile_virtual_call(x13, x30, x9); + -+// Included in orderAccess.hpp header file. ++ // Get declaring interface class from method, and itable index ++ __ load_method_holder(x10, xmethod); ++ __ lwu(xmethod, Address(xmethod, Method::itable_index_offset())); ++ __ subw(xmethod, xmethod, Method::itable_index_max); ++ __ negw(xmethod, xmethod); + -+#include "vm_version_riscv.hpp" ++ // Preserve recvKlass for throw_AbstractMethodErrorVerbose ++ __ mv(xlocals, x13); ++ __ lookup_interface_method(// inputs: rec. class, interface, itable index ++ xlocals, x10, xmethod, ++ // outputs: method, scan temp. reg ++ xmethod, x30, ++ no_such_interface); + -+// Implementation of class OrderAccess. ++ // xmethod: Method to call ++ // x12: receiver ++ // Check for abstract method error ++ // Note: This should be done more efficiently via a throw_abstract_method_error ++ // interpreter entry point and a conditional jump to it in case of a null ++ // method. ++ __ beqz(xmethod, no_such_method); + -+inline void OrderAccess::loadload() { acquire(); } -+inline void OrderAccess::storestore() { release(); } -+inline void OrderAccess::loadstore() { acquire(); } -+inline void OrderAccess::storeload() { fence(); } ++ __ profile_arguments_type(x13, xmethod, x30, true); + -+inline void OrderAccess::acquire() { -+ READ_MEM_BARRIER; -+} ++ // do the call ++ // x12: receiver ++ // xmethod: Method ++ __ jump_from_interpreted(xmethod); ++ __ should_not_reach_here(); + -+inline void OrderAccess::release() { -+ WRITE_MEM_BARRIER; -+} ++ // exception handling code follows ... ++ // note: must restore interpreter registers to canonical ++ // state for exception handling to work correctly! + -+inline void OrderAccess::fence() { -+ FULL_MEM_BARRIER; ++ __ bind(no_such_method); ++ // throw exception ++ __ restore_bcp(); // bcp must be correct for exception handler (was destroyed) ++ __ restore_locals(); // make sure locals pointer is correct as well (was destroyed) ++ // Pass arguments for generating a verbose error message. ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodErrorVerbose), x13, x28); ++ // the call_VM checks for exception, so we should never return here. ++ __ should_not_reach_here(); ++ ++ __ bind(no_such_interface); ++ // throw exceptiong ++ __ restore_bcp(); // bcp must be correct for exception handler (was destroyed) ++ __ restore_locals(); // make sure locals pointer is correct as well (was destroyed) ++ // Pass arguments for generating a verbose error message. ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_IncompatibleClassChangeErrorVerbose), x13, x10); ++ // the call_VM checks for exception, so we should never return here. 
++ __ should_not_reach_here(); ++ return; +} + -+template -+struct OrderAccess::PlatformOrderedLoad -+{ -+ template -+ T operator()(const volatile T* p) const { T data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; } -+}; ++void TemplateTable::invokehandle(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); + -+template -+struct OrderAccess::PlatformOrderedStore -+{ -+ template -+ void operator()(T v, volatile T* p) const { __atomic_store(p, &v, __ATOMIC_RELEASE); } -+}; ++ prepare_invoke(byte_no, xmethod, x10, x12); ++ __ verify_method_ptr(x12); ++ __ verify_oop(x12); ++ __ null_check(x12); + -+template -+struct OrderAccess::PlatformOrderedStore -+{ -+ template -+ void operator()(T v, volatile T* p) const { release_store(p, v); fence(); } -+}; ++ // FIXME: profile the LambdaForm also + -+#endif // OS_CPU_LINUX_RISCV_ORDERACCESS_LINUX_RISCV_HPP -diff --git a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp -new file mode 100644 -index 000000000..37947701b ---- /dev/null -+++ b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp -@@ -0,0 +1,628 @@ -+/* -+ * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++ // x30 is safe to use here as a temp reg because it is about to ++ // be clobbered by jump_from_interpreted(). 
++ __ profile_final_call(x30); ++ __ profile_arguments_type(x30, xmethod, x14, true); + -+// no precompiled headers -+#include "asm/macroAssembler.hpp" -+#include "classfile/classLoader.hpp" -+#include "classfile/systemDictionary.hpp" -+#include "classfile/vmSymbols.hpp" -+#include "code/codeCache.hpp" -+#include "code/icBuffer.hpp" -+#include "code/nativeInst.hpp" -+#include "code/vtableStubs.hpp" -+#include "interpreter/interpreter.hpp" -+#include "jvm.h" -+#include "memory/allocation.inline.hpp" -+#include "os_share_linux.hpp" -+#include "prims/jniFastGetField.hpp" -+#include "prims/jvm_misc.hpp" -+#include "runtime/arguments.hpp" -+#include "runtime/extendedPC.hpp" -+#include "runtime/frame.inline.hpp" -+#include "runtime/interfaceSupport.inline.hpp" -+#include "runtime/java.hpp" -+#include "runtime/javaCalls.hpp" -+#include "runtime/mutexLocker.hpp" -+#include "runtime/osThread.hpp" -+#include "runtime/sharedRuntime.hpp" -+#include "runtime/stubRoutines.hpp" -+#include "runtime/thread.inline.hpp" -+#include "runtime/timer.hpp" -+#include "utilities/debug.hpp" -+#include "utilities/events.hpp" -+#include "utilities/vmError.hpp" ++ __ jump_from_interpreted(xmethod); ++} + -+// put OS-includes here -+# include -+# include -+# include -+# include -+# include -+# include -+# include -+# include -+# include -+# include -+# include -+# include -+# include -+# include -+# include -+# include -+# include -+# include ++void TemplateTable::invokedynamic(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); + -+#define REG_LR 1 -+#define REG_FP 8 ++ prepare_invoke(byte_no, xmethod, x10); + -+NOINLINE address os::current_stack_pointer() { -+ return (address)__builtin_frame_address(0); -+} ++ // x10: CallSite object (from cpool->resolved_references[]) ++ // xmethod: MH.linkToCallSite method (from f2) + -+char* os::non_memory_address_word() { -+ // Must never look like an address returned by reserve_memory, -+ return (char*) -1; -+} ++ // Note: x10_callsite is already pushed by prepare_invoke + -+address os::Linux::ucontext_get_pc(const ucontext_t * uc) { -+ return (address)uc->uc_mcontext.__gregs[REG_PC]; -+} ++ // %%% should make a type profile for any invokedynamic that takes a ref argument ++ // profile this call ++ __ profile_call(xbcp); ++ __ profile_arguments_type(x13, xmethod, x30, false); + -+void os::Linux::ucontext_set_pc(ucontext_t * uc, address pc) { -+ uc->uc_mcontext.__gregs[REG_PC] = (intptr_t)pc; -+} ++ __ verify_oop(x10); + -+intptr_t* os::Linux::ucontext_get_sp(const ucontext_t * uc) { -+ return (intptr_t*)uc->uc_mcontext.__gregs[REG_SP]; ++ __ jump_from_interpreted(xmethod); +} + -+intptr_t* os::Linux::ucontext_get_fp(const ucontext_t * uc) { -+ return (intptr_t*)uc->uc_mcontext.__gregs[REG_FP]; -+} ++//----------------------------------------------------------------------------- ++// Allocation + -+// For Forte Analyzer AsyncGetCallTrace profiling support - thread -+// is currently interrupted by SIGPROF. -+// os::Solaris::fetch_frame_from_ucontext() tries to skip nested signal -+// frames. Currently we don't do that on Linux, so it's the same as -+// os::fetch_frame_from_context(). 
-+ExtendedPC os::Linux::fetch_frame_from_ucontext(Thread* thread, -+ const ucontext_t* uc, -+ intptr_t** ret_sp, -+ intptr_t** ret_fp) { ++void TemplateTable::_new() { ++ transition(vtos, atos); + -+ assert(thread != NULL, "just checking"); -+ assert(ret_sp != NULL, "just checking"); -+ assert(ret_fp != NULL, "just checking"); ++ __ get_unsigned_2_byte_index_at_bcp(x13, 1); ++ Label slow_case; ++ Label done; ++ Label initialize_header; ++ Label initialize_object; // including clearing the fields + -+ return os::fetch_frame_from_context(uc, ret_sp, ret_fp); -+} ++ __ get_cpool_and_tags(x14, x10); ++ // Make sure the class we're about to instantiate has been resolved. ++ // This is done before loading InstanceKlass to be consistent with the order ++ // how Constant Pool is update (see ConstantPool::klass_at_put) ++ const int tags_offset = Array::base_offset_in_bytes(); ++ __ add(t0, x10, x13); ++ __ la(t0, Address(t0, tags_offset)); ++ __ membar(MacroAssembler::AnyAny); ++ __ lbu(t0, t0); ++ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); ++ __ sub(t1, t0, (u1)JVM_CONSTANT_Class); ++ __ bnez(t1, slow_case); + -+ExtendedPC os::fetch_frame_from_context(const void* ucVoid, -+ intptr_t** ret_sp, intptr_t** ret_fp) { ++ // get InstanceKlass ++ __ load_resolved_klass_at_offset(x14, x13, x14, t0); + -+ ExtendedPC epc; -+ const ucontext_t* uc = (const ucontext_t*)ucVoid; ++ // make sure klass is initialized & doesn't have finalizer ++ // make sure klass is fully initialized ++ __ lbu(t0, Address(x14, InstanceKlass::init_state_offset())); ++ __ sub(t1, t0, (u1)InstanceKlass::fully_initialized); ++ __ bnez(t1, slow_case); + -+ if (uc != NULL) { -+ epc = ExtendedPC(os::Linux::ucontext_get_pc(uc)); -+ if (ret_sp != NULL) { -+ *ret_sp = os::Linux::ucontext_get_sp(uc); -+ } -+ if (ret_fp != NULL) { -+ *ret_fp = os::Linux::ucontext_get_fp(uc); -+ } -+ } else { -+ // construct empty ExtendedPC for return value checking -+ epc = ExtendedPC(NULL); -+ if (ret_sp != NULL) { -+ *ret_sp = (intptr_t *)NULL; -+ } -+ if (ret_fp != NULL) { -+ *ret_fp = (intptr_t *)NULL; -+ } -+ } ++ // get instance_size in InstanceKlass (scaled to a count of bytes) ++ __ lwu(x13, Address(x14, Klass::layout_helper_offset())); ++ // test to see if it has a finalizer or is malformed in some way ++ __ andi(t0, x13, Klass::_lh_instance_slow_path_bit); ++ __ bnez(t0, slow_case); + -+ return epc; -+} ++ // Allocate the instance: ++ // If TLAB is enabled: ++ // Try to allocate in the TLAB. ++ // If fails, go to the slow path. ++ // Else If inline contiguous allocations are enabled: ++ // Try to allocate in eden. ++ // If fails due to heap end, go to slow path ++ // ++ // If TLAB is enabled OR inline contiguous is enabled: ++ // Initialize the allocation. ++ // Exit. ++ // Go to slow path. ++ const bool allow_shared_alloc = Universe::heap()->supports_inline_contig_alloc(); + -+frame os::fetch_frame_from_context(const void* ucVoid) { -+ intptr_t* frame_sp = NULL; -+ intptr_t* frame_fp = NULL; -+ ExtendedPC epc = fetch_frame_from_context(ucVoid, &frame_sp, &frame_fp); -+ return frame(frame_sp, frame_fp, epc.pc()); -+} ++ if (UseTLAB) { ++ __ tlab_allocate(x10, x13, 0, noreg, x11, slow_case); + -+bool os::Linux::get_frame_at_stack_banging_point(JavaThread* thread, ucontext_t* uc, frame* fr) { -+ address pc = (address) os::Linux::ucontext_get_pc(uc); -+ if (Interpreter::contains(pc)) { -+ // interpreter performs stack banging after the fixed frame header has -+ // been generated while the compilers perform it before. 
To maintain -+ // semantic consistency between interpreted and compiled frames, the -+ // method returns the Java sender of the current frame. -+ *fr = os::fetch_frame_from_context(uc); -+ if (!fr->is_first_java_frame()) { -+ assert(fr->safe_for_sender(thread), "Safety check"); -+ *fr = fr->java_sender(); ++ if (ZeroTLAB) { ++ // the fields have been already cleared ++ __ j(initialize_header); ++ } else { ++ // initialize both the header and fields ++ __ j(initialize_object); + } + } else { -+ // more complex code with compiled code -+ assert(!Interpreter::contains(pc), "Interpreted methods should have been handled above"); -+ CodeBlob* cb = CodeCache::find_blob(pc); -+ if (cb == NULL || !cb->is_nmethod() || cb->is_frame_complete_at(pc)) { -+ // Not sure where the pc points to, fallback to default -+ // stack overflow handling -+ return false; -+ } else { -+ // In compiled code, the stack banging is performed before RA -+ // has been saved in the frame. RA is live, and SP and FP -+ // belong to the caller. -+ intptr_t* frame_fp = os::Linux::ucontext_get_fp(uc); -+ intptr_t* frame_sp = os::Linux::ucontext_get_sp(uc); -+ address frame_pc = (address)(uintptr_t)(uc->uc_mcontext.__gregs[REG_LR] - -+ NativeInstruction::instruction_size); -+ *fr = frame(frame_sp, frame_fp, frame_pc); -+ if (!fr->is_java_frame()) { -+ assert(fr->safe_for_sender(thread), "Safety check"); -+ assert(!fr->is_first_frame(), "Safety check"); -+ *fr = fr->java_sender(); -+ } ++ // Allocation in the shared Eden, if allowed. ++ // ++ // x13: instance size in bytes ++ if (allow_shared_alloc) { ++ __ eden_allocate(x10, x13, 0, x28, slow_case); + } + } -+ assert(fr->is_java_frame(), "Safety check"); -+ return true; -+} + -+// By default, gcc always saves frame pointer rfp on this stack. This -+// may get turned off by -fomit-frame-pointer. -+frame os::get_sender_for_C_frame(frame* fr) { -+ return frame(fr->sender_sp(), fr->link(), fr->sender_pc()); -+} ++ // If USETLAB or allow_shared_alloc are true, the object is created above and ++ // there is an initialized need. Otherwise, skip and go to the slow path. ++ if (UseTLAB || allow_shared_alloc) { ++ // The object is initialized before the header. If the object size is ++ // zero, go directly to the header initialization. 
++ __ bind(initialize_object); ++ __ sub(x13, x13, sizeof(oopDesc)); ++ __ beqz(x13, initialize_header); + -+NOINLINE frame os::current_frame() { -+ intptr_t **sender_sp = (intptr_t **)__builtin_frame_address(0); -+ if(sender_sp != NULL) { -+ frame myframe((intptr_t*)os::current_stack_pointer(), -+ sender_sp[frame::link_offset], -+ CAST_FROM_FN_PTR(address, os::current_frame)); -+ if (os::is_first_C_frame(&myframe)) { -+ // stack is not walkable -+ return frame(); -+ } else { -+ return os::get_sender_for_C_frame(&myframe); ++ // Initialize object fields ++ { ++ __ add(x12, x10, sizeof(oopDesc)); ++ Label loop; ++ __ bind(loop); ++ __ sd(zr, Address(x12)); ++ __ add(x12, x12, BytesPerLong); ++ __ sub(x13, x13, BytesPerLong); ++ __ bnez(x13, loop); ++ } -+ } else { -+ ShouldNotReachHere(); -+ return frame(); -+ } -+} -+ -+// Utility functions -+extern "C" JNIEXPORT int -+JVM_handle_linux_signal(int sig, -+ siginfo_t* info, -+ void* ucVoid, -+ int abort_if_unrecognized) { -+ ucontext_t* uc = (ucontext_t*) ucVoid; -+ -+ Thread* t = Thread::current_or_null_safe(); -+ -+ // Must do this before SignalHandlerMark, if crash protection installed we will longjmp away -+ // (no destructors can be run) -+ os::ThreadCrashProtection::check_crash_protection(sig, t); -+ -+ SignalHandlerMark shm(t); -+ -+ // Note: it's not uncommon that JNI code uses signal/sigset to install -+ // then restore certain signal handler (e.g. to temporarily block SIGPIPE, -+ // or have a SIGILL handler when detecting CPU type). When that happens, -+ // JVM_handle_linux_signal() might be invoked with junk info/ucVoid. To -+ // avoid unnecessary crash when libjsig is not preloaded, try handle signals -+ // that do not require siginfo/ucontext first. + -+ if (sig == SIGPIPE || sig == SIGXFSZ) { -+ // allow chained handler to go first -+ if (os::Linux::chained_handler(sig, info, ucVoid)) { -+ return true; -+ } else { -+ // Ignoring SIGPIPE/SIGXFSZ - see bugs 4229104 or 6499219 -+ return true; -+ } -+ } ++ // initialize object header only.
++ __ bind(initialize_header); ++ __ mv(t0, (intptr_t)markWord::prototype().value()); ++ __ sd(t0, Address(x10, oopDesc::mark_offset_in_bytes())); ++ __ store_klass_gap(x10, zr); // zero klass gap for compressed oops ++ __ store_klass(x10, x14); // store klass last + -+#ifdef CAN_SHOW_REGISTERS_ON_ASSERT -+ if ((sig == SIGSEGV || sig == SIGBUS) && info != NULL && info->si_addr == g_assert_poison) { -+ if (handle_assert_poison_fault(ucVoid, info->si_addr)) { -+ return 1; ++ { ++ SkipIfEqual skip(_masm, &DTraceAllocProbes, false); ++ // Trigger dtrace event for fastpath ++ __ push(atos); // save the return value ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, static_cast(SharedRuntime::dtrace_object_alloc)), x10); ++ __ pop(atos); // restore the return value + } ++ __ j(done); + } -+#endif + -+ JavaThread* thread = NULL; -+ VMThread* vmthread = NULL; -+ if (os::Linux::signal_handlers_are_installed) { -+ if (t != NULL ) { -+ if(t->is_Java_thread()) { -+ thread = (JavaThread*)t; -+ } else if(t->is_VM_thread()) { -+ vmthread = (VMThread *)t; -+ } -+ } -+ } ++ // slow case ++ __ bind(slow_case); ++ __ get_constant_pool(c_rarg1); ++ __ get_unsigned_2_byte_index_at_bcp(c_rarg2, 1); ++ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), c_rarg1, c_rarg2); ++ __ verify_oop(x10); + -+ // Handle SafeFetch faults -+ if (uc != NULL) { -+ address const pc = (address) os::Linux::ucontext_get_pc(uc); -+ if (StubRoutines::is_safefetch_fault(pc)) { -+ os::Linux::ucontext_set_pc(uc, StubRoutines::continuation_for_safefetch_fault(pc)); -+ return 1; -+ } -+ } ++ // continue ++ __ bind(done); ++ // Must prevent reordering of stores for object initialization with stores that publish the new object. ++ __ membar(MacroAssembler::StoreStore); ++} + -+ // decide if this trap can be handled by a stub -+ address stub = NULL; ++void TemplateTable::newarray() { ++ transition(itos, atos); ++ __ load_unsigned_byte(c_rarg1, at_bcp(1)); ++ __ mv(c_rarg2, x10); ++ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray), ++ c_rarg1, c_rarg2); ++ // Must prevent reordering of stores for object initialization with stores that publish the new object. ++ __ membar(MacroAssembler::StoreStore); ++} + -+ address pc = NULL; ++void TemplateTable::anewarray() { ++ transition(itos, atos); ++ __ get_unsigned_2_byte_index_at_bcp(c_rarg2, 1); ++ __ get_constant_pool(c_rarg1); ++ __ mv(c_rarg3, x10); ++ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray), ++ c_rarg1, c_rarg2, c_rarg3); ++ // Must prevent reordering of stores for object initialization with stores that publish the new object. 
++ __ membar(MacroAssembler::StoreStore); ++} + -+ //%note os_trap_1 -+ if (info != NULL && uc != NULL && thread != NULL) { -+ pc = (address) os::Linux::ucontext_get_pc(uc); ++void TemplateTable::arraylength() { ++ transition(atos, itos); ++ __ null_check(x10, arrayOopDesc::length_offset_in_bytes()); ++ __ lwu(x10, Address(x10, arrayOopDesc::length_offset_in_bytes())); ++} + -+ // Handle ALL stack overflow variations here -+ if (sig == SIGSEGV) { -+ address addr = (address) info->si_addr; ++void TemplateTable::checkcast() ++{ ++ transition(atos, atos); ++ Label done, is_null, ok_is_subtype, quicked, resolved; ++ __ beqz(x10, is_null); + -+ // check if fault address is within thread stack -+ if (thread->on_local_stack(addr)) { -+ // stack overflow -+ if (thread->in_stack_yellow_reserved_zone(addr)) { -+ if (thread->thread_state() == _thread_in_Java) { -+ if (thread->in_stack_reserved_zone(addr)) { -+ frame fr; -+ if (os::Linux::get_frame_at_stack_banging_point(thread, uc, &fr)) { -+ assert(fr.is_java_frame(), "Must be a Java frame"); -+ frame activation = -+ SharedRuntime::look_for_reserved_stack_annotated_method(thread, fr); -+ if (activation.sp() != NULL) { -+ thread->disable_stack_reserved_zone(); -+ if (activation.is_interpreted_frame()) { -+ thread->set_reserved_stack_activation((address)( -+ activation.fp() + frame::interpreter_frame_initial_sp_offset)); -+ } else { -+ thread->set_reserved_stack_activation((address)activation.unextended_sp()); -+ } -+ return 1; -+ } -+ } -+ } -+ // Throw a stack overflow exception. Guard pages will be reenabled -+ // while unwinding the stack. -+ thread->disable_stack_yellow_reserved_zone(); -+ stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::STACK_OVERFLOW); -+ } else { -+ // Thread was in the vm or native code. Return and try to finish. -+ thread->disable_stack_yellow_reserved_zone(); -+ return 1; -+ } -+ } else if (thread->in_stack_red_zone(addr)) { -+ // Fatal red zone violation. Disable the guard pages and fall through -+ // to handle_unexpected_exception way down below. -+ thread->disable_stack_red_zone(); -+ tty->print_raw_cr("An irrecoverable stack overflow has occurred."); ++ // Get cpool & tags index ++ __ get_cpool_and_tags(x12, x13); // x12=cpool, x13=tags array ++ __ get_unsigned_2_byte_index_at_bcp(x9, 1); // x9=index ++ // See if bytecode has already been quicked ++ __ add(t0, x13, Array::base_offset_in_bytes()); ++ __ add(x11, t0, x9); ++ __ membar(MacroAssembler::AnyAny); ++ __ lbu(x11, x11); ++ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); ++ __ sub(t0, x11, (u1)JVM_CONSTANT_Class); ++ __ beqz(t0, quicked); + -+ // This is a likely cause, but hard to verify. Let's just print -+ // it as a hint. -+ tty->print_raw_cr("Please check if any of your loaded .so files has " -+ "enabled executable stack (see man page execstack(8))"); -+ } else { -+ // Accessing stack address below sp may cause SEGV if current -+ // thread has MAP_GROWSDOWN stack. This should only happen when -+ // current thread was created by user code with MAP_GROWSDOWN flag -+ // and then attached to VM. See notes in os_linux.cpp. -+ if (thread->osthread()->expanding_stack() == 0) { -+ thread->osthread()->set_expanding_stack(); -+ if (os::Linux::manually_expand_stack(thread, addr)) { -+ thread->osthread()->clear_expanding_stack(); -+ return 1; -+ } -+ thread->osthread()->clear_expanding_stack(); -+ } else { -+ fatal("recursive segv. 
expanding stack."); -+ } -+ } -+ } -+ } ++ __ push(atos); // save receiver for result, and for GC ++ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); ++ // vm_result_2 has metadata result ++ __ get_vm_result_2(x10, xthread); ++ __ pop_reg(x13); // restore receiver ++ __ j(resolved); + -+ if (thread->thread_state() == _thread_in_Java) { -+ // Java thread running in Java code => find exception handler if any -+ // a fault inside compiled code, the interpreter, or a stub ++ // Get superklass in x10 and subklass in x13 ++ __ bind(quicked); ++ __ mv(x13, x10); // Save object in x13; x10 needed for subtype check ++ __ load_resolved_klass_at_offset(x12, x9, x10, t0); // x10 = klass + -+ // Handle signal from NativeJump::patch_verified_entry(). -+ if ((sig == SIGILL || sig == SIGTRAP) -+ && nativeInstruction_at(pc)->is_sigill_zombie_not_entrant()) { -+ if (TraceTraps) { -+ tty->print_cr("trap: zombie_not_entrant (%s)", (sig == SIGTRAP) ? "SIGTRAP" : "SIGILL"); -+ } -+ stub = SharedRuntime::get_handle_wrong_method_stub(); -+ } else if (sig == SIGSEGV && os::is_poll_address((address)info->si_addr)) { -+ stub = SharedRuntime::get_poll_stub(pc); -+ } else if (sig == SIGBUS /* && info->si_code == BUS_OBJERR */) { -+ // BugId 4454115: A read from a MappedByteBuffer can fault -+ // here if the underlying file has been truncated. -+ // Do not crash the VM in such a case. -+ CodeBlob* cb = CodeCache::find_blob_unsafe(pc); -+ CompiledMethod* nm = (cb != NULL) ? cb->as_compiled_method_or_null() : NULL; -+ if (nm != NULL && nm->has_unsafe_access()) { -+ address next_pc = pc + NativeCall::instruction_size; -+ stub = SharedRuntime::handle_unsafe_access(thread, next_pc); -+ } -+ } else if (sig == SIGFPE && -+ (info->si_code == FPE_INTDIV || info->si_code == FPE_FLTDIV)) { -+ stub = -+ SharedRuntime:: -+ continuation_for_implicit_exception(thread, -+ pc, -+ SharedRuntime:: -+ IMPLICIT_DIVIDE_BY_ZERO); -+ } else if (sig == SIGSEGV && -+ !MacroAssembler::needs_explicit_null_check((intptr_t)info->si_addr)) { -+ // Determination of interpreter/vtable stub/compiled code null exception -+ stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL); -+ } -+ } else if (thread->thread_state() == _thread_in_vm && -+ sig == SIGBUS && /* info->si_code == BUS_OBJERR && */ -+ thread->doing_unsafe_access()) { -+ address next_pc = pc + NativeCall::instruction_size; -+ stub = SharedRuntime::handle_unsafe_access(thread, next_pc); -+ } ++ __ bind(resolved); ++ __ load_klass(x9, x13); + -+ // jni_fast_GetField can trap at certain pc's if a GC kicks in -+ // and the heap gets shrunk before the field access. -+ if ((sig == SIGSEGV) || (sig == SIGBUS)) { -+ address addr_slow = JNI_FastGetField::find_slowcase_pc(pc); -+ if (addr_slow != (address)-1) { -+ stub = addr_slow; -+ } -+ } ++ // Generate subtype check. Blows x12, x15. Object in x13. ++ // Superklass in x10. Subklass in x9. ++ __ gen_subtype_check(x9, ok_is_subtype); + -+ // Check to see if we caught the safepoint code in the -+ // process of write protecting the memory serialization page. -+ // It write enables the page immediately after protecting it -+ // so we can just return to retry the write. -+ if ((sig == SIGSEGV) && -+ os::is_memory_serialize_page(thread, (address) info->si_addr)) { -+ // Block current thread until the memory serialize page permission restored. 
-+ os::block_on_serialize_page_trap(); -+ return true; -+ } -+ } ++ // Come here on failure ++ __ push_reg(x13); ++ // object is at TOS ++ __ j(Interpreter::_throw_ClassCastException_entry); + -+ if (stub != NULL) { -+ // save all thread context in case we need to restore it -+ if (thread != NULL) { -+ thread->set_saved_exception_pc(pc); -+ } ++ // Come here on success ++ __ bind(ok_is_subtype); ++ __ mv(x10, x13); // Restore object in x13 + -+ os::Linux::ucontext_set_pc(uc, stub); -+ return true; ++ // Collect counts on whether this test sees NULLs a lot or not. ++ if (ProfileInterpreter) { ++ __ j(done); ++ __ bind(is_null); ++ __ profile_null_seen(x12); ++ } else { ++ __ bind(is_null); // same as 'done' + } ++ __ bind(done); ++} + -+ // signal-chaining -+ if (os::Linux::chained_handler(sig, info, ucVoid)) { -+ return true; -+ } ++void TemplateTable::instanceof() { ++ transition(atos, itos); ++ Label done, is_null, ok_is_subtype, quicked, resolved; ++ __ beqz(x10, is_null); + -+ if (!abort_if_unrecognized) { -+ // caller wants another chance, so give it to him -+ return false; -+ } ++ // Get cpool & tags index ++ __ get_cpool_and_tags(x12, x13); // x12=cpool, x13=tags array ++ __ get_unsigned_2_byte_index_at_bcp(x9, 1); // x9=index ++ // See if bytecode has already been quicked ++ __ add(t0, x13, Array::base_offset_in_bytes()); ++ __ add(x11, t0, x9); ++ __ membar(MacroAssembler::AnyAny); ++ __ lbu(x11, x11); ++ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); ++ __ sub(t0, x11, (u1)JVM_CONSTANT_Class); ++ __ beqz(t0, quicked); + -+ if (pc == NULL && uc != NULL) { -+ pc = os::Linux::ucontext_get_pc(uc); -+ } ++ __ push(atos); // save receiver for result, and for GC ++ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); ++ // vm_result_2 has metadata result ++ __ get_vm_result_2(x10, xthread); ++ __ pop_reg(x13); // restore receiver ++ __ verify_oop(x13); ++ __ load_klass(x13, x13); ++ __ j(resolved); + -+ // unmask current signal -+ sigset_t newset; -+ sigemptyset(&newset); -+ sigaddset(&newset, sig); -+ sigprocmask(SIG_UNBLOCK, &newset, NULL); ++ // Get superklass in x10 and subklass in x13 ++ __ bind(quicked); ++ __ load_klass(x13, x10); ++ __ load_resolved_klass_at_offset(x12, x9, x10, t0); + -+ VMError::report_and_die(t, sig, pc, info, ucVoid); ++ __ bind(resolved); + -+ ShouldNotReachHere(); -+ return true; // Mute compiler -+} ++ // Generate subtype check. Blows x12, x15 ++ // Superklass in x10. Subklass in x13. ++ __ gen_subtype_check(x13, ok_is_subtype); + -+void os::Linux::init_thread_fpu_state(void) { -+} ++ // Come here on failure ++ __ mv(x10, zr); ++ __ j(done); ++ // Come here on success ++ __ bind(ok_is_subtype); ++ __ li(x10, 1); + -+int os::Linux::get_fpu_control_word(void) { -+ return 0; ++ // Collect counts on whether this test sees NULLs a lot or not. ++ if (ProfileInterpreter) { ++ __ j(done); ++ __ bind(is_null); ++ __ profile_null_seen(x12); ++ } else { ++ __ bind(is_null); // same as 'done' ++ } ++ __ bind(done); ++ // x10 = 0: obj == NULL or obj is not an instanceof the specified klass ++ // x10 = 1: obj != NULL and obj is an instanceof the specified klass +} + -+void os::Linux::set_fpu_control_word(int fpu_control) { -+} ++//----------------------------------------------------------------------------- ++// Breakpoints ++void TemplateTable::_breakpoint() { ++ // Note: We get here even if we are single stepping.. ++ // jbug inists on setting breakpoints at every bytecode ++ // even if we are in single step mode. 
+ ++ transition(vtos, vtos); + -+//////////////////////////////////////////////////////////////////////////////// -+// thread stack ++ // get the unpatched byte code ++ __ get_method(c_rarg1); ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::get_original_bytecode_at), ++ c_rarg1, xbcp); ++ __ mv(x9, x10); + -+// Minimum usable stack sizes required to get to user code. Space for -+// HotSpot guard pages is added later. -+size_t os::Posix::_compiler_thread_min_stack_allowed = 72 * K; -+size_t os::Posix::_java_thread_min_stack_allowed = 72 * K; -+size_t os::Posix::_vm_internal_thread_min_stack_allowed = 72 * K; ++ // post the breakpoint event ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint), ++ xmethod, xbcp); + -+// return default stack size for thr_type -+size_t os::Posix::default_stack_size(os::ThreadType thr_type) { -+ // default stack size (compiler thread needs larger stack) -+ size_t s = (thr_type == os::compiler_thread ? 4 * M : 1 * M); -+ return s; ++ // complete the execution of original bytecode ++ __ mv(t0, x9); ++ __ dispatch_only_normal(vtos); +} + -+///////////////////////////////////////////////////////////////////////////// -+// helper functions for fatal error handler ++//----------------------------------------------------------------------------- ++// Exceptions + -+static const char* reg_abi_names[] = { -+ "pc", -+ "x1(ra)", "x2(sp)", "x3(gp)", "x4(tp)", -+ "x5(t0)", "x6(t1)", "x7(t2)", -+ "x8(s0)", "x9(s1)", -+ "x10(a0)", "x11(a1)", "x12(a2)", "x13(a3)", "x14(a4)", "x15(a5)", "x16(a6)", "x17(a7)", -+ "x18(s2)", "x19(s3)", "x20(s4)", "x21(s5)", "x22(s6)", "x23(s7)", "x24(s8)", "x25(s9)", "x26(s10)", "x27(s11)", -+ "x28(t3)", "x29(t4)","x30(t5)", "x31(t6)" -+}; ++void TemplateTable::athrow() { ++ transition(atos, vtos); ++ __ null_check(x10); ++ __ j(Interpreter::throw_exception_entry()); ++} + -+void os::print_context(outputStream *st, const void *context) { -+ if (context == NULL) { -+ return; -+ } ++//----------------------------------------------------------------------------- ++// Synchronization ++// ++// Note: monitorenter & exit are symmetric routines; which is reflected ++// in the assembly code structure as well ++// ++// Stack layout: ++// ++// [expressions ] <--- esp = expression stack top ++// .. ++// [expressions ] ++// [monitor entry] <--- monitor block top = expression stack bot ++// .. ++// [monitor entry] ++// [frame data ] <--- monitor block bot ++// ... ++// [saved fp ] <--- fp ++void TemplateTable::monitorenter() ++{ ++ transition(atos, vtos); + -+ const ucontext_t *uc = (const ucontext_t*)context; -+ st->print_cr("Registers:"); -+ for (int r = 0; r < 32; r++) { -+ st->print("%-*.*s=", 8, 8, reg_abi_names[r]); -+ print_location(st, uc->uc_mcontext.__gregs[r]); -+ } -+ st->cr(); ++ // check for NULL object ++ __ null_check(x10); + -+ intptr_t *frame_sp = (intptr_t *)os::Linux::ucontext_get_sp(uc); -+ st->print_cr("Top of Stack: (sp=" PTR_FORMAT ")", p2i(frame_sp)); -+ print_hex_dump(st, (address)frame_sp, (address)(frame_sp + 64), sizeof(intptr_t)); -+ st->cr(); ++ const Address monitor_block_top( ++ fp, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ const Address monitor_block_bot( ++ fp, frame::interpreter_frame_initial_sp_offset * wordSize); ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; + -+ // Note: it may be unsafe to inspect memory near pc. For example, pc may -+ // point to garbage if entry point in an nmethod is corrupted. 
Leave -+ // this at the end, and hope for the best. -+ address pc = os::Linux::ucontext_get_pc(uc); -+ print_instructions(st, pc, sizeof(char)); -+ st->cr(); -+} ++ Label allocated; + -+void os::print_register_info(outputStream *st, const void *context) { -+ if (context == NULL) { -+ return; -+ } ++ // initialize entry pointer ++ __ mv(c_rarg1, zr); // points to free slot or NULL + -+ const ucontext_t *uc = (const ucontext_t*)context; ++ // find a free slot in the monitor block (result in c_rarg1) ++ { ++ Label entry, loop, exit, notUsed; ++ __ ld(c_rarg3, monitor_block_top); // points to current entry, ++ // starting with top-most entry ++ __ la(c_rarg2, monitor_block_bot); // points to word before bottom + -+ st->print_cr("Register to memory mapping:"); -+ st->cr(); ++ __ j(entry); + -+ // this is horrendously verbose but the layout of the registers in the -+ // context does not match how we defined our abstract Register set, so -+ // we can't just iterate through the gregs area ++ __ bind(loop); ++ // check if current entry is used ++ // if not used then remember entry in c_rarg1 ++ __ ld(t0, Address(c_rarg3, BasicObjectLock::obj_offset_in_bytes())); ++ __ bnez(t0, notUsed); ++ __ mv(c_rarg1, c_rarg3); ++ __ bind(notUsed); ++ // check if current entry is for same object ++ // if same object then stop searching ++ __ beq(x10, t0, exit); ++ // otherwise advance to next entry ++ __ add(c_rarg3, c_rarg3, entry_size); ++ __ bind(entry); ++ // check if bottom reached ++ // if not at bottom then check this entry ++ __ bne(c_rarg3, c_rarg2, loop); ++ __ bind(exit); ++ } + -+ // this is only for the "general purpose" registers ++ __ bnez(c_rarg1, allocated); // check if a slot has been found and ++ // if found, continue with that on + -+ for (int r = 0; r < 32; r++) -+ st->print_cr("%-*.*s=" INTPTR_FORMAT, 8, 8, reg_abi_names[r], (uintptr_t)uc->uc_mcontext.__gregs[r]); -+ st->cr(); -+} ++ // allocate one if there's no free slot ++ { ++ Label entry, loop; ++ // 1. compute new pointers // esp: old expression stack top ++ __ ld(c_rarg1, monitor_block_bot); // c_rarg1: old expression stack bottom ++ __ sub(esp, esp, entry_size); // move expression stack top ++ __ sub(c_rarg1, c_rarg1, entry_size); // move expression stack bottom ++ __ mv(c_rarg3, esp); // set start value for copy loop ++ __ sd(c_rarg1, monitor_block_bot); // set new monitor block bottom ++ __ sub(sp, sp, entry_size); // make room for the monitor + -+void os::setup_fpu() { -+} ++ __ j(entry); ++ // 2. move expression stack contents ++ __ bind(loop); ++ __ ld(c_rarg2, Address(c_rarg3, entry_size)); // load expression stack ++ // word from old location ++ __ sd(c_rarg2, Address(c_rarg3, 0)); // and store it at new location ++ __ add(c_rarg3, c_rarg3, wordSize); // advance to next word ++ __ bind(entry); ++ __ bne(c_rarg3, c_rarg1, loop); // check if bottom reached.if not at bottom ++ // then copy next word ++ } + -+#ifndef PRODUCT -+void os::verify_stack_alignment() { -+ assert(((intptr_t)os::current_stack_pointer() & (StackAlignmentInBytes-1)) == 0, "incorrect stack alignment"); -+} -+#endif ++ // call run-time routine ++ // c_rarg1: points to monitor entry ++ __ bind(allocated); + -+int os::extra_bang_size_in_bytes() { -+ return 0; ++ // Increment bcp to point to the next bytecode, so exception ++ // handling for async. exceptions work correctly. ++ // The object has already been poped from the stack, so the ++ // expression stack looks correct. 
++ __ addi(xbcp, xbcp, 1); ++ ++ // store object ++ __ sd(x10, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes())); ++ __ lock_object(c_rarg1); ++ ++ // check to make sure this monitor doesn't cause stack overflow after locking ++ __ save_bcp(); // in case of exception ++ __ generate_stack_overflow_check(0); ++ ++ // The bcp has already been incremented. Just need to dispatch to ++ // next instruction. ++ __ dispatch_next(vtos); +} + -+extern "C" { -+ int SpinPause() { -+ return 0; -+ } ++void TemplateTable::monitorexit() ++{ ++ transition(atos, vtos); + -+ void _Copy_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) { -+ if (from > to) { -+ const jshort *end = from + count; -+ while (from < end) { -+ *(to++) = *(from++); -+ } -+ } else if (from < to) { -+ const jshort *end = from; -+ from += count - 1; -+ to += count - 1; -+ while (from >= end) { -+ *(to--) = *(from--); -+ } -+ } -+ } -+ void _Copy_conjoint_jints_atomic(const jint* from, jint* to, size_t count) { -+ if (from > to) { -+ const jint *end = from + count; -+ while (from < end) { -+ *(to++) = *(from++); -+ } -+ } else if (from < to) { -+ const jint *end = from; -+ from += count - 1; -+ to += count - 1; -+ while (from >= end) { -+ *(to--) = *(from--); -+ } -+ } -+ } -+ void _Copy_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) { -+ if (from > to) { -+ const jlong *end = from + count; -+ while (from < end) { -+ os::atomic_copy64(from++, to++); -+ } -+ } else if (from < to) { -+ const jlong *end = from; -+ from += count - 1; -+ to += count - 1; -+ while (from >= end) { -+ os::atomic_copy64(from--, to--); -+ } -+ } -+ } -+ -+ void _Copy_arrayof_conjoint_bytes(const HeapWord* from, -+ HeapWord* to, -+ size_t count) { -+ memmove(to, from, count); -+ } -+ void _Copy_arrayof_conjoint_jshorts(const HeapWord* from, -+ HeapWord* to, -+ size_t count) { -+ memmove(to, from, count * 2); -+ } -+ void _Copy_arrayof_conjoint_jints(const HeapWord* from, -+ HeapWord* to, -+ size_t count) { -+ memmove(to, from, count * 4); -+ } -+ void _Copy_arrayof_conjoint_jlongs(const HeapWord* from, -+ HeapWord* to, -+ size_t count) { -+ memmove(to, from, count * 8); -+ } -+}; -diff --git a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.hpp -new file mode 100644 -index 000000000..eae1635b0 ---- /dev/null -+++ b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.hpp -@@ -0,0 +1,40 @@ -+/* -+ * Copyright (c) 1999, 2017, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
-+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++ // check for NULL object ++ __ null_check(x10); + -+#ifndef OS_CPU_LINUX_RISCV_VM_OS_LINUX_RISCV_HPP -+#define OS_CPU_LINUX_RISCV_VM_OS_LINUX_RISCV_HPP ++ const Address monitor_block_top( ++ fp, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ const Address monitor_block_bot( ++ fp, frame::interpreter_frame_initial_sp_offset * wordSize); ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; + -+ static void setup_fpu(); ++ Label found; + -+ // Used to register dynamic code cache area with the OS -+ // Note: Currently only used in 64 bit Windows implementations -+ static bool register_code_area(char *low, char *high) { return true; } ++ // find matching slot ++ { ++ Label entry, loop; ++ __ ld(c_rarg1, monitor_block_top); // points to current entry, ++ // starting with top-most entry ++ __ la(c_rarg2, monitor_block_bot); // points to word before bottom ++ // of monitor block ++ __ j(entry); + -+ // Atomically copy 64 bits of data -+ static void atomic_copy64(const volatile void *src, volatile void *dst) { -+ *(jlong *) dst = *(const jlong *) src; ++ __ bind(loop); ++ // check if current entry is for same object ++ __ ld(t0, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes())); ++ // if same object then stop searching ++ __ beq(x10, t0, found); ++ // otherwise advance to next entry ++ __ add(c_rarg1, c_rarg1, entry_size); ++ __ bind(entry); ++ // check if bottom reached ++ // if not at bottom then check this entry ++ __ bne(c_rarg1, c_rarg2, loop); + } + -+#endif // OS_CPU_LINUX_RISCV_VM_OS_LINUX_RISCV_HPP -diff --git a/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp b/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp ++ // error handling. Unlocking was not block-structured ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ __ should_not_reach_here(); ++ ++ // call run-time routine ++ __ bind(found); ++ __ push_ptr(x10); // make sure object is on stack (contract with oopMaps) ++ __ unlock_object(c_rarg1); ++ __ pop_ptr(x10); // discard object ++} ++ ++// Wide instructions ++void TemplateTable::wide() ++{ ++ __ load_unsigned_byte(x9, at_bcp(1)); ++ __ mv(t0, (address)Interpreter::_wentry_point); ++ __ shadd(t0, x9, t0, t1, 3); ++ __ ld(t0, Address(t0)); ++ __ jr(t0); ++} ++ ++// Multi arrays ++void TemplateTable::multianewarray() { ++ transition(vtos, atos); ++ __ load_unsigned_byte(x10, at_bcp(3)); // get number of dimensions ++ // last dim is on top of stack; we want address of first one: ++ // first_addr = last_addr + (ndims - 1) * wordSize ++ __ shadd(c_rarg1, x10, esp, c_rarg1, 3); ++ __ sub(c_rarg1, c_rarg1, wordSize); ++ call_VM(x10, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray), ++ c_rarg1); ++ __ load_unsigned_byte(x11, at_bcp(3)); ++ __ shadd(esp, x11, esp, t0, 3); ++} +diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.hpp b/src/hotspot/cpu/riscv/templateTable_riscv.hpp new file mode 100644 -index 000000000..82b9bb6fd +index 00000000000..fcc86108d28 --- /dev/null -+++ b/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp -@@ -0,0 +1,38 @@ ++++ b/src/hotspot/cpu/riscv/templateTable_riscv.hpp +@@ -0,0 +1,42 @@ +/* -+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. 
++ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -55710,27 +55715,30 @@ index 000000000..82b9bb6fd + * + */ + -+#ifndef OS_CPU_LINUX_RISCV_VM_PREFETCH_LINUX_RISCV_INLINE_HPP -+#define OS_CPU_LINUX_RISCV_VM_PREFETCH_LINUX_RISCV_INLINE_HPP -+ -+#include "runtime/prefetch.hpp" -+ ++#ifndef CPU_RISCV_TEMPLATETABLE_RISCV_HPP ++#define CPU_RISCV_TEMPLATETABLE_RISCV_HPP + -+inline void Prefetch::read (void *loc, intx interval) { -+} ++static void prepare_invoke(int byte_no, ++ Register method, // linked method (or i-klass) ++ Register index = noreg, // itable index, MethodType, etc. ++ Register recv = noreg, // if caller wants to see it ++ Register flags = noreg // if caller wants to test it ++ ); ++static void invokevirtual_helper(Register index, Register recv, ++ Register flags); + -+inline void Prefetch::write(void *loc, intx interval) { -+} ++// Helpers ++static void index_check(Register array, Register index); + -+#endif // OS_CPU_LINUX_RISCV_VM_PREFETCH_LINUX_RISCV_INLINE_HPP -diff --git a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp ++#endif // CPU_RISCV_TEMPLATETABLE_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/universalNativeInvoker_riscv.cpp b/src/hotspot/cpu/riscv/universalNativeInvoker_riscv.cpp new file mode 100644 -index 000000000..c78096931 +index 00000000000..4f50adb05c3 --- /dev/null -+++ b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp -@@ -0,0 +1,103 @@ ++++ b/src/hotspot/cpu/riscv/universalNativeInvoker_riscv.cpp +@@ -0,0 +1,33 @@ +/* -+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -55755,91 +55763,21 @@ index 000000000..c78096931 + */ + +#include "precompiled.hpp" -+#include "memory/metaspaceShared.hpp" -+#include "runtime/frame.inline.hpp" -+#include "runtime/thread.inline.hpp" -+ -+frame JavaThread::pd_last_frame() { -+ assert(has_last_Java_frame(), "must have last_Java_sp() when suspended"); -+ return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp(), _anchor.last_Java_pc()); -+} -+ -+// For Forte Analyzer AsyncGetCallTrace profiling support - thread is -+// currently interrupted by SIGPROF -+bool JavaThread::pd_get_top_frame_for_signal_handler(frame* fr_addr, -+ void* ucontext, bool isInJava) { -+ -+ assert(Thread::current() == this, "caller must be current thread"); -+ return pd_get_top_frame(fr_addr, ucontext, isInJava); -+} -+ -+bool JavaThread::pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava) { -+ return pd_get_top_frame(fr_addr, ucontext, isInJava); -+} -+ -+bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava) { -+ assert(this->is_Java_thread(), "must be JavaThread"); -+ JavaThread* jt = (JavaThread *)this; -+ -+ // If we have a last_Java_frame, then we should use it even if -+ // isInJava == true. It should be more reliable than ucontext info. 
-+ if (jt->has_last_Java_frame() && jt->frame_anchor()->walkable()) { -+ *fr_addr = jt->pd_last_frame(); -+ return true; -+ } -+ -+ // At this point, we don't have a last_Java_frame, so -+ // we try to glean some information out of the ucontext -+ // if we were running Java code when SIGPROF came in. -+ if (isInJava) { -+ ucontext_t* uc = (ucontext_t*) ucontext; -+ -+ intptr_t* ret_fp = NULL; -+ intptr_t* ret_sp = NULL; -+ ExtendedPC addr = os::Linux::fetch_frame_from_ucontext(this, uc, -+ &ret_sp, &ret_fp); -+ if (addr.pc() == NULL || ret_sp == NULL ) { -+ // ucontext wasn't useful -+ return false; -+ } -+ -+ if (MetaspaceShared::is_in_trampoline_frame(addr.pc())) { -+ // In the middle of a trampoline call. Bail out for safety. -+ // This happens rarely so shouldn't affect profiling. -+ return false; -+ } -+ -+ frame ret_frame(ret_sp, ret_fp, addr.pc()); -+ if (!ret_frame.safe_for_sender(jt)) { -+#ifdef COMPILER2 -+ frame ret_frame2(ret_sp, NULL, addr.pc()); -+ if (!ret_frame2.safe_for_sender(jt)) { -+ // nothing else to try if the frame isn't good -+ return false; -+ } -+ ret_frame = ret_frame2; -+#else -+ // nothing else to try if the frame isn't good -+ return false; -+#endif /* COMPILER2 */ -+ } -+ *fr_addr = ret_frame; -+ return true; -+ } ++#include "prims/universalNativeInvoker.hpp" ++#include "utilities/debug.hpp" + -+ // nothing else to try -+ return false; ++address ProgrammableInvoker::generate_adapter(jobject jabi, jobject jlayout) { ++ Unimplemented(); ++ return nullptr; +} -+ -+void JavaThread::cache_global_variables() { } -diff --git a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp +diff --git a/src/hotspot/cpu/riscv/universalUpcallHandle_riscv.cpp b/src/hotspot/cpu/riscv/universalUpcallHandle_riscv.cpp new file mode 100644 -index 000000000..657b98984 +index 00000000000..ce70da72f2e --- /dev/null -+++ b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp -@@ -0,0 +1,67 @@ ++++ b/src/hotspot/cpu/riscv/universalUpcallHandle_riscv.cpp +@@ -0,0 +1,42 @@ +/* -+ * Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -55863,56 +55801,31 @@ index 000000000..657b98984 + * + */ + -+#ifndef OS_CPU_LINUX_RISCV_THREAD_LINUX_RISCV_HPP -+#define OS_CPU_LINUX_RISCV_THREAD_LINUX_RISCV_HPP -+ -+ private: -+ void pd_initialize() { -+ _anchor.clear(); -+ } -+ -+ frame pd_last_frame(); -+ -+ public: -+ // Mutators are highly dangerous.... 
-+ intptr_t* last_Java_fp() { return _anchor.last_Java_fp(); } -+ void set_last_Java_fp(intptr_t* java_fp) { _anchor.set_last_Java_fp(java_fp); } -+ -+ void set_base_of_stack_pointer(intptr_t* base_sp) { -+ } -+ -+ static ByteSize last_Java_fp_offset() { -+ return byte_offset_of(JavaThread, _anchor) + JavaFrameAnchor::last_Java_fp_offset(); -+ } -+ -+ intptr_t* base_of_stack_pointer() { -+ return NULL; -+ } -+ void record_base_of_stack_pointer() { -+ } -+ -+ bool pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext, -+ bool isInJava); ++#include "precompiled.hpp" ++#include "prims/universalUpcallHandler.hpp" ++#include "utilities/debug.hpp" + -+ bool pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava); -+private: -+ bool pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava); ++address ProgrammableUpcallHandler::generate_upcall_stub(jobject jrec, jobject jabi, jobject jlayout) { ++ Unimplemented(); ++ return nullptr; ++} + -+ // These routines are only used on cpu architectures that -+ // have separate register stacks (Itanium). -+ static bool register_stack_overflow() { return false; } -+ static void enable_register_stack_guard() {} -+ static void disable_register_stack_guard() {} ++address ProgrammableUpcallHandler::generate_optimized_upcall_stub(jobject mh, Method* entry, jobject jabi, jobject jconv) { ++ ShouldNotCallThis(); ++ return nullptr; ++} + -+#endif // OS_CPU_LINUX_RISCV_THREAD_LINUX_RISCV_HPP -diff --git a/src/hotspot/os_cpu/linux_riscv/vmStructs_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/vmStructs_linux_riscv.hpp ++bool ProgrammableUpcallHandler::supports_optimized_upcalls() { ++ return false; ++} +diff --git a/src/hotspot/cpu/riscv/vmStructs_riscv.hpp b/src/hotspot/cpu/riscv/vmStructs_riscv.hpp new file mode 100644 -index 000000000..8ee443b5d +index 00000000000..6c89133de02 --- /dev/null -+++ b/src/hotspot/os_cpu/linux_riscv/vmStructs_linux_riscv.hpp -@@ -0,0 +1,55 @@ ++++ b/src/hotspot/cpu/riscv/vmStructs_riscv.hpp +@@ -0,0 +1,42 @@ +/* -+ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -55936,45 +55849,32 @@ index 000000000..8ee443b5d + * + */ + -+#ifndef OS_CPU_LINUX_RISCV_VM_VMSTRUCTS_LINUX_RISCV_HPP -+#define OS_CPU_LINUX_RISCV_VM_VMSTRUCTS_LINUX_RISCV_HPP ++#ifndef CPU_RISCV_VMSTRUCTS_RISCV_HPP ++#define CPU_RISCV_VMSTRUCTS_RISCV_HPP + -+// These are the OS and CPU-specific fields, types and integer ++// These are the CPU-specific fields, types and integer +// constants required by the Serviceability Agent. This file is +// referenced by vmStructs.cpp. 
+ -+#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ -+ \ -+ /******************************/ \ -+ /* Threads (NOTE: incomplete) */ \ -+ /******************************/ \ -+ nonstatic_field(OSThread, _thread_id, OSThread::thread_id_t) \ -+ nonstatic_field(OSThread, _pthread_id, pthread_t) -+ ++#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ ++ volatile_nonstatic_field(JavaFrameAnchor, _last_Java_fp, intptr_t*) + -+#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ -+ \ -+ /**********************/ \ -+ /* Posix Thread IDs */ \ -+ /**********************/ \ -+ \ -+ declare_integer_type(OSThread::thread_id_t) \ -+ declare_unsigned_integer_type(pthread_t) ++#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) + -+#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) + -+#define VM_LONG_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) + -+#endif // OS_CPU_LINUX_RISCV_VM_VMSTRUCTS_LINUX_RISCV_HPP -diff --git a/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp ++#endif // CPU_RISCV_VMSTRUCTS_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp new file mode 100644 -index 000000000..ef9358aa0 +index 00000000000..768c7633ca6 --- /dev/null -+++ b/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp -@@ -0,0 +1,116 @@ ++++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp +@@ -0,0 +1,230 @@ +/* -+ * Copyright (c) 2006, 2021, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -55998,2163 +55898,219 @@ index 000000000..ef9358aa0 + */ + +#include "precompiled.hpp" -+#include "asm/register.hpp" ++#include "runtime/java.hpp" +#include "runtime/os.hpp" -+#include "runtime/os.inline.hpp" +#include "runtime/vm_version.hpp" ++#include "utilities/formatBuffer.hpp" ++#include "utilities/macros.hpp" + -+#include -+#include ++#include OS_HEADER_INLINE(os) + -+#ifndef HWCAP_ISA_I -+#define HWCAP_ISA_I (1 << ('I' - 'A')) -+#endif ++const char* VM_Version::_uarch = ""; ++uint32_t VM_Version::_initial_vector_length = 0; + -+#ifndef HWCAP_ISA_M -+#define HWCAP_ISA_M (1 << ('M' - 'A')) -+#endif ++void VM_Version::initialize() { ++ get_os_cpu_info(); + -+#ifndef HWCAP_ISA_A -+#define HWCAP_ISA_A (1 << ('A' - 'A')) -+#endif ++ if (FLAG_IS_DEFAULT(UseFMA)) { ++ FLAG_SET_DEFAULT(UseFMA, true); ++ } + -+#ifndef HWCAP_ISA_F -+#define HWCAP_ISA_F (1 << ('F' - 'A')) -+#endif ++ if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchDistance, 0); ++ } + -+#ifndef HWCAP_ISA_D -+#define HWCAP_ISA_D (1 << ('D' - 'A')) -+#endif ++ if (UseAES || UseAESIntrinsics) { ++ if (UseAES && !FLAG_IS_DEFAULT(UseAES)) { ++ warning("AES instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseAES, false); ++ } ++ if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) { ++ warning("AES intrinsics are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseAESIntrinsics, false); ++ } ++ } + -+#ifndef HWCAP_ISA_C -+#define HWCAP_ISA_C (1 << ('C' - 'A')) -+#endif ++ if (UseAESCTRIntrinsics) { ++ warning("AES/CTR intrinsics are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false); ++ } + -+#ifndef HWCAP_ISA_V -+#define HWCAP_ISA_V (1 << ('V' - 'A')) -+#endif ++ if (UseSHA) { ++ warning("SHA instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseSHA, false); ++ } + -+#define read_csr(csr) \ -+({ \ -+ register unsigned long __v; \ -+ __asm__ __volatile__ ("csrr %0, %1" \ -+ : "=r" (__v) \ -+ : "i" (csr) \ -+ : "memory"); \ -+ __v; \ -+}) ++ if (UseSHA1Intrinsics) { ++ warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU."); ++ FLAG_SET_DEFAULT(UseSHA1Intrinsics, false); ++ } + -+uint32_t VM_Version::get_current_vector_length() { -+ assert(_features & CPU_V, "should not call this"); -+ return (uint32_t)read_csr(CSR_VLENB); -+} ++ if (UseSHA256Intrinsics) { ++ warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU."); ++ FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); ++ } + -+void VM_Version::get_os_cpu_info() { ++ if (UseSHA512Intrinsics) { ++ warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU."); ++ FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); ++ } + -+ uint64_t auxv = getauxval(AT_HWCAP); ++ if (UseSHA3Intrinsics) { ++ warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU."); ++ FLAG_SET_DEFAULT(UseSHA3Intrinsics, false); ++ } + -+ STATIC_ASSERT(CPU_I == HWCAP_ISA_I); -+ STATIC_ASSERT(CPU_M == HWCAP_ISA_M); -+ STATIC_ASSERT(CPU_A == HWCAP_ISA_A); -+ STATIC_ASSERT(CPU_F == HWCAP_ISA_F); -+ STATIC_ASSERT(CPU_D == HWCAP_ISA_D); -+ STATIC_ASSERT(CPU_C == HWCAP_ISA_C); -+ STATIC_ASSERT(CPU_V == HWCAP_ISA_V); ++ if (UseCRC32Intrinsics) { ++ warning("CRC32 intrinsics are not available on this CPU."); ++ FLAG_SET_DEFAULT(UseCRC32Intrinsics, false); ++ } + -+ if (FILE *f = fopen("/proc/cpuinfo", "r")) 
{ -+ char buf[512], *p; -+ while (fgets(buf, sizeof (buf), f) != NULL) { -+ if ((p = strchr(buf, ':')) != NULL) { -+ if (strncmp(buf, "uarch", sizeof "uarch" - 1) == 0) { -+ char* uarch = os::strdup(p + 2); -+ uarch[strcspn(uarch, "\n")] = '\0'; -+ _uarch = uarch; -+ break; -+ } -+ } ++ if (UseCRC32CIntrinsics) { ++ warning("CRC32C intrinsics are not available on this CPU."); ++ FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false); ++ } ++ ++ if (UseMD5Intrinsics) { ++ warning("MD5 intrinsics are not available on this CPU."); ++ FLAG_SET_DEFAULT(UseMD5Intrinsics, false); ++ } ++ ++ if (UseRVV) { ++ if (!(_features & CPU_V)) { ++ warning("RVV is not supported on this CPU"); ++ FLAG_SET_DEFAULT(UseRVV, false); ++ } else { ++ // read vector length from vector CSR vlenb ++ _initial_vector_length = get_current_vector_length(); + } -+ fclose(f); + } + -+ // RISC-V has four bit-manipulation ISA-extensions: Zba/Zbb/Zbc/Zbs. -+ // Availability for those extensions could not be queried from HWCAP. -+ // TODO: Add proper detection for those extensions. -+ _features = auxv & ( -+ HWCAP_ISA_I | -+ HWCAP_ISA_M | -+ HWCAP_ISA_A | -+ HWCAP_ISA_F | -+ HWCAP_ISA_D | -+ HWCAP_ISA_C | -+ HWCAP_ISA_V); -+} -diff --git a/src/hotspot/share/adlc/archDesc.cpp b/src/hotspot/share/adlc/archDesc.cpp -index ba61aa4c0..4ca0b050b 100644 ---- a/src/hotspot/share/adlc/archDesc.cpp -+++ b/src/hotspot/share/adlc/archDesc.cpp -@@ -929,6 +929,7 @@ const char *ArchDesc::getIdealType(const char *idealOp) { - // Match Vector types. - if (strncmp(idealOp, "Vec",3)==0) { - switch(last_char) { -+ case 'A': return "TypeVect::VECTA"; - case 'S': return "TypeVect::VECTS"; - case 'D': return "TypeVect::VECTD"; - case 'X': return "TypeVect::VECTX"; -@@ -939,6 +940,10 @@ const char *ArchDesc::getIdealType(const char *idealOp) { - } - } - -+ if (strncmp(idealOp, "RegVMask", 8) == 0) { -+ return "Type::BOTTOM"; -+ } -+ - // !!!!! 
- switch(last_char) { - case 'I': return "TypeInt::INT"; -diff --git a/src/hotspot/share/adlc/formssel.cpp b/src/hotspot/share/adlc/formssel.cpp -index f810fde76..2cf9636d1 100644 ---- a/src/hotspot/share/adlc/formssel.cpp -+++ b/src/hotspot/share/adlc/formssel.cpp -@@ -3968,6 +3968,8 @@ bool MatchRule::is_base_register(FormDict &globals) const { - strcmp(opType,"RegL")==0 || - strcmp(opType,"RegF")==0 || - strcmp(opType,"RegD")==0 || -+ strcmp(opType,"RegVMask")==0 || -+ strcmp(opType,"VecA")==0 || - strcmp(opType,"VecS")==0 || - strcmp(opType,"VecD")==0 || - strcmp(opType,"VecX")==0 || -diff --git a/src/hotspot/share/c1/c1_LIR.cpp b/src/hotspot/share/c1/c1_LIR.cpp -index e30d39f73..af54dddf3 100644 ---- a/src/hotspot/share/c1/c1_LIR.cpp -+++ b/src/hotspot/share/c1/c1_LIR.cpp -@@ -199,7 +199,6 @@ bool LIR_OprDesc::is_oop() const { - void LIR_Op2::verify() const { - #ifdef ASSERT - switch (code()) { -- case lir_cmove: - case lir_xchg: - break; - -@@ -252,30 +251,27 @@ void LIR_Op2::verify() const { - - - LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block) -- : LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) -- , _cond(cond) -- , _type(type) -+ : LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) - , _label(block->label()) -+ , _type(type) - , _block(block) - , _ublock(NULL) - , _stub(NULL) { - } - - LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, CodeStub* stub) : -- LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) -- , _cond(cond) -- , _type(type) -+ LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) - , _label(stub->entry()) -+ , _type(type) - , _block(NULL) - , _ublock(NULL) - , _stub(stub) { - } - - LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block, BlockBegin* ublock) -- : LIR_Op(lir_cond_float_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) -- , _cond(cond) -- , _type(type) -+ : LIR_Op2(lir_cond_float_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) - , _label(block->label()) -+ , _type(type) - , _block(block) - , _ublock(ublock) - , _stub(NULL) -@@ -296,13 +292,13 @@ void LIR_OpBranch::change_ublock(BlockBegin* b) { - } - - void LIR_OpBranch::negate_cond() { -- switch (_cond) { -- case lir_cond_equal: _cond = lir_cond_notEqual; break; -- case lir_cond_notEqual: _cond = lir_cond_equal; break; -- case lir_cond_less: _cond = lir_cond_greaterEqual; break; -- case lir_cond_lessEqual: _cond = lir_cond_greater; break; -- case lir_cond_greaterEqual: _cond = lir_cond_less; break; -- case lir_cond_greater: _cond = lir_cond_lessEqual; break; -+ switch (cond()) { -+ case lir_cond_equal: set_cond(lir_cond_notEqual); break; -+ case lir_cond_notEqual: set_cond(lir_cond_equal); break; -+ case lir_cond_less: set_cond(lir_cond_greaterEqual); break; -+ case lir_cond_lessEqual: set_cond(lir_cond_greater); break; -+ case lir_cond_greaterEqual: set_cond(lir_cond_less); break; -+ case lir_cond_greater: set_cond(lir_cond_lessEqual); break; - default: ShouldNotReachHere(); - } - } -@@ -525,6 +521,13 @@ void LIR_OpVisitState::visit(LIR_Op* op) { - assert(op->as_OpBranch() != NULL, "must be"); - LIR_OpBranch* opBranch = (LIR_OpBranch*)op; - -+ assert(opBranch->_tmp1->is_illegal() && opBranch->_tmp2->is_illegal() && -+ opBranch->_tmp3->is_illegal() && opBranch->_tmp4->is_illegal() && -+ opBranch->_tmp5->is_illegal(), "not used"); ++ if (UseRVB && !(_features & CPU_B)) { 
++ warning("RVB is not supported on this CPU"); ++ FLAG_SET_DEFAULT(UseRVB, false); ++ } + -+ if (opBranch->_opr1->is_valid()) do_input(opBranch->_opr1); -+ if (opBranch->_opr2->is_valid()) do_input(opBranch->_opr2); ++ if (UseRVC && !(_features & CPU_C)) { ++ warning("RVC is not supported on this CPU"); ++ FLAG_SET_DEFAULT(UseRVC, false); ++ } + - if (opBranch->_info != NULL) do_info(opBranch->_info); - assert(opBranch->_result->is_illegal(), "not used"); - if (opBranch->_stub != NULL) opBranch->stub()->visit(this); -@@ -615,17 +618,19 @@ void LIR_OpVisitState::visit(LIR_Op* op) { - // to the result operand, otherwise the backend fails - case lir_cmove: - { -- assert(op->as_Op2() != NULL, "must be"); -- LIR_Op2* op2 = (LIR_Op2*)op; -+ assert(op->as_Op4() != NULL, "must be"); -+ LIR_Op4* op4 = (LIR_Op4*)op; - -- assert(op2->_info == NULL && op2->_tmp1->is_illegal() && op2->_tmp2->is_illegal() && -- op2->_tmp3->is_illegal() && op2->_tmp4->is_illegal() && op2->_tmp5->is_illegal(), "not used"); -- assert(op2->_opr1->is_valid() && op2->_opr2->is_valid() && op2->_result->is_valid(), "used"); -+ assert(op4->_info == NULL && op4->_tmp1->is_illegal() && op4->_tmp2->is_illegal() && -+ op4->_tmp3->is_illegal() && op4->_tmp4->is_illegal() && op4->_tmp5->is_illegal(), "must be"); -+ assert(op4->_opr1->is_valid() && op4->_opr2->is_valid() && op4->_result->is_valid(), "used"); - -- do_input(op2->_opr1); -- do_input(op2->_opr2); -- do_temp(op2->_opr2); -- do_output(op2->_result); -+ do_input(op4->_opr1); -+ do_input(op4->_opr2); -+ if (op4->_opr3->is_valid()) do_input(op4->_opr3); -+ if (op4->_opr4->is_valid()) do_input(op4->_opr4); -+ do_temp(op4->_opr2); -+ do_output(op4->_result); - - break; - } -@@ -1048,6 +1053,10 @@ void LIR_Op3::emit_code(LIR_Assembler* masm) { - masm->emit_op3(this); - } - -+void LIR_Op4::emit_code(LIR_Assembler* masm) { -+ masm->emit_op4(this); -+} ++ if (FLAG_IS_DEFAULT(AvoidUnalignedAccesses)) { ++ FLAG_SET_DEFAULT(AvoidUnalignedAccesses, true); ++ } + - void LIR_OpLock::emit_code(LIR_Assembler* masm) { - masm->emit_lock(this); - if (stub()) { -@@ -1084,6 +1093,10 @@ LIR_List::LIR_List(Compilation* compilation, BlockBegin* block) - , _file(NULL) - , _line(0) - #endif -+#ifdef RISCV -+ , _cmp_opr1(LIR_OprFact::illegalOpr) -+ , _cmp_opr2(LIR_OprFact::illegalOpr) -+#endif - { } - - -@@ -1101,6 +1114,38 @@ void LIR_List::set_file_and_line(const char * file, int line) { - } - #endif - -+#ifdef RISCV -+void LIR_List::set_cmp_oprs(LIR_Op* op) { -+ switch (op->code()) { -+ case lir_cmp: -+ _cmp_opr1 = op->as_Op2()->in_opr1(); -+ _cmp_opr2 = op->as_Op2()->in_opr2(); -+ break; -+ case lir_branch: // fall through -+ case lir_cond_float_branch: -+ assert(op->as_OpBranch()->cond() == lir_cond_always || -+ (_cmp_opr1 != LIR_OprFact::illegalOpr && _cmp_opr2 != LIR_OprFact::illegalOpr), -+ "conditional branches must have legal operands"); -+ if (op->as_OpBranch()->cond() != lir_cond_always) { -+ op->as_Op2()->set_in_opr1(_cmp_opr1); -+ op->as_Op2()->set_in_opr2(_cmp_opr2); -+ } -+ break; -+ case lir_cmove: -+ op->as_Op4()->set_in_opr3(_cmp_opr1); -+ op->as_Op4()->set_in_opr4(_cmp_opr2); -+ break; -+#if INCLUDE_ZGC -+ case lir_zloadbarrier_test: -+ _cmp_opr1 = FrameMap::as_opr(t1); -+ _cmp_opr2 = LIR_OprFact::intConst(0); -+ break; -+#endif -+ default: -+ break; ++ if (UseRVB) { ++ if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { ++ FLAG_SET_DEFAULT(UsePopCountInstruction, true); ++ } ++ } else { ++ FLAG_SET_DEFAULT(UsePopCountInstruction, false); + } -+} -+#endif - - void 
LIR_List::append(LIR_InsertionBuffer* buffer) { - assert(this == buffer->lir_list(), "wrong lir list"); -@@ -1680,7 +1725,6 @@ const char * LIR_Op::name() const { - case lir_cmp_l2i: s = "cmp_l2i"; break; - case lir_ucmp_fd2i: s = "ucomp_fd2i"; break; - case lir_cmp_fd2i: s = "comp_fd2i"; break; -- case lir_cmove: s = "cmove"; break; - case lir_add: s = "add"; break; - case lir_sub: s = "sub"; break; - case lir_mul: s = "mul"; break; -@@ -1705,6 +1749,8 @@ const char * LIR_Op::name() const { - case lir_irem: s = "irem"; break; - case lir_fmad: s = "fmad"; break; - case lir_fmaf: s = "fmaf"; break; -+ // LIR_Op4 -+ case lir_cmove: s = "cmove"; break; - // LIR_OpJavaCall - case lir_static_call: s = "static"; break; - case lir_optvirtual_call: s = "optvirtual"; break; -@@ -1841,6 +1887,8 @@ void LIR_Op1::print_patch_code(outputStream* out, LIR_PatchCode code) { - // LIR_OpBranch - void LIR_OpBranch::print_instr(outputStream* out) const { - print_condition(out, cond()); out->print(" "); -+ in_opr1()->print(out); out->print(" "); -+ in_opr2()->print(out); out->print(" "); - if (block() != NULL) { - out->print("[B%d] ", block()->block_id()); - } else if (stub() != NULL) { -@@ -1927,7 +1975,7 @@ void LIR_OpRoundFP::print_instr(outputStream* out) const { - - // LIR_Op2 - void LIR_Op2::print_instr(outputStream* out) const { -- if (code() == lir_cmove || code() == lir_cmp) { -+ if (code() == lir_cmp || code() == lir_branch || code() == lir_cond_float_branch) { - print_condition(out, condition()); out->print(" "); - } - in_opr1()->print(out); out->print(" "); -@@ -1978,6 +2026,15 @@ void LIR_Op3::print_instr(outputStream* out) const { - result_opr()->print(out); - } - -+// LIR_Op4 -+void LIR_Op4::print_instr(outputStream* out) const { -+ print_condition(out, condition()); out->print(" "); -+ in_opr1()->print(out); out->print(" "); -+ in_opr2()->print(out); out->print(" "); -+ in_opr3()->print(out); out->print(" "); -+ in_opr4()->print(out); out->print(" "); -+ result_opr()->print(out); -+} - - void LIR_OpLock::print_instr(outputStream* out) const { - hdr_opr()->print(out); out->print(" "); -diff --git a/src/hotspot/share/c1/c1_LIR.hpp b/src/hotspot/share/c1/c1_LIR.hpp -index 3234ca018..88cd3b24e 100644 ---- a/src/hotspot/share/c1/c1_LIR.hpp -+++ b/src/hotspot/share/c1/c1_LIR.hpp -@@ -864,9 +864,11 @@ class LIR_OpConvert; - class LIR_OpAllocObj; - class LIR_OpRoundFP; - class LIR_Op2; --class LIR_OpDelay; -+class LIR_OpBranch; -+class LIR_OpDelay; - class LIR_Op3; - class LIR_OpAllocArray; -+class LIR_Op4; - class LIR_OpCall; - class LIR_OpJavaCall; - class LIR_OpRTCall; -@@ -916,8 +918,6 @@ enum LIR_Code { - , lir_null_check - , lir_return - , lir_leal -- , lir_branch -- , lir_cond_float_branch - , lir_move - , lir_convert - , lir_alloc_object -@@ -929,11 +929,12 @@ enum LIR_Code { - , lir_unwind - , end_op1 - , begin_op2 -+ , lir_branch -+ , lir_cond_float_branch - , lir_cmp - , lir_cmp_l2i - , lir_ucmp_fd2i - , lir_cmp_fd2i -- , lir_cmove - , lir_add - , lir_sub - , lir_mul -@@ -964,6 +965,9 @@ enum LIR_Code { - , lir_fmad - , lir_fmaf - , end_op3 -+ , begin_op4 -+ , lir_cmove -+ , end_op4 - , begin_opJavaCall - , lir_static_call - , lir_optvirtual_call -@@ -1134,6 +1138,7 @@ class LIR_Op: public CompilationResourceObj { - virtual LIR_Op1* as_Op1() { return NULL; } - virtual LIR_Op2* as_Op2() { return NULL; } - virtual LIR_Op3* as_Op3() { return NULL; } -+ virtual LIR_Op4* as_Op4() { return NULL; } - virtual LIR_OpArrayCopy* as_OpArrayCopy() { return NULL; } - virtual LIR_OpUpdateCRC32* 
as_OpUpdateCRC32() { return NULL; } - virtual LIR_OpTypeCheck* as_OpTypeCheck() { return NULL; } -@@ -1410,51 +1415,6 @@ class LIR_OpRTCall: public LIR_OpCall { - virtual void verify() const; - }; - -- --class LIR_OpBranch: public LIR_Op { -- friend class LIR_OpVisitState; -- -- private: -- LIR_Condition _cond; -- BasicType _type; -- Label* _label; -- BlockBegin* _block; // if this is a branch to a block, this is the block -- BlockBegin* _ublock; // if this is a float-branch, this is the unorderd block -- CodeStub* _stub; // if this is a branch to a stub, this is the stub -- -- public: -- LIR_OpBranch(LIR_Condition cond, BasicType type, Label* lbl) -- : LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*) NULL) -- , _cond(cond) -- , _type(type) -- , _label(lbl) -- , _block(NULL) -- , _ublock(NULL) -- , _stub(NULL) { } -- -- LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block); -- LIR_OpBranch(LIR_Condition cond, BasicType type, CodeStub* stub); -- -- // for unordered comparisons -- LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block, BlockBegin* ublock); -- -- LIR_Condition cond() const { return _cond; } -- BasicType type() const { return _type; } -- Label* label() const { return _label; } -- BlockBegin* block() const { return _block; } -- BlockBegin* ublock() const { return _ublock; } -- CodeStub* stub() const { return _stub; } -- -- void change_block(BlockBegin* b); -- void change_ublock(BlockBegin* b); -- void negate_cond(); -- -- virtual void emit_code(LIR_Assembler* masm); -- virtual LIR_OpBranch* as_OpBranch() { return this; } -- virtual void print_instr(outputStream* out) const PRODUCT_RETURN; --}; -- -- - class ConversionStub; - - class LIR_OpConvert: public LIR_Op1 { -@@ -1614,19 +1574,19 @@ class LIR_Op2: public LIR_Op { - void verify() const; - - public: -- LIR_Op2(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, CodeEmitInfo* info = NULL) -+ LIR_Op2(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, CodeEmitInfo* info = NULL, BasicType type = T_ILLEGAL) - : LIR_Op(code, LIR_OprFact::illegalOpr, info) - , _opr1(opr1) - , _opr2(opr2) -- , _type(T_ILLEGAL) -- , _condition(condition) -+ , _type(type) - , _fpu_stack_size(0) - , _tmp1(LIR_OprFact::illegalOpr) - , _tmp2(LIR_OprFact::illegalOpr) - , _tmp3(LIR_OprFact::illegalOpr) - , _tmp4(LIR_OprFact::illegalOpr) -- , _tmp5(LIR_OprFact::illegalOpr) { -- assert(code == lir_cmp || code == lir_assert, "code check"); -+ , _tmp5(LIR_OprFact::illegalOpr) -+ , _condition(condition) { -+ assert(code == lir_cmp || code == lir_branch || code == lir_cond_float_branch || code == lir_assert, "code check"); - } - - LIR_Op2(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) -@@ -1634,7 +1594,6 @@ class LIR_Op2: public LIR_Op { - , _opr1(opr1) - , _opr2(opr2) - , _type(type) -- , _condition(condition) - , _fpu_stack_size(0) - , _tmp1(LIR_OprFact::illegalOpr) - , _tmp2(LIR_OprFact::illegalOpr) -@@ -1651,14 +1610,14 @@ class LIR_Op2: public LIR_Op { - , _opr1(opr1) - , _opr2(opr2) - , _type(type) -- , _condition(lir_cond_unknown) - , _fpu_stack_size(0) - , _tmp1(LIR_OprFact::illegalOpr) - , _tmp2(LIR_OprFact::illegalOpr) - , _tmp3(LIR_OprFact::illegalOpr) - , _tmp4(LIR_OprFact::illegalOpr) -- , _tmp5(LIR_OprFact::illegalOpr) { -- assert(code != lir_cmp && is_in_range(code, begin_op2, end_op2), "code check"); -+ , _tmp5(LIR_OprFact::illegalOpr) -+ , _condition(lir_cond_unknown) { -+ assert(code != lir_cmp && code != lir_branch && 
code != lir_cond_float_branch && is_in_range(code, begin_op2, end_op2), "code check"); - } - - LIR_Op2(LIR_Code code, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, LIR_Opr tmp1, LIR_Opr tmp2 = LIR_OprFact::illegalOpr, -@@ -1667,14 +1626,14 @@ class LIR_Op2: public LIR_Op { - , _opr1(opr1) - , _opr2(opr2) - , _type(T_ILLEGAL) -- , _condition(lir_cond_unknown) - , _fpu_stack_size(0) - , _tmp1(tmp1) - , _tmp2(tmp2) - , _tmp3(tmp3) - , _tmp4(tmp4) -- , _tmp5(tmp5) { -- assert(code != lir_cmp && is_in_range(code, begin_op2, end_op2), "code check"); -+ , _tmp5(tmp5) -+ , _condition(lir_cond_unknown) { -+ assert(code != lir_cmp && code != lir_branch && code != lir_cond_float_branch && is_in_range(code, begin_op2, end_op2), "code check"); - } - - LIR_Opr in_opr1() const { return _opr1; } -@@ -1686,10 +1645,10 @@ class LIR_Op2: public LIR_Op { - LIR_Opr tmp4_opr() const { return _tmp4; } - LIR_Opr tmp5_opr() const { return _tmp5; } - LIR_Condition condition() const { -- assert(code() == lir_cmp || code() == lir_cmove || code() == lir_assert, "only valid for cmp and cmove and assert"); return _condition; -+ assert(code() == lir_cmp || code() == lir_branch || code() == lir_cond_float_branch || code() == lir_assert, "only valid for branch and assert"); return _condition; - } - void set_condition(LIR_Condition condition) { -- assert(code() == lir_cmp || code() == lir_cmove, "only valid for cmp and cmove"); _condition = condition; -+ assert(code() == lir_cmp || code() == lir_branch || code() == lir_cond_float_branch, "only valid for branch"); _condition = condition; - } - - void set_fpu_stack_size(int size) { _fpu_stack_size = size; } -@@ -1703,6 +1662,53 @@ class LIR_Op2: public LIR_Op { - virtual void print_instr(outputStream* out) const PRODUCT_RETURN; - }; - -+class LIR_OpBranch: public LIR_Op2 { -+ friend class LIR_OpVisitState; + -+ private: -+ BasicType _type; -+ Label* _label; -+ BlockBegin* _block; // if this is a branch to a block, this is the block -+ BlockBegin* _ublock; // if this is a float-branch, this is the unorderd block -+ CodeStub* _stub; // if this is a branch to a stub, this is the stub ++ char buf[512]; ++ buf[0] = '\0'; ++ if (_uarch != NULL && strcmp(_uarch, "") != 0) snprintf(buf, sizeof(buf), "%s,", _uarch); ++ strcat(buf, "rv64"); ++#define ADD_FEATURE_IF_SUPPORTED(id, name, bit) if (_features & CPU_##id) strcat(buf, name); ++ CPU_FEATURE_FLAGS(ADD_FEATURE_IF_SUPPORTED) ++#undef ADD_FEATURE_IF_SUPPORTED + -+ public: -+ LIR_OpBranch(LIR_Condition cond, BasicType type, Label* lbl) -+ : LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*) NULL) -+ , _label(lbl) -+ , _type(type) -+ , _block(NULL) -+ , _ublock(NULL) -+ , _stub(NULL) { } ++ _features_string = os::strdup(buf); + -+ LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block); -+ LIR_OpBranch(LIR_Condition cond, BasicType type, CodeStub* stub); ++#ifdef COMPILER2 ++ c2_initialize(); ++#endif // COMPILER2 ++} + -+ // for unordered comparisons -+ LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block, BlockBegin* ublock); ++#ifdef COMPILER2 ++void VM_Version::c2_initialize() { ++ if (UseCMoveUnconditionally) { ++ FLAG_SET_DEFAULT(UseCMoveUnconditionally, false); ++ } + -+ LIR_Condition cond() const { -+ return condition(); ++ if (ConditionalMoveLimit > 0) { ++ FLAG_SET_DEFAULT(ConditionalMoveLimit, 0); + } + -+ void set_cond(LIR_Condition cond) { -+ set_condition(cond); ++ if (!UseRVV) { ++ FLAG_SET_DEFAULT(SpecialEncodeISOArray, false); + } + -+ Label* label() 
const { return _label; } -+ BlockBegin* block() const { return _block; } -+ BlockBegin* ublock() const { return _ublock; } -+ CodeStub* stub() const { return _stub; } ++ if (!UseRVV && MaxVectorSize) { ++ FLAG_SET_DEFAULT(MaxVectorSize, 0); ++ } + -+ void change_block(BlockBegin* b); -+ void change_ublock(BlockBegin* b); -+ void negate_cond(); ++ if (!UseRVV) { ++ FLAG_SET_DEFAULT(UseRVVForBigIntegerShiftIntrinsics, false); ++ } + -+ virtual void emit_code(LIR_Assembler* masm); -+ virtual LIR_OpBranch* as_OpBranch() { return this; } -+ virtual void print_instr(outputStream* out) const PRODUCT_RETURN; -+}; ++ if (UseRVV) { ++ if (FLAG_IS_DEFAULT(MaxVectorSize)) { ++ MaxVectorSize = _initial_vector_length; ++ } else if (MaxVectorSize < 16) { ++ warning("RVV does not support vector length less than 16 bytes. Disabling RVV."); ++ UseRVV = false; ++ } else if (is_power_of_2(MaxVectorSize)) { ++ if (MaxVectorSize > _initial_vector_length) { ++ warning("Current system only supports max RVV vector length %d. Set MaxVectorSize to %d", ++ _initial_vector_length, _initial_vector_length); ++ } ++ MaxVectorSize = _initial_vector_length; ++ } else { ++ vm_exit_during_initialization(err_msg("Unsupported MaxVectorSize: %d", (int)MaxVectorSize)); ++ } ++ } + - class LIR_OpAllocArray : public LIR_Op { - friend class LIR_OpVisitState; - -@@ -1766,6 +1772,63 @@ class LIR_Op3: public LIR_Op { - virtual void print_instr(outputStream* out) const PRODUCT_RETURN; - }; - -+class LIR_Op4: public LIR_Op { -+ friend class LIR_OpVisitState; -+ protected: -+ LIR_Opr _opr1; -+ LIR_Opr _opr2; -+ LIR_Opr _opr3; -+ LIR_Opr _opr4; -+ BasicType _type; -+ LIR_Opr _tmp1; -+ LIR_Opr _tmp2; -+ LIR_Opr _tmp3; -+ LIR_Opr _tmp4; -+ LIR_Opr _tmp5; -+ LIR_Condition _condition; ++ // disable prefetch ++ if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0); ++ } + -+ public: -+ LIR_Op4(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr opr3, LIR_Opr opr4, -+ LIR_Opr result, BasicType type) -+ : LIR_Op(code, result, NULL) -+ , _opr1(opr1) -+ , _opr2(opr2) -+ , _opr3(opr3) -+ , _opr4(opr4) -+ , _type(type) -+ , _condition(condition) -+ , _tmp1(LIR_OprFact::illegalOpr) -+ , _tmp2(LIR_OprFact::illegalOpr) -+ , _tmp3(LIR_OprFact::illegalOpr) -+ , _tmp4(LIR_OprFact::illegalOpr) -+ , _tmp5(LIR_OprFact::illegalOpr) { -+ assert(code == lir_cmove, "code check"); -+ assert(type != T_ILLEGAL, "cmove should have type"); ++ if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) { ++ FLAG_SET_DEFAULT(UseMulAddIntrinsic, true); + } + -+ LIR_Opr in_opr1() const { return _opr1; } -+ LIR_Opr in_opr2() const { return _opr2; } -+ LIR_Opr in_opr3() const { return _opr3; } -+ LIR_Opr in_opr4() const { return _opr4; } -+ BasicType type() const { return _type; } -+ LIR_Opr tmp1_opr() const { return _tmp1; } -+ LIR_Opr tmp2_opr() const { return _tmp2; } -+ LIR_Opr tmp3_opr() const { return _tmp3; } -+ LIR_Opr tmp4_opr() const { return _tmp4; } -+ LIR_Opr tmp5_opr() const { return _tmp5; } ++ if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) { ++ FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, true); ++ } + -+ LIR_Condition condition() const { return _condition; } -+ void set_condition(LIR_Condition condition) { _condition = condition; } ++ if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) { ++ FLAG_SET_DEFAULT(UseSquareToLenIntrinsic, true); ++ } + -+ void set_in_opr1(LIR_Opr opr) { _opr1 = opr; } -+ void set_in_opr2(LIR_Opr opr) { _opr2 = opr; } -+ void set_in_opr3(LIR_Opr opr) { _opr3 = opr; } -+ void set_in_opr4(LIR_Opr 
opr) { _opr4 = opr; } -+ virtual void emit_code(LIR_Assembler* masm); -+ virtual LIR_Op4* as_Op4() { return this; } ++ if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { ++ FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, true); ++ } + -+ virtual void print_instr(outputStream* out) const PRODUCT_RETURN; -+}; - - //-------------------------------- - class LabelObj: public CompilationResourceObj { -@@ -1988,6 +2051,10 @@ class LIR_List: public CompilationResourceObj { - const char * _file; - int _line; - #endif -+#ifdef RISCV -+ LIR_Opr _cmp_opr1; -+ LIR_Opr _cmp_opr2; -+#endif - - public: - void append(LIR_Op* op) { -@@ -2000,6 +2067,12 @@ class LIR_List: public CompilationResourceObj { - } - #endif // PRODUCT - -+#ifdef RISCV -+ set_cmp_oprs(op); -+ // lir_cmp set cmp oprs only on riscv -+ if (op->code() == lir_cmp) return; -+#endif -+ - _operations.append(op); - - #ifdef ASSERT -@@ -2016,6 +2089,10 @@ class LIR_List: public CompilationResourceObj { - void set_file_and_line(const char * file, int line); - #endif - -+#ifdef RISCV -+ void set_cmp_oprs(LIR_Op* op); -+#endif -+ - //---------- accessors --------------- - LIR_OpList* instructions_list() { return &_operations; } - int length() const { return _operations.length(); } -@@ -2149,8 +2226,9 @@ class LIR_List: public CompilationResourceObj { - void cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info); - void cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Address* addr, CodeEmitInfo* info); - -- void cmove(LIR_Condition condition, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { -- append(new LIR_Op2(lir_cmove, condition, src1, src2, dst, type)); -+ void cmove(LIR_Condition condition, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type, -+ LIR_Opr cmp_opr1 = LIR_OprFact::illegalOpr, LIR_Opr cmp_opr2 = LIR_OprFact::illegalOpr) { -+ append(new LIR_Op4(lir_cmove, condition, src1, src2, cmp_opr1, cmp_opr2, dst, type)); - } - - void cas_long(LIR_Opr addr, LIR_Opr cmp_value, LIR_Opr new_value, -diff --git a/src/hotspot/share/c1/c1_LIRAssembler.cpp b/src/hotspot/share/c1/c1_LIRAssembler.cpp -index 160483d5f..42a0350f7 100644 ---- a/src/hotspot/share/c1/c1_LIRAssembler.cpp -+++ b/src/hotspot/share/c1/c1_LIRAssembler.cpp -@@ -709,10 +709,6 @@ void LIR_Assembler::emit_op2(LIR_Op2* op) { - comp_fl2i(op->code(), op->in_opr1(), op->in_opr2(), op->result_opr(), op); - break; - -- case lir_cmove: -- cmove(op->condition(), op->in_opr1(), op->in_opr2(), op->result_opr(), op->type()); -- break; -- - case lir_shl: - case lir_shr: - case lir_ushr: -@@ -776,6 +772,17 @@ void LIR_Assembler::emit_op2(LIR_Op2* op) { - } - } - -+void LIR_Assembler::emit_op4(LIR_Op4* op) { -+ switch(op->code()) { -+ case lir_cmove: -+ cmove(op->condition(), op->in_opr1(), op->in_opr2(), op->result_opr(), op->type(), op->in_opr3(), op->in_opr4()); -+ break; -+ -+ default: -+ Unimplemented(); -+ break; -+ } -+} - - void LIR_Assembler::build_frame() { - _masm->build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes()); -diff --git a/src/hotspot/share/c1/c1_LIRAssembler.hpp b/src/hotspot/share/c1/c1_LIRAssembler.hpp -index 44a5bcbe5..406a58d21 100644 ---- a/src/hotspot/share/c1/c1_LIRAssembler.hpp -+++ b/src/hotspot/share/c1/c1_LIRAssembler.hpp -@@ -190,6 +190,7 @@ class LIR_Assembler: public CompilationResourceObj { - void emit_op1(LIR_Op1* op); - void emit_op2(LIR_Op2* op); - void emit_op3(LIR_Op3* op); -+ void emit_op4(LIR_Op4* op); - void emit_opBranch(LIR_OpBranch* op); - void emit_opLabel(LIR_OpLabel* op); - void 
emit_arraycopy(LIR_OpArrayCopy* op); -@@ -222,7 +223,8 @@ class LIR_Assembler: public CompilationResourceObj { - void volatile_move_op(LIR_Opr src, LIR_Opr result, BasicType type, CodeEmitInfo* info); - void comp_mem_op(LIR_Opr src, LIR_Opr result, BasicType type, CodeEmitInfo* info); // info set for null exceptions - void comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr result, LIR_Op2* op); -- void cmove(LIR_Condition code, LIR_Opr left, LIR_Opr right, LIR_Opr result, BasicType type); -+ void cmove(LIR_Condition code, LIR_Opr left, LIR_Opr right, LIR_Opr result, BasicType type, -+ LIR_Opr cmp_opr1 = LIR_OprFact::illegalOpr, LIR_Opr cmp_opr2 = LIR_OprFact::illegalOpr); - - void call( LIR_OpJavaCall* op, relocInfo::relocType rtype); - void ic_call( LIR_OpJavaCall* op); -diff --git a/src/hotspot/share/c1/c1_LinearScan.cpp b/src/hotspot/share/c1/c1_LinearScan.cpp -index c28055fd9..d00bfe91a 100644 ---- a/src/hotspot/share/c1/c1_LinearScan.cpp -+++ b/src/hotspot/share/c1/c1_LinearScan.cpp -@@ -1242,8 +1242,8 @@ void LinearScan::add_register_hints(LIR_Op* op) { - break; - } - case lir_cmove: { -- assert(op->as_Op2() != NULL, "lir_cmove must be LIR_Op2"); -- LIR_Op2* cmove = (LIR_Op2*)op; -+ assert(op->as_Op4() != NULL, "lir_cmove must be LIR_Op4"); -+ LIR_Op4* cmove = (LIR_Op4*)op; - - LIR_Opr move_from = cmove->in_opr1(); - LIR_Opr move_to = cmove->result_opr(); -@@ -3140,6 +3140,9 @@ void LinearScan::do_linear_scan() { - } - } - -+#ifndef RISCV -+ // Disable these optimizations on riscv temporarily, because it does not -+ // work when the comparison operands are bound to branches or cmoves. - { TIME_LINEAR_SCAN(timer_optimize_lir); - - EdgeMoveOptimizer::optimize(ir()->code()); -@@ -3147,6 +3150,7 @@ void LinearScan::do_linear_scan() { - // check that cfg is still correct after optimizations - ir()->verify(); - } -+#endif - - NOT_PRODUCT(print_lir(1, "Before Code Generation", false)); - NOT_PRODUCT(LinearScanStatistic::compute(this, _stat_final)); -@@ -6284,14 +6288,14 @@ void ControlFlowOptimizer::delete_unnecessary_jumps(BlockList* code) { - // There might be a cmove inserted for profiling which depends on the same - // compare. If we change the condition of the respective compare, we have - // to take care of this cmove as well. 
-- LIR_Op2* prev_cmove = NULL; -+ LIR_Op4* prev_cmove = NULL; - - for(int j = instructions->length() - 3; j >= 0 && prev_cmp == NULL; j--) { - prev_op = instructions->at(j); - // check for the cmove - if (prev_op->code() == lir_cmove) { -- assert(prev_op->as_Op2() != NULL, "cmove must be of type LIR_Op2"); -- prev_cmove = (LIR_Op2*)prev_op; -+ assert(prev_op->as_Op4() != NULL, "cmove must be of type LIR_Op4"); -+ prev_cmove = (LIR_Op4*)prev_op; - assert(prev_branch->cond() == prev_cmove->condition(), "should be the same"); - } - if (prev_op->code() == lir_cmp) { -diff --git a/src/hotspot/share/classfile/vmSymbols.cpp b/src/hotspot/share/classfile/vmSymbols.cpp -index 19fe196bc..d9cb8e999 100644 ---- a/src/hotspot/share/classfile/vmSymbols.cpp -+++ b/src/hotspot/share/classfile/vmSymbols.cpp -@@ -523,6 +523,7 @@ bool vmIntrinsics::is_disabled_by_flags(vmIntrinsics::ID id) { - case vmIntrinsics::_indexOfIU: - case vmIntrinsics::_indexOfIUL: - case vmIntrinsics::_indexOfU_char: -+ case vmIntrinsics::_indexOfL_char: - case vmIntrinsics::_compareToL: - case vmIntrinsics::_compareToU: - case vmIntrinsics::_compareToLU: -@@ -808,6 +809,7 @@ bool vmIntrinsics::is_disabled_by_flags(vmIntrinsics::ID id) { - case vmIntrinsics::_indexOfIU: - case vmIntrinsics::_indexOfIUL: - case vmIntrinsics::_indexOfU_char: -+ case vmIntrinsics::_indexOfL_char: - if (!SpecialStringIndexOf) return true; - break; - case vmIntrinsics::_equalsL: -diff --git a/src/hotspot/share/classfile/vmSymbols.hpp b/src/hotspot/share/classfile/vmSymbols.hpp -index cef3f530c..a31525003 100644 ---- a/src/hotspot/share/classfile/vmSymbols.hpp -+++ b/src/hotspot/share/classfile/vmSymbols.hpp -@@ -946,6 +946,7 @@ - do_intrinsic(_indexOfIU, java_lang_StringUTF16, indexOf_name, indexOfI_signature, F_S) \ - do_intrinsic(_indexOfIUL, java_lang_StringUTF16, indexOfUL_name, indexOfI_signature, F_S) \ - do_intrinsic(_indexOfU_char, java_lang_StringUTF16, indexOfChar_name, indexOfChar_signature, F_S) \ -+ do_intrinsic(_indexOfL_char, java_lang_StringLatin1,indexOfChar_name, indexOfChar_signature, F_S) \ - do_name( indexOf_name, "indexOf") \ - do_name( indexOfChar_name, "indexOfChar") \ - do_name( indexOfUL_name, "indexOfLatin1") \ -diff --git a/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp b/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp -index 4771a8b86..295f82ccc 100644 ---- a/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp -+++ b/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp -@@ -31,7 +31,7 @@ - #include "utilities/defaultStream.hpp" - - void ShenandoahArguments::initialize() { --#if !(defined AARCH64 || defined AMD64 || defined IA32) -+#if !(defined AARCH64 || defined AMD64 || defined IA32 || defined RISCV64) - vm_exit_during_initialization("Shenandoah GC is not supported on this platform."); - #endif - -diff --git a/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp b/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp -index e01a242a5..ff16de0e7 100644 ---- a/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp -+++ b/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp -@@ -102,7 +102,7 @@ inline T JfrBigEndian::read_unaligned(const address location) { - inline bool JfrBigEndian::platform_supports_unaligned_reads(void) { - #if defined(IA32) || defined(AMD64) || defined(PPC) || defined(S390) - return true; --#elif defined(SPARC) || defined(ARM) || defined(AARCH64) -+#elif defined(SPARC) || defined(ARM) || defined(AARCH64) || defined(RISCV) - return false; - #else - #warning "Unconfigured platform" -diff --git 
a/src/hotspot/share/opto/c2compiler.cpp b/src/hotspot/share/opto/c2compiler.cpp -index 7768615b7..ef006f087 100644 ---- a/src/hotspot/share/opto/c2compiler.cpp -+++ b/src/hotspot/share/opto/c2compiler.cpp -@@ -510,6 +510,7 @@ bool C2Compiler::is_intrinsic_supported(const methodHandle& method, bool is_virt - case vmIntrinsics::_indexOfIU: - case vmIntrinsics::_indexOfIUL: - case vmIntrinsics::_indexOfU_char: -+ case vmIntrinsics::_indexOfL_char: - case vmIntrinsics::_toBytesStringU: - case vmIntrinsics::_getCharsStringU: - case vmIntrinsics::_getCharStringU: -diff --git a/src/hotspot/share/opto/chaitin.cpp b/src/hotspot/share/opto/chaitin.cpp -index 500054218..fafbde78d 100644 ---- a/src/hotspot/share/opto/chaitin.cpp -+++ b/src/hotspot/share/opto/chaitin.cpp -@@ -77,6 +77,7 @@ void LRG::dump() const { - if( _is_oop ) tty->print("Oop "); - if( _is_float ) tty->print("Float "); - if( _is_vector ) tty->print("Vector "); -+ if( _is_scalable ) tty->print("Scalable "); - if( _was_spilled1 ) tty->print("Spilled "); - if( _was_spilled2 ) tty->print("Spilled2 "); - if( _direct_conflict ) tty->print("Direct_conflict "); -@@ -591,6 +592,7 @@ void PhaseChaitin::Register_Allocate() { - - // Merge multidefs if multiple defs representing the same value are used in a single block. - merge_multidefs(); -+ merge_debugdefs(); - - #ifdef ASSERT - // Veify the graph after RA. -@@ -646,7 +648,15 @@ void PhaseChaitin::Register_Allocate() { - // Live ranges record the highest register in their mask. - // We want the low register for the AD file writer's convenience. - OptoReg::Name hi = lrg.reg(); // Get hi register -- OptoReg::Name lo = OptoReg::add(hi, (1-lrg.num_regs())); // Find lo -+ int num_regs = lrg.num_regs(); -+ if (lrg.is_scalable() && OptoReg::is_stack(hi)) { -+ // For scalable vector registers, when they are allocated in physical -+ // registers, num_regs is RegMask::SlotsPerVecA for reg mask of scalable -+ // vector. If they are allocated on stack, we need to get the actual -+ // num_regs, which reflects the physical length of scalable registers. -+ num_regs = lrg.scalable_reg_slots(); -+ } -+ OptoReg::Name lo = OptoReg::add(hi, (1-num_regs)); // Find lo - // We have to use pair [lo,lo+1] even for wide vectors because - // the rest of code generation works only with pairs. It is safe - // since for registers encoding only 'lo' is used. -@@ -801,8 +811,19 @@ void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) { - // Check for vector live range (only if vector register is used). - // On SPARC vector uses RegD which could be misaligned so it is not - // processes as vector in RA. -- if (RegMask::is_vector(ireg)) -+ if (RegMask::is_vector(ireg)) { - lrg._is_vector = 1; -+ if (ireg == Op_VecA) { -+ assert(Matcher::supports_scalable_vector(), "scalable vector should be supported"); -+ lrg._is_scalable = 1; -+ // For scalable vector, when it is allocated in physical register, -+ // num_regs is RegMask::SlotsPerVecA for reg mask, -+ // which may not be the actual physical register size. -+ // If it is allocated in stack, we need to get the actual -+ // physical length of scalable vector register. 
-+ lrg.set_scalable_reg_slots(Matcher::scalable_vector_reg_size(T_FLOAT)); -+ } -+ } - assert(n_type->isa_vect() == NULL || lrg._is_vector || ireg == Op_RegD || ireg == Op_RegL, - "vector must be in vector registers"); - -@@ -912,6 +933,13 @@ void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) { - lrg.set_reg_pressure(1); - #endif - break; -+ case Op_VecA: -+ assert(Matcher::supports_scalable_vector(), "does not support scalable vector"); -+ assert(RegMask::num_registers(Op_VecA) == RegMask::SlotsPerVecA, "sanity"); -+ assert(lrgmask.is_aligned_sets(RegMask::SlotsPerVecA), "vector should be aligned"); -+ lrg.set_num_regs(RegMask::SlotsPerVecA); -+ lrg.set_reg_pressure(1); -+ break; - case Op_VecS: - assert(Matcher::vector_size_supported(T_BYTE,4), "sanity"); - assert(RegMask::num_registers(Op_VecS) == RegMask::SlotsPerVecS, "sanity"); -@@ -1358,6 +1386,47 @@ static bool is_legal_reg(LRG &lrg, OptoReg::Name reg, int chunk) { - return false; - } - -+static OptoReg::Name find_first_set(LRG &lrg, RegMask mask, int chunk) { -+ int num_regs = lrg.num_regs(); -+ OptoReg::Name assigned = mask.find_first_set(lrg, num_regs); -+ -+ if (lrg.is_scalable()) { -+ // a physical register is found -+ if (chunk == 0 && OptoReg::is_reg(assigned)) { -+ return assigned; -+ } -+ -+ // find available stack slots for scalable register -+ if (lrg._is_vector) { -+ num_regs = lrg.scalable_reg_slots(); -+ // if actual scalable vector register is exactly SlotsPerVecA * 32 bits -+ if (num_regs == RegMask::SlotsPerVecA) { -+ return assigned; -+ } -+ -+ // mask has been cleared out by clear_to_sets(SlotsPerVecA) before choose_color, but it -+ // does not work for scalable size. We have to find adjacent scalable_reg_slots() bits -+ // instead of SlotsPerVecA bits. -+ assigned = mask.find_first_set(lrg, num_regs); // find highest valid reg -+ while (OptoReg::is_valid(assigned) && RegMask::can_represent(assigned)) { -+ // Verify the found reg has scalable_reg_slots() bits set. -+ if (mask.is_valid_reg(assigned, num_regs)) { -+ return assigned; -+ } else { -+ // Remove more for each iteration -+ mask.Remove(assigned - num_regs + 1); // Unmask the lowest reg -+ mask.clear_to_sets(RegMask::SlotsPerVecA); // Align by SlotsPerVecA bits -+ assigned = mask.find_first_set(lrg, num_regs); -+ } -+ } -+ return OptoReg::Bad; // will cause chunk change, and retry next chunk -+ } -+ } -+ -+ return assigned; -+} -+ -+ - // Choose a color using the biasing heuristic - OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) { - -@@ -1391,7 +1460,7 @@ OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) { - RegMask tempmask = lrg.mask(); - tempmask.AND(lrgs(copy_lrg).mask()); - tempmask.clear_to_sets(lrg.num_regs()); -- OptoReg::Name reg = tempmask.find_first_set(lrg.num_regs()); -+ OptoReg::Name reg = find_first_set(lrg, tempmask, chunk); - if (OptoReg::is_valid(reg)) - return reg; - } -@@ -1400,7 +1469,7 @@ OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) { - // If no bias info exists, just go with the register selection ordering - if (lrg._is_vector || lrg.num_regs() == 2) { - // Find an aligned set -- return OptoReg::add(lrg.mask().find_first_set(lrg.num_regs()),chunk); -+ return OptoReg::add(find_first_set(lrg, lrg.mask(), chunk), chunk); - } - - // CNC - Fun hack. Alternate 1st and 2nd selection. 
Enables post-allocate -@@ -1564,12 +1633,21 @@ uint PhaseChaitin::Select( ) { - int n_regs = lrg->num_regs(); - assert(!lrg->_is_vector || !lrg->_fat_proj, "sanity"); - if (n_regs == 1 || !lrg->_fat_proj) { -- assert(!lrg->_is_vector || n_regs <= RegMask::SlotsPerVecZ, "sanity"); -+ if (Matcher::supports_scalable_vector()) { -+ assert(!lrg->_is_vector || n_regs <= RegMask::SlotsPerVecA, "sanity"); -+ } else { -+ assert(!lrg->_is_vector || n_regs <= RegMask::SlotsPerVecZ, "sanity"); -+ } - lrg->Clear(); // Clear the mask - lrg->Insert(reg); // Set regmask to match selected reg - // For vectors and pairs, also insert the low bit of the pair -- for (int i = 1; i < n_regs; i++) -+ // We always choose the high bit, then mask the low bits by register size -+ if (lrg->is_scalable() && OptoReg::is_stack(lrg->reg())) { // stack -+ n_regs = lrg->scalable_reg_slots(); -+ } -+ for (int i = 1; i < n_regs; i++) { - lrg->Insert(OptoReg::add(reg,-i)); -+ } - lrg->set_mask_size(n_regs); - } else { // Else fatproj - // mask must be equal to fatproj bits, by definition -diff --git a/src/hotspot/share/opto/chaitin.hpp b/src/hotspot/share/opto/chaitin.hpp -index e5be5b966..b5d1b0604 100644 ---- a/src/hotspot/share/opto/chaitin.hpp -+++ b/src/hotspot/share/opto/chaitin.hpp -@@ -115,9 +115,11 @@ public: - _msize_valid=1; - if (_is_vector) { - assert(!_fat_proj, "sanity"); -- _mask.verify_sets(_num_regs); -+ if (!(_is_scalable && OptoReg::is_stack(_reg))) { -+ assert(_mask.is_aligned_sets(_num_regs), "mask is not aligned, adjacent sets"); -+ } - } else if (_num_regs == 2 && !_fat_proj) { -- _mask.verify_pairs(); -+ assert(_mask.is_aligned_pairs(), "mask is not aligned, adjacent pairs"); - } - #endif - } -@@ -143,10 +145,34 @@ public: - private: - uint16_t _num_regs; // 2 for Longs and Doubles, 1 for all else - // except _num_regs is kill count for fat_proj -+ -+ // For scalable register, num_regs may not be the actual physical register size. -+ // We need to get the actual physical length of scalable register when scalable -+ // register is spilled. The size of one slot is 32-bit. -+ uint _scalable_reg_slots; // Actual scalable register length of slots. -+ // Meaningful only when _is_scalable is true. - public: - int num_regs() const { return _num_regs; } - void set_num_regs( int reg ) { assert( _num_regs == reg || !_num_regs, "" ); _num_regs = reg; } - -+ uint scalable_reg_slots() { return _scalable_reg_slots; } -+ void set_scalable_reg_slots(uint slots) { -+ assert(_is_scalable, "scalable register"); -+ assert(slots > 0, "slots of scalable register is not valid"); -+ _scalable_reg_slots = slots; -+ } -+ -+ bool is_scalable() { -+#ifdef ASSERT -+ if (_is_scalable) { -+ // Should only be a vector for now, but it could also be a RegVMask in future. -+ assert(_is_vector && (_num_regs == RegMask::SlotsPerVecA), "unexpected scalable reg"); -+ } -+#endif -+ return _is_scalable; -+ } -+ -+ - private: - // Number of physical registers this live range uses when it colors - // Architecture and register-set dependent -@@ -172,6 +198,7 @@ public: - uint _is_oop:1, // Live-range holds an oop - _is_float:1, // True if in float registers - _is_vector:1, // True if in vector registers -+ _is_scalable:1, // True if register size is scalable - _was_spilled1:1, // True if prior spilling on def - _was_spilled2:1, // True if twice prior spilling on def - _is_bound:1, // live range starts life with no -@@ -756,6 +783,7 @@ private: - - // Merge nodes that are a part of a multidef lrg and produce the same value within a block. 
- void merge_multidefs(); -+ void merge_debugdefs(); - - private: - -diff --git a/src/hotspot/share/opto/intrinsicnode.hpp b/src/hotspot/share/opto/intrinsicnode.hpp -index c0dfe1b0c..2d9526a39 100644 ---- a/src/hotspot/share/opto/intrinsicnode.hpp -+++ b/src/hotspot/share/opto/intrinsicnode.hpp -@@ -47,10 +47,11 @@ class PartialSubtypeCheckNode : public Node { - // Base class for Ideal nodes used in String intrinsic code. - class StrIntrinsicNode: public Node { - public: -- // Possible encodings of the two parameters passed to the string intrinsic. -+ // Possible encodings of the parameters passed to the string intrinsic. - // 'L' stands for Latin1 and 'U' stands for UTF16. For example, 'LU' means that - // the first string is Latin1 encoded and the second string is UTF16 encoded. -- typedef enum ArgEncoding { LL, LU, UL, UU, none } ArgEnc; -+ // 'L' means that the single string is Latin1 encoded -+ typedef enum ArgEncoding { LL, LU, UL, UU, L, U, none } ArgEnc; - - protected: - // Encoding of strings. Used to select the right version of the intrinsic. -diff --git a/src/hotspot/share/opto/library_call.cpp b/src/hotspot/share/opto/library_call.cpp -index 6b6aa9e9b..8719c5b12 100644 ---- a/src/hotspot/share/opto/library_call.cpp -+++ b/src/hotspot/share/opto/library_call.cpp -@@ -217,7 +217,7 @@ class LibraryCallKit : public GraphKit { - bool inline_string_indexOfI(StrIntrinsicNode::ArgEnc ae); - Node* make_indexOf_node(Node* src_start, Node* src_count, Node* tgt_start, Node* tgt_count, - RegionNode* region, Node* phi, StrIntrinsicNode::ArgEnc ae); -- bool inline_string_indexOfChar(); -+ bool inline_string_indexOfChar(StrIntrinsicNode::ArgEnc ae); - bool inline_string_equals(StrIntrinsicNode::ArgEnc ae); - bool inline_string_toBytesU(); - bool inline_string_getCharsU(); -@@ -590,7 +590,8 @@ bool LibraryCallKit::try_to_inline(int predicate) { - case vmIntrinsics::_indexOfIL: return inline_string_indexOfI(StrIntrinsicNode::LL); - case vmIntrinsics::_indexOfIU: return inline_string_indexOfI(StrIntrinsicNode::UU); - case vmIntrinsics::_indexOfIUL: return inline_string_indexOfI(StrIntrinsicNode::UL); -- case vmIntrinsics::_indexOfU_char: return inline_string_indexOfChar(); -+ case vmIntrinsics::_indexOfU_char: return inline_string_indexOfChar(StrIntrinsicNode::U); -+ case vmIntrinsics::_indexOfL_char: return inline_string_indexOfChar(StrIntrinsicNode::L); - - case vmIntrinsics::_equalsL: return inline_string_equals(StrIntrinsicNode::LL); - case vmIntrinsics::_equalsU: return inline_string_equals(StrIntrinsicNode::UU); -@@ -1419,7 +1420,7 @@ Node* LibraryCallKit::make_indexOf_node(Node* src_start, Node* src_count, Node* - } - - //-----------------------------inline_string_indexOfChar----------------------- --bool LibraryCallKit::inline_string_indexOfChar() { -+bool LibraryCallKit::inline_string_indexOfChar(StrIntrinsicNode::ArgEnc ae) { - if (too_many_traps(Deoptimization::Reason_intrinsic)) { - return false; - } -@@ -1434,12 +1435,12 @@ bool LibraryCallKit::inline_string_indexOfChar() { - - src = must_be_not_null(src, true); - -- Node* src_offset = _gvn.transform(new LShiftINode(from_index, intcon(1))); -+ Node* src_offset = ae == StrIntrinsicNode::L ? 
from_index : _gvn.transform(new LShiftINode(from_index, intcon(1))); - Node* src_start = array_element_address(src, src_offset, T_BYTE); - Node* src_count = _gvn.transform(new SubINode(max, from_index)); - - // Range checks -- generate_string_range_check(src, src_offset, src_count, true); -+ generate_string_range_check(src, src_offset, src_count, ae == StrIntrinsicNode::U); - if (stopped()) { - return true; - } -@@ -1447,7 +1448,7 @@ bool LibraryCallKit::inline_string_indexOfChar() { - RegionNode* region = new RegionNode(3); - Node* phi = new PhiNode(region, TypeInt::INT); - -- Node* result = new StrIndexOfCharNode(control(), memory(TypeAryPtr::BYTES), src_start, src_count, tgt, StrIntrinsicNode::none); -+ Node* result = new StrIndexOfCharNode(control(), memory(TypeAryPtr::BYTES), src_start, src_count, tgt, ae); - C->set_has_split_ifs(true); // Has chance for split-if optimization - _gvn.transform(result); - -diff --git a/src/hotspot/share/opto/machnode.cpp b/src/hotspot/share/opto/machnode.cpp -index 8d526b15d..92b4f7158 100644 ---- a/src/hotspot/share/opto/machnode.cpp -+++ b/src/hotspot/share/opto/machnode.cpp -@@ -147,7 +147,7 @@ uint MachNode::size(PhaseRegAlloc *ra_) const { - return MachNode::emit_size(ra_); - } - --//------------------------------size------------------------------------------- -+//-------------------------emit_size------------------------------------------- - // Helper function that computes size by emitting code - uint MachNode::emit_size(PhaseRegAlloc *ra_) const { - // Emit into a trash buffer and count bytes emitted. -diff --git a/src/hotspot/share/opto/machnode.hpp b/src/hotspot/share/opto/machnode.hpp -index a52325680..dad70565b 100644 ---- a/src/hotspot/share/opto/machnode.hpp -+++ b/src/hotspot/share/opto/machnode.hpp -@@ -334,6 +334,10 @@ public: - // Top-level ideal Opcode matched - virtual int ideal_Opcode() const { return Op_Node; } - -+ virtual bool is_Opcode_equal(Node* node) { -+ return node->is_Mach() && (ideal_Opcode() == node->as_Mach()->ideal_Opcode()); -+ } -+ - // Adds the label for the case - virtual void add_case_label( int switch_val, Label* blockLabel); - -diff --git a/src/hotspot/share/opto/matcher.cpp b/src/hotspot/share/opto/matcher.cpp -index 9e9b3383f..97de5e314 100644 ---- a/src/hotspot/share/opto/matcher.cpp -+++ b/src/hotspot/share/opto/matcher.cpp -@@ -84,6 +84,7 @@ Matcher::Matcher() - idealreg2spillmask [Op_RegF] = NULL; - idealreg2spillmask [Op_RegD] = NULL; - idealreg2spillmask [Op_RegP] = NULL; -+ idealreg2spillmask [Op_VecA] = NULL; - idealreg2spillmask [Op_VecS] = NULL; - idealreg2spillmask [Op_VecD] = NULL; - idealreg2spillmask [Op_VecX] = NULL; -@@ -110,6 +111,7 @@ Matcher::Matcher() - idealreg2mhdebugmask[Op_RegF] = NULL; - idealreg2mhdebugmask[Op_RegD] = NULL; - idealreg2mhdebugmask[Op_RegP] = NULL; -+ idealreg2mhdebugmask[Op_VecA] = NULL; - idealreg2mhdebugmask[Op_VecS] = NULL; - idealreg2mhdebugmask[Op_VecD] = NULL; - idealreg2mhdebugmask[Op_VecX] = NULL; -@@ -424,7 +426,7 @@ static RegMask *init_input_masks( uint size, RegMask &ret_adr, RegMask &fp ) { - void Matcher::init_first_stack_mask() { - - // Allocate storage for spill masks as masks for the appropriate load type. 
-- RegMask *rms = (RegMask*)C->comp_arena()->Amalloc_D(sizeof(RegMask) * (3*6+5)); -+ RegMask *rms = (RegMask*)C->comp_arena()->Amalloc_D(sizeof(RegMask) * (3*6+6)); - - idealreg2spillmask [Op_RegN] = &rms[0]; - idealreg2spillmask [Op_RegI] = &rms[1]; -@@ -447,11 +449,12 @@ void Matcher::init_first_stack_mask() { - idealreg2mhdebugmask[Op_RegD] = &rms[16]; - idealreg2mhdebugmask[Op_RegP] = &rms[17]; - -- idealreg2spillmask [Op_VecS] = &rms[18]; -- idealreg2spillmask [Op_VecD] = &rms[19]; -- idealreg2spillmask [Op_VecX] = &rms[20]; -- idealreg2spillmask [Op_VecY] = &rms[21]; -- idealreg2spillmask [Op_VecZ] = &rms[22]; -+ idealreg2spillmask [Op_VecA] = &rms[18]; -+ idealreg2spillmask [Op_VecS] = &rms[19]; -+ idealreg2spillmask [Op_VecD] = &rms[20]; -+ idealreg2spillmask [Op_VecX] = &rms[21]; -+ idealreg2spillmask [Op_VecY] = &rms[22]; -+ idealreg2spillmask [Op_VecZ] = &rms[23]; - - OptoReg::Name i; - -@@ -478,6 +481,7 @@ void Matcher::init_first_stack_mask() { - // Keep spill masks aligned. - aligned_stack_mask.clear_to_pairs(); - assert(aligned_stack_mask.is_AllStack(), "should be infinite stack"); -+ RegMask scalable_stack_mask = aligned_stack_mask; - - *idealreg2spillmask[Op_RegP] = *idealreg2regmask[Op_RegP]; - #ifdef _LP64 -@@ -548,6 +552,26 @@ void Matcher::init_first_stack_mask() { - *idealreg2spillmask[Op_VecZ] = *idealreg2regmask[Op_VecZ]; - idealreg2spillmask[Op_VecZ]->OR(aligned_stack_mask); - } -+ -+ if (Matcher::supports_scalable_vector()) { -+ int k = 1; -+ OptoReg::Name in = OptoReg::add(_in_arg_limit, -1); -+ // Exclude last input arg stack slots to avoid spilling vector register there, -+ // otherwise vector spills could stomp over stack slots in caller frame. -+ for (; (in >= init_in) && (k < scalable_vector_reg_size(T_FLOAT)); k++) { -+ scalable_stack_mask.Remove(in); -+ in = OptoReg::add(in, -1); -+ } -+ -+ // For VecA -+ scalable_stack_mask.clear_to_sets(RegMask::SlotsPerVecA); -+ assert(scalable_stack_mask.is_AllStack(), "should be infinite stack"); -+ *idealreg2spillmask[Op_VecA] = *idealreg2regmask[Op_VecA]; -+ idealreg2spillmask[Op_VecA]->OR(scalable_stack_mask); -+ } else { -+ *idealreg2spillmask[Op_VecA] = RegMask::Empty; -+ } -+ - if (UseFPUForSpilling) { - // This mask logic assumes that the spill operations are - // symmetric and that the registers involved are the same size. -@@ -872,6 +896,11 @@ void Matcher::init_spill_mask( Node *ret ) { - idealreg2regmask[Op_RegP] = &spillP->out_RegMask(); - - // Vector regmasks. -+ if (Matcher::supports_scalable_vector()) { -+ TypeVect::VECTA = TypeVect::make(T_BYTE, Matcher::scalable_vector_reg_size(T_BYTE));; -+ MachNode *spillVectA = match_tree(new LoadVectorNode(NULL,mem,fp,atp,TypeVect::VECTA)); -+ idealreg2regmask[Op_VecA] = &spillVectA->out_RegMask(); -+ } - if (Matcher::vector_size_supported(T_BYTE,4)) { - TypeVect::VECTS = TypeVect::make(T_BYTE, 4); - MachNode *spillVectS = match_tree(new LoadVectorNode(NULL,mem,fp,atp,TypeVect::VECTS)); -diff --git a/src/hotspot/share/opto/matcher.hpp b/src/hotspot/share/opto/matcher.hpp -index 244e3d1f8..9a8307102 100644 ---- a/src/hotspot/share/opto/matcher.hpp -+++ b/src/hotspot/share/opto/matcher.hpp -@@ -310,7 +310,7 @@ public: - - // identify extra cases that we might want to provide match rules for - // e.g. 
Op_ vector nodes and other intrinsics while guarding with vlen -- static const bool match_rule_supported_vector(int opcode, int vlen); -+ static const bool match_rule_supported_vector(int opcode, int vlen, BasicType bt); - - // Some microarchitectures have mask registers used on vectors - static const bool has_predicated_vectors(void); -@@ -333,6 +333,10 @@ public: - Matcher::min_vector_size(bt) <= size); - } - -+ static const bool supports_scalable_vector(); -+ // Actual max scalable vector register length. -+ static const int scalable_vector_reg_size(const BasicType bt); -+ - // Vector ideal reg - static const uint vector_ideal_reg(int len); - static const uint vector_shift_count_ideal_reg(int len); -diff --git a/src/hotspot/share/opto/node.cpp b/src/hotspot/share/opto/node.cpp -index 02bb6bb16..99d51ba05 100644 ---- a/src/hotspot/share/opto/node.cpp -+++ b/src/hotspot/share/opto/node.cpp -@@ -2359,6 +2359,27 @@ Node* Node::find_similar(int opc) { - return NULL; - } - -+//--------------------------is_similar----------------------------------- -+// True if a node has the same opcode and inputs as "this". -+bool Node::is_similar(Node* node) { -+ if (this == node) { -+ return true; -+ } else { -+ if (is_Opcode_equal(node) && (req() == node->req())) { -+ for (uint i = 0; i < node->req(); i++) { -+ if (in(i) != node->in(i)) { -+ return false; -+ } -+ } -+ return true; -+ } -+ } -+ return false; -+} -+ -+bool Node::is_Opcode_equal(Node* node) { -+ return Opcode() == node->Opcode(); -+} - - //--------------------------unique_ctrl_out------------------------------ - // Return the unique control out if only one. Null if none or more than one. -diff --git a/src/hotspot/share/opto/node.hpp b/src/hotspot/share/opto/node.hpp -index 0c0b9bf69..e24456d85 100644 ---- a/src/hotspot/share/opto/node.hpp -+++ b/src/hotspot/share/opto/node.hpp -@@ -1030,6 +1030,11 @@ public: - // be found; Otherwise return NULL; - Node* find_similar(int opc); - -+ // True if a node has the same opcode and inputs as "this". -+ bool is_similar(Node* node); -+ -+ virtual bool is_Opcode_equal(Node* node); -+ - // Return the unique control out if only one. Null if none or more than one. 
- Node* unique_ctrl_out() const; - -diff --git a/src/hotspot/share/opto/opcodes.cpp b/src/hotspot/share/opto/opcodes.cpp -index e31e8d847..aa0483c73 100644 ---- a/src/hotspot/share/opto/opcodes.cpp -+++ b/src/hotspot/share/opto/opcodes.cpp -@@ -38,12 +38,14 @@ const char *NodeClassNames[] = { - "RegF", - "RegD", - "RegL", -- "RegFlags", -+ "VecA", - "VecS", - "VecD", - "VecX", - "VecY", - "VecZ", -+ "RegVMask", -+ "RegFlags", - "_last_machine_leaf", - #include "classes.hpp" - "_last_class_name", -diff --git a/src/hotspot/share/opto/opcodes.hpp b/src/hotspot/share/opto/opcodes.hpp -index ae3d61ce0..0a77c3732 100644 ---- a/src/hotspot/share/opto/opcodes.hpp -+++ b/src/hotspot/share/opto/opcodes.hpp -@@ -37,11 +37,13 @@ enum Opcodes { - macro(RegF) // Machine float register - macro(RegD) // Machine double register - macro(RegL) // Machine long register -+ macro(VecA) // Machine vectora register - macro(VecS) // Machine vectors register - macro(VecD) // Machine vectord register - macro(VecX) // Machine vectorx register - macro(VecY) // Machine vectory register - macro(VecZ) // Machine vectorz register -+ macro(RegVMask) // Vector mask/predicate register - macro(RegFlags) // Machine flags register - _last_machine_leaf, // Split between regular opcodes and machine - #include "classes.hpp" -diff --git a/src/hotspot/share/opto/phase.cpp b/src/hotspot/share/opto/phase.cpp -index 397a53713..89c7fc7c8 100644 ---- a/src/hotspot/share/opto/phase.cpp -+++ b/src/hotspot/share/opto/phase.cpp -@@ -113,6 +113,7 @@ void Phase::print_timers() { - tty->print_cr (" Regalloc Split: %7.3f s", timers[_t_regAllocSplit].seconds()); - tty->print_cr (" Postalloc Copy Rem: %7.3f s", timers[_t_postAllocCopyRemoval].seconds()); - tty->print_cr (" Merge multidefs: %7.3f s", timers[_t_mergeMultidefs].seconds()); -+ tty->print_cr (" Merge debugdefs: %7.3f s", timers[_t_mergeDebugdefs].seconds()); - tty->print_cr (" Fixup Spills: %7.3f s", timers[_t_fixupSpills].seconds()); - tty->print_cr (" Compact: %7.3f s", timers[_t_chaitinCompact].seconds()); - tty->print_cr (" Coalesce 1: %7.3f s", timers[_t_chaitinCoalesce1].seconds()); -@@ -130,6 +131,7 @@ void Phase::print_timers() { - timers[_t_regAllocSplit].seconds() + - timers[_t_postAllocCopyRemoval].seconds() + - timers[_t_mergeMultidefs].seconds() + -+ timers[_t_mergeDebugdefs].seconds() + - timers[_t_fixupSpills].seconds() + - timers[_t_chaitinCompact].seconds() + - timers[_t_chaitinCoalesce1].seconds() + -diff --git a/src/hotspot/share/opto/phase.hpp b/src/hotspot/share/opto/phase.hpp -index 4b0c53ffc..b3302ec86 100644 ---- a/src/hotspot/share/opto/phase.hpp -+++ b/src/hotspot/share/opto/phase.hpp -@@ -91,6 +91,7 @@ public: - _t_regAllocSplit, - _t_postAllocCopyRemoval, - _t_mergeMultidefs, -+ _t_mergeDebugdefs, - _t_fixupSpills, - _t_chaitinCompact, - _t_chaitinCoalesce1, -diff --git a/src/hotspot/share/opto/postaloc.cpp b/src/hotspot/share/opto/postaloc.cpp -index 46766b604..3f608bb40 100644 ---- a/src/hotspot/share/opto/postaloc.cpp -+++ b/src/hotspot/share/opto/postaloc.cpp -@@ -27,6 +27,7 @@ - #include "memory/resourceArea.hpp" - #include "opto/chaitin.hpp" - #include "opto/machnode.hpp" -+#include "opto/addnode.hpp" - - // See if this register (or pairs, or vector) already contains the value. - static bool register_contains_value(Node* val, OptoReg::Name reg, int n_regs, -@@ -266,9 +267,9 @@ int PhaseChaitin::elide_copy( Node *n, int k, Block *current_block, Node_List &v - Node *val = skip_copies(n->in(k)); - if (val == x) return blk_adjust; // No progress? 
- -- int n_regs = RegMask::num_registers(val->ideal_reg()); - uint val_idx = _lrg_map.live_range_id(val); - OptoReg::Name val_reg = lrgs(val_idx).reg(); -+ int n_regs = RegMask::num_registers(val->ideal_reg(), lrgs(val_idx)); - - // See if it happens to already be in the correct register! - // (either Phi's direct register, or the common case of the name -@@ -305,8 +306,26 @@ int PhaseChaitin::elide_copy( Node *n, int k, Block *current_block, Node_List &v - } - - Node *vv = value[reg]; -+ // For scalable register, number of registers may be inconsistent between -+ // "val_reg" and "reg". For example, when "val" resides in register -+ // but "reg" is located in stack. -+ if (lrgs(val_idx).is_scalable()) { -+ assert(val->ideal_reg() == Op_VecA, "scalable vector register"); -+ if (OptoReg::is_stack(reg)) { -+ n_regs = lrgs(val_idx).scalable_reg_slots(); -+ } else { -+ n_regs = RegMask::SlotsPerVecA; -+ } -+ } - if (n_regs > 1) { // Doubles and vectors check for aligned-adjacent set -- uint last = (n_regs-1); // Looking for the last part of a set -+ uint last; -+ if (lrgs(val_idx).is_scalable()) { -+ assert(val->ideal_reg() == Op_VecA, "scalable vector register"); -+ // For scalable vector register, regmask is always SlotsPerVecA bits aligned -+ last = RegMask::SlotsPerVecA - 1; -+ } else { -+ last = (n_regs-1); // Looking for the last part of a set -+ } - if ((reg&last) != last) continue; // Wrong part of a set - if (!register_contains_value(vv, reg, n_regs, value)) continue; // Different value - } -@@ -410,6 +429,28 @@ void PhaseChaitin::merge_multidefs() { - } - } - -+void PhaseChaitin::merge_debugdefs() { -+ Compile::TracePhase tp("merge_Debugdefs", &timers[_t_mergeDebugdefs]); -+ -+ ResourceMark rm; -+ for (uint i = 0; i < _cfg.number_of_blocks(); i++) { -+ Block* block = _cfg.get_block(i); -+ for (int j = 0; j < (int) block->number_of_nodes(); j++) { -+ Node* base = block->get_node(j); -+ if (base && base->is_Mach() && base->outcnt() == 1) { -+ Node* addp = base->unique_out(); -+ if (addp && addp->is_Mach() && addp->as_Mach()->ideal_Opcode() == Op_AddP) { -+ Node* derived = addp->in(AddPNode::Address); -+ if (base == addp->in(AddPNode::Base) && base->is_similar(derived)) { -+ base->subsume_by(derived, Compile::current()); -+ block->remove_node(j--); -+ } -+ } -+ } -+ } -+ } -+} -+ - int PhaseChaitin::possibly_merge_multidef(Node *n, uint k, Block *block, RegToDefUseMap& reg2defuse) { - int blk_adjust = 0; - -@@ -591,7 +632,7 @@ void PhaseChaitin::post_allocate_copy_removal() { - uint k; - Node *phi = block->get_node(j); - uint pidx = _lrg_map.live_range_id(phi); -- OptoReg::Name preg = lrgs(_lrg_map.live_range_id(phi)).reg(); -+ OptoReg::Name preg = lrgs(pidx).reg(); - - // Remove copies remaining on edges. Check for junk phi. 
- Node *u = NULL; -@@ -619,7 +660,7 @@ void PhaseChaitin::post_allocate_copy_removal() { - if( pidx ) { - value.map(preg,phi); - regnd.map(preg,phi); -- int n_regs = RegMask::num_registers(phi->ideal_reg()); -+ int n_regs = RegMask::num_registers(phi->ideal_reg(), lrgs(pidx)); - for (int l = 1; l < n_regs; l++) { - OptoReg::Name preg_lo = OptoReg::add(preg,-l); - value.map(preg_lo,phi); -@@ -663,7 +704,7 @@ void PhaseChaitin::post_allocate_copy_removal() { - regnd.map(ureg, def); - // Record other half of doubles - uint def_ideal_reg = def->ideal_reg(); -- int n_regs = RegMask::num_registers(def_ideal_reg); -+ int n_regs = RegMask::num_registers(def_ideal_reg, lrgs(_lrg_map.live_range_id(def))); - for (int l = 1; l < n_regs; l++) { - OptoReg::Name ureg_lo = OptoReg::add(ureg,-l); - if (!value[ureg_lo] && -@@ -707,7 +748,7 @@ void PhaseChaitin::post_allocate_copy_removal() { - } - - uint n_ideal_reg = n->ideal_reg(); -- int n_regs = RegMask::num_registers(n_ideal_reg); -+ int n_regs = RegMask::num_registers(n_ideal_reg, lrgs(lidx)); - if (n_regs == 1) { - // If Node 'n' does not change the value mapped by the register, - // then 'n' is a useless copy. Do not update the register->node -diff --git a/src/hotspot/share/opto/regmask.cpp b/src/hotspot/share/opto/regmask.cpp -index 2e04c42eb..34a701e84 100644 ---- a/src/hotspot/share/opto/regmask.cpp -+++ b/src/hotspot/share/opto/regmask.cpp -@@ -24,6 +24,7 @@ - - #include "precompiled.hpp" - #include "opto/ad.hpp" -+#include "opto/chaitin.hpp" - #include "opto/compile.hpp" - #include "opto/matcher.hpp" - #include "opto/node.hpp" -@@ -116,30 +117,47 @@ const RegMask RegMask::Empty( - - //============================================================================= - bool RegMask::is_vector(uint ireg) { -- return (ireg == Op_VecS || ireg == Op_VecD || -+ return (ireg == Op_VecA || ireg == Op_VecS || ireg == Op_VecD || - ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ); - } - - int RegMask::num_registers(uint ireg) { - switch(ireg) { - case Op_VecZ: -- return 16; -+ return SlotsPerVecZ; - case Op_VecY: -- return 8; -+ return SlotsPerVecY; - case Op_VecX: -- return 4; -+ return SlotsPerVecX; - case Op_VecD: -+ return SlotsPerVecD; - case Op_RegD: - case Op_RegL: - #ifdef _LP64 - case Op_RegP: - #endif - return 2; -+ case Op_VecA: -+ assert(Matcher::supports_scalable_vector(), "does not support scalable vector"); -+ return SlotsPerVecA; - } - // Op_VecS and the rest ideal registers. - return 1; - } - -+int RegMask::num_registers(uint ireg, LRG &lrg) { -+ int n_regs = num_registers(ireg); -+ -+ // assigned is OptoReg which is selected by register allocator -+ OptoReg::Name assigned = lrg.reg(); -+ assert(OptoReg::is_valid(assigned), "should be valid opto register"); -+ -+ if (lrg.is_scalable() && OptoReg::is_stack(assigned)) { -+ n_regs = lrg.scalable_reg_slots(); -+ } -+ return n_regs; -+} -+ - //------------------------------find_first_pair-------------------------------- - // Find the lowest-numbered register pair in the mask. Return the - // HIGHEST register number in the pair, or BAD if no pairs. -@@ -238,14 +256,30 @@ int RegMask::is_bound_pair() const { - return true; - } - -+// Check that whether given reg number with size is valid -+// for current regmask, where reg is the highest number. 
-+bool RegMask::is_valid_reg(OptoReg::Name reg, const int size) const { -+ for (int i = 0; i < size; i++) { -+ if (!Member(reg - i)) { -+ return false; -+ } ++ if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { ++ FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, true); + } -+ return true; +} ++#endif // COMPILER2 + - // only indicies of power 2 are accessed, so index 3 is only filled in for storage. - static int low_bits[5] = { 0x55555555, 0x11111111, 0x01010101, 0x00000000, 0x00010001 }; - //------------------------------find_first_set--------------------------------- - // Find the lowest-numbered register set in the mask. Return the - // HIGHEST register number in the set, or BAD if no sets. - // Works also for size 1. --OptoReg::Name RegMask::find_first_set(const int size) const { -- verify_sets(size); -+OptoReg::Name RegMask::find_first_set(LRG &lrg, const int size) const { -+ if (lrg.is_scalable()) { -+ // For scalable vector register, regmask is SlotsPerVecA bits aligned. -+ assert(is_aligned_sets(SlotsPerVecA), "mask is not aligned, adjacent sets"); -+ } else { -+ assert(is_aligned_sets(size), "mask is not aligned, adjacent sets"); -+ } - for (int i = 0; i < RM_SIZE; i++) { - if (_A[i]) { // Found some bits - int bit = _A[i] & -_A[i]; // Extract low bit -diff --git a/src/hotspot/share/opto/regmask.hpp b/src/hotspot/share/opto/regmask.hpp -index c64d08795..2688275be 100644 ---- a/src/hotspot/share/opto/regmask.hpp -+++ b/src/hotspot/share/opto/regmask.hpp -@@ -28,6 +28,8 @@ - #include "code/vmreg.hpp" - #include "opto/optoreg.hpp" - -+class LRG; -+ - // Some fun naming (textual) substitutions: - // - // RegMask::get_low_elem() ==> RegMask::find_first_elem() -@@ -95,6 +97,7 @@ public: - // requirement is internal to the allocator, and independent of any - // particular platform. - enum { SlotsPerLong = 2, -+ SlotsPerVecA = RISCV_ONLY(4) NOT_RISCV(8), - SlotsPerVecS = 1, - SlotsPerVecD = 2, - SlotsPerVecX = 4, -@@ -204,10 +207,14 @@ public: - return false; - } - -+ // Check that whether given reg number with size is valid -+ // for current regmask, where reg is the highest number. -+ bool is_valid_reg(OptoReg::Name reg, const int size) const; -+ - // Find the lowest-numbered register set in the mask. Return the - // HIGHEST register number in the set, or BAD if no sets. - // Assert that the mask contains only bit sets. -- OptoReg::Name find_first_set(const int size) const; -+ OptoReg::Name find_first_set(LRG &lrg, const int size) const; - - // Clear out partial bits; leave only aligned adjacent bit sets of size. - void clear_to_sets(const int size); -@@ -226,6 +233,7 @@ public: - - static bool is_vector(uint ireg); - static int num_registers(uint ireg); -+ static int num_registers(uint ireg, LRG &lrg); - - // Fast overlap test. Non-zero if any registers in common. - int overlap( const RegMask &rm ) const { -diff --git a/src/hotspot/share/opto/superword.cpp b/src/hotspot/share/opto/superword.cpp -index fed52e488..ee583236f 100644 ---- a/src/hotspot/share/opto/superword.cpp -+++ b/src/hotspot/share/opto/superword.cpp -@@ -96,8 +96,11 @@ static const bool _do_vector_loop_experimental = false; // Experimental vectoriz - //------------------------------transform_loop--------------------------- - void SuperWord::transform_loop(IdealLoopTree* lpt, bool do_optimization) { - assert(UseSuperWord, "should be"); -- // Do vectors exist on this architecture? -- if (Matcher::vector_width_in_bytes(T_BYTE) < 2) return; -+ // SuperWord only works with power of two vector sizes. 
-+ int vector_width = Matcher::vector_width_in_bytes(T_BYTE); -+ if (vector_width < 2 || !is_power_of_2(vector_width)) { ++void VM_Version::initialize_cpu_information(void) { ++ // do nothing if cpu info has been initialized ++ if (_initialized) { + return; + } - - assert(lpt->_head->is_CountedLoop(), "must be"); - CountedLoopNode *cl = lpt->_head->as_CountedLoop(); -diff --git a/src/hotspot/share/opto/type.cpp b/src/hotspot/share/opto/type.cpp -index 7d767c47c..c9948df5f 100644 ---- a/src/hotspot/share/opto/type.cpp -+++ b/src/hotspot/share/opto/type.cpp -@@ -79,6 +79,7 @@ const Type::TypeInfo Type::_type_info[Type::lastype] = { - { Bad, T_ILLEGAL, "vectory:", false, 0, relocInfo::none }, // VectorY - { Bad, T_ILLEGAL, "vectorz:", false, 0, relocInfo::none }, // VectorZ - #else // all other -+ { Bad, T_ILLEGAL, "vectora:", false, Op_VecA, relocInfo::none }, // VectorA - { Bad, T_ILLEGAL, "vectors:", false, Op_VecS, relocInfo::none }, // VectorS - { Bad, T_ILLEGAL, "vectord:", false, Op_VecD, relocInfo::none }, // VectorD - { Bad, T_ILLEGAL, "vectorx:", false, Op_VecX, relocInfo::none }, // VectorX -@@ -655,6 +656,10 @@ void Type::Initialize_shared(Compile* current) { - // get_zero_type() should not happen for T_CONFLICT - _zero_type[T_CONFLICT]= NULL; - -+ if (Matcher::supports_scalable_vector()) { -+ TypeVect::VECTA = TypeVect::make(T_BYTE, Matcher::scalable_vector_reg_size(T_BYTE)); -+ } -+ - // Vector predefined types, it needs initialized _const_basic_type[]. - if (Matcher::vector_size_supported(T_BYTE,4)) { - TypeVect::VECTS = TypeVect::make(T_BYTE,4); -@@ -671,6 +676,7 @@ void Type::Initialize_shared(Compile* current) { - if (Matcher::vector_size_supported(T_FLOAT,16)) { - TypeVect::VECTZ = TypeVect::make(T_FLOAT,16); - } -+ mreg2type[Op_VecA] = TypeVect::VECTA; - mreg2type[Op_VecS] = TypeVect::VECTS; - mreg2type[Op_VecD] = TypeVect::VECTD; - mreg2type[Op_VecX] = TypeVect::VECTX; -@@ -990,6 +996,7 @@ const Type::TYPES Type::dual_type[Type::lastype] = { - - Bad, // Tuple - handled in v-call - Bad, // Array - handled in v-call -+ Bad, // VectorA - handled in v-call - Bad, // VectorS - handled in v-call - Bad, // VectorD - handled in v-call - Bad, // VectorX - handled in v-call -@@ -2329,6 +2336,7 @@ bool TypeAry::ary_must_be_exact() const { - - //==============================TypeVect======================================= - // Convenience common pre-built types. 
-+const TypeVect *TypeVect::VECTA = NULL; // vector length agnostic - const TypeVect *TypeVect::VECTS = NULL; // 32-bit vectors - const TypeVect *TypeVect::VECTD = NULL; // 64-bit vectors - const TypeVect *TypeVect::VECTX = NULL; // 128-bit vectors -@@ -2339,10 +2347,11 @@ const TypeVect *TypeVect::VECTZ = NULL; // 512-bit vectors - const TypeVect* TypeVect::make(const Type *elem, uint length) { - BasicType elem_bt = elem->array_element_basic_type(); - assert(is_java_primitive(elem_bt), "only primitive types in vector"); -- assert(length > 1 && is_power_of_2(length), "vector length is power of 2"); - assert(Matcher::vector_size_supported(elem_bt, length), "length in range"); - int size = length * type2aelembytes(elem_bt); - switch (Matcher::vector_ideal_reg(size)) { -+ case Op_VecA: -+ return (TypeVect*)(new TypeVectA(elem, length))->hashcons(); - case Op_VecS: - return (TypeVect*)(new TypeVectS(elem, length))->hashcons(); - case Op_RegL: -@@ -2375,6 +2384,7 @@ const Type *TypeVect::xmeet( const Type *t ) const { - default: // All else is a mistake - typerr(t); - -+ case VectorA: - case VectorS: - case VectorD: - case VectorX: -@@ -2429,6 +2439,8 @@ bool TypeVect::empty(void) const { - #ifndef PRODUCT - void TypeVect::dump2(Dict &d, uint depth, outputStream *st) const { - switch (base()) { -+ case VectorA: -+ st->print("vectora["); break; - case VectorS: - st->print("vectors["); break; - case VectorD: -diff --git a/src/hotspot/share/opto/type.hpp b/src/hotspot/share/opto/type.hpp -index 27d042d94..82ee2dfcb 100644 ---- a/src/hotspot/share/opto/type.hpp -+++ b/src/hotspot/share/opto/type.hpp -@@ -53,6 +53,7 @@ class TypeNarrowKlass; - class TypeAry; - class TypeTuple; - class TypeVect; -+class TypeVectA; - class TypeVectS; - class TypeVectD; - class TypeVectX; -@@ -87,6 +88,7 @@ public: - - Tuple, // Method signature or object layout - Array, // Array types -+ VectorA, // (Scalable) Vector types for vector length agnostic - VectorS, // 32bit Vector types - VectorD, // 64bit Vector types - VectorX, // 128bit Vector types -@@ -769,6 +771,7 @@ public: - virtual const Type *xmeet( const Type *t) const; - virtual const Type *xdual() const; // Compute dual right now. - -+ static const TypeVect *VECTA; - static const TypeVect *VECTS; - static const TypeVect *VECTD; - static const TypeVect *VECTX; -@@ -780,6 +783,11 @@ public: - #endif - }; - -+class TypeVectA : public TypeVect { -+ friend class TypeVect; -+ TypeVectA(const Type* elem, uint length) : TypeVect(VectorA, elem, length) {} -+}; -+ - class TypeVectS : public TypeVect { - friend class TypeVect; - TypeVectS(const Type* elem, uint length) : TypeVect(VectorS, elem, length) {} -@@ -1630,12 +1638,12 @@ inline const TypeAry *Type::is_ary() const { - } - - inline const TypeVect *Type::is_vect() const { -- assert( _base >= VectorS && _base <= VectorZ, "Not a Vector" ); -+ assert( _base >= VectorA && _base <= VectorZ, "Not a Vector" ); - return (TypeVect*)this; - } - - inline const TypeVect *Type::isa_vect() const { -- return (_base >= VectorS && _base <= VectorZ) ? (TypeVect*)this : NULL; -+ return (_base >= VectorA && _base <= VectorZ) ? 
(TypeVect*)this : NULL; - } - - inline const TypePtr *Type::is_ptr() const { -diff --git a/src/hotspot/share/opto/vectornode.cpp b/src/hotspot/share/opto/vectornode.cpp -index de22591ba..b82d631f4 100644 ---- a/src/hotspot/share/opto/vectornode.cpp -+++ b/src/hotspot/share/opto/vectornode.cpp -@@ -236,7 +236,7 @@ bool VectorNode::implemented(int opc, uint vlen, BasicType bt) { - (vlen > 1) && is_power_of_2(vlen) && - Matcher::vector_size_supported(bt, vlen)) { - int vopc = VectorNode::opcode(opc, bt); -- return vopc > 0 && Matcher::match_rule_supported_vector(vopc, vlen); -+ return vopc > 0 && Matcher::match_rule_supported_vector(vopc, vlen, bt); - } - return false; - } -@@ -655,7 +655,7 @@ bool ReductionNode::implemented(int opc, uint vlen, BasicType bt) { - (vlen > 1) && is_power_of_2(vlen) && - Matcher::vector_size_supported(bt, vlen)) { - int vopc = ReductionNode::opcode(opc, bt); -- return vopc != opc && Matcher::match_rule_supported(vopc); -+ return vopc != opc && Matcher::match_rule_supported_vector(vopc, vlen, bt); - } - return false; - } -diff --git a/src/hotspot/share/runtime/abstract_vm_version.cpp b/src/hotspot/share/runtime/abstract_vm_version.cpp -index c46247f2b..ee769634f 100644 ---- a/src/hotspot/share/runtime/abstract_vm_version.cpp -+++ b/src/hotspot/share/runtime/abstract_vm_version.cpp -@@ -98,8 +98,13 @@ bool Abstract_VM_Version::_parallel_worker_threads_initialized = false; - #ifdef ZERO - #define VMTYPE "Zero" - #else // ZERO -- #define VMTYPE COMPILER1_PRESENT("Client") \ -- COMPILER2_PRESENT("Server") -+ #ifdef COMPILER2 -+ #define VMTYPE "Server" -+ #elif defined(COMPILER1) -+ #define VMTYPE "Client" -+ #else -+ #define VMTYPE "Core" -+ #endif // COMPILER2 - #endif // ZERO - #endif // TIERED - #endif -@@ -196,7 +201,8 @@ const char* Abstract_VM_Version::jre_release_version() { - IA32_ONLY("x86") \ - IA64_ONLY("ia64") \ - S390_ONLY("s390") \ -- SPARC_ONLY("sparc") -+ SPARC_ONLY("sparc") \ -+ RISCV64_ONLY("riscv64") - #endif // !ZERO - #endif // !CPU - -diff --git a/src/hotspot/share/runtime/thread.hpp b/src/hotspot/share/runtime/thread.hpp -index 0a9c45f85..a96c2dd81 100644 ---- a/src/hotspot/share/runtime/thread.hpp -+++ b/src/hotspot/share/runtime/thread.hpp -@@ -1234,7 +1234,7 @@ class JavaThread: public Thread { - address last_Java_pc(void) { return _anchor.last_Java_pc(); } - - // Safepoint support --#if !(defined(PPC64) || defined(AARCH64)) -+#if !(defined(PPC64) || defined(AARCH64) || defined(RISCV64)) - JavaThreadState thread_state() const { return _thread_state; } - void set_thread_state(JavaThreadState s) { - assert(current_or_null() == NULL || current_or_null() == this, -diff --git a/src/hotspot/share/runtime/thread.inline.hpp b/src/hotspot/share/runtime/thread.inline.hpp -index dee8534f7..aa71d7655 100644 ---- a/src/hotspot/share/runtime/thread.inline.hpp -+++ b/src/hotspot/share/runtime/thread.inline.hpp -@@ -142,7 +142,7 @@ inline void JavaThread::set_pending_async_exception(oop e) { - set_has_async_exception(); - } - --#if defined(PPC64) || defined (AARCH64) -+#if defined(PPC64) || defined(AARCH64) || defined(RISCV64) - inline JavaThreadState JavaThread::thread_state() const { - return (JavaThreadState) OrderAccess::load_acquire((volatile jint*)&_thread_state); - } -diff --git a/src/hotspot/share/utilities/debug.cpp b/src/hotspot/share/utilities/debug.cpp -index 0b898dcc3..7f76486ae 100644 ---- a/src/hotspot/share/utilities/debug.cpp -+++ b/src/hotspot/share/utilities/debug.cpp -@@ -632,6 +632,7 @@ void help() { - tty->print_cr(" pns($sp, $rbp, $pc) 
on Linux/amd64 and Solaris/amd64 or"); - tty->print_cr(" pns($sp, $ebp, $pc) on Linux/x86 or"); - tty->print_cr(" pns($sp, $fp, $pc) on Linux/AArch64 or"); -+ tty->print_cr(" pns($sp, $fp, $pc) on Linux/RISCV64 or"); - tty->print_cr(" pns($sp, 0, $pc) on Linux/ppc64 or"); - tty->print_cr(" pns($sp + 0x7ff, 0, $pc) on Solaris/SPARC"); - tty->print_cr(" - in gdb do 'set overload-resolution off' before calling pns()"); -diff --git a/src/hotspot/share/utilities/macros.hpp b/src/hotspot/share/utilities/macros.hpp -index cf8025386..e8ab3097a 100644 ---- a/src/hotspot/share/utilities/macros.hpp -+++ b/src/hotspot/share/utilities/macros.hpp -@@ -597,6 +597,32 @@ - - #define MACOS_AARCH64_ONLY(x) MACOS_ONLY(AARCH64_ONLY(x)) - -+#if defined(RISCV32) || defined(RISCV64) -+#define RISCV -+#define RISCV_ONLY(code) code -+#define NOT_RISCV(code) -+#else -+#undef RISCV -+#define RISCV_ONLY(code) -+#define NOT_RISCV(code) code -+#endif -+ -+#ifdef RISCV32 -+#define RISCV32_ONLY(code) code -+#define NOT_RISCV32(code) -+#else -+#define RISCV32_ONLY(code) -+#define NOT_RISCV32(code) code -+#endif -+ -+#ifdef RISCV64 -+#define RISCV64_ONLY(code) code -+#define NOT_RISCV64(code) -+#else -+#define RISCV64_ONLY(code) -+#define NOT_RISCV64(code) code -+#endif -+ - #ifdef VM_LITTLE_ENDIAN - #define LITTLE_ENDIAN_ONLY(code) code - #define BIG_ENDIAN_ONLY(code) -diff --git a/src/java.base/share/classes/java/lang/StringLatin1.java b/src/java.base/share/classes/java/lang/StringLatin1.java -index 063a5ef3a..50e9cdb57 100644 ---- a/src/java.base/share/classes/java/lang/StringLatin1.java -+++ b/src/java.base/share/classes/java/lang/StringLatin1.java -@@ -209,6 +209,11 @@ final class StringLatin1 { - // Note: fromIndex might be near -1>>>1. - return -1; - } -+ return indexOfChar(value, ch, fromIndex, max); -+ } -+ -+ @HotSpotIntrinsicCandidate -+ private static int indexOfChar(byte[] value, int ch, int fromIndex, int max) { - byte c = (byte)ch; - for (int i = fromIndex; i < max; i++) { - if (value[i] == c) { -diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c -index 0d834302c..55a7b96f7 100644 ---- a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c -+++ b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c -@@ -58,6 +58,10 @@ - #include "sun_jvm_hotspot_debugger_aarch64_AARCH64ThreadContext.h" - #endif - -+#ifdef riscv64 -+#include "sun_jvm_hotspot_debugger_riscv64_RISCV64ThreadContext.h" -+#endif -+ - static jfieldID p_ps_prochandle_ID = 0; - static jfieldID threadList_ID = 0; - static jfieldID loadObjectList_ID = 0; -@@ -397,7 +401,7 @@ JNIEXPORT jbyteArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo - return (err == PS_OK)? 
array : 0; - } - --#if defined(i386) || defined(amd64) || defined(sparc) || defined(sparcv9) | defined(ppc64) || defined(ppc64le) || defined(aarch64) -+#if defined(i386) || defined(amd64) || defined(sparc) || defined(sparcv9) || defined(ppc64) || defined(ppc64le) || defined(aarch64) || defined(riscv64) - JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLocal_getThreadIntegerRegisterSet0 - (JNIEnv *env, jobject this_obj, jint lwp_id) { - -@@ -422,6 +426,9 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo - #ifdef aarch64 - #define NPRGREG sun_jvm_hotspot_debugger_aarch64_AARCH64ThreadContext_NPRGREG - #endif -+#ifdef riscv64 -+#define NPRGREG sun_jvm_hotspot_debugger_riscv64_RISCV64ThreadContext_NPRGREG -+#endif - #if defined(sparc) || defined(sparcv9) - #define NPRGREG sun_jvm_hotspot_debugger_sparc_SPARCThreadContext_NPRGREG - #endif -@@ -534,6 +541,46 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo - } - #endif /* aarch64 */ - -+#if defined(riscv64) -+ -+#define REG_INDEX(reg) sun_jvm_hotspot_debugger_riscv64_RISCV64ThreadContext_##reg -+ -+ { -+ regs[REG_INDEX(PC)] = gregs.pc; -+ regs[REG_INDEX(LR)] = gregs.ra; -+ regs[REG_INDEX(SP)] = gregs.sp; -+ regs[REG_INDEX(R3)] = gregs.gp; -+ regs[REG_INDEX(R4)] = gregs.tp; -+ regs[REG_INDEX(R5)] = gregs.t0; -+ regs[REG_INDEX(R6)] = gregs.t1; -+ regs[REG_INDEX(R7)] = gregs.t2; -+ regs[REG_INDEX(R8)] = gregs.s0; -+ regs[REG_INDEX(R9)] = gregs.s1; -+ regs[REG_INDEX(R10)] = gregs.a0; -+ regs[REG_INDEX(R11)] = gregs.a1; -+ regs[REG_INDEX(R12)] = gregs.a2; -+ regs[REG_INDEX(R13)] = gregs.a3; -+ regs[REG_INDEX(R14)] = gregs.a4; -+ regs[REG_INDEX(R15)] = gregs.a5; -+ regs[REG_INDEX(R16)] = gregs.a6; -+ regs[REG_INDEX(R17)] = gregs.a7; -+ regs[REG_INDEX(R18)] = gregs.s2; -+ regs[REG_INDEX(R19)] = gregs.s3; -+ regs[REG_INDEX(R20)] = gregs.s4; -+ regs[REG_INDEX(R21)] = gregs.s5; -+ regs[REG_INDEX(R22)] = gregs.s6; -+ regs[REG_INDEX(R23)] = gregs.s7; -+ regs[REG_INDEX(R24)] = gregs.s8; -+ regs[REG_INDEX(R25)] = gregs.s9; -+ regs[REG_INDEX(R26)] = gregs.s10; -+ regs[REG_INDEX(R27)] = gregs.s11; -+ regs[REG_INDEX(R28)] = gregs.t3; -+ regs[REG_INDEX(R29)] = gregs.t4; -+ regs[REG_INDEX(R30)] = gregs.t5; -+ regs[REG_INDEX(R31)] = gregs.t6; -+ } -+#endif /* riscv64 */ + - #if defined(ppc64) || defined(ppc64le) - #define REG_INDEX(reg) sun_jvm_hotspot_debugger_ppc64_PPC64ThreadContext_##reg - -diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h b/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h -index 8318e8e02..9d7fda8a6 100644 ---- a/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h -+++ b/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h -@@ -43,6 +43,8 @@ - #elif defined(arm) - #include - #define user_regs_struct pt_regs -+#elif defined(riscv64) -+#include - #endif - - // This C bool type must be int for compatibility with Linux calls and -diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c -index de5254d85..12eafc455 100644 ---- a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c -+++ b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c -@@ -134,6 +134,9 @@ static bool process_get_lwp_regs(struct ps_prochandle* ph, pid_t pid, struct use - #define ptrace_getregs(request, pid, addr, data) ptrace(request, pid, data, addr) - #endif - -+// riscv kernel didn't implement compat_arch_ptrace function that will handle PT_GETREGS case -+// like other platforms, 
so call ptrace with PTRACE_GETREGSET here. -+#ifndef riscv64 - #if defined(_LP64) && defined(PTRACE_GETREGS64) - #define PTRACE_GETREGS_REQ PTRACE_GETREGS64 - #elif defined(PTRACE_GETREGS) -@@ -141,6 +144,7 @@ static bool process_get_lwp_regs(struct ps_prochandle* ph, pid_t pid, struct use - #elif defined(PT_GETREGS) - #define PTRACE_GETREGS_REQ PT_GETREGS - #endif -+#endif - - #ifdef PTRACE_GETREGS_REQ - if (ptrace_getregs(PTRACE_GETREGS_REQ, pid, user, NULL) < 0) { -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java -index 0f5f0119c..82c083055 100644 ---- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java -@@ -1,6 +1,7 @@ - /* - * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2021, Azul Systems, Inc. All rights reserved. -+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -36,6 +37,7 @@ import sun.jvm.hotspot.debugger.MachineDescription; - import sun.jvm.hotspot.debugger.MachineDescriptionAMD64; - import sun.jvm.hotspot.debugger.MachineDescriptionPPC64; - import sun.jvm.hotspot.debugger.MachineDescriptionAArch64; -+import sun.jvm.hotspot.debugger.MachineDescriptionRISCV64; - import sun.jvm.hotspot.debugger.MachineDescriptionIntelX86; - import sun.jvm.hotspot.debugger.MachineDescriptionSPARC32Bit; - import sun.jvm.hotspot.debugger.MachineDescriptionSPARC64Bit; -@@ -592,6 +594,8 @@ public class HotSpotAgent { - machDesc = new MachineDescriptionPPC64(); - } else if (cpu.equals("aarch64")) { - machDesc = new MachineDescriptionAArch64(); -+ } else if (cpu.equals("riscv64")) { -+ machDesc = new MachineDescriptionRISCV64(); - } else if (cpu.equals("sparc")) { - if (LinuxDebuggerLocal.getAddressSize()==8) { - machDesc = new MachineDescriptionSPARC64Bit(); -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionRISCV64.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionRISCV64.java ++ _no_of_cores = os::processor_count(); ++ _no_of_threads = _no_of_cores; ++ _no_of_sockets = _no_of_cores; ++ snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "RISCV64"); ++ snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "RISCV64 %s", _features_string); ++ _initialized = true; ++} +diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.hpp b/src/hotspot/cpu/riscv/vm_version_riscv.hpp new file mode 100644 -index 000000000..4221937f1 +index 00000000000..8e35530359a --- /dev/null -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionRISCV64.java -@@ -0,0 +1,40 @@ ++++ b/src/hotspot/cpu/riscv/vm_version_riscv.hpp +@@ -0,0 +1,72 @@ +/* -+ * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * @@ -58178,163 +56134,61 @@ index 000000000..4221937f1 + * + */ + -+package sun.jvm.hotspot.debugger; ++#ifndef CPU_RISCV_VM_VERSION_RISCV_HPP ++#define CPU_RISCV_VM_VERSION_RISCV_HPP + -+public class MachineDescriptionRISCV64 extends MachineDescriptionTwosComplement implements MachineDescription { -+ public long getAddressSize() { -+ return 8; -+ } ++#include "runtime/abstract_vm_version.hpp" ++#include "runtime/arguments.hpp" ++#include "runtime/globals_extension.hpp" ++#include "utilities/sizes.hpp" + -+ public boolean isLP64() { -+ return true; -+ } -+ -+ public boolean isBigEndian() { -+ return false; -+ } -+} -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java -index 5e5a6bb71..acd5844ca 100644 ---- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java -@@ -33,6 +33,7 @@ import sun.jvm.hotspot.debugger.cdbg.*; - import sun.jvm.hotspot.debugger.x86.*; - import sun.jvm.hotspot.debugger.amd64.*; - import sun.jvm.hotspot.debugger.aarch64.*; -+import sun.jvm.hotspot.debugger.riscv64.*; - import sun.jvm.hotspot.debugger.sparc.*; - import sun.jvm.hotspot.debugger.ppc64.*; - import sun.jvm.hotspot.debugger.linux.x86.*; -@@ -40,6 +41,7 @@ import sun.jvm.hotspot.debugger.linux.amd64.*; - import sun.jvm.hotspot.debugger.linux.sparc.*; - import sun.jvm.hotspot.debugger.linux.ppc64.*; - import sun.jvm.hotspot.debugger.linux.aarch64.*; -+import sun.jvm.hotspot.debugger.linux.riscv64.*; - import sun.jvm.hotspot.utilities.*; - - class LinuxCDebugger implements CDebugger { -@@ -116,7 +118,14 @@ class LinuxCDebugger implements CDebugger { - Address pc = context.getRegisterAsAddress(AARCH64ThreadContext.PC); - if (pc == null) return null; - return new LinuxAARCH64CFrame(dbg, fp, pc); -- } else { -+ } else if (cpu.equals("riscv64")) { -+ RISCV64ThreadContext context = (RISCV64ThreadContext) thread.getContext(); -+ Address fp = context.getRegisterAsAddress(RISCV64ThreadContext.FP); -+ if (fp == null) return null; -+ Address pc = context.getRegisterAsAddress(RISCV64ThreadContext.PC); -+ if (pc == null) return null; -+ return new LinuxRISCV64CFrame(dbg, fp, pc); -+ } else { - // Runtime exception thrown by LinuxThreadContextFactory if unknown cpu - ThreadContext context = (ThreadContext) thread.getContext(); - return context.getTopFrame(dbg); -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64CFrame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64CFrame.java -new file mode 100644 -index 000000000..eaef586b4 ---- /dev/null -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64CFrame.java -@@ -0,0 +1,90 @@ -+/* -+ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, Red Hat Inc. -+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. 
-+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ -+ -+package sun.jvm.hotspot.debugger.linux.riscv64; -+ -+import sun.jvm.hotspot.debugger.*; -+import sun.jvm.hotspot.debugger.riscv64.*; -+import sun.jvm.hotspot.debugger.linux.*; -+import sun.jvm.hotspot.debugger.cdbg.*; -+import sun.jvm.hotspot.debugger.cdbg.basic.*; -+ -+public final class LinuxRISCV64CFrame extends BasicCFrame { -+ private static final int C_FRAME_LINK_OFFSET = -2; -+ private static final int C_FRAME_RETURN_ADDR_OFFSET = -1; -+ -+ public LinuxRISCV64CFrame(LinuxDebugger dbg, Address fp, Address pc) { -+ super(dbg.getCDebugger()); -+ this.fp = fp; -+ this.pc = pc; -+ this.dbg = dbg; -+ } -+ -+ // override base class impl to avoid ELF parsing -+ public ClosestSymbol closestSymbolToPC() { -+ // try native lookup in debugger. -+ return dbg.lookup(dbg.getAddressValue(pc())); -+ } ++class VM_Version : public Abstract_VM_Version { ++#ifdef COMPILER2 ++private: ++ static void c2_initialize(); ++#endif // COMPILER2 + -+ public Address pc() { -+ return pc; -+ } ++protected: ++ static const char* _uarch; ++ static uint32_t _initial_vector_length; ++ static void get_os_cpu_info(); ++ static uint32_t get_current_vector_length(); + -+ public Address localVariableBase() { -+ return fp; -+ } ++public: ++ // Initialization ++ static void initialize(); + -+ public CFrame sender(ThreadProxy thread) { -+ RISCV64ThreadContext context = (RISCV64ThreadContext) thread.getContext(); -+ Address rsp = context.getRegisterAsAddress(RISCV64ThreadContext.SP); ++ constexpr static bool supports_stack_watermark_barrier() { return true; } + -+ if ((fp == null) || fp.lessThan(rsp)) { -+ return null; -+ } ++ enum Feature_Flag { ++#define CPU_FEATURE_FLAGS(decl) \ ++ decl(I, "i", 8) \ ++ decl(M, "m", 12) \ ++ decl(A, "a", 0) \ ++ decl(F, "f", 5) \ ++ decl(D, "d", 3) \ ++ decl(C, "c", 2) \ ++ decl(V, "v", 21) \ ++ decl(B, "b", 1) + -+ // Check alignment of fp -+ if (dbg.getAddressValue(fp) % (2 * ADDRESS_SIZE) != 0) { -+ return null; -+ } ++#define DECLARE_CPU_FEATURE_FLAG(id, name, bit) CPU_##id = (1 << bit), ++ CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_FLAG) ++#undef DECLARE_CPU_FEATURE_FLAG ++ }; + -+ Address nextFP = fp.getAddressAt(C_FRAME_LINK_OFFSET * ADDRESS_SIZE); -+ if (nextFP == null || nextFP.lessThanOrEqual(fp)) { -+ return null; -+ } -+ Address nextPC = fp.getAddressAt(C_FRAME_RETURN_ADDR_OFFSET * ADDRESS_SIZE); -+ if (nextPC == null) { -+ return null; -+ } -+ return new LinuxRISCV64CFrame(dbg, nextFP, nextPC); -+ } ++ static void initialize_cpu_information(void); ++}; + -+ // package/class internals only -+ private static final int ADDRESS_SIZE = 8; -+ private Address pc; -+ private Address sp; -+ private Address fp; -+ private LinuxDebugger dbg; -+} -diff --git 
a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64ThreadContext.java ++#endif // CPU_RISCV_VM_VERSION_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.cpp b/src/hotspot/cpu/riscv/vmreg_riscv.cpp new file mode 100644 -index 000000000..4789e664c +index 00000000000..aa7222dc64a --- /dev/null -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64ThreadContext.java -@@ -0,0 +1,48 @@ ++++ b/src/hotspot/cpu/riscv/vmreg_riscv.cpp +@@ -0,0 +1,64 @@ +/* -+ * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, Red Hat Inc. -+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -58357,68 +56211,54 @@ index 000000000..4789e664c + * + */ + -+package sun.jvm.hotspot.debugger.linux.riscv64; -+ -+import sun.jvm.hotspot.debugger.*; -+import sun.jvm.hotspot.debugger.riscv64.*; -+import sun.jvm.hotspot.debugger.linux.*; ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "code/vmreg.hpp" + -+public class LinuxRISCV64ThreadContext extends RISCV64ThreadContext { -+ private LinuxDebugger debugger; ++void VMRegImpl::set_regName() { ++ int i = 0; ++ Register reg = ::as_Register(0); ++ for ( ; i < ConcreteRegisterImpl::max_gpr ; ) { ++ for (int j = 0 ; j < RegisterImpl::max_slots_per_register ; j++) { ++ regName[i++] = reg->name(); ++ } ++ reg = reg->successor(); ++ } + -+ public LinuxRISCV64ThreadContext(LinuxDebugger debugger) { -+ super(); -+ this.debugger = debugger; ++ FloatRegister freg = ::as_FloatRegister(0); ++ for ( ; i < ConcreteRegisterImpl::max_fpr ; ) { ++ for (int j = 0 ; j < FloatRegisterImpl::max_slots_per_register ; j++) { ++ regName[i++] = reg->name(); ++ } ++ freg = freg->successor(); + } + -+ public void setRegisterAsAddress(int index, Address value) { -+ setRegister(index, debugger.getAddressValue(value)); ++ VectorRegister vreg = ::as_VectorRegister(0); ++ for ( ; i < ConcreteRegisterImpl::max_vpr ; ) { ++ for (int j = 0 ; j < VectorRegisterImpl::max_slots_per_register ; j++) { ++ regName[i++] = reg->name(); ++ } ++ vreg = vreg->successor(); + } + -+ public Address getRegisterAsAddress(int index) { -+ return debugger.newAddress(getRegister(index)); ++ for ( ; i < ConcreteRegisterImpl::number_of_registers ; i++) { ++ regName[i] = "NON-GPR-FPR-VPR"; + } +} -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java -index 74e957d94..1f44d75ee 100644 ---- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java -@@ -32,12 +32,14 @@ import sun.jvm.hotspot.debugger.*; - import sun.jvm.hotspot.debugger.cdbg.*; - import sun.jvm.hotspot.debugger.proc.amd64.*; - import sun.jvm.hotspot.debugger.proc.aarch64.*; -+import sun.jvm.hotspot.debugger.proc.riscv64.*; - import sun.jvm.hotspot.debugger.proc.sparc.*; - import sun.jvm.hotspot.debugger.proc.ppc64.*; - import 
sun.jvm.hotspot.debugger.proc.x86.*; - import sun.jvm.hotspot.debugger.ppc64.*; - import sun.jvm.hotspot.debugger.amd64.*; - import sun.jvm.hotspot.debugger.aarch64.*; -+import sun.jvm.hotspot.debugger.riscv64.*; - import sun.jvm.hotspot.debugger.sparc.*; - import sun.jvm.hotspot.debugger.x86.*; - import sun.jvm.hotspot.utilities.*; -@@ -94,6 +96,10 @@ public class ProcDebuggerLocal extends DebuggerBase implements ProcDebugger { - threadFactory = new ProcAARCH64ThreadFactory(this); - pcRegIndex = AARCH64ThreadContext.PC; - fpRegIndex = AARCH64ThreadContext.FP; -+ } else if (cpu.equals("riscv64")) { -+ threadFactory = new ProcRISCV64ThreadFactory(this); -+ pcRegIndex = RISCV64ThreadContext.PC; -+ fpRegIndex = RISCV64ThreadContext.FP; - } else if (cpu.equals("ppc64")) { - threadFactory = new ProcPPC64ThreadFactory(this); - pcRegIndex = PPC64ThreadContext.PC; -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64Thread.java ++ ++VMReg VMRegImpl::vmStorageToVMReg(int type, int index) { ++ Unimplemented(); ++ return VMRegImpl::Bad(); ++} +diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.hpp b/src/hotspot/cpu/riscv/vmreg_riscv.hpp new file mode 100644 -index 000000000..c1cf1fb0f +index 00000000000..9e611b1f671 --- /dev/null -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64Thread.java -@@ -0,0 +1,88 @@ ++++ b/src/hotspot/cpu/riscv/vmreg_riscv.hpp +@@ -0,0 +1,68 @@ +/* -+ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, Red Hat Inc. -+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2006, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -58441,132 +56281,58 @@ index 000000000..c1cf1fb0f + * + */ + -+package sun.jvm.hotspot.debugger.proc.riscv64; -+ -+import sun.jvm.hotspot.debugger.*; -+import sun.jvm.hotspot.debugger.riscv64.*; -+import sun.jvm.hotspot.debugger.proc.*; -+import sun.jvm.hotspot.utilities.*; -+ -+public class ProcRISCV64Thread implements ThreadProxy { -+ private ProcDebugger debugger; -+ private int id; -+ -+ public ProcRISCV64Thread(ProcDebugger debugger, Address addr) { -+ this.debugger = debugger; -+ -+ // FIXME: the size here should be configurable. However, making it -+ // so would produce a dependency on the "types" package from the -+ // debugger package, which is not desired. 
-+ this.id = (int) addr.getCIntegerAt(0, 4, true); -+ } -+ -+ public ProcRISCV64Thread(ProcDebugger debugger, long id) { -+ this.debugger = debugger; -+ this.id = (int) id; -+ } -+ -+ public ThreadContext getContext() throws IllegalThreadStateException { -+ ProcRISCV64ThreadContext context = new ProcRISCV64ThreadContext(debugger); -+ long[] regs = debugger.getThreadIntegerRegisterSet(id); -+ if (Assert.ASSERTS_ENABLED) { -+ Assert.that(regs.length == RISCV64ThreadContext.NPRGREG, "size mismatch"); -+ } -+ for (int i = 0; i < regs.length; i++) { -+ context.setRegister(i, regs[i]); -+ } -+ return context; -+ } -+ -+ public boolean canSetContext() throws DebuggerException { -+ return false; -+ } -+ -+ public void setContext(ThreadContext context) -+ throws IllegalThreadStateException, DebuggerException { -+ throw new DebuggerException("Unimplemented"); -+ } -+ -+ public String toString() { -+ return "t@" + id; -+ } -+ -+ public boolean equals(Object obj) { -+ if ((obj == null) || !(obj instanceof ProcRISCV64Thread)) { -+ return false; -+ } -+ -+ return (((ProcRISCV64Thread) obj).id == id); -+ } ++#ifndef CPU_RISCV_VMREG_RISCV_HPP ++#define CPU_RISCV_VMREG_RISCV_HPP + -+ public int hashCode() { -+ return id; -+ } ++inline bool is_Register() { ++ return (unsigned int) value() < (unsigned int) ConcreteRegisterImpl::max_gpr; +} -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadContext.java -new file mode 100644 -index 000000000..498fa0dc6 ---- /dev/null -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadContext.java -@@ -0,0 +1,48 @@ -+/* -+ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, Red Hat Inc. -+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ + -+package sun.jvm.hotspot.debugger.proc.riscv64; ++inline bool is_FloatRegister() { ++ return value() >= ConcreteRegisterImpl::max_gpr && value() < ConcreteRegisterImpl::max_fpr; ++} + -+import sun.jvm.hotspot.debugger.*; -+import sun.jvm.hotspot.debugger.riscv64.*; -+import sun.jvm.hotspot.debugger.proc.*; ++inline bool is_VectorRegister() { ++ return value() >= ConcreteRegisterImpl::max_fpr && value() < ConcreteRegisterImpl::max_vpr; ++} + -+public class ProcRISCV64ThreadContext extends RISCV64ThreadContext { -+ private ProcDebugger debugger; ++inline Register as_Register() { ++ assert(is_Register(), "must be"); ++ return ::as_Register(value() / RegisterImpl::max_slots_per_register); ++} + -+ public ProcRISCV64ThreadContext(ProcDebugger debugger) { -+ super(); -+ this.debugger = debugger; -+ } ++inline FloatRegister as_FloatRegister() { ++ assert(is_FloatRegister() && is_even(value()), "must be"); ++ return ::as_FloatRegister((value() - ConcreteRegisterImpl::max_gpr) / ++ FloatRegisterImpl::max_slots_per_register); ++} + -+ public void setRegisterAsAddress(int index, Address value) { -+ setRegister(index, debugger.getAddressValue(value)); -+ } ++inline VectorRegister as_VectorRegister() { ++ assert(is_VectorRegister() && ((value() & (VectorRegisterImpl::max_slots_per_register - 1)) == 0), "must be"); ++ return ::as_VectorRegister((value() - ConcreteRegisterImpl::max_fpr) / ++ VectorRegisterImpl::max_slots_per_register); ++} + -+ public Address getRegisterAsAddress(int index) { -+ return debugger.newAddress(getRegister(index)); -+ } ++inline bool is_concrete() { ++ assert(is_reg(), "must be"); ++ if (is_VectorRegister()) { ++ int base = value() - ConcreteRegisterImpl::max_fpr; ++ return (base % VectorRegisterImpl::max_slots_per_register) == 0; ++ } else { ++ return is_even(value()); ++ } +} -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadFactory.java ++ ++#endif // CPU_RISCV_VMREG_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp b/src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp new file mode 100644 -index 000000000..81afd8fdc +index 00000000000..06b70020b4b --- /dev/null -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadFactory.java ++++ b/src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp @@ -0,0 +1,46 @@ +/* -+ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, Red Hat Inc. -+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2006, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -58589,36 +56355,37 @@ index 000000000..81afd8fdc + * + */ + -+package sun.jvm.hotspot.debugger.proc.riscv64; -+ -+import sun.jvm.hotspot.debugger.*; -+import sun.jvm.hotspot.debugger.proc.*; -+ -+public class ProcRISCV64ThreadFactory implements ProcThreadFactory { -+ private ProcDebugger debugger; ++#ifndef CPU_RISCV_VM_VMREG_RISCV_INLINE_HPP ++#define CPU_RISCV_VM_VMREG_RISCV_INLINE_HPP + -+ public ProcRISCV64ThreadFactory(ProcDebugger debugger) { -+ this.debugger = debugger; -+ } ++inline VMReg RegisterImpl::as_VMReg() const { ++ if (this == noreg) { ++ return VMRegImpl::Bad(); ++ } ++ return VMRegImpl::as_VMReg(encoding() * RegisterImpl::max_slots_per_register); ++} + -+ public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { -+ return new ProcRISCV64Thread(debugger, threadIdentifierAddr); -+ } ++inline VMReg FloatRegisterImpl::as_VMReg() const { ++ return VMRegImpl::as_VMReg((encoding() * FloatRegisterImpl::max_slots_per_register) + ++ ConcreteRegisterImpl::max_gpr); ++} + -+ public ThreadProxy createThreadWrapper(long id) { -+ return new ProcRISCV64Thread(debugger, id); -+ } ++inline VMReg VectorRegisterImpl::as_VMReg() const { ++ return VMRegImpl::as_VMReg((encoding() * VectorRegisterImpl::max_slots_per_register) + ++ ConcreteRegisterImpl::max_fpr); +} -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64Thread.java ++ ++#endif // CPU_RISCV_VM_VMREG_RISCV_INLINE_HPP +diff --git a/src/hotspot/cpu/riscv/vtableStubs_riscv.cpp b/src/hotspot/cpu/riscv/vtableStubs_riscv.cpp new file mode 100644 -index 000000000..ab92e3e74 +index 00000000000..78b81138003 --- /dev/null -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64Thread.java -@@ -0,0 +1,55 @@ ++++ b/src/hotspot/cpu/riscv/vtableStubs_riscv.cpp +@@ -0,0 +1,260 @@ +/* -+ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, Red Hat Inc. -+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -58641,99 +56408,310 @@ index 000000000..ab92e3e74 + * + */ + -+package sun.jvm.hotspot.debugger.remote.riscv64; ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "assembler_riscv.inline.hpp" ++#include "code/vtableStubs.hpp" ++#include "interp_masm_riscv.hpp" ++#include "memory/resourceArea.hpp" ++#include "oops/compiledICHolder.hpp" ++#include "oops/instanceKlass.hpp" ++#include "oops/klassVtable.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "vmreg_riscv.inline.hpp" ++#ifdef COMPILER2 ++#include "opto/runtime.hpp" ++#endif + -+import sun.jvm.hotspot.debugger.*; -+import sun.jvm.hotspot.debugger.riscv64.*; -+import sun.jvm.hotspot.debugger.remote.*; -+import sun.jvm.hotspot.utilities.*; ++// machine-dependent part of VtableStubs: create VtableStub of correct size and ++// initialize its code + -+public class RemoteRISCV64Thread extends RemoteThread { -+ public RemoteRISCV64Thread(RemoteDebuggerClient debugger, Address addr) { -+ super(debugger, addr); -+ } ++#define __ masm-> + -+ public RemoteRISCV64Thread(RemoteDebuggerClient debugger, long id) { -+ super(debugger, id); -+ } ++#ifndef PRODUCT ++extern "C" void bad_compiled_vtable_index(JavaThread* thread, oop receiver, int index); ++#endif + -+ public ThreadContext getContext() throws IllegalThreadStateException { -+ RemoteRISCV64ThreadContext context = new RemoteRISCV64ThreadContext(debugger); -+ long[] regs = (addr != null)? debugger.getThreadIntegerRegisterSet(addr) : -+ debugger.getThreadIntegerRegisterSet(id); -+ if (Assert.ASSERTS_ENABLED) { -+ Assert.that(regs.length == RISCV64ThreadContext.NPRGREG, "size of register set must match"); -+ } -+ for (int i = 0; i < regs.length; i++) { -+ context.setRegister(i, regs[i]); -+ } -+ return context; ++VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { ++ // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. ++ const int stub_code_length = code_size_limit(true); ++ VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index); ++ // Can be NULL if there is no free space in the code cache. ++ if (s == NULL) { ++ return NULL; + } -+} -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadContext.java -new file mode 100644 -index 000000000..1e8cd19b2 ---- /dev/null -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadContext.java -@@ -0,0 +1,48 @@ -+/* -+ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, Red Hat Inc. -+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). 
-+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ -+ -+package sun.jvm.hotspot.debugger.remote.riscv64; + -+import sun.jvm.hotspot.debugger.*; -+import sun.jvm.hotspot.debugger.riscv64.*; -+import sun.jvm.hotspot.debugger.remote.*; ++ // Count unused bytes in instruction sequences of variable size. ++ // We add them to the computed buffer size in order to avoid ++ // overflow in subsequently generated stubs. ++ address start_pc = NULL; ++ int slop_bytes = 0; ++ int slop_delta = 0; + -+public class RemoteRISCV64ThreadContext extends RISCV64ThreadContext { -+ private RemoteDebuggerClient debugger; ++ ResourceMark rm; ++ CodeBuffer cb(s->entry_point(), stub_code_length); ++ MacroAssembler* masm = new MacroAssembler(&cb); ++ assert_cond(masm != NULL); + -+ public RemoteRISCV64ThreadContext(RemoteDebuggerClient debugger) { -+ super(); -+ this.debugger = debugger; ++#if (!defined(PRODUCT) && defined(COMPILER2)) ++ if (CountCompiledCalls) { ++ __ la(t2, ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr())); ++ __ add_memory_int64(Address(t2), 1); + } ++#endif + -+ public void setRegisterAsAddress(int index, Address value) { -+ setRegister(index, debugger.getAddressValue(value)); -+ } ++ // get receiver (need to skip return address on top of stack) ++ assert(VtableStub::receiver_location() == j_rarg0->as_VMReg(), "receiver expected in j_rarg0"); + -+ public Address getRegisterAsAddress(int index) { -+ return debugger.newAddress(getRegister(index)); ++ // get receiver klass ++ address npe_addr = __ pc(); ++ __ load_klass(t2, j_rarg0); ++ ++#ifndef PRODUCT ++ if (DebugVtables) { ++ Label L; ++ start_pc = __ pc(); ++ ++ // check offset vs vtable length ++ __ lwu(t0, Address(t2, Klass::vtable_length_offset())); ++ __ mvw(t1, vtable_index * vtableEntry::size()); ++ __ bgt(t0, t1, L); ++ __ enter(); ++ __ mv(x12, vtable_index); ++ ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), j_rarg0, x12); ++ const ptrdiff_t estimate = 256; ++ const ptrdiff_t codesize = __ pc() - start_pc; ++ slop_delta = estimate - codesize; // call_VM varies in length, depending on data ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "vtable #%d: Code size estimate (%d) for DebugVtables too small, required: %d", vtable_index, (int)estimate, (int)codesize); ++ ++ __ leave(); ++ __ bind(L); ++ } ++#endif // PRODUCT ++ ++ start_pc = __ pc(); ++ __ lookup_virtual_method(t2, vtable_index, xmethod); ++ // lookup_virtual_method generates ++ // 4 instructions (maximum value encountered in normal case):li(lui + addiw) + add + ld ++ // 1 instruction (best case):ld * 1 ++ slop_delta = 16 - (int)(__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ ++#ifndef PRODUCT ++ if (DebugVtables) { ++ Label L; ++ __ beqz(xmethod, L); ++ __ ld(t0, Address(xmethod, Method::from_compiled_offset())); ++ __ bnez(t0, L); ++ __ stop("Vtable entry is NULL"); ++ __ bind(L); + } ++#endif // PRODUCT ++ ++ // x10: receiver klass ++ // xmethod: Method* ++ // x12: receiver ++ address ame_addr = __ pc(); ++ __ ld(t0, Address(xmethod, 
Method::from_compiled_offset())); ++ __ jr(t0); ++ ++ masm->flush(); ++ bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, 0); ++ ++ return s; +} -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadFactory.java ++ ++VtableStub* VtableStubs::create_itable_stub(int itable_index) { ++ // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. ++ const int stub_code_length = code_size_limit(false); ++ VtableStub* s = new(stub_code_length) VtableStub(false, itable_index); ++ // Can be NULL if there is no free space in the code cache. ++ if (s == NULL) { ++ return NULL; ++ } ++ // Count unused bytes in instruction sequences of variable size. ++ // We add them to the computed buffer size in order to avoid ++ // overflow in subsequently generated stubs. ++ address start_pc = NULL; ++ int slop_bytes = 0; ++ int slop_delta = 0; ++ ++ ResourceMark rm; ++ CodeBuffer cb(s->entry_point(), stub_code_length); ++ MacroAssembler* masm = new MacroAssembler(&cb); ++ assert_cond(masm != NULL); ++ ++#if (!defined(PRODUCT) && defined(COMPILER2)) ++ if (CountCompiledCalls) { ++ __ la(x18, ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr())); ++ __ add_memory_int64(Address(x18), 1); ++ } ++#endif ++ ++ // get receiver (need to skip return address on top of stack) ++ assert(VtableStub::receiver_location() == j_rarg0->as_VMReg(), "receiver expected in j_rarg0"); ++ ++ // Entry arguments: ++ // t2: CompiledICHolder ++ // j_rarg0: Receiver ++ ++ // This stub is called from compiled code which has no callee-saved registers, ++ // so all registers except arguments are free at this point. ++ const Register recv_klass_reg = x18; ++ const Register holder_klass_reg = x19; // declaring interface klass (DECC) ++ const Register resolved_klass_reg = xmethod; // resolved interface klass (REFC) ++ const Register temp_reg = x28; ++ const Register temp_reg2 = x29; ++ const Register icholder_reg = t1; ++ ++ Label L_no_such_interface; ++ ++ __ ld(resolved_klass_reg, Address(icholder_reg, CompiledICHolder::holder_klass_offset())); ++ __ ld(holder_klass_reg, Address(icholder_reg, CompiledICHolder::holder_metadata_offset())); ++ ++ start_pc = __ pc(); ++ ++ // get receiver klass (also an implicit null-check) ++ address npe_addr = __ pc(); ++ __ load_klass(recv_klass_reg, j_rarg0); ++ ++ // Receiver subtype check against REFC. ++ __ lookup_interface_method(// inputs: rec. class, interface ++ recv_klass_reg, resolved_klass_reg, noreg, ++ // outputs: scan temp. reg1, scan temp. reg2 ++ temp_reg2, temp_reg, ++ L_no_such_interface, ++ /*return_method=*/false); ++ ++ const ptrdiff_t typecheckSize = __ pc() - start_pc; ++ start_pc = __ pc(); ++ ++ // Get selected method from declaring class and itable index ++ __ lookup_interface_method(// inputs: rec. class, interface, itable index ++ recv_klass_reg, holder_klass_reg, itable_index, ++ // outputs: method, scan temp. reg ++ xmethod, temp_reg, ++ L_no_such_interface); ++ ++ const ptrdiff_t lookupSize = __ pc() - start_pc; ++ ++ // Reduce "estimate" such that "padding" does not drop below 8. 
++ const ptrdiff_t estimate = 256; ++ const ptrdiff_t codesize = typecheckSize + lookupSize; ++ slop_delta = (int)(estimate - codesize); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "itable #%d: Code size estimate (%d) for lookup_interface_method too small, required: %d", itable_index, (int)estimate, (int)codesize); ++ ++#ifdef ASSERT ++ if (DebugVtables) { ++ Label L2; ++ __ beqz(xmethod, L2); ++ __ ld(t0, Address(xmethod, Method::from_compiled_offset())); ++ __ bnez(t0, L2); ++ __ stop("compiler entrypoint is null"); ++ __ bind(L2); ++ } ++#endif // ASSERT ++ ++ // xmethod: Method* ++ // j_rarg0: receiver ++ address ame_addr = __ pc(); ++ __ ld(t0, Address(xmethod, Method::from_compiled_offset())); ++ __ jr(t0); ++ ++ __ bind(L_no_such_interface); ++ // Handle IncompatibleClassChangeError in itable stubs. ++ // More detailed error message. ++ // We force resolving of the call site by jumping to the "handle ++ // wrong method" stub, and so let the interpreter runtime do all the ++ // dirty work. ++ assert(SharedRuntime::get_handle_wrong_method_stub() != NULL, "check initialization order"); ++ __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); ++ ++ masm->flush(); ++ bookkeeping(masm, tty, s, npe_addr, ame_addr, false, itable_index, slop_bytes, 0); ++ ++ return s; ++} ++ ++int VtableStub::pd_code_alignment() { ++ // RISCV cache line size is not an architected constant. We just align on word size. ++ const unsigned int icache_line_size = wordSize; ++ return icache_line_size; ++} +diff --git a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp +index 897be2209e2..ee298f56653 100644 +--- a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp ++++ b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp +@@ -1,6 +1,6 @@ + /* +- * Copyright (c) 2016, 2019, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2016, 2019, SAP SE. All rights reserved. ++ * Copyright (c) 2016, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2016, 2019 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -1447,7 +1447,10 @@ void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Op + } + + // result = condition ? opr1 : opr2 +-void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { ++void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type, ++ LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) { ++ assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp oprs on s390"); ++ + Assembler::branch_condition acond = Assembler::bcondEqual, ncond = Assembler::bcondNotEqual; + switch (condition) { + case lir_cond_equal: acond = Assembler::bcondEqual; ncond = Assembler::bcondNotEqual; break; +diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +index cee3140f4f7..82e9de5a06f 100644 +--- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp ++++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it +@@ -1970,7 +1970,10 @@ void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { + } + } + +-void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { ++void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type, ++ LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) { ++ assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp oprs on x86"); ++ + Assembler::Condition acond, ncond; + switch (condition) { + case lir_cond_equal: acond = Assembler::equal; ncond = Assembler::notEqual; break; +diff --git a/src/hotspot/os/linux/os_linux.cpp b/src/hotspot/os/linux/os_linux.cpp +index 3799adf5dd9..6f75e623a9a 100644 +--- a/src/hotspot/os/linux/os_linux.cpp ++++ b/src/hotspot/os/linux/os_linux.cpp +@@ -2845,6 +2845,8 @@ void os::get_summary_cpu_info(char* cpuinfo, size_t length) { + strncpy(cpuinfo, "IA64", length); + #elif defined(PPC) + strncpy(cpuinfo, "PPC64", length); ++#elif defined(RISCV) ++ strncpy(cpuinfo, "RISCV64", length); + #elif defined(S390) + strncpy(cpuinfo, "S390", length); + #elif defined(SPARC) +diff --git a/src/hotspot/os_cpu/linux_riscv/assembler_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/assembler_linux_riscv.cpp new file mode 100644 -index 000000000..eecb6e029 +index 00000000000..f2610af6cdd --- /dev/null -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadFactory.java -@@ -0,0 +1,46 @@ ++++ b/src/hotspot/os_cpu/linux_riscv/assembler_linux_riscv.cpp +@@ -0,0 +1,26 @@ +/* -+ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, Red Hat Inc. -+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -58756,36 +56734,16 @@ index 000000000..eecb6e029 + * + */ + -+package sun.jvm.hotspot.debugger.remote.riscv64; -+ -+import sun.jvm.hotspot.debugger.*; -+import sun.jvm.hotspot.debugger.remote.*; -+ -+public class RemoteRISCV64ThreadFactory implements RemoteThreadFactory { -+ private RemoteDebuggerClient debugger; -+ -+ public RemoteRISCV64ThreadFactory(RemoteDebuggerClient debugger) { -+ this.debugger = debugger; -+ } -+ -+ public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { -+ return new RemoteRISCV64Thread(debugger, threadIdentifierAddr); -+ } -+ -+ public ThreadProxy createThreadWrapper(long id) { -+ return new RemoteRISCV64Thread(debugger, id); -+ } -+} -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/riscv64/RISCV64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/riscv64/RISCV64ThreadContext.java ++// nothing required here +diff --git a/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp new file mode 100644 -index 000000000..426ff0580 +index 00000000000..761da5d743e --- /dev/null -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/riscv64/RISCV64ThreadContext.java -@@ -0,0 +1,172 @@ ++++ b/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp +@@ -0,0 +1,134 @@ +/* -+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, Red Hat Inc. -+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -58808,183 +56766,124 @@ index 000000000..426ff0580 + * + */ + -+package sun.jvm.hotspot.debugger.riscv64; -+ -+import java.lang.annotation.Native; -+ -+import sun.jvm.hotspot.debugger.*; -+import sun.jvm.hotspot.debugger.cdbg.*; -+ -+/** Specifies the thread context on riscv64 platforms; only a sub-portion -+ * of the context is guaranteed to be present on all operating -+ * systems. */ -+ -+public abstract class RISCV64ThreadContext implements ThreadContext { -+ // Taken from /usr/include/asm/sigcontext.h on Linux/RISCV64. ++#ifndef OS_CPU_LINUX_RISCV_ATOMIC_LINUX_RISCV_HPP ++#define OS_CPU_LINUX_RISCV_ATOMIC_LINUX_RISCV_HPP + -+ // /* -+ // * Signal context structure - contains all info to do with the state -+ // * before the signal handler was invoked. 
-+ // */ -+ // struct sigcontext { -+ // struct user_regs_struct sc_regs; -+ // union __riscv_fp_state sc_fpregs; -+ // }; -+ // -+ // struct user_regs_struct { -+ // unsigned long pc; -+ // unsigned long ra; -+ // unsigned long sp; -+ // unsigned long gp; -+ // unsigned long tp; -+ // unsigned long t0; -+ // unsigned long t1; -+ // unsigned long t2; -+ // unsigned long s0; -+ // unsigned long s1; -+ // unsigned long a0; -+ // unsigned long a1; -+ // unsigned long a2; -+ // unsigned long a3; -+ // unsigned long a4; -+ // unsigned long a5; -+ // unsigned long a6; -+ // unsigned long a7; -+ // unsigned long s2; -+ // unsigned long s3; -+ // unsigned long s4; -+ // unsigned long s5; -+ // unsigned long s6; -+ // unsigned long s7; -+ // unsigned long s8; -+ // unsigned long s9; -+ // unsigned long s10; -+ // unsigned long s11; -+ // unsigned long t3; -+ // unsigned long t4; -+ // unsigned long t5; -+ // unsigned long t6; -+ // }; ++#include "runtime/vm_version.hpp" + -+ // NOTE: the indices for the various registers must be maintained as -+ // listed across various operating systems. However, only a small -+ // subset of the registers' values are guaranteed to be present (and -+ // must be present for the SA's stack walking to work) ++// Implementation of class atomic + -+ // One instance of the Native annotation is enough to trigger header generation -+ // for this file. -+ @Native -+ public static final int R0 = 0; -+ public static final int R1 = 1; -+ public static final int R2 = 2; -+ public static final int R3 = 3; -+ public static final int R4 = 4; -+ public static final int R5 = 5; -+ public static final int R6 = 6; -+ public static final int R7 = 7; -+ public static final int R8 = 8; -+ public static final int R9 = 9; -+ public static final int R10 = 10; -+ public static final int R11 = 11; -+ public static final int R12 = 12; -+ public static final int R13 = 13; -+ public static final int R14 = 14; -+ public static final int R15 = 15; -+ public static final int R16 = 16; -+ public static final int R17 = 17; -+ public static final int R18 = 18; -+ public static final int R19 = 19; -+ public static final int R20 = 20; -+ public static final int R21 = 21; -+ public static final int R22 = 22; -+ public static final int R23 = 23; -+ public static final int R24 = 24; -+ public static final int R25 = 25; -+ public static final int R26 = 26; -+ public static final int R27 = 27; -+ public static final int R28 = 28; -+ public static final int R29 = 29; -+ public static final int R30 = 30; -+ public static final int R31 = 31; ++// Note that memory_order_conservative requires a full barrier after atomic stores. 
++// See https://patchwork.kernel.org/patch/3575821/
+
++template<size_t byte_size>
++struct Atomic::PlatformAdd {
++  template<typename D, typename I>
++  D add_and_fetch(D volatile* dest, I add_value, atomic_memory_order order) const {
++    D res = __atomic_add_fetch(dest, add_value, __ATOMIC_RELEASE);
++    FULL_MEM_BARRIER;
++    return res;
++  }
+
++  template<typename D, typename I>
++  D fetch_and_add(D volatile* dest, I add_value, atomic_memory_order order) const {
++    return add_and_fetch(dest, add_value, order) - add_value;
++  }
++};
+
++template<size_t byte_size>
++template<typename T>
++inline T Atomic::PlatformXchg<byte_size>::operator()(T volatile* dest,
++                                                     T exchange_value,
++                                                     atomic_memory_order order) const {
++  STATIC_ASSERT(byte_size == sizeof(T));
++  T res = __atomic_exchange_n(dest, exchange_value, __ATOMIC_RELEASE);
++  FULL_MEM_BARRIER;
++  return res;
++}
+
++// __attribute__((unused)) on dest is to get rid of spurious GCC warnings.
++template<size_t byte_size>
++template<typename T>
++inline T Atomic::PlatformCmpxchg<byte_size>::operator()(T volatile* dest __attribute__((unused)),
++                                                        T compare_value,
++                                                        T exchange_value,
++                                                        atomic_memory_order order) const {
++  STATIC_ASSERT(byte_size == sizeof(T));
++  T value = compare_value;
++  if (order != memory_order_relaxed) {
++    FULL_MEM_BARRIER;
++  }
+
++  __atomic_compare_exchange(dest, &value, &exchange_value, /* weak */ false,
++                            __ATOMIC_RELAXED, __ATOMIC_RELAXED);
+
++  if (order != memory_order_relaxed) {
++    FULL_MEM_BARRIER;
++  }
++  return value;
++}
+
++template<>
++template<typename T>
++inline T Atomic::PlatformCmpxchg<4>::operator()(T volatile* dest __attribute__((unused)),
++                                                T compare_value,
++                                                T exchange_value,
++                                                atomic_memory_order order) const {
++  STATIC_ASSERT(4 == sizeof(T));
++  if (order != memory_order_relaxed) {
++    FULL_MEM_BARRIER;
++  }
++  T rv;
++  int tmp;
++  __asm volatile(
++    "1:\n\t"
++    " addiw     %[tmp], %[cv], 0\n\t" // make sure compare_value signed_extend
++    " lr.w.aq   %[rv], (%[dest])\n\t"
++    " bne       %[rv], %[tmp], 2f\n\t"
++    " sc.w.rl   %[tmp], %[ev], (%[dest])\n\t"
++    " bnez      %[tmp], 1b\n\t"
++    "2:\n\t"
++    : [rv] "=&r" (rv), [tmp] "=&r" (tmp)
++    : [ev] "r" (exchange_value), [dest] "r" (dest), [cv] "r" (compare_value)
++    : "memory");
++  if (order != memory_order_relaxed) {
++    FULL_MEM_BARRIER;
++  }
++  return rv;
++}
+
++template<size_t byte_size>
++struct Atomic::PlatformOrderedLoad<byte_size, X_ACQUIRE>
++{
++  template <typename T>
++  T operator()(const volatile T* p) const { T data; __atomic_load(const_cast<T*>(p), &data, __ATOMIC_ACQUIRE); return data; }
++};
+
++template<size_t byte_size>
++struct Atomic::PlatformOrderedStore<byte_size, RELEASE_X>
++{
++  template <typename T>
++  void operator()(volatile T* p, T v) const { __atomic_store(const_cast<T*>(p), &v, __ATOMIC_RELEASE); }
++};
+
++template<size_t byte_size>
++struct Atomic::PlatformOrderedStore<byte_size, RELEASE_X_FENCE>
++{
++  template <typename T>
++ void operator()(volatile T* p, T v) const { release_store(p, v); OrderAccess::fence(); } ++}; + -+ /** This can't be implemented in this class since we would have to -+ * tie the implementation to, for example, the debugging system */ -+ public abstract Address getRegisterAsAddress(int index); -+} -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java -index 190062785..74bd614d3 100644 ---- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java -@@ -38,6 +38,7 @@ import sun.jvm.hotspot.runtime.win32_aarch64.Win32AARCH64JavaThreadPDAccess; - import sun.jvm.hotspot.runtime.linux_x86.LinuxX86JavaThreadPDAccess; - import sun.jvm.hotspot.runtime.linux_amd64.LinuxAMD64JavaThreadPDAccess; - import sun.jvm.hotspot.runtime.linux_aarch64.LinuxAARCH64JavaThreadPDAccess; -+import sun.jvm.hotspot.runtime.linux_riscv64.LinuxRISCV64JavaThreadPDAccess; - import sun.jvm.hotspot.runtime.linux_ppc64.LinuxPPC64JavaThreadPDAccess; - import sun.jvm.hotspot.runtime.linux_sparc.LinuxSPARCJavaThreadPDAccess; - import sun.jvm.hotspot.runtime.bsd_x86.BsdX86JavaThreadPDAccess; -@@ -99,6 +100,8 @@ public class Threads { - access = new LinuxPPC64JavaThreadPDAccess(); - } else if (cpu.equals("aarch64")) { - access = new LinuxAARCH64JavaThreadPDAccess(); -+ } else if (cpu.equals("riscv64")) { -+ access = new LinuxRISCV64JavaThreadPDAccess(); - } else { - try { - access = (JavaThreadPDAccess) -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java ++#endif // OS_CPU_LINUX_RISCV_ATOMIC_LINUX_RISCV_HPP +diff --git a/src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.hpp new file mode 100644 -index 000000000..2df0837b6 +index 00000000000..28868c76406 --- /dev/null -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java -@@ -0,0 +1,132 @@ ++++ b/src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.hpp +@@ -0,0 +1,45 @@ +/* -+ * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, Red Hat Inc. -+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -59007,122 +56906,120 @@ index 000000000..2df0837b6 + * + */ + -+package sun.jvm.hotspot.runtime.linux_riscv64; ++#ifndef OS_CPU_LINUX_RISCV_BYTES_LINUX_RISCV_HPP ++#define OS_CPU_LINUX_RISCV_BYTES_LINUX_RISCV_HPP + -+import java.io.*; -+import java.util.*; -+import sun.jvm.hotspot.debugger.*; -+import sun.jvm.hotspot.debugger.riscv64.*; -+import sun.jvm.hotspot.runtime.*; -+import sun.jvm.hotspot.runtime.riscv64.*; -+import sun.jvm.hotspot.types.*; -+import sun.jvm.hotspot.utilities.*; ++#include + -+public class LinuxRISCV64JavaThreadPDAccess implements JavaThreadPDAccess { -+ private static AddressField lastJavaFPField; -+ private static AddressField osThreadField; ++// Efficient swapping of data bytes from Java byte ++// ordering to native byte ordering and vice versa. ++inline u2 Bytes::swap_u2(u2 x) { ++ return bswap_16(x); ++} + -+ // Field from OSThread -+ private static CIntegerField osThreadThreadIDField; ++inline u4 Bytes::swap_u4(u4 x) { ++ return bswap_32(x); ++} + -+ // This is currently unneeded but is being kept in case we change -+ // the currentFrameGuess algorithm -+ private static final long GUESS_SCAN_RANGE = 128 * 1024; -+ -+ static { -+ VM.registerVMInitializedObserver(new Observer() { -+ public void update(Observable o, Object data) { -+ initialize(VM.getVM().getTypeDataBase()); -+ } -+ }); -+ } -+ -+ private static synchronized void initialize(TypeDataBase db) { -+ Type type = db.lookupType("JavaThread"); -+ osThreadField = type.getAddressField("_osthread"); -+ -+ Type anchorType = db.lookupType("JavaFrameAnchor"); -+ lastJavaFPField = anchorType.getAddressField("_last_Java_fp"); -+ -+ Type osThreadType = db.lookupType("OSThread"); -+ osThreadThreadIDField = osThreadType.getCIntegerField("_thread_id"); -+ } -+ -+ public Address getLastJavaFP(Address addr) { -+ return lastJavaFPField.getValue(addr.addOffsetTo(sun.jvm.hotspot.runtime.JavaThread.getAnchorField().getOffset())); -+ } -+ -+ public Address getLastJavaPC(Address addr) { -+ return null; -+ } ++inline u8 Bytes::swap_u8(u8 x) { ++ return bswap_64(x); ++} + -+ public Address getBaseOfStackPointer(Address addr) { -+ return null; -+ } ++#endif // OS_CPU_LINUX_RISCV_BYTES_LINUX_RISCV_HPP +diff --git a/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.hpp +new file mode 100644 +index 00000000000..147cfdf3c10 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.hpp +@@ -0,0 +1,31 @@ ++/* ++ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ public Frame getLastFramePD(JavaThread thread, Address addr) { -+ Address fp = thread.getLastJavaFP(); -+ if (fp == null) { -+ return null; // no information -+ } -+ return new RISCV64Frame(thread.getLastJavaSP(), fp); -+ } ++#ifndef OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_HPP ++#define OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_HPP + -+ public RegisterMap newRegisterMap(JavaThread thread, boolean updateMap) { -+ return new RISCV64RegisterMap(thread, updateMap); -+ } ++// Empty for build system + -+ public Frame getCurrentFrameGuess(JavaThread thread, Address addr) { -+ ThreadProxy t = getThreadProxy(addr); -+ RISCV64ThreadContext context = (RISCV64ThreadContext) t.getContext(); -+ RISCV64CurrentFrameGuess guesser = new RISCV64CurrentFrameGuess(context, thread); -+ if (!guesser.run(GUESS_SCAN_RANGE)) { -+ return null; -+ } -+ if (guesser.getPC() == null) { -+ return new RISCV64Frame(guesser.getSP(), guesser.getFP()); -+ } else { -+ return new RISCV64Frame(guesser.getSP(), guesser.getFP(), guesser.getPC()); -+ } -+ } ++#endif // OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_HPP +diff --git a/src/hotspot/os_cpu/linux_riscv/gc/z/zSyscall_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/gc/z/zSyscall_linux_riscv.hpp +new file mode 100644 +index 00000000000..1aa58f27871 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_riscv/gc/z/zSyscall_linux_riscv.hpp +@@ -0,0 +1,42 @@ ++/* ++ * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ public void printThreadIDOn(Address addr, PrintStream tty) { -+ tty.print(getThreadProxy(addr)); -+ } ++#ifndef OS_CPU_LINUX_RISCV_GC_Z_ZSYSCALL_LINUX_RISCV_HPP ++#define OS_CPU_LINUX_RISCV_GC_Z_ZSYSCALL_LINUX_RISCV_HPP + -+ public void printInfoOn(Address threadAddr, PrintStream tty) { -+ tty.print("Thread id: "); -+ printThreadIDOn(threadAddr, tty); -+ } ++#include + -+ public Address getLastSP(Address addr) { -+ ThreadProxy t = getThreadProxy(addr); -+ RISCV64ThreadContext context = (RISCV64ThreadContext) t.getContext(); -+ return context.getRegisterAsAddress(RISCV64ThreadContext.SP); -+ } ++// ++// Support for building on older Linux systems ++// + -+ public ThreadProxy getThreadProxy(Address addr) { -+ // Addr is the address of the JavaThread. 
-+ // Fetch the OSThread (for now and for simplicity, not making a -+ // separate "OSThread" class in this package) -+ Address osThreadAddr = osThreadField.getValue(addr); -+ // Get the address of the _thread_id from the OSThread -+ Address threadIdAddr = osThreadAddr.addOffsetTo(osThreadThreadIDField.getOffset()); ++#ifndef SYS_memfd_create ++#define SYS_memfd_create 279 ++#endif ++#ifndef SYS_fallocate ++#define SYS_fallocate 47 ++#endif + -+ JVMDebugger debugger = VM.getVM().getDebugger(); -+ return debugger.getThreadForIdentifierAddress(threadIdAddr); -+ } -+} -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64CurrentFrameGuess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64CurrentFrameGuess.java ++#endif // OS_CPU_LINUX_RISCV_GC_Z_ZSYSCALL_LINUX_RISCV_HPP +diff --git a/src/hotspot/os_cpu/linux_riscv/globals_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/globals_linux_riscv.hpp new file mode 100644 -index 000000000..a3bbf1ad1 +index 00000000000..297414bfcd5 --- /dev/null -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64CurrentFrameGuess.java -@@ -0,0 +1,223 @@ ++++ b/src/hotspot/os_cpu/linux_riscv/globals_linux_riscv.hpp +@@ -0,0 +1,43 @@ +/* -+ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2019, Red Hat Inc. -+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -59145,213 +57042,102 @@ index 000000000..a3bbf1ad1 + * + */ + -+package sun.jvm.hotspot.runtime.riscv64; -+ -+import sun.jvm.hotspot.debugger.*; -+import sun.jvm.hotspot.debugger.riscv64.*; -+import sun.jvm.hotspot.code.*; -+import sun.jvm.hotspot.interpreter.*; -+import sun.jvm.hotspot.runtime.*; -+import sun.jvm.hotspot.runtime.riscv64.*; -+ -+/**

Should be able to be used on all riscv64 platforms we support -+ (Linux/riscv64) to implement JavaThread's "currentFrameGuess()" -+ functionality. Input is an RISCV64ThreadContext; output is SP, FP, -+ and PC for an RISCV64Frame. Instantiation of the RISCV64Frame is -+ left to the caller, since we may need to subclass RISCV64Frame to -+ support signal handler frames on Unix platforms.

-+ -+

Algorithm is to walk up the stack within a given range (say, -+ 512K at most) looking for a plausible PC and SP for a Java frame, -+ also considering those coming in from the context. If we find a PC -+ that belongs to the VM (i.e., in generated code like the -+ interpreter or CodeCache) then we try to find an associated FP. -+ We repeat this until we either find a complete frame or run out of -+ stack to look at.

*/ -+ -+public class RISCV64CurrentFrameGuess { -+ private RISCV64ThreadContext context; -+ private JavaThread thread; -+ private Address spFound; -+ private Address fpFound; -+ private Address pcFound; ++#ifndef OS_CPU_LINUX_RISCV_VM_GLOBALS_LINUX_RISCV_HPP ++#define OS_CPU_LINUX_RISCV_VM_GLOBALS_LINUX_RISCV_HPP + -+ private static final boolean DEBUG = System.getProperty("sun.jvm.hotspot.runtime.riscv64.RISCV64Frame.DEBUG") -+ != null; ++// Sets the default values for platform dependent flags used by the runtime system. ++// (see globals.hpp) + -+ public RISCV64CurrentFrameGuess(RISCV64ThreadContext context, -+ JavaThread thread) { -+ this.context = context; -+ this.thread = thread; -+ } ++define_pd_global(bool, DontYieldALot, false); ++define_pd_global(intx, ThreadStackSize, 2048); // 0 => use system default ++define_pd_global(intx, VMThreadStackSize, 2048); + -+ /** Returns false if not able to find a frame within a reasonable range. */ -+ public boolean run(long regionInBytesToSearch) { -+ Address sp = context.getRegisterAsAddress(RISCV64ThreadContext.SP); -+ Address pc = context.getRegisterAsAddress(RISCV64ThreadContext.PC); -+ Address fp = context.getRegisterAsAddress(RISCV64ThreadContext.FP); -+ if (sp == null) { -+ // Bail out if no last java frame either -+ if (thread.getLastJavaSP() != null) { -+ setValues(thread.getLastJavaSP(), thread.getLastJavaFP(), null); -+ return true; -+ } -+ return false; -+ } -+ Address end = sp.addOffsetTo(regionInBytesToSearch); -+ VM vm = VM.getVM(); ++define_pd_global(intx, CompilerThreadStackSize, 2048); + -+ setValues(null, null, null); // Assume we're not going to find anything ++define_pd_global(uintx, JVMInvokeMethodSlack, 8192); + -+ if (vm.isJavaPCDbg(pc)) { -+ if (vm.isClientCompiler()) { -+ // If the topmost frame is a Java frame, we are (pretty much) -+ // guaranteed to have a viable FP. We should be more robust -+ // than this (we have the potential for losing entire threads' -+ // stack traces) but need to see how much work we really have -+ // to do here. Searching the stack for an (SP, FP) pair is -+ // hard since it's easy to misinterpret inter-frame stack -+ // pointers as base-of-frame pointers; we also don't know the -+ // sizes of C1 frames (not registered in the nmethod) so can't -+ // derive them from SP. ++// Used on 64 bit platforms for UseCompressedOops base address ++define_pd_global(uintx, HeapBaseMinAddress, 2 * G); + -+ setValues(sp, fp, pc); -+ return true; -+ } else { -+ if (vm.getInterpreter().contains(pc)) { -+ if (DEBUG) { -+ System.out.println("CurrentFrameGuess: choosing interpreter frame: sp = " + -+ sp + ", fp = " + fp + ", pc = " + pc); -+ } -+ setValues(sp, fp, pc); -+ return true; -+ } ++#endif // OS_CPU_LINUX_RISCV_VM_GLOBALS_LINUX_RISCV_HPP +diff --git a/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp +new file mode 100644 +index 00000000000..1c33dc1e87f +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp +@@ -0,0 +1,63 @@ ++/* ++ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ // For the server compiler, FP is not guaranteed to be valid -+ // for compiled code. In addition, an earlier attempt at a -+ // non-searching algorithm (see below) failed because the -+ // stack pointer from the thread context was pointing -+ // (considerably) beyond the ostensible end of the stack, into -+ // garbage; walking from the topmost frame back caused a crash. -+ // -+ // This algorithm takes the current PC as a given and tries to -+ // find the correct corresponding SP by walking up the stack -+ // and repeatedly performing stackwalks (very inefficient). -+ // -+ // FIXME: there is something wrong with stackwalking across -+ // adapter frames...this is likely to be the root cause of the -+ // failure with the simpler algorithm below. ++#ifndef OS_CPU_LINUX_RISCV_ORDERACCESS_LINUX_RISCV_HPP ++#define OS_CPU_LINUX_RISCV_ORDERACCESS_LINUX_RISCV_HPP + -+ for (long offset = 0; -+ offset < regionInBytesToSearch; -+ offset += vm.getAddressSize()) { -+ try { -+ Address curSP = sp.addOffsetTo(offset); -+ Frame frame = new RISCV64Frame(curSP, null, pc); -+ RegisterMap map = thread.newRegisterMap(false); -+ while (frame != null) { -+ if (frame.isEntryFrame() && frame.entryFrameIsFirst()) { -+ // We were able to traverse all the way to the -+ // bottommost Java frame. -+ // This sp looks good. Keep it. -+ if (DEBUG) { -+ System.out.println("CurrentFrameGuess: Choosing sp = " + curSP + ", pc = " + pc); -+ } -+ setValues(curSP, null, pc); -+ return true; -+ } -+ frame = frame.sender(map); -+ } -+ } catch (Exception e) { -+ if (DEBUG) { -+ System.out.println("CurrentFrameGuess: Exception " + e + " at offset " + offset); -+ } -+ // Bad SP. Try another. -+ } -+ } ++// Included in orderAccess.hpp header file. + -+ // Were not able to find a plausible SP to go with this PC. -+ // Bail out. -+ return false; -+ } -+ } else { -+ // If the current program counter was not known to us as a Java -+ // PC, we currently assume that we are in the run-time system -+ // and attempt to look to thread-local storage for saved SP and -+ // FP. Note that if these are null (because we were, in fact, -+ // in Java code, i.e., vtable stubs or similar, and the SA -+ // didn't have enough insight into the target VM to understand -+ // that) then we are going to lose the entire stack trace for -+ // the thread, which is sub-optimal. FIXME. ++#include "runtime/vm_version.hpp" + -+ if (DEBUG) { -+ System.out.println("CurrentFrameGuess: choosing last Java frame: sp = " + -+ thread.getLastJavaSP() + ", fp = " + thread.getLastJavaFP()); -+ } -+ if (thread.getLastJavaSP() == null) { -+ return false; // No known Java frames on stack -+ } ++// Implementation of class OrderAccess. 
+ -+ // The runtime has a nasty habit of not saving fp in the frame -+ // anchor, leaving us to grovel about in the stack to find a -+ // plausible address. Fortunately, this only happens in -+ // compiled code; there we always have a valid PC, and we always -+ // push LR and FP onto the stack as a pair, with FP at the lower -+ // address. -+ pc = thread.getLastJavaPC(); -+ fp = thread.getLastJavaFP(); -+ sp = thread.getLastJavaSP(); ++inline void OrderAccess::loadload() { acquire(); } ++inline void OrderAccess::storestore() { release(); } ++inline void OrderAccess::loadstore() { acquire(); } ++inline void OrderAccess::storeload() { fence(); } + -+ if (fp == null) { -+ CodeCache cc = vm.getCodeCache(); -+ if (cc.contains(pc)) { -+ CodeBlob cb = cc.findBlob(pc); -+ if (DEBUG) { -+ System.out.println("FP is null. Found blob frame size " + cb.getFrameSize()); -+ } -+ // See if we can derive a frame pointer from SP and PC -+ long link_offset = cb.getFrameSize() - 2 * VM.getVM().getAddressSize(); -+ if (link_offset >= 0) { -+ fp = sp.addOffsetTo(link_offset); -+ } -+ } -+ } ++#define FULL_MEM_BARRIER __sync_synchronize() ++#define READ_MEM_BARRIER __atomic_thread_fence(__ATOMIC_ACQUIRE); ++#define WRITE_MEM_BARRIER __atomic_thread_fence(__ATOMIC_RELEASE); + -+ // We found a PC in the frame anchor. Check that it's plausible, and -+ // if it is, use it. -+ if (vm.isJavaPCDbg(pc)) { -+ setValues(sp, fp, pc); -+ } else { -+ setValues(sp, fp, null); -+ } ++inline void OrderAccess::acquire() { ++ READ_MEM_BARRIER; ++} + -+ return true; -+ } -+ } ++inline void OrderAccess::release() { ++ WRITE_MEM_BARRIER; ++} + -+ public Address getSP() { return spFound; } -+ public Address getFP() { return fpFound; } -+ /** May be null if getting values from thread-local storage; take -+ care to call the correct RISCV64Frame constructor to recover this if -+ necessary */ -+ public Address getPC() { return pcFound; } ++inline void OrderAccess::fence() { ++ FULL_MEM_BARRIER; ++} + -+ private void setValues(Address sp, Address fp, Address pc) { -+ spFound = sp; -+ fpFound = fp; -+ pcFound = pc; ++inline void OrderAccess::cross_modify_fence_impl() { ++ asm volatile("fence.i" : : : "memory"); ++ if (UseConservativeFence) { ++ asm volatile("fence ir, ir" : : : "memory"); + } +} -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java ++ ++#endif // OS_CPU_LINUX_RISCV_ORDERACCESS_LINUX_RISCV_HPP +diff --git a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp new file mode 100644 -index 000000000..c04def5a1 +index 00000000000..1f46bbab0a2 --- /dev/null -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java -@@ -0,0 +1,554 @@ ++++ b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp +@@ -0,0 +1,466 @@ +/* -+ * Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2019, Red Hat Inc. -+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -59374,544 +57160,23387 @@ index 000000000..c04def5a1 + * + */ + -+package sun.jvm.hotspot.runtime.riscv64; ++// no precompiled headers ++#include "asm/macroAssembler.hpp" ++#include "classfile/vmSymbols.hpp" ++#include "code/codeCache.hpp" ++#include "code/icBuffer.hpp" ++#include "code/nativeInst.hpp" ++#include "code/vtableStubs.hpp" ++#include "interpreter/interpreter.hpp" ++#include "jvm.h" ++#include "memory/allocation.inline.hpp" ++#include "os_share_linux.hpp" ++#include "prims/jniFastGetField.hpp" ++#include "prims/jvm_misc.hpp" ++#include "runtime/arguments.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/java.hpp" ++#include "runtime/javaCalls.hpp" ++#include "runtime/mutexLocker.hpp" ++#include "runtime/osThread.hpp" ++#include "runtime/safepointMechanism.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++#include "runtime/timer.hpp" ++#include "signals_posix.hpp" ++#include "utilities/debug.hpp" ++#include "utilities/events.hpp" ++#include "utilities/vmError.hpp" + -+import java.util.*; -+import sun.jvm.hotspot.code.*; -+import sun.jvm.hotspot.compiler.*; -+import sun.jvm.hotspot.debugger.*; -+import sun.jvm.hotspot.oops.*; -+import sun.jvm.hotspot.runtime.*; -+import sun.jvm.hotspot.types.*; -+import sun.jvm.hotspot.utilities.*; ++// put OS-includes here ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include + -+/** Specialization of and implementation of abstract methods of the -+ Frame class for the riscv64 family of CPUs. 
*/ ++#define REG_LR 1 ++#define REG_FP 8 + -+public class RISCV64Frame extends Frame { -+ private static final boolean DEBUG; -+ static { -+ DEBUG = System.getProperty("sun.jvm.hotspot.runtime.RISCV64.RISCV64Frame.DEBUG") != null; -+ } ++NOINLINE address os::current_stack_pointer() { ++ return (address)__builtin_frame_address(0); ++} + -+ // Java frames -+ private static final int LINK_OFFSET = -2; -+ private static final int RETURN_ADDR_OFFSET = -1; -+ private static final int SENDER_SP_OFFSET = 0; ++char* os::non_memory_address_word() { ++ // Must never look like an address returned by reserve_memory, ++ return (char*) -1; ++} + -+ // Interpreter frames -+ private static final int INTERPRETER_FRAME_SENDER_SP_OFFSET = -3; -+ private static final int INTERPRETER_FRAME_LAST_SP_OFFSET = INTERPRETER_FRAME_SENDER_SP_OFFSET - 1; -+ private static final int INTERPRETER_FRAME_METHOD_OFFSET = INTERPRETER_FRAME_LAST_SP_OFFSET - 1; -+ private static int INTERPRETER_FRAME_MDX_OFFSET; // Non-core builds only -+ private static int INTERPRETER_FRAME_PADDING_OFFSET; -+ private static int INTERPRETER_FRAME_MIRROR_OFFSET; -+ private static int INTERPRETER_FRAME_CACHE_OFFSET; -+ private static int INTERPRETER_FRAME_LOCALS_OFFSET; -+ private static int INTERPRETER_FRAME_BCX_OFFSET; -+ private static int INTERPRETER_FRAME_INITIAL_SP_OFFSET; -+ private static int INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET; -+ private static int INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET; ++address os::Posix::ucontext_get_pc(const ucontext_t * uc) { ++ return (address)uc->uc_mcontext.__gregs[REG_PC]; ++} + -+ // Entry frames -+ private static int ENTRY_FRAME_CALL_WRAPPER_OFFSET = -10; ++void os::Posix::ucontext_set_pc(ucontext_t * uc, address pc) { ++ uc->uc_mcontext.__gregs[REG_PC] = (intptr_t)pc; ++} + -+ // Native frames -+ private static final int NATIVE_FRAME_INITIAL_PARAM_OFFSET = 2; ++intptr_t* os::Linux::ucontext_get_sp(const ucontext_t * uc) { ++ return (intptr_t*)uc->uc_mcontext.__gregs[REG_SP]; ++} + -+ private static VMReg fp = new VMReg(8); ++intptr_t* os::Linux::ucontext_get_fp(const ucontext_t * uc) { ++ return (intptr_t*)uc->uc_mcontext.__gregs[REG_FP]; ++} + -+ static { -+ VM.registerVMInitializedObserver(new Observer() { -+ public void update(Observable o, Object data) { -+ initialize(VM.getVM().getTypeDataBase()); -+ } -+ }); -+ } ++address os::fetch_frame_from_context(const void* ucVoid, ++ intptr_t** ret_sp, intptr_t** ret_fp) { ++ address epc; ++ const ucontext_t* uc = (const ucontext_t*)ucVoid; + -+ private static synchronized void initialize(TypeDataBase db) { -+ INTERPRETER_FRAME_MDX_OFFSET = INTERPRETER_FRAME_METHOD_OFFSET - 1; -+ INTERPRETER_FRAME_PADDING_OFFSET = INTERPRETER_FRAME_MDX_OFFSET - 1; -+ INTERPRETER_FRAME_MIRROR_OFFSET = INTERPRETER_FRAME_PADDING_OFFSET - 1; -+ INTERPRETER_FRAME_CACHE_OFFSET = INTERPRETER_FRAME_MIRROR_OFFSET - 1; -+ INTERPRETER_FRAME_LOCALS_OFFSET = INTERPRETER_FRAME_CACHE_OFFSET - 1; -+ INTERPRETER_FRAME_BCX_OFFSET = INTERPRETER_FRAME_LOCALS_OFFSET - 1; -+ INTERPRETER_FRAME_INITIAL_SP_OFFSET = INTERPRETER_FRAME_BCX_OFFSET - 1; -+ INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; -+ INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; -+ } -+ -+ -+ // an additional field beyond sp and pc: -+ Address raw_fp; // frame pointer -+ private Address raw_unextendedSP; -+ -+ private RISCV64Frame() { -+ } -+ -+ private void adjustForDeopt() { -+ if ( pc != null) { -+ // Look for a deopt pc and if it is deopted 
convert to original pc -+ CodeBlob cb = VM.getVM().getCodeCache().findBlob(pc); -+ if (cb != null && cb.isJavaMethod()) { -+ NMethod nm = (NMethod) cb; -+ if (pc.equals(nm.deoptHandlerBegin())) { -+ if (Assert.ASSERTS_ENABLED) { -+ Assert.that(this.getUnextendedSP() != null, "null SP in Java frame"); -+ } -+ // adjust pc if frame is deoptimized. -+ pc = this.getUnextendedSP().getAddressAt(nm.origPCOffset()); -+ deoptimized = true; -+ } -+ } ++ if (uc != NULL) { ++ epc = os::Posix::ucontext_get_pc(uc); ++ if (ret_sp != NULL) { ++ *ret_sp = os::Linux::ucontext_get_sp(uc); + } -+ } -+ -+ public RISCV64Frame(Address raw_sp, Address raw_fp, Address pc) { -+ this.raw_sp = raw_sp; -+ this.raw_unextendedSP = raw_sp; -+ this.raw_fp = raw_fp; -+ this.pc = pc; -+ adjustUnextendedSP(); -+ -+ // Frame must be fully constructed before this call -+ adjustForDeopt(); -+ -+ if (DEBUG) { -+ System.out.println("RISCV64Frame(sp, fp, pc): " + this); -+ dumpStack(); ++ if (ret_fp != NULL) { ++ *ret_fp = os::Linux::ucontext_get_fp(uc); + } -+ } -+ -+ public RISCV64Frame(Address raw_sp, Address raw_fp) { -+ this.raw_sp = raw_sp; -+ this.raw_unextendedSP = raw_sp; -+ this.raw_fp = raw_fp; -+ -+ // We cannot assume SP[-1] always contains a valid return PC (e.g. if -+ // the callee is a C/C++ compiled frame). If the PC is not known to -+ // Java then this.pc is null. -+ Address savedPC = raw_sp.getAddressAt(-1 * VM.getVM().getAddressSize()); -+ if (VM.getVM().isJavaPCDbg(savedPC)) { -+ this.pc = savedPC; ++ } else { ++ epc = NULL; ++ if (ret_sp != NULL) { ++ *ret_sp = (intptr_t *)NULL; + } -+ -+ adjustUnextendedSP(); -+ -+ // Frame must be fully constructed before this call -+ adjustForDeopt(); -+ -+ if (DEBUG) { -+ System.out.println("RISCV64Frame(sp, fp): " + this); -+ dumpStack(); ++ if (ret_fp != NULL) { ++ *ret_fp = (intptr_t *)NULL; + } + } + -+ public RISCV64Frame(Address raw_sp, Address raw_unextendedSp, Address raw_fp, Address pc) { -+ this.raw_sp = raw_sp; -+ this.raw_unextendedSP = raw_unextendedSp; -+ this.raw_fp = raw_fp; -+ this.pc = pc; -+ adjustUnextendedSP(); -+ -+ // Frame must be fully constructed before this call -+ adjustForDeopt(); -+ -+ if (DEBUG) { -+ System.out.println("RISCV64Frame(sp, unextendedSP, fp, pc): " + this); -+ dumpStack(); -+ } ++ return epc; ++} + -+ } ++frame os::fetch_compiled_frame_from_context(const void* ucVoid) { ++ const ucontext_t* uc = (const ucontext_t*)ucVoid; ++ // In compiled code, the stack banging is performed before RA ++ // has been saved in the frame. RA is live, and SP and FP ++ // belong to the caller. ++ intptr_t* frame_fp = os::Linux::ucontext_get_fp(uc); ++ intptr_t* frame_sp = os::Linux::ucontext_get_sp(uc); ++ address frame_pc = (address)(uc->uc_mcontext.__gregs[REG_LR] ++ - NativeInstruction::instruction_size); ++ return frame(frame_sp, frame_fp, frame_pc); ++} + -+ public Object clone() { -+ RISCV64Frame frame = new RISCV64Frame(); -+ frame.raw_sp = raw_sp; -+ frame.raw_unextendedSP = raw_unextendedSP; -+ frame.raw_fp = raw_fp; -+ frame.pc = pc; -+ frame.deoptimized = deoptimized; -+ return frame; -+ } ++frame os::fetch_frame_from_context(const void* ucVoid) { ++ intptr_t* frame_sp = NULL; ++ intptr_t* frame_fp = NULL; ++ address epc = fetch_frame_from_context(ucVoid, &frame_sp, &frame_fp); ++ return frame(frame_sp, frame_fp, epc); ++} + -+ public boolean equals(Object arg) { -+ if (arg == null) { -+ return false; -+ } ++// By default, gcc always saves frame pointer rfp on this stack. This ++// may get turned off by -fomit-frame-pointer. 
++frame os::get_sender_for_C_frame(frame* fr) { ++ return frame(fr->sender_sp(), fr->link(), fr->sender_pc()); ++} + -+ if (!(arg instanceof RISCV64Frame)) { -+ return false; ++NOINLINE frame os::current_frame() { ++ intptr_t **sender_sp = (intptr_t **)__builtin_frame_address(0); ++ if (sender_sp != NULL) { ++ frame myframe((intptr_t*)os::current_stack_pointer(), ++ sender_sp[frame::link_offset], ++ CAST_FROM_FN_PTR(address, os::current_frame)); ++ if (os::is_first_C_frame(&myframe)) { ++ // stack is not walkable ++ return frame(); ++ } else { ++ return os::get_sender_for_C_frame(&myframe); + } -+ -+ RISCV64Frame other = (RISCV64Frame) arg; -+ -+ return (AddressOps.equal(getSP(), other.getSP()) && -+ AddressOps.equal(getUnextendedSP(), other.getUnextendedSP()) && -+ AddressOps.equal(getFP(), other.getFP()) && -+ AddressOps.equal(getPC(), other.getPC())); ++ } else { ++ ShouldNotReachHere(); ++ return frame(); + } ++} + -+ public int hashCode() { -+ if (raw_sp == null) { -+ return 0; -+ } ++// Utility functions ++bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info, ++ ucontext_t* uc, JavaThread* thread) { + -+ return raw_sp.hashCode(); -+ } ++ // decide if this trap can be handled by a stub ++ address stub = NULL; + -+ public String toString() { -+ return "sp: " + (getSP() == null? "null" : getSP().toString()) + -+ ", unextendedSP: " + (getUnextendedSP() == null? "null" : getUnextendedSP().toString()) + -+ ", fp: " + (getFP() == null? "null" : getFP().toString()) + -+ ", pc: " + (pc == null? "null" : pc.toString()); -+ } ++ address pc = NULL; + -+ // accessors for the instance variables -+ public Address getFP() { return raw_fp; } -+ public Address getSP() { return raw_sp; } -+ public Address getID() { return raw_sp; } ++ //%note os_trap_1 ++ if (info != NULL && uc != NULL && thread != NULL) { ++ pc = (address) os::Posix::ucontext_get_pc(uc); + -+ // FIXME: not implemented yet -+ public boolean isSignalHandlerFrameDbg() { return false; } -+ public int getSignalNumberDbg() { return 0; } -+ public String getSignalNameDbg() { return null; } ++ address addr = (address) info->si_addr; + -+ public boolean isInterpretedFrameValid() { -+ if (Assert.ASSERTS_ENABLED) { -+ Assert.that(isInterpretedFrame(), "Not an interpreted frame"); ++ // Make sure the high order byte is sign extended, as it may be masked away by the hardware. ++ if ((uintptr_t(addr) & (uintptr_t(1) << 55)) != 0) { ++ addr = address(uintptr_t(addr) | (uintptr_t(0xFF) << 56)); + } + -+ // These are reasonable sanity checks -+ if (getFP() == null || getFP().andWithMask(0x3) != null) { -+ return false; ++ // Handle ALL stack overflow variations here ++ if (sig == SIGSEGV) { ++ // check if fault address is within thread stack ++ if (thread->is_in_full_stack(addr)) { ++ if (os::Posix::handle_stack_overflow(thread, addr, pc, uc, &stub)) { ++ return true; // continue ++ } ++ } + } + -+ if (getSP() == null || getSP().andWithMask(0x3) != null) { -+ return false; -+ } ++ if (thread->thread_state() == _thread_in_Java) { ++ // Java thread running in Java code => find exception handler if any ++ // a fault inside compiled code, the interpreter, or a stub + -+ if (getFP().addOffsetTo(INTERPRETER_FRAME_INITIAL_SP_OFFSET * VM.getVM().getAddressSize()).lessThan(getSP())) { -+ return false; ++ // Handle signal from NativeJump::patch_verified_entry(). 
++ if ((sig == SIGILL || sig == SIGTRAP) ++ && nativeInstruction_at(pc)->is_sigill_zombie_not_entrant()) { ++ if (TraceTraps) { ++ tty->print_cr("trap: zombie_not_entrant (%s)", (sig == SIGTRAP) ? "SIGTRAP" : "SIGILL"); ++ } ++ stub = SharedRuntime::get_handle_wrong_method_stub(); ++ } else if (sig == SIGSEGV && SafepointMechanism::is_poll_address((address)info->si_addr)) { ++ stub = SharedRuntime::get_poll_stub(pc); ++ } else if (sig == SIGBUS /* && info->si_code == BUS_OBJERR */) { ++ // BugId 4454115: A read from a MappedByteBuffer can fault ++ // here if the underlying file has been truncated. ++ // Do not crash the VM in such a case. ++ CodeBlob* cb = CodeCache::find_blob_unsafe(pc); ++ CompiledMethod* nm = (cb != NULL) ? cb->as_compiled_method_or_null() : NULL; ++ bool is_unsafe_arraycopy = (thread->doing_unsafe_access() && UnsafeCopyMemory::contains_pc(pc)); ++ if ((nm != NULL && nm->has_unsafe_access()) || is_unsafe_arraycopy) { ++ address next_pc = pc + NativeCall::instruction_size; ++ if (is_unsafe_arraycopy) { ++ next_pc = UnsafeCopyMemory::page_error_continue_pc(pc); ++ } ++ stub = SharedRuntime::handle_unsafe_access(thread, next_pc); ++ } ++ } else if (sig == SIGILL && nativeInstruction_at(pc)->is_stop()) { ++ // Pull a pointer to the error message out of the instruction ++ // stream. ++ const uint64_t *detail_msg_ptr ++ = (uint64_t*)(pc + NativeInstruction::instruction_size); ++ const char *detail_msg = (const char *)*detail_msg_ptr; ++ const char *msg = "stop"; ++ if (TraceTraps) { ++ tty->print_cr("trap: %s: (SIGILL)", msg); ++ } ++ ++ // End life with a fatal error, message and detail message and the context. ++ // Note: no need to do any post-processing here (e.g. signal chaining) ++ va_list va_dummy; ++ VMError::report_and_die(thread, uc, NULL, 0, msg, detail_msg, va_dummy); ++ va_end(va_dummy); ++ ++ ShouldNotReachHere(); ++ } else if (sig == SIGFPE && ++ (info->si_code == FPE_INTDIV || info->si_code == FPE_FLTDIV)) { ++ stub = ++ SharedRuntime:: ++ continuation_for_implicit_exception(thread, ++ pc, ++ SharedRuntime:: ++ IMPLICIT_DIVIDE_BY_ZERO); ++ } else if (sig == SIGSEGV && ++ MacroAssembler::uses_implicit_null_check((void*)addr)) { ++ // Determination of interpreter/vtable stub/compiled code null exception ++ stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL); ++ } ++ } else if ((thread->thread_state() == _thread_in_vm || ++ thread->thread_state() == _thread_in_native) && ++ sig == SIGBUS && /* info->si_code == BUS_OBJERR && */ ++ thread->doing_unsafe_access()) { ++ address next_pc = pc + NativeCall::instruction_size; ++ if (UnsafeCopyMemory::contains_pc(pc)) { ++ next_pc = UnsafeCopyMemory::page_error_continue_pc(pc); ++ } ++ stub = SharedRuntime::handle_unsafe_access(thread, next_pc); + } + -+ // These are hacks to keep us out of trouble. -+ // The problem with these is that they mask other problems -+ if (getFP().lessThanOrEqual(getSP())) { -+ // this attempts to deal with unsigned comparison above -+ return false; ++ // jni_fast_GetField can trap at certain pc's if a GC kicks in ++ // and the heap gets shrunk before the field access. ++ if ((sig == SIGSEGV) || (sig == SIGBUS)) { ++ address addr_slow = JNI_FastGetField::find_slowcase_pc(pc); ++ if (addr_slow != (address)-1) { ++ stub = addr_slow; ++ } + } ++ } + -+ if (getFP().minus(getSP()) > 4096 * VM.getVM().getAddressSize()) { -+ // stack frames shouldn't be large. 
-+ return false; ++ if (stub != NULL) { ++ // save all thread context in case we need to restore it ++ if (thread != NULL) { ++ thread->set_saved_exception_pc(pc); + } + ++ os::Posix::ucontext_set_pc(uc, stub); + return true; + } + -+ public Frame sender(RegisterMap regMap, CodeBlob cb) { -+ RISCV64RegisterMap map = (RISCV64RegisterMap) regMap; ++ return false; // Mute compiler ++} + -+ if (Assert.ASSERTS_ENABLED) { -+ Assert.that(map != null, "map must be set"); -+ } ++void os::Linux::init_thread_fpu_state(void) { ++} + -+ // Default is we done have to follow them. The sender_for_xxx will -+ // update it accordingly -+ map.setIncludeArgumentOops(false); ++int os::Linux::get_fpu_control_word(void) { ++ return 0; ++} + -+ if (isEntryFrame()) return senderForEntryFrame(map); -+ if (isInterpretedFrame()) return senderForInterpreterFrame(map); ++void os::Linux::set_fpu_control_word(int fpu_control) { ++} + -+ if(cb == null) { -+ cb = VM.getVM().getCodeCache().findBlob(getPC()); -+ } else { -+ if (Assert.ASSERTS_ENABLED) { -+ Assert.that(cb.equals(VM.getVM().getCodeCache().findBlob(getPC())), "Must be the same"); -+ } -+ } ++//////////////////////////////////////////////////////////////////////////////// ++// thread stack + -+ if (cb != null) { -+ return senderForCompiledFrame(map, cb); -+ } ++// Minimum usable stack sizes required to get to user code. Space for ++// HotSpot guard pages is added later. ++size_t os::Posix::_compiler_thread_min_stack_allowed = 72 * K; ++size_t os::Posix::_java_thread_min_stack_allowed = 72 * K; ++size_t os::Posix::_vm_internal_thread_min_stack_allowed = 72 * K; + -+ // Must be native-compiled frame, i.e. the marshaling code for native -+ // methods that exists in the core system. -+ return new RISCV64Frame(getSenderSP(), getLink(), getSenderPC()); -+ } ++// return default stack size for thr_type ++size_t os::Posix::default_stack_size(os::ThreadType thr_type) { ++ // default stack size (compiler thread needs larger stack) ++ size_t s = (thr_type == os::compiler_thread ? 4 * M : 1 * M); ++ return s; ++} + -+ private Frame senderForEntryFrame(RISCV64RegisterMap map) { -+ if (DEBUG) { -+ System.out.println("senderForEntryFrame"); -+ } -+ if (Assert.ASSERTS_ENABLED) { -+ Assert.that(map != null, "map must be set"); -+ } -+ // Java frame called from C; skip all C frames and return top C -+ // frame of that chunk as the sender -+ RISCV64JavaCallWrapper jcw = (RISCV64JavaCallWrapper) getEntryFrameCallWrapper(); -+ if (Assert.ASSERTS_ENABLED) { -+ Assert.that(!entryFrameIsFirst(), "next Java fp must be non zero"); -+ Assert.that(jcw.getLastJavaSP().greaterThan(getSP()), "must be above this frame on stack"); -+ } -+ RISCV64Frame fr; -+ if (jcw.getLastJavaPC() != null) { -+ fr = new RISCV64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP(), jcw.getLastJavaPC()); -+ } else { -+ fr = new RISCV64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP()); -+ } -+ map.clear(); -+ if (Assert.ASSERTS_ENABLED) { -+ Assert.that(map.getIncludeArgumentOops(), "should be set by clear"); -+ } -+ return fr; -+ } ++///////////////////////////////////////////////////////////////////////////// ++// helper functions for fatal error handler + -+ //------------------------------------------------------------------------------ -+ // frame::adjust_unextended_sp -+ private void adjustUnextendedSP() { -+ // If we are returning to a compiled MethodHandle call site, the -+ // saved_fp will in fact be a saved value of the unextended SP. 
The -+ // simplest way to tell whether we are returning to such a call site -+ // is as follows: ++static const char* reg_abi_names[] = { ++ "pc", ++ "x1(ra)", "x2(sp)", "x3(gp)", "x4(tp)", ++ "x5(t0)", "x6(t1)", "x7(t2)", ++ "x8(s0)", "x9(s1)", ++ "x10(a0)", "x11(a1)", "x12(a2)", "x13(a3)", "x14(a4)", "x15(a5)", "x16(a6)", "x17(a7)", ++ "x18(s2)", "x19(s3)", "x20(s4)", "x21(s5)", "x22(s6)", "x23(s7)", "x24(s8)", "x25(s9)", "x26(s10)", "x27(s11)", ++ "x28(t3)", "x29(t4)","x30(t5)", "x31(t6)" ++}; + -+ CodeBlob cb = cb(); -+ NMethod senderNm = (cb == null) ? null : cb.asNMethodOrNull(); -+ if (senderNm != null) { -+ // If the sender PC is a deoptimization point, get the original -+ // PC. For MethodHandle call site the unextended_sp is stored in -+ // saved_fp. -+ if (senderNm.isDeoptMhEntry(getPC())) { -+ raw_unextendedSP = getFP(); -+ } -+ else if (senderNm.isDeoptEntry(getPC())) { -+ } -+ else if (senderNm.isMethodHandleReturn(getPC())) { -+ raw_unextendedSP = getFP(); -+ } -+ } ++void os::print_context(outputStream *st, const void *context) { ++ if (context == NULL) { ++ return; + } + -+ private Frame senderForInterpreterFrame(RISCV64RegisterMap map) { -+ if (DEBUG) { -+ System.out.println("senderForInterpreterFrame"); -+ } -+ Address unextendedSP = addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); -+ Address sp = addressOfStackSlot(SENDER_SP_OFFSET); -+ // We do not need to update the callee-save register mapping because above -+ // us is either another interpreter frame or a converter-frame, but never -+ // directly a compiled frame. -+ // 11/24/04 SFG. With the removal of adapter frames this is no longer true. -+ // However c2 no longer uses callee save register for java calls so there -+ // are no callee register to find. ++ const ucontext_t *uc = (const ucontext_t*)context; ++ st->print_cr("Registers:"); ++ for (int r = 0; r < 32; r++) { ++ st->print("%-*.*s=", 8, 8, reg_abi_names[r]); ++ print_location(st, uc->uc_mcontext.__gregs[r]); ++ } ++ st->cr(); + -+ if (map.getUpdateMap()) -+ updateMapWithSavedLink(map, addressOfStackSlot(LINK_OFFSET)); ++ intptr_t *frame_sp = (intptr_t *)os::Linux::ucontext_get_sp(uc); ++ st->print_cr("Top of Stack: (sp=" PTR_FORMAT ")", p2i(frame_sp)); ++ print_hex_dump(st, (address)frame_sp, (address)(frame_sp + 64), sizeof(intptr_t)); ++ st->cr(); + -+ return new RISCV64Frame(sp, unextendedSP, getLink(), getSenderPC()); -+ } ++ // Note: it may be unsafe to inspect memory near pc. For example, pc may ++ // point to garbage if entry point in an nmethod is corrupted. Leave ++ // this at the end, and hope for the best. 
++ address pc = os::Posix::ucontext_get_pc(uc); ++ print_instructions(st, pc, sizeof(char)); ++ st->cr(); ++} + -+ private void updateMapWithSavedLink(RegisterMap map, Address savedFPAddr) { -+ map.setLocation(fp, savedFPAddr); ++void os::print_register_info(outputStream *st, const void *context) { ++ if (context == NULL) { ++ return; + } + -+ private Frame senderForCompiledFrame(RISCV64RegisterMap map, CodeBlob cb) { -+ if (DEBUG) { -+ System.out.println("senderForCompiledFrame"); -+ } -+ -+ // -+ // NOTE: some of this code is (unfortunately) duplicated RISCV64CurrentFrameGuess -+ // ++ const ucontext_t *uc = (const ucontext_t*)context; + -+ if (Assert.ASSERTS_ENABLED) { -+ Assert.that(map != null, "map must be set"); -+ } ++ st->print_cr("Register to memory mapping:"); ++ st->cr(); + -+ // frame owned by optimizing compiler -+ if (Assert.ASSERTS_ENABLED) { -+ Assert.that(cb.getFrameSize() >= 0, "must have non-zero frame size"); -+ } -+ Address senderSP = getUnextendedSP().addOffsetTo(cb.getFrameSize()); ++ // this is horrendously verbose but the layout of the registers in the ++ // context does not match how we defined our abstract Register set, so ++ // we can't just iterate through the gregs area + -+ // The return_address is always the word on the stack -+ Address senderPC = senderSP.getAddressAt(RETURN_ADDR_OFFSET * VM.getVM().getAddressSize()); ++ // this is only for the "general purpose" registers + -+ // This is the saved value of FP which may or may not really be an FP. -+ // It is only an FP if the sender is an interpreter frame. -+ Address savedFPAddr = senderSP.addOffsetTo(LINK_OFFSET * VM.getVM().getAddressSize()); ++ for (int r = 0; r < 32; r++) ++ st->print_cr("%-*.*s=" INTPTR_FORMAT, 8, 8, reg_abi_names[r], (uintptr_t)uc->uc_mcontext.__gregs[r]); ++ st->cr(); ++} + -+ if (map.getUpdateMap()) { -+ // Tell GC to use argument oopmaps for some runtime stubs that need it. -+ // For C1, the runtime stub might not have oop maps, so set this flag -+ // outside of update_register_map. -+ map.setIncludeArgumentOops(cb.callerMustGCArguments()); ++void os::setup_fpu() { ++} + -+ if (cb.getOopMaps() != null) { -+ ImmutableOopMapSet.updateRegisterMap(this, cb, map, true); -+ } ++#ifndef PRODUCT ++void os::verify_stack_alignment() { ++ assert(((intptr_t)os::current_stack_pointer() & (StackAlignmentInBytes-1)) == 0, "incorrect stack alignment"); ++} ++#endif + -+ // Since the prolog does the save and restore of FP there is no oopmap -+ // for it so we must fill in its location as if there was an oopmap entry -+ // since if our caller was compiled code there could be live jvm state in it. 
-+ updateMapWithSavedLink(map, savedFPAddr); -+ } ++int os::extra_bang_size_in_bytes() { ++ return 0; ++} + -+ return new RISCV64Frame(senderSP, savedFPAddr.getAddressAt(0), senderPC); ++extern "C" { ++ int SpinPause() { ++ return 0; + } + -+ protected boolean hasSenderPD() { -+ return true; ++ void _Copy_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) { ++ if (from > to) { ++ const jshort *end = from + count; ++ while (from < end) { ++ *(to++) = *(from++); ++ } ++ } else if (from < to) { ++ const jshort *end = from; ++ from += count - 1; ++ to += count - 1; ++ while (from >= end) { ++ *(to--) = *(from--); ++ } ++ } + } -+ -+ public long frameSize() { -+ return (getSenderSP().minus(getSP()) / VM.getVM().getAddressSize()); ++ void _Copy_conjoint_jints_atomic(const jint* from, jint* to, size_t count) { ++ if (from > to) { ++ const jint *end = from + count; ++ while (from < end) { ++ *(to++) = *(from++); ++ } ++ } else if (from < to) { ++ const jint *end = from; ++ from += count - 1; ++ to += count - 1; ++ while (from >= end) { ++ *(to--) = *(from--); ++ } ++ } + } -+ -+ public Address getLink() { -+ try { -+ if (DEBUG) { -+ System.out.println("Reading link at " + addressOfStackSlot(LINK_OFFSET) -+ + " = " + addressOfStackSlot(LINK_OFFSET).getAddressAt(0)); -+ } -+ return addressOfStackSlot(LINK_OFFSET).getAddressAt(0); -+ } catch (Exception e) { -+ if (DEBUG) -+ System.out.println("Returning null"); -+ return null; -+ } ++ void _Copy_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) { ++ if (from > to) { ++ const jlong *end = from + count; ++ while (from < end) { ++ os::atomic_copy64(from++, to++); ++ } ++ } else if (from < to) { ++ const jlong *end = from; ++ from += count - 1; ++ to += count - 1; ++ while (from >= end) { ++ os::atomic_copy64(from--, to--); ++ } + } ++ } + -+ public Address getUnextendedSP() { return raw_unextendedSP; } ++ void _Copy_arrayof_conjoint_bytes(const HeapWord* from, ++ HeapWord* to, ++ size_t count) { ++ memmove(to, from, count); ++ } ++ void _Copy_arrayof_conjoint_jshorts(const HeapWord* from, ++ HeapWord* to, ++ size_t count) { ++ memmove(to, from, count * 2); ++ } ++ void _Copy_arrayof_conjoint_jints(const HeapWord* from, ++ HeapWord* to, ++ size_t count) { ++ memmove(to, from, count * 4); ++ } ++ void _Copy_arrayof_conjoint_jlongs(const HeapWord* from, ++ HeapWord* to, ++ size_t count) { ++ memmove(to, from, count * 8); ++ } ++}; +diff --git a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.hpp +new file mode 100644 +index 00000000000..6d415630661 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.hpp +@@ -0,0 +1,59 @@ ++/* ++ * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_RISCV_VM_OS_LINUX_RISCV_HPP ++#define OS_CPU_LINUX_RISCV_VM_OS_LINUX_RISCV_HPP ++ ++ static void setup_fpu(); ++ ++ // Used to register dynamic code cache area with the OS ++ // Note: Currently only used in 64 bit Windows implementations ++ static bool register_code_area(char *low, char *high) { return true; } ++ ++ // Atomically copy 64 bits of data ++ static void atomic_copy64(const volatile void *src, volatile void *dst) { ++ *(jlong *) dst = *(const jlong *) src; ++ } ++ ++ // SYSCALL_RISCV_FLUSH_ICACHE is used to flush instruction cache. The "fence.i" instruction ++ // only work on the current hart, so kernel provides the icache flush syscall to flush icache ++ // on each hart. You can pass a flag to determine a global or local icache flush. ++ static void icache_flush(long int start, long int end) ++ { ++ const int SYSCALL_RISCV_FLUSH_ICACHE = 259; ++ register long int __a7 asm ("a7") = SYSCALL_RISCV_FLUSH_ICACHE; ++ register long int __a0 asm ("a0") = start; ++ register long int __a1 asm ("a1") = end; ++ // the flush can be applied to either all threads or only the current. ++ // 0 means a global icache flush, and the icache flush will be applied ++ // to other harts concurrently executing. ++ register long int __a2 asm ("a2") = 0; ++ __asm__ volatile ("ecall\n\t" ++ : "+r" (__a0) ++ : "r" (__a0), "r" (__a1), "r" (__a2), "r" (__a7) ++ : "memory"); ++ } ++ ++#endif // OS_CPU_LINUX_RISCV_VM_OS_LINUX_RISCV_HPP +diff --git a/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp b/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp +new file mode 100644 +index 00000000000..a6432c84ec7 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp +@@ -0,0 +1,38 @@ ++/* ++ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
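Aside: the `icache_flush` helper in the hunk above issues the Linux `riscv_flush_icache` system call directly with inline assembly, passing `0` as the flags word so the flush is broadcast to all harts rather than only the calling one (a local `fence.i` would only cover the current hart). A rough equivalent through the generic `syscall(2)` wrapper is sketched below; the syscall number 259 and the argument order are taken from the hunk, and the call is only meaningful on a riscv64 Linux target:

```cpp
#include <unistd.h>       // syscall
#include <cstdint>

// Flush the instruction cache for [start, end) on every hart.
// 259 is the riscv_flush_icache syscall number used in the patch above;
// the last argument is the flags word (0 = flush on all harts).
static void flush_icache_all_harts(void* start, void* end) {
  long rc = syscall(259, (uintptr_t)start, (uintptr_t)end, 0UL);
  (void)rc;   // the VM would treat a failure here as fatal
}

int main() {
  static char code_buf[64];
  flush_icache_all_harts(code_buf, code_buf + sizeof(code_buf));
  return 0;
}
```

Some glibc versions also expose a `__riscv_flush_icache()` wrapper for the same syscall; the raw form above just stays closest to what the hunk does.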
++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_RISCV_VM_PREFETCH_LINUX_RISCV_INLINE_HPP ++#define OS_CPU_LINUX_RISCV_VM_PREFETCH_LINUX_RISCV_INLINE_HPP ++ ++#include "runtime/prefetch.hpp" ++ ++ ++inline void Prefetch::read (const void *loc, intx interval) { ++} ++ ++inline void Prefetch::write(void *loc, intx interval) { ++} ++ ++#endif // OS_CPU_LINUX_RISCV_VM_PREFETCH_LINUX_RISCV_INLINE_HPP +diff --git a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp +new file mode 100644 +index 00000000000..3100572e9fd +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp +@@ -0,0 +1,92 @@ ++/* ++ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/thread.inline.hpp" ++ ++frame JavaThread::pd_last_frame() { ++ assert(has_last_Java_frame(), "must have last_Java_sp() when suspended"); ++ return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp(), _anchor.last_Java_pc()); ++} ++ ++// For Forte Analyzer AsyncGetCallTrace profiling support - thread is ++// currently interrupted by SIGPROF ++bool JavaThread::pd_get_top_frame_for_signal_handler(frame* fr_addr, ++ void* ucontext, bool isInJava) { ++ ++ assert(Thread::current() == this, "caller must be current thread"); ++ return pd_get_top_frame(fr_addr, ucontext, isInJava); ++} ++ ++bool JavaThread::pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava) { ++ return pd_get_top_frame(fr_addr, ucontext, isInJava); ++} ++ ++bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava) { ++ // If we have a last_Java_frame, then we should use it even if ++ // isInJava == true. It should be more reliable than ucontext info. ++ if (has_last_Java_frame() && frame_anchor()->walkable()) { ++ *fr_addr = pd_last_frame(); ++ return true; ++ } ++ ++ // At this point, we don't have a last_Java_frame, so ++ // we try to glean some information out of the ucontext ++ // if we were running Java code when SIGPROF came in. 
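Aside: `pd_get_top_frame()` above prefers the thread's own `last_Java_frame` anchor whenever it is walkable and only then falls back to the `sp`/`fp`/`pc` captured in the signal `ucontext`; that fallback, including a C2-specific retry that ignores `fp`, continues in the hunk lines right after this aside. The decision ladder reduced to a self-contained toy (every type and flag below is a stand-in, not a HotSpot class):

```cpp
#include <cstdio>

// Toy stand-ins, just to show the control flow of the real function.
struct ToyFrame { const char* source; bool sane; };

static bool have_walkable_anchor  = false;  // pretend thread state
static bool context_frame_is_sane = false;  // pretend safe_for_sender() result

static bool toy_get_top_frame(ToyFrame* out, bool in_java, bool compiler2) {
  if (have_walkable_anchor) {               // 1) prefer the frame anchor
    *out = { "last_Java_frame anchor", true };
    return true;
  }
  if (!in_java) {                           // 2) not in Java: nothing else to try
    return false;
  }
  ToyFrame guess = { "ucontext sp/fp/pc", context_frame_is_sane };
  if (!guess.sane && compiler2) {           // 3) C2 frames may have no valid fp
    guess = { "ucontext sp/pc only", true };
  }
  if (!guess.sane) {
    return false;                           // 4) give up
  }
  *out = guess;
  return true;
}

int main() {
  ToyFrame f;
  if (toy_get_top_frame(&f, /*in_java=*/true, /*compiler2=*/true)) {
    printf("top frame from: %s\n", f.source);
  }
  return 0;
}
```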
++ if (isInJava) { ++ ucontext_t* uc = (ucontext_t*) ucontext; ++ ++ intptr_t* ret_fp = NULL; ++ intptr_t* ret_sp = NULL; ++ address addr = os::fetch_frame_from_context(uc, &ret_sp, &ret_fp); ++ if (addr == NULL || ret_sp == NULL ) { ++ // ucontext wasn't useful ++ return false; ++ } ++ ++ frame ret_frame(ret_sp, ret_fp, addr); ++ if (!ret_frame.safe_for_sender(this)) { ++#ifdef COMPILER2 ++ frame ret_frame2(ret_sp, NULL, addr); ++ if (!ret_frame2.safe_for_sender(this)) { ++ // nothing else to try if the frame isn't good ++ return false; ++ } ++ ret_frame = ret_frame2; ++#else ++ // nothing else to try if the frame isn't good ++ return false; ++#endif /* COMPILER2 */ ++ } ++ *fr_addr = ret_frame; ++ return true; ++ } ++ ++ // nothing else to try ++ return false; ++} ++ ++void JavaThread::cache_global_variables() { } +diff --git a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp +new file mode 100644 +index 00000000000..61e2cf85b63 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp +@@ -0,0 +1,48 @@ ++/* ++ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_RISCV_THREAD_LINUX_RISCV_HPP ++#define OS_CPU_LINUX_RISCV_THREAD_LINUX_RISCV_HPP ++ ++ private: ++ void pd_initialize() { ++ _anchor.clear(); ++ } ++ ++ frame pd_last_frame(); ++ ++ public: ++ static ByteSize last_Java_fp_offset() { ++ return byte_offset_of(JavaThread, _anchor) + JavaFrameAnchor::last_Java_fp_offset(); ++ } ++ ++ bool pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext, ++ bool isInJava); ++ ++ bool pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava); ++private: ++ bool pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava); ++ ++#endif // OS_CPU_LINUX_RISCV_THREAD_LINUX_RISCV_HPP +diff --git a/src/hotspot/os_cpu/linux_riscv/vmStructs_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/vmStructs_linux_riscv.hpp +new file mode 100644 +index 00000000000..6cf7683a586 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_riscv/vmStructs_linux_riscv.hpp +@@ -0,0 +1,55 @@ ++/* ++ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
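Aside: `last_Java_fp_offset()` in the header above adds the offset of `_anchor` inside `JavaThread` to the offset of the saved fp inside `JavaFrameAnchor`, so one displacement from the thread pointer reaches the nested field. The same composition with `offsetof`, over hypothetical stand-in structs rather than the real layouts:

```cpp
#include <cstddef>
#include <cstdio>

// Hypothetical stand-ins for the JavaThread / JavaFrameAnchor field layout.
struct Anchor  { void* last_sp; void* last_fp; void* last_pc; };
struct ThreadT { long  other_fields[4]; Anchor anchor; };

int main() {
  // Composite offset: thread base -> anchor -> last_fp, usable as a single
  // displacement (e.g. a load of the form "ld t0, OFF(thread)").
  size_t off = offsetof(ThreadT, anchor) + offsetof(Anchor, last_fp);
  printf("last_fp lives at [thread + %zu]\n", off);

  ThreadT t{};
  void** fp_slot = (void**)((char*)&t + off);
  *fp_slot = (void*)0x1234;                  // writes the same field...
  printf("%p\n", t.anchor.last_fp);          // ...we read back here
  return 0;
}
```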
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_RISCV_VM_VMSTRUCTS_LINUX_RISCV_HPP ++#define OS_CPU_LINUX_RISCV_VM_VMSTRUCTS_LINUX_RISCV_HPP ++ ++// These are the OS and CPU-specific fields, types and integer ++// constants required by the Serviceability Agent. This file is ++// referenced by vmStructs.cpp. ++ ++#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ ++ \ ++ /******************************/ \ ++ /* Threads (NOTE: incomplete) */ \ ++ /******************************/ \ ++ nonstatic_field(OSThread, _thread_id, OSThread::thread_id_t) \ ++ nonstatic_field(OSThread, _pthread_id, pthread_t) ++ ++ ++#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ ++ \ ++ /**********************/ \ ++ /* Posix Thread IDs */ \ ++ /**********************/ \ ++ \ ++ declare_integer_type(OSThread::thread_id_t) \ ++ declare_unsigned_integer_type(pthread_t) ++ ++#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++ ++#define VM_LONG_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++ ++#endif // OS_CPU_LINUX_RISCV_VM_VMSTRUCTS_LINUX_RISCV_HPP +diff --git a/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp +new file mode 100644 +index 00000000000..4623dbfad42 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp +@@ -0,0 +1,118 @@ ++/* ++ * Copyright (c) 2006, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
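Aside: the `VM_STRUCTS_OS_CPU` / `VM_TYPES_OS_CPU` macros above declare nothing by themselves; they are X-macro style lists that `vmStructs.cpp` expands with different callback macros, so one expansion yields the name/offset table the Serviceability Agent reads and another yields compile-time checks. A minimal sketch of that pattern with made-up fields (not the real vmStructs machinery):

```cpp
#include <cstddef>
#include <cstdio>

// A made-up "exported fields" list in the same X-macro style as
// VM_STRUCTS_OS_CPU: the list stays fixed, the callback macro varies.
struct OSThreadDemo { int _thread_id; unsigned long _pthread_id; };

#define DEMO_STRUCTS(nonstatic_field)                         \
  nonstatic_field(OSThreadDemo, _thread_id,  int)             \
  nonstatic_field(OSThreadDemo, _pthread_id, unsigned long)

// Expansion 1: a name/offset table, which is what the SA ultimately consumes.
#define PRINT_FIELD(klass, field, type) \
  printf(#klass "::" #field " @ offset %zu\n", offsetof(klass, field));

// Expansion 2: compile-time sanity checks on the declared field types.
#define CHECK_FIELD(klass, field, type) \
  static_assert(sizeof(((klass*)nullptr)->field) == sizeof(type), #field);

DEMO_STRUCTS(CHECK_FIELD)

int main() {
  DEMO_STRUCTS(PRINT_FIELD)
  return 0;
}
```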
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/register.hpp" ++#include "runtime/os.hpp" ++#include "runtime/os.inline.hpp" ++#include "runtime/vm_version.hpp" ++ ++#include ++#include ++ ++#ifndef HWCAP_ISA_I ++#define HWCAP_ISA_I (1 << ('I' - 'A')) ++#endif ++ ++#ifndef HWCAP_ISA_M ++#define HWCAP_ISA_M (1 << ('M' - 'A')) ++#endif ++ ++#ifndef HWCAP_ISA_A ++#define HWCAP_ISA_A (1 << ('A' - 'A')) ++#endif ++ ++#ifndef HWCAP_ISA_F ++#define HWCAP_ISA_F (1 << ('F' - 'A')) ++#endif ++ ++#ifndef HWCAP_ISA_D ++#define HWCAP_ISA_D (1 << ('D' - 'A')) ++#endif ++ ++#ifndef HWCAP_ISA_C ++#define HWCAP_ISA_C (1 << ('C' - 'A')) ++#endif ++ ++#ifndef HWCAP_ISA_V ++#define HWCAP_ISA_V (1 << ('V' - 'A')) ++#endif ++ ++#ifndef HWCAP_ISA_B ++#define HWCAP_ISA_B (1 << ('B' - 'A')) ++#endif ++ ++#define read_csr(csr) \ ++({ \ ++ register unsigned long __v; \ ++ __asm__ __volatile__ ("csrr %0, %1" \ ++ : "=r" (__v) \ ++ : "i" (csr) \ ++ : "memory"); \ ++ __v; \ ++}) ++ ++uint32_t VM_Version::get_current_vector_length() { ++ assert(_features & CPU_V, "should not call this"); ++ return (uint32_t)read_csr(CSR_VLENB); ++} ++ ++void VM_Version::get_os_cpu_info() { ++ ++ uint64_t auxv = getauxval(AT_HWCAP); ++ ++ static_assert(CPU_I == HWCAP_ISA_I, "Flag CPU_I must follow Linux HWCAP"); ++ static_assert(CPU_M == HWCAP_ISA_M, "Flag CPU_M must follow Linux HWCAP"); ++ static_assert(CPU_A == HWCAP_ISA_A, "Flag CPU_A must follow Linux HWCAP"); ++ static_assert(CPU_F == HWCAP_ISA_F, "Flag CPU_F must follow Linux HWCAP"); ++ static_assert(CPU_D == HWCAP_ISA_D, "Flag CPU_D must follow Linux HWCAP"); ++ static_assert(CPU_C == HWCAP_ISA_C, "Flag CPU_C must follow Linux HWCAP"); ++ static_assert(CPU_V == HWCAP_ISA_V, "Flag CPU_V must follow Linux HWCAP"); ++ static_assert(CPU_B == HWCAP_ISA_B, "Flag CPU_B must follow Linux HWCAP"); ++ _features = auxv & ( ++ HWCAP_ISA_I | ++ HWCAP_ISA_M | ++ HWCAP_ISA_A | ++ HWCAP_ISA_F | ++ HWCAP_ISA_D | ++ HWCAP_ISA_C | ++ HWCAP_ISA_V | ++ HWCAP_ISA_B); ++ ++ if (FILE *f = fopen("/proc/cpuinfo", "r")) { ++ char buf[512], *p; ++ while (fgets(buf, sizeof (buf), f) != NULL) { ++ if ((p = strchr(buf, ':')) != NULL) { ++ if (strncmp(buf, "uarch", sizeof "uarch" - 1) == 0) { ++ char* uarch = os::strdup(p + 2); ++ uarch[strcspn(uarch, "\n")] = '\0'; ++ _uarch = uarch; ++ break; ++ } ++ } ++ } ++ fclose(f); ++ } ++} +diff --git a/src/hotspot/share/c1/c1_LIR.cpp b/src/hotspot/share/c1/c1_LIR.cpp +index e30d39f73d1..733ee9e654c 100644 +--- a/src/hotspot/share/c1/c1_LIR.cpp ++++ b/src/hotspot/share/c1/c1_LIR.cpp +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
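Aside: `get_os_cpu_info()` above relies on the kernel encoding single-letter RISC-V extensions in `AT_HWCAP` as `1 << (letter - 'A')`, which is why the VM's `CPU_*` flags can be statically asserted to match `HWCAP_ISA_*` and masked in directly. A standalone probe in the same spirit (Linux-only; the letter set is just the one checked in the hunk):

```cpp
#include <sys/auxv.h>   // getauxval, AT_HWCAP
#include <cstdio>

int main() {
  unsigned long hwcap = getauxval(AT_HWCAP);
  // Single-letter ISA extensions are reported as bit (letter - 'A').
  const char letters[] = { 'I', 'M', 'A', 'F', 'D', 'C', 'V', 'B' };
  printf("rv64");
  for (char c : letters) {
    if (hwcap & (1UL << (c - 'A'))) {
      printf("%c", c + ('a' - 'A'));   // lower-case, e.g. prints rv64imafdc
    }
  }
  printf("\n");
  return 0;
}
```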
+ * + * This code is free software; you can redistribute it and/or modify it +@@ -199,7 +199,6 @@ bool LIR_OprDesc::is_oop() const { + void LIR_Op2::verify() const { + #ifdef ASSERT + switch (code()) { +- case lir_cmove: + case lir_xchg: + break; + +@@ -252,9 +251,7 @@ void LIR_Op2::verify() const { + + + LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block) +- : LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) +- , _cond(cond) +- , _type(type) ++ : LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL, type) + , _label(block->label()) + , _block(block) + , _ublock(NULL) +@@ -262,9 +259,7 @@ LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block + } + + LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, CodeStub* stub) : +- LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) +- , _cond(cond) +- , _type(type) ++ LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL, type) + , _label(stub->entry()) + , _block(NULL) + , _ublock(NULL) +@@ -272,9 +267,7 @@ LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, CodeStub* stub) : + } + + LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block, BlockBegin* ublock) +- : LIR_Op(lir_cond_float_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) +- , _cond(cond) +- , _type(type) ++ : LIR_Op2(lir_cond_float_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL, type) + , _label(block->label()) + , _block(block) + , _ublock(ublock) +@@ -296,13 +289,13 @@ void LIR_OpBranch::change_ublock(BlockBegin* b) { + } + + void LIR_OpBranch::negate_cond() { +- switch (_cond) { +- case lir_cond_equal: _cond = lir_cond_notEqual; break; +- case lir_cond_notEqual: _cond = lir_cond_equal; break; +- case lir_cond_less: _cond = lir_cond_greaterEqual; break; +- case lir_cond_lessEqual: _cond = lir_cond_greater; break; +- case lir_cond_greaterEqual: _cond = lir_cond_less; break; +- case lir_cond_greater: _cond = lir_cond_lessEqual; break; ++ switch (cond()) { ++ case lir_cond_equal: set_cond(lir_cond_notEqual); break; ++ case lir_cond_notEqual: set_cond(lir_cond_equal); break; ++ case lir_cond_less: set_cond(lir_cond_greaterEqual); break; ++ case lir_cond_lessEqual: set_cond(lir_cond_greater); break; ++ case lir_cond_greaterEqual: set_cond(lir_cond_less); break; ++ case lir_cond_greater: set_cond(lir_cond_lessEqual); break; + default: ShouldNotReachHere(); + } + } +@@ -525,6 +518,13 @@ void LIR_OpVisitState::visit(LIR_Op* op) { + assert(op->as_OpBranch() != NULL, "must be"); + LIR_OpBranch* opBranch = (LIR_OpBranch*)op; + ++ assert(opBranch->_tmp1->is_illegal() && opBranch->_tmp2->is_illegal() && ++ opBranch->_tmp3->is_illegal() && opBranch->_tmp4->is_illegal() && ++ opBranch->_tmp5->is_illegal(), "not used"); ++ ++ if (opBranch->_opr1->is_valid()) do_input(opBranch->_opr1); ++ if (opBranch->_opr2->is_valid()) do_input(opBranch->_opr2); ++ + if (opBranch->_info != NULL) do_info(opBranch->_info); + assert(opBranch->_result->is_illegal(), "not used"); + if (opBranch->_stub != NULL) opBranch->stub()->visit(this); +@@ -615,17 +615,19 @@ void LIR_OpVisitState::visit(LIR_Op* op) { + // to the result operand, otherwise the backend fails + case lir_cmove: + { +- assert(op->as_Op2() != NULL, "must be"); +- LIR_Op2* op2 = (LIR_Op2*)op; ++ assert(op->as_Op4() != NULL, "must be"); ++ LIR_Op4* op4 = (LIR_Op4*)op; + +- assert(op2->_info == NULL && 
op2->_tmp1->is_illegal() && op2->_tmp2->is_illegal() && +- op2->_tmp3->is_illegal() && op2->_tmp4->is_illegal() && op2->_tmp5->is_illegal(), "not used"); +- assert(op2->_opr1->is_valid() && op2->_opr2->is_valid() && op2->_result->is_valid(), "used"); ++ assert(op4->_info == NULL && op4->_tmp1->is_illegal() && op4->_tmp2->is_illegal() && ++ op4->_tmp3->is_illegal() && op4->_tmp4->is_illegal() && op4->_tmp5->is_illegal(), "not used"); ++ assert(op4->_opr1->is_valid() && op4->_opr2->is_valid() && op4->_result->is_valid(), "used"); + +- do_input(op2->_opr1); +- do_input(op2->_opr2); +- do_temp(op2->_opr2); +- do_output(op2->_result); ++ do_input(op4->_opr1); ++ do_input(op4->_opr2); ++ if (op4->_opr3->is_valid()) do_input(op4->_opr3); ++ if (op4->_opr4->is_valid()) do_input(op4->_opr4); ++ do_temp(op4->_opr2); ++ do_output(op4->_result); + + break; + } +@@ -1048,6 +1050,10 @@ void LIR_Op3::emit_code(LIR_Assembler* masm) { + masm->emit_op3(this); + } + ++void LIR_Op4::emit_code(LIR_Assembler* masm) { ++ masm->emit_op4(this); ++} ++ + void LIR_OpLock::emit_code(LIR_Assembler* masm) { + masm->emit_lock(this); + if (stub()) { +@@ -1084,6 +1090,10 @@ LIR_List::LIR_List(Compilation* compilation, BlockBegin* block) + , _file(NULL) + , _line(0) + #endif ++#ifdef RISCV ++ , _cmp_opr1(LIR_OprFact::illegalOpr) ++ , _cmp_opr2(LIR_OprFact::illegalOpr) ++#endif + { } + + +@@ -1101,6 +1111,38 @@ void LIR_List::set_file_and_line(const char * file, int line) { + } + #endif + ++#ifdef RISCV ++void LIR_List::set_cmp_oprs(LIR_Op* op) { ++ switch (op->code()) { ++ case lir_cmp: ++ _cmp_opr1 = op->as_Op2()->in_opr1(); ++ _cmp_opr2 = op->as_Op2()->in_opr2(); ++ break; ++ case lir_branch: // fall through ++ case lir_cond_float_branch: ++ assert(op->as_OpBranch()->cond() == lir_cond_always || ++ (_cmp_opr1 != LIR_OprFact::illegalOpr && _cmp_opr2 != LIR_OprFact::illegalOpr), ++ "conditional branches must have legal operands"); ++ if (op->as_OpBranch()->cond() != lir_cond_always) { ++ op->as_Op2()->set_in_opr1(_cmp_opr1); ++ op->as_Op2()->set_in_opr2(_cmp_opr2); ++ } ++ break; ++ case lir_cmove: ++ op->as_Op4()->set_in_opr3(_cmp_opr1); ++ op->as_Op4()->set_in_opr4(_cmp_opr2); ++ break; ++#if INCLUDE_ZGC ++ case lir_zloadbarrier_test: ++ _cmp_opr1 = FrameMap::as_opr(t1); ++ _cmp_opr2 = LIR_OprFact::intConst(0); ++ break; ++#endif ++ default: ++ break; ++ } ++} ++#endif + + void LIR_List::append(LIR_InsertionBuffer* buffer) { + assert(this == buffer->lir_list(), "wrong lir list"); +@@ -1680,7 +1722,6 @@ const char * LIR_Op::name() const { + case lir_cmp_l2i: s = "cmp_l2i"; break; + case lir_ucmp_fd2i: s = "ucomp_fd2i"; break; + case lir_cmp_fd2i: s = "comp_fd2i"; break; +- case lir_cmove: s = "cmove"; break; + case lir_add: s = "add"; break; + case lir_sub: s = "sub"; break; + case lir_mul: s = "mul"; break; +@@ -1705,6 +1746,8 @@ const char * LIR_Op::name() const { + case lir_irem: s = "irem"; break; + case lir_fmad: s = "fmad"; break; + case lir_fmaf: s = "fmaf"; break; ++ // LIR_Op4 ++ case lir_cmove: s = "cmove"; break; + // LIR_OpJavaCall + case lir_static_call: s = "static"; break; + case lir_optvirtual_call: s = "optvirtual"; break; +@@ -1841,6 +1884,8 @@ void LIR_Op1::print_patch_code(outputStream* out, LIR_PatchCode code) { + // LIR_OpBranch + void LIR_OpBranch::print_instr(outputStream* out) const { + print_condition(out, cond()); out->print(" "); ++ in_opr1()->print(out); out->print(" "); ++ in_opr2()->print(out); out->print(" "); + if (block() != NULL) { + out->print("[B%d] ", block()->block_id()); + } else if 
(stub() != NULL) { +@@ -1927,7 +1972,7 @@ void LIR_OpRoundFP::print_instr(outputStream* out) const { + + // LIR_Op2 + void LIR_Op2::print_instr(outputStream* out) const { +- if (code() == lir_cmove || code() == lir_cmp) { ++ if (code() == lir_cmp || code() == lir_branch || code() == lir_cond_float_branch) { + print_condition(out, condition()); out->print(" "); + } + in_opr1()->print(out); out->print(" "); +@@ -1978,6 +2023,15 @@ void LIR_Op3::print_instr(outputStream* out) const { + result_opr()->print(out); + } + ++// LIR_Op4 ++void LIR_Op4::print_instr(outputStream* out) const { ++ print_condition(out, condition()); out->print(" "); ++ in_opr1()->print(out); out->print(" "); ++ in_opr2()->print(out); out->print(" "); ++ in_opr3()->print(out); out->print(" "); ++ in_opr4()->print(out); out->print(" "); ++ result_opr()->print(out); ++} + + void LIR_OpLock::print_instr(outputStream* out) const { + hdr_opr()->print(out); out->print(" "); +diff --git a/src/hotspot/share/c1/c1_LIR.hpp b/src/hotspot/share/c1/c1_LIR.hpp +index 3234ca018b7..efff6bf7a30 100644 +--- a/src/hotspot/share/c1/c1_LIR.hpp ++++ b/src/hotspot/share/c1/c1_LIR.hpp +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -867,6 +867,7 @@ class LIR_Op2; + class LIR_OpDelay; + class LIR_Op3; + class LIR_OpAllocArray; ++class LIR_Op4; + class LIR_OpCall; + class LIR_OpJavaCall; + class LIR_OpRTCall; +@@ -916,8 +917,6 @@ enum LIR_Code { + , lir_null_check + , lir_return + , lir_leal +- , lir_branch +- , lir_cond_float_branch + , lir_move + , lir_convert + , lir_alloc_object +@@ -929,11 +928,12 @@ enum LIR_Code { + , lir_unwind + , end_op1 + , begin_op2 ++ , lir_branch ++ , lir_cond_float_branch + , lir_cmp + , lir_cmp_l2i + , lir_ucmp_fd2i + , lir_cmp_fd2i +- , lir_cmove + , lir_add + , lir_sub + , lir_mul +@@ -964,6 +964,9 @@ enum LIR_Code { + , lir_fmad + , lir_fmaf + , end_op3 ++ , begin_op4 ++ , lir_cmove ++ , end_op4 + , begin_opJavaCall + , lir_static_call + , lir_optvirtual_call +@@ -1001,6 +1004,11 @@ enum LIR_Code { + , begin_opAssert + , lir_assert + , end_opAssert ++#ifdef INCLUDE_ZGC ++ , begin_opZLoadBarrierTest ++ , lir_zloadbarrier_test ++ , end_opZLoadBarrierTest ++#endif + }; + + +@@ -1134,6 +1142,7 @@ class LIR_Op: public CompilationResourceObj { + virtual LIR_Op1* as_Op1() { return NULL; } + virtual LIR_Op2* as_Op2() { return NULL; } + virtual LIR_Op3* as_Op3() { return NULL; } ++ virtual LIR_Op4* as_Op4() { return NULL; } + virtual LIR_OpArrayCopy* as_OpArrayCopy() { return NULL; } + virtual LIR_OpUpdateCRC32* as_OpUpdateCRC32() { return NULL; } + virtual LIR_OpTypeCheck* as_OpTypeCheck() { return NULL; } +@@ -1410,51 +1419,6 @@ class LIR_OpRTCall: public LIR_OpCall { + virtual void verify() const; + }; + +- +-class LIR_OpBranch: public LIR_Op { +- friend class LIR_OpVisitState; +- +- private: +- LIR_Condition _cond; +- BasicType _type; +- Label* _label; +- BlockBegin* _block; // if this is a branch to a block, this is the block +- BlockBegin* _ublock; // if this is a float-branch, this is the unorderd block +- CodeStub* _stub; // if this is a branch to a stub, this is the stub +- +- public: +- LIR_OpBranch(LIR_Condition cond, BasicType type, Label* lbl) +- : LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*) NULL) +- , 
_cond(cond) +- , _type(type) +- , _label(lbl) +- , _block(NULL) +- , _ublock(NULL) +- , _stub(NULL) { } +- +- LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block); +- LIR_OpBranch(LIR_Condition cond, BasicType type, CodeStub* stub); +- +- // for unordered comparisons +- LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block, BlockBegin* ublock); +- +- LIR_Condition cond() const { return _cond; } +- BasicType type() const { return _type; } +- Label* label() const { return _label; } +- BlockBegin* block() const { return _block; } +- BlockBegin* ublock() const { return _ublock; } +- CodeStub* stub() const { return _stub; } +- +- void change_block(BlockBegin* b); +- void change_ublock(BlockBegin* b); +- void negate_cond(); +- +- virtual void emit_code(LIR_Assembler* masm); +- virtual LIR_OpBranch* as_OpBranch() { return this; } +- virtual void print_instr(outputStream* out) const PRODUCT_RETURN; +-}; +- +- + class ConversionStub; + + class LIR_OpConvert: public LIR_Op1 { +@@ -1614,19 +1578,19 @@ class LIR_Op2: public LIR_Op { + void verify() const; + + public: +- LIR_Op2(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, CodeEmitInfo* info = NULL) ++ LIR_Op2(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, CodeEmitInfo* info = NULL, BasicType type = T_ILLEGAL) + : LIR_Op(code, LIR_OprFact::illegalOpr, info) + , _opr1(opr1) + , _opr2(opr2) +- , _type(T_ILLEGAL) +- , _condition(condition) + , _fpu_stack_size(0) ++ , _type(type) + , _tmp1(LIR_OprFact::illegalOpr) + , _tmp2(LIR_OprFact::illegalOpr) + , _tmp3(LIR_OprFact::illegalOpr) + , _tmp4(LIR_OprFact::illegalOpr) +- , _tmp5(LIR_OprFact::illegalOpr) { +- assert(code == lir_cmp || code == lir_assert, "code check"); ++ , _tmp5(LIR_OprFact::illegalOpr) ++ , _condition(condition) { ++ assert(code == lir_cmp || code == lir_branch || code == lir_cond_float_branch || code == lir_assert, "code check"); + } + + LIR_Op2(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) +@@ -1651,14 +1615,14 @@ class LIR_Op2: public LIR_Op { + , _opr1(opr1) + , _opr2(opr2) + , _type(type) +- , _condition(lir_cond_unknown) + , _fpu_stack_size(0) + , _tmp1(LIR_OprFact::illegalOpr) + , _tmp2(LIR_OprFact::illegalOpr) + , _tmp3(LIR_OprFact::illegalOpr) + , _tmp4(LIR_OprFact::illegalOpr) +- , _tmp5(LIR_OprFact::illegalOpr) { +- assert(code != lir_cmp && is_in_range(code, begin_op2, end_op2), "code check"); ++ , _tmp5(LIR_OprFact::illegalOpr) ++ , _condition(lir_cond_unknown) { ++ assert(code != lir_cmp && code != lir_branch && code != lir_cond_float_branch && is_in_range(code, begin_op2, end_op2), "code check"); + } + + LIR_Op2(LIR_Code code, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, LIR_Opr tmp1, LIR_Opr tmp2 = LIR_OprFact::illegalOpr, +@@ -1667,14 +1631,14 @@ class LIR_Op2: public LIR_Op { + , _opr1(opr1) + , _opr2(opr2) + , _type(T_ILLEGAL) +- , _condition(lir_cond_unknown) + , _fpu_stack_size(0) + , _tmp1(tmp1) + , _tmp2(tmp2) + , _tmp3(tmp3) + , _tmp4(tmp4) +- , _tmp5(tmp5) { +- assert(code != lir_cmp && is_in_range(code, begin_op2, end_op2), "code check"); ++ , _tmp5(tmp5) ++ , _condition(lir_cond_unknown) { ++ assert(code != lir_cmp && code != lir_branch && code != lir_cond_float_branch && is_in_range(code, begin_op2, end_op2), "code check"); + } + + LIR_Opr in_opr1() const { return _opr1; } +@@ -1686,10 +1650,10 @@ class LIR_Op2: public LIR_Op { + LIR_Opr tmp4_opr() const { return _tmp4; } + LIR_Opr tmp5_opr() const { return _tmp5; } + LIR_Condition 
condition() const { +- assert(code() == lir_cmp || code() == lir_cmove || code() == lir_assert, "only valid for cmp and cmove and assert"); return _condition; ++ assert(code() == lir_cmp || code() == lir_branch || code() == lir_cond_float_branch || code() == lir_assert, "only valid for branch and assert"); return _condition; + } + void set_condition(LIR_Condition condition) { +- assert(code() == lir_cmp || code() == lir_cmove, "only valid for cmp and cmove"); _condition = condition; ++ assert(code() == lir_cmp || code() == lir_branch || code() == lir_cond_float_branch, "only valid for branch"); _condition = condition; + } + + void set_fpu_stack_size(int size) { _fpu_stack_size = size; } +@@ -1703,6 +1667,51 @@ class LIR_Op2: public LIR_Op { + virtual void print_instr(outputStream* out) const PRODUCT_RETURN; + }; + ++class LIR_OpBranch: public LIR_Op2 { ++ friend class LIR_OpVisitState; ++ ++ private: ++ Label* _label; ++ BlockBegin* _block; // if this is a branch to a block, this is the block ++ BlockBegin* _ublock; // if this is a float-branch, this is the unorderd block ++ CodeStub* _stub; // if this is a branch to a stub, this is the stub ++ ++ public: ++ LIR_OpBranch(LIR_Condition cond, BasicType type, Label* lbl) ++ : LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*) NULL, type) ++ , _label(lbl) ++ , _block(NULL) ++ , _ublock(NULL) ++ , _stub(NULL) { } ++ ++ LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block); ++ LIR_OpBranch(LIR_Condition cond, BasicType type, CodeStub* stub); ++ ++ // for unordered comparisons ++ LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block, BlockBegin* ublock); ++ ++ LIR_Condition cond() const { ++ return condition(); ++ } ++ ++ void set_cond(LIR_Condition cond) { ++ set_condition(cond); ++ } ++ ++ Label* label() const { return _label; } ++ BlockBegin* block() const { return _block; } ++ BlockBegin* ublock() const { return _ublock; } ++ CodeStub* stub() const { return _stub; } ++ ++ void change_block(BlockBegin* b); ++ void change_ublock(BlockBegin* b); ++ void negate_cond(); ++ ++ virtual void emit_code(LIR_Assembler* masm); ++ virtual LIR_OpBranch* as_OpBranch() { return this; } ++ virtual void print_instr(outputStream* out) const PRODUCT_RETURN; ++}; ++ + class LIR_OpAllocArray : public LIR_Op { + friend class LIR_OpVisitState; + +@@ -1766,6 +1775,63 @@ class LIR_Op3: public LIR_Op { + virtual void print_instr(outputStream* out) const PRODUCT_RETURN; + }; + ++class LIR_Op4: public LIR_Op { ++ friend class LIR_OpVisitState; ++ protected: ++ LIR_Opr _opr1; ++ LIR_Opr _opr2; ++ LIR_Opr _opr3; ++ LIR_Opr _opr4; ++ BasicType _type; ++ LIR_Opr _tmp1; ++ LIR_Opr _tmp2; ++ LIR_Opr _tmp3; ++ LIR_Opr _tmp4; ++ LIR_Opr _tmp5; ++ LIR_Condition _condition; ++ ++ public: ++ LIR_Op4(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr opr3, LIR_Opr opr4, ++ LIR_Opr result, BasicType type) ++ : LIR_Op(code, result, NULL) ++ , _opr1(opr1) ++ , _opr2(opr2) ++ , _opr3(opr3) ++ , _opr4(opr4) ++ , _type(type) ++ , _tmp1(LIR_OprFact::illegalOpr) ++ , _tmp2(LIR_OprFact::illegalOpr) ++ , _tmp3(LIR_OprFact::illegalOpr) ++ , _tmp4(LIR_OprFact::illegalOpr) ++ , _tmp5(LIR_OprFact::illegalOpr) ++ , _condition(condition) { ++ assert(code == lir_cmove, "code check"); ++ assert(type != T_ILLEGAL, "cmove should have type"); ++ } ++ ++ LIR_Opr in_opr1() const { return _opr1; } ++ LIR_Opr in_opr2() const { return _opr2; } ++ LIR_Opr in_opr3() const { return _opr3; } ++ LIR_Opr in_opr4() const { 
return _opr4; } ++ BasicType type() const { return _type; } ++ LIR_Opr tmp1_opr() const { return _tmp1; } ++ LIR_Opr tmp2_opr() const { return _tmp2; } ++ LIR_Opr tmp3_opr() const { return _tmp3; } ++ LIR_Opr tmp4_opr() const { return _tmp4; } ++ LIR_Opr tmp5_opr() const { return _tmp5; } ++ ++ LIR_Condition condition() const { return _condition; } ++ void set_condition(LIR_Condition condition) { _condition = condition; } ++ ++ void set_in_opr1(LIR_Opr opr) { _opr1 = opr; } ++ void set_in_opr2(LIR_Opr opr) { _opr2 = opr; } ++ void set_in_opr3(LIR_Opr opr) { _opr3 = opr; } ++ void set_in_opr4(LIR_Opr opr) { _opr4 = opr; } ++ virtual void emit_code(LIR_Assembler* masm); ++ virtual LIR_Op4* as_Op4() { return this; } ++ ++ virtual void print_instr(outputStream* out) const PRODUCT_RETURN; ++}; + + //-------------------------------- + class LabelObj: public CompilationResourceObj { +@@ -1988,6 +2054,10 @@ class LIR_List: public CompilationResourceObj { + const char * _file; + int _line; + #endif ++#ifdef RISCV ++ LIR_Opr _cmp_opr1; ++ LIR_Opr _cmp_opr2; ++#endif + + public: + void append(LIR_Op* op) { +@@ -2000,6 +2070,12 @@ class LIR_List: public CompilationResourceObj { + } + #endif // PRODUCT + ++#ifdef RISCV ++ set_cmp_oprs(op); ++ // lir_cmp set cmp oprs only on riscv ++ if (op->code() == lir_cmp) return; ++#endif ++ + _operations.append(op); + + #ifdef ASSERT +@@ -2016,6 +2092,10 @@ class LIR_List: public CompilationResourceObj { + void set_file_and_line(const char * file, int line); + #endif + ++#ifdef RISCV ++ void set_cmp_oprs(LIR_Op* op); ++#endif ++ + //---------- accessors --------------- + LIR_OpList* instructions_list() { return &_operations; } + int length() const { return _operations.length(); } +@@ -2149,8 +2229,9 @@ class LIR_List: public CompilationResourceObj { + void cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info); + void cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Address* addr, CodeEmitInfo* info); + +- void cmove(LIR_Condition condition, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { +- append(new LIR_Op2(lir_cmove, condition, src1, src2, dst, type)); ++ void cmove(LIR_Condition condition, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type, ++ LIR_Opr cmp_opr1 = LIR_OprFact::illegalOpr, LIR_Opr cmp_opr2 = LIR_OprFact::illegalOpr) { ++ append(new LIR_Op4(lir_cmove, condition, src1, src2, cmp_opr1, cmp_opr2, dst, type)); + } + + void cas_long(LIR_Opr addr, LIR_Opr cmp_value, LIR_Opr new_value, +diff --git a/src/hotspot/share/c1/c1_LIRAssembler.cpp b/src/hotspot/share/c1/c1_LIRAssembler.cpp +index 160483d5f74..42a0350f7d9 100644 +--- a/src/hotspot/share/c1/c1_LIRAssembler.cpp ++++ b/src/hotspot/share/c1/c1_LIRAssembler.cpp +@@ -709,10 +709,6 @@ void LIR_Assembler::emit_op2(LIR_Op2* op) { + comp_fl2i(op->code(), op->in_opr1(), op->in_opr2(), op->result_opr(), op); + break; + +- case lir_cmove: +- cmove(op->condition(), op->in_opr1(), op->in_opr2(), op->result_opr(), op->type()); +- break; +- + case lir_shl: + case lir_shr: + case lir_ushr: +@@ -776,6 +772,17 @@ void LIR_Assembler::emit_op2(LIR_Op2* op) { + } + } + ++void LIR_Assembler::emit_op4(LIR_Op4* op) { ++ switch(op->code()) { ++ case lir_cmove: ++ cmove(op->condition(), op->in_opr1(), op->in_opr2(), op->result_opr(), op->type(), op->in_opr3(), op->in_opr4()); ++ break; ++ ++ default: ++ Unimplemented(); ++ break; ++ } ++} + + void LIR_Assembler::build_frame() { + _masm->build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes()); +diff --git 
a/src/hotspot/share/c1/c1_LIRAssembler.hpp b/src/hotspot/share/c1/c1_LIRAssembler.hpp +index 44a5bcbe542..c677bd346fc 100644 +--- a/src/hotspot/share/c1/c1_LIRAssembler.hpp ++++ b/src/hotspot/share/c1/c1_LIRAssembler.hpp +@@ -190,6 +190,7 @@ class LIR_Assembler: public CompilationResourceObj { + void emit_op1(LIR_Op1* op); + void emit_op2(LIR_Op2* op); + void emit_op3(LIR_Op3* op); ++ void emit_op4(LIR_Op4* op); + void emit_opBranch(LIR_OpBranch* op); + void emit_opLabel(LIR_OpLabel* op); + void emit_arraycopy(LIR_OpArrayCopy* op); +@@ -222,8 +223,8 @@ class LIR_Assembler: public CompilationResourceObj { + void volatile_move_op(LIR_Opr src, LIR_Opr result, BasicType type, CodeEmitInfo* info); + void comp_mem_op(LIR_Opr src, LIR_Opr result, BasicType type, CodeEmitInfo* info); // info set for null exceptions + void comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr result, LIR_Op2* op); +- void cmove(LIR_Condition code, LIR_Opr left, LIR_Opr right, LIR_Opr result, BasicType type); +- ++ void cmove(LIR_Condition code, LIR_Opr left, LIR_Opr right, LIR_Opr result, BasicType type, ++ LIR_Opr cmp_opr1 = LIR_OprFact::illegalOpr, LIR_Opr cmp_opr2 = LIR_OprFact::illegalOpr); + void call( LIR_OpJavaCall* op, relocInfo::relocType rtype); + void ic_call( LIR_OpJavaCall* op); + void vtable_call( LIR_OpJavaCall* op); +diff --git a/src/hotspot/share/c1/c1_LinearScan.cpp b/src/hotspot/share/c1/c1_LinearScan.cpp +index c28055fd996..a4dfe8552ae 100644 +--- a/src/hotspot/share/c1/c1_LinearScan.cpp ++++ b/src/hotspot/share/c1/c1_LinearScan.cpp +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2005, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -1242,11 +1242,11 @@ void LinearScan::add_register_hints(LIR_Op* op) { + break; + } + case lir_cmove: { +- assert(op->as_Op2() != NULL, "lir_cmove must be LIR_Op2"); +- LIR_Op2* cmove = (LIR_Op2*)op; ++ assert(op->as_Op4() != NULL, "lir_cmove must be LIR_Op4"); ++ LIR_Op4* cmove = (LIR_Op4*)op; + + LIR_Opr move_from = cmove->in_opr1(); +- LIR_Opr move_to = cmove->result_opr(); ++ LIR_Opr move_to = cmove->result_opr(); + + if (move_to->is_register() && move_from->is_register()) { + Interval* from = interval_at(reg_num(move_from)); +@@ -3140,6 +3140,9 @@ void LinearScan::do_linear_scan() { + } + } + ++#ifndef RISCV ++ // Disable these optimizations on riscv temporarily, because it does not ++ // work when the comparison operands are bound to branches or cmoves. + { TIME_LINEAR_SCAN(timer_optimize_lir); + + EdgeMoveOptimizer::optimize(ir()->code()); +@@ -3147,6 +3150,7 @@ void LinearScan::do_linear_scan() { + // check that cfg is still correct after optimizations + ir()->verify(); + } ++#endif + + NOT_PRODUCT(print_lir(1, "Before Code Generation", false)); + NOT_PRODUCT(LinearScanStatistic::compute(this, _stat_final)); +@@ -6284,14 +6288,14 @@ void ControlFlowOptimizer::delete_unnecessary_jumps(BlockList* code) { + // There might be a cmove inserted for profiling which depends on the same + // compare. If we change the condition of the respective compare, we have + // to take care of this cmove as well. 
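Aside: the c1_LIR changes above all serve one point: RISC-V has no condition-flags register, so a `lir_cmp` cannot leave its result "in the flags" for a later branch or conditional move. Instead, `LIR_List::set_cmp_oprs()` remembers the operands of the most recent compare and attaches them to the branch (now a `LIR_Op2`) or cmove (now a `LIR_Op4` with `opr3`/`opr4`), while the `lir_cmp` itself is dropped from the RISC-V instruction stream; that binding is also why the LinearScan hunk above disables `EdgeMoveOptimizer` on RISC-V for now, since moving code across block boundaries could separate a branch from the operands it now carries. A schematic contrast of the two lowering styles (toy code, not HotSpot classes):

```cpp
#include <cstdio>

// Toy model of two lowerings for "x = (a < b) ? y : x;".
// Flag-based ISAs: cmp writes hidden flags, cmove reads them later.
// RISC-V style: the conditional operation carries both compare operands
// itself, which is what set_cmp_oprs() arranges in the hunks above.
struct FlagCPU {
  bool lt = false;
  void cmp(int a, int b)            { lt = (a < b); }       // writes "flags"
  int  cmove_lt(int x, int y) const { return lt ? y : x; }  // reads "flags"
};

struct RiscvStyle {
  // No hidden state: the cmove is handed the operands of the comparison it
  // depends on (mirroring LIR_Op4's opr3/opr4).
  static int cmove_lt(int a, int b, int x, int y) { return (a < b) ? y : x; }
};

int main() {
  FlagCPU f;
  f.cmp(1, 2);
  printf("flags-based : %d\n", f.cmove_lt(10, 20));                // 20
  printf("riscv-style : %d\n", RiscvStyle::cmove_lt(1, 2, 10, 20)); // 20
  return 0;
}
```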
+- LIR_Op2* prev_cmove = NULL; ++ LIR_Op4* prev_cmove = NULL; + + for(int j = instructions->length() - 3; j >= 0 && prev_cmp == NULL; j--) { + prev_op = instructions->at(j); + // check for the cmove + if (prev_op->code() == lir_cmove) { +- assert(prev_op->as_Op2() != NULL, "cmove must be of type LIR_Op2"); +- prev_cmove = (LIR_Op2*)prev_op; ++ assert(prev_op->as_Op4() != NULL, "cmove must be of type LIR_Op4"); ++ prev_cmove = (LIR_Op4*)prev_op; + assert(prev_branch->cond() == prev_cmove->condition(), "should be the same"); + } + if (prev_op->code() == lir_cmp) { +diff --git a/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp b/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp +index 4771a8b8652..6d377fa005d 100644 +--- a/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp ++++ b/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2018, 2021, Red Hat, Inc. All rights reserved. ++ * Copyright (c) 2018, 2022, Red Hat, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -31,7 +31,7 @@ + #include "utilities/defaultStream.hpp" + + void ShenandoahArguments::initialize() { +-#if !(defined AARCH64 || defined AMD64 || defined IA32) ++#if !(defined AARCH64 || defined AMD64 || defined IA32 || defined RISCV64) + vm_exit_during_initialization("Shenandoah GC is not supported on this platform."); + #endif + +diff --git a/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp b/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp +index 9f8ce742433..f36dd612eff 100644 +--- a/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp ++++ b/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -100,7 +100,7 @@ class LIR_OpZLoadBarrierTest : public LIR_Op { + + public: + LIR_OpZLoadBarrierTest(LIR_Opr opr) : +- LIR_Op(), ++ LIR_Op(lir_zloadbarrier_test, LIR_OprFact::illegalOpr, NULL), + _opr(opr) {} + + virtual void visit(LIR_OpVisitState* state) { +diff --git a/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp b/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp +index e01a242a57e..ff16de0e778 100644 +--- a/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp ++++ b/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp +@@ -102,7 +102,7 @@ inline T JfrBigEndian::read_unaligned(const address location) { + inline bool JfrBigEndian::platform_supports_unaligned_reads(void) { + #if defined(IA32) || defined(AMD64) || defined(PPC) || defined(S390) + return true; +-#elif defined(SPARC) || defined(ARM) || defined(AARCH64) ++#elif defined(SPARC) || defined(ARM) || defined(AARCH64) || defined(RISCV) + return false; + #else + #warning "Unconfigured platform" +diff --git a/src/hotspot/share/opto/regmask.hpp b/src/hotspot/share/opto/regmask.hpp +index c64d0879592..bc856d4b617 100644 +--- a/src/hotspot/share/opto/regmask.hpp ++++ b/src/hotspot/share/opto/regmask.hpp +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
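Aside: the `jfrBigEndian.hpp` change above makes `platform_supports_unaligned_reads()` report `false` for RISCV, so JFR reads multi-byte values bytewise instead of dereferencing a possibly misaligned pointer. The portable way to express such a read in ordinary C++ is a `memcpy` into an aligned local, for example:

```cpp
#include <cstring>
#include <cstdint>
#include <cstdio>

// Read a 32-bit value from an address of unknown alignment without issuing
// a misaligned load (which can trap or be very slow on some RISC-V cores).
static uint32_t read_u32_unaligned(const unsigned char* p) {
  uint32_t v;
  std::memcpy(&v, p, sizeof(v));   // compilers lower this to suitable loads
  return v;
}

int main() {
  unsigned char buf[8] = {0, 0x78, 0x56, 0x34, 0x12, 0, 0, 0};
  printf("0x%08x\n", read_u32_unaligned(buf + 1));  // 0x12345678 (little-endian host)
  return 0;
}
```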
+ * + * This code is free software; you can redistribute it and/or modify it +diff --git a/src/hotspot/share/runtime/abstract_vm_version.cpp b/src/hotspot/share/runtime/abstract_vm_version.cpp +index c46247f2bdb..b5e64b65ff7 100644 +--- a/src/hotspot/share/runtime/abstract_vm_version.cpp ++++ b/src/hotspot/share/runtime/abstract_vm_version.cpp +@@ -196,7 +196,8 @@ const char* Abstract_VM_Version::jre_release_version() { + IA32_ONLY("x86") \ + IA64_ONLY("ia64") \ + S390_ONLY("s390") \ +- SPARC_ONLY("sparc") ++ SPARC_ONLY("sparc") \ ++ RISCV64_ONLY("riscv64") + #endif // !ZERO + #endif // !CPU + +diff --git a/src/hotspot/share/runtime/synchronizer.cpp b/src/hotspot/share/runtime/synchronizer.cpp +index e7b32723e47..434826853ee 100644 +--- a/src/hotspot/share/runtime/synchronizer.cpp ++++ b/src/hotspot/share/runtime/synchronizer.cpp +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 1998, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1998, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +diff --git a/src/hotspot/share/runtime/thread.hpp b/src/hotspot/share/runtime/thread.hpp +index aa914eccafc..a2f98e6a251 100644 +--- a/src/hotspot/share/runtime/thread.hpp ++++ b/src/hotspot/share/runtime/thread.hpp +@@ -1234,7 +1234,7 @@ class JavaThread: public Thread { + address last_Java_pc(void) { return _anchor.last_Java_pc(); } + + // Safepoint support +-#if !(defined(PPC64) || defined(AARCH64)) ++#if !(defined(PPC64) || defined(AARCH64) || defined(RISCV64)) + JavaThreadState thread_state() const { return _thread_state; } + void set_thread_state(JavaThreadState s) { + assert(current_or_null() == NULL || current_or_null() == this, +diff --git a/src/hotspot/share/runtime/thread.inline.hpp b/src/hotspot/share/runtime/thread.inline.hpp +index dee8534f739..9af07aeb459 100644 +--- a/src/hotspot/share/runtime/thread.inline.hpp ++++ b/src/hotspot/share/runtime/thread.inline.hpp +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2012, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2012, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, Azul Systems, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
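Aside: excluding RISCV64 from the plain `thread_state()` accessors above routes the port through the PPC64/AArch64 variant, where the field is read with `OrderAccess::load_acquire` (as the `thread.inline.hpp` hunk that continues below shows) and paired with a release store on the write side, because these are weakly ordered CPUs. The same publish/observe pattern in portable C++:

```cpp
#include <atomic>
#include <cstdio>
#include <thread>

// On weakly ordered CPUs (RISC-V included), publishing a state change that
// another thread polls needs explicit acquire/release ordering.
static std::atomic<int> thread_state{0};
static int guarded_data = 0;

int main() {
  std::thread t([] {
    guarded_data = 42;                                        // ordinary store
    thread_state.store(1, std::memory_order_release);         // publish
  });
  while (thread_state.load(std::memory_order_acquire) != 1) {} // observe
  printf("%d\n", guarded_data);   // guaranteed to print 42
  t.join();
  return 0;
}
```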
+ * +@@ -142,7 +142,7 @@ inline void JavaThread::set_pending_async_exception(oop e) { + set_has_async_exception(); + } + +-#if defined(PPC64) || defined (AARCH64) ++#if defined(PPC64) || defined (AARCH64) || defined(RISCV64) + inline JavaThreadState JavaThread::thread_state() const { + return (JavaThreadState) OrderAccess::load_acquire((volatile jint*)&_thread_state); + } +diff --git a/src/hotspot/share/utilities/macros.hpp b/src/hotspot/share/utilities/macros.hpp +index cf802538689..e8ab3097ac7 100644 +--- a/src/hotspot/share/utilities/macros.hpp ++++ b/src/hotspot/share/utilities/macros.hpp +@@ -597,6 +597,32 @@ + + #define MACOS_AARCH64_ONLY(x) MACOS_ONLY(AARCH64_ONLY(x)) + ++#if defined(RISCV32) || defined(RISCV64) ++#define RISCV ++#define RISCV_ONLY(code) code ++#define NOT_RISCV(code) ++#else ++#undef RISCV ++#define RISCV_ONLY(code) ++#define NOT_RISCV(code) code ++#endif ++ ++#ifdef RISCV32 ++#define RISCV32_ONLY(code) code ++#define NOT_RISCV32(code) ++#else ++#define RISCV32_ONLY(code) ++#define NOT_RISCV32(code) code ++#endif ++ ++#ifdef RISCV64 ++#define RISCV64_ONLY(code) code ++#define NOT_RISCV64(code) ++#else ++#define RISCV64_ONLY(code) ++#define NOT_RISCV64(code) code ++#endif ++ + #ifdef VM_LITTLE_ENDIAN + #define LITTLE_ENDIAN_ONLY(code) code + #define BIG_ENDIAN_ONLY(code) +diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c +index 0d834302c57..45a927fb5ee 100644 +--- a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c ++++ b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2002, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -58,6 +58,10 @@ + #include "sun_jvm_hotspot_debugger_aarch64_AARCH64ThreadContext.h" + #endif + ++#ifdef riscv64 ++#include "sun_jvm_hotspot_debugger_riscv64_RISCV64ThreadContext.h" ++#endif ++ + static jfieldID p_ps_prochandle_ID = 0; + static jfieldID threadList_ID = 0; + static jfieldID loadObjectList_ID = 0; +@@ -397,7 +401,7 @@ JNIEXPORT jbyteArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo + return (err == PS_OK)? 
array : 0; + } + +-#if defined(i386) || defined(amd64) || defined(sparc) || defined(sparcv9) | defined(ppc64) || defined(ppc64le) || defined(aarch64) ++#if defined(i386) || defined(amd64) || defined(sparc) || defined(sparcv9) | defined(ppc64) || defined(ppc64le) || defined(aarch64) || defined(riscv64) + JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLocal_getThreadIntegerRegisterSet0 + (JNIEnv *env, jobject this_obj, jint lwp_id) { + +@@ -425,6 +429,9 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo + #if defined(sparc) || defined(sparcv9) + #define NPRGREG sun_jvm_hotspot_debugger_sparc_SPARCThreadContext_NPRGREG + #endif ++#ifdef riscv64 ++#define NPRGREG sun_jvm_hotspot_debugger_riscv64_RISCV64ThreadContext_NPRGREG ++#endif + #if defined(ppc64) || defined(ppc64le) + #define NPRGREG sun_jvm_hotspot_debugger_ppc64_PPC64ThreadContext_NPRGREG + #endif +@@ -534,6 +541,44 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo + } + #endif /* aarch64 */ + ++#if defined(riscv64) ++#define REG_INDEX(reg) sun_jvm_hotspot_debugger_riscv64_RISCV64ThreadContext_##reg ++ ++ regs[REG_INDEX(PC)] = gregs.pc; ++ regs[REG_INDEX(LR)] = gregs.ra; ++ regs[REG_INDEX(SP)] = gregs.sp; ++ regs[REG_INDEX(R3)] = gregs.gp; ++ regs[REG_INDEX(R4)] = gregs.tp; ++ regs[REG_INDEX(R5)] = gregs.t0; ++ regs[REG_INDEX(R6)] = gregs.t1; ++ regs[REG_INDEX(R7)] = gregs.t2; ++ regs[REG_INDEX(R8)] = gregs.s0; ++ regs[REG_INDEX(R9)] = gregs.s1; ++ regs[REG_INDEX(R10)] = gregs.a0; ++ regs[REG_INDEX(R11)] = gregs.a1; ++ regs[REG_INDEX(R12)] = gregs.a2; ++ regs[REG_INDEX(R13)] = gregs.a3; ++ regs[REG_INDEX(R14)] = gregs.a4; ++ regs[REG_INDEX(R15)] = gregs.a5; ++ regs[REG_INDEX(R16)] = gregs.a6; ++ regs[REG_INDEX(R17)] = gregs.a7; ++ regs[REG_INDEX(R18)] = gregs.s2; ++ regs[REG_INDEX(R19)] = gregs.s3; ++ regs[REG_INDEX(R20)] = gregs.s4; ++ regs[REG_INDEX(R21)] = gregs.s5; ++ regs[REG_INDEX(R22)] = gregs.s6; ++ regs[REG_INDEX(R23)] = gregs.s7; ++ regs[REG_INDEX(R24)] = gregs.s8; ++ regs[REG_INDEX(R25)] = gregs.s9; ++ regs[REG_INDEX(R26)] = gregs.s10; ++ regs[REG_INDEX(R27)] = gregs.s11; ++ regs[REG_INDEX(R28)] = gregs.t3; ++ regs[REG_INDEX(R29)] = gregs.t4; ++ regs[REG_INDEX(R30)] = gregs.t5; ++ regs[REG_INDEX(R31)] = gregs.t6; ++ ++#endif /* riscv64 */ ++ + #if defined(ppc64) || defined(ppc64le) + #define REG_INDEX(reg) sun_jvm_hotspot_debugger_ppc64_PPC64ThreadContext_##reg + +diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h b/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h +index 8318e8e0213..ab092d4ee33 100644 +--- a/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h ++++ b/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2003, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
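Aside: the `REG_INDEX` block above is simply the standard RISC-V integer-register ABI laid out by index: x0..x31 map to zero/ra/sp/gp/tp, t0-t6, s0-s11 and a0-a7, and the SA stores them in that numeric order. A small lookup table for reference, matching the order the hunk fills `regs[]`:

```cpp
#include <cstdio>

// RISC-V integer registers x0..x31 and their ABI mnemonics, in the same
// numeric order used by the SA register array above.
static const char* const abi_names[32] = {
  "zero", "ra", "sp",  "gp",  "tp", "t0", "t1", "t2",
  "s0/fp","s1", "a0",  "a1",  "a2", "a3", "a4", "a5",
  "a6",   "a7", "s2",  "s3",  "s4", "s5", "s6", "s7",
  "s8",   "s9", "s10", "s11", "t3", "t4", "t5", "t6"
};

int main() {
  for (int i = 0; i < 32; i++) {
    printf("x%-2d = %s\n", i, abi_names[i]);
  }
  return 0;
}
```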
+ * + * This code is free software; you can redistribute it and/or modify it +@@ -43,6 +43,8 @@ + #elif defined(arm) + #include + #define user_regs_struct pt_regs ++#elif defined(riscv64) ++#include + #endif + + // This C bool type must be int for compatibility with Linux calls and +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java +index 0f5f0119c73..9bff9ee9b15 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java +@@ -36,6 +36,7 @@ + import sun.jvm.hotspot.debugger.MachineDescriptionAMD64; + import sun.jvm.hotspot.debugger.MachineDescriptionPPC64; + import sun.jvm.hotspot.debugger.MachineDescriptionAArch64; ++import sun.jvm.hotspot.debugger.MachineDescriptionRISCV64; + import sun.jvm.hotspot.debugger.MachineDescriptionIntelX86; + import sun.jvm.hotspot.debugger.MachineDescriptionSPARC32Bit; + import sun.jvm.hotspot.debugger.MachineDescriptionSPARC64Bit; +@@ -598,6 +599,8 @@ private void setupDebuggerLinux() { + } else { + machDesc = new MachineDescriptionSPARC32Bit(); + } ++ } else if (cpu.equals("riscv64")) { ++ machDesc = new MachineDescriptionRISCV64(); + } else { + try { + machDesc = (MachineDescription) +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionRISCV64.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionRISCV64.java +new file mode 100644 +index 00000000000..a972516dee3 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionRISCV64.java +@@ -0,0 +1,40 @@ ++/* ++ * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
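Aside: `HotSpotAgent` above now maps the `"riscv64"` cpu string to a new `MachineDescriptionRISCV64`, whose body (just below) states the three facts the SA needs about the target: 8-byte addresses, LP64, and little-endian byte order. A quick native probe of those properties, for reference only:

```cpp
#include <cstdint>
#include <cstdio>

int main() {
  // Linux RISC-V is little-endian with 8-byte pointers, which is what
  // getAddressSize() == 8 and isBigEndian() == false below encode.
  uint16_t probe = 0x0102;
  bool little = (*reinterpret_cast<unsigned char*>(&probe) == 0x02);
  printf("host is %s-endian, pointer size %zu bytes\n",
         little ? "little" : "big", sizeof(void*));
  return 0;
}
```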
++ * ++ */ ++ ++package sun.jvm.hotspot.debugger; ++ ++public class MachineDescriptionRISCV64 extends MachineDescriptionTwosComplement implements MachineDescription { ++ public long getAddressSize() { ++ return 8; ++ } ++ ++ public boolean isLP64() { ++ return true; ++ } ++ ++ public boolean isBigEndian() { ++ return false; ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java +index 5e5a6bb7141..dc0bcb3da94 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2003, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * +@@ -34,12 +34,14 @@ + import sun.jvm.hotspot.debugger.amd64.*; + import sun.jvm.hotspot.debugger.aarch64.*; + import sun.jvm.hotspot.debugger.sparc.*; ++import sun.jvm.hotspot.debugger.riscv64.*; + import sun.jvm.hotspot.debugger.ppc64.*; + import sun.jvm.hotspot.debugger.linux.x86.*; + import sun.jvm.hotspot.debugger.linux.amd64.*; + import sun.jvm.hotspot.debugger.linux.sparc.*; + import sun.jvm.hotspot.debugger.linux.ppc64.*; + import sun.jvm.hotspot.debugger.linux.aarch64.*; ++import sun.jvm.hotspot.debugger.linux.riscv64.*; + import sun.jvm.hotspot.utilities.*; + + class LinuxCDebugger implements CDebugger { +@@ -116,7 +118,14 @@ public CFrame topFrameForThread(ThreadProxy thread) throws DebuggerException { + Address pc = context.getRegisterAsAddress(AARCH64ThreadContext.PC); + if (pc == null) return null; + return new LinuxAARCH64CFrame(dbg, fp, pc); +- } else { ++ } else if (cpu.equals("riscv64")) { ++ RISCV64ThreadContext context = (RISCV64ThreadContext) thread.getContext(); ++ Address fp = context.getRegisterAsAddress(RISCV64ThreadContext.FP); ++ if (fp == null) return null; ++ Address pc = context.getRegisterAsAddress(RISCV64ThreadContext.PC); ++ if (pc == null) return null; ++ return new LinuxRISCV64CFrame(dbg, fp, pc); ++ } else { + // Runtime exception thrown by LinuxThreadContextFactory if unknown cpu + ThreadContext context = (ThreadContext) thread.getContext(); + return context.getTopFrame(dbg); +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64CFrame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64CFrame.java +new file mode 100644 +index 00000000000..f06da24bd0e +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64CFrame.java +@@ -0,0 +1,90 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.linux.riscv64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.riscv64.*; ++import sun.jvm.hotspot.debugger.linux.*; ++import sun.jvm.hotspot.debugger.cdbg.*; ++import sun.jvm.hotspot.debugger.cdbg.basic.*; ++ ++public final class LinuxRISCV64CFrame extends BasicCFrame { ++ private static final int C_FRAME_LINK_OFFSET = -2; ++ private static final int C_FRAME_RETURN_ADDR_OFFSET = -1; ++ ++ public LinuxRISCV64CFrame(LinuxDebugger dbg, Address fp, Address pc) { ++ super(dbg.getCDebugger()); ++ this.fp = fp; ++ this.pc = pc; ++ this.dbg = dbg; ++ } ++ ++ // override base class impl to avoid ELF parsing ++ public ClosestSymbol closestSymbolToPC() { ++ // try native lookup in debugger. ++ return dbg.lookup(dbg.getAddressValue(pc())); ++ } ++ ++ public Address pc() { ++ return pc; ++ } ++ ++ public Address localVariableBase() { ++ return fp; ++ } ++ ++ public CFrame sender(ThreadProxy thread) { ++ RISCV64ThreadContext context = (RISCV64ThreadContext) thread.getContext(); ++ Address rsp = context.getRegisterAsAddress(RISCV64ThreadContext.SP); ++ ++ if ((fp == null) || fp.lessThan(rsp)) { ++ return null; ++ } ++ ++ // Check alignment of fp ++ if (dbg.getAddressValue(fp) % (2 * ADDRESS_SIZE) != 0) { ++ return null; ++ } ++ ++ Address nextFP = fp.getAddressAt(C_FRAME_LINK_OFFSET * ADDRESS_SIZE); ++ if (nextFP == null || nextFP.lessThanOrEqual(fp)) { ++ return null; ++ } ++ Address nextPC = fp.getAddressAt(C_FRAME_RETURN_ADDR_OFFSET * ADDRESS_SIZE); ++ if (nextPC == null) { ++ return null; ++ } ++ return new LinuxRISCV64CFrame(dbg, nextFP, nextPC); ++ } ++ ++ // package/class internals only ++ private static final int ADDRESS_SIZE = 8; ++ private Address pc; ++ private Address sp; ++ private Address fp; ++ private LinuxDebugger dbg; ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64ThreadContext.java +new file mode 100644 +index 00000000000..fdb841ccf3d +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64ThreadContext.java +@@ -0,0 +1,48 @@ ++/* ++ * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.linux.riscv64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.riscv64.*; ++import sun.jvm.hotspot.debugger.linux.*; ++ ++public class LinuxRISCV64ThreadContext extends RISCV64ThreadContext { ++ private LinuxDebugger debugger; ++ ++ public LinuxRISCV64ThreadContext(LinuxDebugger debugger) { ++ super(); ++ this.debugger = debugger; ++ } ++ ++ public void setRegisterAsAddress(int index, Address value) { ++ setRegister(index, debugger.getAddressValue(value)); ++ } ++ ++ public Address getRegisterAsAddress(int index) { ++ return debugger.newAddress(getRegister(index)); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64Thread.java +new file mode 100644 +index 00000000000..96d5dee47ce +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64Thread.java +@@ -0,0 +1,88 @@ ++/* ++ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.proc.riscv64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.riscv64.*; ++import sun.jvm.hotspot.debugger.proc.*; ++import sun.jvm.hotspot.utilities.*; ++ ++public class ProcRISCV64Thread implements ThreadProxy { ++ private ProcDebugger debugger; ++ private int id; ++ ++ public ProcRISCV64Thread(ProcDebugger debugger, Address addr) { ++ this.debugger = debugger; ++ ++ // FIXME: the size here should be configurable. However, making it ++ // so would produce a dependency on the "types" package from the ++ // debugger package, which is not desired. 
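LinuxRISCV64ThreadContext above (and the Proc/Remote variants that follow) only supply the debugger-specific long-to-Address conversion; the register numbering itself lives in the shared RISCV64ThreadContext defined later in this patch. A minimal reader's sketch, not part of the patch, of how such a context is typically consumed, using only the SP/FP/PC indices and accessors that this patch introduces:

    import sun.jvm.hotspot.debugger.Address;
    import sun.jvm.hotspot.debugger.ThreadProxy;
    import sun.jvm.hotspot.debugger.riscv64.RISCV64ThreadContext;

    class RISCV64ContextSketch {
        // Fetch the three registers the SA needs to start a stack walk.
        static Address[] frameRegisters(ThreadProxy thread) {
            RISCV64ThreadContext ctx = (RISCV64ThreadContext) thread.getContext();
            Address sp = ctx.getRegisterAsAddress(RISCV64ThreadContext.SP);  // x2
            Address fp = ctx.getRegisterAsAddress(RISCV64ThreadContext.FP);  // x8 (s0)
            Address pc = ctx.getRegisterAsAddress(RISCV64ThreadContext.PC);
            return new Address[] { sp, fp, pc };
        }
    }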
++ this.id = (int) addr.getCIntegerAt(0, 4, true); ++ } ++ ++ public ProcRISCV64Thread(ProcDebugger debugger, long id) { ++ this.debugger = debugger; ++ this.id = (int) id; ++ } ++ ++ public ThreadContext getContext() throws IllegalThreadStateException { ++ ProcRISCV64ThreadContext context = new ProcRISCV64ThreadContext(debugger); ++ long[] regs = debugger.getThreadIntegerRegisterSet(id); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(regs.length == RISCV64ThreadContext.NPRGREG, "size mismatch"); ++ } ++ for (int i = 0; i < regs.length; i++) { ++ context.setRegister(i, regs[i]); ++ } ++ return context; ++ } ++ ++ public boolean canSetContext() throws DebuggerException { ++ return false; ++ } ++ ++ public void setContext(ThreadContext context) ++ throws IllegalThreadStateException, DebuggerException { ++ throw new DebuggerException("Unimplemented"); ++ } ++ ++ public String toString() { ++ return "t@" + id; ++ } ++ ++ public boolean equals(Object obj) { ++ if ((obj == null) || !(obj instanceof ProcRISCV64Thread)) { ++ return false; ++ } ++ ++ return (((ProcRISCV64Thread) obj).id == id); ++ } ++ ++ public int hashCode() { ++ return id; ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadContext.java +new file mode 100644 +index 00000000000..f2aa845e665 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadContext.java +@@ -0,0 +1,48 @@ ++/* ++ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.proc.riscv64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.riscv64.*; ++import sun.jvm.hotspot.debugger.proc.*; ++ ++public class ProcRISCV64ThreadContext extends RISCV64ThreadContext { ++ private ProcDebugger debugger; ++ ++ public ProcRISCV64ThreadContext(ProcDebugger debugger) { ++ super(); ++ this.debugger = debugger; ++ } ++ ++ public void setRegisterAsAddress(int index, Address value) { ++ setRegister(index, debugger.getAddressValue(value)); ++ } ++ ++ public Address getRegisterAsAddress(int index) { ++ return debugger.newAddress(getRegister(index)); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadFactory.java +new file mode 100644 +index 00000000000..19f64b8ce2d +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadFactory.java +@@ -0,0 +1,46 @@ ++/* ++ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.proc.riscv64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.proc.*; ++ ++public class ProcRISCV64ThreadFactory implements ProcThreadFactory { ++ private ProcDebugger debugger; ++ ++ public ProcRISCV64ThreadFactory(ProcDebugger debugger) { ++ this.debugger = debugger; ++ } ++ ++ public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { ++ return new ProcRISCV64Thread(debugger, threadIdentifierAddr); ++ } ++ ++ public ThreadProxy createThreadWrapper(long id) { ++ return new ProcRISCV64Thread(debugger, id); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64Thread.java +new file mode 100644 +index 00000000000..aecbda59023 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64Thread.java +@@ -0,0 +1,55 @@ ++/* ++ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. 
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.remote.riscv64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.riscv64.*; ++import sun.jvm.hotspot.debugger.remote.*; ++import sun.jvm.hotspot.utilities.*; ++ ++public class RemoteRISCV64Thread extends RemoteThread { ++ public RemoteRISCV64Thread(RemoteDebuggerClient debugger, Address addr) { ++ super(debugger, addr); ++ } ++ ++ public RemoteRISCV64Thread(RemoteDebuggerClient debugger, long id) { ++ super(debugger, id); ++ } ++ ++ public ThreadContext getContext() throws IllegalThreadStateException { ++ RemoteRISCV64ThreadContext context = new RemoteRISCV64ThreadContext(debugger); ++ long[] regs = (addr != null)? debugger.getThreadIntegerRegisterSet(addr) : ++ debugger.getThreadIntegerRegisterSet(id); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(regs.length == RISCV64ThreadContext.NPRGREG, "size of register set must match"); ++ } ++ for (int i = 0; i < regs.length; i++) { ++ context.setRegister(i, regs[i]); ++ } ++ return context; ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadContext.java +new file mode 100644 +index 00000000000..1d3da6be5af +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadContext.java +@@ -0,0 +1,48 @@ ++/* ++ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.remote.riscv64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.riscv64.*; ++import sun.jvm.hotspot.debugger.remote.*; ++ ++public class RemoteRISCV64ThreadContext extends RISCV64ThreadContext { ++ private RemoteDebuggerClient debugger; ++ ++ public RemoteRISCV64ThreadContext(RemoteDebuggerClient debugger) { ++ super(); ++ this.debugger = debugger; ++ } ++ ++ public void setRegisterAsAddress(int index, Address value) { ++ setRegister(index, debugger.getAddressValue(value)); ++ } ++ ++ public Address getRegisterAsAddress(int index) { ++ return debugger.newAddress(getRegister(index)); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadFactory.java +new file mode 100644 +index 00000000000..725b94e25a3 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadFactory.java +@@ -0,0 +1,46 @@ ++/* ++ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.remote.riscv64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.remote.*; ++ ++public class RemoteRISCV64ThreadFactory implements RemoteThreadFactory { ++ private RemoteDebuggerClient debugger; ++ ++ public RemoteRISCV64ThreadFactory(RemoteDebuggerClient debugger) { ++ this.debugger = debugger; ++ } ++ ++ public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { ++ return new RemoteRISCV64Thread(debugger, threadIdentifierAddr); ++ } ++ ++ public ThreadProxy createThreadWrapper(long id) { ++ return new RemoteRISCV64Thread(debugger, id); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/risv64/RISCV64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/risv64/RISCV64ThreadContext.java +new file mode 100644 +index 00000000000..fb60a70427a +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/risv64/RISCV64ThreadContext.java +@@ -0,0 +1,172 @@ ++/* ++ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.riscv64; ++ ++import java.lang.annotation.Native; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.cdbg.*; ++ ++/** Specifies the thread context on riscv64 platforms; only a sub-portion ++ * of the context is guaranteed to be present on all operating ++ * systems. */ ++ ++public abstract class RISCV64ThreadContext implements ThreadContext { ++ // Taken from /usr/include/asm/sigcontext.h on Linux/RISCV64. ++ ++ // /* ++ // * Signal context structure - contains all info to do with the state ++ // * before the signal handler was invoked. 
++ // */ ++ // struct sigcontext { ++ // struct user_regs_struct sc_regs; ++ // union __riscv_fp_state sc_fpregs; ++ // }; ++ // ++ // struct user_regs_struct { ++ // unsigned long pc; ++ // unsigned long ra; ++ // unsigned long sp; ++ // unsigned long gp; ++ // unsigned long tp; ++ // unsigned long t0; ++ // unsigned long t1; ++ // unsigned long t2; ++ // unsigned long s0; ++ // unsigned long s1; ++ // unsigned long a0; ++ // unsigned long a1; ++ // unsigned long a2; ++ // unsigned long a3; ++ // unsigned long a4; ++ // unsigned long a5; ++ // unsigned long a6; ++ // unsigned long a7; ++ // unsigned long s2; ++ // unsigned long s3; ++ // unsigned long s4; ++ // unsigned long s5; ++ // unsigned long s6; ++ // unsigned long s7; ++ // unsigned long s8; ++ // unsigned long s9; ++ // unsigned long s10; ++ // unsigned long s11; ++ // unsigned long t3; ++ // unsigned long t4; ++ // unsigned long t5; ++ // unsigned long t6; ++ // }; ++ ++ // NOTE: the indices for the various registers must be maintained as ++ // listed across various operating systems. However, only a small ++ // subset of the registers' values are guaranteed to be present (and ++ // must be present for the SA's stack walking to work) ++ ++ // One instance of the Native annotation is enough to trigger header generation ++ // for this file. ++ @Native ++ public static final int R0 = 0; ++ public static final int R1 = 1; ++ public static final int R2 = 2; ++ public static final int R3 = 3; ++ public static final int R4 = 4; ++ public static final int R5 = 5; ++ public static final int R6 = 6; ++ public static final int R7 = 7; ++ public static final int R8 = 8; ++ public static final int R9 = 9; ++ public static final int R10 = 10; ++ public static final int R11 = 11; ++ public static final int R12 = 12; ++ public static final int R13 = 13; ++ public static final int R14 = 14; ++ public static final int R15 = 15; ++ public static final int R16 = 16; ++ public static final int R17 = 17; ++ public static final int R18 = 18; ++ public static final int R19 = 19; ++ public static final int R20 = 20; ++ public static final int R21 = 21; ++ public static final int R22 = 22; ++ public static final int R23 = 23; ++ public static final int R24 = 24; ++ public static final int R25 = 25; ++ public static final int R26 = 26; ++ public static final int R27 = 27; ++ public static final int R28 = 28; ++ public static final int R29 = 29; ++ public static final int R30 = 30; ++ public static final int R31 = 31; ++ ++ public static final int NPRGREG = 32; ++ ++ public static final int PC = R0; ++ public static final int LR = R1; ++ public static final int SP = R2; ++ public static final int FP = R8; ++ ++ private long[] data; ++ ++ public RISCV64ThreadContext() { ++ data = new long[NPRGREG]; ++ } ++ ++ public int getNumRegisters() { ++ return NPRGREG; ++ } ++ ++ public String getRegisterName(int index) { ++ switch (index) { ++ case LR: return "lr"; ++ case SP: return "sp"; ++ case PC: return "pc"; ++ default: ++ return "r" + index; ++ } ++ } ++ ++ public void setRegister(int index, long value) { ++ data[index] = value; ++ } ++ ++ public long getRegister(int index) { ++ return data[index]; ++ } ++ ++ public CFrame getTopFrame(Debugger dbg) { ++ return null; ++ } ++ ++ /** This can't be implemented in this class since we would have to ++ * tie the implementation to, for example, the debugging system */ ++ public abstract void setRegisterAsAddress(int index, Address value); ++ ++ /** This can't be implemented in this class since we would have to ++ * 
tie the implementation to, for example, the debugging system */ ++ public abstract Address getRegisterAsAddress(int index); ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java +index 190062785a7..89d676fe3b9 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -38,6 +38,7 @@ + import sun.jvm.hotspot.runtime.linux_x86.LinuxX86JavaThreadPDAccess; + import sun.jvm.hotspot.runtime.linux_amd64.LinuxAMD64JavaThreadPDAccess; + import sun.jvm.hotspot.runtime.linux_aarch64.LinuxAARCH64JavaThreadPDAccess; ++import sun.jvm.hotspot.runtime.linux_riscv64.LinuxRISCV64JavaThreadPDAccess; + import sun.jvm.hotspot.runtime.linux_ppc64.LinuxPPC64JavaThreadPDAccess; + import sun.jvm.hotspot.runtime.linux_sparc.LinuxSPARCJavaThreadPDAccess; + import sun.jvm.hotspot.runtime.bsd_x86.BsdX86JavaThreadPDAccess; +@@ -99,6 +100,8 @@ private static synchronized void initialize(TypeDataBase db) { + access = new LinuxPPC64JavaThreadPDAccess(); + } else if (cpu.equals("aarch64")) { + access = new LinuxAARCH64JavaThreadPDAccess(); ++ } else if (cpu.equals("riscv64")) { ++ access = new LinuxRISCV64JavaThreadPDAccess(); + } else { + try { + access = (JavaThreadPDAccess) +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java +new file mode 100644 +index 00000000000..f2e224f28ee +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java +@@ -0,0 +1,134 @@ ++/* ++ * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.linux_riscv64; ++ ++import java.io.*; ++import java.util.*; ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.riscv64.*; ++import sun.jvm.hotspot.runtime.*; ++import sun.jvm.hotspot.runtime.riscv64.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.utilities.*; ++import sun.jvm.hotspot.utilities.Observable; ++import sun.jvm.hotspot.utilities.Observer; ++ ++public class LinuxRISCV64JavaThreadPDAccess implements JavaThreadPDAccess { ++ private static AddressField lastJavaFPField; ++ private static AddressField osThreadField; ++ ++ // Field from OSThread ++ private static CIntegerField osThreadThreadIDField; ++ ++ // This is currently unneeded but is being kept in case we change ++ // the currentFrameGuess algorithm ++ private static final long GUESS_SCAN_RANGE = 128 * 1024; ++ ++ static { ++ VM.registerVMInitializedObserver(new Observer() { ++ public void update(Observable o, Object data) { ++ initialize(VM.getVM().getTypeDataBase()); ++ } ++ }); ++ } ++ ++ private static synchronized void initialize(TypeDataBase db) { ++ Type type = db.lookupType("JavaThread"); ++ osThreadField = type.getAddressField("_osthread"); ++ ++ Type anchorType = db.lookupType("JavaFrameAnchor"); ++ lastJavaFPField = anchorType.getAddressField("_last_Java_fp"); ++ ++ Type osThreadType = db.lookupType("OSThread"); ++ osThreadThreadIDField = osThreadType.getCIntegerField("_thread_id"); ++ } ++ ++ public Address getLastJavaFP(Address addr) { ++ return lastJavaFPField.getValue(addr.addOffsetTo(sun.jvm.hotspot.runtime.JavaThread.getAnchorField().getOffset())); ++ } ++ ++ public Address getLastJavaPC(Address addr) { ++ return null; ++ } ++ ++ public Address getBaseOfStackPointer(Address addr) { ++ return null; ++ } ++ ++ public Frame getLastFramePD(JavaThread thread, Address addr) { ++ Address fp = thread.getLastJavaFP(); ++ if (fp == null) { ++ return null; // no information ++ } ++ return new RISCV64Frame(thread.getLastJavaSP(), fp); ++ } ++ ++ public RegisterMap newRegisterMap(JavaThread thread, boolean updateMap) { ++ return new RISCV64RegisterMap(thread, updateMap); ++ } ++ ++ public Frame getCurrentFrameGuess(JavaThread thread, Address addr) { ++ ThreadProxy t = getThreadProxy(addr); ++ RISCV64ThreadContext context = (RISCV64ThreadContext) t.getContext(); ++ RISCV64CurrentFrameGuess guesser = new RISCV64CurrentFrameGuess(context, thread); ++ if (!guesser.run(GUESS_SCAN_RANGE)) { ++ return null; ++ } ++ if (guesser.getPC() == null) { ++ return new RISCV64Frame(guesser.getSP(), guesser.getFP()); ++ } else { ++ return new RISCV64Frame(guesser.getSP(), guesser.getFP(), guesser.getPC()); ++ } ++ } ++ ++ public void printThreadIDOn(Address addr, PrintStream tty) { ++ tty.print(getThreadProxy(addr)); ++ } ++ ++ public void printInfoOn(Address threadAddr, PrintStream tty) { ++ tty.print("Thread id: "); ++ printThreadIDOn(threadAddr, tty); ++ } ++ ++ public Address getLastSP(Address addr) { ++ ThreadProxy t = getThreadProxy(addr); ++ RISCV64ThreadContext context = (RISCV64ThreadContext) t.getContext(); ++ return context.getRegisterAsAddress(RISCV64ThreadContext.SP); ++ } ++ ++ public ThreadProxy getThreadProxy(Address addr) { ++ // Addr is the address of the JavaThread. 
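Once this PD-access class is registered by the Threads.java hunk above, RISC-V stack walking goes through the ordinary architecture-independent SA entry points. A hedged sketch, not part of the patch, assuming the standard JavaThread.getCurrentFrameGuess() wrapper (not shown here) and otherwise relying only on calls already used in these files (newRegisterMap, Frame.sender):

    import sun.jvm.hotspot.runtime.Frame;
    import sun.jvm.hotspot.runtime.JavaThread;
    import sun.jvm.hotspot.runtime.RegisterMap;

    class RISCV64StackWalkSketch {
        // Print every physical frame of a Java thread, starting from the guessed
        // top frame and following RISCV64Frame.sender() towards the entry frame.
        static void dumpRawFrames(JavaThread thread) {
            Frame frame = thread.getCurrentFrameGuess();    // delegates to the PD access above
            RegisterMap map = thread.newRegisterMap(false); // no oop-map updates needed
            while (frame != null) {
                System.out.println(frame);                  // RISCV64Frame.toString(): sp/fp/pc
                frame = frame.sender(map);
            }
        }
    }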
++ // Fetch the OSThread (for now and for simplicity, not making a ++ // separate "OSThread" class in this package) ++ Address osThreadAddr = osThreadField.getValue(addr); ++ // Get the address of the _thread_id from the OSThread ++ Address threadIdAddr = osThreadAddr.addOffsetTo(osThreadThreadIDField.getOffset()); ++ ++ JVMDebugger debugger = VM.getVM().getDebugger(); ++ return debugger.getThreadForIdentifierAddress(threadIdAddr); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64CurrentFrameGuess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64CurrentFrameGuess.java +new file mode 100644 +index 00000000000..34701c6922f +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64CurrentFrameGuess.java +@@ -0,0 +1,223 @@ ++/* ++ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2019, Red Hat Inc. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.riscv64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.riscv64.*; ++import sun.jvm.hotspot.code.*; ++import sun.jvm.hotspot.interpreter.*; ++import sun.jvm.hotspot.runtime.*; ++import sun.jvm.hotspot.runtime.riscv64.*; ++ ++/**
++ Should be able to be used on all riscv64 platforms we support
++ (Linux/riscv64) to implement JavaThread's "currentFrameGuess()"
++ functionality. Input is an RISCV64ThreadContext; output is SP, FP,
++ and PC for an RISCV64Frame. Instantiation of the RISCV64Frame is
++ left to the caller, since we may need to subclass RISCV64Frame to
++ support signal handler frames on Unix platforms.
++
++ Algorithm is to walk up the stack within a given range (say,
++ 512K at most) looking for a plausible PC and SP for a Java frame,
++ also considering those coming in from the context. If we find a PC
++ that belongs to the VM (i.e., in generated code like the
++ interpreter or CodeCache) then we try to find an associated FP.
++ We repeat this until we either find a complete frame or run out of
++ stack to look at.
*/ ++ ++public class RISCV64CurrentFrameGuess { ++ private RISCV64ThreadContext context; ++ private JavaThread thread; ++ private Address spFound; ++ private Address fpFound; ++ private Address pcFound; ++ ++ private static final boolean DEBUG = System.getProperty("sun.jvm.hotspot.runtime.riscv64.RISCV64Frame.DEBUG") ++ != null; ++ ++ public RISCV64CurrentFrameGuess(RISCV64ThreadContext context, ++ JavaThread thread) { ++ this.context = context; ++ this.thread = thread; ++ } ++ ++ /** Returns false if not able to find a frame within a reasonable range. */ ++ public boolean run(long regionInBytesToSearch) { ++ Address sp = context.getRegisterAsAddress(RISCV64ThreadContext.SP); ++ Address pc = context.getRegisterAsAddress(RISCV64ThreadContext.PC); ++ Address fp = context.getRegisterAsAddress(RISCV64ThreadContext.FP); ++ if (sp == null) { ++ // Bail out if no last java frame either ++ if (thread.getLastJavaSP() != null) { ++ setValues(thread.getLastJavaSP(), thread.getLastJavaFP(), null); ++ return true; ++ } ++ return false; ++ } ++ Address end = sp.addOffsetTo(regionInBytesToSearch); ++ VM vm = VM.getVM(); ++ ++ setValues(null, null, null); // Assume we're not going to find anything ++ ++ if (vm.isJavaPCDbg(pc)) { ++ if (vm.isClientCompiler()) { ++ // If the topmost frame is a Java frame, we are (pretty much) ++ // guaranteed to have a viable FP. We should be more robust ++ // than this (we have the potential for losing entire threads' ++ // stack traces) but need to see how much work we really have ++ // to do here. Searching the stack for an (SP, FP) pair is ++ // hard since it's easy to misinterpret inter-frame stack ++ // pointers as base-of-frame pointers; we also don't know the ++ // sizes of C1 frames (not registered in the nmethod) so can't ++ // derive them from SP. ++ ++ setValues(sp, fp, pc); ++ return true; ++ } else { ++ if (vm.getInterpreter().contains(pc)) { ++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: choosing interpreter frame: sp = " + ++ sp + ", fp = " + fp + ", pc = " + pc); ++ } ++ setValues(sp, fp, pc); ++ return true; ++ } ++ ++ // For the server compiler, FP is not guaranteed to be valid ++ // for compiled code. In addition, an earlier attempt at a ++ // non-searching algorithm (see below) failed because the ++ // stack pointer from the thread context was pointing ++ // (considerably) beyond the ostensible end of the stack, into ++ // garbage; walking from the topmost frame back caused a crash. ++ // ++ // This algorithm takes the current PC as a given and tries to ++ // find the correct corresponding SP by walking up the stack ++ // and repeatedly performing stackwalks (very inefficient). ++ // ++ // FIXME: there is something wrong with stackwalking across ++ // adapter frames...this is likely to be the root cause of the ++ // failure with the simpler algorithm below. ++ ++ for (long offset = 0; ++ offset < regionInBytesToSearch; ++ offset += vm.getAddressSize()) { ++ try { ++ Address curSP = sp.addOffsetTo(offset); ++ Frame frame = new RISCV64Frame(curSP, null, pc); ++ RegisterMap map = thread.newRegisterMap(false); ++ while (frame != null) { ++ if (frame.isEntryFrame() && frame.entryFrameIsFirst()) { ++ // We were able to traverse all the way to the ++ // bottommost Java frame. ++ // This sp looks good. Keep it. 
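++ // Reaching the bottommost entry frame without an exception means every
++ // intermediate sender() call produced a self-consistent frame, so this
++ // candidate sp is accepted (fp is deliberately left null in setValues below).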
++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: Choosing sp = " + curSP + ", pc = " + pc); ++ } ++ setValues(curSP, null, pc); ++ return true; ++ } ++ frame = frame.sender(map); ++ } ++ } catch (Exception e) { ++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: Exception " + e + " at offset " + offset); ++ } ++ // Bad SP. Try another. ++ } ++ } ++ ++ // Were not able to find a plausible SP to go with this PC. ++ // Bail out. ++ return false; ++ } ++ } else { ++ // If the current program counter was not known to us as a Java ++ // PC, we currently assume that we are in the run-time system ++ // and attempt to look to thread-local storage for saved SP and ++ // FP. Note that if these are null (because we were, in fact, ++ // in Java code, i.e., vtable stubs or similar, and the SA ++ // didn't have enough insight into the target VM to understand ++ // that) then we are going to lose the entire stack trace for ++ // the thread, which is sub-optimal. FIXME. ++ ++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: choosing last Java frame: sp = " + ++ thread.getLastJavaSP() + ", fp = " + thread.getLastJavaFP()); ++ } ++ if (thread.getLastJavaSP() == null) { ++ return false; // No known Java frames on stack ++ } ++ ++ // The runtime has a nasty habit of not saving fp in the frame ++ // anchor, leaving us to grovel about in the stack to find a ++ // plausible address. Fortunately, this only happens in ++ // compiled code; there we always have a valid PC, and we always ++ // push LR and FP onto the stack as a pair, with FP at the lower ++ // address. ++ pc = thread.getLastJavaPC(); ++ fp = thread.getLastJavaFP(); ++ sp = thread.getLastJavaSP(); ++ ++ if (fp == null) { ++ CodeCache cc = vm.getCodeCache(); ++ if (cc.contains(pc)) { ++ CodeBlob cb = cc.findBlob(pc); ++ if (DEBUG) { ++ System.out.println("FP is null. Found blob frame size " + cb.getFrameSize()); ++ } ++ // See if we can derive a frame pointer from SP and PC ++ long link_offset = cb.getFrameSize() - 2 * VM.getVM().getAddressSize(); ++ if (link_offset >= 0) { ++ fp = sp.addOffsetTo(link_offset); ++ } ++ } ++ } ++ ++ // We found a PC in the frame anchor. Check that it's plausible, and ++ // if it is, use it. ++ if (vm.isJavaPCDbg(pc)) { ++ setValues(sp, fp, pc); ++ } else { ++ setValues(sp, fp, null); ++ } ++ ++ return true; ++ } ++ } ++ ++ public Address getSP() { return spFound; } ++ public Address getFP() { return fpFound; } ++ /** May be null if getting values from thread-local storage; take ++ care to call the correct RISCV64Frame constructor to recover this if ++ necessary */ ++ public Address getPC() { return pcFound; } ++ ++ private void setValues(Address sp, Address fp, Address pc) { ++ spFound = sp; ++ fpFound = fp; ++ pcFound = pc; ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java +new file mode 100644 +index 00000000000..df280005d72 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java +@@ -0,0 +1,556 @@ ++/* ++ * Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2019, Red Hat Inc. ++ * Copyright (c) 2021, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.riscv64; ++ ++import java.util.*; ++import sun.jvm.hotspot.code.*; ++import sun.jvm.hotspot.compiler.*; ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.oops.*; ++import sun.jvm.hotspot.runtime.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.utilities.*; ++import sun.jvm.hotspot.utilities.Observable; ++import sun.jvm.hotspot.utilities.Observer; ++ ++/** Specialization of and implementation of abstract methods of the ++ Frame class for the riscv64 family of CPUs. */ ++ ++public class RISCV64Frame extends Frame { ++ private static final boolean DEBUG; ++ static { ++ DEBUG = System.getProperty("sun.jvm.hotspot.runtime.RISCV64.RISCV64Frame.DEBUG") != null; ++ } ++ ++ // Java frames ++ private static final int LINK_OFFSET = -2; ++ private static final int RETURN_ADDR_OFFSET = -1; ++ private static final int SENDER_SP_OFFSET = 0; ++ ++ // Interpreter frames ++ private static final int INTERPRETER_FRAME_SENDER_SP_OFFSET = -3; ++ private static final int INTERPRETER_FRAME_LAST_SP_OFFSET = INTERPRETER_FRAME_SENDER_SP_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_METHOD_OFFSET = INTERPRETER_FRAME_LAST_SP_OFFSET - 1; ++ private static int INTERPRETER_FRAME_MDX_OFFSET; // Non-core builds only ++ private static int INTERPRETER_FRAME_PADDING_OFFSET; ++ private static int INTERPRETER_FRAME_MIRROR_OFFSET; ++ private static int INTERPRETER_FRAME_CACHE_OFFSET; ++ private static int INTERPRETER_FRAME_LOCALS_OFFSET; ++ private static int INTERPRETER_FRAME_BCX_OFFSET; ++ private static int INTERPRETER_FRAME_INITIAL_SP_OFFSET; ++ private static int INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET; ++ private static int INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET; ++ ++ // Entry frames ++ private static int ENTRY_FRAME_CALL_WRAPPER_OFFSET = -10; ++ ++ // Native frames ++ private static final int NATIVE_FRAME_INITIAL_PARAM_OFFSET = 2; ++ ++ private static VMReg fp = new VMReg(8); ++ ++ static { ++ VM.registerVMInitializedObserver(new Observer() { ++ public void update(Observable o, Object data) { ++ initialize(VM.getVM().getTypeDataBase()); ++ } ++ }); ++ } ++ ++ private static synchronized void initialize(TypeDataBase db) { ++ INTERPRETER_FRAME_MDX_OFFSET = INTERPRETER_FRAME_METHOD_OFFSET - 1; ++ INTERPRETER_FRAME_PADDING_OFFSET = INTERPRETER_FRAME_MDX_OFFSET - 1; ++ INTERPRETER_FRAME_MIRROR_OFFSET = INTERPRETER_FRAME_PADDING_OFFSET - 1; ++ INTERPRETER_FRAME_CACHE_OFFSET = INTERPRETER_FRAME_MIRROR_OFFSET - 1; ++ INTERPRETER_FRAME_LOCALS_OFFSET = INTERPRETER_FRAME_CACHE_OFFSET - 1; ++ 
INTERPRETER_FRAME_BCX_OFFSET = INTERPRETER_FRAME_LOCALS_OFFSET - 1; ++ INTERPRETER_FRAME_INITIAL_SP_OFFSET = INTERPRETER_FRAME_BCX_OFFSET - 1; ++ INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; ++ INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; ++ } ++ ++ ++ // an additional field beyond sp and pc: ++ Address raw_fp; // frame pointer ++ private Address raw_unextendedSP; ++ ++ private RISCV64Frame() { ++ } ++ ++ private void adjustForDeopt() { ++ if ( pc != null) { ++ // Look for a deopt pc and if it is deopted convert to original pc ++ CodeBlob cb = VM.getVM().getCodeCache().findBlob(pc); ++ if (cb != null && cb.isJavaMethod()) { ++ NMethod nm = (NMethod) cb; ++ if (pc.equals(nm.deoptHandlerBegin())) { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(this.getUnextendedSP() != null, "null SP in Java frame"); ++ } ++ // adjust pc if frame is deoptimized. ++ pc = this.getUnextendedSP().getAddressAt(nm.origPCOffset()); ++ deoptimized = true; ++ } ++ } ++ } ++ } ++ ++ public RISCV64Frame(Address raw_sp, Address raw_fp, Address pc) { ++ this.raw_sp = raw_sp; ++ this.raw_unextendedSP = raw_sp; ++ this.raw_fp = raw_fp; ++ this.pc = pc; ++ adjustUnextendedSP(); ++ ++ // Frame must be fully constructed before this call ++ adjustForDeopt(); ++ ++ if (DEBUG) { ++ System.out.println("RISCV64Frame(sp, fp, pc): " + this); ++ dumpStack(); ++ } ++ } ++ ++ public RISCV64Frame(Address raw_sp, Address raw_fp) { ++ this.raw_sp = raw_sp; ++ this.raw_unextendedSP = raw_sp; ++ this.raw_fp = raw_fp; ++ ++ // We cannot assume SP[-1] always contains a valid return PC (e.g. if ++ // the callee is a C/C++ compiled frame). If the PC is not known to ++ // Java then this.pc is null. ++ Address savedPC = raw_sp.getAddressAt(-1 * VM.getVM().getAddressSize()); ++ if (VM.getVM().isJavaPCDbg(savedPC)) { ++ this.pc = savedPC; ++ } ++ ++ adjustUnextendedSP(); ++ ++ // Frame must be fully constructed before this call ++ adjustForDeopt(); ++ ++ if (DEBUG) { ++ System.out.println("RISCV64Frame(sp, fp): " + this); ++ dumpStack(); ++ } ++ } ++ ++ public RISCV64Frame(Address raw_sp, Address raw_unextendedSp, Address raw_fp, Address pc) { ++ this.raw_sp = raw_sp; ++ this.raw_unextendedSP = raw_unextendedSp; ++ this.raw_fp = raw_fp; ++ this.pc = pc; ++ adjustUnextendedSP(); ++ ++ // Frame must be fully constructed before this call ++ adjustForDeopt(); ++ ++ if (DEBUG) { ++ System.out.println("RISCV64Frame(sp, unextendedSP, fp, pc): " + this); ++ dumpStack(); ++ } ++ ++ } ++ ++ public Object clone() { ++ RISCV64Frame frame = new RISCV64Frame(); ++ frame.raw_sp = raw_sp; ++ frame.raw_unextendedSP = raw_unextendedSP; ++ frame.raw_fp = raw_fp; ++ frame.pc = pc; ++ frame.deoptimized = deoptimized; ++ return frame; ++ } ++ ++ public boolean equals(Object arg) { ++ if (arg == null) { ++ return false; ++ } ++ ++ if (!(arg instanceof RISCV64Frame)) { ++ return false; ++ } ++ ++ RISCV64Frame other = (RISCV64Frame) arg; ++ ++ return (AddressOps.equal(getSP(), other.getSP()) && ++ AddressOps.equal(getUnextendedSP(), other.getUnextendedSP()) && ++ AddressOps.equal(getFP(), other.getFP()) && ++ AddressOps.equal(getPC(), other.getPC())); ++ } ++ ++ public int hashCode() { ++ if (raw_sp == null) { ++ return 0; ++ } ++ ++ return raw_sp.hashCode(); ++ } ++ ++ public String toString() { ++ return "sp: " + (getSP() == null? "null" : getSP().toString()) + ++ ", unextendedSP: " + (getUnextendedSP() == null? "null" : getUnextendedSP().toString()) + ++ ", fp: " + (getFP() == null? 
"null" : getFP().toString()) + ++ ", pc: " + (pc == null? "null" : pc.toString()); ++ } ++ ++ // accessors for the instance variables ++ public Address getFP() { return raw_fp; } ++ public Address getSP() { return raw_sp; } ++ public Address getID() { return raw_sp; } ++ ++ // FIXME: not implemented yet ++ public boolean isSignalHandlerFrameDbg() { return false; } ++ public int getSignalNumberDbg() { return 0; } ++ public String getSignalNameDbg() { return null; } ++ ++ public boolean isInterpretedFrameValid() { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(isInterpretedFrame(), "Not an interpreted frame"); ++ } ++ ++ // These are reasonable sanity checks ++ if (getFP() == null || getFP().andWithMask(0x3) != null) { ++ return false; ++ } ++ ++ if (getSP() == null || getSP().andWithMask(0x3) != null) { ++ return false; ++ } ++ ++ if (getFP().addOffsetTo(INTERPRETER_FRAME_INITIAL_SP_OFFSET * VM.getVM().getAddressSize()).lessThan(getSP())) { ++ return false; ++ } ++ ++ // These are hacks to keep us out of trouble. ++ // The problem with these is that they mask other problems ++ if (getFP().lessThanOrEqual(getSP())) { ++ // this attempts to deal with unsigned comparison above ++ return false; ++ } ++ ++ if (getFP().minus(getSP()) > 4096 * VM.getVM().getAddressSize()) { ++ // stack frames shouldn't be large. ++ return false; ++ } ++ ++ return true; ++ } ++ ++ public Frame sender(RegisterMap regMap, CodeBlob cb) { ++ RISCV64RegisterMap map = (RISCV64RegisterMap) regMap; ++ ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map != null, "map must be set"); ++ } ++ ++ // Default is we done have to follow them. The sender_for_xxx will ++ // update it accordingly ++ map.setIncludeArgumentOops(false); ++ ++ if (isEntryFrame()) return senderForEntryFrame(map); ++ if (isInterpretedFrame()) return senderForInterpreterFrame(map); ++ ++ if(cb == null) { ++ cb = VM.getVM().getCodeCache().findBlob(getPC()); ++ } else { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(cb.equals(VM.getVM().getCodeCache().findBlob(getPC())), "Must be the same"); ++ } ++ } ++ ++ if (cb != null) { ++ return senderForCompiledFrame(map, cb); ++ } ++ ++ // Must be native-compiled frame, i.e. the marshaling code for native ++ // methods that exists in the core system. 
++ return new RISCV64Frame(getSenderSP(), getLink(), getSenderPC()); ++ } ++ ++ private Frame senderForEntryFrame(RISCV64RegisterMap map) { ++ if (DEBUG) { ++ System.out.println("senderForEntryFrame"); ++ } ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map != null, "map must be set"); ++ } ++ // Java frame called from C; skip all C frames and return top C ++ // frame of that chunk as the sender ++ RISCV64JavaCallWrapper jcw = (RISCV64JavaCallWrapper) getEntryFrameCallWrapper(); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(!entryFrameIsFirst(), "next Java fp must be non zero"); ++ Assert.that(jcw.getLastJavaSP().greaterThan(getSP()), "must be above this frame on stack"); ++ } ++ RISCV64Frame fr; ++ if (jcw.getLastJavaPC() != null) { ++ fr = new RISCV64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP(), jcw.getLastJavaPC()); ++ } else { ++ fr = new RISCV64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP()); ++ } ++ map.clear(); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map.getIncludeArgumentOops(), "should be set by clear"); ++ } ++ return fr; ++ } ++ ++ //------------------------------------------------------------------------------ ++ // frame::adjust_unextended_sp ++ private void adjustUnextendedSP() { ++ // If we are returning to a compiled MethodHandle call site, the ++ // saved_fp will in fact be a saved value of the unextended SP. The ++ // simplest way to tell whether we are returning to such a call site ++ // is as follows: ++ ++ CodeBlob cb = cb(); ++ NMethod senderNm = (cb == null) ? null : cb.asNMethodOrNull(); ++ if (senderNm != null) { ++ // If the sender PC is a deoptimization point, get the original ++ // PC. For MethodHandle call site the unextended_sp is stored in ++ // saved_fp. ++ if (senderNm.isDeoptMhEntry(getPC())) { ++ raw_unextendedSP = getFP(); ++ } ++ else if (senderNm.isDeoptEntry(getPC())) { ++ } ++ else if (senderNm.isMethodHandleReturn(getPC())) { ++ raw_unextendedSP = getFP(); ++ } ++ } ++ } ++ ++ private Frame senderForInterpreterFrame(RISCV64RegisterMap map) { ++ if (DEBUG) { ++ System.out.println("senderForInterpreterFrame"); ++ } ++ Address unextendedSP = addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); ++ Address sp = addressOfStackSlot(SENDER_SP_OFFSET); ++ // We do not need to update the callee-save register mapping because above ++ // us is either another interpreter frame or a converter-frame, but never ++ // directly a compiled frame. ++ // 11/24/04 SFG. With the removal of adapter frames this is no longer true. ++ // However c2 no longer uses callee save register for java calls so there ++ // are no callee register to find. 
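A reader's aid for the slot arithmetic in this method and the compiled/native cases nearby (not patch content): getLink(), getSenderPC() and getSenderSP() all resolve to fixed slots relative to the current fp, assuming the usual SA convention that addressOfStackSlot(i) is fp plus i times the address size. A minimal sketch on an LP64 target:

    import sun.jvm.hotspot.debugger.Address;

    class RISCV64SenderSlotSketch {
        // Mirrors LINK_OFFSET = -2, RETURN_ADDR_OFFSET = -1, SENDER_SP_OFFSET = 0
        // declared at the top of RISCV64Frame.
        static Address[] senderFields(Address fp) {
            final long wordSize = 8;                            // LP64 address size
            Address senderFP = fp.getAddressAt(-2 * wordSize);  // saved caller fp ("link")
            Address senderPC = fp.getAddressAt(-1 * wordSize);  // saved return address
            Address senderSP = fp;                              // slot 0: caller sp equals callee fp
            return new Address[] { senderSP, senderFP, senderPC };
        }
    }

For the interpreter-specific slots used just above, the chained initializers earlier in the file resolve (derived by hand, so worth double-checking) to: sender sp -3, last sp -4, method -5, mdx -6, padding -7, mirror -8, cache -9, locals -10, bcx -11, and initial sp -12, which is also where the monitor block top and bottom begin.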
++ ++ if (map.getUpdateMap()) ++ updateMapWithSavedLink(map, addressOfStackSlot(LINK_OFFSET)); ++ ++ return new RISCV64Frame(sp, unextendedSP, getLink(), getSenderPC()); ++ } ++ ++ private void updateMapWithSavedLink(RegisterMap map, Address savedFPAddr) { ++ map.setLocation(fp, savedFPAddr); ++ } ++ ++ private Frame senderForCompiledFrame(RISCV64RegisterMap map, CodeBlob cb) { ++ if (DEBUG) { ++ System.out.println("senderForCompiledFrame"); ++ } ++ ++ // ++ // NOTE: some of this code is (unfortunately) duplicated RISCV64CurrentFrameGuess ++ // ++ ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map != null, "map must be set"); ++ } ++ ++ // frame owned by optimizing compiler ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(cb.getFrameSize() >= 0, "must have non-zero frame size"); ++ } ++ Address senderSP = getUnextendedSP().addOffsetTo(cb.getFrameSize()); ++ ++ // The return_address is always the word on the stack ++ Address senderPC = senderSP.getAddressAt(-1 * VM.getVM().getAddressSize()); ++ ++ // This is the saved value of FP which may or may not really be an FP. ++ // It is only an FP if the sender is an interpreter frame. ++ Address savedFPAddr = senderSP.addOffsetTo(-2 * VM.getVM().getAddressSize()); ++ ++ if (map.getUpdateMap()) { ++ // Tell GC to use argument oopmaps for some runtime stubs that need it. ++ // For C1, the runtime stub might not have oop maps, so set this flag ++ // outside of update_register_map. ++ map.setIncludeArgumentOops(cb.callerMustGCArguments()); ++ ++ if (cb.getOopMaps() != null) { ++ ImmutableOopMapSet.updateRegisterMap(this, cb, map, true); ++ } ++ ++ // Since the prolog does the save and restore of FP there is no oopmap ++ // for it so we must fill in its location as if there was an oopmap entry ++ // since if our caller was compiled code there could be live jvm state in it. ++ updateMapWithSavedLink(map, savedFPAddr); ++ } ++ ++ return new RISCV64Frame(senderSP, savedFPAddr.getAddressAt(0), senderPC); ++ } ++ ++ protected boolean hasSenderPD() { ++ return true; ++ } ++ ++ public long frameSize() { ++ return (getSenderSP().minus(getSP()) / VM.getVM().getAddressSize()); ++ } ++ ++ public Address getLink() { ++ try { ++ if (DEBUG) { ++ System.out.println("Reading link at " + addressOfStackSlot(LINK_OFFSET) ++ + " = " + addressOfStackSlot(LINK_OFFSET).getAddressAt(0)); ++ } ++ return addressOfStackSlot(LINK_OFFSET).getAddressAt(0); ++ } catch (Exception e) { ++ if (DEBUG) ++ System.out.println("Returning null"); ++ return null; ++ } ++ } ++ ++ public Address getUnextendedSP() { return raw_unextendedSP; } ++ ++ // Return address: ++ public Address getSenderPCAddr() { return addressOfStackSlot(RETURN_ADDR_OFFSET); } ++ public Address getSenderPC() { return getSenderPCAddr().getAddressAt(0); } ++ ++ // return address of param, zero origin index. ++ public Address getNativeParamAddr(int idx) { ++ return addressOfStackSlot(NATIVE_FRAME_INITIAL_PARAM_OFFSET + idx); ++ } ++ ++ public Address getSenderSP() { return addressOfStackSlot(SENDER_SP_OFFSET); } ++ ++ public Address addressOfInterpreterFrameLocals() { ++ return addressOfStackSlot(INTERPRETER_FRAME_LOCALS_OFFSET); ++ } ++ ++ private Address addressOfInterpreterFrameBCX() { ++ return addressOfStackSlot(INTERPRETER_FRAME_BCX_OFFSET); ++ } ++ ++ public int getInterpreterFrameBCI() { ++ // FIXME: this is not atomic with respect to GC and is unsuitable ++ // for use in a non-debugging, or reflective, system. Need to ++ // figure out how to express this. 
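++ // Both values below come from fp-relative interpreter slots: the saved bcp
++ // from the bcx slot and the Method* from the method slot; the shared
++ // bcpToBci() helper then maps that bcp back to a bytecode index.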
++ Address bcp = addressOfInterpreterFrameBCX().getAddressAt(0); ++ Address methodHandle = addressOfInterpreterFrameMethod().getAddressAt(0); ++ Method method = (Method)Metadata.instantiateWrapperFor(methodHandle); ++ return bcpToBci(bcp, method); ++ } ++ ++ public Address addressOfInterpreterFrameMDX() { ++ return addressOfStackSlot(INTERPRETER_FRAME_MDX_OFFSET); ++ } ++ ++ // expression stack ++ // (the max_stack arguments are used by the GC; see class FrameClosure) ++ ++ public Address addressOfInterpreterFrameExpressionStack() { ++ Address monitorEnd = interpreterFrameMonitorEnd().address(); ++ return monitorEnd.addOffsetTo(-1 * VM.getVM().getAddressSize()); ++ } ++ ++ public int getInterpreterFrameExpressionStackDirection() { return -1; } ++ ++ // top of expression stack ++ public Address addressOfInterpreterFrameTOS() { ++ return getSP(); ++ } ++ ++ /** Expression stack from top down */ ++ public Address addressOfInterpreterFrameTOSAt(int slot) { ++ return addressOfInterpreterFrameTOS().addOffsetTo(slot * VM.getVM().getAddressSize()); ++ } ++ ++ public Address getInterpreterFrameSenderSP() { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(isInterpretedFrame(), "interpreted frame expected"); ++ } ++ return addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); ++ } ++ ++ // Monitors ++ public BasicObjectLock interpreterFrameMonitorBegin() { ++ return new BasicObjectLock(addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET)); ++ } ++ ++ public BasicObjectLock interpreterFrameMonitorEnd() { ++ Address result = addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET).getAddressAt(0); ++ if (Assert.ASSERTS_ENABLED) { ++ // make sure the pointer points inside the frame ++ Assert.that(AddressOps.gt(getFP(), result), "result must < than frame pointer"); ++ Assert.that(AddressOps.lte(getSP(), result), "result must >= than stack pointer"); ++ } ++ return new BasicObjectLock(result); ++ } ++ ++ public int interpreterFrameMonitorSize() { ++ return BasicObjectLock.size(); ++ } ++ ++ // Method ++ public Address addressOfInterpreterFrameMethod() { ++ return addressOfStackSlot(INTERPRETER_FRAME_METHOD_OFFSET); ++ } ++ ++ // Constant pool cache ++ public Address addressOfInterpreterFrameCPCache() { ++ return addressOfStackSlot(INTERPRETER_FRAME_CACHE_OFFSET); ++ } ++ ++ // Entry frames ++ public JavaCallWrapper getEntryFrameCallWrapper() { ++ return new RISCV64JavaCallWrapper(addressOfStackSlot(ENTRY_FRAME_CALL_WRAPPER_OFFSET).getAddressAt(0)); ++ } ++ ++ protected Address addressOfSavedOopResult() { ++ // offset is 2 for compiler2 and 3 for compiler1 ++ return getSP().addOffsetTo((VM.getVM().isClientCompiler() ? 
2 : 3) * ++ VM.getVM().getAddressSize()); ++ } ++ ++ protected Address addressOfSavedReceiver() { ++ return getSP().addOffsetTo(-4 * VM.getVM().getAddressSize()); ++ } ++ ++ private void dumpStack() { ++ for (Address addr = getSP().addOffsetTo(-4 * VM.getVM().getAddressSize()); ++ AddressOps.lt(addr, getSP()); ++ addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { ++ System.out.println(addr + ": " + addr.getAddressAt(0)); ++ } ++ System.out.println("-----------------------"); ++ for (Address addr = getSP(); ++ AddressOps.lte(addr, getSP().addOffsetTo(20 * VM.getVM().getAddressSize())); ++ addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { ++ System.out.println(addr + ": " + addr.getAddressAt(0)); ++ } ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java +new file mode 100644 +index 00000000000..d0ad2b559a6 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java +@@ -0,0 +1,61 @@ ++/* ++ * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.riscv64; ++ ++import java.util.*; ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.runtime.*; ++import sun.jvm.hotspot.utilities.*; ++import sun.jvm.hotspot.utilities.Observable; ++import sun.jvm.hotspot.utilities.Observer; ++ ++public class RISCV64JavaCallWrapper extends JavaCallWrapper { ++ private static AddressField lastJavaFPField; ++ ++ static { ++ VM.registerVMInitializedObserver(new Observer() { ++ public void update(Observable o, Object data) { ++ initialize(VM.getVM().getTypeDataBase()); ++ } ++ }); ++ } ++ ++ private static synchronized void initialize(TypeDataBase db) { ++ Type type = db.lookupType("JavaFrameAnchor"); ++ ++ lastJavaFPField = type.getAddressField("_last_Java_fp"); ++ } ++ ++ public RISCV64JavaCallWrapper(Address addr) { ++ super(addr); ++ } ++ ++ public Address getLastJavaFP() { ++ return lastJavaFPField.getValue(addr.addOffsetTo(anchorField.getOffset())); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java +new file mode 100644 +index 00000000000..4aeb1c6f557 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java +@@ -0,0 +1,53 @@ ++/* ++ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.riscv64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.runtime.*; ++ ++public class RISCV64RegisterMap extends RegisterMap { ++ ++ /** This is the only public constructor */ ++ public RISCV64RegisterMap(JavaThread thread, boolean updateMap) { ++ super(thread, updateMap); ++ } ++ ++ protected RISCV64RegisterMap(RegisterMap map) { ++ super(map); ++ } ++ ++ public Object clone() { ++ RISCV64RegisterMap retval = new RISCV64RegisterMap(this); ++ return retval; ++ } ++ ++ // no PD state to clear or copy: ++ protected void clearPD() {} ++ protected void initializePD() {} ++ protected void initializeFromPD(RegisterMap map) {} ++ protected Address getLocationPD(VMReg reg) { return null; } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java +index 7d7a6107cab..6552ce255fc 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -54,7 +54,7 @@ public static String getOS() throws UnsupportedPlatformException { + + public static boolean knownCPU(String cpu) { + final String[] KNOWN = +- new String[] {"i386", "x86", "x86_64", "amd64", "sparc", "sparcv9", "ppc64", "ppc64le", "aarch64"}; ++ new String[] {"i386", "x86", "x86_64", "amd64", "sparc", "sparcv9", "ppc64", "ppc64le", "aarch64", "riscv64"}; + + for(String s : KNOWN) { + if(s.equals(cpu)) +diff --git a/test/hotspot/jtreg/compiler/c2/TestBit.java b/test/hotspot/jtreg/compiler/c2/TestBit.java +index 7805918c28a..823b9f39dbf 100644 +--- a/test/hotspot/jtreg/compiler/c2/TestBit.java ++++ b/test/hotspot/jtreg/compiler/c2/TestBit.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -34,7 +34,7 @@ + * + * @run driver compiler.c2.TestBit + * +- * @requires os.arch=="aarch64" | os.arch=="amd64" | os.arch == "ppc64le" ++ * @requires os.arch=="aarch64" | os.arch=="amd64" | os.arch == "ppc64le" | os.arch == "riscv64" + * @requires vm.debug == true & vm.compiler2.enabled + */ + public class TestBit { +@@ -54,7 +54,8 @@ static void runTest(String testName) throws Exception { + String expectedTestBitInstruction = + "ppc64le".equals(System.getProperty("os.arch")) ? "ANDI" : + "aarch64".equals(System.getProperty("os.arch")) ? "tb" : +- "amd64".equals(System.getProperty("os.arch")) ? "test" : null; ++ "amd64".equals(System.getProperty("os.arch")) ? "test" : ++ "riscv64".equals(System.getProperty("os.arch")) ? 
"andi" : null; + + if (expectedTestBitInstruction != null) { + output.shouldContain(expectedTestBitInstruction); +diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java +index 558b4218f0b..55374b116e6 100644 +--- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java ++++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -42,6 +42,7 @@ + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU; ++import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; + import compiler.intrinsics.sha.cli.testcases.UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU; + +@@ -54,6 +55,8 @@ public static void main(String args[]) throws Throwable { + SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), + new GenericTestCaseForUnsupportedAArch64CPU( + SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), ++ new GenericTestCaseForUnsupportedRISCV64CPU( ++ SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), + new UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU( + SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), + new GenericTestCaseForOtherCPU( +diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java +index 3ed72bf0a99..8fb82ee4531 100644 +--- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java ++++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it +@@ -42,6 +42,7 @@ + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU; ++import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; + import compiler.intrinsics.sha.cli.testcases.UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU; + +@@ -54,6 +55,8 @@ public static void main(String args[]) throws Throwable { + SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), + new GenericTestCaseForUnsupportedAArch64CPU( + SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), ++ new GenericTestCaseForUnsupportedRISCV64CPU( ++ SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), + new UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU( + SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), + new GenericTestCaseForOtherCPU( +diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java +index c05cf309dae..aca32137eda 100644 +--- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java ++++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -42,6 +42,7 @@ + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU; ++import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; + import compiler.intrinsics.sha.cli.testcases.UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU; + +@@ -54,6 +55,8 @@ public static void main(String args[]) throws Throwable { + SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), + new GenericTestCaseForUnsupportedAArch64CPU( + SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), ++ new GenericTestCaseForUnsupportedRISCV64CPU( ++ SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), + new UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU( + SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), + new GenericTestCaseForOtherCPU( +diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java +index 58ce5366bae..8deac4f7895 100644 +--- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java ++++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it +@@ -41,6 +41,7 @@ + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU; ++import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; + import compiler.intrinsics.sha.cli.testcases.UseSHASpecificTestCaseForUnsupportedCPU; + +@@ -53,6 +54,8 @@ public static void main(String args[]) throws Throwable { + SHAOptionsBase.USE_SHA_OPTION), + new GenericTestCaseForUnsupportedAArch64CPU( + SHAOptionsBase.USE_SHA_OPTION), ++ new GenericTestCaseForUnsupportedRISCV64CPU( ++ SHAOptionsBase.USE_SHA_OPTION), + new UseSHASpecificTestCaseForUnsupportedCPU( + SHAOptionsBase.USE_SHA_OPTION), + new GenericTestCaseForOtherCPU( +diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java +index faa9fdbae67..26635002040 100644 +--- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java ++++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2014, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -32,26 +32,27 @@ + + /** + * Generic test case for SHA-related options targeted to any CPU except +- * AArch64, PPC, S390x, SPARC and X86. ++ * AArch64, RISCV64, PPC, S390x, SPARC and X86. + */ + public class GenericTestCaseForOtherCPU extends + SHAOptionsBase.TestCase { + public GenericTestCaseForOtherCPU(String optionName) { +- // Execute the test case on any CPU except AArch64, PPC, S390x, SPARC and X86. ++ // Execute the test case on any CPU except AArch64, RISCV64, PPC, S390x, SPARC and X86. + super(optionName, new NotPredicate( + new OrPredicate(Platform::isAArch64, ++ new OrPredicate(Platform::isRISCV64, + new OrPredicate(Platform::isS390x, + new OrPredicate(Platform::isSparc, + new OrPredicate(Platform::isPPC, + new OrPredicate(Platform::isX64, +- Platform::isX86))))))); ++ Platform::isX86)))))))); + } + + @Override + protected void verifyWarnings() throws Throwable { + String shouldPassMessage = String.format("JVM should start with " + + "option '%s' without any warnings", optionName); +- // Verify that on non-x86, non-SPARC and non-AArch64 CPU usage of ++ // Verify that on non-x86, non-RISCV64, non-SPARC and non-AArch64 CPU usage of + // SHA-related options will not cause any warnings. 
+ CommandLineOptionTest.verifySameJVMStartup(null, + new String[] { ".*" + optionName + ".*" }, shouldPassMessage, +diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java +new file mode 100644 +index 00000000000..2ecfec07a4c +--- /dev/null ++++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java +@@ -0,0 +1,115 @@ ++/* ++ * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++package compiler.intrinsics.sha.cli.testcases; ++ ++import compiler.intrinsics.sha.cli.DigestOptionsBase; ++import jdk.test.lib.process.ExitCode; ++import jdk.test.lib.Platform; ++import jdk.test.lib.cli.CommandLineOptionTest; ++import jdk.test.lib.cli.predicate.AndPredicate; ++import jdk.test.lib.cli.predicate.NotPredicate; ++ ++/** ++ * Generic test case for SHA-related options targeted to RISCV64 CPUs ++ * which don't support instruction required by the tested option. ++ */ ++public class GenericTestCaseForUnsupportedRISCV64CPU extends ++ DigestOptionsBase.TestCase { ++ ++ final private boolean checkUseSHA; ++ ++ public GenericTestCaseForUnsupportedRISCV64CPU(String optionName) { ++ this(optionName, true); ++ } ++ ++ public GenericTestCaseForUnsupportedRISCV64CPU(String optionName, boolean checkUseSHA) { ++ super(optionName, new AndPredicate(Platform::isRISCV64, ++ new NotPredicate(DigestOptionsBase.getPredicateForOption( ++ optionName)))); ++ ++ this.checkUseSHA = checkUseSHA; ++ } ++ ++ @Override ++ protected void verifyWarnings() throws Throwable { ++ String shouldPassMessage = String.format("JVM startup should pass with" ++ + "option '-XX:-%s' without any warnings", optionName); ++ //Verify that option could be disabled without any warnings. 
++ CommandLineOptionTest.verifySameJVMStartup(null, new String[] { ++ DigestOptionsBase.getWarningForUnsupportedCPU(optionName) ++ }, shouldPassMessage, shouldPassMessage, ExitCode.OK, ++ DigestOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, ++ CommandLineOptionTest.prepareBooleanFlag(optionName, false)); ++ ++ if (checkUseSHA) { ++ shouldPassMessage = String.format("If JVM is started with '-XX:-" ++ + "%s' '-XX:+%s', output should contain warning.", ++ DigestOptionsBase.USE_SHA_OPTION, optionName); ++ ++ // Verify that when the tested option is enabled, then ++ // a warning will occur in VM output if UseSHA is disabled. ++ if (!optionName.equals(DigestOptionsBase.USE_SHA_OPTION)) { ++ CommandLineOptionTest.verifySameJVMStartup( ++ new String[] { DigestOptionsBase.getWarningForUnsupportedCPU(optionName) }, ++ null, ++ shouldPassMessage, ++ shouldPassMessage, ++ ExitCode.OK, ++ DigestOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, ++ CommandLineOptionTest.prepareBooleanFlag(DigestOptionsBase.USE_SHA_OPTION, false), ++ CommandLineOptionTest.prepareBooleanFlag(optionName, true)); ++ } ++ } ++ } ++ ++ @Override ++ protected void verifyOptionValues() throws Throwable { ++ // Verify that option is disabled by default. ++ CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", ++ String.format("Option '%s' should be disabled by default", ++ optionName), ++ DigestOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS); ++ ++ if (checkUseSHA) { ++ // Verify that option is disabled even if it was explicitly enabled ++ // using CLI options. ++ CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", ++ String.format("Option '%s' should be off on unsupported " ++ + "RISCV64CPU even if set to true directly", optionName), ++ DigestOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, ++ CommandLineOptionTest.prepareBooleanFlag(optionName, true)); ++ ++ // Verify that option is disabled when +UseSHA was passed to JVM. ++ CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", ++ String.format("Option '%s' should be off on unsupported " ++ + "RISCV64CPU even if %s flag set to JVM", ++ optionName, CommandLineOptionTest.prepareBooleanFlag( ++ DigestOptionsBase.USE_SHA_OPTION, true)), ++ DigestOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, ++ CommandLineOptionTest.prepareBooleanFlag( ++ DigestOptionsBase.USE_SHA_OPTION, true)); ++ } ++ } ++} +diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java +index 2e3e2717a65..7be8af6d035 100644 +--- a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java ++++ b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it +@@ -25,7 +25,7 @@ + * @test + * @bug 8074981 + * @summary Add C2 x86 Superword support for scalar product reduction optimizations : float test +- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" ++ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" + * + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 + * -XX:CompileThresholdScaling=0.1 +diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java +index 0e06a9e4327..797927b42bf 100644 +--- a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java ++++ b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -25,7 +25,7 @@ + * @test + * @bug 8074981 + * @summary Add C2 x86 Superword support for scalar product reduction optimizations : float test +- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" ++ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" + * + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 + * -XX:CompileThresholdScaling=0.1 +diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java +index c3cdbf37464..be8f7d586c2 100644 +--- a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java ++++ b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -25,7 +25,7 @@ + * @test + * @bug 8074981 + * @summary Add C2 x86 Superword support for scalar product reduction optimizations : int test +- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" ++ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" + * + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 + * -XX:CompileThresholdScaling=0.1 +diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java b/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java +index d33bd411f16..d96d5e29c00 100644 +--- a/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java ++++ b/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it +@@ -25,7 +25,7 @@ + * @test + * @bug 8074981 + * @summary Add C2 x86 Superword support for scalar product reduction optimizations : int test +- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" ++ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" + * + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions + * -XX:LoopUnrollLimit=250 -XX:CompileThresholdScaling=0.1 +diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java +index 992fa4b5161..b09c873d05d 100644 +--- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java ++++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -25,7 +25,7 @@ + * @test + * @bug 8138583 + * @summary Add C2 AArch64 Superword support for scalar sum reduction optimizations : double abs & neg test +- * @requires os.arch=="aarch64" ++ * @requires os.arch=="aarch64" | os.arch=="riscv64" + * + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 + * -XX:CompileThresholdScaling=0.1 +diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java +index 3e79b3528b7..fe40ed6f98d 100644 +--- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java ++++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -25,7 +25,7 @@ + * @test + * @bug 8138583 + * @summary Add C2 AArch64 Superword support for scalar sum reduction optimizations : float abs & neg test +- * @requires os.arch=="aarch64" ++ * @requires os.arch=="aarch64" | os.arch=="riscv64" + * + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 + * -XX:CompileThresholdScaling=0.1 +diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java +index 6603dd224ef..51631910493 100644 +--- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java ++++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it +@@ -25,7 +25,7 @@ + * @test + * @bug 8135028 + * @summary Add C2 x86 Superword support for scalar sum reduction optimizations : double sqrt test +- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" ++ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" + * + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 + * -XX:CompileThresholdScaling=0.1 +diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java +index d9a0c988004..d999ae423cf 100644 +--- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java ++++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -25,7 +25,7 @@ + * @test + * @bug 8074981 + * @summary Add C2 x86 Superword support for scalar sum reduction optimizations : double test +- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" ++ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" + * + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 + * -XX:CompileThresholdScaling=0.1 +diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java +index 722db95aed3..65912a5c7fa 100644 +--- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java ++++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -25,7 +25,7 @@ + * @test + * @bug 8074981 + * @summary Add C2 x86 Superword support for scalar sum reduction optimizations : float test +- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" ++ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" + * + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 + * -XX:CompileThresholdScaling=0.1 +diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java +index f58f21feb23..fffdc2f7565 100644 +--- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java ++++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it +@@ -25,7 +25,7 @@ + * @test + * @bug 8074981 + * @summary Add C2 x86 Superword support for scalar sum reduction optimizations : int test +- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" ++ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" + * + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 + * -XX:CompileThresholdScaling=0.1 +diff --git a/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java b/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java +index 7774dabcb5f..7afe3560f30 100644 +--- a/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java ++++ b/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2014, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -61,15 +61,17 @@ public class IntrinsicPredicates { + + public static final BooleanSupplier SHA1_INSTRUCTION_AVAILABLE + = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha1" }, null), ++ new OrPredicate(new CPUSpecificPredicate("riscv64.*", new String[] { "sha1" }, null), + new OrPredicate(new CPUSpecificPredicate("s390.*", new String[] { "sha1" }, null), + new OrPredicate(new CPUSpecificPredicate("sparc.*", new String[] { "sha1" }, null), + // x86 variants + new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "sha" }, null), + new OrPredicate(new CPUSpecificPredicate("i386.*", new String[] { "sha" }, null), +- new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null)))))); ++ new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null))))))); + + public static final BooleanSupplier SHA256_INSTRUCTION_AVAILABLE + = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha256" }, null), ++ new OrPredicate(new CPUSpecificPredicate("riscv64.*", new String[] { "sha256" }, null), + new OrPredicate(new CPUSpecificPredicate("s390.*", new String[] { "sha256" }, null), + new OrPredicate(new CPUSpecificPredicate("sparc.*", new String[] { "sha256" }, null), + new OrPredicate(new CPUSpecificPredicate("ppc64.*", new String[] { "sha" }, null), +@@ -79,10 +81,11 @@ public class IntrinsicPredicates { + new OrPredicate(new CPUSpecificPredicate("i386.*", new String[] { "sha" }, null), + new OrPredicate(new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null), + new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "avx2", "bmi2" }, null), +- new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null)))))))))); ++ new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null))))))))))); + + public static final BooleanSupplier SHA512_INSTRUCTION_AVAILABLE + = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha512" }, null), ++ new OrPredicate(new CPUSpecificPredicate("riscv64.*", new String[] { "sha512" }, null), + new OrPredicate(new CPUSpecificPredicate("s390.*", new String[] { "sha512" }, null), + new OrPredicate(new CPUSpecificPredicate("sparc.*", new String[] { "sha512" }, null), + new OrPredicate(new 
CPUSpecificPredicate("ppc64.*", new String[] { "sha" }, null), +@@ -92,7 +95,7 @@ public class IntrinsicPredicates { + new OrPredicate(new CPUSpecificPredicate("i386.*", new String[] { "sha" }, null), + new OrPredicate(new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null), + new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "avx2", "bmi2" }, null), +- new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null)))))))))); ++ new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null))))))))))); + + public static final BooleanSupplier ANY_SHA_INSTRUCTION_AVAILABLE + = new OrPredicate(IntrinsicPredicates.SHA1_INSTRUCTION_AVAILABLE, +diff --git a/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java b/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java +index 57256aa5a32..d4d43b01ae6 100644 +--- a/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java ++++ b/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2016, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -112,7 +112,7 @@ public static void main(String args[]) throws Exception { + // It's ok for ARM not to have symbols, because it does not support NMT detail + // when targeting thumb2. It's also ok for Windows not to have symbols, because + // they are only available if the symbols file is included with the build. +- if (Platform.isWindows() || Platform.isARM()) { ++ if (Platform.isWindows() || Platform.isARM() || Platform.isRISCV64()) { + return; // we are done + } + output.reportDiagnosticSummary(); +diff --git a/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java b/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java +index 127bb6abcd9..eab19273ad8 100644 +--- a/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java ++++ b/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -239,7 +239,7 @@ private static boolean isAlwaysSupportedPlatform() { + return Platform.isAix() || + (Platform.isLinux() && + (Platform.isPPC() || Platform.isS390x() || Platform.isX64() || +- Platform.isX86())) || ++ Platform.isX86() || Platform.isRISCV64())) || + Platform.isOSX() || + Platform.isSolaris(); + } +diff --git a/test/hotspot/jtreg/serviceability/jvmti/HeapMonitor/MyPackage/HeapMonitorEventsForTwoThreadsTest.java b/test/hotspot/jtreg/serviceability/jvmti/HeapMonitor/MyPackage/HeapMonitorEventsForTwoThreadsTest.java +index 54640b245f8..f0b7aed5ceb 100644 +--- a/test/hotspot/jtreg/serviceability/jvmti/HeapMonitor/MyPackage/HeapMonitorEventsForTwoThreadsTest.java ++++ b/test/hotspot/jtreg/serviceability/jvmti/HeapMonitor/MyPackage/HeapMonitorEventsForTwoThreadsTest.java +@@ -1,5 +1,4 @@ + /* +- * Copyright (c) 2018, Google and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it +diff --git a/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java b/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java +index 77458554b76..d4bfe31dd7a 100644 +--- a/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java ++++ b/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java +@@ -45,7 +45,7 @@ + */ + public class TestMutuallyExclusivePlatformPredicates { + private static enum MethodGroup { +- ARCH("isAArch64", "isARM", "isPPC", "isS390x", "isSparc", "isX64", "isX86"), ++ ARCH("isAArch64", "isARM", "isRISCV64", "isPPC", "isS390x", "isSparc", "isX64", "isX86"), + BITNESS("is32bit", "is64bit"), + OS("isAix", "isLinux", "isOSX", "isSolaris", "isWindows"), + VM_TYPE("isClient", "isServer", "isGraal", "isMinimal", "isZero", "isEmbedded"), +diff --git a/test/jdk/jdk/jfr/event/os/TestCPUInformation.java b/test/jdk/jdk/jfr/event/os/TestCPUInformation.java +index 7990c49a1f6..abeff80e5e8 100644 +--- a/test/jdk/jdk/jfr/event/os/TestCPUInformation.java ++++ b/test/jdk/jdk/jfr/event/os/TestCPUInformation.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2013, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2013, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -54,8 +54,8 @@ public static void main(String[] args) throws Throwable { + Events.assertField(event, "hwThreads").atLeast(1); + Events.assertField(event, "cores").atLeast(1); + Events.assertField(event, "sockets").atLeast(1); +- Events.assertField(event, "cpu").containsAny("Intel", "AMD", "Unknown x86", "sparc", "ARM", "PPC", "PowerPC", "AArch64", "s390"); +- Events.assertField(event, "description").containsAny("Intel", "AMD", "Unknown x86", "SPARC", "ARM", "PPC", "PowerPC", "AArch64", "s390"); ++ Events.assertField(event, "cpu").containsAny("Intel", "AMD", "Unknown x86", "sparc", "ARM", "PPC", "PowerPC", "AArch64", "RISCV64", "s390"); ++ Events.assertField(event, "description").containsAny("Intel", "AMD", "Unknown x86", "SPARC", "ARM", "PPC", "PowerPC", "AArch64", "RISCV64", "s390"); + } + } + } +diff --git a/test/lib/jdk/test/lib/Platform.java b/test/lib/jdk/test/lib/Platform.java +index f4ee0546c70..635cdd18054 100644 +--- a/test/lib/jdk/test/lib/Platform.java ++++ b/test/lib/jdk/test/lib/Platform.java +@@ -202,6 +202,10 @@ public static boolean isARM() { + return isArch("arm.*"); + } + ++ public static boolean isRISCV64() { ++ return isArch("riscv64"); ++ } ++ + public static boolean isPPC() { + return isArch("ppc.*"); + } + +From c51e546566c937354842a27696bd2221087101ae Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 28 Mar 2023 16:30:04 +0800 +Subject: [PATCH 002/140] Drop zgc part + +--- + .../cpu/riscv/c1_LIRAssembler_riscv.cpp | 6 +- + .../riscv/gc/z/zBarrierSetAssembler_riscv.cpp | 441 ------------------ + .../riscv/gc/z/zBarrierSetAssembler_riscv.hpp | 101 ---- + src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp | 212 --------- + src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp | 36 -- + src/hotspot/cpu/riscv/gc/z/z_riscv64.ad | 233 --------- + .../cpu/riscv/macroAssembler_riscv.cpp | 46 -- + .../cpu/riscv/macroAssembler_riscv.hpp | 9 - + src/hotspot/cpu/riscv/stubGenerator_riscv.cpp | 10 - + 9 files changed, 1 insertion(+), 1093 deletions(-) + delete mode 100644 
src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp + delete mode 100644 src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.hpp + delete mode 100644 src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp + delete mode 100644 src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp + delete mode 100644 src/hotspot/cpu/riscv/gc/z/z_riscv64.ad + +diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +index 742c2126e60..bba3bd4709c 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +@@ -871,11 +871,7 @@ void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_Patch + if (UseCompressedOops && !wide) { + __ decode_heap_oop(dest->as_register()); + } +- +- if (!UseZGC) { +- // Load barrier has not yet been applied, so ZGC can't verify the oop here +- __ verify_oop(dest->as_register()); +- } ++ __ verify_oop(dest->as_register()); + } + } + +diff --git a/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp +deleted file mode 100644 +index 3d3f4d4d774..00000000000 +--- a/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp ++++ /dev/null +@@ -1,441 +0,0 @@ +-/* +- * Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. +- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +- * +- * This code is free software; you can redistribute it and/or modify it +- * under the terms of the GNU General Public License version 2 only, as +- * published by the Free Software Foundation. +- * +- * This code is distributed in the hope that it will be useful, but WITHOUT +- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +- * version 2 for more details (a copy is included in the LICENSE file that +- * accompanied this code). +- * +- * You should have received a copy of the GNU General Public License version +- * 2 along with this work; if not, write to the Free Software Foundation, +- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +- * +- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +- * or visit www.oracle.com if you need additional information or have any +- * questions. 
+- * +- */ +- +-#include "precompiled.hpp" +-#include "asm/macroAssembler.inline.hpp" +-#include "code/codeBlob.hpp" +-#include "code/vmreg.inline.hpp" +-#include "gc/z/zBarrier.inline.hpp" +-#include "gc/z/zBarrierSet.hpp" +-#include "gc/z/zBarrierSetAssembler.hpp" +-#include "gc/z/zBarrierSetRuntime.hpp" +-#include "gc/z/zThreadLocalData.hpp" +-#include "memory/resourceArea.hpp" +-#include "runtime/sharedRuntime.hpp" +-#include "utilities/macros.hpp" +-#ifdef COMPILER1 +-#include "c1/c1_LIRAssembler.hpp" +-#include "c1/c1_MacroAssembler.hpp" +-#include "gc/z/c1/zBarrierSetC1.hpp" +-#endif // COMPILER1 +-#ifdef COMPILER2 +-#include "gc/z/c2/zBarrierSetC2.hpp" +-#endif // COMPILER2 +- +-#ifdef PRODUCT +-#define BLOCK_COMMENT(str) /* nothing */ +-#else +-#define BLOCK_COMMENT(str) __ block_comment(str) +-#endif +- +-#undef __ +-#define __ masm-> +- +-void ZBarrierSetAssembler::load_at(MacroAssembler* masm, +- DecoratorSet decorators, +- BasicType type, +- Register dst, +- Address src, +- Register tmp1, +- Register tmp_thread) { +- if (!ZBarrierSet::barrier_needed(decorators, type)) { +- // Barrier not needed +- BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); +- return; +- } +- +- assert_different_registers(t1, src.base()); +- assert_different_registers(t0, t1, dst); +- +- Label done; +- +- // Load bad mask into temp register. +- __ la(t0, src); +- __ ld(t1, address_bad_mask_from_thread(xthread)); +- __ ld(dst, Address(t0)); +- +- // Test reference against bad mask. If mask bad, then we need to fix it up. +- __ andr(t1, dst, t1); +- __ beqz(t1, done); +- +- __ enter(); +- +- __ push_call_clobbered_registers_except(RegSet::of(dst)); +- +- if (c_rarg0 != dst) { +- __ mv(c_rarg0, dst); +- } +- +- __ mv(c_rarg1, t0); +- +- __ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators), 2); +- +- // Make sure dst has the return value. +- if (dst != x10) { +- __ mv(dst, x10); +- } +- +- __ pop_call_clobbered_registers_except(RegSet::of(dst)); +- __ leave(); +- +- __ bind(done); +-} +- +-#ifdef ASSERT +- +-void ZBarrierSetAssembler::store_at(MacroAssembler* masm, +- DecoratorSet decorators, +- BasicType type, +- Address dst, +- Register val, +- Register tmp1, +- Register tmp2) { +- // Verify value +- if (is_reference_type(type)) { +- // Note that src could be noreg, which means we +- // are storing null and can skip verification. +- if (val != noreg) { +- Label done; +- +- // tmp1 and tmp2 are often set to noreg. +- RegSet savedRegs = RegSet::of(t0); +- __ push_reg(savedRegs, sp); +- +- __ ld(t0, address_bad_mask_from_thread(xthread)); +- __ andr(t0, val, t0); +- __ beqz(t0, done); +- __ stop("Verify oop store failed"); +- __ should_not_reach_here(); +- __ bind(done); +- __ pop_reg(savedRegs, sp); +- } +- } +- +- // Store value +- BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); +-} +- +-#endif // ASSERT +- +-void ZBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, +- DecoratorSet decorators, +- bool is_oop, +- Register src, +- Register dst, +- Register count, +- RegSet saved_regs) { +- if (!is_oop) { +- // Barrier not needed +- return; +- } +- +- BLOCK_COMMENT("ZBarrierSetAssembler::arraycopy_prologue {"); +- +- assert_different_registers(src, count, t0); +- +- __ push_reg(saved_regs, sp); +- +- if (count == c_rarg0 && src == c_rarg1) { +- // exactly backwards!! 
+- __ xorr(c_rarg0, c_rarg0, c_rarg1); +- __ xorr(c_rarg1, c_rarg0, c_rarg1); +- __ xorr(c_rarg0, c_rarg0, c_rarg1); +- } else { +- __ mv(c_rarg0, src); +- __ mv(c_rarg1, count); +- } +- +- __ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_array_addr(), 2); +- +- __ pop_reg(saved_regs, sp); +- +- BLOCK_COMMENT("} ZBarrierSetAssembler::arraycopy_prologue"); +-} +- +-void ZBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, +- Register jni_env, +- Register robj, +- Register tmp, +- Label& slowpath) { +- BLOCK_COMMENT("ZBarrierSetAssembler::try_resolve_jobject_in_native {"); +- +- assert_different_registers(jni_env, robj, tmp); +- +- // Resolve jobject +- BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, robj, tmp, slowpath); +- +- // Compute the offset of address bad mask from the field of jni_environment +- long int bad_mask_relative_offset = (long int) (in_bytes(ZThreadLocalData::address_bad_mask_offset()) - +- in_bytes(JavaThread::jni_environment_offset())); +- +- // Load the address bad mask +- __ ld(tmp, Address(jni_env, bad_mask_relative_offset)); +- +- // Check address bad mask +- __ andr(tmp, robj, tmp); +- __ bnez(tmp, slowpath); +- +- BLOCK_COMMENT("} ZBarrierSetAssembler::try_resolve_jobject_in_native"); +-} +- +-#ifdef COMPILER2 +- +-OptoReg::Name ZBarrierSetAssembler::refine_register(const Node* node, OptoReg::Name opto_reg) { +- if (!OptoReg::is_reg(opto_reg)) { +- return OptoReg::Bad; +- } +- +- const VMReg vm_reg = OptoReg::as_VMReg(opto_reg); +- if (vm_reg->is_FloatRegister()) { +- return opto_reg & ~1; +- } +- +- return opto_reg; +-} +- +-#undef __ +-#define __ _masm-> +- +-class ZSaveLiveRegisters { +-private: +- MacroAssembler* const _masm; +- RegSet _gp_regs; +- FloatRegSet _fp_regs; +- VectorRegSet _vp_regs; +- +-public: +- void initialize(ZLoadBarrierStubC2* stub) { +- // Record registers that needs to be saved/restored +- RegMaskIterator rmi(stub->live()); +- while (rmi.has_next()) { +- const OptoReg::Name opto_reg = rmi.next(); +- if (OptoReg::is_reg(opto_reg)) { +- const VMReg vm_reg = OptoReg::as_VMReg(opto_reg); +- if (vm_reg->is_Register()) { +- _gp_regs += RegSet::of(vm_reg->as_Register()); +- } else if (vm_reg->is_FloatRegister()) { +- _fp_regs += FloatRegSet::of(vm_reg->as_FloatRegister()); +- } else if (vm_reg->is_VectorRegister()) { +- const VMReg vm_reg_base = OptoReg::as_VMReg(opto_reg & ~(VectorRegisterImpl::max_slots_per_register - 1)); +- _vp_regs += VectorRegSet::of(vm_reg_base->as_VectorRegister()); +- } else { +- fatal("Unknown register type"); +- } +- } +- } +- +- // Remove C-ABI SOE registers, tmp regs and _ref register that will be updated +- _gp_regs -= RegSet::range(x18, x27) + RegSet::of(x2) + RegSet::of(x8, x9) + RegSet::of(x5, stub->ref()); +- } +- +- ZSaveLiveRegisters(MacroAssembler* masm, ZLoadBarrierStubC2* stub) : +- _masm(masm), +- _gp_regs(), +- _fp_regs(), +- _vp_regs() { +- // Figure out what registers to save/restore +- initialize(stub); +- +- // Save registers +- __ push_reg(_gp_regs, sp); +- __ push_fp(_fp_regs, sp); +- __ push_vp(_vp_regs, sp); +- } +- +- ~ZSaveLiveRegisters() { +- // Restore registers +- __ pop_vp(_vp_regs, sp); +- __ pop_fp(_fp_regs, sp); +- __ pop_reg(_gp_regs, sp); +- } +-}; +- +-class ZSetupArguments { +-private: +- MacroAssembler* const _masm; +- const Register _ref; +- const Address _ref_addr; +- +-public: +- ZSetupArguments(MacroAssembler* masm, ZLoadBarrierStubC2* stub) : +- _masm(masm), +- _ref(stub->ref()), +- _ref_addr(stub->ref_addr()) { +- +- // Setup 
arguments +- if (_ref_addr.base() == noreg) { +- // No self healing +- if (_ref != c_rarg0) { +- __ mv(c_rarg0, _ref); +- } +- __ mv(c_rarg1, zr); +- } else { +- // Self healing +- if (_ref == c_rarg0) { +- // _ref is already at correct place +- __ la(c_rarg1, _ref_addr); +- } else if (_ref != c_rarg1) { +- // _ref is in wrong place, but not in c_rarg1, so fix it first +- __ la(c_rarg1, _ref_addr); +- __ mv(c_rarg0, _ref); +- } else if (_ref_addr.base() != c_rarg0) { +- assert(_ref == c_rarg1, "Mov ref first, vacating c_rarg0"); +- __ mv(c_rarg0, _ref); +- __ la(c_rarg1, _ref_addr); +- } else { +- assert(_ref == c_rarg1, "Need to vacate c_rarg1 and _ref_addr is using c_rarg0"); +- if (_ref_addr.base() == c_rarg0) { +- __ mv(t1, c_rarg1); +- __ la(c_rarg1, _ref_addr); +- __ mv(c_rarg0, t1); +- } else { +- ShouldNotReachHere(); +- } +- } +- } +- } +- +- ~ZSetupArguments() { +- // Transfer result +- if (_ref != x10) { +- __ mv(_ref, x10); +- } +- } +-}; +- +-#undef __ +-#define __ masm-> +- +-void ZBarrierSetAssembler::generate_c2_load_barrier_stub(MacroAssembler* masm, ZLoadBarrierStubC2* stub) const { +- BLOCK_COMMENT("ZLoadBarrierStubC2"); +- +- // Stub entry +- __ bind(*stub->entry()); +- +- { +- ZSaveLiveRegisters save_live_registers(masm, stub); +- ZSetupArguments setup_arguments(masm, stub); +- int32_t offset = 0; +- __ la_patchable(t0, stub->slow_path(), offset); +- __ jalr(x1, t0, offset); +- } +- +- // Stub exit +- __ j(*stub->continuation()); +-} +- +-#undef __ +- +-#endif // COMPILER2 +- +-#ifdef COMPILER1 +-#undef __ +-#define __ ce->masm()-> +- +-void ZBarrierSetAssembler::generate_c1_load_barrier_test(LIR_Assembler* ce, +- LIR_Opr ref) const { +- assert_different_registers(xthread, ref->as_register(), t1); +- __ ld(t1, address_bad_mask_from_thread(xthread)); +- __ andr(t1, t1, ref->as_register()); +-} +- +-void ZBarrierSetAssembler::generate_c1_load_barrier_stub(LIR_Assembler* ce, +- ZLoadBarrierStubC1* stub) const { +- // Stub entry +- __ bind(*stub->entry()); +- +- Register ref = stub->ref()->as_register(); +- Register ref_addr = noreg; +- Register tmp = noreg; +- +- if (stub->tmp()->is_valid()) { +- // Load address into tmp register +- ce->leal(stub->ref_addr(), stub->tmp()); +- ref_addr = tmp = stub->tmp()->as_pointer_register(); +- } else { +- // Address already in register +- ref_addr = stub->ref_addr()->as_address_ptr()->base()->as_pointer_register(); +- } +- +- assert_different_registers(ref, ref_addr, noreg); +- +- // Save x10 unless it is the result or tmp register +- // Set up SP to accomodate parameters and maybe x10. 
+- if (ref != x10 && tmp != x10) { +- __ sub(sp, sp, 32); +- __ sd(x10, Address(sp, 16)); +- } else { +- __ sub(sp, sp, 16); +- } +- +- // Setup arguments and call runtime stub +- ce->store_parameter(ref_addr, 1); +- ce->store_parameter(ref, 0); +- +- __ far_call(stub->runtime_stub()); +- +- // Verify result +- __ verify_oop(x10, "Bad oop"); +- +- +- // Move result into place +- if (ref != x10) { +- __ mv(ref, x10); +- } +- +- // Restore x10 unless it is the result or tmp register +- if (ref != x10 && tmp != x10) { +- __ ld(x10, Address(sp, 16)); +- __ add(sp, sp, 32); +- } else { +- __ add(sp, sp, 16); +- } +- +- // Stub exit +- __ j(*stub->continuation()); +-} +- +-#undef __ +-#define __ sasm-> +- +-void ZBarrierSetAssembler::generate_c1_load_barrier_runtime_stub(StubAssembler* sasm, +- DecoratorSet decorators) const { +- __ prologue("zgc_load_barrier stub", false); +- +- __ push_call_clobbered_registers_except(RegSet::of(x10)); +- +- // Setup arguments +- __ load_parameter(0, c_rarg0); +- __ load_parameter(1, c_rarg1); +- +- __ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators), 2); +- +- __ pop_call_clobbered_registers_except(RegSet::of(x10)); +- +- __ epilogue(); +-} +- +-#undef __ +-#endif // COMPILER1 +diff --git a/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.hpp +deleted file mode 100644 +index dc07ab635fe..00000000000 +--- a/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.hpp ++++ /dev/null +@@ -1,101 +0,0 @@ +-/* +- * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. +- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +- * +- * This code is free software; you can redistribute it and/or modify it +- * under the terms of the GNU General Public License version 2 only, as +- * published by the Free Software Foundation. +- * +- * This code is distributed in the hope that it will be useful, but WITHOUT +- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +- * version 2 for more details (a copy is included in the LICENSE file that +- * accompanied this code). +- * +- * You should have received a copy of the GNU General Public License version +- * 2 along with this work; if not, write to the Free Software Foundation, +- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +- * +- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +- * or visit www.oracle.com if you need additional information or have any +- * questions. 
+- * +- */ +- +-#ifndef CPU_RISCV_GC_Z_ZBARRIERSETASSEMBLER_RISCV_HPP +-#define CPU_RISCV_GC_Z_ZBARRIERSETASSEMBLER_RISCV_HPP +- +-#include "code/vmreg.hpp" +-#include "oops/accessDecorators.hpp" +-#ifdef COMPILER2 +-#include "opto/optoreg.hpp" +-#endif // COMPILER2 +- +-#ifdef COMPILER1 +-class LIR_Assembler; +-class LIR_Opr; +-class StubAssembler; +-class ZLoadBarrierStubC1; +-#endif // COMPILER1 +- +-#ifdef COMPILER2 +-class Node; +-class ZLoadBarrierStubC2; +-#endif // COMPILER2 +- +-class ZBarrierSetAssembler : public ZBarrierSetAssemblerBase { +-public: +- virtual void load_at(MacroAssembler* masm, +- DecoratorSet decorators, +- BasicType type, +- Register dst, +- Address src, +- Register tmp1, +- Register tmp_thread); +- +-#ifdef ASSERT +- virtual void store_at(MacroAssembler* masm, +- DecoratorSet decorators, +- BasicType type, +- Address dst, +- Register val, +- Register tmp1, +- Register tmp2); +-#endif // ASSERT +- +- virtual void arraycopy_prologue(MacroAssembler* masm, +- DecoratorSet decorators, +- bool is_oop, +- Register src, +- Register dst, +- Register count, +- RegSet saved_regs); +- +- virtual void try_resolve_jobject_in_native(MacroAssembler* masm, +- Register jni_env, +- Register robj, +- Register tmp, +- Label& slowpath); +- +-#ifdef COMPILER1 +- void generate_c1_load_barrier_test(LIR_Assembler* ce, +- LIR_Opr ref) const; +- +- void generate_c1_load_barrier_stub(LIR_Assembler* ce, +- ZLoadBarrierStubC1* stub) const; +- +- void generate_c1_load_barrier_runtime_stub(StubAssembler* sasm, +- DecoratorSet decorators) const; +-#endif // COMPILER1 +- +-#ifdef COMPILER2 +- OptoReg::Name refine_register(const Node* node, +- OptoReg::Name opto_reg); +- +- void generate_c2_load_barrier_stub(MacroAssembler* masm, +- ZLoadBarrierStubC2* stub) const; +-#endif // COMPILER2 +-}; +- +-#endif // CPU_RISCV_GC_Z_ZBARRIERSETASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp b/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp +deleted file mode 100644 +index d14997790af..00000000000 +--- a/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp ++++ /dev/null +@@ -1,212 +0,0 @@ +-/* +- * Copyright (c) 2017, 2021, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. +- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +- * +- * This code is free software; you can redistribute it and/or modify it +- * under the terms of the GNU General Public License version 2 only, as +- * published by the Free Software Foundation. +- * +- * This code is distributed in the hope that it will be useful, but WITHOUT +- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +- * version 2 for more details (a copy is included in the LICENSE file that +- * accompanied this code). +- * +- * You should have received a copy of the GNU General Public License version +- * 2 along with this work; if not, write to the Free Software Foundation, +- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +- * +- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +- * or visit www.oracle.com if you need additional information or have any +- * questions. 
+- * +- */ +- +-#include "precompiled.hpp" +-#include "gc/shared/gcLogPrecious.hpp" +-#include "gc/shared/gc_globals.hpp" +-#include "gc/z/zGlobals.hpp" +-#include "runtime/globals.hpp" +-#include "runtime/os.hpp" +-#include "utilities/globalDefinitions.hpp" +-#include "utilities/powerOfTwo.hpp" +- +-#ifdef LINUX +-#include +-#endif // LINUX +- +-// +-// The heap can have three different layouts, depending on the max heap size. +-// +-// Address Space & Pointer Layout 1 +-// -------------------------------- +-// +-// +--------------------------------+ 0x00007FFFFFFFFFFF (127TB) +-// . . +-// . . +-// . . +-// +--------------------------------+ 0x0000014000000000 (20TB) +-// | Remapped View | +-// +--------------------------------+ 0x0000010000000000 (16TB) +-// . . +-// +--------------------------------+ 0x00000c0000000000 (12TB) +-// | Marked1 View | +-// +--------------------------------+ 0x0000080000000000 (8TB) +-// | Marked0 View | +-// +--------------------------------+ 0x0000040000000000 (4TB) +-// . . +-// +--------------------------------+ 0x0000000000000000 +-// +-// 6 4 4 4 4 +-// 3 6 5 2 1 0 +-// +--------------------+----+-----------------------------------------------+ +-// |00000000 00000000 00|1111|11 11111111 11111111 11111111 11111111 11111111| +-// +--------------------+----+-----------------------------------------------+ +-// | | | +-// | | * 41-0 Object Offset (42-bits, 4TB address space) +-// | | +-// | * 45-42 Metadata Bits (4-bits) 0001 = Marked0 (Address view 4-8TB) +-// | 0010 = Marked1 (Address view 8-12TB) +-// | 0100 = Remapped (Address view 16-20TB) +-// | 1000 = Finalizable (Address view N/A) +-// | +-// * 63-46 Fixed (18-bits, always zero) +-// +-// +-// Address Space & Pointer Layout 2 +-// -------------------------------- +-// +-// +--------------------------------+ 0x00007FFFFFFFFFFF (127TB) +-// . . +-// . . +-// . . +-// +--------------------------------+ 0x0000280000000000 (40TB) +-// | Remapped View | +-// +--------------------------------+ 0x0000200000000000 (32TB) +-// . . +-// +--------------------------------+ 0x0000180000000000 (24TB) +-// | Marked1 View | +-// +--------------------------------+ 0x0000100000000000 (16TB) +-// | Marked0 View | +-// +--------------------------------+ 0x0000080000000000 (8TB) +-// . . +-// +--------------------------------+ 0x0000000000000000 +-// +-// 6 4 4 4 4 +-// 3 7 6 3 2 0 +-// +------------------+-----+------------------------------------------------+ +-// |00000000 00000000 0|1111|111 11111111 11111111 11111111 11111111 11111111| +-// +-------------------+----+------------------------------------------------+ +-// | | | +-// | | * 42-0 Object Offset (43-bits, 8TB address space) +-// | | +-// | * 46-43 Metadata Bits (4-bits) 0001 = Marked0 (Address view 8-16TB) +-// | 0010 = Marked1 (Address view 16-24TB) +-// | 0100 = Remapped (Address view 32-40TB) +-// | 1000 = Finalizable (Address view N/A) +-// | +-// * 63-47 Fixed (17-bits, always zero) +-// +-// +-// Address Space & Pointer Layout 3 +-// -------------------------------- +-// +-// +--------------------------------+ 0x00007FFFFFFFFFFF (127TB) +-// . . +-// . . +-// . . +-// +--------------------------------+ 0x0000500000000000 (80TB) +-// | Remapped View | +-// +--------------------------------+ 0x0000400000000000 (64TB) +-// . . 
+-// +--------------------------------+ 0x0000300000000000 (48TB) +-// | Marked1 View | +-// +--------------------------------+ 0x0000200000000000 (32TB) +-// | Marked0 View | +-// +--------------------------------+ 0x0000100000000000 (16TB) +-// . . +-// +--------------------------------+ 0x0000000000000000 +-// +-// 6 4 4 4 4 +-// 3 8 7 4 3 0 +-// +------------------+----+-------------------------------------------------+ +-// |00000000 00000000 |1111|1111 11111111 11111111 11111111 11111111 11111111| +-// +------------------+----+-------------------------------------------------+ +-// | | | +-// | | * 43-0 Object Offset (44-bits, 16TB address space) +-// | | +-// | * 47-44 Metadata Bits (4-bits) 0001 = Marked0 (Address view 16-32TB) +-// | 0010 = Marked1 (Address view 32-48TB) +-// | 0100 = Remapped (Address view 64-80TB) +-// | 1000 = Finalizable (Address view N/A) +-// | +-// * 63-48 Fixed (16-bits, always zero) +-// +- +-// Default value if probing is not implemented for a certain platform: 128TB +-static const size_t DEFAULT_MAX_ADDRESS_BIT = 47; +-// Minimum value returned, if probing fails: 64GB +-static const size_t MINIMUM_MAX_ADDRESS_BIT = 36; +- +-static size_t probe_valid_max_address_bit() { +-#ifdef LINUX +- size_t max_address_bit = 0; +- const size_t page_size = os::vm_page_size(); +- for (size_t i = DEFAULT_MAX_ADDRESS_BIT; i > MINIMUM_MAX_ADDRESS_BIT; --i) { +- const uintptr_t base_addr = ((uintptr_t) 1U) << i; +- if (msync((void*)base_addr, page_size, MS_ASYNC) == 0) { +- // msync suceeded, the address is valid, and maybe even already mapped. +- max_address_bit = i; +- break; +- } +- if (errno != ENOMEM) { +- // Some error occured. This should never happen, but msync +- // has some undefined behavior, hence ignore this bit. +-#ifdef ASSERT +- fatal("Received '%s' while probing the address space for the highest valid bit", os::errno_name(errno)); +-#else // ASSERT +- log_warning_p(gc)("Received '%s' while probing the address space for the highest valid bit", os::errno_name(errno)); +-#endif // ASSERT +- continue; +- } +- // Since msync failed with ENOMEM, the page might not be mapped. +- // Try to map it, to see if the address is valid. 
+- void* const result_addr = mmap((void*) base_addr, page_size, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0); +- if (result_addr != MAP_FAILED) { +- munmap(result_addr, page_size); +- } +- if ((uintptr_t) result_addr == base_addr) { +- // address is valid +- max_address_bit = i; +- break; +- } +- } +- if (max_address_bit == 0) { +- // probing failed, allocate a very high page and take that bit as the maximum +- const uintptr_t high_addr = ((uintptr_t) 1U) << DEFAULT_MAX_ADDRESS_BIT; +- void* const result_addr = mmap((void*) high_addr, page_size, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0); +- if (result_addr != MAP_FAILED) { +- max_address_bit = BitsPerSize_t - count_leading_zeros((size_t) result_addr) - 1; +- munmap(result_addr, page_size); +- } +- } +- log_info_p(gc, init)("Probing address space for the highest valid bit: " SIZE_FORMAT, max_address_bit); +- return MAX2(max_address_bit, MINIMUM_MAX_ADDRESS_BIT); +-#else // LINUX +- return DEFAULT_MAX_ADDRESS_BIT; +-#endif // LINUX +-} +- +-size_t ZPlatformAddressOffsetBits() { +- const static size_t valid_max_address_offset_bits = probe_valid_max_address_bit() + 1; +- const size_t max_address_offset_bits = valid_max_address_offset_bits - 3; +- const size_t min_address_offset_bits = max_address_offset_bits - 2; +- const size_t address_offset = round_up_power_of_2(MaxHeapSize * ZVirtualToPhysicalRatio); +- const size_t address_offset_bits = log2i_exact(address_offset); +- return clamp(address_offset_bits, min_address_offset_bits, max_address_offset_bits); +-} +- +-size_t ZPlatformAddressMetadataShift() { +- return ZPlatformAddressOffsetBits(); +-} +diff --git a/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp b/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp +deleted file mode 100644 +index f20ecd9b073..00000000000 +--- a/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp ++++ /dev/null +@@ -1,36 +0,0 @@ +-/* +- * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. +- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +- * +- * This code is free software; you can redistribute it and/or modify it +- * under the terms of the GNU General Public License version 2 only, as +- * published by the Free Software Foundation. +- * +- * This code is distributed in the hope that it will be useful, but WITHOUT +- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +- * version 2 for more details (a copy is included in the LICENSE file that +- * accompanied this code). +- * +- * You should have received a copy of the GNU General Public License version +- * 2 along with this work; if not, write to the Free Software Foundation, +- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +- * +- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +- * or visit www.oracle.com if you need additional information or have any +- * questions. 
+- * +- */ +- +-#ifndef CPU_RISCV_GC_Z_ZGLOBALS_RISCV_HPP +-#define CPU_RISCV_GC_Z_ZGLOBALS_RISCV_HPP +- +-const size_t ZPlatformGranuleSizeShift = 21; // 2MB +-const size_t ZPlatformHeapViews = 3; +-const size_t ZPlatformCacheLineSize = 64; +- +-size_t ZPlatformAddressOffsetBits(); +-size_t ZPlatformAddressMetadataShift(); +- +-#endif // CPU_RISCV_GC_Z_ZGLOBALS_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/gc/z/z_riscv64.ad b/src/hotspot/cpu/riscv/gc/z/z_riscv64.ad +deleted file mode 100644 +index 6b6f87814a5..00000000000 +--- a/src/hotspot/cpu/riscv/gc/z/z_riscv64.ad ++++ /dev/null +@@ -1,233 +0,0 @@ +-// +-// Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved. +-// Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. +-// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +-// +-// This code is free software; you can redistribute it and/or modify it +-// under the terms of the GNU General Public License version 2 only, as +-// published by the Free Software Foundation. +-// +-// This code is distributed in the hope that it will be useful, but WITHOUT +-// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +-// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +-// version 2 for more details (a copy is included in the LICENSE file that +-// accompanied this code). +-// +-// You should have received a copy of the GNU General Public License version +-// 2 along with this work; if not, write to the Free Software Foundation, +-// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +-// +-// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +-// or visit www.oracle.com if you need additional information or have any +-// questions. 
+-// +- +-source_hpp %{ +- +-#include "gc/shared/gc_globals.hpp" +-#include "gc/z/c2/zBarrierSetC2.hpp" +-#include "gc/z/zThreadLocalData.hpp" +- +-%} +- +-source %{ +- +-static void z_load_barrier(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp, int barrier_data) { +- if (barrier_data == ZLoadBarrierElided) { +- return; +- } +- ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, barrier_data); +- __ ld(tmp, Address(xthread, ZThreadLocalData::address_bad_mask_offset())); +- __ andr(tmp, tmp, ref); +- __ bnez(tmp, *stub->entry(), true /* far */); +- __ bind(*stub->continuation()); +-} +- +-static void z_load_barrier_slow_path(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp) { +- ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, ZLoadBarrierStrong); +- __ j(*stub->entry()); +- __ bind(*stub->continuation()); +-} +- +-%} +- +-// Load Pointer +-instruct zLoadP(iRegPNoSp dst, memory mem) +-%{ +- match(Set dst (LoadP mem)); +- predicate(UseZGC && (n->as_Load()->barrier_data() != 0)); +- effect(TEMP dst); +- +- ins_cost(4 * DEFAULT_COST); +- +- format %{ "ld $dst, $mem, #@zLoadP" %} +- +- ins_encode %{ +- const Address ref_addr (as_Register($mem$$base), $mem$$disp); +- __ ld($dst$$Register, ref_addr); +- z_load_barrier(_masm, this, ref_addr, $dst$$Register, t0 /* tmp */, barrier_data()); +- %} +- +- ins_pipe(iload_reg_mem); +-%} +- +-instruct zCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ +- match(Set res (CompareAndSwapP mem (Binary oldval newval))); +- match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); +- predicate(UseZGC && !needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); +- effect(KILL cr, TEMP_DEF res); +- +- ins_cost(2 * VOLATILE_REF_COST); +- +- format %{ "cmpxchg $mem, $oldval, $newval, #@zCompareAndSwapP\n\t" +- "mv $res, $res == $oldval" %} +- +- ins_encode %{ +- Label failed; +- guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); +- __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, +- Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register, +- true /* result_as_bool */); +- __ beqz($res$$Register, failed); +- __ mv(t0, $oldval$$Register); +- __ bind(failed); +- if (barrier_data() != ZLoadBarrierElided) { +- Label good; +- __ ld(t1, Address(xthread, ZThreadLocalData::address_bad_mask_offset()), t1 /* tmp */); +- __ andr(t1, t1, t0); +- __ beqz(t1, good); +- z_load_barrier_slow_path(_masm, this, Address($mem$$Register), t0 /* ref */, t1 /* tmp */); +- __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, +- Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register, +- true /* result_as_bool */); +- __ bind(good); +- } +- %} +- +- ins_pipe(pipe_slow); +-%} +- +-instruct zCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ +- match(Set res (CompareAndSwapP mem (Binary oldval newval))); +- match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); +- predicate(UseZGC && needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong)); +- effect(KILL cr, TEMP_DEF res); +- +- ins_cost(2 * VOLATILE_REF_COST); +- +- format %{ "cmpxchg $mem, $oldval, $newval, #@zCompareAndSwapPAcq\n\t" +- "mv $res, $res == $oldval" %} +- +- ins_encode %{ +- 
Label failed; +- guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); +- __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, +- Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register, +- true /* result_as_bool */); +- __ beqz($res$$Register, failed); +- __ mv(t0, $oldval$$Register); +- __ bind(failed); +- if (barrier_data() != ZLoadBarrierElided) { +- Label good; +- __ ld(t1, Address(xthread, ZThreadLocalData::address_bad_mask_offset()), t1 /* tmp */); +- __ andr(t1, t1, t0); +- __ beqz(t1, good); +- z_load_barrier_slow_path(_masm, this, Address($mem$$Register), t0 /* ref */, t1 /* tmp */); +- __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, +- Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register, +- true /* result_as_bool */); +- __ bind(good); +- } +- %} +- +- ins_pipe(pipe_slow); +-%} +- +-instruct zCompareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) %{ +- match(Set res (CompareAndExchangeP mem (Binary oldval newval))); +- predicate(UseZGC && !needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); +- effect(TEMP_DEF res); +- +- ins_cost(2 * VOLATILE_REF_COST); +- +- format %{ "cmpxchg $res = $mem, $oldval, $newval, #@zCompareAndExchangeP" %} +- +- ins_encode %{ +- guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); +- __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, +- Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register); +- if (barrier_data() != ZLoadBarrierElided) { +- Label good; +- __ ld(t0, Address(xthread, ZThreadLocalData::address_bad_mask_offset())); +- __ andr(t0, t0, $res$$Register); +- __ beqz(t0, good); +- z_load_barrier_slow_path(_masm, this, Address($mem$$Register), $res$$Register /* ref */, t0 /* tmp */); +- __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, +- Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register); +- __ bind(good); +- } +- %} +- +- ins_pipe(pipe_slow); +-%} +- +-instruct zCompareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) %{ +- match(Set res (CompareAndExchangeP mem (Binary oldval newval))); +- predicate(UseZGC && needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); +- effect(TEMP_DEF res); +- +- ins_cost(2 * VOLATILE_REF_COST); +- +- format %{ "cmpxchg $res = $mem, $oldval, $newval, #@zCompareAndExchangePAcq" %} +- +- ins_encode %{ +- guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); +- __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, +- Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register); +- if (barrier_data() != ZLoadBarrierElided) { +- Label good; +- __ ld(t0, Address(xthread, ZThreadLocalData::address_bad_mask_offset())); +- __ andr(t0, t0, $res$$Register); +- __ beqz(t0, good); +- z_load_barrier_slow_path(_masm, this, Address($mem$$Register), $res$$Register /* ref */, t0 /* tmp */); +- __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, +- Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register); +- __ bind(good); +- } +- %} +- +- ins_pipe(pipe_slow); +-%} +- +-instruct zGetAndSetP(indirect mem, iRegP newv, iRegPNoSp prev, rFlagsReg cr) %{ +- match(Set prev (GetAndSetP mem newv)); +- predicate(UseZGC && !needs_acquiring_load_reserved(n) && 
n->as_LoadStore()->barrier_data() != 0); +- effect(TEMP_DEF prev, KILL cr); +- +- ins_cost(2 * VOLATILE_REF_COST); +- +- format %{ "atomic_xchg $prev, $newv, [$mem], #@zGetAndSetP" %} +- +- ins_encode %{ +- __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base)); +- z_load_barrier(_masm, this, Address(noreg, 0), $prev$$Register, t0 /* tmp */, barrier_data()); +- %} +- +- ins_pipe(pipe_serial); +-%} +- +-instruct zGetAndSetPAcq(indirect mem, iRegP newv, iRegPNoSp prev, rFlagsReg cr) %{ +- match(Set prev (GetAndSetP mem newv)); +- predicate(UseZGC && needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() != 0)); +- effect(TEMP_DEF prev, KILL cr); +- +- ins_cost(VOLATILE_REF_COST); +- +- format %{ "atomic_xchg_acq $prev, $newv, [$mem], #@zGetAndSetPAcq" %} +- +- ins_encode %{ +- __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base)); +- z_load_barrier(_masm, this, Address(noreg, 0), $prev$$Register, t0 /* tmp */, barrier_data()); +- %} +- ins_pipe(pipe_serial); +-%} +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index 86710295444..9d2cc4cf89f 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -1046,52 +1046,6 @@ int MacroAssembler::pop_fp(unsigned int bitset, Register stack) { + return count; + } + +-#ifdef COMPILER2 +-int MacroAssembler::push_vp(unsigned int bitset, Register stack) { +- CompressibleRegion cr(this); +- int vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); +- +- // Scan bitset to accumulate register pairs +- unsigned char regs[32]; +- int count = 0; +- for (int reg = 31; reg >= 0; reg--) { +- if ((1U << 31) & bitset) { +- regs[count++] = reg; +- } +- bitset <<= 1; +- } +- +- for (int i = 0; i < count; i++) { +- sub(stack, stack, vector_size_in_bytes); +- vs1r_v(as_VectorRegister(regs[i]), stack); +- } +- +- return count * vector_size_in_bytes / wordSize; +-} +- +-int MacroAssembler::pop_vp(unsigned int bitset, Register stack) { +- CompressibleRegion cr(this); +- int vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); +- +- // Scan bitset to accumulate register pairs +- unsigned char regs[32]; +- int count = 0; +- for (int reg = 31; reg >= 0; reg--) { +- if ((1U << 31) & bitset) { +- regs[count++] = reg; +- } +- bitset <<= 1; +- } +- +- for (int i = count - 1; i >= 0; i--) { +- vl1r_v(as_VectorRegister(regs[i]), stack); +- add(stack, stack, vector_size_in_bytes); +- } +- +- return count * vector_size_in_bytes / wordSize; +-} +-#endif // COMPILER2 +- + void MacroAssembler::push_call_clobbered_registers_except(RegSet exclude) { + CompressibleRegion cr(this); + // Push integer registers x7, x10-x17, x28-x31. 
+diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +index 23e09475be1..b2f0455a1f1 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +@@ -484,12 +484,6 @@ class MacroAssembler: public Assembler { + void pop_reg(Register Rd); + int push_reg(unsigned int bitset, Register stack); + int pop_reg(unsigned int bitset, Register stack); +- void push_fp(FloatRegSet regs, Register stack) { if (regs.bits()) push_fp(regs.bits(), stack); } +- void pop_fp(FloatRegSet regs, Register stack) { if (regs.bits()) pop_fp(regs.bits(), stack); } +-#ifdef COMPILER2 +- void push_vp(VectorRegSet regs, Register stack) { if (regs.bits()) push_vp(regs.bits(), stack); } +- void pop_vp(VectorRegSet regs, Register stack) { if (regs.bits()) pop_vp(regs.bits(), stack); } +-#endif // COMPILER2 + + // Push and pop everything that might be clobbered by a native + // runtime call except t0 and t1. (They are always +@@ -783,9 +777,6 @@ class MacroAssembler: public Assembler { + int push_fp(unsigned int bitset, Register stack); + int pop_fp(unsigned int bitset, Register stack); + +- int push_vp(unsigned int bitset, Register stack); +- int pop_vp(unsigned int bitset, Register stack); +- + // vext + void vmnot_m(VectorRegister vd, VectorRegister vs); + void vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked); +diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp +index b3fdd04db1b..b05edf7172c 100644 +--- a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp ++++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp +@@ -546,16 +546,6 @@ class StubGenerator: public StubCodeGenerator { + // make sure object is 'reasonable' + __ beqz(x10, exit); // if obj is NULL it is OK + +-#if INCLUDE_ZGC +- if (UseZGC) { +- // Check if mask is good. 
+- // verifies that ZAddressBadMask & x10 == 0 +- __ ld(c_rarg3, Address(xthread, ZThreadLocalData::address_bad_mask_offset())); +- __ andr(c_rarg2, x10, c_rarg3); +- __ bnez(c_rarg2, error); +- } +-#endif +- + // Check if the oop is in the right area of memory + __ mv(c_rarg3, (intptr_t) Universe::verify_oop_mask()); + __ andr(c_rarg2, x10, c_rarg3); + +From 7772140df96747b42b13007d0827fc21d2a8b926 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Mon, 27 Mar 2023 15:43:39 +0800 +Subject: [PATCH 003/140] Drop the C2 Vector part + +--- + make/hotspot/gensrc/GensrcAdlc.gmk | 1 - + .../cpu/riscv/c2_MacroAssembler_riscv.cpp | 325 --- + .../cpu/riscv/c2_MacroAssembler_riscv.hpp | 52 - + src/hotspot/cpu/riscv/globals_riscv.hpp | 8 +- + .../cpu/riscv/macroAssembler_riscv.cpp | 22 +- + .../cpu/riscv/macroAssembler_riscv.hpp | 4 +- + src/hotspot/cpu/riscv/matcher_riscv.hpp | 44 +- + src/hotspot/cpu/riscv/register_riscv.cpp | 5 - + src/hotspot/cpu/riscv/register_riscv.hpp | 4 +- + src/hotspot/cpu/riscv/riscv.ad | 476 +--- + src/hotspot/cpu/riscv/riscv_v.ad | 2065 ----------------- + src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 61 +- + src/hotspot/cpu/riscv/stubGenerator_riscv.cpp | 110 - + src/hotspot/cpu/riscv/vm_version_riscv.cpp | 4 - + src/hotspot/cpu/riscv/vmreg_riscv.cpp | 10 +- + src/hotspot/cpu/riscv/vmreg_riscv.hpp | 17 +- + 16 files changed, 41 insertions(+), 3167 deletions(-) + delete mode 100644 src/hotspot/cpu/riscv/riscv_v.ad + +diff --git a/make/hotspot/gensrc/GensrcAdlc.gmk b/make/hotspot/gensrc/GensrcAdlc.gmk +index 67f4c6f0574..51137b99db2 100644 +--- a/make/hotspot/gensrc/GensrcAdlc.gmk ++++ b/make/hotspot/gensrc/GensrcAdlc.gmk +@@ -152,7 +152,6 @@ ifeq ($(call check-jvm-feature, compiler2), true) + + ifeq ($(HOTSPOT_TARGET_CPU_ARCH), riscv) + AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \ +- $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_v.ad \ + $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_b.ad \ + ))) + endif +diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp +index 27770dc17aa..73f84a724ca 100644 +--- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp +@@ -1319,328 +1319,3 @@ void C2_MacroAssembler::minmax_FD(FloatRegister dst, FloatRegister src1, FloatRe + + bind(Done); + } +- +-void C2_MacroAssembler::element_compare(Register a1, Register a2, Register result, Register cnt, Register tmp1, Register tmp2, +- VectorRegister vr1, VectorRegister vr2, VectorRegister vrs, bool islatin, Label &DONE) { +- Label loop; +- Assembler::SEW sew = islatin ? 
Assembler::e8 : Assembler::e16; +- +- bind(loop); +- vsetvli(tmp1, cnt, sew, Assembler::m2); +- vlex_v(vr1, a1, sew); +- vlex_v(vr2, a2, sew); +- vmsne_vv(vrs, vr1, vr2); +- vfirst_m(tmp2, vrs); +- bgez(tmp2, DONE); +- sub(cnt, cnt, tmp1); +- if (!islatin) { +- slli(tmp1, tmp1, 1); // get byte counts +- } +- add(a1, a1, tmp1); +- add(a2, a2, tmp1); +- bnez(cnt, loop); +- +- mv(result, true); +-} +- +-void C2_MacroAssembler::string_equals_v(Register a1, Register a2, Register result, Register cnt, int elem_size) { +- Label DONE; +- Register tmp1 = t0; +- Register tmp2 = t1; +- +- BLOCK_COMMENT("string_equals_v {"); +- +- mv(result, false); +- +- if (elem_size == 2) { +- srli(cnt, cnt, 1); +- } +- +- element_compare(a1, a2, result, cnt, tmp1, tmp2, v0, v2, v0, elem_size == 1, DONE); +- +- bind(DONE); +- BLOCK_COMMENT("} string_equals_v"); +-} +- +-// used by C2 ClearArray patterns. +-// base: Address of a buffer to be zeroed +-// cnt: Count in HeapWords +-// +-// base, cnt, v0, v1 and t0 are clobbered. +-void C2_MacroAssembler::clear_array_v(Register base, Register cnt) { +- Label loop; +- +- // making zero words +- vsetvli(t0, cnt, Assembler::e64, Assembler::m4); +- vxor_vv(v0, v0, v0); +- +- bind(loop); +- vsetvli(t0, cnt, Assembler::e64, Assembler::m4); +- vse64_v(v0, base); +- sub(cnt, cnt, t0); +- shadd(base, t0, base, t0, 3); +- bnez(cnt, loop); +-} +- +-void C2_MacroAssembler::arrays_equals_v(Register a1, Register a2, Register result, +- Register cnt1, int elem_size) { +- Label DONE; +- Register tmp1 = t0; +- Register tmp2 = t1; +- Register cnt2 = tmp2; +- int length_offset = arrayOopDesc::length_offset_in_bytes(); +- int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE); +- +- BLOCK_COMMENT("arrays_equals_v {"); +- +- // if (a1 == a2), return true +- mv(result, true); +- beq(a1, a2, DONE); +- +- mv(result, false); +- // if a1 == null or a2 == null, return false +- beqz(a1, DONE); +- beqz(a2, DONE); +- // if (a1.length != a2.length), return false +- lwu(cnt1, Address(a1, length_offset)); +- lwu(cnt2, Address(a2, length_offset)); +- bne(cnt1, cnt2, DONE); +- +- la(a1, Address(a1, base_offset)); +- la(a2, Address(a2, base_offset)); +- +- element_compare(a1, a2, result, cnt1, tmp1, tmp2, v0, v2, v0, elem_size == 1, DONE); +- +- bind(DONE); +- +- BLOCK_COMMENT("} arrays_equals_v"); +-} +- +-void C2_MacroAssembler::string_compare_v(Register str1, Register str2, Register cnt1, Register cnt2, +- Register result, Register tmp1, Register tmp2, int encForm) { +- Label DIFFERENCE, DONE, L, loop; +- bool encLL = encForm == StrIntrinsicNode::LL; +- bool encLU = encForm == StrIntrinsicNode::LU; +- bool encUL = encForm == StrIntrinsicNode::UL; +- +- bool str1_isL = encLL || encLU; +- bool str2_isL = encLL || encUL; +- +- int minCharsInWord = encLL ? wordSize : wordSize / 2; +- +- BLOCK_COMMENT("string_compare {"); +- +- // for Lating strings, 1 byte for 1 character +- // for UTF16 strings, 2 bytes for 1 character +- if (!str1_isL) +- sraiw(cnt1, cnt1, 1); +- if (!str2_isL) +- sraiw(cnt2, cnt2, 1); +- +- // if str1 == str2, return the difference +- // save the minimum of the string lengths in cnt2. +- sub(result, cnt1, cnt2); +- bgt(cnt1, cnt2, L); +- mv(cnt2, cnt1); +- bind(L); +- +- if (str1_isL == str2_isL) { // LL or UU +- element_compare(str1, str2, zr, cnt2, tmp1, tmp2, v2, v4, v1, encLL, DIFFERENCE); +- j(DONE); +- } else { // LU or UL +- Register strL = encLU ? str1 : str2; +- Register strU = encLU ? str2 : str1; +- VectorRegister vstr1 = encLU ? 
v4 : v0; +- VectorRegister vstr2 = encLU ? v0 : v4; +- +- bind(loop); +- vsetvli(tmp1, cnt2, Assembler::e8, Assembler::m2); +- vle8_v(vstr1, strL); +- vsetvli(tmp1, cnt2, Assembler::e16, Assembler::m4); +- vzext_vf2(vstr2, vstr1); +- vle16_v(vstr1, strU); +- vmsne_vv(v0, vstr2, vstr1); +- vfirst_m(tmp2, v0); +- bgez(tmp2, DIFFERENCE); +- sub(cnt2, cnt2, tmp1); +- add(strL, strL, tmp1); +- shadd(strU, tmp1, strU, tmp1, 1); +- bnez(cnt2, loop); +- j(DONE); +- } +- bind(DIFFERENCE); +- slli(tmp1, tmp2, 1); +- add(str1, str1, str1_isL ? tmp2 : tmp1); +- add(str2, str2, str2_isL ? tmp2 : tmp1); +- str1_isL ? lbu(tmp1, Address(str1, 0)) : lhu(tmp1, Address(str1, 0)); +- str2_isL ? lbu(tmp2, Address(str2, 0)) : lhu(tmp2, Address(str2, 0)); +- sub(result, tmp1, tmp2); +- +- bind(DONE); +-} +- +-void C2_MacroAssembler::byte_array_inflate_v(Register src, Register dst, Register len, Register tmp) { +- Label loop; +- assert_different_registers(src, dst, len, tmp, t0); +- +- BLOCK_COMMENT("byte_array_inflate_v {"); +- bind(loop); +- vsetvli(tmp, len, Assembler::e8, Assembler::m2); +- vle8_v(v2, src); +- vsetvli(t0, len, Assembler::e16, Assembler::m4); +- vzext_vf2(v0, v2); +- vse16_v(v0, dst); +- sub(len, len, tmp); +- add(src, src, tmp); +- shadd(dst, tmp, dst, tmp, 1); +- bnez(len, loop); +- BLOCK_COMMENT("} byte_array_inflate_v"); +-} +- +-// Compress char[] array to byte[]. +-// result: the array length if every element in array can be encoded; 0, otherwise. +-void C2_MacroAssembler::char_array_compress_v(Register src, Register dst, Register len, Register result, Register tmp) { +- Label done; +- encode_iso_array_v(src, dst, len, result, tmp); +- beqz(len, done); +- mv(result, zr); +- bind(done); +-} +- +-// result: the number of elements had been encoded. +-void C2_MacroAssembler::encode_iso_array_v(Register src, Register dst, Register len, Register result, Register tmp) { +- Label loop, DIFFERENCE, DONE; +- +- BLOCK_COMMENT("encode_iso_array_v {"); +- mv(result, 0); +- +- bind(loop); +- mv(tmp, 0xff); +- vsetvli(t0, len, Assembler::e16, Assembler::m2); +- vle16_v(v2, src); +- // if element > 0xff, stop +- vmsgtu_vx(v1, v2, tmp); +- vfirst_m(tmp, v1); +- vmsbf_m(v0, v1); +- // compress char to byte +- vsetvli(t0, len, Assembler::e8); +- vncvt_x_x_w(v1, v2, Assembler::v0_t); +- vse8_v(v1, dst, Assembler::v0_t); +- +- bgez(tmp, DIFFERENCE); +- add(result, result, t0); +- add(dst, dst, t0); +- sub(len, len, t0); +- shadd(src, t0, src, t0, 1); +- bnez(len, loop); +- j(DONE); +- +- bind(DIFFERENCE); +- add(result, result, tmp); +- +- bind(DONE); +- BLOCK_COMMENT("} encode_iso_array_v"); +-} +- +-void C2_MacroAssembler::count_positives_v(Register ary, Register len, Register result, Register tmp) { +- Label LOOP, SET_RESULT, DONE; +- +- BLOCK_COMMENT("count_positives_v {"); +- mv(result, zr); +- +- bind(LOOP); +- vsetvli(t0, len, Assembler::e8, Assembler::m4); +- vle8_v(v0, ary); +- vmslt_vx(v0, v0, zr); +- vfirst_m(tmp, v0); +- bgez(tmp, SET_RESULT); +- // if tmp == -1, all bytes are positive +- add(result, result, t0); +- +- sub(len, len, t0); +- add(ary, ary, t0); +- bnez(len, LOOP); +- j(DONE); +- +- // add remaining positive bytes count +- bind(SET_RESULT); +- add(result, result, tmp); +- +- bind(DONE); +- BLOCK_COMMENT("} count_positives_v"); +-} +- +-void C2_MacroAssembler::string_indexof_char_v(Register str1, Register cnt1, +- Register ch, Register result, +- Register tmp1, Register tmp2, +- bool isL) { +- mv(result, zr); +- +- Label loop, MATCH, DONE; +- Assembler::SEW sew = isL ? 
Assembler::e8 : Assembler::e16; +- bind(loop); +- vsetvli(tmp1, cnt1, sew, Assembler::m4); +- vlex_v(v0, str1, sew); +- vmseq_vx(v0, v0, ch); +- vfirst_m(tmp2, v0); +- bgez(tmp2, MATCH); // if equal, return index +- +- add(result, result, tmp1); +- sub(cnt1, cnt1, tmp1); +- if (!isL) slli(tmp1, tmp1, 1); +- add(str1, str1, tmp1); +- bnez(cnt1, loop); +- +- mv(result, -1); +- j(DONE); +- +- bind(MATCH); +- add(result, result, tmp2); +- +- bind(DONE); +-} +- +-// Set dst to NaN if any NaN input. +-void C2_MacroAssembler::minmax_FD_v(VectorRegister dst, VectorRegister src1, VectorRegister src2, +- bool is_double, bool is_min) { +- assert_different_registers(dst, src1, src2); +- +- vsetvli(t0, x0, is_double ? Assembler::e64 : Assembler::e32); +- +- is_min ? vfmin_vv(dst, src1, src2) +- : vfmax_vv(dst, src1, src2); +- +- vmfne_vv(v0, src1, src1); +- vfadd_vv(dst, src1, src1, Assembler::v0_t); +- vmfne_vv(v0, src2, src2); +- vfadd_vv(dst, src2, src2, Assembler::v0_t); +-} +- +-// Set dst to NaN if any NaN input. +-void C2_MacroAssembler::reduce_minmax_FD_v(FloatRegister dst, +- FloatRegister src1, VectorRegister src2, +- VectorRegister tmp1, VectorRegister tmp2, +- bool is_double, bool is_min) { +- assert_different_registers(src2, tmp1, tmp2); +- +- Label L_done, L_NaN; +- vsetvli(t0, x0, is_double ? Assembler::e64 : Assembler::e32); +- vfmv_s_f(tmp2, src1); +- +- is_min ? vfredmin_vs(tmp1, src2, tmp2) +- : vfredmax_vs(tmp1, src2, tmp2); +- +- fsflags(zr); +- // Checking NaNs +- vmflt_vf(tmp2, src2, src1); +- frflags(t0); +- bnez(t0, L_NaN); +- j(L_done); +- +- bind(L_NaN); +- vfmv_s_f(tmp2, src1); +- vfredsum_vs(tmp1, src2, tmp2); +- +- bind(L_done); +- vfmv_f_s(dst, tmp1); +-} +diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp +index c71df4c101b..90b6554af02 100644 +--- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp +@@ -28,13 +28,6 @@ + + // C2_MacroAssembler contains high-level macros for C2 + +- private: +- void element_compare(Register r1, Register r2, +- Register result, Register cnt, +- Register tmp1, Register tmp2, +- VectorRegister vr1, VectorRegister vr2, +- VectorRegister vrs, +- bool is_latin, Label& DONE); + public: + + void string_compare(Register str1, Register str2, +@@ -145,49 +138,4 @@ + FloatRegister src1, FloatRegister src2, + bool is_double, bool is_min); + +- // intrinsic methods implemented by rvv instructions +- void string_equals_v(Register r1, Register r2, +- Register result, Register cnt1, +- int elem_size); +- +- void arrays_equals_v(Register r1, Register r2, +- Register result, Register cnt1, +- int elem_size); +- +- void string_compare_v(Register str1, Register str2, +- Register cnt1, Register cnt2, +- Register result, +- Register tmp1, Register tmp2, +- int encForm); +- +- void clear_array_v(Register base, Register cnt); +- +- void byte_array_inflate_v(Register src, Register dst, +- Register len, Register tmp); +- +- void char_array_compress_v(Register src, Register dst, +- Register len, Register result, +- Register tmp); +- +- void encode_iso_array_v(Register src, Register dst, +- Register len, Register result, +- Register tmp); +- +- void count_positives_v(Register ary, Register len, +- Register result, Register tmp); +- +- void string_indexof_char_v(Register str1, Register cnt1, +- Register ch, Register result, +- Register tmp1, Register tmp2, +- bool isL); +- +- void minmax_FD_v(VectorRegister dst, +- VectorRegister src1, 
VectorRegister src2, +- bool is_double, bool is_min); +- +- void reduce_minmax_FD_v(FloatRegister dst, +- FloatRegister src1, VectorRegister src2, +- VectorRegister tmp1, VectorRegister tmp2, +- bool is_double, bool is_min); +- + #endif // CPU_RISCV_C2_MACROASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp +index cbfc0583883..845064d6cbc 100644 +--- a/src/hotspot/cpu/riscv/globals_riscv.hpp ++++ b/src/hotspot/cpu/riscv/globals_riscv.hpp +@@ -90,10 +90,8 @@ define_pd_global(intx, InlineSmallCode, 1000); + "Extend fence.i to fence.i + fence.") \ + product(bool, AvoidUnalignedAccesses, true, \ + "Avoid generating unaligned memory accesses") \ +- product(bool, UseRVV, false, EXPERIMENTAL, "Use RVV instructions") \ +- product(bool, UseRVB, false, EXPERIMENTAL, "Use RVB instructions") \ +- product(bool, UseRVC, false, EXPERIMENTAL, "Use RVC instructions") \ +- product(bool, UseRVVForBigIntegerShiftIntrinsics, true, \ +- "Use RVV instructions for left/right shift of BigInteger") ++ experimental(bool, UseRVV, false, "Use RVV instructions") \ ++ experimental(bool, UseRVB, false, "Use RVB instructions") \ ++ experimental(bool, UseRVC, false, "Use RVC instructions") + + #endif // CPU_RISCV_GLOBALS_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index 9d2cc4cf89f..8b8d126f6c9 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -1086,7 +1086,7 @@ void MacroAssembler::popa() { + pop_reg(0xffffffe2, sp); + } + +-void MacroAssembler::push_CPU_state(bool save_vectors, int vector_size_in_bytes) { ++void MacroAssembler::push_CPU_state() { + CompressibleRegion cr(this); + // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4) + push_reg(0xffffffe0, sp); +@@ -1096,28 +1096,10 @@ void MacroAssembler::push_CPU_state(bool save_vectors, int vector_size_in_bytes) + for (int i = 0; i < 32; i++) { + fsd(as_FloatRegister(i), Address(sp, i * wordSize)); + } +- +- // vector registers +- if (save_vectors) { +- sub(sp, sp, vector_size_in_bytes * VectorRegisterImpl::number_of_registers); +- vsetvli(t0, x0, Assembler::e64, Assembler::m8); +- for (int i = 0; i < VectorRegisterImpl::number_of_registers; i += 8) { +- add(t0, sp, vector_size_in_bytes * i); +- vse64_v(as_VectorRegister(i), t0); +- } +- } + } + +-void MacroAssembler::pop_CPU_state(bool restore_vectors, int vector_size_in_bytes) { ++void MacroAssembler::pop_CPU_state() { + CompressibleRegion cr(this); +- // vector registers +- if (restore_vectors) { +- vsetvli(t0, x0, Assembler::e64, Assembler::m8); +- for (int i = 0; i < VectorRegisterImpl::number_of_registers; i += 8) { +- vle64_v(as_VectorRegister(i), sp); +- add(sp, sp, vector_size_in_bytes * 8); +- } +- } + + // float registers + for (int i = 0; i < 32; i++) { +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +index b2f0455a1f1..b43131514c1 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +@@ -501,8 +501,8 @@ class MacroAssembler: public Assembler { + + void pusha(); + void popa(); +- void push_CPU_state(bool save_vectors = false, int vector_size_in_bytes = 0); +- void pop_CPU_state(bool restore_vectors = false, int vector_size_in_bytes = 0); ++ void push_CPU_state(); ++ void pop_CPU_state(); + + // if heap base register is used - reinit it with the correct value + 
void reinit_heapbase(); +diff --git a/src/hotspot/cpu/riscv/matcher_riscv.hpp b/src/hotspot/cpu/riscv/matcher_riscv.hpp +index 23a75d20502..4c7fabd7240 100644 +--- a/src/hotspot/cpu/riscv/matcher_riscv.hpp ++++ b/src/hotspot/cpu/riscv/matcher_riscv.hpp +@@ -31,16 +31,9 @@ + // false => size gets scaled to BytesPerLong, ok. + static const bool init_array_count_is_in_bytes = false; + +- // Whether this platform implements the scalable vector feature +- static const bool implements_scalable_vector = true; +- +- static const bool supports_scalable_vector() { +- return UseRVV; +- } +- +- // riscv supports misaligned vectors store/load. ++ // riscv doesn't support misaligned vectors store/load on JDK11. + static constexpr bool misaligned_vectors_ok() { +- return true; ++ return false; + } + + // Whether code generation need accurate ConvI2L types. +@@ -53,9 +46,6 @@ + // the cpu only look at the lower 5/6 bits anyway? + static const bool need_masked_shift_count = false; + +- // No support for generic vector operands. +- static const bool supports_generic_vector_operands = false; +- + static constexpr bool isSimpleConstant64(jlong value) { + // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. + // Probably always true, even if a temp register is required. +@@ -127,31 +117,6 @@ + // the relevant 32 bits. + static const bool int_in_long = true; + +- // Does the CPU supports vector variable shift instructions? +- static constexpr bool supports_vector_variable_shifts(void) { +- return false; +- } +- +- // Does the CPU supports vector variable rotate instructions? +- static constexpr bool supports_vector_variable_rotates(void) { +- return false; +- } +- +- // Does the CPU supports vector constant rotate instructions? +- static constexpr bool supports_vector_constant_rotates(int shift) { +- return false; +- } +- +- // Does the CPU supports vector unsigned comparison instructions? +- static const bool supports_vector_comparison_unsigned(int vlen, BasicType bt) { +- return false; +- } +- +- // Some microarchitectures have mask registers used on vectors +- static const bool has_predicated_vectors(void) { +- return false; +- } +- + // true means we have fast l2f convers + // false means that conversion is done by runtime call + static constexpr bool convL2FSupported(void) { +@@ -161,9 +126,4 @@ + // Implements a variant of EncodeISOArrayNode that encode ASCII only + static const bool supports_encode_ascii_array = false; + +- // Returns pre-selection estimated size of a vector operation. 
+- static int vector_op_pre_select_sz_estimate(int vopc, BasicType ety, int vlen) { +- return 0; +- } +- + #endif // CPU_RISCV_MATCHER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/register_riscv.cpp b/src/hotspot/cpu/riscv/register_riscv.cpp +index f8116e9df8c..96cf1996a83 100644 +--- a/src/hotspot/cpu/riscv/register_riscv.cpp ++++ b/src/hotspot/cpu/riscv/register_riscv.cpp +@@ -37,11 +37,6 @@ const int ConcreteRegisterImpl::max_fpr = + ConcreteRegisterImpl::max_gpr + + FloatRegisterImpl::number_of_registers * FloatRegisterImpl::max_slots_per_register; + +-const int ConcreteRegisterImpl::max_vpr = +- ConcreteRegisterImpl::max_fpr + +- VectorRegisterImpl::number_of_registers * VectorRegisterImpl::max_slots_per_register; +- +- + const char* RegisterImpl::name() const { + static const char *const names[number_of_registers] = { + "zr", "ra", "sp", "gp", "tp", "t0", "t1", "t2", "fp", "x9", +diff --git a/src/hotspot/cpu/riscv/register_riscv.hpp b/src/hotspot/cpu/riscv/register_riscv.hpp +index a9200cac647..d697751f55f 100644 +--- a/src/hotspot/cpu/riscv/register_riscv.hpp ++++ b/src/hotspot/cpu/riscv/register_riscv.hpp +@@ -307,14 +307,12 @@ class ConcreteRegisterImpl : public AbstractRegisterImpl { + // it's optoregs. + + number_of_registers = (RegisterImpl::max_slots_per_register * RegisterImpl::number_of_registers + +- FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers + +- VectorRegisterImpl::max_slots_per_register * VectorRegisterImpl::number_of_registers) ++ FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers) + }; + + // added to make it compile + static const int max_gpr; + static const int max_fpr; +- static const int max_vpr; + }; + + typedef AbstractRegSet RegSet; +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 588887e1d96..85593a942e9 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -226,177 +226,6 @@ reg_def F30_H ( SOC, SOC, Op_RegF, 30, f30->as_VMReg()->next() ); + reg_def F31 ( SOC, SOC, Op_RegF, 31, f31->as_VMReg() ); + reg_def F31_H ( SOC, SOC, Op_RegF, 31, f31->as_VMReg()->next() ); + +-// ---------------------------- +-// Vector Registers +-// ---------------------------- +- +-// For RVV vector registers, we simply extend vector register size to 4 +-// 'logical' slots. This is nominally 128 bits but it actually covers +-// all possible 'physical' RVV vector register lengths from 128 ~ 1024 +-// bits. The 'physical' RVV vector register length is detected during +-// startup, so the register allocator is able to identify the correct +-// number of bytes needed for an RVV spill/unspill. 
+- +-reg_def V0 ( SOC, SOC, Op_VecA, 0, v0->as_VMReg() ); +-reg_def V0_H ( SOC, SOC, Op_VecA, 0, v0->as_VMReg()->next() ); +-reg_def V0_J ( SOC, SOC, Op_VecA, 0, v0->as_VMReg()->next(2) ); +-reg_def V0_K ( SOC, SOC, Op_VecA, 0, v0->as_VMReg()->next(3) ); +- +-reg_def V1 ( SOC, SOC, Op_VecA, 1, v1->as_VMReg() ); +-reg_def V1_H ( SOC, SOC, Op_VecA, 1, v1->as_VMReg()->next() ); +-reg_def V1_J ( SOC, SOC, Op_VecA, 1, v1->as_VMReg()->next(2) ); +-reg_def V1_K ( SOC, SOC, Op_VecA, 1, v1->as_VMReg()->next(3) ); +- +-reg_def V2 ( SOC, SOC, Op_VecA, 2, v2->as_VMReg() ); +-reg_def V2_H ( SOC, SOC, Op_VecA, 2, v2->as_VMReg()->next() ); +-reg_def V2_J ( SOC, SOC, Op_VecA, 2, v2->as_VMReg()->next(2) ); +-reg_def V2_K ( SOC, SOC, Op_VecA, 2, v2->as_VMReg()->next(3) ); +- +-reg_def V3 ( SOC, SOC, Op_VecA, 3, v3->as_VMReg() ); +-reg_def V3_H ( SOC, SOC, Op_VecA, 3, v3->as_VMReg()->next() ); +-reg_def V3_J ( SOC, SOC, Op_VecA, 3, v3->as_VMReg()->next(2) ); +-reg_def V3_K ( SOC, SOC, Op_VecA, 3, v3->as_VMReg()->next(3) ); +- +-reg_def V4 ( SOC, SOC, Op_VecA, 4, v4->as_VMReg() ); +-reg_def V4_H ( SOC, SOC, Op_VecA, 4, v4->as_VMReg()->next() ); +-reg_def V4_J ( SOC, SOC, Op_VecA, 4, v4->as_VMReg()->next(2) ); +-reg_def V4_K ( SOC, SOC, Op_VecA, 4, v4->as_VMReg()->next(3) ); +- +-reg_def V5 ( SOC, SOC, Op_VecA, 5, v5->as_VMReg() ); +-reg_def V5_H ( SOC, SOC, Op_VecA, 5, v5->as_VMReg()->next() ); +-reg_def V5_J ( SOC, SOC, Op_VecA, 5, v5->as_VMReg()->next(2) ); +-reg_def V5_K ( SOC, SOC, Op_VecA, 5, v5->as_VMReg()->next(3) ); +- +-reg_def V6 ( SOC, SOC, Op_VecA, 6, v6->as_VMReg() ); +-reg_def V6_H ( SOC, SOC, Op_VecA, 6, v6->as_VMReg()->next() ); +-reg_def V6_J ( SOC, SOC, Op_VecA, 6, v6->as_VMReg()->next(2) ); +-reg_def V6_K ( SOC, SOC, Op_VecA, 6, v6->as_VMReg()->next(3) ); +- +-reg_def V7 ( SOC, SOC, Op_VecA, 7, v7->as_VMReg() ); +-reg_def V7_H ( SOC, SOC, Op_VecA, 7, v7->as_VMReg()->next() ); +-reg_def V7_J ( SOC, SOC, Op_VecA, 7, v7->as_VMReg()->next(2) ); +-reg_def V7_K ( SOC, SOC, Op_VecA, 7, v7->as_VMReg()->next(3) ); +- +-reg_def V8 ( SOC, SOC, Op_VecA, 8, v8->as_VMReg() ); +-reg_def V8_H ( SOC, SOC, Op_VecA, 8, v8->as_VMReg()->next() ); +-reg_def V8_J ( SOC, SOC, Op_VecA, 8, v8->as_VMReg()->next(2) ); +-reg_def V8_K ( SOC, SOC, Op_VecA, 8, v8->as_VMReg()->next(3) ); +- +-reg_def V9 ( SOC, SOC, Op_VecA, 9, v9->as_VMReg() ); +-reg_def V9_H ( SOC, SOC, Op_VecA, 9, v9->as_VMReg()->next() ); +-reg_def V9_J ( SOC, SOC, Op_VecA, 9, v9->as_VMReg()->next(2) ); +-reg_def V9_K ( SOC, SOC, Op_VecA, 9, v9->as_VMReg()->next(3) ); +- +-reg_def V10 ( SOC, SOC, Op_VecA, 10, v10->as_VMReg() ); +-reg_def V10_H ( SOC, SOC, Op_VecA, 10, v10->as_VMReg()->next() ); +-reg_def V10_J ( SOC, SOC, Op_VecA, 10, v10->as_VMReg()->next(2) ); +-reg_def V10_K ( SOC, SOC, Op_VecA, 10, v10->as_VMReg()->next(3) ); +- +-reg_def V11 ( SOC, SOC, Op_VecA, 11, v11->as_VMReg() ); +-reg_def V11_H ( SOC, SOC, Op_VecA, 11, v11->as_VMReg()->next() ); +-reg_def V11_J ( SOC, SOC, Op_VecA, 11, v11->as_VMReg()->next(2) ); +-reg_def V11_K ( SOC, SOC, Op_VecA, 11, v11->as_VMReg()->next(3) ); +- +-reg_def V12 ( SOC, SOC, Op_VecA, 12, v12->as_VMReg() ); +-reg_def V12_H ( SOC, SOC, Op_VecA, 12, v12->as_VMReg()->next() ); +-reg_def V12_J ( SOC, SOC, Op_VecA, 12, v12->as_VMReg()->next(2) ); +-reg_def V12_K ( SOC, SOC, Op_VecA, 12, v12->as_VMReg()->next(3) ); +- +-reg_def V13 ( SOC, SOC, Op_VecA, 13, v13->as_VMReg() ); +-reg_def V13_H ( SOC, SOC, Op_VecA, 13, v13->as_VMReg()->next() ); +-reg_def V13_J ( SOC, SOC, Op_VecA, 13, v13->as_VMReg()->next(2) ); 
+-reg_def V13_K ( SOC, SOC, Op_VecA, 13, v13->as_VMReg()->next(3) ); +- +-reg_def V14 ( SOC, SOC, Op_VecA, 14, v14->as_VMReg() ); +-reg_def V14_H ( SOC, SOC, Op_VecA, 14, v14->as_VMReg()->next() ); +-reg_def V14_J ( SOC, SOC, Op_VecA, 14, v14->as_VMReg()->next(2) ); +-reg_def V14_K ( SOC, SOC, Op_VecA, 14, v14->as_VMReg()->next(3) ); +- +-reg_def V15 ( SOC, SOC, Op_VecA, 15, v15->as_VMReg() ); +-reg_def V15_H ( SOC, SOC, Op_VecA, 15, v15->as_VMReg()->next() ); +-reg_def V15_J ( SOC, SOC, Op_VecA, 15, v15->as_VMReg()->next(2) ); +-reg_def V15_K ( SOC, SOC, Op_VecA, 15, v15->as_VMReg()->next(3) ); +- +-reg_def V16 ( SOC, SOC, Op_VecA, 16, v16->as_VMReg() ); +-reg_def V16_H ( SOC, SOC, Op_VecA, 16, v16->as_VMReg()->next() ); +-reg_def V16_J ( SOC, SOC, Op_VecA, 16, v16->as_VMReg()->next(2) ); +-reg_def V16_K ( SOC, SOC, Op_VecA, 16, v16->as_VMReg()->next(3) ); +- +-reg_def V17 ( SOC, SOC, Op_VecA, 17, v17->as_VMReg() ); +-reg_def V17_H ( SOC, SOC, Op_VecA, 17, v17->as_VMReg()->next() ); +-reg_def V17_J ( SOC, SOC, Op_VecA, 17, v17->as_VMReg()->next(2) ); +-reg_def V17_K ( SOC, SOC, Op_VecA, 17, v17->as_VMReg()->next(3) ); +- +-reg_def V18 ( SOC, SOC, Op_VecA, 18, v18->as_VMReg() ); +-reg_def V18_H ( SOC, SOC, Op_VecA, 18, v18->as_VMReg()->next() ); +-reg_def V18_J ( SOC, SOC, Op_VecA, 18, v18->as_VMReg()->next(2) ); +-reg_def V18_K ( SOC, SOC, Op_VecA, 18, v18->as_VMReg()->next(3) ); +- +-reg_def V19 ( SOC, SOC, Op_VecA, 19, v19->as_VMReg() ); +-reg_def V19_H ( SOC, SOC, Op_VecA, 19, v19->as_VMReg()->next() ); +-reg_def V19_J ( SOC, SOC, Op_VecA, 19, v19->as_VMReg()->next(2) ); +-reg_def V19_K ( SOC, SOC, Op_VecA, 19, v19->as_VMReg()->next(3) ); +- +-reg_def V20 ( SOC, SOC, Op_VecA, 20, v20->as_VMReg() ); +-reg_def V20_H ( SOC, SOC, Op_VecA, 20, v20->as_VMReg()->next() ); +-reg_def V20_J ( SOC, SOC, Op_VecA, 20, v20->as_VMReg()->next(2) ); +-reg_def V20_K ( SOC, SOC, Op_VecA, 20, v20->as_VMReg()->next(3) ); +- +-reg_def V21 ( SOC, SOC, Op_VecA, 21, v21->as_VMReg() ); +-reg_def V21_H ( SOC, SOC, Op_VecA, 21, v21->as_VMReg()->next() ); +-reg_def V21_J ( SOC, SOC, Op_VecA, 21, v21->as_VMReg()->next(2) ); +-reg_def V21_K ( SOC, SOC, Op_VecA, 21, v21->as_VMReg()->next(3) ); +- +-reg_def V22 ( SOC, SOC, Op_VecA, 22, v22->as_VMReg() ); +-reg_def V22_H ( SOC, SOC, Op_VecA, 22, v22->as_VMReg()->next() ); +-reg_def V22_J ( SOC, SOC, Op_VecA, 22, v22->as_VMReg()->next(2) ); +-reg_def V22_K ( SOC, SOC, Op_VecA, 22, v22->as_VMReg()->next(3) ); +- +-reg_def V23 ( SOC, SOC, Op_VecA, 23, v23->as_VMReg() ); +-reg_def V23_H ( SOC, SOC, Op_VecA, 23, v23->as_VMReg()->next() ); +-reg_def V23_J ( SOC, SOC, Op_VecA, 23, v23->as_VMReg()->next(2) ); +-reg_def V23_K ( SOC, SOC, Op_VecA, 23, v23->as_VMReg()->next(3) ); +- +-reg_def V24 ( SOC, SOC, Op_VecA, 24, v24->as_VMReg() ); +-reg_def V24_H ( SOC, SOC, Op_VecA, 24, v24->as_VMReg()->next() ); +-reg_def V24_J ( SOC, SOC, Op_VecA, 24, v24->as_VMReg()->next(2) ); +-reg_def V24_K ( SOC, SOC, Op_VecA, 24, v24->as_VMReg()->next(3) ); +- +-reg_def V25 ( SOC, SOC, Op_VecA, 25, v25->as_VMReg() ); +-reg_def V25_H ( SOC, SOC, Op_VecA, 25, v25->as_VMReg()->next() ); +-reg_def V25_J ( SOC, SOC, Op_VecA, 25, v25->as_VMReg()->next(2) ); +-reg_def V25_K ( SOC, SOC, Op_VecA, 25, v25->as_VMReg()->next(3) ); +- +-reg_def V26 ( SOC, SOC, Op_VecA, 26, v26->as_VMReg() ); +-reg_def V26_H ( SOC, SOC, Op_VecA, 26, v26->as_VMReg()->next() ); +-reg_def V26_J ( SOC, SOC, Op_VecA, 26, v26->as_VMReg()->next(2) ); +-reg_def V26_K ( SOC, SOC, Op_VecA, 26, v26->as_VMReg()->next(3) ); +- +-reg_def 
V27 ( SOC, SOC, Op_VecA, 27, v27->as_VMReg() ); +-reg_def V27_H ( SOC, SOC, Op_VecA, 27, v27->as_VMReg()->next() ); +-reg_def V27_J ( SOC, SOC, Op_VecA, 27, v27->as_VMReg()->next(2) ); +-reg_def V27_K ( SOC, SOC, Op_VecA, 27, v27->as_VMReg()->next(3) ); +- +-reg_def V28 ( SOC, SOC, Op_VecA, 28, v28->as_VMReg() ); +-reg_def V28_H ( SOC, SOC, Op_VecA, 28, v28->as_VMReg()->next() ); +-reg_def V28_J ( SOC, SOC, Op_VecA, 28, v28->as_VMReg()->next(2) ); +-reg_def V28_K ( SOC, SOC, Op_VecA, 28, v28->as_VMReg()->next(3) ); +- +-reg_def V29 ( SOC, SOC, Op_VecA, 29, v29->as_VMReg() ); +-reg_def V29_H ( SOC, SOC, Op_VecA, 29, v29->as_VMReg()->next() ); +-reg_def V29_J ( SOC, SOC, Op_VecA, 29, v29->as_VMReg()->next(2) ); +-reg_def V29_K ( SOC, SOC, Op_VecA, 29, v29->as_VMReg()->next(3) ); +- +-reg_def V30 ( SOC, SOC, Op_VecA, 30, v30->as_VMReg() ); +-reg_def V30_H ( SOC, SOC, Op_VecA, 30, v30->as_VMReg()->next() ); +-reg_def V30_J ( SOC, SOC, Op_VecA, 30, v30->as_VMReg()->next(2) ); +-reg_def V30_K ( SOC, SOC, Op_VecA, 30, v30->as_VMReg()->next(3) ); +- +-reg_def V31 ( SOC, SOC, Op_VecA, 31, v31->as_VMReg() ); +-reg_def V31_H ( SOC, SOC, Op_VecA, 31, v31->as_VMReg()->next() ); +-reg_def V31_J ( SOC, SOC, Op_VecA, 31, v31->as_VMReg()->next(2) ); +-reg_def V31_K ( SOC, SOC, Op_VecA, 31, v31->as_VMReg()->next(3) ); +- + // ---------------------------- + // Special Registers + // ---------------------------- +@@ -495,42 +324,7 @@ alloc_class chunk1( + F27, F27_H, + ); + +-alloc_class chunk2( +- V0, V0_H, V0_J, V0_K, +- V1, V1_H, V1_J, V1_K, +- V2, V2_H, V2_J, V2_K, +- V3, V3_H, V3_J, V3_K, +- V4, V4_H, V4_J, V4_K, +- V5, V5_H, V5_J, V5_K, +- V6, V6_H, V6_J, V6_K, +- V7, V7_H, V7_J, V7_K, +- V8, V8_H, V8_J, V8_K, +- V9, V9_H, V9_J, V9_K, +- V10, V10_H, V10_J, V10_K, +- V11, V11_H, V11_J, V11_K, +- V12, V12_H, V12_J, V12_K, +- V13, V13_H, V13_J, V13_K, +- V14, V14_H, V14_J, V14_K, +- V15, V15_H, V15_J, V15_K, +- V16, V16_H, V16_J, V16_K, +- V17, V17_H, V17_J, V17_K, +- V18, V18_H, V18_J, V18_K, +- V19, V19_H, V19_J, V19_K, +- V20, V20_H, V20_J, V20_K, +- V21, V21_H, V21_J, V21_K, +- V22, V22_H, V22_J, V22_K, +- V23, V23_H, V23_J, V23_K, +- V24, V24_H, V24_J, V24_K, +- V25, V25_H, V25_J, V25_K, +- V26, V26_H, V26_J, V26_K, +- V27, V27_H, V27_J, V27_K, +- V28, V28_H, V28_J, V28_K, +- V29, V29_H, V29_J, V29_K, +- V30, V30_H, V30_J, V30_K, +- V31, V31_H, V31_J, V31_K, +-); +- +-alloc_class chunk3(RFLAGS); ++alloc_class chunk2(RFLAGS); + + //----------Architecture Description Register Classes-------------------------- + // Several register classes are automatically defined based upon information in +@@ -826,41 +620,6 @@ reg_class double_reg( + F31, F31_H + ); + +-// Class for all RVV vector registers +-reg_class vectora_reg( +- V1, V1_H, V1_J, V1_K, +- V2, V2_H, V2_J, V2_K, +- V3, V3_H, V3_J, V3_K, +- V4, V4_H, V4_J, V4_K, +- V5, V5_H, V5_J, V5_K, +- V6, V6_H, V6_J, V6_K, +- V7, V7_H, V7_J, V7_K, +- V8, V8_H, V8_J, V8_K, +- V9, V9_H, V9_J, V9_K, +- V10, V10_H, V10_J, V10_K, +- V11, V11_H, V11_J, V11_K, +- V12, V12_H, V12_J, V12_K, +- V13, V13_H, V13_J, V13_K, +- V14, V14_H, V14_J, V14_K, +- V15, V15_H, V15_J, V15_K, +- V16, V16_H, V16_J, V16_K, +- V17, V17_H, V17_J, V17_K, +- V18, V18_H, V18_J, V18_K, +- V19, V19_H, V19_J, V19_K, +- V20, V20_H, V20_J, V20_K, +- V21, V21_H, V21_J, V21_K, +- V22, V22_H, V22_J, V22_K, +- V23, V23_H, V23_J, V23_K, +- V24, V24_H, V24_J, V24_K, +- V25, V25_H, V25_J, V25_K, +- V26, V26_H, V26_J, V26_K, +- V27, V27_H, V27_J, V27_K, +- V28, V28_H, V28_J, V28_K, +- V29, V29_H, V29_J, 
V29_K, +- V30, V30_H, V30_J, V30_K, +- V31, V31_H, V31_J, V31_K +-); +- + // Class for 64 bit register f0 + reg_class f0_reg( + F0, F0_H +@@ -881,31 +640,6 @@ reg_class f3_reg( + F3, F3_H + ); + +-// class for vector register v1 +-reg_class v1_reg( +- V1, V1_H, V1_J, V1_K +-); +- +-// class for vector register v2 +-reg_class v2_reg( +- V2, V2_H, V2_J, V2_K +-); +- +-// class for vector register v3 +-reg_class v3_reg( +- V3, V3_H, V3_J, V3_K +-); +- +-// class for vector register v4 +-reg_class v4_reg( +- V4, V4_H, V4_J, V4_K +-); +- +-// class for vector register v5 +-reg_class v5_reg( +- V5, V5_H, V5_J, V5_K +-); +- + // class for condition codes + reg_class reg_flags(RFLAGS); + %} +@@ -1447,7 +1181,7 @@ const Pipeline * MachEpilogNode::pipeline() const { + + // Figure out which register class each belongs in: rc_int, rc_float or + // rc_stack. +-enum RC { rc_bad, rc_int, rc_float, rc_vector, rc_stack }; ++enum RC { rc_bad, rc_int, rc_float, rc_stack }; + + static enum RC rc_class(OptoReg::Name reg) { + +@@ -1468,13 +1202,7 @@ static enum RC rc_class(OptoReg::Name reg) { + return rc_float; + } + +- // we have 32 vector register * 4 halves +- int slots_of_vector_registers = VectorRegisterImpl::max_slots_per_register * VectorRegisterImpl::number_of_registers; +- if (reg < slots_of_int_registers + slots_of_float_registers + slots_of_vector_registers) { +- return rc_vector; +- } +- +- // Between vector regs & stack is the flags regs. ++ // Between float regs & stack is the flags regs. + assert(OptoReg::is_stack(reg), "blow up if spilling flags"); + + return rc_stack; +@@ -1512,30 +1240,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo + int src_offset = ra_->reg2offset(src_lo); + int dst_offset = ra_->reg2offset(dst_lo); + +- if (bottom_type()->isa_vect() != NULL) { +- uint ireg = ideal_reg(); +- if (ireg == Op_VecA && cbuf) { +- C2_MacroAssembler _masm(cbuf); +- Assembler::CompressibleRegion cr(&_masm); +- int vector_reg_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); +- if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) { +- // stack to stack +- __ spill_copy_vector_stack_to_stack(src_offset, dst_offset, +- vector_reg_size_in_bytes); +- } else if (src_lo_rc == rc_vector && dst_lo_rc == rc_stack) { +- // vpr to stack +- __ spill(as_VectorRegister(Matcher::_regEncode[src_lo]), ra_->reg2offset(dst_lo)); +- } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_vector) { +- // stack to vpr +- __ unspill(as_VectorRegister(Matcher::_regEncode[dst_lo]), ra_->reg2offset(src_lo)); +- } else if (src_lo_rc == rc_vector && dst_lo_rc == rc_vector) { +- // vpr to vpr +- __ vmv1r_v(as_VectorRegister(Matcher::_regEncode[dst_lo]), as_VectorRegister(Matcher::_regEncode[src_lo])); +- } else { +- ShouldNotReachHere(); +- } +- } +- } else if (cbuf != NULL) { ++ if (cbuf != NULL) { + C2_MacroAssembler _masm(cbuf); + Assembler::CompressibleRegion cr(&_masm); + switch (src_lo_rc) { +@@ -1619,17 +1324,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo + } else { + st->print("%s", Matcher::regName[dst_lo]); + } +- if (bottom_type()->isa_vect() != NULL) { +- int vsize = 0; +- if (ideal_reg() == Op_VecA) { +- vsize = Matcher::scalable_vector_reg_size(T_BYTE) * 8; +- } else { +- ShouldNotReachHere(); +- } +- st->print("\t# vector spill size = %d", vsize); +- } else { +- st->print("\t# spill size = %d", is64 ? 64 : 32); +- } ++ st->print("\t# spill size = %d", is64 ? 
64 : 32); + } + + return 0; +@@ -1796,14 +1491,6 @@ const bool Matcher::match_rule_supported(int opcode) { + } + break; + +- case Op_StrCompressedCopy: // fall through +- case Op_StrInflatedCopy: // fall through +- case Op_CountPositives: +- return UseRVV; +- +- case Op_EncodeISOArray: +- return UseRVV && SpecialEncodeISOArray; +- + case Op_PopCountI: + case Op_PopCountL: + return UsePopCountInstruction; +@@ -1821,37 +1508,15 @@ const bool Matcher::match_rule_supported(int opcode) { + } + + // Identify extra cases that we might want to provide match rules for vector nodes and +-// other intrinsics guarded with vector length (vlen) and element type (bt). +-const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { +- if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) { +- return false; +- } +- +- return op_vec_supported(opcode); +-} +- +-const bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) { ++// other intrinsics guarded with vector length (vlen). ++const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { + return false; + } + +-const RegMask* Matcher::predicate_reg_mask(void) { +- return NULL; +-} +- +-const TypeVectMask* Matcher::predicate_reg_type(const Type* elemTy, int length) { +- return NULL; +-} +- +-// Vector calling convention not yet implemented. +-const bool Matcher::supports_vector_calling_convention(void) { ++const bool Matcher::has_predicated_vectors(void) { + return false; + } + +-OptoRegPair Matcher::vector_return_value(uint ideal_reg) { +- Unimplemented(); +- return OptoRegPair(0, 0); +-} +- + // Is this branch offset short enough that a short branch can be used? + // + // NOTE: If the platform does not provide any short branch variants, then +@@ -1877,11 +1542,6 @@ bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { + + // Vector width in bytes. + const int Matcher::vector_width_in_bytes(BasicType bt) { +- if (UseRVV) { +- // The MaxVectorSize should have been set by detecting RVV max vector register size when check UseRVV. +- // MaxVectorSize == VM_Version::_initial_vector_length +- return MaxVectorSize; +- } + return 0; + } + +@@ -1895,34 +1555,10 @@ const int Matcher::min_vector_size(const BasicType bt) { + + // Vector ideal reg. + const uint Matcher::vector_ideal_reg(int len) { +- assert(MaxVectorSize >= len, ""); +- if (UseRVV) { +- return Op_VecA; +- } +- + ShouldNotReachHere(); + return 0; + } + +-const int Matcher::scalable_vector_reg_size(const BasicType bt) { +- return Matcher::max_vector_size(bt); +-} +- +-MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* original_opnd, uint ideal_reg, bool is_temp) { +- ShouldNotReachHere(); // generic vector operands not supported +- return NULL; +-} +- +-bool Matcher::is_reg2reg_move(MachNode* m) { +- ShouldNotReachHere(); // generic vector operands not supported +- return false; +-} +- +-bool Matcher::is_generic_vector(MachOper* opnd) { +- ShouldNotReachHere(); // generic vector operands not supported +- return false; +-} +- + // Return whether or not this register is ever used as an argument. + // This function is used on startup to build the trampoline stubs in + // generateOptoStub. Registers not mentioned will be killed by the VM +@@ -3384,67 +3020,6 @@ operand fRegD() + interface(REG_INTER); + %} + +-// Generic vector class. This will be used for +-// all vector operands. 
+-operand vReg() +-%{ +- constraint(ALLOC_IN_RC(vectora_reg)); +- match(VecA); +- op_cost(0); +- format %{ %} +- interface(REG_INTER); +-%} +- +-operand vReg_V1() +-%{ +- constraint(ALLOC_IN_RC(v1_reg)); +- match(VecA); +- match(vReg); +- op_cost(0); +- format %{ %} +- interface(REG_INTER); +-%} +- +-operand vReg_V2() +-%{ +- constraint(ALLOC_IN_RC(v2_reg)); +- match(VecA); +- match(vReg); +- op_cost(0); +- format %{ %} +- interface(REG_INTER); +-%} +- +-operand vReg_V3() +-%{ +- constraint(ALLOC_IN_RC(v3_reg)); +- match(VecA); +- match(vReg); +- op_cost(0); +- format %{ %} +- interface(REG_INTER); +-%} +- +-operand vReg_V4() +-%{ +- constraint(ALLOC_IN_RC(v4_reg)); +- match(VecA); +- match(vReg); +- op_cost(0); +- format %{ %} +- interface(REG_INTER); +-%} +- +-operand vReg_V5() +-%{ +- constraint(ALLOC_IN_RC(v5_reg)); +- match(VecA); +- match(vReg); +- op_cost(0); +- format %{ %} +- interface(REG_INTER); +-%} +- + // Java Thread Register + operand javaThread_RegP(iRegP reg) + %{ +@@ -7939,17 +7514,6 @@ instruct castDD(fRegD dst) + ins_pipe(pipe_class_empty); + %} + +-instruct castVV(vReg dst) +-%{ +- match(Set dst (CastVV dst)); +- +- size(0); +- format %{ "# castVV of $dst" %} +- ins_encode(/* empty encoding */); +- ins_cost(0); +- ins_pipe(pipe_class_empty); +-%} +- + // ============================================================================ + // Convert Instructions + +@@ -10076,7 +9640,7 @@ instruct partialSubtypeCheckVsZero(iRegP_R15 result, iRegP_R14 sub, iRegP_R10 su + instruct string_compareU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, + iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) + %{ +- predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UU); ++ predicate(((StrCompNode *)n)->encoding() == StrIntrinsicNode::UU); + match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); + effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); + +@@ -10094,7 +9658,7 @@ instruct string_compareU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R + instruct string_compareL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, + iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) + %{ +- predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LL); ++ predicate(((StrCompNode *)n)->encoding() == StrIntrinsicNode::LL); + match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); + effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); + +@@ -10111,7 +9675,7 @@ instruct string_compareL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R + instruct string_compareUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, + iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) + %{ +- predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UL); ++ predicate(((StrCompNode *)n)->encoding() == StrIntrinsicNode::UL); + match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); + effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); + +@@ -10129,7 +9693,7 @@ instruct string_compareLU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_ + iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, + rFlagsReg cr) + %{ +- predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LU); ++ predicate(((StrCompNode 
*)n)->encoding() == StrIntrinsicNode::LU); + match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); + effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); + +@@ -10275,7 +9839,7 @@ instruct stringU_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, + iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) + %{ + match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); +- predicate(!UseRVV && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U)); ++ predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U); + effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result, + TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); + +@@ -10294,7 +9858,7 @@ instruct stringL_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, + iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) + %{ + match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); +- predicate(!UseRVV && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L)); ++ predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L); + effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result, + TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); + +@@ -10310,7 +9874,6 @@ instruct stringL_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, + // clearing of an array + instruct clearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy) + %{ +- predicate(!UseRVV); + match(Set dummy (ClearArray cnt base)); + effect(USE_KILL cnt, USE_KILL base); + +@@ -10330,8 +9893,7 @@ instruct clearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy) + + instruct clearArray_imm_reg(immL cnt, iRegP_R28 base, Universe dummy, rFlagsReg cr) + %{ +- predicate(!UseRVV && (uint64_t)n->in(2)->get_long() +- < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord)); ++ predicate((uint64_t)n->in(2)->get_long() < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord)); + match(Set dummy (ClearArray cnt base)); + effect(USE_KILL base, KILL cr); + +@@ -10348,7 +9910,7 @@ instruct clearArray_imm_reg(immL cnt, iRegP_R28 base, Universe dummy, rFlagsReg + instruct string_equalsL(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, + iRegI_R10 result, rFlagsReg cr) + %{ +- predicate(!UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL); ++ predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL); + match(Set result (StrEquals (Binary str1 str2) cnt)); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr); + +@@ -10364,7 +9926,7 @@ instruct string_equalsL(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, + instruct string_equalsU(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, + iRegI_R10 result, rFlagsReg cr) + %{ +- predicate(!UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU); ++ predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU); + match(Set result (StrEquals (Binary str1 str2) cnt)); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr); + +@@ -10381,7 +9943,7 @@ instruct array_equalsB(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, + iRegP_R13 tmp1, iRegP_R14 tmp2, iRegP_R15 tmp3, + iRegP_R16 tmp4, iRegP_R28 tmp5, rFlagsReg cr) + %{ +- predicate(!UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); ++ predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); + match(Set result (AryEq ary1 ary2)); + effect(USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp5, KILL cr); + +@@ -10398,7 +9960,7 @@ instruct array_equalsC(iRegP_R11 ary1, 
iRegP_R12 ary2, iRegI_R10 result, + iRegP_R13 tmp1, iRegP_R14 tmp2, iRegP_R15 tmp3, + iRegP_R16 tmp4, iRegP_R28 tmp5, rFlagsReg cr) + %{ +- predicate(!UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); ++ predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); + match(Set result (AryEq ary1 ary2)); + effect(USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp5, KILL cr); + +diff --git a/src/hotspot/cpu/riscv/riscv_v.ad b/src/hotspot/cpu/riscv/riscv_v.ad +deleted file mode 100644 +index 3828e096b21..00000000000 +--- a/src/hotspot/cpu/riscv/riscv_v.ad ++++ /dev/null +@@ -1,2065 +0,0 @@ +-// +-// Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. +-// Copyright (c) 2020, Arm Limited. All rights reserved. +-// Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. +-// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +-// +-// This code is free software; you can redistribute it and/or modify it +-// under the terms of the GNU General Public License version 2 only, as +-// published by the Free Software Foundation. +-// +-// This code is distributed in the hope that it will be useful, but WITHOUT +-// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +-// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +-// version 2 for more details (a copy is included in the LICENSE file that +-// accompanied this code). +-// +-// You should have received a copy of the GNU General Public License version +-// 2 along with this work; if not, write to the Free Software Foundation, +-// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +-// +-// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +-// or visit www.oracle.com if you need additional information or have any +-// questions. 
+-// +-// +- +-// RISCV Vector Extension Architecture Description File +- +-opclass vmemA(indirect); +- +-source_hpp %{ +- bool op_vec_supported(int opcode); +-%} +- +-source %{ +- +- static void loadStore(C2_MacroAssembler masm, bool is_store, +- VectorRegister reg, BasicType bt, Register base) { +- Assembler::SEW sew = Assembler::elemtype_to_sew(bt); +- masm.vsetvli(t0, x0, sew); +- if (is_store) { +- masm.vsex_v(reg, base, sew); +- } else { +- masm.vlex_v(reg, base, sew); +- } +- } +- +- bool op_vec_supported(int opcode) { +- switch (opcode) { +- // No multiply reduction instructions +- case Op_MulReductionVD: +- case Op_MulReductionVF: +- case Op_MulReductionVI: +- case Op_MulReductionVL: +- // Others +- case Op_Extract: +- case Op_ExtractB: +- case Op_ExtractC: +- case Op_ExtractD: +- case Op_ExtractF: +- case Op_ExtractI: +- case Op_ExtractL: +- case Op_ExtractS: +- case Op_ExtractUB: +- // Vector API specific +- case Op_AndReductionV: +- case Op_OrReductionV: +- case Op_XorReductionV: +- case Op_LoadVectorGather: +- case Op_StoreVectorScatter: +- case Op_VectorBlend: +- case Op_VectorCast: +- case Op_VectorCastB2X: +- case Op_VectorCastD2X: +- case Op_VectorCastF2X: +- case Op_VectorCastI2X: +- case Op_VectorCastL2X: +- case Op_VectorCastS2X: +- case Op_VectorInsert: +- case Op_VectorLoadConst: +- case Op_VectorLoadMask: +- case Op_VectorLoadShuffle: +- case Op_VectorMaskCmp: +- case Op_VectorRearrange: +- case Op_VectorReinterpret: +- case Op_VectorStoreMask: +- case Op_VectorTest: +- return false; +- default: +- return UseRVV; +- } +- } +- +-%} +- +-definitions %{ +- int_def VEC_COST (200, 200); +-%} +- +-// All VEC instructions +- +-// vector load/store +-instruct loadV(vReg dst, vmemA mem) %{ +- match(Set dst (LoadVector mem)); +- ins_cost(VEC_COST); +- format %{ "vle $dst, $mem\t#@loadV" %} +- ins_encode %{ +- VectorRegister dst_reg = as_VectorRegister($dst$$reg); +- loadStore(C2_MacroAssembler(&cbuf), false, dst_reg, +- Matcher::vector_element_basic_type(this), as_Register($mem$$base)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct storeV(vReg src, vmemA mem) %{ +- match(Set mem (StoreVector mem src)); +- ins_cost(VEC_COST); +- format %{ "vse $src, $mem\t#@storeV" %} +- ins_encode %{ +- VectorRegister src_reg = as_VectorRegister($src$$reg); +- loadStore(C2_MacroAssembler(&cbuf), true, src_reg, +- Matcher::vector_element_basic_type(this, $src), as_Register($mem$$base)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector abs +- +-instruct vabsB(vReg dst, vReg src, vReg tmp) %{ +- match(Set dst (AbsVB src)); +- ins_cost(VEC_COST); +- effect(TEMP tmp); +- format %{ "vrsub.vi $tmp, 0, $src\t#@vabsB\n\t" +- "vmax.vv $dst, $tmp, $src" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e8); +- __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); +- __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vabsS(vReg dst, vReg src, vReg tmp) %{ +- match(Set dst (AbsVS src)); +- ins_cost(VEC_COST); +- effect(TEMP tmp); +- format %{ "vrsub.vi $tmp, 0, $src\t#@vabsS\n\t" +- "vmax.vv $dst, $tmp, $src" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e16); +- __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); +- __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vabsI(vReg dst, vReg src, vReg tmp) %{ +- match(Set dst (AbsVI src)); +- 
ins_cost(VEC_COST); +- effect(TEMP tmp); +- format %{ "vrsub.vi $tmp, 0, $src\t#@vabsI\n\t" +- "vmax.vv $dst, $tmp, $src" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); +- __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vabsL(vReg dst, vReg src, vReg tmp) %{ +- match(Set dst (AbsVL src)); +- ins_cost(VEC_COST); +- effect(TEMP tmp); +- format %{ "vrsub.vi $tmp, 0, $src\t#@vabsL\n\t" +- "vmax.vv $dst, $tmp, $src" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); +- __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vabsF(vReg dst, vReg src) %{ +- match(Set dst (AbsVF src)); +- ins_cost(VEC_COST); +- format %{ "vfsgnjx.vv $dst, $src, $src, vm\t#@vabsF" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vfsgnjx_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($src$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vabsD(vReg dst, vReg src) %{ +- match(Set dst (AbsVD src)); +- ins_cost(VEC_COST); +- format %{ "vfsgnjx.vv $dst, $src, $src, vm\t#@vabsD" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vfsgnjx_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($src$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector add +- +-instruct vaddB(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (AddVB src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vadd.vv $dst, $src1, $src2\t#@vaddB" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e8); +- __ vadd_vv(as_VectorRegister($dst$$reg), +- as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vaddS(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (AddVS src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vadd.vv $dst, $src1, $src2\t#@vaddS" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e16); +- __ vadd_vv(as_VectorRegister($dst$$reg), +- as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vaddI(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (AddVI src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vadd.vv $dst, $src1, $src2\t#@vaddI" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vadd_vv(as_VectorRegister($dst$$reg), +- as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vaddL(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (AddVL src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vadd.vv $dst, $src1, $src2\t#@vaddL" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vadd_vv(as_VectorRegister($dst$$reg), +- as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vaddF(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (AddVF src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vfadd.vv $dst, $src1, $src2\t#@vaddF" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vfadd_vv(as_VectorRegister($dst$$reg), +- as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vaddD(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst 
(AddVD src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vfadd.vv $dst, $src1, $src2\t#@vaddD" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vfadd_vv(as_VectorRegister($dst$$reg), +- as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector and +- +-instruct vand(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (AndV src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vand.vv $dst, $src1, $src2\t#@vand" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vand_vv(as_VectorRegister($dst$$reg), +- as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector or +- +-instruct vor(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (OrV src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vor.vv $dst, $src1, $src2\t#@vor" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vor_vv(as_VectorRegister($dst$$reg), +- as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector xor +- +-instruct vxor(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (XorV src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vxor.vv $dst, $src1, $src2\t#@vxor" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vxor_vv(as_VectorRegister($dst$$reg), +- as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector float div +- +-instruct vdivF(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (DivVF src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vfdiv.vv $dst, $src1, $src2\t#@vdivF" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vfdiv_vv(as_VectorRegister($dst$$reg), +- as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vdivD(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (DivVD src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vfdiv.vv $dst, $src1, $src2\t#@vdivD" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vfdiv_vv(as_VectorRegister($dst$$reg), +- as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector integer max/min +- +-instruct vmax(vReg dst, vReg src1, vReg src2) %{ +- predicate(n->bottom_type()->is_vect()->element_basic_type() != T_FLOAT && +- n->bottom_type()->is_vect()->element_basic_type() != T_DOUBLE); +- match(Set dst (MaxV src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vmax.vv $dst, $src1, $src2\t#@vmax" %} +- ins_encode %{ +- BasicType bt = Matcher::vector_element_basic_type(this); +- Assembler::SEW sew = Assembler::elemtype_to_sew(bt); +- __ vsetvli(t0, x0, sew); +- __ vmax_vv(as_VectorRegister($dst$$reg), +- as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vmin(vReg dst, vReg src1, vReg src2) %{ +- predicate(n->bottom_type()->is_vect()->element_basic_type() != T_FLOAT && +- n->bottom_type()->is_vect()->element_basic_type() != T_DOUBLE); +- match(Set dst (MinV src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vmin.vv $dst, $src1, $src2\t#@vmin" %} +- ins_encode %{ +- BasicType bt = Matcher::vector_element_basic_type(this); +- Assembler::SEW sew = Assembler::elemtype_to_sew(bt); +- __ vsetvli(t0, x0, sew); +- __ vmin_vv(as_VectorRegister($dst$$reg), +- as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector float-point max/min +- +-instruct 
vmaxF(vReg dst, vReg src1, vReg src2) %{ +- predicate(n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); +- match(Set dst (MaxV src1 src2)); +- effect(TEMP_DEF dst); +- ins_cost(VEC_COST); +- format %{ "vmaxF $dst, $src1, $src2\t#@vmaxF" %} +- ins_encode %{ +- __ minmax_FD_v(as_VectorRegister($dst$$reg), +- as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), +- false /* is_double */, false /* is_min */); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vmaxD(vReg dst, vReg src1, vReg src2) %{ +- predicate(n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); +- match(Set dst (MaxV src1 src2)); +- effect(TEMP_DEF dst); +- ins_cost(VEC_COST); +- format %{ "vmaxD $dst, $src1, $src2\t#@vmaxD" %} +- ins_encode %{ +- __ minmax_FD_v(as_VectorRegister($dst$$reg), +- as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), +- true /* is_double */, false /* is_min */); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vminF(vReg dst, vReg src1, vReg src2) %{ +- predicate(n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); +- match(Set dst (MinV src1 src2)); +- effect(TEMP_DEF dst); +- ins_cost(VEC_COST); +- format %{ "vminF $dst, $src1, $src2\t#@vminF" %} +- ins_encode %{ +- __ minmax_FD_v(as_VectorRegister($dst$$reg), +- as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), +- false /* is_double */, true /* is_min */); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vminD(vReg dst, vReg src1, vReg src2) %{ +- predicate(n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); +- match(Set dst (MinV src1 src2)); +- effect(TEMP_DEF dst); +- ins_cost(VEC_COST); +- format %{ "vminD $dst, $src1, $src2\t#@vminD" %} +- ins_encode %{ +- __ minmax_FD_v(as_VectorRegister($dst$$reg), +- as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), +- true /* is_double */, true /* is_min */); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector fmla +- +-// dst_src1 = dst_src1 + src2 * src3 +-instruct vfmlaF(vReg dst_src1, vReg src2, vReg src3) %{ +- predicate(UseFMA); +- match(Set dst_src1 (FmaVF dst_src1 (Binary src2 src3))); +- ins_cost(VEC_COST); +- format %{ "vfmacc.vv $dst_src1, $src2, $src3\t#@vfmlaF" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vfmacc_vv(as_VectorRegister($dst_src1$$reg), +- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// dst_src1 = dst_src1 + src2 * src3 +-instruct vfmlaD(vReg dst_src1, vReg src2, vReg src3) %{ +- predicate(UseFMA); +- match(Set dst_src1 (FmaVD dst_src1 (Binary src2 src3))); +- ins_cost(VEC_COST); +- format %{ "vfmacc.vv $dst_src1, $src2, $src3\t#@vfmlaD" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vfmacc_vv(as_VectorRegister($dst_src1$$reg), +- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector fmls +- +-// dst_src1 = dst_src1 + -src2 * src3 +-// dst_src1 = dst_src1 + src2 * -src3 +-instruct vfmlsF(vReg dst_src1, vReg src2, vReg src3) %{ +- predicate(UseFMA); +- match(Set dst_src1 (FmaVF dst_src1 (Binary (NegVF src2) src3))); +- match(Set dst_src1 (FmaVF dst_src1 (Binary src2 (NegVF src3)))); +- ins_cost(VEC_COST); +- format %{ "vfnmsac.vv $dst_src1, $src2, $src3\t#@vfmlsF" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vfnmsac_vv(as_VectorRegister($dst_src1$$reg), +- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// dst_src1 = dst_src1 + -src2 * src3 +-// dst_src1 = dst_src1 + 
src2 * -src3 +-instruct vfmlsD(vReg dst_src1, vReg src2, vReg src3) %{ +- predicate(UseFMA); +- match(Set dst_src1 (FmaVD dst_src1 (Binary (NegVD src2) src3))); +- match(Set dst_src1 (FmaVD dst_src1 (Binary src2 (NegVD src3)))); +- ins_cost(VEC_COST); +- format %{ "vfnmsac.vv $dst_src1, $src2, $src3\t#@vfmlsD" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vfnmsac_vv(as_VectorRegister($dst_src1$$reg), +- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector fnmla +- +-// dst_src1 = -dst_src1 + -src2 * src3 +-// dst_src1 = -dst_src1 + src2 * -src3 +-instruct vfnmlaF(vReg dst_src1, vReg src2, vReg src3) %{ +- predicate(UseFMA); +- match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary (NegVF src2) src3))); +- match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 (NegVF src3)))); +- ins_cost(VEC_COST); +- format %{ "vfnmacc.vv $dst_src1, $src2, $src3\t#@vfnmlaF" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vfnmacc_vv(as_VectorRegister($dst_src1$$reg), +- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// dst_src1 = -dst_src1 + -src2 * src3 +-// dst_src1 = -dst_src1 + src2 * -src3 +-instruct vfnmlaD(vReg dst_src1, vReg src2, vReg src3) %{ +- predicate(UseFMA); +- match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary (NegVD src2) src3))); +- match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 (NegVD src3)))); +- ins_cost(VEC_COST); +- format %{ "vfnmacc.vv $dst_src1, $src2, $src3\t#@vfnmlaD" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vfnmacc_vv(as_VectorRegister($dst_src1$$reg), +- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector fnmls +- +-// dst_src1 = -dst_src1 + src2 * src3 +-instruct vfnmlsF(vReg dst_src1, vReg src2, vReg src3) %{ +- predicate(UseFMA); +- match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 src3))); +- ins_cost(VEC_COST); +- format %{ "vfmsac.vv $dst_src1, $src2, $src3\t#@vfnmlsF" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vfmsac_vv(as_VectorRegister($dst_src1$$reg), +- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// dst_src1 = -dst_src1 + src2 * src3 +-instruct vfnmlsD(vReg dst_src1, vReg src2, vReg src3) %{ +- predicate(UseFMA); +- match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 src3))); +- ins_cost(VEC_COST); +- format %{ "vfmsac.vv $dst_src1, $src2, $src3\t#@vfnmlsD" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vfmsac_vv(as_VectorRegister($dst_src1$$reg), +- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector mla +- +-// dst_src1 = dst_src1 + src2 * src3 +-instruct vmlaB(vReg dst_src1, vReg src2, vReg src3) %{ +- match(Set dst_src1 (AddVB dst_src1 (MulVB src2 src3))); +- ins_cost(VEC_COST); +- format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaB" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e8); +- __ vmacc_vv(as_VectorRegister($dst_src1$$reg), +- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// dst_src1 = dst_src1 + src2 * src3 +-instruct vmlaS(vReg dst_src1, vReg src2, vReg src3) %{ +- match(Set dst_src1 (AddVS dst_src1 (MulVS src2 src3))); +- ins_cost(VEC_COST); +- format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaS" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e16); +- __ 
vmacc_vv(as_VectorRegister($dst_src1$$reg), +- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// dst_src1 = dst_src1 + src2 * src3 +-instruct vmlaI(vReg dst_src1, vReg src2, vReg src3) %{ +- match(Set dst_src1 (AddVI dst_src1 (MulVI src2 src3))); +- ins_cost(VEC_COST); +- format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaI" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vmacc_vv(as_VectorRegister($dst_src1$$reg), +- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// dst_src1 = dst_src1 + src2 * src3 +-instruct vmlaL(vReg dst_src1, vReg src2, vReg src3) %{ +- match(Set dst_src1 (AddVL dst_src1 (MulVL src2 src3))); +- ins_cost(VEC_COST); +- format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaL" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vmacc_vv(as_VectorRegister($dst_src1$$reg), +- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector mls +- +-// dst_src1 = dst_src1 - src2 * src3 +-instruct vmlsB(vReg dst_src1, vReg src2, vReg src3) %{ +- match(Set dst_src1 (SubVB dst_src1 (MulVB src2 src3))); +- ins_cost(VEC_COST); +- format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsB" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e8); +- __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), +- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// dst_src1 = dst_src1 - src2 * src3 +-instruct vmlsS(vReg dst_src1, vReg src2, vReg src3) %{ +- match(Set dst_src1 (SubVS dst_src1 (MulVS src2 src3))); +- ins_cost(VEC_COST); +- format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsS" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e16); +- __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), +- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// dst_src1 = dst_src1 - src2 * src3 +-instruct vmlsI(vReg dst_src1, vReg src2, vReg src3) %{ +- match(Set dst_src1 (SubVI dst_src1 (MulVI src2 src3))); +- ins_cost(VEC_COST); +- format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsI" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), +- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// dst_src1 = dst_src1 - src2 * src3 +-instruct vmlsL(vReg dst_src1, vReg src2, vReg src3) %{ +- match(Set dst_src1 (SubVL dst_src1 (MulVL src2 src3))); +- ins_cost(VEC_COST); +- format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsL" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), +- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector mul +- +-instruct vmulB(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (MulVB src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vmul.vv $dst, $src1, $src2\t#@vmulB" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e8); +- __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vmulS(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (MulVS src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vmul.vv $dst, $src1, $src2\t#@vmulS" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e16); +- __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), +- 
as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vmulI(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (MulVI src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vmul.vv $dst, $src1, $src2\t#@vmulI" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vmulL(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (MulVL src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vmul.vv $dst, $src1, $src2\t#@vmulL" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vmulF(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (MulVF src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vfmul.vv $dst, $src1, $src2\t#@vmulF" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vfmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vmulD(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (MulVD src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vfmul.vv $dst, $src1, $src2\t#@vmulD" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vfmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector fneg +- +-instruct vnegF(vReg dst, vReg src) %{ +- match(Set dst (NegVF src)); +- ins_cost(VEC_COST); +- format %{ "vfsgnjn.vv $dst, $src, $src\t#@vnegF" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vfneg_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vnegD(vReg dst, vReg src) %{ +- match(Set dst (NegVD src)); +- ins_cost(VEC_COST); +- format %{ "vfsgnjn.vv $dst, $src, $src\t#@vnegD" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vfneg_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// popcount vector +- +-instruct vpopcountI(iRegINoSp dst, vReg src) %{ +- match(Set dst (PopCountVI src)); +- format %{ "vpopc.m $dst, $src\t#@vpopcountI" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vpopc_m(as_Register($dst$$reg), as_VectorRegister($src$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector add reduction +- +-instruct reduce_addB(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ +- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); +- match(Set dst (AddReductionVI src1 src2)); +- effect(TEMP tmp); +- ins_cost(VEC_COST); +- format %{ "vmv.s.x $tmp, $src1\t#@reduce_addB\n\t" +- "vredsum.vs $tmp, $src2, $tmp\n\t" +- "vmv.x.s $dst, $tmp" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e8); +- __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); +- __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), +- as_VectorRegister($tmp$$reg)); +- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct reduce_addS(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ +- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); +- match(Set dst (AddReductionVI src1 src2)); +- effect(TEMP tmp); +- ins_cost(VEC_COST); +- format %{ "vmv.s.x $tmp, 
$src1\t#@reduce_addS\n\t" +- "vredsum.vs $tmp, $src2, $tmp\n\t" +- "vmv.x.s $dst, $tmp" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e16); +- __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); +- __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), +- as_VectorRegister($tmp$$reg)); +- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct reduce_addI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ +- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); +- match(Set dst (AddReductionVI src1 src2)); +- effect(TEMP tmp); +- ins_cost(VEC_COST); +- format %{ "vmv.s.x $tmp, $src1\t#@reduce_addI\n\t" +- "vredsum.vs $tmp, $src2, $tmp\n\t" +- "vmv.x.s $dst, $tmp" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); +- __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), +- as_VectorRegister($tmp$$reg)); +- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct reduce_addL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{ +- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); +- match(Set dst (AddReductionVL src1 src2)); +- effect(TEMP tmp); +- ins_cost(VEC_COST); +- format %{ "vmv.s.x $tmp, $src1\t#@reduce_addL\n\t" +- "vredsum.vs $tmp, $src2, $tmp\n\t" +- "vmv.x.s $dst, $tmp" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); +- __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), +- as_VectorRegister($tmp$$reg)); +- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct reduce_addF(fRegF src1_dst, vReg src2, vReg tmp) %{ +- match(Set src1_dst (AddReductionVF src1_dst src2)); +- effect(TEMP tmp); +- ins_cost(VEC_COST); +- format %{ "vfmv.s.f $tmp, $src1_dst\t#@reduce_addF\n\t" +- "vfredosum.vs $tmp, $src2, $tmp\n\t" +- "vfmv.f.s $src1_dst, $tmp" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vfmv_s_f(as_VectorRegister($tmp$$reg), $src1_dst$$FloatRegister); +- __ vfredosum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), +- as_VectorRegister($tmp$$reg)); +- __ vfmv_f_s($src1_dst$$FloatRegister, as_VectorRegister($tmp$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct reduce_addD(fRegD src1_dst, vReg src2, vReg tmp) %{ +- match(Set src1_dst (AddReductionVD src1_dst src2)); +- effect(TEMP tmp); +- ins_cost(VEC_COST); +- format %{ "vfmv.s.f $tmp, $src1_dst\t#@reduce_addD\n\t" +- "vfredosum.vs $tmp, $src2, $tmp\n\t" +- "vfmv.f.s $src1_dst, $tmp" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vfmv_s_f(as_VectorRegister($tmp$$reg), $src1_dst$$FloatRegister); +- __ vfredosum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), +- as_VectorRegister($tmp$$reg)); +- __ vfmv_f_s($src1_dst$$FloatRegister, as_VectorRegister($tmp$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector integer max reduction +-instruct vreduce_maxB(iRegINoSp dst, iRegI src1, vReg src2, vReg tmp) %{ +- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); +- match(Set dst (MaxReductionV src1 src2)); +- ins_cost(VEC_COST); +- effect(TEMP tmp); +- format %{ "vreduce_maxB $dst, $src1, $src2, $tmp" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e8); +- __ vredmax_vs(as_VectorRegister($tmp$$reg), 
as_VectorRegister($src2$$reg), as_VectorRegister($src2$$reg)); +- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); +- Label Ldone; +- __ ble(as_Register($src1$$reg), as_Register($dst$$reg), Ldone); +- __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); +- __ bind(Ldone); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vreduce_maxS(iRegINoSp dst, iRegI src1, vReg src2, vReg tmp) %{ +- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); +- match(Set dst (MaxReductionV src1 src2)); +- ins_cost(VEC_COST); +- effect(TEMP tmp); +- format %{ "vreduce_maxS $dst, $src1, $src2, $tmp" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e16); +- __ vredmax_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($src2$$reg)); +- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); +- Label Ldone; +- __ ble(as_Register($src1$$reg), as_Register($dst$$reg), Ldone); +- __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); +- __ bind(Ldone); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vreduce_maxI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ +- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); +- match(Set dst (MaxReductionV src1 src2)); +- ins_cost(VEC_COST); +- effect(TEMP tmp); +- format %{ "vreduce_maxI $dst, $src1, $src2, $tmp" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); +- __ vredmax_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg)); +- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vreduce_maxL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{ +- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); +- match(Set dst (MaxReductionV src1 src2)); +- ins_cost(VEC_COST); +- effect(TEMP tmp); +- format %{ "vreduce_maxL $dst, $src1, $src2, $tmp" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); +- __ vredmax_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg)); +- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector integer min reduction +-instruct vreduce_minB(iRegINoSp dst, iRegI src1, vReg src2, vReg tmp) %{ +- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); +- match(Set dst (MinReductionV src1 src2)); +- ins_cost(VEC_COST); +- effect(TEMP tmp); +- format %{ "vreduce_minB $dst, $src1, $src2, $tmp" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e8); +- __ vredmin_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($src2$$reg)); +- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); +- Label Ldone; +- __ bge(as_Register($src1$$reg), as_Register($dst$$reg), Ldone); +- __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); +- __ bind(Ldone); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vreduce_minS(iRegINoSp dst, iRegI src1, vReg src2, vReg tmp) %{ +- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); +- match(Set dst (MinReductionV src1 src2)); +- ins_cost(VEC_COST); +- effect(TEMP tmp); +- format %{ "vreduce_minS $dst, $src1, $src2, $tmp" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e16); +- __ vredmin_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($src2$$reg)); +- __ 
vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); +- Label Ldone; +- __ bge(as_Register($src1$$reg), as_Register($dst$$reg), Ldone); +- __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); +- __ bind(Ldone); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vreduce_minI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ +- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); +- match(Set dst (MinReductionV src1 src2)); +- ins_cost(VEC_COST); +- effect(TEMP tmp); +- format %{ "vreduce_minI $dst, $src1, $src2, $tmp" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); +- __ vredmin_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg)); +- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vreduce_minL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{ +- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); +- match(Set dst (MinReductionV src1 src2)); +- ins_cost(VEC_COST); +- effect(TEMP tmp); +- format %{ "vreduce_minL $dst, $src1, $src2, $tmp" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); +- __ vredmin_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg)); +- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector float max reduction +- +-instruct vreduce_maxF(fRegF dst, fRegF src1, vReg src2, vReg tmp1, vReg tmp2) %{ +- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); +- match(Set dst (MaxReductionV src1 src2)); +- ins_cost(VEC_COST); +- effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); +- format %{ "reduce_maxF $dst, $src1, $src2, $tmp1, $tmp2" %} +- ins_encode %{ +- __ reduce_minmax_FD_v($dst$$FloatRegister, +- $src1$$FloatRegister, as_VectorRegister($src2$$reg), +- as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), +- false /* is_double */, false /* is_min */); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vreduce_maxD(fRegD dst, fRegD src1, vReg src2, vReg tmp1, vReg tmp2) %{ +- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); +- match(Set dst (MaxReductionV src1 src2)); +- ins_cost(VEC_COST); +- effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); +- format %{ "reduce_maxD $dst, $src1, $src2, $tmp1, $tmp2" %} +- ins_encode %{ +- __ reduce_minmax_FD_v($dst$$FloatRegister, +- $src1$$FloatRegister, as_VectorRegister($src2$$reg), +- as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), +- true /* is_double */, false /* is_min */); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector float min reduction +- +-instruct vreduce_minF(fRegF dst, fRegF src1, vReg src2, vReg tmp1, vReg tmp2) %{ +- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); +- match(Set dst (MinReductionV src1 src2)); +- ins_cost(VEC_COST); +- effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); +- format %{ "reduce_minF $dst, $src1, $src2, $tmp1, $tmp2" %} +- ins_encode %{ +- __ reduce_minmax_FD_v($dst$$FloatRegister, +- $src1$$FloatRegister, as_VectorRegister($src2$$reg), +- as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), +- false /* is_double */, true /* is_min */); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vreduce_minD(fRegD dst, fRegD src1, vReg src2, vReg tmp1, vReg tmp2) %{ +- 
predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); +- match(Set dst (MinReductionV src1 src2)); +- ins_cost(VEC_COST); +- effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); +- format %{ "reduce_minD $dst, $src1, $src2, $tmp1, $tmp2" %} +- ins_encode %{ +- __ reduce_minmax_FD_v($dst$$FloatRegister, +- $src1$$FloatRegister, as_VectorRegister($src2$$reg), +- as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), +- true /* is_double */, true /* is_min */); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector Math.rint, floor, ceil +- +-instruct vroundD(vReg dst, vReg src, immI rmode) %{ +- predicate(n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); +- match(Set dst (RoundDoubleModeV src rmode)); +- format %{ "vroundD $dst, $src, $rmode" %} +- ins_encode %{ +- switch ($rmode$$constant) { +- case RoundDoubleModeNode::rmode_rint: +- __ csrwi(CSR_FRM, C2_MacroAssembler::rne); +- __ vfcvt_rtz_x_f_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); +- break; +- case RoundDoubleModeNode::rmode_floor: +- __ csrwi(CSR_FRM, C2_MacroAssembler::rdn); +- __ vfcvt_rtz_x_f_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); +- break; +- case RoundDoubleModeNode::rmode_ceil: +- __ csrwi(CSR_FRM, C2_MacroAssembler::rup); +- __ vfcvt_rtz_x_f_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); +- break; +- default: +- ShouldNotReachHere(); +- break; +- } +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector replicate +- +-instruct replicateB(vReg dst, iRegIorL2I src) %{ +- match(Set dst (ReplicateB src)); +- ins_cost(VEC_COST); +- format %{ "vmv.v.x $dst, $src\t#@replicateB" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e8); +- __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct replicateS(vReg dst, iRegIorL2I src) %{ +- match(Set dst (ReplicateS src)); +- ins_cost(VEC_COST); +- format %{ "vmv.v.x $dst, $src\t#@replicateS" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e16); +- __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct replicateI(vReg dst, iRegIorL2I src) %{ +- match(Set dst (ReplicateI src)); +- ins_cost(VEC_COST); +- format %{ "vmv.v.x $dst, $src\t#@replicateI" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct replicateL(vReg dst, iRegL src) %{ +- match(Set dst (ReplicateL src)); +- ins_cost(VEC_COST); +- format %{ "vmv.v.x $dst, $src\t#@replicateL" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct replicateB_imm5(vReg dst, immI5 con) %{ +- match(Set dst (ReplicateB con)); +- ins_cost(VEC_COST); +- format %{ "vmv.v.i $dst, $con\t#@replicateB_imm5" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e8); +- __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct replicateS_imm5(vReg dst, immI5 con) %{ +- match(Set dst (ReplicateS con)); +- ins_cost(VEC_COST); +- format %{ "vmv.v.i $dst, $con\t#@replicateS_imm5" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e16); +- __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct replicateI_imm5(vReg dst, immI5 con) %{ +- match(Set dst (ReplicateI con)); +- ins_cost(VEC_COST); +- format %{ "vmv.v.i 
$dst, $con\t#@replicateI_imm5" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct replicateL_imm5(vReg dst, immL5 con) %{ +- match(Set dst (ReplicateL con)); +- ins_cost(VEC_COST); +- format %{ "vmv.v.i $dst, $con\t#@replicateL_imm5" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct replicateF(vReg dst, fRegF src) %{ +- match(Set dst (ReplicateF src)); +- ins_cost(VEC_COST); +- format %{ "vfmv.v.f $dst, $src\t#@replicateF" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vfmv_v_f(as_VectorRegister($dst$$reg), $src$$FloatRegister); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct replicateD(vReg dst, fRegD src) %{ +- match(Set dst (ReplicateD src)); +- ins_cost(VEC_COST); +- format %{ "vfmv.v.f $dst, $src\t#@replicateD" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vfmv_v_f(as_VectorRegister($dst$$reg), $src$$FloatRegister); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector shift +- +-instruct vasrB(vReg dst, vReg src, vReg shift) %{ +- match(Set dst (RShiftVB src shift)); +- ins_cost(VEC_COST); +- effect(TEMP_DEF dst); +- format %{ "vmsgtu.vi v0, $shift 7\t#@vasrB\n\t" +- "vsra.vi $dst, $src, 7, Assembler::v0_t\n\t" +- "vmnot.m v0, v0\n\t" +- "vsra.vv $dst, $src, $shift, Assembler::v0_t" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e8); +- // if shift > BitsPerByte - 1, clear the low BitsPerByte - 1 bits +- __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1); +- __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- BitsPerByte - 1, Assembler::v0_t); +- // otherwise, shift +- __ vmnot_m(v0, v0); +- __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($shift$$reg), Assembler::v0_t); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vasrS(vReg dst, vReg src, vReg shift) %{ +- match(Set dst (RShiftVS src shift)); +- ins_cost(VEC_COST); +- effect(TEMP_DEF dst); +- format %{ "vmsgtu.vi v0, $shift, 15\t#@vasrS\n\t" +- "vsra.vi $dst, $src, 15, Assembler::v0_t\n\t" +- "vmnot.m v0, v0\n\t" +- "vsra.vv $dst, $src, $shift, Assembler::v0_t" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e16); +- // if shift > BitsPerShort - 1, clear the low BitsPerShort - 1 bits +- __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerShort - 1); +- __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- BitsPerShort - 1, Assembler::v0_t); +- // otherwise, shift +- __ vmnot_m(v0, v0); +- __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($shift$$reg), Assembler::v0_t); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vasrI(vReg dst, vReg src, vReg shift) %{ +- match(Set dst (RShiftVI src shift)); +- ins_cost(VEC_COST); +- format %{ "vsra.vv $dst, $src, $shift\t#@vasrI" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($shift$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vasrL(vReg dst, vReg src, vReg shift) %{ +- match(Set dst (RShiftVL src shift)); +- ins_cost(VEC_COST); +- format %{ "vsra.vv $dst, $src, $shift\t#@vasrL" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($shift$$reg)); +- %} 
+- ins_pipe(pipe_slow); +-%} +- +-instruct vlslB(vReg dst, vReg src, vReg shift) %{ +- match(Set dst (LShiftVB src shift)); +- ins_cost(VEC_COST); +- effect( TEMP_DEF dst); +- format %{ "vmsgtu.vi v0, $shift, 7\t#@vlslB\n\t" +- "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" +- "vmnot.m v0, v0\n\t" +- "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e8); +- // if shift > BitsPerByte - 1, clear the element +- __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1); +- __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($src$$reg), Assembler::v0_t); +- // otherwise, shift +- __ vmnot_m(v0, v0); +- __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($shift$$reg), Assembler::v0_t); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vlslS(vReg dst, vReg src, vReg shift) %{ +- match(Set dst (LShiftVS src shift)); +- ins_cost(VEC_COST); +- effect(TEMP_DEF dst); +- format %{ "vmsgtu.vi v0, $shift, 15\t#@vlslS\n\t" +- "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" +- "vmnot.m v0, v0\n\t" +- "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e16); +- // if shift > BitsPerShort - 1, clear the element +- __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerShort - 1); +- __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($src$$reg), Assembler::v0_t); +- // otherwise, shift +- __ vmnot_m(v0, v0); +- __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($shift$$reg), Assembler::v0_t); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vlslI(vReg dst, vReg src, vReg shift) %{ +- match(Set dst (LShiftVI src shift)); +- ins_cost(VEC_COST); +- format %{ "vsll.vv $dst, $src, $shift\t#@vlslI" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($shift$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vlslL(vReg dst, vReg src, vReg shift) %{ +- match(Set dst (LShiftVL src shift)); +- ins_cost(VEC_COST); +- format %{ "vsll.vv $dst, $src, $shift\t# vector (D)" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($shift$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vlsrB(vReg dst, vReg src, vReg shift) %{ +- match(Set dst (URShiftVB src shift)); +- ins_cost(VEC_COST); +- effect(TEMP_DEF dst); +- format %{ "vmsgtu.vi v0, $shift, 7\t#@vlsrB\n\t" +- "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" +- "vmnot.m v0, v0, v0\n\t" +- "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e8); +- // if shift > BitsPerByte - 1, clear the element +- __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1); +- __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($src$$reg), Assembler::v0_t); +- // otherwise, shift +- __ vmnot_m(v0, v0); +- __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($shift$$reg), Assembler::v0_t); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vlsrS(vReg dst, vReg src, vReg shift) %{ +- match(Set dst (URShiftVS src shift)); +- ins_cost(VEC_COST); +- effect(TEMP_DEF dst); +- format %{ "vmsgtu.vi v0, $shift, 15\t#@vlsrS\n\t" +- "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" +- "vmnot.m v0, 
v0\n\t" +- "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e16); +- // if shift > BitsPerShort - 1, clear the element +- __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerShort - 1); +- __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($src$$reg), Assembler::v0_t); +- // otherwise, shift +- __ vmnot_m(v0, v0); +- __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($shift$$reg), Assembler::v0_t); +- %} +- ins_pipe(pipe_slow); +-%} +- +- +-instruct vlsrI(vReg dst, vReg src, vReg shift) %{ +- match(Set dst (URShiftVI src shift)); +- ins_cost(VEC_COST); +- format %{ "vsrl.vv $dst, $src, $shift\t#@vlsrI" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($shift$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +- +-instruct vlsrL(vReg dst, vReg src, vReg shift) %{ +- match(Set dst (URShiftVL src shift)); +- ins_cost(VEC_COST); +- format %{ "vsrl.vv $dst, $src, $shift\t#@vlsrL" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($shift$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vasrB_imm(vReg dst, vReg src, immI shift) %{ +- match(Set dst (RShiftVB src (RShiftCntV shift))); +- ins_cost(VEC_COST); +- format %{ "vsra.vi $dst, $src, $shift\t#@vasrB_imm" %} +- ins_encode %{ +- uint32_t con = (unsigned)$shift$$constant & 0x1f; +- __ vsetvli(t0, x0, Assembler::e8); +- if (con == 0) { +- __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($src$$reg)); +- return; +- } +- if (con >= BitsPerByte) con = BitsPerByte - 1; +- __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vasrS_imm(vReg dst, vReg src, immI shift) %{ +- match(Set dst (RShiftVS src (RShiftCntV shift))); +- ins_cost(VEC_COST); +- format %{ "vsra.vi $dst, $src, $shift\t#@vasrS_imm" %} +- ins_encode %{ +- uint32_t con = (unsigned)$shift$$constant & 0x1f; +- __ vsetvli(t0, x0, Assembler::e16); +- if (con == 0) { +- __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($src$$reg)); +- return; +- } +- if (con >= BitsPerShort) con = BitsPerShort - 1; +- __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vasrI_imm(vReg dst, vReg src, immI shift) %{ +- match(Set dst (RShiftVI src (RShiftCntV shift))); +- ins_cost(VEC_COST); +- format %{ "vsrl.vi $dst, $src, $shift\t#@vasrI_imm" %} +- ins_encode %{ +- uint32_t con = (unsigned)$shift$$constant & 0x1f; +- __ vsetvli(t0, x0, Assembler::e32); +- if (con == 0) { +- __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($src$$reg)); +- return; +- } +- __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vasrL_imm(vReg dst, vReg src, immI shift) %{ +- predicate((n->in(2)->in(1)->get_int() & 0x3f) < 32); +- match(Set dst (RShiftVL src (RShiftCntV shift))); +- ins_cost(VEC_COST); +- format %{ "vsrl.vi $dst, $src, $shift\t#@vasrL_imm" %} +- ins_encode %{ +- uint32_t con = (unsigned)$shift$$constant & 0x1f; +- __ vsetvli(t0, x0, Assembler::e64); +- if (con == 0) { +- __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), 
+- as_VectorRegister($src$$reg)); +- return; +- } +- __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vlsrB_imm(vReg dst, vReg src, immI shift) %{ +- match(Set dst (URShiftVB src (RShiftCntV shift))); +- ins_cost(VEC_COST); +- format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrB_imm" %} +- ins_encode %{ +- uint32_t con = (unsigned)$shift$$constant & 0x1f; +- __ vsetvli(t0, x0, Assembler::e8); +- if (con == 0) { +- __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($src$$reg)); +- return; +- } +- if (con >= BitsPerByte) { +- __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($src$$reg)); +- return; +- } +- __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vlsrS_imm(vReg dst, vReg src, immI shift) %{ +- match(Set dst (URShiftVS src (RShiftCntV shift))); +- ins_cost(VEC_COST); +- format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrS_imm" %} +- ins_encode %{ +- uint32_t con = (unsigned)$shift$$constant & 0x1f; +- __ vsetvli(t0, x0, Assembler::e16); +- if (con == 0) { +- __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($src$$reg)); +- return; +- } +- if (con >= BitsPerShort) { +- __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($src$$reg)); +- return; +- } +- __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vlsrI_imm(vReg dst, vReg src, immI shift) %{ +- match(Set dst (URShiftVI src (RShiftCntV shift))); +- ins_cost(VEC_COST); +- format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrI_imm" %} +- ins_encode %{ +- uint32_t con = (unsigned)$shift$$constant & 0x1f; +- __ vsetvli(t0, x0, Assembler::e32); +- if (con == 0) { +- __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($src$$reg)); +- return; +- } +- __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vlsrL_imm(vReg dst, vReg src, immI shift) %{ +- predicate((n->in(2)->in(1)->get_int() & 0x3f) < 32); +- match(Set dst (URShiftVL src (RShiftCntV shift))); +- ins_cost(VEC_COST); +- format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrL_imm" %} +- ins_encode %{ +- uint32_t con = (unsigned)$shift$$constant & 0x1f; +- __ vsetvli(t0, x0, Assembler::e64); +- if (con == 0) { +- __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($src$$reg)); +- return; +- } +- __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vlslB_imm(vReg dst, vReg src, immI shift) %{ +- match(Set dst (LShiftVB src (LShiftCntV shift))); +- ins_cost(VEC_COST); +- format %{ "vsll.vi $dst, $src, $shift\t#@vlslB_imm" %} +- ins_encode %{ +- uint32_t con = (unsigned)$shift$$constant & 0x1f; +- __ vsetvli(t0, x0, Assembler::e8); +- if (con >= BitsPerByte) { +- __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($src$$reg)); +- return; +- } +- __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vlslS_imm(vReg dst, vReg src, immI shift) %{ +- match(Set dst (LShiftVS src (LShiftCntV shift))); +- ins_cost(VEC_COST); +- format %{ "vsll.vi $dst, $src, $shift\t#@vlslS_imm" %} +- ins_encode %{ +- 
uint32_t con = (unsigned)$shift$$constant & 0x1f; +- __ vsetvli(t0, x0, Assembler::e16); +- if (con >= BitsPerShort) { +- __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($src$$reg)); +- return; +- } +- __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vlslI_imm(vReg dst, vReg src, immI shift) %{ +- match(Set dst (LShiftVI src (LShiftCntV shift))); +- ins_cost(VEC_COST); +- format %{ "vsll.vi $dst, $src, $shift\t#@vlslI_imm" %} +- ins_encode %{ +- uint32_t con = (unsigned)$shift$$constant & 0x1f; +- __ vsetvli(t0, x0, Assembler::e32); +- __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vlslL_imm(vReg dst, vReg src, immI shift) %{ +- predicate((n->in(2)->in(1)->get_int() & 0x3f) < 32); +- match(Set dst (LShiftVL src (LShiftCntV shift))); +- ins_cost(VEC_COST); +- format %{ "vsll.vi $dst, $src, $shift\t#@vlslL_imm" %} +- ins_encode %{ +- uint32_t con = (unsigned)$shift$$constant & 0x1f; +- __ vsetvli(t0, x0, Assembler::e64); +- __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vshiftcntB(vReg dst, iRegIorL2I cnt) %{ +- predicate(n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); +- match(Set dst (LShiftCntV cnt)); +- match(Set dst (RShiftCntV cnt)); +- format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntB" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e8); +- __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vshiftcntS(vReg dst, iRegIorL2I cnt) %{ +- predicate(n->bottom_type()->is_vect()->element_basic_type() == T_SHORT || +- n->bottom_type()->is_vect()->element_basic_type() == T_CHAR); +- match(Set dst (LShiftCntV cnt)); +- match(Set dst (RShiftCntV cnt)); +- format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntS" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e16); +- __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vshiftcntI(vReg dst, iRegIorL2I cnt) %{ +- predicate(n->bottom_type()->is_vect()->element_basic_type() == T_INT); +- match(Set dst (LShiftCntV cnt)); +- match(Set dst (RShiftCntV cnt)); +- format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntI" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vshiftcntL(vReg dst, iRegIorL2I cnt) %{ +- predicate(n->bottom_type()->is_vect()->element_basic_type() == T_LONG); +- match(Set dst (LShiftCntV cnt)); +- match(Set dst (RShiftCntV cnt)); +- format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntL" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector sqrt +- +-instruct vsqrtF(vReg dst, vReg src) %{ +- match(Set dst (SqrtVF src)); +- ins_cost(VEC_COST); +- format %{ "vfsqrt.v $dst, $src\t#@vsqrtF" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vfsqrt_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vsqrtD(vReg dst, vReg src) %{ +- match(Set dst (SqrtVD src)); +- ins_cost(VEC_COST); +- format %{ "vfsqrt.v $dst, $src\t#@vsqrtD" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vfsqrt_v(as_VectorRegister($dst$$reg), 
as_VectorRegister($src$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector sub +- +-instruct vsubB(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (SubVB src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vsub.vv $dst, $src1, $src2\t#@vsubB" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e8); +- __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vsubS(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (SubVS src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vsub.vv $dst, $src1, $src2\t#@vsubS" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e16); +- __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vsubI(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (SubVI src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vsub.vv $dst, $src1, $src2\t#@vsubI" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vsubL(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (SubVL src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vsub.vv $dst, $src1, $src2\t#@vsubL" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vsubF(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (SubVF src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vfsub.vv $dst, $src1, $src2\t@vsubF" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vfsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vsubD(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (SubVD src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vfsub.vv $dst, $src1, $src2\t#@vsubD" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vfsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vstring_equalsL(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, +- iRegI_R10 result, vReg_V1 v1, +- vReg_V2 v2, vReg_V3 v3, rFlagsReg cr) +-%{ +- predicate(UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL); +- match(Set result (StrEquals (Binary str1 str2) cnt)); +- effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, TEMP v1, TEMP v2, TEMP v3, KILL cr); +- +- format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsL" %} +- ins_encode %{ +- // Count is in 8-bit bytes; non-Compact chars are 16 bits. +- __ string_equals_v($str1$$Register, $str2$$Register, +- $result$$Register, $cnt$$Register, 1); +- %} +- ins_pipe(pipe_class_memory); +-%} +- +-instruct vstring_equalsU(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, +- iRegI_R10 result, vReg_V1 v1, +- vReg_V2 v2, vReg_V3 v3, rFlagsReg cr) +-%{ +- predicate(UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU); +- match(Set result (StrEquals (Binary str1 str2) cnt)); +- effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, TEMP v1, TEMP v2, TEMP v3, KILL cr); +- +- format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsU" %} +- ins_encode %{ +- // Count is in 8-bit bytes; non-Compact chars are 16 bits. 
+- __ string_equals_v($str1$$Register, $str2$$Register, +- $result$$Register, $cnt$$Register, 2); +- %} +- ins_pipe(pipe_class_memory); +-%} +- +-instruct varray_equalsB(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, +- vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegP_R28 tmp, rFlagsReg cr) +-%{ +- predicate(UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); +- match(Set result (AryEq ary1 ary2)); +- effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP v1, TEMP v2, TEMP v3, KILL cr); +- +- format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsB // KILL $tmp" %} +- ins_encode %{ +- __ arrays_equals_v($ary1$$Register, $ary2$$Register, +- $result$$Register, $tmp$$Register, 1); +- %} +- ins_pipe(pipe_class_memory); +-%} +- +-instruct varray_equalsC(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, +- vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegP_R28 tmp, rFlagsReg cr) +-%{ +- predicate(UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); +- match(Set result (AryEq ary1 ary2)); +- effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP v1, TEMP v2, TEMP v3, KILL cr); +- +- format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsC // KILL $tmp" %} +- ins_encode %{ +- __ arrays_equals_v($ary1$$Register, $ary2$$Register, +- $result$$Register, $tmp$$Register, 2); +- %} +- ins_pipe(pipe_class_memory); +-%} +- +-instruct vstring_compareU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, +- iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, +- iRegP_R28 tmp1, iRegL_R29 tmp2) +-%{ +- predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UU); +- match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); +- effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, +- TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); +- +- format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareU" %} +- ins_encode %{ +- // Count is in 8-bit bytes; non-Compact chars are 16 bits. 
+- __ string_compare_v($str1$$Register, $str2$$Register, +- $cnt1$$Register, $cnt2$$Register, $result$$Register, +- $tmp1$$Register, $tmp2$$Register, +- StrIntrinsicNode::UU); +- %} +- ins_pipe(pipe_class_memory); +-%} +-instruct vstring_compareL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, +- iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, +- iRegP_R28 tmp1, iRegL_R29 tmp2) +-%{ +- predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LL); +- match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); +- effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, +- TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); +- +- format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareL" %} +- ins_encode %{ +- __ string_compare_v($str1$$Register, $str2$$Register, +- $cnt1$$Register, $cnt2$$Register, $result$$Register, +- $tmp1$$Register, $tmp2$$Register, +- StrIntrinsicNode::LL); +- %} +- ins_pipe(pipe_class_memory); +-%} +- +-instruct vstring_compareUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, +- iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, +- iRegP_R28 tmp1, iRegL_R29 tmp2) +-%{ +- predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UL); +- match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); +- effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, +- TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); +- +- format %{"String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareUL" %} +- ins_encode %{ +- __ string_compare_v($str1$$Register, $str2$$Register, +- $cnt1$$Register, $cnt2$$Register, $result$$Register, +- $tmp1$$Register, $tmp2$$Register, +- StrIntrinsicNode::UL); +- %} +- ins_pipe(pipe_class_memory); +-%} +-instruct vstring_compareLU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, +- iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, +- iRegP_R28 tmp1, iRegL_R29 tmp2) +-%{ +- predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LU); +- match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); +- effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, +- TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); +- +- format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareLU" %} +- ins_encode %{ +- __ string_compare_v($str1$$Register, $str2$$Register, +- $cnt1$$Register, $cnt2$$Register, $result$$Register, +- $tmp1$$Register, $tmp2$$Register, +- StrIntrinsicNode::LU); +- %} +- ins_pipe(pipe_class_memory); +-%} +- +-// fast byte[] to char[] inflation +-instruct vstring_inflate(Universe dummy, iRegP_R10 src, iRegP_R11 dst, iRegI_R12 len, +- vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegL tmp) +-%{ +- predicate(UseRVV); +- match(Set dummy (StrInflatedCopy src (Binary dst len))); +- effect(TEMP v1, TEMP v2, TEMP v3, TEMP tmp, USE_KILL src, USE_KILL dst, USE_KILL len); +- +- format %{ "String Inflate $src,$dst" %} +- ins_encode %{ +- __ byte_array_inflate_v($src$$Register, $dst$$Register, $len$$Register, $tmp$$Register); +- %} +- ins_pipe(pipe_class_memory); +-%} +- +-// encode char[] to byte[] in ISO_8859_1 +-instruct vencode_iso_array(iRegP_R12 src, iRegP_R11 dst, iRegI_R13 len, iRegI_R10 result, +- vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegL tmp) +-%{ +- predicate(UseRVV); +- match(Set result (EncodeISOArray src (Binary dst len))); +- 
effect(TEMP_DEF result, USE_KILL src, USE_KILL dst, USE_KILL len, +- TEMP v1, TEMP v2, TEMP v3, TEMP tmp); +- +- format %{ "Encode array $src,$dst,$len -> $result" %} +- ins_encode %{ +- __ encode_iso_array_v($src$$Register, $dst$$Register, $len$$Register, +- $result$$Register, $tmp$$Register); +- %} +- ins_pipe( pipe_class_memory ); +-%} +- +-// fast char[] to byte[] compression +-instruct vstring_compress(iRegP_R12 src, iRegP_R11 dst, iRegI_R13 len, iRegI_R10 result, +- vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegL tmp) +-%{ +- predicate(UseRVV); +- match(Set result (StrCompressedCopy src (Binary dst len))); +- effect(TEMP_DEF result, USE_KILL src, USE_KILL dst, USE_KILL len, +- TEMP v1, TEMP v2, TEMP v3, TEMP tmp); +- +- format %{ "String Compress $src,$dst -> $result // KILL R11, R12, R13" %} +- ins_encode %{ +- __ char_array_compress_v($src$$Register, $dst$$Register, $len$$Register, +- $result$$Register, $tmp$$Register); +- %} +- ins_pipe( pipe_slow ); +-%} +- +-instruct vcount_positives(iRegP_R11 ary, iRegI_R12 len, iRegI_R10 result, iRegL tmp) +-%{ +- predicate(UseRVV); +- match(Set result (CountPositives ary len)); +- effect(USE_KILL ary, USE_KILL len, TEMP tmp); +- +- format %{ "count positives byte[] $ary, $len -> $result" %} +- ins_encode %{ +- __ count_positives_v($ary$$Register, $len$$Register, $result$$Register, $tmp$$Register); +- %} +- +- ins_pipe(pipe_slow); +-%} +- +-instruct vstringU_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, +- iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, +- vReg_V1 v1, vReg_V2 v2, vReg_V3 v3) +-%{ +- predicate(UseRVV && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U)); +- match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); +- effect(TEMP_DEF result, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, +- TEMP tmp1, TEMP tmp2, TEMP v1, TEMP v2, TEMP v3); +- +- format %{ "StringUTF16 IndexOf char[] $str1, $cnt1, $ch -> $result" %} +- +- ins_encode %{ +- __ string_indexof_char_v($str1$$Register, $cnt1$$Register, $ch$$Register, +- $result$$Register, $tmp1$$Register, $tmp2$$Register, +- false /* isL */); +- %} +- +- ins_pipe(pipe_class_memory); +-%} +- +-instruct vstringL_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, +- iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, +- vReg_V1 v1, vReg_V2 v2, vReg_V3 v3) +-%{ +- predicate(UseRVV && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L)); +- match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); +- effect(TEMP_DEF result, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, +- TEMP tmp1, TEMP tmp2, TEMP v1, TEMP v2, TEMP v3); +- +- format %{ "StringLatin1 IndexOf char[] $str1, $cnt1, $ch -> $result" %} +- +- ins_encode %{ +- __ string_indexof_char_v($str1$$Register, $cnt1$$Register, $ch$$Register, +- $result$$Register, $tmp1$$Register, $tmp2$$Register, +- true /* isL */); +- %} +- +- ins_pipe(pipe_class_memory); +-%} +- +-// clearing of an array +-instruct vclearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy, +- vReg_V1 vReg1, vReg_V2 vReg2, vReg_V3 vReg3) +-%{ +- predicate(UseRVV); +- match(Set dummy (ClearArray cnt base)); +- effect(USE_KILL cnt, USE_KILL base, TEMP vReg1, TEMP vReg2, TEMP vReg3); +- +- format %{ "ClearArray $cnt, $base\t#@clearArray_reg_reg" %} +- +- ins_encode %{ +- __ clear_array_v($base$$Register, $cnt$$Register); +- %} +- +- ins_pipe(pipe_class_memory); +-%} +diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +index f85d4b25a76..4daed17df10 100644 +--- 
a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp ++++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +@@ -80,9 +80,8 @@ class SimpleRuntimeFrame { + }; + + class RegisterSaver { +- const bool _save_vectors; + public: +- RegisterSaver(bool save_vectors) : _save_vectors(UseRVV && save_vectors) {} ++ RegisterSaver() {} + ~RegisterSaver() {} + OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words); + void restore_live_registers(MacroAssembler* masm); +@@ -91,11 +90,7 @@ class RegisterSaver { + // Used by deoptimization when it is managing result register + // values on its own + // gregs:28, float_register:32; except: x1(ra) & x2(sp) & gp(x3) & tp(x4) +- // |---v0---|<---SP +- // |---v1---|save vectors only in generate_handler_blob +- // |-- .. --| +- // |---v31--|----- +- // |---f0---| ++ // |---f0---|<---SP + // |---f1---| + // | .. | + // |---f31--| +@@ -106,16 +101,8 @@ class RegisterSaver { + // |---x31--| + // |---fp---| + // |---ra---| +- int v0_offset_in_bytes(void) { return 0; } + int f0_offset_in_bytes(void) { +- int f0_offset = 0; +-#ifdef COMPILER2 +- if (_save_vectors) { +- f0_offset += Matcher::scalable_vector_reg_size(T_INT) * VectorRegisterImpl::number_of_registers * +- BytesPerInt; +- } +-#endif +- return f0_offset; ++ return 0; + } + int reserved_slot_offset_in_bytes(void) { + return f0_offset_in_bytes() + +@@ -142,15 +129,6 @@ class RegisterSaver { + }; + + OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) { +- int vector_size_in_bytes = 0; +- int vector_size_in_slots = 0; +-#ifdef COMPILER2 +- if (_save_vectors) { +- vector_size_in_bytes += Matcher::scalable_vector_reg_size(T_BYTE); +- vector_size_in_slots += Matcher::scalable_vector_reg_size(T_INT); +- } +-#endif +- + assert_cond(masm != NULL && total_frame_words != NULL); + int frame_size_in_bytes = align_up(additional_frame_words * wordSize + ra_offset_in_bytes() + wordSize, 16); + // OopMap frame size is in compiler stack slots (jint's) not bytes or words +@@ -161,9 +139,9 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_ + int frame_size_in_words = frame_size_in_bytes / wordSize; + *total_frame_words = frame_size_in_words; + +- // Save Integer, Float and Vector registers. ++ // Save Integer and Float registers. + __ enter(); +- __ push_CPU_state(_save_vectors, vector_size_in_bytes); ++ __ push_CPU_state(); + + // Set an oopmap for the call site. This oopmap will map all + // oop-registers and debug-info registers as callee-saved. 
This +@@ -176,13 +154,6 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_ + + int sp_offset_in_slots = 0; + int step_in_slots = 0; +- if (_save_vectors) { +- step_in_slots = vector_size_in_slots; +- for (int i = 0; i < VectorRegisterImpl::number_of_registers; i++, sp_offset_in_slots += step_in_slots) { +- VectorRegister r = as_VectorRegister(i); +- oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset_in_slots), r->as_VMReg()); +- } +- } + + step_in_slots = FloatRegisterImpl::max_slots_per_register; + for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++, sp_offset_in_slots += step_in_slots) { +@@ -207,18 +178,13 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_ + + void RegisterSaver::restore_live_registers(MacroAssembler* masm) { + assert_cond(masm != NULL); +-#ifdef COMPILER2 +- __ pop_CPU_state(_save_vectors, Matcher::scalable_vector_reg_size(T_BYTE)); +-#else +- __ pop_CPU_state(_save_vectors); +-#endif ++ __ pop_CPU_state(); + __ leave(); + } + + // Is vector's size (in bytes) bigger than a size saved by default? +-// riscv does not ovlerlay the floating-point registers on vector registers like aarch64. + bool SharedRuntime::is_wide_vector(int size) { +- return UseRVV; ++ return false; + } + + // The java_calling_convention describes stack locations as ideal slots on +@@ -674,13 +640,6 @@ AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm + return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry, c2i_no_clinit_check_entry); + } + +-int SharedRuntime::vector_calling_convention(VMRegPair *regs, +- uint num_bits, +- uint total_args_passed) { +- Unimplemented(); +- return 0; +-} +- + int SharedRuntime::c_calling_convention(const BasicType *sig_bt, + VMRegPair *regs, + VMRegPair *regs2, +@@ -1891,7 +1850,7 @@ void SharedRuntime::generate_deopt_blob() { + OopMap* map = NULL; + OopMapSet *oop_maps = new OopMapSet(); + assert_cond(masm != NULL && oop_maps != NULL); +- RegisterSaver reg_saver(COMPILER2_OR_JVMCI != 0); ++ RegisterSaver reg_saver; + + // ------------- + // This code enters when returning to a de-optimized nmethod. A return +@@ -2423,7 +2382,7 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t + address call_pc = NULL; + int frame_size_in_words = -1; + bool cause_return = (poll_type == POLL_AT_RETURN); +- RegisterSaver reg_saver(poll_type == POLL_AT_VECTOR_LOOP /* save_vectors */); ++ RegisterSaver reg_saver; + + // Save Integer and Float registers. 
+ map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words); +@@ -2542,7 +2501,7 @@ RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const cha + assert_cond(masm != NULL); + + int frame_size_in_words = -1; +- RegisterSaver reg_saver(false /* save_vectors */); ++ RegisterSaver reg_saver; + + OopMapSet *oop_maps = new OopMapSet(); + assert_cond(oop_maps != NULL); +diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp +index b05edf7172c..39416441bdf 100644 +--- a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp ++++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp +@@ -2843,111 +2843,6 @@ class StubGenerator: public StubCodeGenerator { + + return entry; + } +- +- // Arguments: +- // +- // Input: +- // c_rarg0 - newArr address +- // c_rarg1 - oldArr address +- // c_rarg2 - newIdx +- // c_rarg3 - shiftCount +- // c_rarg4 - numIter +- // +- address generate_bigIntegerLeftShift() { +- __ align(CodeEntryAlignment); +- StubCodeMark mark(this, "StubRoutines", "bigIntegerLeftShiftWorker"); +- address entry = __ pc(); +- +- Label loop, exit; +- +- Register newArr = c_rarg0; +- Register oldArr = c_rarg1; +- Register newIdx = c_rarg2; +- Register shiftCount = c_rarg3; +- Register numIter = c_rarg4; +- +- Register shiftRevCount = c_rarg5; +- Register oldArrNext = t1; +- +- __ beqz(numIter, exit); +- __ shadd(newArr, newIdx, newArr, t0, 2); +- +- __ li(shiftRevCount, 32); +- __ sub(shiftRevCount, shiftRevCount, shiftCount); +- +- __ bind(loop); +- __ addi(oldArrNext, oldArr, 4); +- __ vsetvli(t0, numIter, Assembler::e32, Assembler::m4); +- __ vle32_v(v0, oldArr); +- __ vle32_v(v4, oldArrNext); +- __ vsll_vx(v0, v0, shiftCount); +- __ vsrl_vx(v4, v4, shiftRevCount); +- __ vor_vv(v0, v0, v4); +- __ vse32_v(v0, newArr); +- __ sub(numIter, numIter, t0); +- __ shadd(oldArr, t0, oldArr, t1, 2); +- __ shadd(newArr, t0, newArr, t1, 2); +- __ bnez(numIter, loop); +- +- __ bind(exit); +- __ ret(); +- +- return entry; +- } +- +- // Arguments: +- // +- // Input: +- // c_rarg0 - newArr address +- // c_rarg1 - oldArr address +- // c_rarg2 - newIdx +- // c_rarg3 - shiftCount +- // c_rarg4 - numIter +- // +- address generate_bigIntegerRightShift() { +- __ align(CodeEntryAlignment); +- StubCodeMark mark(this, "StubRoutines", "bigIntegerRightShiftWorker"); +- address entry = __ pc(); +- +- Label loop, exit; +- +- Register newArr = c_rarg0; +- Register oldArr = c_rarg1; +- Register newIdx = c_rarg2; +- Register shiftCount = c_rarg3; +- Register numIter = c_rarg4; +- Register idx = numIter; +- +- Register shiftRevCount = c_rarg5; +- Register oldArrNext = c_rarg6; +- Register newArrCur = t0; +- Register oldArrCur = t1; +- +- __ beqz(idx, exit); +- __ shadd(newArr, newIdx, newArr, t0, 2); +- +- __ li(shiftRevCount, 32); +- __ sub(shiftRevCount, shiftRevCount, shiftCount); +- +- __ bind(loop); +- __ vsetvli(t0, idx, Assembler::e32, Assembler::m4); +- __ sub(idx, idx, t0); +- __ shadd(oldArrNext, idx, oldArr, t1, 2); +- __ shadd(newArrCur, idx, newArr, t1, 2); +- __ addi(oldArrCur, oldArrNext, 4); +- __ vle32_v(v0, oldArrCur); +- __ vle32_v(v4, oldArrNext); +- __ vsrl_vx(v0, v0, shiftCount); +- __ vsll_vx(v4, v4, shiftRevCount); +- __ vor_vv(v0, v0, v4); +- __ vse32_v(v0, newArrCur); +- __ bnez(idx, loop); +- +- __ bind(exit); +- __ ret(); +- +- return entry; +- } + #endif + + #ifdef COMPILER2 +@@ -3813,11 +3708,6 @@ class StubGenerator: public StubCodeGenerator { + MontgomeryMultiplyGenerator g(_masm, /*squaring*/true); + 
StubRoutines::_montgomerySquare = g.generate_square(); + } +- +- if (UseRVVForBigIntegerShiftIntrinsics) { +- StubRoutines::_bigIntegerLeftShiftWorker = generate_bigIntegerLeftShift(); +- StubRoutines::_bigIntegerRightShiftWorker = generate_bigIntegerRightShift(); +- } + #endif + + generate_compare_long_strings(); +diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp +index 768c7633ca6..2c15a834542 100644 +--- a/src/hotspot/cpu/riscv/vm_version_riscv.cpp ++++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp +@@ -167,10 +167,6 @@ void VM_Version::c2_initialize() { + FLAG_SET_DEFAULT(MaxVectorSize, 0); + } + +- if (!UseRVV) { +- FLAG_SET_DEFAULT(UseRVVForBigIntegerShiftIntrinsics, false); +- } +- + if (UseRVV) { + if (FLAG_IS_DEFAULT(MaxVectorSize)) { + MaxVectorSize = _initial_vector_length; +diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.cpp b/src/hotspot/cpu/riscv/vmreg_riscv.cpp +index aa7222dc64a..1f6eff96cba 100644 +--- a/src/hotspot/cpu/riscv/vmreg_riscv.cpp ++++ b/src/hotspot/cpu/riscv/vmreg_riscv.cpp +@@ -45,16 +45,8 @@ void VMRegImpl::set_regName() { + freg = freg->successor(); + } + +- VectorRegister vreg = ::as_VectorRegister(0); +- for ( ; i < ConcreteRegisterImpl::max_vpr ; ) { +- for (int j = 0 ; j < VectorRegisterImpl::max_slots_per_register ; j++) { +- regName[i++] = reg->name(); +- } +- vreg = vreg->successor(); +- } +- + for ( ; i < ConcreteRegisterImpl::number_of_registers ; i++) { +- regName[i] = "NON-GPR-FPR-VPR"; ++ regName[i] = "NON-GPR-FPR"; + } + } + +diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.hpp b/src/hotspot/cpu/riscv/vmreg_riscv.hpp +index 9e611b1f671..6f613a8f11a 100644 +--- a/src/hotspot/cpu/riscv/vmreg_riscv.hpp ++++ b/src/hotspot/cpu/riscv/vmreg_riscv.hpp +@@ -34,10 +34,6 @@ inline bool is_FloatRegister() { + return value() >= ConcreteRegisterImpl::max_gpr && value() < ConcreteRegisterImpl::max_fpr; + } + +-inline bool is_VectorRegister() { +- return value() >= ConcreteRegisterImpl::max_fpr && value() < ConcreteRegisterImpl::max_vpr; +-} +- + inline Register as_Register() { + assert(is_Register(), "must be"); + return ::as_Register(value() / RegisterImpl::max_slots_per_register); +@@ -49,20 +45,9 @@ inline FloatRegister as_FloatRegister() { + FloatRegisterImpl::max_slots_per_register); + } + +-inline VectorRegister as_VectorRegister() { +- assert(is_VectorRegister() && ((value() & (VectorRegisterImpl::max_slots_per_register - 1)) == 0), "must be"); +- return ::as_VectorRegister((value() - ConcreteRegisterImpl::max_fpr) / +- VectorRegisterImpl::max_slots_per_register); +-} +- + inline bool is_concrete() { + assert(is_reg(), "must be"); +- if (is_VectorRegister()) { +- int base = value() - ConcreteRegisterImpl::max_fpr; +- return (base % VectorRegisterImpl::max_slots_per_register) == 0; +- } else { +- return is_even(value()); +- } ++ return is_even(value()); + } + + #endif // CPU_RISCV_VMREG_RISCV_HPP + +From b2011bad9b7404c1f6d0c1aa3176569d7f07d7a9 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Mon, 27 Mar 2023 16:05:55 +0800 +Subject: [PATCH 004/140] Revert: JDK-8253180: ZGC: Implementation of JEP 376: + ZGC: Concurrent Thread-Stack Processing JDK-8220051: Remove global safepoint + code + +--- + src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp | 14 ------ + .../cpu/riscv/c1_LIRAssembler_riscv.cpp | 14 +++--- + .../riscv/c2_safepointPollStubTable_riscv.cpp | 47 ------------------ + src/hotspot/cpu/riscv/frame_riscv.cpp | 9 +--- + src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 19 +------- + 
.../cpu/riscv/macroAssembler_riscv.cpp | 48 +++++++++++-------- + .../cpu/riscv/macroAssembler_riscv.hpp | 5 +- + src/hotspot/cpu/riscv/riscv.ad | 14 ++---- + src/hotspot/cpu/riscv/vm_version_riscv.hpp | 2 - + 9 files changed, 45 insertions(+), 127 deletions(-) + delete mode 100644 src/hotspot/cpu/riscv/c2_safepointPollStubTable_riscv.cpp + +diff --git a/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp +index dcd0472c540..af7bd067f33 100644 +--- a/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp +@@ -39,20 +39,6 @@ + + #define __ ce->masm()-> + +-void C1SafepointPollStub::emit_code(LIR_Assembler* ce) { +- __ bind(_entry); +- InternalAddress safepoint_pc(__ pc() - __ offset() + safepoint_offset()); +- __ code_section()->relocate(__ pc(), safepoint_pc.rspec()); +- __ la(t0, safepoint_pc.target()); +- __ sd(t0, Address(xthread, JavaThread::saved_exception_pc_offset())); +- +- assert(SharedRuntime::polling_page_return_handler_blob() != NULL, +- "polling page return stub not created yet"); +- address stub = SharedRuntime::polling_page_return_handler_blob()->entry_point(); +- +- __ far_jump(RuntimeAddress(stub)); +-} +- + void CounterOverflowStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + Metadata *m = _method->as_constant_ptr()->as_metadata(); +diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +index bba3bd4709c..0e383a3c139 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +@@ -424,7 +424,7 @@ int LIR_Assembler::emit_deopt_handler() { + return offset; + } + +-void LIR_Assembler::return_op(LIR_Opr result, C1SafepointPollStub* code_stub) { ++void LIR_Assembler::return_op(LIR_Opr result) { + assert(result->is_illegal() || !result->is_single_cpu() || result->as_register() == x10, "word returns are in x10"); + + // Pop the stack before the safepoint code +@@ -434,18 +434,20 @@ void LIR_Assembler::return_op(LIR_Opr result, C1SafepointPollStub* code_stub) { + __ reserved_stack_check(); + } + +- code_stub->set_safepoint_offset(__ offset()); +- __ relocate(relocInfo::poll_return_type); +- __ safepoint_poll(*code_stub->entry(), true /* at_return */, false /* acquire */, true /* in_nmethod */); ++ address polling_page(os::get_polling_page()); ++ __ read_polling_page(t0, polling_page, relocInfo::poll_return_type); + __ ret(); + } + + int LIR_Assembler::safepoint_poll(LIR_Opr tmp, CodeEmitInfo* info) { ++ address polling_page(os::get_polling_page()); + guarantee(info != NULL, "Shouldn't be NULL"); +- __ get_polling_page(t0, relocInfo::poll_type); ++ assert(os::is_poll_address(polling_page), "should be"); ++ int32_t offset = 0; ++ __ get_polling_page(t0, polling_page, offset, relocInfo::poll_type); + add_debug_info_for_branch(info); // This isn't just debug info: + // it's the oop map +- __ read_polling_page(t0, 0, relocInfo::poll_type); ++ __ read_polling_page(t0, offset, relocInfo::poll_type); + return __ offset(); + } + +diff --git a/src/hotspot/cpu/riscv/c2_safepointPollStubTable_riscv.cpp b/src/hotspot/cpu/riscv/c2_safepointPollStubTable_riscv.cpp +deleted file mode 100644 +index a90d9fdc160..00000000000 +--- a/src/hotspot/cpu/riscv/c2_safepointPollStubTable_riscv.cpp ++++ /dev/null +@@ -1,47 +0,0 @@ +-/* +- * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. 
+- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +- * +- * This code is free software; you can redistribute it and/or modify it +- * under the terms of the GNU General Public License version 2 only, as +- * published by the Free Software Foundation. +- * +- * This code is distributed in the hope that it will be useful, but WITHOUT +- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +- * version 2 for more details (a copy is included in the LICENSE file that +- * accompanied this code). +- * +- * You should have received a copy of the GNU General Public License version +- * 2 along with this work; if not, write to the Free Software Foundation, +- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +- * +- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +- * or visit www.oracle.com if you need additional information or have any +- * questions. +- * +- */ +- +-#include "precompiled.hpp" +-#include "asm/macroAssembler.hpp" +-#include "opto/compile.hpp" +-#include "opto/node.hpp" +-#include "opto/output.hpp" +-#include "runtime/sharedRuntime.hpp" +- +-#define __ masm. +-void C2SafepointPollStubTable::emit_stub_impl(MacroAssembler& masm, C2SafepointPollStub* entry) const { +- assert(SharedRuntime::polling_page_return_handler_blob() != NULL, +- "polling page return stub not created yet"); +- address stub = SharedRuntime::polling_page_return_handler_blob()->entry_point(); +- RuntimeAddress callback_addr(stub); +- +- __ bind(entry->_stub_label); +- InternalAddress safepoint_pc(masm.pc() - masm.offset() + entry->_safepoint_offset); +- masm.code_section()->relocate(masm.pc(), safepoint_pc.rspec()); +- __ la(t0, safepoint_pc.target()); +- __ sd(t0, Address(xthread, JavaThread::saved_exception_pc_offset())); +- __ far_jump(callback_addr); +-} +-#undef __ +diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp +index 6e38960598a..41e52a4d491 100644 +--- a/src/hotspot/cpu/riscv/frame_riscv.cpp ++++ b/src/hotspot/cpu/riscv/frame_riscv.cpp +@@ -39,7 +39,6 @@ + #include "runtime/monitorChunk.hpp" + #include "runtime/os.inline.hpp" + #include "runtime/signature.hpp" +-#include "runtime/stackWatermarkSet.hpp" + #include "runtime/stubCodeGenerator.hpp" + #include "runtime/stubRoutines.hpp" + #include "vmreg_riscv.inline.hpp" +@@ -509,13 +508,7 @@ frame frame::sender_raw(RegisterMap* map) const { + } + + frame frame::sender(RegisterMap* map) const { +- frame result = sender_raw(map); +- +- if (map->process_frames()) { +- StackWatermarkSet::on_iteration(map->thread(), result); +- } +- +- return result; ++ return sender_raw(map); + } + + bool frame::is_interpreted_frame_valid(JavaThread* thread) const { +diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +index d12dcb2af19..9090ad0c058 100644 +--- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp ++++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +@@ -519,7 +519,7 @@ void InterpreterMacroAssembler::dispatch_base(TosState state, + + if (needs_thread_local_poll) { + NOT_PRODUCT(block_comment("Thread-local Safepoint poll")); +- ld(t1, Address(xthread, JavaThread::polling_word_offset())); ++ ld(t1, Address(xthread, Thread::polling_page_offset())); + andi(t1, t1, SafepointMechanism::poll_bit()); + bnez(t1, safepoint); + } +@@ -591,23 +591,6 @@ void InterpreterMacroAssembler::remove_activation( + // result check if synchronized method + 
Label unlocked, unlock, no_unlock; + +- // The below poll is for the stack watermark barrier. It allows fixing up frames lazily, +- // that would normally not be safe to use. Such bad returns into unsafe territory of +- // the stack, will call InterpreterRuntime::at_unwind. +- Label slow_path; +- Label fast_path; +- safepoint_poll(slow_path, true /* at_return */, false /* acquire */, false /* in_nmethod */); +- j(fast_path); +- +- bind(slow_path); +- push(state); +- set_last_Java_frame(esp, fp, (address)pc(), t0); +- super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::at_unwind), xthread); +- reset_last_Java_frame(true); +- pop(state); +- +- bind(fast_path); +- + // get the value of _do_not_unlock_if_synchronized into x13 + const Address do_not_unlock_if_synchronized(xthread, + in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index 8b8d126f6c9..4b6136ae36b 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -2122,15 +2122,16 @@ void MacroAssembler::check_klass_subtype(Register sub_klass, + } + + void MacroAssembler::safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod) { +- ld(t0, Address(xthread, JavaThread::polling_word_offset())); +- if (acquire) { +- membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); +- } +- if (at_return) { +- bgtu(in_nmethod ? sp : fp, t0, slow_path, true /* is_far */); ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ ld(t1, Address(xthread, Thread::polling_page_offset())); ++ andi(t0, t1, SafepointMechanism::poll_bit()); ++ bnez(t0, slow_path); + } else { +- andi(t0, t0, SafepointMechanism::poll_bit()); +- bnez(t0, slow_path, true /* is_far */); ++ int32_t offset = 0; ++ la_patchable(t0, ExternalAddress(SafepointSynchronize::address_of_state()), offset); ++ lwu(t0, Address(t0, offset)); ++ assert(SafepointSynchronize::_not_synchronized == 0, "rewrite this code"); ++ bnez(t0, slow_path); + } + } + +@@ -2752,22 +2753,29 @@ void MacroAssembler::reserved_stack_check() { + } + + // Move the address of the polling page into dest. +-void MacroAssembler::get_polling_page(Register dest, relocInfo::relocType rtype) { +- ld(dest, Address(xthread, JavaThread::polling_page_offset())); ++void MacroAssembler::get_polling_page(Register dest, address page, int32_t &offset, relocInfo::relocType rtype) { ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ ld(dest, Address(xthread, Thread::polling_page_offset())); ++ } else { ++ uint64_t align = (uint64_t)page & 0xfff; ++ assert(align == 0, "polling page must be page aligned"); ++ la_patchable(dest, Address(page, rtype), offset); ++ } + } + + // Read the polling page. The address of the polling page must + // already be in r. +-address MacroAssembler::read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype) { +- address mark; +- { +- InstructionMark im(this); +- code_section()->relocate(inst_mark(), rtype); +- lwu(zr, Address(r, offset)); +- mark = inst_mark(); +- } +- verify_cross_modify_fence_not_required(); +- return mark; ++void MacroAssembler::read_polling_page(Register dest, address page, relocInfo::relocType rtype) { ++ int32_t offset = 0; ++ get_polling_page(dest, page, offset, rtype); ++ read_polling_page(dest, offset, rtype); ++} ++ ++// Read the polling page. The address of the polling page must ++// already be in r. 
++void MacroAssembler::read_polling_page(Register dest, int32_t offset, relocInfo::relocType rtype) { ++ code_section()->relocate(pc(), rtype); ++ lwu(zr, Address(dest, offset)); + } + + void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +index b43131514c1..041c696add6 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +@@ -625,8 +625,9 @@ class MacroAssembler: public Assembler { + + void reserved_stack_check(); + +- void get_polling_page(Register dest, relocInfo::relocType rtype); +- address read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype); ++ void get_polling_page(Register dest, address page, int32_t &offset, relocInfo::relocType rtype); ++ void read_polling_page(Register r, address page, relocInfo::relocType rtype); ++ void read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype); + + address trampoline_call(Address entry, CodeBuffer* cbuf = NULL); + address ic_call(address entry, jint method_index = 0); +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 85593a942e9..996fa1fb68f 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -1132,9 +1132,9 @@ void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const { + } + + if (do_polling() && C->is_method_compilation()) { +- st->print("# test polling word\n\t"); +- st->print("ld t0, [xthread,#%d]\n\t", in_bytes(JavaThread::polling_word_offset())); +- st->print("bgtu sp, t0, #slow_path"); ++ st->print("# touch polling page\n\t"); ++ st->print("li t0, #0x%lx\n\t", p2i(os::get_polling_page())); ++ st->print("ld zr, [t0]"); + } + } + #endif +@@ -1153,13 +1153,7 @@ void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + } + + if (do_polling() && C->is_method_compilation()) { +- Label dummy_label; +- Label* code_stub = &dummy_label; +- if (!C->output()->in_scratch_emit_size()) { +- code_stub = &C->output()->safepoint_poll_table()->add_safepoint(__ offset()); +- } +- __ relocate(relocInfo::poll_return_type); +- __ safepoint_poll(*code_stub, true /* at_return */, false /* acquire */, true /* in_nmethod */); ++ __ read_polling_page(t0, os::get_polling_page(), relocInfo::poll_return_type); + } + } + +diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.hpp b/src/hotspot/cpu/riscv/vm_version_riscv.hpp +index 8e35530359a..7586af01d99 100644 +--- a/src/hotspot/cpu/riscv/vm_version_riscv.hpp ++++ b/src/hotspot/cpu/riscv/vm_version_riscv.hpp +@@ -48,8 +48,6 @@ class VM_Version : public Abstract_VM_Version { + // Initialization + static void initialize(); + +- constexpr static bool supports_stack_watermark_barrier() { return true; } +- + enum Feature_Flag { + #define CPU_FEATURE_FLAGS(decl) \ + decl(I, "i", 8) \ + +From a032c615883fe2bd557baf40f1439cbae55be206 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Mon, 1 May 2023 15:42:09 +0800 +Subject: [PATCH 005/140] Revert JDK-8221554: aarch64 cross-modifying code + +--- + .../cpu/riscv/macroAssembler_riscv.cpp | 22 ------------------- + .../cpu/riscv/macroAssembler_riscv.hpp | 2 -- + 2 files changed, 24 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index 4b6136ae36b..269d76ba69e 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -2716,7 
+2716,6 @@ void MacroAssembler::build_frame(int framesize) { + sd(fp, Address(sp, framesize - 2 * wordSize)); + sd(ra, Address(sp, framesize - wordSize)); + if (PreserveFramePointer) { add(fp, sp, framesize); } +- verify_cross_modify_fence_not_required(); + } + + void MacroAssembler::remove_frame(int framesize) { +@@ -3935,26 +3934,5 @@ void MacroAssembler::cmp_l2i(Register dst, Register src1, Register src2, Registe + + void MacroAssembler::safepoint_ifence() { + ifence(); +-#ifndef PRODUCT +- if (VerifyCrossModifyFence) { +- // Clear the thread state. +- sb(zr, Address(xthread, in_bytes(JavaThread::requires_cross_modify_fence_offset()))); +- } +-#endif + } + +-#ifndef PRODUCT +-void MacroAssembler::verify_cross_modify_fence_not_required() { +- if (VerifyCrossModifyFence) { +- // Check if thread needs a cross modify fence. +- lbu(t0, Address(xthread, in_bytes(JavaThread::requires_cross_modify_fence_offset()))); +- Label fence_not_required; +- beqz(t0, fence_not_required); +- // If it does then fail. +- la(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::verify_cross_modify_fence_failure))); +- mv(c_rarg0, xthread); +- jalr(t0); +- bind(fence_not_required); +- } +-} +-#endif +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +index 041c696add6..b59bdadb8bf 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +@@ -821,8 +821,6 @@ class MacroAssembler: public Assembler { + void load_reserved(Register addr, enum operand_size size, Assembler::Aqrl acquire); + void store_conditional(Register addr, Register new_val, enum operand_size size, Assembler::Aqrl release); + +- // Check the current thread doesn't need a cross modify fence. 
+- void verify_cross_modify_fence_not_required() PRODUCT_RETURN; + }; + + #ifdef ASSERT + +From fd89cf689015649a5cb850e1e24dcbb7bb59735a Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 21:11:30 +0800 +Subject: [PATCH 006/140] Revert JDK-8242263: Diagnose synchronization on + primitive wrappers + +--- + src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp | 7 ------- + src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 7 ------- + src/hotspot/cpu/riscv/riscv.ad | 7 ------- + 3 files changed, 21 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp +index 6f656c8c533..348546a9ea0 100644 +--- a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp +@@ -64,13 +64,6 @@ int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr + + null_check_offset = offset(); + +- if (DiagnoseSyncOnValueBasedClasses != 0) { +- load_klass(hdr, obj); +- lwu(hdr, Address(hdr, Klass::access_flags_offset())); +- andi(t0, hdr, JVM_ACC_IS_VALUE_BASED_CLASS); +- bnez(t0, slow_case, true /* is_far */); +- } +- + // Load object header + ld(hdr, Address(obj, hdr_offset)); + // and mark it as unlocked +diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +index 9090ad0c058..8adc7b1320d 100644 +--- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp ++++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +@@ -782,13 +782,6 @@ void InterpreterMacroAssembler::lock_object(Register lock_reg) + // Load object pointer into obj_reg c_rarg3 + ld(obj_reg, Address(lock_reg, obj_offset)); + +- if (DiagnoseSyncOnValueBasedClasses != 0) { +- load_klass(tmp, obj_reg); +- lwu(tmp, Address(tmp, Klass::access_flags_offset())); +- andi(tmp, tmp, JVM_ACC_IS_VALUE_BASED_CLASS); +- bnez(tmp, slow_case); +- } +- + // Load (object->mark() | 1) into swap_reg + ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + ori(swap_reg, t0, 1); +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 996fa1fb68f..2eefc71dde0 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -1965,13 +1965,6 @@ encode %{ + // Load markWord from object into displaced_header. 
+ __ ld(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes())); + +- if (DiagnoseSyncOnValueBasedClasses != 0) { +- __ load_klass(flag, oop); +- __ lwu(flag, Address(flag, Klass::access_flags_offset())); +- __ andi(flag, flag, JVM_ACC_IS_VALUE_BASED_CLASS, tmp /* tmp */); +- __ bnez(flag, cont, true /* is_far */); +- } +- + // Check for existing monitor + __ andi(t0, disp_hdr, markWord::monitor_value); + __ bnez(t0, object_has_monitor); + +From feea78c5a227c0a57e57d6d1d544a14682310053 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 16:24:12 +0800 +Subject: [PATCH 007/140] Revert JDK-8278104: C1 should support the compiler + directive 'BreakAtExecute' + +--- + src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp +index 348546a9ea0..e5ed25616d6 100644 +--- a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp +@@ -311,7 +311,7 @@ void C1_MacroAssembler::remove_frame(int framesize) { + } + + +-void C1_MacroAssembler::verified_entry(bool breakAtEntry) { ++void C1_MacroAssembler::verified_entry() { + // If we have to make this method not-entrant we'll overwrite its + // first instruction with a jump. For this action to be legal we + // must ensure that this first instruction is a J, JAL or NOP. + +From 651009a5783f6f5150b3e75a50069dc841622d33 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sat, 29 Apr 2023 15:57:14 +0800 +Subject: [PATCH 008/140] Revert: JDK-8234562: Move + OrderAccess::release_store*/load_acquire to Atomic JDK-8234736: Harmonize + parameter order in Atomic - store JDK-8234737: Harmonize parameter order in + Atomic - add JDK-8234740: Harmonize parameter order in Atomic - cmpxchg + JDK-8234739: Harmonize parameter order in Atomic - xchg JDK-8236778: Add + Atomic::fetch_and_add + +--- + .../os_cpu/linux_riscv/atomic_linux_riscv.hpp | 51 +++++++------------ + .../linux_riscv/orderAccess_linux_riscv.hpp | 31 +++++++---- + 2 files changed, 39 insertions(+), 43 deletions(-) + +diff --git a/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp +index 761da5d743e..9b8b1a31774 100644 +--- a/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp ++++ b/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp +@@ -33,25 +33,31 @@ + // Note that memory_order_conservative requires a full barrier after atomic stores. 
+ // See https://patchwork.kernel.org/patch/3575821/
+
++#define FULL_MEM_BARRIER __sync_synchronize()
++#define READ_MEM_BARRIER __atomic_thread_fence(__ATOMIC_ACQUIRE);
++#define WRITE_MEM_BARRIER __atomic_thread_fence(__ATOMIC_RELEASE);
++
+ template<size_t byte_size>
+-struct Atomic::PlatformAdd {
+- template<typename D, typename I>
+- D add_and_fetch(D volatile* dest, I add_value, atomic_memory_order order) const {
++struct Atomic::PlatformAdd
++ : Atomic::FetchAndAdd<PlatformAdd<byte_size> >
++{
++ template<typename I, typename D>
++ D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const {
+ D res = __atomic_add_fetch(dest, add_value, __ATOMIC_RELEASE);
+ FULL_MEM_BARRIER;
+ return res;
+ }
+
+- template<typename D, typename I>
+- D fetch_and_add(D volatile* dest, I add_value, atomic_memory_order order) const {
+- return add_and_fetch(dest, add_value, order) - add_value;
++ template<typename I, typename D>
++ D fetch_and_add(I add_value, D volatile* dest, atomic_memory_order order) const {
++ return add_and_fetch(add_value, dest, order) - add_value;
+ }
+ };
+
+ template<size_t byte_size>
+ template<typename T>
+-inline T Atomic::PlatformXchg<byte_size>::operator()(T volatile* dest,
+- T exchange_value,
++inline T Atomic::PlatformXchg<byte_size>::operator()(T exchange_value,
++ T volatile* dest,
+ atomic_memory_order order) const {
+ STATIC_ASSERT(byte_size == sizeof(T));
+ T res = __atomic_exchange_n(dest, exchange_value, __ATOMIC_RELEASE);
+@@ -62,9 +68,9 @@ inline T Atomic::PlatformXchg<byte_size>::operator()(T volatile* dest,
+ // __attribute__((unused)) on dest is to get rid of spurious GCC warnings.
+ template<size_t byte_size>
+ template<typename T>
+-inline T Atomic::PlatformCmpxchg<byte_size>::operator()(T volatile* dest __attribute__((unused)),
++inline T Atomic::PlatformCmpxchg<byte_size>::operator()(T exchange_value,
++ T volatile* dest __attribute__((unused)),
+ T compare_value,
+- T exchange_value,
+ atomic_memory_order order) const {
+ STATIC_ASSERT(byte_size == sizeof(T));
+ T value = compare_value;
+@@ -83,9 +89,9 @@ inline T Atomic::PlatformCmpxchg<byte_size>::operator()(T volatile* dest __attri
+
+ template<>
+ template<typename T>
+-inline T Atomic::PlatformCmpxchg<4>::operator()(T volatile* dest __attribute__((unused)),
++inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
++ T volatile* dest __attribute__((unused)),
+ T compare_value,
+- T exchange_value,
+ atomic_memory_order order) const {
+ STATIC_ASSERT(4 == sizeof(T));
+ if (order != memory_order_relaxed) {
+@@ -110,25 +116,4 @@ inline T Atomic::PlatformCmpxchg<4>::operator()(T volatile* dest __attribute__((
+ return rv;
+ }
+
+-template<size_t byte_size>
+-struct Atomic::PlatformOrderedLoad<byte_size, X_ACQUIRE>
+-{
+- template <typename T>
+- T operator()(const volatile T* p) const { T data; __atomic_load(const_cast<T*>(p), &data, __ATOMIC_ACQUIRE); return data; }
+-};
+-
+-template<size_t byte_size>
+-struct Atomic::PlatformOrderedStore<byte_size, RELEASE_X>
+-{
+- template <typename T>
+- void operator()(volatile T* p, T v) const { __atomic_store(const_cast<T*>(p), &v, __ATOMIC_RELEASE); }
+-};
+-
+-template<size_t byte_size>
+-struct Atomic::PlatformOrderedStore<byte_size, RELEASE_X_FENCE>
+-{
+- template <typename T>
+- void operator()(volatile T* p, T v) const { release_store(p, v); OrderAccess::fence(); }
+-};
+-
+ #endif // OS_CPU_LINUX_RISCV_ATOMIC_LINUX_RISCV_HPP
+diff --git a/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp
+index 1c33dc1e87f..5b5d35553f7 100644
+--- a/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp
++++ b/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp
+@@ -37,10 +37,6 @@ inline void OrderAccess::storestore() { release(); }
+ inline void OrderAccess::loadstore() { acquire(); }
+ inline void OrderAccess::storeload() { fence(); }
+
+-#define FULL_MEM_BARRIER __sync_synchronize()
+-#define READ_MEM_BARRIER __atomic_thread_fence(__ATOMIC_ACQUIRE);
+-#define WRITE_MEM_BARRIER __atomic_thread_fence(__ATOMIC_RELEASE);
+-
+ inline void OrderAccess::acquire() {
+ READ_MEM_BARRIER;
+ }
+@@ -53,11 +49,26 @@ inline void OrderAccess::fence() {
+ FULL_MEM_BARRIER;
+ }
+
+-inline void OrderAccess::cross_modify_fence_impl() {
+- asm volatile("fence.i" : : : "memory");
+- if (UseConservativeFence) {
+- asm volatile("fence ir, ir" : : : "memory");
+- }
+-}
++
++template<size_t byte_size>
++struct OrderAccess::PlatformOrderedLoad<byte_size, X_ACQUIRE>
++{
++ template <typename T>
++ T operator()(const volatile T* p) const { T data; __atomic_load(const_cast<T*>(p), &data, __ATOMIC_ACQUIRE); return data; }
++};
++
++template<size_t byte_size>
++struct OrderAccess::PlatformOrderedStore<byte_size, RELEASE_X>
++{
++ template <typename T>
++ void operator()(T v, volatile T* p) const { __atomic_store(const_cast<T*>(p), &v, __ATOMIC_RELEASE); }
++};
++
++template<size_t byte_size>
++struct OrderAccess::PlatformOrderedStore<byte_size, RELEASE_X_FENCE>
++{
++ template <typename T>
++ void operator()(T v, volatile T* p) const { release_store(p, v); OrderAccess::fence(); }
++};
+
+ #endif // OS_CPU_LINUX_RISCV_ORDERACCESS_LINUX_RISCV_HPP
+
+From b078a2ec01598fbcd99aea61af15d44f9c884aaa Mon Sep 17 00:00:00 2001
+From: "yunyao.zxl"
+Date: Tue, 25 Apr 2023 21:07:42 +0800
+Subject: [PATCH 009/140] Revert JDK-8229258: Rework markOop and markOopDesc
+ into a simpler mark word value carrier
+
+---
+ .../cpu/riscv/c1_MacroAssembler_riscv.cpp | 4 ++--
+ .../shenandoahBarrierSetAssembler_riscv.cpp | 4 ++--
+ src/hotspot/cpu/riscv/riscv.ad | 22 +++++++++----------
+ src/hotspot/cpu/riscv/templateTable_riscv.cpp | 2 +-
+ 4 files changed, 16 insertions(+), 16 deletions(-)
+
+diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp
+index e5ed25616d6..2d52343587e 100644
+--- a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp
++++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp
+@@ -67,7 +67,7 @@ int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr
+ // Load object header
+ ld(hdr, Address(obj, hdr_offset));
+ // and mark it as unlocked
+- ori(hdr, hdr, markWord::unlocked_value);
++ ori(hdr, hdr, markOopDesc::unlocked_value);
+ // save unlocked object header into the displaced header location on the stack
+ sd(hdr, Address(disp_hdr, 0));
+ // test if object header is still the same (i.e.
unlocked), and if so, store the +@@ -141,7 +141,7 @@ void C1_MacroAssembler::try_allocate(Register obj, Register var_size_in_bytes, i + void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, Register tmp1, Register tmp2) { + assert_different_registers(obj, klass, len); + // This assumes that all prototype bits fitr in an int32_t +- mv(tmp1, (int32_t)(intptr_t)markWord::prototype().value()); ++ mv(tmp1, (int32_t)(intptr_t)markOopDesc::prototype()); + sd(tmp1, Address(obj, oopDesc::mark_offset_in_bytes())); + + if (UseCompressedClassPointers) { // Take care not to kill klass +diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp +index d0ac6e52436..84e1205bc25 100644 +--- a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp +@@ -216,9 +216,9 @@ void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssemb + Label done; + __ ld(tmp, Address(dst, oopDesc::mark_offset_in_bytes())); + __ xori(tmp, tmp, -1); // eon with 0 is equivalent to XOR with -1 +- __ andi(t2, tmp, markWord::lock_mask_in_place); ++ __ andi(t2, tmp, markOopDesc::lock_mask_in_place); + __ bnez(t2, done); +- __ ori(tmp, tmp, markWord::marked_value); ++ __ ori(tmp, tmp, markOopDesc::marked_value); + __ xori(dst, tmp, -1); // eon with 0 is equivalent to XOR with -1 + __ bind(done); + +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 2eefc71dde0..44ab44dece1 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -1966,12 +1966,12 @@ encode %{ + __ ld(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes())); + + // Check for existing monitor +- __ andi(t0, disp_hdr, markWord::monitor_value); ++ __ andi(t0, disp_hdr, markOopDesc::monitor_value); + __ bnez(t0, object_has_monitor); + + if (!UseHeavyMonitors) { + // Set tmp to be (markWord of object | UNLOCK_VALUE). +- __ ori(tmp, disp_hdr, markWord::unlocked_value); ++ __ ori(tmp, disp_hdr, markOopDesc::unlocked_value); + + // Initialize the box. (Must happen before we update the object mark!) + __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); +@@ -1993,7 +1993,7 @@ encode %{ + // Check if the owner is self by comparing the value in the + // markWord of object (disp_hdr) with the stack pointer. + __ sub(disp_hdr, disp_hdr, sp); +- __ li(tmp, (intptr_t) (~(os::vm_page_size()-1) | (uintptr_t)markWord::lock_mask_in_place)); ++ __ li(tmp, (intptr_t) (~(os::vm_page_size()-1) | (uintptr_t)markOopDesc::lock_mask_in_place)); + // If (mark & lock_mask) == 0 and mark - sp < page_size, we are stack-locking and goto cont, + // hence we can store 0 as the displaced header in the box, which indicates that it is a + // recursive lock. +@@ -2012,15 +2012,15 @@ encode %{ + // otherwise m->owner may contain a thread or a stack address. + // + // Try to CAS m->owner from NULL to current thread. 
+- __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes() - markWord::monitor_value)); ++ __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value)); + __ cmpxchg(/*memory address*/tmp, /*expected value*/zr, /*new value*/xthread, Assembler::int64, Assembler::aq, + Assembler::rl, /*result*/flag); // cas succeeds if flag == zr(expected) + + // Store a non-null value into the box to avoid looking like a re-entrant + // lock. The fast-path monitor unlock code checks for +- // markWord::monitor_value so use markWord::unused_mark which has the ++ // markOopDesc::monitor_value so use markOopDesc::unused_mark which has the + // relevant bit set, and also matches ObjectSynchronizer::slow_enter. +- __ mv(tmp, (address)markWord::unused_mark().value()); ++ __ mv(tmp, (address)markOopDesc::unused_mark()); + __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); + + __ beqz(flag, cont); // CAS success means locking succeeded +@@ -2029,9 +2029,9 @@ encode %{ + + // Recursive lock case + __ mv(flag, zr); +- __ ld(tmp, Address(disp_hdr, ObjectMonitor::recursions_offset_in_bytes() - markWord::monitor_value)); ++ __ ld(tmp, Address(disp_hdr, ObjectMonitor::recursions_offset_in_bytes() - markOopDesc::monitor_value)); + __ add(tmp, tmp, 1u); +- __ sd(tmp, Address(disp_hdr, ObjectMonitor::recursions_offset_in_bytes() - markWord::monitor_value)); ++ __ sd(tmp, Address(disp_hdr, ObjectMonitor::recursions_offset_in_bytes() - markOopDesc::monitor_value)); + + __ bind(cont); + %} +@@ -2060,7 +2060,7 @@ encode %{ + + // Handle existing monitor. + __ ld(tmp, Address(oop, oopDesc::mark_offset_in_bytes())); +- __ andi(t0, disp_hdr, markWord::monitor_value); ++ __ andi(t0, disp_hdr, markOopDesc::monitor_value); + __ bnez(t0, object_has_monitor); + + if (!UseHeavyMonitors) { +@@ -2080,8 +2080,8 @@ encode %{ + + // Handle existing monitor. + __ bind(object_has_monitor); +- STATIC_ASSERT(markWord::monitor_value <= INT_MAX); +- __ add(tmp, tmp, -(int)markWord::monitor_value); // monitor ++ STATIC_ASSERT(markOopDesc::monitor_value <= INT_MAX); ++ __ add(tmp, tmp, -(int)markOopDesc::monitor_value); // monitor + __ ld(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes())); + + Label notRecursive; +diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp +index d2a301c6e74..4e388ac4eaa 100644 +--- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp ++++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp +@@ -3559,7 +3559,7 @@ void TemplateTable::_new() { + + // initialize object hader only. 
+ __ bind(initialize_header); +- __ mv(t0, (intptr_t)markWord::prototype().value()); ++ __ mv(t0, (intptr_t)markOopDesc::prototype()); + __ sd(t0, Address(x10, oopDesc::mark_offset_in_bytes())); + __ store_klass_gap(x10, zr); // zero klass gap for compressed oops + __ store_klass(x10, x14); // store klass last + +From 4b27cd8d4cfa8fb5f0f78aecaebb17d19362f300 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Fri, 31 Mar 2023 16:24:36 +0800 +Subject: [PATCH 010/140] Revert: JDK-8239895: assert(_stack_base != 0LL) + failed: Sanity check JDK-8238988: Rename thread "in stack" methods and add + in_stack_range JDK-8234372: Investigate use of Thread::stack_base() and + queries for "in stack" JDK-8203481: Incorrect constraint for unextended_sp in + frame:safe_for_sender + +--- + src/hotspot/cpu/riscv/frame_riscv.cpp | 32 +++++++++++++++++++-------- + 1 file changed, 23 insertions(+), 9 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp +index 41e52a4d491..8e7babe2c61 100644 +--- a/src/hotspot/cpu/riscv/frame_riscv.cpp ++++ b/src/hotspot/cpu/riscv/frame_riscv.cpp +@@ -56,13 +56,21 @@ void RegisterMap::check_location_valid() { + // Profiling/safepoint support + + bool frame::safe_for_sender(JavaThread *thread) { +- address addr_sp = (address)_sp; +- address addr_fp = (address)_fp; ++ address sp = (address)_sp; ++ address fp = (address)_fp; + address unextended_sp = (address)_unextended_sp; + + // consider stack guards when trying to determine "safe" stack pointers ++ static size_t stack_guard_size = os::uses_stack_guard_pages() ? ++ (JavaThread::stack_red_zone_size() + JavaThread::stack_yellow_zone_size()) : 0; ++ size_t usable_stack_size = thread->stack_size() - stack_guard_size; ++ + // sp must be within the usable part of the stack (not in guards) +- if (!thread->is_in_usable_stack(addr_sp)) { ++ bool sp_safe = (sp < thread->stack_base()) && ++ (sp >= thread->stack_base() - usable_stack_size); ++ ++ ++ if (!sp_safe) { + return false; + } + +@@ -79,14 +87,15 @@ bool frame::safe_for_sender(JavaThread *thread) { + // So unextended sp must be within the stack but we need not to check + // that unextended sp >= sp + +- if (!thread->is_in_full_stack_checked(unextended_sp)) { ++ bool unextended_sp_safe = (unextended_sp < thread->stack_base()); ++ ++ if (!unextended_sp_safe) { + return false; + } + + // an fp must be within the stack and above (but not equal) sp + // second evaluation on fp+ is added to handle situation where fp is -1 +- bool fp_safe = thread->is_in_stack_range_excl(addr_fp, addr_sp) && +- thread->is_in_full_stack_checked(addr_fp + (return_addr_offset * sizeof(void*))); ++ bool fp_safe = (fp < thread->stack_base() && (fp > sp) && (((fp + (return_addr_offset * sizeof(void*))) < thread->stack_base()))); + + // We know sp/unextended_sp are safe only fp is questionable here + +@@ -147,7 +156,7 @@ bool frame::safe_for_sender(JavaThread *thread) { + + sender_sp = _unextended_sp + _cb->frame_size(); + // Is sender_sp safe? +- if (!thread->is_in_full_stack_checked((address)sender_sp)) { ++ if ((address)sender_sp >= thread->stack_base()) { + return false; + } + +@@ -163,7 +172,10 @@ bool frame::safe_for_sender(JavaThread *thread) { + // fp is always saved in a recognizable place in any code we generate. However + // only if the sender is interpreted/call_stub (c1 too?) are we certain that the saved fp + // is really a frame pointer. 
+- if (!thread->is_in_stack_range_excl((address)saved_fp, (address)sender_sp)) { ++ ++ bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); ++ ++ if (!saved_fp_safe) { + return false; + } + +@@ -196,7 +208,9 @@ bool frame::safe_for_sender(JavaThread *thread) { + + // Could be the call_stub + if (StubRoutines::returns_to_call_stub(sender_pc)) { +- if (!thread->is_in_stack_range_excl((address)saved_fp, (address)sender_sp)) { ++ bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); ++ ++ if (!saved_fp_safe) { + return false; + } + + +From d1b463b6c00c75664a49719f75bef8e6408f12df Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Fri, 31 Mar 2023 17:10:33 +0800 +Subject: [PATCH 011/140] Revert JDK-8173585: Intrinsify + StringLatin1.indexOf(char) + +--- + src/hotspot/cpu/riscv/riscv.ad | 19 ------------------- + 1 file changed, 19 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 44ab44dece1..8c7a8ede815 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -9826,7 +9826,6 @@ instruct stringU_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, + iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) + %{ + match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); +- predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U); + effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result, + TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); + +@@ -9840,24 +9839,6 @@ instruct stringU_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, + %} + + +-instruct stringL_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, +- iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, +- iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) +-%{ +- match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); +- predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L); +- effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result, +- TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); +- +- format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result" %} +- ins_encode %{ +- __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, +- $result$$Register, $tmp1$$Register, $tmp2$$Register, +- $tmp3$$Register, $tmp4$$Register, true /* isL */); +- %} +- ins_pipe(pipe_class_memory); +-%} +- + // clearing of an array + instruct clearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy) + %{ + +From a0cdf8dfb05dbff34d2ca23104d08ae21b2d7f70 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 4 Apr 2023 12:25:36 +0800 +Subject: [PATCH 012/140] Revert JDK-8281632: riscv: Improve interpreter stack + banging, and change the register t1->t0 + +--- + .../templateInterpreterGenerator_riscv.cpp | 42 ++++--------------- + 1 file changed, 8 insertions(+), 34 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp +index 6537b2dbd94..76ae6f89e27 100644 +--- a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp ++++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp +@@ -895,42 +895,16 @@ address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(Abstract + } + + void TemplateInterpreterGenerator::bang_stack_shadow_pages(bool native_call) { +- // See more discussion in stackOverflow.hpp. 
+- +- const int shadow_zone_size = checked_cast(StackOverflow::stack_shadow_zone_size()); ++ // Bang each page in the shadow zone. We can't assume it's been done for ++ // an interpreter frame with greater than a page of locals, so each page ++ // needs to be checked. Only true for non-native. ++ const int n_shadow_pages = JavaThread::stack_shadow_zone_size() / os::vm_page_size(); ++ const int start_page = native_call ? n_shadow_pages : 1; + const int page_size = os::vm_page_size(); +- const int n_shadow_pages = shadow_zone_size / page_size; +- +-#ifdef ASSERT +- Label L_good_limit; +- __ ld(t0, Address(xthread, JavaThread::shadow_zone_safe_limit())); +- __ bnez(t0, L_good_limit); +- __ stop("shadow zone safe limit is not initialized"); +- __ bind(L_good_limit); +- +- Label L_good_watermark; +- __ ld(t0, Address(xthread, JavaThread::shadow_zone_growth_watermark())); +- __ bnez(t0, L_good_watermark); +- __ stop("shadow zone growth watermark is not initialized"); +- __ bind(L_good_watermark); +-#endif +- +- Label L_done; +- +- __ ld(t0, Address(xthread, JavaThread::shadow_zone_growth_watermark())); +- __ bgtu(sp, t0, L_done); +- +- for (int p = 1; p <= n_shadow_pages; p++) { +- __ bang_stack_with_offset(p * page_size); ++ for (int pages = start_page; pages <= n_shadow_pages ; pages++) { ++ __ sub(t0, sp, pages * page_size); ++ __ sd(zr, Address(t0)); + } +- +- // Record the new watermark, but only if the update is above the safe limit. +- // Otherwise, the next time around the check above would pass the safe limit. +- __ ld(t0, Address(xthread, JavaThread::shadow_zone_safe_limit())); +- __ bleu(sp, t0, L_done); +- __ sd(sp, Address(xthread, JavaThread::shadow_zone_growth_watermark())); +- +- __ bind(L_done); + } + + // Interpreter stub for calling a native method. (asm interpreter) + +From 8db4bf1400d92c80a0adef8a5ec12adbf595c03f Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 4 Apr 2023 14:56:25 +0800 +Subject: [PATCH 013/140] Port aarch64 style sig handler from + os_linux_aarch64.cpp + +--- + .../os_cpu/linux_riscv/os_linux_riscv.cpp | 224 +++++++++++++----- + 1 file changed, 168 insertions(+), 56 deletions(-) + +diff --git a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp +index 1f46bbab0a2..db15f1946e2 100644 +--- a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp ++++ b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp +@@ -48,7 +48,6 @@ + #include "runtime/stubRoutines.hpp" + #include "runtime/thread.inline.hpp" + #include "runtime/timer.hpp" +-#include "signals_posix.hpp" + #include "utilities/debug.hpp" + #include "utilities/events.hpp" + #include "utilities/vmError.hpp" +@@ -172,31 +171,138 @@ NOINLINE frame os::current_frame() { + } + + // Utility functions +-bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info, +- ucontext_t* uc, JavaThread* thread) { ++extern "C" JNIEXPORT int ++JVM_handle_linux_signal(int sig, ++ siginfo_t* info, ++ void* ucVoid, ++ int abort_if_unrecognized) { ++ ucontext_t* uc = (ucontext_t*) ucVoid; ++ ++ Thread* t = Thread::current_or_null_safe(); ++ ++ // Must do this before SignalHandlerMark, if crash protection installed we will longjmp away ++ // (no destructors can be run) ++ os::ThreadCrashProtection::check_crash_protection(sig, t); ++ ++ SignalHandlerMark shm(t); ++ ++ // Note: it's not uncommon that JNI code uses signal/sigset to install ++ // then restore certain signal handler (e.g. to temporarily block SIGPIPE, ++ // or have a SIGILL handler when detecting CPU type). 
When that happens, ++ // JVM_handle_linux_signal() might be invoked with junk info/ucVoid. To ++ // avoid unnecessary crash when libjsig is not preloaded, try handle signals ++ // that do not require siginfo/ucontext first. ++ ++ if (sig == SIGPIPE || sig == SIGXFSZ) { ++ // allow chained handler to go first ++ if (os::Linux::chained_handler(sig, info, ucVoid)) { ++ return true; ++ } else { ++ // Ignoring SIGPIPE/SIGXFSZ - see bugs 4229104 or 6499219 ++ return true; ++ } ++ } ++ ++#ifdef CAN_SHOW_REGISTERS_ON_ASSERT ++ if ((sig == SIGSEGV || sig == SIGBUS) && info != NULL && info->si_addr == g_assert_poison) { ++ if (handle_assert_poison_fault(ucVoid, info->si_addr)) { ++ return 1; ++ } ++ } ++#endif ++ ++ JavaThread* thread = NULL; ++ VMThread* vmthread = NULL; ++ if (os::Linux::signal_handlers_are_installed) { ++ if (t != NULL ){ ++ if(t->is_Java_thread()) { ++ thread = (JavaThread *) t; ++ } ++ else if(t->is_VM_thread()){ ++ vmthread = (VMThread *)t; ++ } ++ } ++ } ++ ++ // Handle SafeFetch faults ++ if ((sig == SIGSEGV || sig == SIGBUS) && uc != NULL) { ++ address const pc = (address) os::Linux::ucontext_get_pc(uc); ++ if (pc && StubRoutines::is_safefetch_fault(pc)) { ++ os::Linux::ucontext_set_pc(uc, StubRoutines::continuation_for_safefetch_fault(pc)); ++ return 1; ++ } ++ } + + // decide if this trap can be handled by a stub + address stub = NULL; + +- address pc = NULL; ++ address pc = NULL; + + //%note os_trap_1 + if (info != NULL && uc != NULL && thread != NULL) { +- pc = (address) os::Posix::ucontext_get_pc(uc); +- +- address addr = (address) info->si_addr; +- +- // Make sure the high order byte is sign extended, as it may be masked away by the hardware. +- if ((uintptr_t(addr) & (uintptr_t(1) << 55)) != 0) { +- addr = address(uintptr_t(addr) | (uintptr_t(0xFF) << 56)); +- } ++ pc = (address) os::Linux::ucontext_get_pc(uc); + + // Handle ALL stack overflow variations here + if (sig == SIGSEGV) { ++ address addr = (address) info->si_addr; ++ + // check if fault address is within thread stack +- if (thread->is_in_full_stack(addr)) { +- if (os::Posix::handle_stack_overflow(thread, addr, pc, uc, &stub)) { +- return true; // continue ++ if (thread->on_local_stack(addr)) { ++ // stack overflow ++ if (thread->in_stack_yellow_reserved_zone(addr)) { ++ if (thread->thread_state() == _thread_in_Java) { ++ if (thread->in_stack_reserved_zone(addr)) { ++ frame fr; ++ if (os::Linux::get_frame_at_stack_banging_point(thread, uc, &fr)) { ++ assert(fr.is_java_frame(), "Must be a Java frame"); ++ frame activation = ++ SharedRuntime::look_for_reserved_stack_annotated_method(thread, fr); ++ if (activation.sp() != NULL) { ++ thread->disable_stack_reserved_zone(); ++ if (activation.is_interpreted_frame()) { ++ thread->set_reserved_stack_activation((address)( ++ activation.fp() + frame::interpreter_frame_initial_sp_offset)); ++ } else { ++ thread->set_reserved_stack_activation((address)activation.unextended_sp()); ++ } ++ return 1; ++ } ++ } ++ } ++ // Throw a stack overflow exception. Guard pages will be reenabled ++ // while unwinding the stack. ++ thread->disable_stack_yellow_reserved_zone(); ++ stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::STACK_OVERFLOW); ++ } else { ++ // Thread was in the vm or native code. Return and try to finish. ++ thread->disable_stack_yellow_reserved_zone(); ++ return 1; ++ } ++ } else if (thread->in_stack_red_zone(addr)) { ++ // Fatal red zone violation. 
Disable the guard pages and fall through ++ // to handle_unexpected_exception way down below. ++ thread->disable_stack_red_zone(); ++ tty->print_raw_cr("An irrecoverable stack overflow has occurred."); ++ ++ // This is a likely cause, but hard to verify. Let's just print ++ // it as a hint. ++ tty->print_raw_cr("Please check if any of your loaded .so files has " ++ "enabled executable stack (see man page execstack(8))"); ++ } else { ++ // Accessing stack address below sp may cause SEGV if current ++ // thread has MAP_GROWSDOWN stack. This should only happen when ++ // current thread was created by user code with MAP_GROWSDOWN flag ++ // and then attached to VM. See notes in os_linux.cpp. ++ if (thread->osthread()->expanding_stack() == 0) { ++ thread->osthread()->set_expanding_stack(); ++ if (os::Linux::manually_expand_stack(thread, addr)) { ++ thread->osthread()->clear_expanding_stack(); ++ return 1; ++ } ++ thread->osthread()->clear_expanding_stack(); ++ } else { ++ fatal("recursive segv. expanding stack."); ++ } + } + } + } +@@ -212,7 +318,7 @@ bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info, + tty->print_cr("trap: zombie_not_entrant (%s)", (sig == SIGTRAP) ? "SIGTRAP" : "SIGILL"); + } + stub = SharedRuntime::get_handle_wrong_method_stub(); +- } else if (sig == SIGSEGV && SafepointMechanism::is_poll_address((address)info->si_addr)) { ++ } else if (sig == SIGSEGV && os::is_poll_address((address)info->si_addr)) { + stub = SharedRuntime::get_poll_stub(pc); + } else if (sig == SIGBUS /* && info->si_code == BUS_OBJERR */) { + // BugId 4454115: A read from a MappedByteBuffer can fault +@@ -220,34 +326,12 @@ bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info, + // Do not crash the VM in such a case. + CodeBlob* cb = CodeCache::find_blob_unsafe(pc); + CompiledMethod* nm = (cb != NULL) ? cb->as_compiled_method_or_null() : NULL; +- bool is_unsafe_arraycopy = (thread->doing_unsafe_access() && UnsafeCopyMemory::contains_pc(pc)); +- if ((nm != NULL && nm->has_unsafe_access()) || is_unsafe_arraycopy) { ++ if (nm != NULL && nm->has_unsafe_access()) { + address next_pc = pc + NativeCall::instruction_size; +- if (is_unsafe_arraycopy) { +- next_pc = UnsafeCopyMemory::page_error_continue_pc(pc); +- } + stub = SharedRuntime::handle_unsafe_access(thread, next_pc); + } +- } else if (sig == SIGILL && nativeInstruction_at(pc)->is_stop()) { +- // Pull a pointer to the error message out of the instruction +- // stream. +- const uint64_t *detail_msg_ptr +- = (uint64_t*)(pc + NativeInstruction::instruction_size); +- const char *detail_msg = (const char *)*detail_msg_ptr; +- const char *msg = "stop"; +- if (TraceTraps) { +- tty->print_cr("trap: %s: (SIGILL)", msg); +- } +- +- // End life with a fatal error, message and detail message and the context. +- // Note: no need to do any post-processing here (e.g. 
signal chaining) +- va_list va_dummy; +- VMError::report_and_die(thread, uc, NULL, 0, msg, detail_msg, va_dummy); +- va_end(va_dummy); +- +- ShouldNotReachHere(); + } else if (sig == SIGFPE && +- (info->si_code == FPE_INTDIV || info->si_code == FPE_FLTDIV)) { ++ (info->si_code == FPE_INTDIV || info->si_code == FPE_FLTDIV)) { + stub = + SharedRuntime:: + continuation_for_implicit_exception(thread, +@@ -255,42 +339,70 @@ bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info, + SharedRuntime:: + IMPLICIT_DIVIDE_BY_ZERO); + } else if (sig == SIGSEGV && +- MacroAssembler::uses_implicit_null_check((void*)addr)) { ++ !MacroAssembler::needs_explicit_null_check((intptr_t)info->si_addr)) { + // Determination of interpreter/vtable stub/compiled code null exception + stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL); + } +- } else if ((thread->thread_state() == _thread_in_vm || +- thread->thread_state() == _thread_in_native) && +- sig == SIGBUS && /* info->si_code == BUS_OBJERR && */ +- thread->doing_unsafe_access()) { ++ } else if (thread->thread_state() == _thread_in_vm && ++ sig == SIGBUS && /* info->si_code == BUS_OBJERR && */ ++ thread->doing_unsafe_access()) { + address next_pc = pc + NativeCall::instruction_size; +- if (UnsafeCopyMemory::contains_pc(pc)) { +- next_pc = UnsafeCopyMemory::page_error_continue_pc(pc); +- } + stub = SharedRuntime::handle_unsafe_access(thread, next_pc); + } + + // jni_fast_GetField can trap at certain pc's if a GC kicks in + // and the heap gets shrunk before the field access. + if ((sig == SIGSEGV) || (sig == SIGBUS)) { +- address addr_slow = JNI_FastGetField::find_slowcase_pc(pc); +- if (addr_slow != (address)-1) { +- stub = addr_slow; ++ address addr = JNI_FastGetField::find_slowcase_pc(pc); ++ if (addr != (address)-1) { ++ stub = addr; + } + } ++ ++ // Check to see if we caught the safepoint code in the ++ // process of write protecting the memory serialization page. ++ // It write enables the page immediately after protecting it ++ // so we can just return to retry the write. ++ if ((sig == SIGSEGV) && ++ os::is_memory_serialize_page(thread, (address) info->si_addr)) { ++ // Block current thread until the memory serialize page permission restored. 
++ os::block_on_serialize_page_trap(); ++ return true; ++ } + } + + if (stub != NULL) { + // save all thread context in case we need to restore it +- if (thread != NULL) { +- thread->set_saved_exception_pc(pc); +- } ++ if (thread != NULL) thread->set_saved_exception_pc(pc); + +- os::Posix::ucontext_set_pc(uc, stub); ++ os::Linux::ucontext_set_pc(uc, stub); + return true; + } + +- return false; // Mute compiler ++ // signal-chaining ++ if (os::Linux::chained_handler(sig, info, ucVoid)) { ++ return true; ++ } ++ ++ if (!abort_if_unrecognized) { ++ // caller wants another chance, so give it to him ++ return false; ++ } ++ ++ if (pc == NULL && uc != NULL) { ++ pc = os::Linux::ucontext_get_pc(uc); ++ } ++ ++ // unmask current signal ++ sigset_t newset; ++ sigemptyset(&newset); ++ sigaddset(&newset, sig); ++ sigprocmask(SIG_UNBLOCK, &newset, NULL); ++ ++ VMError::report_and_die(t, sig, pc, info, ucVoid); ++ ++ ShouldNotReachHere(); ++ return true; // Mute compiler + } + + void os::Linux::init_thread_fpu_state(void) { + +From fd3897410308e2fc54d84a9bd453b1b375e6aace Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 4 Apr 2023 15:24:57 +0800 +Subject: [PATCH 014/140] Revert: JDK-8248240: Remove extendedPC.hpp and + fetch_frame_from_ucontext JDK-8253742: POSIX signal code cleanup + +--- + .../os_cpu/linux_riscv/os_linux_riscv.cpp | 38 ++++++++++++++----- + .../os_cpu/linux_riscv/thread_linux_riscv.cpp | 9 +++-- + 2 files changed, 33 insertions(+), 14 deletions(-) + +diff --git a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp +index db15f1946e2..4f1c84c60a0 100644 +--- a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp ++++ b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp +@@ -37,6 +37,7 @@ + #include "prims/jniFastGetField.hpp" + #include "prims/jvm_misc.hpp" + #include "runtime/arguments.hpp" ++#include "runtime/extendedPC.hpp" + #include "runtime/frame.inline.hpp" + #include "runtime/interfaceSupport.inline.hpp" + #include "runtime/java.hpp" +@@ -85,11 +86,11 @@ char* os::non_memory_address_word() { + return (char*) -1; + } + +-address os::Posix::ucontext_get_pc(const ucontext_t * uc) { ++address os::Linux::ucontext_get_pc(const ucontext_t * uc) { + return (address)uc->uc_mcontext.__gregs[REG_PC]; + } + +-void os::Posix::ucontext_set_pc(ucontext_t * uc, address pc) { ++void os::Linux::ucontext_set_pc(ucontext_t * uc, address pc) { + uc->uc_mcontext.__gregs[REG_PC] = (intptr_t)pc; + } + +@@ -101,13 +102,29 @@ intptr_t* os::Linux::ucontext_get_fp(const ucontext_t * uc) { + return (intptr_t*)uc->uc_mcontext.__gregs[REG_FP]; + } + +-address os::fetch_frame_from_context(const void* ucVoid, +- intptr_t** ret_sp, intptr_t** ret_fp) { +- address epc; ++// For Forte Analyzer AsyncGetCallTrace profiling support - thread ++// is currently interrupted by SIGPROF. ++// os::Solaris::fetch_frame_from_ucontext() tries to skip nested signal ++// frames. Currently we don't do that on Linux, so it's the same as ++// os::fetch_frame_from_context(). 
++ExtendedPC os::Linux::fetch_frame_from_ucontext(Thread* thread, ++ const ucontext_t* uc, intptr_t** ret_sp, intptr_t** ret_fp) { ++ ++ assert(thread != NULL, "just checking"); ++ assert(ret_sp != NULL, "just checking"); ++ assert(ret_fp != NULL, "just checking"); ++ ++ return os::fetch_frame_from_context(uc, ret_sp, ret_fp); ++} ++ ++ExtendedPC os::fetch_frame_from_context(const void* ucVoid, ++ intptr_t** ret_sp, intptr_t** ret_fp) { ++ ++ ExtendedPC epc; + const ucontext_t* uc = (const ucontext_t*)ucVoid; + + if (uc != NULL) { +- epc = os::Posix::ucontext_get_pc(uc); ++ epc = ExtendedPC(os::Linux::ucontext_get_pc(uc)); + if (ret_sp != NULL) { + *ret_sp = os::Linux::ucontext_get_sp(uc); + } +@@ -115,7 +132,8 @@ address os::fetch_frame_from_context(const void* ucVoid, + *ret_fp = os::Linux::ucontext_get_fp(uc); + } + } else { +- epc = NULL; ++ // construct empty ExtendedPC for return value checking ++ epc = ExtendedPC(NULL); + if (ret_sp != NULL) { + *ret_sp = (intptr_t *)NULL; + } +@@ -142,8 +160,8 @@ frame os::fetch_compiled_frame_from_context(const void* ucVoid) { + frame os::fetch_frame_from_context(const void* ucVoid) { + intptr_t* frame_sp = NULL; + intptr_t* frame_fp = NULL; +- address epc = fetch_frame_from_context(ucVoid, &frame_sp, &frame_fp); +- return frame(frame_sp, frame_fp, epc); ++ ExtendedPC epc = fetch_frame_from_context(ucVoid, &frame_sp, &frame_fp); ++ return frame(frame_sp, frame_fp, epc.pc()); + } + + // By default, gcc always saves frame pointer rfp on this stack. This +@@ -465,7 +483,7 @@ void os::print_context(outputStream *st, const void *context) { + // Note: it may be unsafe to inspect memory near pc. For example, pc may + // point to garbage if entry point in an nmethod is corrupted. Leave + // this at the end, and hope for the best. 
+- address pc = os::Posix::ucontext_get_pc(uc); ++ address pc = os::Linux::ucontext_get_pc(uc); + print_instructions(st, pc, sizeof(char)); + st->cr(); + } +diff --git a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp +index 3100572e9fd..e46efc420b0 100644 +--- a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp ++++ b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp +@@ -61,16 +61,17 @@ bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava) + + intptr_t* ret_fp = NULL; + intptr_t* ret_sp = NULL; +- address addr = os::fetch_frame_from_context(uc, &ret_sp, &ret_fp); +- if (addr == NULL || ret_sp == NULL ) { ++ ExtendedPC addr = os::Linux::fetch_frame_from_ucontext(this, uc, ++ &ret_sp, &ret_fp); ++ if (addr.pc() == NULL || ret_sp == NULL ) { + // ucontext wasn't useful + return false; + } + +- frame ret_frame(ret_sp, ret_fp, addr); ++ frame ret_frame(ret_sp, ret_fp, addr.pc()); + if (!ret_frame.safe_for_sender(this)) { + #ifdef COMPILER2 +- frame ret_frame2(ret_sp, NULL, addr); ++ frame ret_frame2(ret_sp, NULL, addr.pc()); + if (!ret_frame2.safe_for_sender(this)) { + // nothing else to try if the frame isn't good + return false; + +From 892b40a435ae3f7e85659100ef68db1aeda7ef23 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 4 Apr 2023 15:33:50 +0800 +Subject: [PATCH 015/140] Revert JDK-8263002: Remove CDS MiscCode region + +--- + src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 10 ++++++++++ + src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp | 6 ++++++ + 2 files changed, 16 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +index 4daed17df10..21aa3b58c09 100644 +--- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp ++++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +@@ -187,6 +187,16 @@ bool SharedRuntime::is_wide_vector(int size) { + return false; + } + ++size_t SharedRuntime::trampoline_size() { ++ return 6 * NativeInstruction::instruction_size; ++} ++ ++void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) { ++ int32_t offset = 0; ++ __ movptr_with_offset(t0, destination, offset); ++ __ jalr(x0, t0, offset); ++} ++ + // The java_calling_convention describes stack locations as ideal slots on + // a frame with no abi restrictions. Since we must observe abi restrictions + // (like the placement of the register window) the slots must be biased by +diff --git a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp +index e46efc420b0..31d9254d8ad 100644 +--- a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp ++++ b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp +@@ -68,6 +68,12 @@ bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava) + return false; + } + ++ if (MetaspaceShared::is_in_trampoline_frame(addr.pc())) { ++ // In the middle of a trampoline call. Bail out for safety. ++ // This happens rarely so shouldn't affect profiling. 
++ return false; ++ } ++ + frame ret_frame(ret_sp, ret_fp, addr.pc()); + if (!ret_frame.safe_for_sender(this)) { + #ifdef COMPILER2 + +From 945a317797bc96efe3f0717ca7258f081b96b14d Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 4 Apr 2023 15:52:43 +0800 +Subject: [PATCH 016/140] Revert JDK-8254158: Consolidate per-platform stack + overflow handling code + +--- + .../os_cpu/linux_riscv/os_linux_riscv.cpp | 52 ++++++++++++++----- + 1 file changed, 40 insertions(+), 12 deletions(-) + +diff --git a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp +index 4f1c84c60a0..8b772892b4b 100644 +--- a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp ++++ b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp +@@ -145,18 +145,6 @@ ExtendedPC os::fetch_frame_from_context(const void* ucVoid, + return epc; + } + +-frame os::fetch_compiled_frame_from_context(const void* ucVoid) { +- const ucontext_t* uc = (const ucontext_t*)ucVoid; +- // In compiled code, the stack banging is performed before RA +- // has been saved in the frame. RA is live, and SP and FP +- // belong to the caller. +- intptr_t* frame_fp = os::Linux::ucontext_get_fp(uc); +- intptr_t* frame_sp = os::Linux::ucontext_get_sp(uc); +- address frame_pc = (address)(uc->uc_mcontext.__gregs[REG_LR] +- - NativeInstruction::instruction_size); +- return frame(frame_sp, frame_fp, frame_pc); +-} +- + frame os::fetch_frame_from_context(const void* ucVoid) { + intptr_t* frame_sp = NULL; + intptr_t* frame_fp = NULL; +@@ -164,6 +152,46 @@ frame os::fetch_frame_from_context(const void* ucVoid) { + return frame(frame_sp, frame_fp, epc.pc()); + } + ++bool os::Linux::get_frame_at_stack_banging_point(JavaThread* thread, ucontext_t* uc, frame* fr) { ++ address pc = (address) os::Linux::ucontext_get_pc(uc); ++ if (Interpreter::contains(pc)) { ++ // interpreter performs stack banging after the fixed frame header has ++ // been generated while the compilers perform it before. To maintain ++ // semantic consistency between interpreted and compiled frames, the ++ // method returns the Java sender of the current frame. ++ *fr = os::fetch_frame_from_context(uc); ++ if (!fr->is_first_java_frame()) { ++ assert(fr->safe_for_sender(thread), "Safety check"); ++ *fr = fr->java_sender(); ++ } ++ } else { ++ // more complex code with compiled code ++ assert(!Interpreter::contains(pc), "Interpreted methods should have been handled above"); ++ CodeBlob* cb = CodeCache::find_blob(pc); ++ if (cb == NULL || !cb->is_nmethod() || cb->is_frame_complete_at(pc)) { ++ // Not sure where the pc points to, fallback to default ++ // stack overflow handling ++ return false; ++ } else { ++ // In compiled code, the stack banging is performed before RA ++ // has been saved in the frame. RA is live, and SP and FP ++ // belong to the caller. ++ intptr_t* fp = os::Linux::ucontext_get_fp(uc); ++ intptr_t* sp = os::Linux::ucontext_get_sp(uc); ++ address pc = (address)(uc->uc_mcontext.__gregs[REG_LR] ++ - NativeInstruction::instruction_size); ++ *fr = frame(sp, fp, pc); ++ if (!fr->is_java_frame()) { ++ assert(fr->safe_for_sender(thread), "Safety check"); ++ assert(!fr->is_first_frame(), "Safety check"); ++ *fr = fr->java_sender(); ++ } ++ } ++ } ++ assert(fr->is_java_frame(), "Safety check"); ++ return true; ++} ++ + // By default, gcc always saves frame pointer rfp on this stack. This + // may get turned off by -fomit-frame-pointer. 
+ frame os::get_sender_for_C_frame(frame* fr) { + +From c1a03e0a376cc2c8748d83d66b576b66ee2e6962 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 4 Apr 2023 16:14:19 +0800 +Subject: [PATCH 017/140] Revert JDK-8202579: Revisit VM_Version and + VM_Version_ext for overlap and consolidation + +--- + .../cpu/riscv/vm_version_ext_riscv.cpp | 87 +++++++++++++++++++ + .../cpu/riscv/vm_version_ext_riscv.hpp | 55 ++++++++++++ + src/hotspot/cpu/riscv/vm_version_riscv.cpp | 14 --- + 3 files changed, 142 insertions(+), 14 deletions(-) + create mode 100644 src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp + create mode 100644 src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp + +diff --git a/src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp +new file mode 100644 +index 00000000000..6bdce51506e +--- /dev/null ++++ b/src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp +@@ -0,0 +1,87 @@ ++/* ++ * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "memory/allocation.hpp" ++#include "memory/allocation.inline.hpp" ++#include "runtime/os.inline.hpp" ++#include "vm_version_ext_riscv.hpp" ++ ++// VM_Version_Ext statics ++int VM_Version_Ext::_no_of_threads = 0; ++int VM_Version_Ext::_no_of_cores = 0; ++int VM_Version_Ext::_no_of_sockets = 0; ++bool VM_Version_Ext::_initialized = false; ++char VM_Version_Ext::_cpu_name[CPU_TYPE_DESC_BUF_SIZE] = {0}; ++char VM_Version_Ext::_cpu_desc[CPU_DETAILED_DESC_BUF_SIZE] = {0}; ++ ++void VM_Version_Ext::initialize_cpu_information(void) { ++ // do nothing if cpu info has been initialized ++ if (_initialized) { ++ return; ++ } ++ ++ _no_of_cores = os::processor_count(); ++ _no_of_threads = _no_of_cores; ++ _no_of_sockets = _no_of_cores; ++ snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "RISCV64"); ++ snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "RISCV64 %s", _features_string); ++ _initialized = true; ++} ++ ++int VM_Version_Ext::number_of_threads(void) { ++ initialize_cpu_information(); ++ return _no_of_threads; ++} ++ ++int VM_Version_Ext::number_of_cores(void) { ++ initialize_cpu_information(); ++ return _no_of_cores; ++} ++ ++int VM_Version_Ext::number_of_sockets(void) { ++ initialize_cpu_information(); ++ return _no_of_sockets; ++} ++ ++const char* VM_Version_Ext::cpu_name(void) { ++ initialize_cpu_information(); ++ char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_TYPE_DESC_BUF_SIZE, mtTracing); ++ if (NULL == tmp) { ++ return NULL; ++ } ++ strncpy(tmp, _cpu_name, CPU_TYPE_DESC_BUF_SIZE); ++ return tmp; ++} ++ ++const char* VM_Version_Ext::cpu_description(void) { ++ initialize_cpu_information(); ++ char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_DETAILED_DESC_BUF_SIZE, mtTracing); ++ if (NULL == tmp) { ++ return NULL; ++ } ++ strncpy(tmp, _cpu_desc, CPU_DETAILED_DESC_BUF_SIZE); ++ return tmp; ++} +diff --git a/src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp b/src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp +new file mode 100644 +index 00000000000..711e4aeaf68 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp +@@ -0,0 +1,55 @@ ++/* ++ * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_RISCV_VM_VERSION_EXT_RISCV_HPP ++#define CPU_RISCV_VM_VERSION_EXT_RISCV_HPP ++ ++#include "runtime/vm_version.hpp" ++#include "utilities/macros.hpp" ++ ++class VM_Version_Ext : public VM_Version { ++ private: ++ static const size_t CPU_TYPE_DESC_BUF_SIZE = 256; ++ static const size_t CPU_DETAILED_DESC_BUF_SIZE = 4096; ++ ++ static int _no_of_threads; ++ static int _no_of_cores; ++ static int _no_of_sockets; ++ static bool _initialized; ++ static char _cpu_name[CPU_TYPE_DESC_BUF_SIZE]; ++ static char _cpu_desc[CPU_DETAILED_DESC_BUF_SIZE]; ++ ++ public: ++ static int number_of_threads(void); ++ static int number_of_cores(void); ++ static int number_of_sockets(void); ++ ++ static const char* cpu_name(void); ++ static const char* cpu_description(void); ++ static void initialize_cpu_information(void); ++ ++}; ++ ++#endif // CPU_RISCV_VM_VERSION_EXT_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp +index 2c15a834542..dd65f32277f 100644 +--- a/src/hotspot/cpu/riscv/vm_version_riscv.cpp ++++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp +@@ -210,17 +210,3 @@ void VM_Version::c2_initialize() { + } + } + #endif // COMPILER2 +- +-void VM_Version::initialize_cpu_information(void) { +- // do nothing if cpu info has been initialized +- if (_initialized) { +- return; +- } +- +- _no_of_cores = os::processor_count(); +- _no_of_threads = _no_of_cores; +- _no_of_sockets = _no_of_cores; +- snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "RISCV64"); +- snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "RISCV64 %s", _features_string); +- _initialized = true; +-} + +From 0cfdbd8595c710b71be008bb531b59acf9c4b016 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 4 Apr 2023 17:16:05 +0800 +Subject: [PATCH 018/140] Revert JDK-8191278: MappedByteBuffer bulk access + memory failures are not handled gracefully + +--- + src/hotspot/cpu/riscv/stubGenerator_riscv.cpp | 19 ++----------------- + 1 file changed, 2 insertions(+), 17 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp +index 39416441bdf..8392b768847 100644 +--- a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp ++++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp +@@ -1049,12 +1049,7 @@ class StubGenerator: public StubCodeGenerator { + __ push_reg(RegSet::of(d, count), sp); + } + +- { +- // UnsafeCopyMemory page error: continue after ucm +- bool add_entry = !is_oop && (!aligned || sizeof(jlong) == size); +- UnsafeCopyMemoryMark ucmm(this, add_entry, true); +- copy_memory(aligned, s, d, count, t0, size); +- } ++ copy_memory(aligned, s, d, count, t0, size); + + if (is_oop) { + __ pop_reg(RegSet::of(d, count), sp); +@@ -1122,12 +1117,7 @@ class StubGenerator: public StubCodeGenerator { + __ push_reg(RegSet::of(d, count), sp); + } + +- { +- // UnsafeCopyMemory page error: continue after ucm +- bool add_entry = !is_oop && (!aligned || sizeof(jlong) == size); +- UnsafeCopyMemoryMark ucmm(this, add_entry, true); +- copy_memory(aligned, s, d, count, t0, -size); +- } ++ copy_memory(aligned, s, d, count, t0, -size); + + if (is_oop) { + __ pop_reg(RegSet::of(d, count), sp); +@@ -3734,11 +3724,6 @@ class StubGenerator: public StubCodeGenerator { + ~StubGenerator() {} + }; // end class declaration + +-#define UCM_TABLE_MAX_ENTRIES 8 + void StubGenerator_generate(CodeBuffer* code, bool all) { +- if (UnsafeCopyMemory::_table == NULL) { +- UnsafeCopyMemory::create_table(UCM_TABLE_MAX_ENTRIES); +- } +- + 
StubGenerator g(code, all); + } + +From dd6a7c520a5adeef5b6686c161554adcba61113f Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 6 Apr 2023 15:55:09 +0800 +Subject: [PATCH 019/140] Revert JDK-8282085: The REGISTER_DEFINITION macro is + useless after JDK-8269122 + +--- + .../cpu/riscv/register_definitions_riscv.cpp | 192 ++++++++++++++++++ + 1 file changed, 192 insertions(+) + create mode 100644 src/hotspot/cpu/riscv/register_definitions_riscv.cpp + +diff --git a/src/hotspot/cpu/riscv/register_definitions_riscv.cpp b/src/hotspot/cpu/riscv/register_definitions_riscv.cpp +new file mode 100644 +index 00000000000..583f67573ca +--- /dev/null ++++ b/src/hotspot/cpu/riscv/register_definitions_riscv.cpp +@@ -0,0 +1,192 @@ ++/* ++ * Copyright (c) 2002, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/register.hpp" ++#include "interp_masm_riscv.hpp" ++#include "register_riscv.hpp" ++ ++REGISTER_DEFINITION(Register, noreg); ++ ++REGISTER_DEFINITION(Register, x0); ++REGISTER_DEFINITION(Register, x1); ++REGISTER_DEFINITION(Register, x2); ++REGISTER_DEFINITION(Register, x3); ++REGISTER_DEFINITION(Register, x4); ++REGISTER_DEFINITION(Register, x5); ++REGISTER_DEFINITION(Register, x6); ++REGISTER_DEFINITION(Register, x7); ++REGISTER_DEFINITION(Register, x8); ++REGISTER_DEFINITION(Register, x9); ++REGISTER_DEFINITION(Register, x10); ++REGISTER_DEFINITION(Register, x11); ++REGISTER_DEFINITION(Register, x12); ++REGISTER_DEFINITION(Register, x13); ++REGISTER_DEFINITION(Register, x14); ++REGISTER_DEFINITION(Register, x15); ++REGISTER_DEFINITION(Register, x16); ++REGISTER_DEFINITION(Register, x17); ++REGISTER_DEFINITION(Register, x18); ++REGISTER_DEFINITION(Register, x19); ++REGISTER_DEFINITION(Register, x20); ++REGISTER_DEFINITION(Register, x21); ++REGISTER_DEFINITION(Register, x22); ++REGISTER_DEFINITION(Register, x23); ++REGISTER_DEFINITION(Register, x24); ++REGISTER_DEFINITION(Register, x25); ++REGISTER_DEFINITION(Register, x26); ++REGISTER_DEFINITION(Register, x27); ++REGISTER_DEFINITION(Register, x28); ++REGISTER_DEFINITION(Register, x29); ++REGISTER_DEFINITION(Register, x30); ++REGISTER_DEFINITION(Register, x31); ++ ++REGISTER_DEFINITION(FloatRegister, fnoreg); ++ ++REGISTER_DEFINITION(FloatRegister, f0); ++REGISTER_DEFINITION(FloatRegister, f1); ++REGISTER_DEFINITION(FloatRegister, f2); ++REGISTER_DEFINITION(FloatRegister, f3); ++REGISTER_DEFINITION(FloatRegister, f4); ++REGISTER_DEFINITION(FloatRegister, f5); ++REGISTER_DEFINITION(FloatRegister, f6); ++REGISTER_DEFINITION(FloatRegister, f7); ++REGISTER_DEFINITION(FloatRegister, f8); ++REGISTER_DEFINITION(FloatRegister, f9); ++REGISTER_DEFINITION(FloatRegister, f10); ++REGISTER_DEFINITION(FloatRegister, f11); ++REGISTER_DEFINITION(FloatRegister, f12); ++REGISTER_DEFINITION(FloatRegister, f13); ++REGISTER_DEFINITION(FloatRegister, f14); ++REGISTER_DEFINITION(FloatRegister, f15); ++REGISTER_DEFINITION(FloatRegister, f16); ++REGISTER_DEFINITION(FloatRegister, f17); ++REGISTER_DEFINITION(FloatRegister, f18); ++REGISTER_DEFINITION(FloatRegister, f19); ++REGISTER_DEFINITION(FloatRegister, f20); ++REGISTER_DEFINITION(FloatRegister, f21); ++REGISTER_DEFINITION(FloatRegister, f22); ++REGISTER_DEFINITION(FloatRegister, f23); ++REGISTER_DEFINITION(FloatRegister, f24); ++REGISTER_DEFINITION(FloatRegister, f25); ++REGISTER_DEFINITION(FloatRegister, f26); ++REGISTER_DEFINITION(FloatRegister, f27); ++REGISTER_DEFINITION(FloatRegister, f28); ++REGISTER_DEFINITION(FloatRegister, f29); ++REGISTER_DEFINITION(FloatRegister, f30); ++REGISTER_DEFINITION(FloatRegister, f31); ++ ++REGISTER_DEFINITION(VectorRegister, vnoreg); ++ ++REGISTER_DEFINITION(VectorRegister, v0); ++REGISTER_DEFINITION(VectorRegister, v1); ++REGISTER_DEFINITION(VectorRegister, v2); ++REGISTER_DEFINITION(VectorRegister, v3); ++REGISTER_DEFINITION(VectorRegister, v4); ++REGISTER_DEFINITION(VectorRegister, v5); ++REGISTER_DEFINITION(VectorRegister, v6); ++REGISTER_DEFINITION(VectorRegister, v7); ++REGISTER_DEFINITION(VectorRegister, v8); ++REGISTER_DEFINITION(VectorRegister, v9); ++REGISTER_DEFINITION(VectorRegister, v10); ++REGISTER_DEFINITION(VectorRegister, v11); ++REGISTER_DEFINITION(VectorRegister, v12); ++REGISTER_DEFINITION(VectorRegister, v13); ++REGISTER_DEFINITION(VectorRegister, v14); 
++REGISTER_DEFINITION(VectorRegister, v15); ++REGISTER_DEFINITION(VectorRegister, v16); ++REGISTER_DEFINITION(VectorRegister, v17); ++REGISTER_DEFINITION(VectorRegister, v18); ++REGISTER_DEFINITION(VectorRegister, v19); ++REGISTER_DEFINITION(VectorRegister, v20); ++REGISTER_DEFINITION(VectorRegister, v21); ++REGISTER_DEFINITION(VectorRegister, v22); ++REGISTER_DEFINITION(VectorRegister, v23); ++REGISTER_DEFINITION(VectorRegister, v24); ++REGISTER_DEFINITION(VectorRegister, v25); ++REGISTER_DEFINITION(VectorRegister, v26); ++REGISTER_DEFINITION(VectorRegister, v27); ++REGISTER_DEFINITION(VectorRegister, v28); ++REGISTER_DEFINITION(VectorRegister, v29); ++REGISTER_DEFINITION(VectorRegister, v30); ++REGISTER_DEFINITION(VectorRegister, v31); ++ ++REGISTER_DEFINITION(Register, c_rarg0); ++REGISTER_DEFINITION(Register, c_rarg1); ++REGISTER_DEFINITION(Register, c_rarg2); ++REGISTER_DEFINITION(Register, c_rarg3); ++REGISTER_DEFINITION(Register, c_rarg4); ++REGISTER_DEFINITION(Register, c_rarg5); ++REGISTER_DEFINITION(Register, c_rarg6); ++REGISTER_DEFINITION(Register, c_rarg7); ++ ++REGISTER_DEFINITION(FloatRegister, c_farg0); ++REGISTER_DEFINITION(FloatRegister, c_farg1); ++REGISTER_DEFINITION(FloatRegister, c_farg2); ++REGISTER_DEFINITION(FloatRegister, c_farg3); ++REGISTER_DEFINITION(FloatRegister, c_farg4); ++REGISTER_DEFINITION(FloatRegister, c_farg5); ++REGISTER_DEFINITION(FloatRegister, c_farg6); ++REGISTER_DEFINITION(FloatRegister, c_farg7); ++ ++REGISTER_DEFINITION(Register, j_rarg0); ++REGISTER_DEFINITION(Register, j_rarg1); ++REGISTER_DEFINITION(Register, j_rarg2); ++REGISTER_DEFINITION(Register, j_rarg3); ++REGISTER_DEFINITION(Register, j_rarg4); ++REGISTER_DEFINITION(Register, j_rarg5); ++REGISTER_DEFINITION(Register, j_rarg6); ++REGISTER_DEFINITION(Register, j_rarg7); ++ ++REGISTER_DEFINITION(FloatRegister, j_farg0); ++REGISTER_DEFINITION(FloatRegister, j_farg1); ++REGISTER_DEFINITION(FloatRegister, j_farg2); ++REGISTER_DEFINITION(FloatRegister, j_farg3); ++REGISTER_DEFINITION(FloatRegister, j_farg4); ++REGISTER_DEFINITION(FloatRegister, j_farg5); ++REGISTER_DEFINITION(FloatRegister, j_farg6); ++REGISTER_DEFINITION(FloatRegister, j_farg7); ++ ++REGISTER_DEFINITION(Register, zr); ++REGISTER_DEFINITION(Register, gp); ++REGISTER_DEFINITION(Register, tp); ++REGISTER_DEFINITION(Register, xmethod); ++REGISTER_DEFINITION(Register, ra); ++REGISTER_DEFINITION(Register, sp); ++REGISTER_DEFINITION(Register, fp); ++REGISTER_DEFINITION(Register, xheapbase); ++REGISTER_DEFINITION(Register, xcpool); ++REGISTER_DEFINITION(Register, xmonitors); ++REGISTER_DEFINITION(Register, xlocals); ++REGISTER_DEFINITION(Register, xthread); ++REGISTER_DEFINITION(Register, xbcp); ++REGISTER_DEFINITION(Register, xdispatch); ++REGISTER_DEFINITION(Register, esp); ++ ++REGISTER_DEFINITION(Register, t0); ++REGISTER_DEFINITION(Register, t1); ++REGISTER_DEFINITION(Register, t2); + +From 561261b051d88ddb0053733f03cbefc75dedcea8 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 6 Apr 2023 16:41:03 +0800 +Subject: [PATCH 020/140] Revert JDK-7175279: Don't use x87 FPU on x86-64 + +--- + src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +index 0e383a3c139..977563fe5f4 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +@@ -2019,6 +2019,18 @@ address LIR_Assembler::int_constant(jlong n) { + } + 
} + ++void LIR_Assembler::set_24bit_FPU() { Unimplemented(); } ++ ++void LIR_Assembler::reset_FPU() { Unimplemented(); } ++ ++void LIR_Assembler::fpop() { Unimplemented(); } ++ ++void LIR_Assembler::fxch(int i) { Unimplemented(); } ++ ++void LIR_Assembler::fld(int i) { Unimplemented(); } ++ ++void LIR_Assembler::ffree(int i) { Unimplemented(); } ++ + void LIR_Assembler::casw(Register addr, Register newval, Register cmpval) { + __ cmpxchg(addr, cmpval, newval, Assembler::int32, Assembler::aq /* acquire */, + Assembler::rl /* release */, t0, true /* result as bool */); + +From ff4e1443fd000208714b506d52c0fab1c91e4ac8 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 6 Apr 2023 16:41:15 +0800 +Subject: [PATCH 021/140] Revert JDK-8255909: Remove unused delayed_value + methods + +--- + src/hotspot/cpu/riscv/assembler_riscv.hpp | 7 +++++++ + src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 16 ++++++++++++++++ + src/hotspot/cpu/riscv/macroAssembler_riscv.hpp | 4 ++++ + 3 files changed, 27 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp +index 4923962a496..44e8d4b4ff1 100644 +--- a/src/hotspot/cpu/riscv/assembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp +@@ -3027,6 +3027,13 @@ enum Nf { + Assembler(CodeBuffer* code) : AbstractAssembler(code), _in_compressible_region(false) { + } + ++ virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, ++ Register tmp, ++ int offset) { ++ ShouldNotCallThis(); ++ return RegisterOrConstant(); ++ } ++ + // Stack overflow checking + virtual void bang_stack_with_offset(int offset) { Unimplemented(); } + +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index 269d76ba69e..878957cbede 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -191,6 +191,22 @@ void MacroAssembler::call_VM(Register oop_result, + void MacroAssembler::check_and_handle_earlyret(Register java_thread) {} + void MacroAssembler::check_and_handle_popframe(Register java_thread) {} + ++RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, ++ Register tmp, ++ int offset) { ++ intptr_t value = *delayed_value_addr; ++ if (value != 0) ++ return RegisterOrConstant(value + offset); ++ ++ // load indirectly to solve generation ordering problem ++ ld(tmp, ExternalAddress((address) delayed_value_addr)); ++ ++ if (offset != 0) ++ add(tmp, tmp, offset); ++ ++ return RegisterOrConstant(tmp); ++} ++ + // Calls to C land + // + // When entering C land, the fp, & esp of the last Java frame have to be recorded +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +index b59bdadb8bf..f23f7e7d1e6 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +@@ -625,6 +625,10 @@ class MacroAssembler: public Assembler { + + void reserved_stack_check(); + ++ virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, ++ Register tmp, ++ int offset); ++ + void get_polling_page(Register dest, address page, int32_t &offset, relocInfo::relocType rtype); + void read_polling_page(Register r, address page, relocInfo::relocType rtype); + void read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype); + +From afe35a3fdc705645bfe2a2e797a95ce1d5203872 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 6 Apr 2023 16:51:39 
+0800 +Subject: [PATCH 022/140] Revert JDK-8263679: C1: Remove vtable call + +--- + src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +index 977563fe5f4..a0ecc63d851 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +@@ -1382,6 +1382,11 @@ void LIR_Assembler::ic_call(LIR_OpJavaCall* op) { + add_call_info(code_offset(), op->info()); + } + ++/* Currently, vtable-dispatch is only enabled for sparc platforms */ ++void LIR_Assembler::vtable_call(LIR_OpJavaCall* op) { ++ ShouldNotReachHere(); ++} ++ + void LIR_Assembler::emit_static_call_stub() { + address call_pc = __ pc(); + assert((__ offset() % 4) == 0, "bad alignment"); + +From 655b34c00ec5ff6fa7e82de96a78a0c58ba91985 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 6 Apr 2023 16:55:57 +0800 +Subject: [PATCH 023/140] Revert JDK-8264063: Outer Safepoint poll load should + not reference the head of inner strip mined loop. + +--- + src/hotspot/cpu/riscv/riscv.ad | 14 ++++++++++++++ + 1 file changed, 14 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 8c7a8ede815..fcddf752564 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -952,6 +952,20 @@ int CallDynamicJavaDirectNode::compute_padding(int current_offset) const + return align_up(current_offset, alignment_required()) - current_offset; + } + ++// Indicate if the safepoint node needs the polling page as an input ++ ++// the shared code plants the oop data at the start of the generated ++// code for the safepoint node and that needs ot be at the load ++// instruction itself. so we cannot plant a mov of the safepoint poll ++// address followed by a load. setting this to true means the mov is ++// scheduled as a prior instruction. that's better for scheduling ++// anyway. ++ ++bool SafePointNode::needs_polling_address_input() ++{ ++ return true; ++} ++ + //============================================================================= + + #ifndef PRODUCT + +From 4a6f7dafdb4e0cf054b7867de60f789d4ca1d9f3 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 6 Apr 2023 17:26:29 +0800 +Subject: [PATCH 024/140] Revert: JDK-8266810: Move trivial Matcher code to + cpu-specific header files JDK-8254966: Remove unused code from Matcher + +--- + src/hotspot/cpu/riscv/matcher_riscv.hpp | 129 ------------------------ + src/hotspot/cpu/riscv/riscv.ad | 108 +++++++++++++++++++- + 2 files changed, 107 insertions(+), 130 deletions(-) + delete mode 100644 src/hotspot/cpu/riscv/matcher_riscv.hpp + +diff --git a/src/hotspot/cpu/riscv/matcher_riscv.hpp b/src/hotspot/cpu/riscv/matcher_riscv.hpp +deleted file mode 100644 +index 4c7fabd7240..00000000000 +--- a/src/hotspot/cpu/riscv/matcher_riscv.hpp ++++ /dev/null +@@ -1,129 +0,0 @@ +-/* +- * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2021, 2022, Huawei Technologies Co., Ltd. All rights reserved. +- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +- * +- * This code is free software; you can redistribute it and/or modify it +- * under the terms of the GNU General Public License version 2 only, as +- * published by the Free Software Foundation. 
+- * +- * This code is distributed in the hope that it will be useful, but WITHOUT +- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +- * version 2 for more details (a copy is included in the LICENSE file that +- * accompanied this code). +- * +- * You should have received a copy of the GNU General Public License version +- * 2 along with this work; if not, write to the Free Software Foundation, +- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +- * +- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +- * or visit www.oracle.com if you need additional information or have any +- * questions. +- * +- */ +- +-#ifndef CPU_RISCV_MATCHER_RISCV_HPP +-#define CPU_RISCV_MATCHER_RISCV_HPP +- +- // Defined within class Matcher +- +- // false => size gets scaled to BytesPerLong, ok. +- static const bool init_array_count_is_in_bytes = false; +- +- // riscv doesn't support misaligned vectors store/load on JDK11. +- static constexpr bool misaligned_vectors_ok() { +- return false; +- } +- +- // Whether code generation need accurate ConvI2L types. +- static const bool convi2l_type_required = false; +- +- // Does the CPU require late expand (see block.cpp for description of late expand)? +- static const bool require_postalloc_expand = false; +- +- // Do we need to mask the count passed to shift instructions or does +- // the cpu only look at the lower 5/6 bits anyway? +- static const bool need_masked_shift_count = false; +- +- static constexpr bool isSimpleConstant64(jlong value) { +- // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. +- // Probably always true, even if a temp register is required. +- return true; +- } +- +- // Use conditional move (CMOVL) +- static constexpr int long_cmove_cost() { +- // long cmoves are no more expensive than int cmoves +- return 0; +- } +- +- static constexpr int float_cmove_cost() { +- // float cmoves are no more expensive than int cmoves +- return 0; +- } +- +- // This affects two different things: +- // - how Decode nodes are matched +- // - how ImplicitNullCheck opportunities are recognized +- // If true, the matcher will try to remove all Decodes and match them +- // (as operands) into nodes. NullChecks are not prepared to deal with +- // Decodes by final_graph_reshaping(). +- // If false, final_graph_reshaping() forces the decode behind the Cmp +- // for a NullCheck. The matcher matches the Decode node into a register. +- // Implicit_null_check optimization moves the Decode along with the +- // memory operation back up before the NullCheck. +- static bool narrow_oop_use_complex_address() { +- return CompressedOops::shift() == 0; +- } +- +- static bool narrow_klass_use_complex_address() { +- return false; +- } +- +- static bool const_oop_prefer_decode() { +- // Prefer ConN+DecodeN over ConP in simple compressed oops mode. +- return CompressedOops::base() == NULL; +- } +- +- static bool const_klass_prefer_decode() { +- // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode. +- return CompressedKlassPointers::base() == NULL; +- } +- +- // Is it better to copy float constants, or load them directly from +- // memory? Intel can load a float constant from a direct address, +- // requiring no extra registers. Most RISCs will have to materialize +- // an address into a register first, so they would do better to copy +- // the constant from stack. 
+- static const bool rematerialize_float_constants = false; +- +- // If CPU can load and store mis-aligned doubles directly then no +- // fixup is needed. Else we split the double into 2 integer pieces +- // and move it piece-by-piece. Only happens when passing doubles into +- // C code as the Java calling convention forces doubles to be aligned. +- static const bool misaligned_doubles_ok = true; +- +- // Advertise here if the CPU requires explicit rounding operations to implement strictfp mode. +- static const bool strict_fp_requires_explicit_rounding = false; +- +- // Are floats converted to double when stored to stack during +- // deoptimization? +- static constexpr bool float_in_double() { return false; } +- +- // Do ints take an entire long register or just half? +- // The relevant question is how the int is callee-saved: +- // the whole long is written but de-opt'ing will have to extract +- // the relevant 32 bits. +- static const bool int_in_long = true; +- +- // true means we have fast l2f convers +- // false means that conversion is done by runtime call +- static constexpr bool convL2FSupported(void) { +- return true; +- } +- +- // Implements a variant of EncodeISOArrayNode that encode ASCII only +- static const bool supports_encode_ascii_array = false; +- +-#endif // CPU_RISCV_MATCHER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index fcddf752564..a9e5f2e6841 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -330,7 +330,9 @@ alloc_class chunk2(RFLAGS); + // Several register classes are automatically defined based upon information in + // this architecture description. + // 1) reg_class inline_cache_reg ( /* as def'd in frame section */ ) +-// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ ) ++// 2) reg_class compiler_method_reg ( /* as def'd in frame section */ ) ++// 2) reg_class interpreter_method_reg ( /* as def'd in frame section */ ) ++// 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ ) + // + + // Class for all 32 bit general purpose registers +@@ -1548,6 +1550,17 @@ bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { + return (-4096 <= offs && offs < 4096); + } + ++const bool Matcher::isSimpleConstant64(jlong value) { ++ // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. ++ // Probably always true, even if a temp register is required. ++ return true; ++} ++ ++// true just means we have fast l2f conversion ++const bool Matcher::convL2FSupported(void) { ++ return true; ++} ++ + // Vector width in bytes. + const int Matcher::vector_width_in_bytes(BasicType bt) { + return 0; +@@ -1567,6 +1580,94 @@ const uint Matcher::vector_ideal_reg(int len) { + return 0; + } + ++// RISC-V supports misaligned vectors store/load. ++const bool Matcher::misaligned_vectors_ok() { ++ return true; ++} ++ ++// false => size gets scaled to BytesPerLong, ok. ++const bool Matcher::init_array_count_is_in_bytes = false; ++ ++// Use conditional move (CMOVL) ++const int Matcher::long_cmove_cost() { ++ // long cmoves are no more expensive than int cmoves ++ return 0; ++} ++ ++const int Matcher::float_cmove_cost() { ++ // float cmoves are no more expensive than int cmoves ++ return 0; ++} ++ ++// Does the CPU require late expand (see block.cpp for description of late expand)? 
++const bool Matcher::require_postalloc_expand = false; ++ ++// Do we need to mask the count passed to shift instructions or does ++// the cpu only look at the lower 5/6 bits anyway? ++const bool Matcher::need_masked_shift_count = false; ++ ++// This affects two different things: ++// - how Decode nodes are matched ++// - how ImplicitNullCheck opportunities are recognized ++// If true, the matcher will try to remove all Decodes and match them ++// (as operands) into nodes. NullChecks are not prepared to deal with ++// Decodes by final_graph_reshaping(). ++// If false, final_graph_reshaping() forces the decode behind the Cmp ++// for a NullCheck. The matcher matches the Decode node into a register. ++// Implicit_null_check optimization moves the Decode along with the ++// memory operation back up before the NullCheck. ++bool Matcher::narrow_oop_use_complex_address() { ++ return Universe::narrow_oop_shift() == 0; ++} ++ ++bool Matcher::narrow_klass_use_complex_address() { ++// TODO ++// decide whether we need to set this to true ++ return false; ++} ++ ++bool Matcher::const_oop_prefer_decode() { ++ // Prefer ConN+DecodeN over ConP in simple compressed oops mode. ++ return Universe::narrow_oop_base() == NULL; ++} ++ ++bool Matcher::const_klass_prefer_decode() { ++ // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode. ++ return Universe::narrow_klass_base() == NULL; ++} ++ ++// Is it better to copy float constants, or load them directly from ++// memory? Intel can load a float constant from a direct address, ++// requiring no extra registers. Most RISCs will have to materialize ++// an address into a register first, so they would do better to copy ++// the constant from stack. ++const bool Matcher::rematerialize_float_constants = false; ++ ++// If CPU can load and store mis-aligned doubles directly then no ++// fixup is needed. Else we split the double into 2 integer pieces ++// and move it piece-by-piece. Only happens when passing doubles into ++// C code as the Java calling convention forces doubles to be aligned. ++const bool Matcher::misaligned_doubles_ok = true; ++ ++// No-op on amd64 ++void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) { ++ Unimplemented(); ++} ++ ++// Advertise here if the CPU requires explicit rounding operations to ++// implement the UseStrictFP mode. ++const bool Matcher::strict_fp_requires_explicit_rounding = false; ++ ++// Are floats converted to double when stored to stack during ++// deoptimization? ++bool Matcher::float_in_double() { return false; } ++ ++// Do ints take an entire long register or just half? ++// The relevant question is how the int is callee-saved: ++// the whole long is written but de-opt'ing will have to extract ++// the relevant 32 bits. ++const bool Matcher::int_in_long = true; ++ + // Return whether or not this register is ever used as an argument. + // This function is used on startup to build the trampoline stubs in + // generateOptoStub. Registers not mentioned will be killed by the VM +@@ -1671,6 +1772,8 @@ bool size_fits_all_mem_uses(AddPNode* addp, int shift) { + return true; + } + ++const bool Matcher::convi2l_type_required = false; ++ + // Should the Matcher clone input 'm' of node 'n'? + bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) { + assert_cond(m != NULL); +@@ -2250,6 +2353,9 @@ frame %{ + // Inline Cache Register or methodOop for I2C. + inline_cache_reg(R31); + ++ // Method Oop Register when calling interpreter. 
++ interpreter_method_oop_reg(R31); ++ + // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset] + cisc_spilling_operand_name(indOffset); + + +From 4b0f20882cd9b5e5da92d61c2fa02e0cbea0ef0c Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 6 Apr 2023 17:30:42 +0800 +Subject: [PATCH 025/140] Revert JDK-8256238: Remove + Matcher::pass_original_key_for_aes + +--- + src/hotspot/cpu/riscv/riscv.ad | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index a9e5f2e6841..0d1afd5584a 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -1580,6 +1580,11 @@ const uint Matcher::vector_ideal_reg(int len) { + return 0; + } + ++// AES support not yet implemented ++const bool Matcher::pass_original_key_for_aes() { ++ return false; ++} ++ + // RISC-V supports misaligned vectors store/load. + const bool Matcher::misaligned_vectors_ok() { + return true; + +From 36d7ecedbcd95911d1b355bbab3e8fdf81b36e7d Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 6 Apr 2023 17:42:37 +0800 +Subject: [PATCH 026/140] Revert JDK-8242492: C2: Remove + Matcher::vector_shift_count_ideal_reg() + +--- + src/hotspot/cpu/riscv/riscv.ad | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 0d1afd5584a..c10e91633a5 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -1580,6 +1580,11 @@ const uint Matcher::vector_ideal_reg(int len) { + return 0; + } + ++const uint Matcher::vector_shift_count_ideal_reg(int size) { ++ fatal("vector shift is not supported"); ++ return Node::NotAMachineReg; ++} ++ + // AES support not yet implemented + const bool Matcher::pass_original_key_for_aes() { + return false; + +From b78e448a460fcdc66553e66342e93e5ac87c0c61 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 6 Apr 2023 17:47:13 +0800 +Subject: [PATCH 027/140] Revert JDK-8266937: Remove Compile::reshape_address + +--- + src/hotspot/cpu/riscv/riscv.ad | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index c10e91633a5..2c5ec0451b8 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -1801,6 +1801,9 @@ bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, + return clone_base_plus_offset_address(m, mstack, address_visited); + } + ++void Compile::reshape_address(AddPNode* addp) { ++} ++ + %} + + + +From cd34a5ce5d120cdac939217976d1e7b7e98bf654 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 6 Apr 2023 17:49:09 +0800 +Subject: [PATCH 028/140] Revert JDK-8272771: frame::pd_ps() is not implemented + on any platform + +--- + src/hotspot/cpu/riscv/frame_riscv.cpp | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp +index 8e7babe2c61..8e4f20fe561 100644 +--- a/src/hotspot/cpu/riscv/frame_riscv.cpp ++++ b/src/hotspot/cpu/riscv/frame_riscv.cpp +@@ -683,6 +683,7 @@ frame::frame(void* ptr_sp, void* ptr_fp, void* pc) { + init((intptr_t*)ptr_sp, (intptr_t*)ptr_fp, (address)pc); + } + ++void frame::pd_ps() {} + #endif + + void JavaFrameAnchor::make_walkable(JavaThread* thread) { + +From bdb16daf6d809d0c38256be99ecbe922d24b889b Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 6 Apr 2023 17:56:27 +0800 +Subject: [PATCH 029/140] Revert JDK-8268858: Determine register pressure + 
automatically by the number of available registers for allocation + +--- + src/hotspot/cpu/riscv/riscv.ad | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 2c5ec0451b8..a6aa52de29e 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -1527,6 +1527,10 @@ const bool Matcher::has_predicated_vectors(void) { + return false; + } + ++const int Matcher::float_pressure(int default_pressure_threshold) { ++ return default_pressure_threshold; ++} ++ + // Is this branch offset short enough that a short branch can be used? + // + // NOTE: If the platform does not provide any short branch variants, then + +From bbaa7a97b5d8110ead9dc44f31e2c5fe3bcd83d5 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 6 Apr 2023 17:58:16 +0800 +Subject: [PATCH 030/140] Revert JDK-8253040: Remove unused + Matcher::regnum_to_fpu_offset() + +--- + src/hotspot/cpu/riscv/riscv.ad | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index a6aa52de29e..2d847cb6454 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -1531,6 +1531,12 @@ const int Matcher::float_pressure(int default_pressure_threshold) { + return default_pressure_threshold; + } + ++int Matcher::regnum_to_fpu_offset(int regnum) ++{ ++ Unimplemented(); ++ return 0; ++} ++ + // Is this branch offset short enough that a short branch can be used? + // + // NOTE: If the platform does not provide any short branch variants, then + +From ce9ad0af72e405153534369bff1b1725697f3e40 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 6 Apr 2023 18:03:23 +0800 +Subject: [PATCH 031/140] Revert JDK-8254084: Remove + TemplateTable::pd_initialize + +--- + src/hotspot/cpu/riscv/templateTable_riscv.cpp | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp +index 4e388ac4eaa..c9d399ccdaf 100644 +--- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp ++++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp +@@ -48,6 +48,12 @@ + + #define __ _masm-> + ++// Platform-dependent initialization ++ ++void TemplateTable::pd_initialize() { ++ // No RISC-V specific initialization ++} ++ + // Address computation: local variables + + static inline Address iaddress(int n) { + +From 49429187846e6f2b00ab2853e27097eae274a947 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 6 Apr 2023 20:17:07 +0800 +Subject: [PATCH 032/140] Revert JDK-8224815: 8224815: Remove non-GC uses of + CollectedHeap::is_in_reserved() + +--- + src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index 878957cbede..cf01d7d74bb 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -1632,7 +1632,7 @@ void MacroAssembler::movoop(Register dst, jobject obj, bool immediate) { + #ifdef ASSERT + { + ThreadInVMfromUnknown tiv; +- assert(Universe::heap()->is_in(JNIHandles::resolve(obj)), "should be real oop"); ++ assert(Universe::heap()->is_in_reserved(JNIHandles::resolve(obj)), "should be real oop"); + } + #endif + oop_index = oop_recorder()->find_index(obj); +@@ -2800,7 +2800,7 @@ void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { + assert (UseCompressedOops, "should 
only be used for compressed oops"); + assert (Universe::heap() != NULL, "java heap should be initialized"); + assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); +- assert(Universe::heap()->is_in(JNIHandles::resolve(obj)), "should be real oop"); ++ assert(Universe::heap()->is_in_reserved(JNIHandles::resolve(obj)), "should be real oop"); + } + #endif + int oop_index = oop_recorder()->find_index(obj); +@@ -2815,7 +2815,7 @@ void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { + assert (UseCompressedClassPointers, "should only be used for compressed headers"); + assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); + int index = oop_recorder()->find_index(k); +- assert(!Universe::heap()->is_in(k), "should not be an oop"); ++ assert(!Universe::heap()->is_in_reserved(k), "should not be an oop"); + + InstructionMark im(this); + RelocationHolder rspec = metadata_Relocation::spec(index); + +From a71fabb1ff05db9955557a888be6cd1b5f87deea Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 6 Apr 2023 21:14:30 +0800 +Subject: [PATCH 033/140] Revert JDK-8253540: InterpreterRuntime::monitorexit + should be a JRT_LEAF function + +--- + src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +index 8adc7b1320d..48957803fdc 100644 +--- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp ++++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +@@ -839,7 +839,9 @@ void InterpreterMacroAssembler::unlock_object(Register lock_reg) + assert(lock_reg == c_rarg1, "The argument is only for looks. It must be rarg1"); + + if (UseHeavyMonitors) { +- call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), ++ lock_reg); + } else { + Label done; + +@@ -871,7 +873,9 @@ void InterpreterMacroAssembler::unlock_object(Register lock_reg) + + // Call the runtime routine for slow case. 
+ sd(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); // restore obj +- call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), ++ lock_reg); + + bind(done); + + +From a0b18eea3c83ef8f1de2c1b3cd55452f0f6b9af2 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Fri, 7 Apr 2023 12:51:33 +0800 +Subject: [PATCH 034/140] Revert JDK-8278387: riscv: Implement UseHeavyMonitors + consistently && JDK-8279826: riscv: Preserve result in native wrapper with + +UseHeavyMonitors + +--- + .../cpu/riscv/c1_LIRAssembler_riscv.cpp | 8 +- + src/hotspot/cpu/riscv/riscv.ad | 92 +++++++++---------- + src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 85 ++++++++--------- + 3 files changed, 80 insertions(+), 105 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +index a0ecc63d851..dd657963438 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +@@ -367,11 +367,7 @@ int LIR_Assembler::emit_unwind_handler() { + if (method()->is_synchronized()) { + monitor_address(0, FrameMap::r10_opr); + stub = new MonitorExitStub(FrameMap::r10_opr, true, 0); +- if (UseHeavyMonitors) { +- __ j(*stub->entry()); +- } else { +- __ unlock_object(x15, x14, x10, *stub->entry()); +- } ++ __ unlock_object(x15, x14, x10, *stub->entry()); + __ bind(*stub->continuation()); + } + +@@ -1512,7 +1508,7 @@ void LIR_Assembler::emit_lock(LIR_OpLock* op) { + Register obj = op->obj_opr()->as_register(); // may not be an oop + Register hdr = op->hdr_opr()->as_register(); + Register lock = op->lock_opr()->as_register(); +- if (UseHeavyMonitors) { ++ if (!UseFastLocking) { + __ j(*op->stub()->entry()); + } else if (op->code() == lir_lock) { + assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header"); +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 2d847cb6454..29027d594a0 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -2109,40 +2109,36 @@ encode %{ + __ andi(t0, disp_hdr, markOopDesc::monitor_value); + __ bnez(t0, object_has_monitor); + +- if (!UseHeavyMonitors) { +- // Set tmp to be (markWord of object | UNLOCK_VALUE). +- __ ori(tmp, disp_hdr, markOopDesc::unlocked_value); +- +- // Initialize the box. (Must happen before we update the object mark!) +- __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); +- +- // Compare object markWord with an unlocked value (tmp) and if +- // equal exchange the stack address of our box with object markWord. +- // On failure disp_hdr contains the possibly locked markWord. +- __ cmpxchg(/*memory address*/oop, /*expected value*/tmp, /*new value*/box, Assembler::int64, Assembler::aq, +- Assembler::rl, /*result*/disp_hdr); +- __ mv(flag, zr); +- __ beq(disp_hdr, tmp, cont); // prepare zero flag and goto cont if we won the cas +- +- assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); +- +- // If the compare-and-exchange succeeded, then we found an unlocked +- // object, will have now locked it will continue at label cont +- // We did not see an unlocked object so try the fast recursive case. +- +- // Check if the owner is self by comparing the value in the +- // markWord of object (disp_hdr) with the stack pointer. 
+- __ sub(disp_hdr, disp_hdr, sp); +- __ li(tmp, (intptr_t) (~(os::vm_page_size()-1) | (uintptr_t)markOopDesc::lock_mask_in_place)); +- // If (mark & lock_mask) == 0 and mark - sp < page_size, we are stack-locking and goto cont, +- // hence we can store 0 as the displaced header in the box, which indicates that it is a +- // recursive lock. +- __ andr(tmp/*==0?*/, disp_hdr, tmp); +- __ sd(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes())); +- __ mv(flag, tmp); // we can use the value of tmp as the result here +- } else { +- __ mv(flag, 1); // Set non-zero flag to indicate 'failure' -> take slow-path +- } ++ // Set tmp to be (markWord of object | UNLOCK_VALUE). ++ __ ori(tmp, disp_hdr, markOopDesc::unlocked_value); ++ ++ // Initialize the box. (Must happen before we update the object mark!) ++ __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); ++ ++ // Compare object markWord with an unlocked value (tmp) and if ++ // equal exchange the stack address of our box with object markWord. ++ // On failure disp_hdr contains the possibly locked markWord. ++ __ cmpxchg(/*memory address*/oop, /*expected value*/tmp, /*new value*/box, Assembler::int64, Assembler::aq, ++ Assembler::rl, /*result*/disp_hdr); ++ __ mv(flag, zr); ++ __ beq(disp_hdr, tmp, cont); // prepare zero flag and goto cont if we won the cas ++ ++ assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); ++ ++ // If the compare-and-exchange succeeded, then we found an unlocked ++ // object, will have now locked it will continue at label cont ++ // We did not see an unlocked object so try the fast recursive case. ++ ++ // Check if the owner is self by comparing the value in the ++ // markWord of object (disp_hdr) with the stack pointer. ++ __ sub(disp_hdr, disp_hdr, sp); ++ __ li(tmp, (intptr_t) (~(os::vm_page_size()-1) | (uintptr_t)markOopDesc::lock_mask_in_place)); ++ // If (mark & lock_mask) == 0 and mark - sp < page_size, we are stack-locking and goto cont, ++ // hence we can store 0 as the displaced header in the box, which indicates that it is a ++ // recursive lock. ++ __ andr(tmp/*==0?*/, disp_hdr, tmp); ++ __ sd(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes())); ++ __ mv(flag, tmp); // we can use the value of tmp as the result here + + __ j(cont); + +@@ -2189,31 +2185,25 @@ encode %{ + + assert_different_registers(oop, box, tmp, disp_hdr, flag); + +- if (!UseHeavyMonitors) { +- // Find the lock address and load the displaced header from the stack. +- __ ld(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes())); ++ // Find the lock address and load the displaced header from the stack. ++ __ ld(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes())); + +- // If the displaced header is 0, we have a recursive unlock. +- __ mv(flag, disp_hdr); +- __ beqz(disp_hdr, cont); +- } ++ // If the displaced header is 0, we have a recursive unlock. ++ __ mv(flag, disp_hdr); ++ __ beqz(disp_hdr, cont); + + // Handle existing monitor. + __ ld(tmp, Address(oop, oopDesc::mark_offset_in_bytes())); + __ andi(t0, disp_hdr, markOopDesc::monitor_value); + __ bnez(t0, object_has_monitor); + +- if (!UseHeavyMonitors) { +- // Check if it is still a light weight lock, this is true if we +- // see the stack address of the basicLock in the markWord of the +- // object. ++ // Check if it is still a light weight lock, this is true if we ++ // see the stack address of the basicLock in the markWord of the ++ // object. 
+ +- __ cmpxchg(/*memory address*/oop, /*expected value*/box, /*new value*/disp_hdr, Assembler::int64, Assembler::relaxed, +- Assembler::rl, /*result*/tmp); +- __ xorr(flag, box, tmp); // box == tmp if cas succeeds +- } else { +- __ mv(flag, 1); // Set non-zero flag to indicate 'failure' -> take slow path +- } ++ __ cmpxchg(/*memory address*/oop, /*expected value*/box, /*new value*/disp_hdr, Assembler::int64, Assembler::relaxed, ++ Assembler::rl, /*result*/tmp); ++ __ xorr(flag, box, tmp); // box == tmp if cas succeeds + __ j(cont); + + assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); +diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +index 21aa3b58c09..5203200b068 100644 +--- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp ++++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +@@ -1488,39 +1488,35 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, + // Load the oop from the handle + __ ld(obj_reg, Address(oop_handle_reg, 0)); + +- if (!UseHeavyMonitors) { +- // Load (object->mark() | 1) into swap_reg % x10 +- __ ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes())); +- __ ori(swap_reg, t0, 1); +- +- // Save (object->mark() | 1) into BasicLock's displaced header +- __ sd(swap_reg, Address(lock_reg, mark_word_offset)); +- +- // src -> dest if dest == x10 else x10 <- dest +- { +- Label here; +- __ cmpxchg_obj_header(x10, lock_reg, obj_reg, t0, lock_done, /*fallthrough*/NULL); +- } ++ // Load (object->mark() | 1) into swap_reg % x10 ++ __ ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes())); ++ __ ori(swap_reg, t0, 1); + +- // Test if the oopMark is an obvious stack pointer, i.e., +- // 1) (mark & 3) == 0, and +- // 2) sp <= mark < mark + os::pagesize() +- // These 3 tests can be done by evaluating the following +- // expression: ((mark - sp) & (3 - os::vm_page_size())), +- // assuming both stack pointer and pagesize have their +- // least significant 2 bits clear. +- // NOTE: the oopMark is in swap_reg % 10 as the result of cmpxchg +- +- __ sub(swap_reg, swap_reg, sp); +- __ andi(swap_reg, swap_reg, 3 - os::vm_page_size()); +- +- // Save the test result, for recursive case, the result is zero +- __ sd(swap_reg, Address(lock_reg, mark_word_offset)); +- __ bnez(swap_reg, slow_path_lock); +- } else { +- __ j(slow_path_lock); ++ // Save (object->mark() | 1) into BasicLock's displaced header ++ __ sd(swap_reg, Address(lock_reg, mark_word_offset)); ++ ++ // src -> dest if dest == x10 else x10 <- dest ++ { ++ Label here; ++ __ cmpxchg_obj_header(x10, lock_reg, obj_reg, t0, lock_done, /*fallthrough*/NULL); + } + ++ // Test if the oopMark is an obvious stack pointer, i.e., ++ // 1) (mark & 3) == 0, and ++ // 2) sp <= mark < mark + os::pagesize() ++ // These 3 tests can be done by evaluating the following ++ // expression: ((mark - sp) & (3 - os::vm_page_size())), ++ // assuming both stack pointer and pagesize have their ++ // least significant 2 bits clear. 
++ // NOTE: the oopMark is in swap_reg % 10 as the result of cmpxchg ++ ++ __ sub(swap_reg, swap_reg, sp); ++ __ andi(swap_reg, swap_reg, 3 - os::vm_page_size()); ++ ++ // Save the test result, for recursive case, the result is zero ++ __ sd(swap_reg, Address(lock_reg, mark_word_offset)); ++ __ bnez(swap_reg, slow_path_lock); ++ + // Slow path will re-enter here + __ bind(lock_done); + } +@@ -1608,31 +1604,24 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, + + Label done; + +- if (!UseHeavyMonitors) { +- // Simple recursive lock? +- __ ld(t0, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); +- __ beqz(t0, done); +- } +- ++ // Simple recursive lock? ++ __ ld(t0, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); ++ __ beqz(t0, done); + + // Must save x10 if if it is live now because cmpxchg must use it + if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { + save_native_result(masm, ret_type, stack_slots); + } + +- if (!UseHeavyMonitors) { +- // get address of the stack lock +- __ la(x10, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); +- // get old displaced header +- __ ld(old_hdr, Address(x10, 0)); ++ // get address of the stack lock ++ __ la(x10, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); ++ // get old displaced header ++ __ ld(old_hdr, Address(x10, 0)); + +- // Atomic swap old header if oop still contains the stack lock +- Label succeed; +- __ cmpxchg_obj_header(x10, old_hdr, obj_reg, t0, succeed, &slow_path_unlock); +- __ bind(succeed); +- } else { +- __ j(slow_path_unlock); +- } ++ // Atomic swap old header if oop still contains the stack lock ++ Label succeed; ++ __ cmpxchg_obj_header(x10, old_hdr, obj_reg, t0, succeed, &slow_path_unlock); ++ __ bind(succeed); + + // slow path re-enters here + __ bind(unlock_done); + +From 1e844b8019cb3516c0843826de2bd3fcd2222f41 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 4 Apr 2023 16:49:19 +0800 +Subject: [PATCH 035/140] Revert JDK-8258192: Obsolete the CriticalJNINatives + flag. CriticalJNINatives is unimplemented() even on AArch64. See + https://bugs.openjdk.org/browse/JDK-8254694. 
+ +Also following up 8191129: AARCH64: Invalid value passed to critical JNI function +--- + src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 3 ++- + src/hotspot/cpu/riscv/vm_version_riscv.cpp | 2 ++ + .../criticalnatives/argumentcorruption/CheckLongArgs.java | 2 +- + .../jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java | 2 +- + 4 files changed, 6 insertions(+), 3 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +index 5203200b068..f8585afbdc2 100644 +--- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp ++++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +@@ -1111,7 +1111,8 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, + int compile_id, + BasicType* in_sig_bt, + VMRegPair* in_regs, +- BasicType ret_type) { ++ BasicType ret_type, ++ address critical_entry) { + if (method->is_method_handle_intrinsic()) { + vmIntrinsics::ID iid = method->intrinsic_id(); + intptr_t start = (intptr_t)__ pc(); +diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp +index dd65f32277f..c0491d23fa6 100644 +--- a/src/hotspot/cpu/riscv/vm_version_riscv.cpp ++++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp +@@ -147,6 +147,8 @@ void VM_Version::initialize() { + #ifdef COMPILER2 + c2_initialize(); + #endif // COMPILER2 ++ ++ UNSUPPORTED_OPTION(CriticalJNINatives); + } + + #ifdef COMPILER2 +diff --git a/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java b/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java +index acb86812d25..2c866f26f08 100644 +--- a/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java ++++ b/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java +@@ -24,7 +24,7 @@ + + /* @test + * @bug 8167409 +- * @requires (os.arch != "aarch64") & (os.arch != "arm") ++ * @requires (os.arch != "aarch64") & (os.arch != "riscv64") & (os.arch != "arm") + * @run main/othervm/native -Xcomp -XX:+CriticalJNINatives compiler.runtime.criticalnatives.argumentcorruption.CheckLongArgs + */ + package compiler.runtime.criticalnatives.argumentcorruption; +diff --git a/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java b/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java +index eab36f93113..1da369fde23 100644 +--- a/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java ++++ b/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java +@@ -24,7 +24,7 @@ + + /* @test + * @bug 8167408 +- * @requires (os.arch != "aarch64") & (os.arch != "arm") ++ * @requires (os.arch != "aarch64") & (os.arch != "riscv64") & (os.arch != "arm") + * @run main/othervm/native -Xcomp -XX:+CriticalJNINatives compiler.runtime.criticalnatives.lookup.LookUp + */ + package compiler.runtime.criticalnatives.lookup; + +From 58ad930e78501c6fad024e7ef05066ec19eb6219 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 11 Apr 2023 11:45:04 +0800 +Subject: [PATCH 036/140] 8202976: Add C1 lea patching support for x86 (RISC-V + part) + +--- + src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +index dd657963438..46a20a64194 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +@@ -1818,6 
+1818,7 @@ void LIR_Assembler::leal(LIR_Opr addr, LIR_Opr dest, LIR_PatchCode patch_code, C + return; + } + ++ assert(patch_code == lir_patch_none, "Patch code not supported"); + LIR_Address* adr = addr->as_address_ptr(); + Register dst = dest->as_register_lo(); + + +From 2074b8ec0ea3562f3999b4f4010b3f5b57dbe502 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 11 Apr 2023 12:15:44 +0800 +Subject: [PATCH 037/140] Revert 8232365: Implementation for JEP 363: Remove + the Concurrent Mark Sweep (CMS) Garbage Collector + +--- + src/hotspot/cpu/riscv/globals_riscv.hpp | 3 +++ + src/hotspot/cpu/riscv/riscv.ad | 27 +++++++++++++++++++++++++ + 2 files changed, 30 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp +index 845064d6cbc..50bbb6a77b8 100644 +--- a/src/hotspot/cpu/riscv/globals_riscv.hpp ++++ b/src/hotspot/cpu/riscv/globals_riscv.hpp +@@ -63,6 +63,9 @@ define_pd_global(bool, RewriteFrequentPairs, true); + + define_pd_global(bool, PreserveFramePointer, false); + ++// GC Ergo Flags ++define_pd_global(uintx, CMSYoungGenPerWorker, 64*M); // default max size of CMS young gen, per GC worker thread ++ + define_pd_global(uintx, TypeProfileLevel, 111); + + define_pd_global(bool, CompactStrings, true); +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 29027d594a0..386ef731696 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -752,6 +752,9 @@ bool is_CAS(int opcode, bool maybe_volatile); + // predicate controlling translation of CompareAndSwapX + bool needs_acquiring_load_reserved(const Node *load); + ++// predicate controlling translation of StoreCM ++bool unnecessary_storestore(const Node *storecm); ++ + // predicate controlling addressing modes + bool size_fits_all_mem_uses(AddPNode* addp, int shift); + %} +@@ -874,6 +877,29 @@ bool needs_acquiring_load_reserved(const Node *n) + // so we can just return true here + return true; + } ++ ++// predicate controlling translation of StoreCM ++// ++// returns true if a StoreStore must precede the card write otherwise ++// false ++ ++bool unnecessary_storestore(const Node *storecm) ++{ ++ assert(storecm->Opcode() == Op_StoreCM, "expecting a StoreCM"); ++ ++ // we need to generate a dmb ishst between an object put and the ++ // associated card mark when we are using CMS without conditional ++ // card marking ++ ++ if (UseConcMarkSweepGC && !UseCondCardMark) { ++ return false; ++ } ++ ++ // a storestore is unnecesary in all other cases ++ ++ return true; ++} ++ + #define __ _masm. 
+ + // advance declarations for helper functions to convert register +@@ -4566,6 +4592,7 @@ instruct loadConD0(fRegD dst, immD0 con) %{ + instruct storeimmCM0(immI0 zero, memory mem) + %{ + match(Set mem (StoreCM mem zero)); ++ predicate(unnecessary_storestore(n)); + + ins_cost(STORE_COST); + format %{ "storestore (elided)\n\t" + +From f838cf41b48c6bc17d052531ab5594de236b1302 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 11 Apr 2023 22:06:58 +0800 +Subject: [PATCH 038/140] Revert 8220051: Remove global safepoint code + +--- + src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 3 +- + .../cpu/riscv/macroAssembler_riscv.cpp | 26 ++++++++++- + .../cpu/riscv/macroAssembler_riscv.hpp | 3 +- + src/hotspot/cpu/riscv/riscv.ad | 43 +++++++++++++++++++ + src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 4 +- + .../templateInterpreterGenerator_riscv.cpp | 2 +- + 6 files changed, 75 insertions(+), 6 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +index 48957803fdc..74dded77d19 100644 +--- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp ++++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +@@ -515,7 +515,8 @@ void InterpreterMacroAssembler::dispatch_base(TosState state, + + Label safepoint; + address* const safepoint_table = Interpreter::safept_table(state); +- bool needs_thread_local_poll = generate_poll && table != safepoint_table; ++ bool needs_thread_local_poll = generate_poll && ++ SafepointMechanism::uses_thread_local_poll() && table != safepoint_table; + + if (needs_thread_local_poll) { + NOT_PRODUCT(block_comment("Thread-local Safepoint poll")); +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index cf01d7d74bb..73629e3dba3 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -264,6 +264,30 @@ void MacroAssembler::set_last_Java_frame(Register last_java_sp, + } + } + ++// Just like safepoint_poll, but use an acquiring load for thread- ++// local polling. ++// ++// We need an acquire here to ensure that any subsequent load of the ++// global SafepointSynchronize::_state flag is ordered after this load ++// of the local Thread::_polling page. We don't want this poll to ++// return false (i.e. not safepointing) and a later poll of the global ++// SafepointSynchronize::_state spuriously to return true. ++// ++// This is to avoid a race when we're in a native->Java transition ++// racing the code which wakes up from a safepoint. 
++// ++void MacroAssembler::safepoint_poll_acquire(Label& slow_path) { ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ membar(MacroAssembler::AnyAny); ++ ld(t1, Address(xthread, Thread::polling_page_offset())); ++ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); ++ andi(t0, t1, SafepointMechanism::poll_bit()); ++ bnez(t0, slow_path); ++ } else { ++ safepoint_poll(slow_path); ++ } ++} ++ + void MacroAssembler::reset_last_Java_frame(bool clear_fp) { + // we must set sp to zero to clear frame + sd(zr, Address(xthread, JavaThread::last_Java_sp_offset())); +@@ -2137,7 +2161,7 @@ void MacroAssembler::check_klass_subtype(Register sub_klass, + bind(L_failure); + } + +-void MacroAssembler::safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod) { ++void MacroAssembler::safepoint_poll(Label& slow_path) { + if (SafepointMechanism::uses_thread_local_poll()) { + ld(t1, Address(xthread, Thread::polling_page_offset())); + andi(t0, t1, SafepointMechanism::poll_bit()); +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +index f23f7e7d1e6..8a2c6e07d88 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +@@ -44,7 +44,8 @@ class MacroAssembler: public Assembler { + } + virtual ~MacroAssembler() {} + +- void safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod); ++ void safepoint_poll(Label& slow_path); ++ void safepoint_poll_acquire(Label& slow_path); + + // Place a fence.i after code may have been modified due to a safepoint. + void safepoint_ifence(); +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 386ef731696..2dde4453dac 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -1213,6 +1213,14 @@ const Pipeline * MachEpilogNode::pipeline() const { + return MachNode::pipeline_class(); + } + ++// This method seems to be obsolete. It is declared in machnode.hpp ++// and defined in all *.ad files, but it is never called. Should we ++// get rid of it? 
++int MachEpilogNode::safepoint_offset() const { ++ assert(do_polling(), "no return for this epilog node"); ++ return 4; ++} ++ + //============================================================================= + + // Figure out which register class each belongs in: rc_int, rc_float or +@@ -1907,6 +1915,17 @@ encode %{ + __ li(dst_reg, 1); + %} + ++ enc_class riscv_enc_mov_poll_page(iRegP dst, immPollPage src) %{ ++ MacroAssembler _masm(&cbuf); ++ int32_t offset = 0; ++ address page = (address)$src$$constant; ++ unsigned long align = (unsigned long)page & 0xfff; ++ assert(align == 0, "polling page must be page aligned"); ++ Register dst_reg = as_Register($dst$$reg); ++ __ la_patchable(dst_reg, Address(page, relocInfo::poll_type), offset); ++ __ addi(dst_reg, dst_reg, offset); ++ %} ++ + enc_class riscv_enc_mov_byte_map_base(iRegP dst) %{ + C2_MacroAssembler _masm(&cbuf); + __ load_byte_map_base($dst$$Register); +@@ -2688,6 +2707,17 @@ operand immP_1() + interface(CONST_INTER); + %} + ++// Polling Page Pointer Immediate ++operand immPollPage() ++%{ ++ predicate((address)n->get_ptr() == os::get_polling_page()); ++ match(ConP); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ + // Card Table Byte Map Base + operand immByteMapBase() + %{ +@@ -4476,6 +4506,19 @@ instruct loadConP1(iRegPNoSp dst, immP_1 con) + ins_pipe(ialu_imm); + %} + ++// Load Poll Page Constant ++instruct loadConPollPage(iRegPNoSp dst, immPollPage con) ++%{ ++ match(Set dst con); ++ ++ ins_cost(ALU_COST * 6); ++ format %{ "movptr $dst, $con\t# Poll Page Ptr, #@loadConPollPage" %} ++ ++ ins_encode(riscv_enc_mov_poll_page(dst, con)); ++ ++ ins_pipe(ialu_imm); ++%} ++ + // Load Byte Map Base Constant + instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con) + %{ +diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +index f8585afbdc2..c501c8f7bac 100644 +--- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp ++++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +@@ -1573,7 +1573,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, + // This is to avoid a race when we're in a native->Java transition + // racing the code which wakes up from a safepoint. + +- __ safepoint_poll(safepoint_in_progress, true /* at_return */, true /* acquire */, false /* in_nmethod */); ++ __ safepoint_poll_acquire(safepoint_in_progress); + __ lwu(t0, Address(xthread, JavaThread::suspend_flags_offset())); + __ bnez(t0, safepoint_in_progress); + __ bind(safepoint_in_progress_done); +@@ -2439,7 +2439,7 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t + __ bind(noException); + + Label no_adjust, bail; +- if (!cause_return) { ++ if (SafepointMechanism::uses_thread_local_poll() && !cause_return) { + // If our stashed return pc was modified by the runtime we avoid touching it + __ ld(t0, Address(fp, frame::return_addr_offset * wordSize)); + __ bne(x18, t0, no_adjust); +diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp +index 76ae6f89e27..2d4baab2ab7 100644 +--- a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp ++++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp +@@ -1143,7 +1143,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { + // + // This is to avoid a race when we're in a native->Java transition + // racing the code which wakes up from a safepoint. 
+- __ safepoint_poll(L, true /* at_return */, true /* acquire */, false /* in_nmethod */); ++ __ safepoint_poll_acquire(L); + __ lwu(t1, Address(xthread, JavaThread::suspend_flags_offset())); + __ beqz(t1, Continue); + __ bind(L); + +From 13faeae35312c59a1366d4f9c84da7157f06efc7 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 11 Apr 2023 22:15:14 +0800 +Subject: [PATCH 039/140] Revert 8253180: ZGC: Implementation of JEP 376: ZGC: + Concurrent Thread-Stack Processing + +--- + src/hotspot/cpu/riscv/frame_riscv.cpp | 8 ++------ + src/hotspot/cpu/riscv/frame_riscv.hpp | 3 --- + src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 1 - + src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 8 -------- + .../cpu/riscv/templateInterpreterGenerator_riscv.cpp | 9 --------- + 5 files changed, 2 insertions(+), 27 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp +index 8e4f20fe561..b056eb2488a 100644 +--- a/src/hotspot/cpu/riscv/frame_riscv.cpp ++++ b/src/hotspot/cpu/riscv/frame_riscv.cpp +@@ -495,8 +495,8 @@ frame frame::sender_for_compiled_frame(RegisterMap* map) const { + } + + //------------------------------------------------------------------------------ +-// frame::sender_raw +-frame frame::sender_raw(RegisterMap* map) const { ++// frame::sender ++frame frame::sender(RegisterMap* map) const { + // Default is we done have to follow them. The sender_for_xxx will + // update it accordingly + assert(map != NULL, "map must be set"); +@@ -521,10 +521,6 @@ frame frame::sender_raw(RegisterMap* map) const { + return frame(sender_sp(), link(), sender_pc()); + } + +-frame frame::sender(RegisterMap* map) const { +- return sender_raw(map); +-} +- + bool frame::is_interpreted_frame_valid(JavaThread* thread) const { + assert(is_interpreted_frame(), "Not an interpreted frame"); + // These are reasonable sanity checks +diff --git a/src/hotspot/cpu/riscv/frame_riscv.hpp b/src/hotspot/cpu/riscv/frame_riscv.hpp +index c06aaa9e391..3b88f6d5a1a 100644 +--- a/src/hotspot/cpu/riscv/frame_riscv.hpp ++++ b/src/hotspot/cpu/riscv/frame_riscv.hpp +@@ -196,7 +196,4 @@ + + static jint interpreter_frame_expression_stack_direction() { return -1; } + +- // returns the sending frame, without applying any barriers +- frame sender_raw(RegisterMap* map) const; +- + #endif // CPU_RISCV_FRAME_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +index 74dded77d19..4e642af87c4 100644 +--- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp ++++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +@@ -571,7 +571,6 @@ void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) { + + // remove activation + // +-// Apply stack watermark barrier. + // Unlock the receiver if this is a synchronized method. + // Unlock any Java monitors from syncronized blocks. + // Remove the activation from the stack. +diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +index c501c8f7bac..d740c99c979 100644 +--- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp ++++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +@@ -1565,14 +1565,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, + + // check for safepoint operation in progress and/or pending suspend requests + { +- // We need an acquire here to ensure that any subsequent load of the +- // global SafepointSynchronize::_state flag is ordered after this load +- // of the thread-local polling word. 
We don't want this poll to +- // return false (i.e. not safepointing) and a later poll of the global +- // SafepointSynchronize::_state spuriously to return true. +- // This is to avoid a race when we're in a native->Java transition +- // racing the code which wakes up from a safepoint. +- + __ safepoint_poll_acquire(safepoint_in_progress); + __ lwu(t0, Address(xthread, JavaThread::suspend_flags_offset())); + __ bnez(t0, safepoint_in_progress); +diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp +index 2d4baab2ab7..a07dea35b73 100644 +--- a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp ++++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp +@@ -1134,15 +1134,6 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { + // check for safepoint operation in progress and/or pending suspend requests + { + Label L, Continue; +- +- // We need an acquire here to ensure that any subsequent load of the +- // global SafepointSynchronize::_state flag is ordered after this load +- // of the thread-local polling word. We don't want this poll to +- // return false (i.e. not safepointing) and a later poll of the global +- // SafepointSynchronize::_state spuriously to return true. +- // +- // This is to avoid a race when we're in a native->Java transition +- // racing the code which wakes up from a safepoint. + __ safepoint_poll_acquire(L); + __ lwu(t1, Address(xthread, JavaThread::suspend_flags_offset())); + __ beqz(t1, Continue); + +From 99ca43f1e7e74f161b40466f49fc61aa734d334d Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Wed, 12 Apr 2023 12:35:33 +0800 +Subject: [PATCH 040/140] JDK-8243155: AArch64: Add support for SqrtVF + +--- + src/hotspot/cpu/riscv/riscv.ad | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 2dde4453dac..9da8a76c190 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -7206,7 +7206,7 @@ instruct absD_reg(fRegD dst, fRegD src) %{ + %} + + instruct sqrtF_reg(fRegF dst, fRegF src) %{ +- match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); ++ match(Set dst (SqrtF src)); + + ins_cost(FSQRT_COST); + format %{ "fsqrt.s $dst, $src\t#@sqrtF_reg" %} + +From 4bbd814dfbc33d3f1277dbb64f19a18f9f8c1a81 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Wed, 12 Apr 2023 15:11:49 +0800 +Subject: [PATCH 041/140] Revert JDK-8267098: AArch64: C1 StubFrames end + confusingly + +--- + src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp | 52 ++++++++++----------- + 1 file changed, 24 insertions(+), 28 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp +index f523c9ed50a..1f58bde4df5 100644 +--- a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp +@@ -167,19 +167,14 @@ int StubAssembler::call_RT(Register oop_result, Register metadata_result, addres + return call_RT(oop_result, metadata_result, entry, arg_num); + } + +-enum return_state_t { +- does_not_return, requires_return +-}; +- + // Implementation of StubFrame + + class StubFrame: public StackObj { + private: + StubAssembler* _sasm; +- bool _return_state; + + public: +- StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments, return_state_t return_state=requires_return); ++ StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments); + void load_argument(int 
offset_in_words, Register reg); + + ~StubFrame(); +@@ -197,9 +192,8 @@ void StubAssembler::epilogue() { + + #define __ _sasm-> + +-StubFrame::StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments, return_state_t return_state) { ++StubFrame::StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments) { + _sasm = sasm; +- _return_state = return_state; + __ prologue(name, must_gc_arguments); + } + +@@ -211,11 +205,7 @@ void StubFrame::load_argument(int offset_in_words, Register reg) { + + + StubFrame::~StubFrame() { +- if (_return_state == requires_return) { +- __ epilogue(); +- } else { +- __ should_not_reach_here(); +- } ++ __ epilogue(); + _sasm = NULL; + } + +@@ -378,6 +368,7 @@ OopMapSet* Runtime1::generate_exception_throw(StubAssembler* sasm, address targe + assert_cond(oop_maps != NULL); + oop_maps->add_gc_map(call_offset, oop_map); + ++ __ should_not_reach_here(); + return oop_maps; + } + +@@ -425,7 +416,9 @@ OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) { + sasm->set_frame_size(frame_size); + break; + } +- default: ShouldNotReachHere(); ++ default: ++ __ should_not_reach_here(); ++ break; + } + + // verify that only x10 and x13 are valid at this time +@@ -481,6 +474,9 @@ OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) { + restore_live_registers(sasm, id != handle_exception_nofpu_id); + break; + case handle_exception_from_callee_id: ++ // Pop the return address. ++ __ leave(); ++ __ ret(); // jump to exception handler + break; + default: ShouldNotReachHere(); + } +@@ -641,13 +637,13 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { + + case throw_div0_exception_id: + { +- StubFrame f(sasm, "throw_div0_exception", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "throw_div0_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_div0_exception), false); + } + break; + + case throw_null_pointer_exception_id: +- { StubFrame f(sasm, "throw_null_pointer_exception", dont_gc_arguments, does_not_return); ++ { StubFrame f(sasm, "throw_null_pointer_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_null_pointer_exception), false); + } + break; +@@ -926,14 +922,14 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { + + case throw_class_cast_exception_id: + { +- StubFrame f(sasm, "throw_class_cast_exception", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "throw_class_cast_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_class_cast_exception), true); + } + break; + + case throw_incompatible_class_change_error_id: + { +- StubFrame f(sasm, "throw_incompatible_class_cast_exception", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "throw_incompatible_class_cast_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, + CAST_FROM_FN_PTR(address, throw_incompatible_class_change_error), false); + } +@@ -1027,7 +1023,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { + + case deoptimize_id: + { +- StubFrame f(sasm, "deoptimize", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "deoptimize", dont_gc_arguments); + OopMap* oop_map = save_live_registers(sasm); + assert_cond(oop_map != NULL); + f.load_argument(0, c_rarg1); +@@ -1046,7 +1042,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { + 
+ case throw_range_check_failed_id: + { +- StubFrame f(sasm, "range_check_failed", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "range_check_failed", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_range_check_exception), true); + } + break; +@@ -1062,7 +1058,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { + + case access_field_patching_id: + { +- StubFrame f(sasm, "access_field_patching", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "access_field_patching", dont_gc_arguments); + // we should set up register map + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, access_field_patching)); + } +@@ -1070,7 +1066,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { + + case load_klass_patching_id: + { +- StubFrame f(sasm, "load_klass_patching", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "load_klass_patching", dont_gc_arguments); + // we should set up register map + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_klass_patching)); + } +@@ -1078,7 +1074,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { + + case load_mirror_patching_id: + { +- StubFrame f(sasm, "load_mirror_patching", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "load_mirror_patching", dont_gc_arguments); + // we should set up register map + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_mirror_patching)); + } +@@ -1086,7 +1082,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { + + case load_appendix_patching_id: + { +- StubFrame f(sasm, "load_appendix_patching", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "load_appendix_patching", dont_gc_arguments); + // we should set up register map + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_appendix_patching)); + } +@@ -1109,14 +1105,14 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { + + case throw_index_exception_id: + { +- StubFrame f(sasm, "index_range_check_failed", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "index_range_check_failed", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_index_exception), true); + } + break; + + case throw_array_store_exception_id: + { +- StubFrame f(sasm, "throw_array_store_exception", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "throw_array_store_exception", dont_gc_arguments); + // tos + 0: link + // + 1: return address + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_array_store_exception), true); +@@ -1125,7 +1121,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { + + case predicate_failed_trap_id: + { +- StubFrame f(sasm, "predicate_failed_trap", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "predicate_failed_trap", dont_gc_arguments); + + OopMap* map = save_live_registers(sasm); + assert_cond(map != NULL); +@@ -1156,7 +1152,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { + + default: + { +- StubFrame f(sasm, "unimplemented entry", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "unimplemented entry", dont_gc_arguments); + __ li(x10, (int) id); + __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), x10); + __ should_not_reach_here(); + +From eb37cfd42e7801c5ce64666c3cd25d40cfb22e76 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: 
Wed, 12 Apr 2023 18:06:40 +0800 +Subject: [PATCH 042/140] Revert JDK-8247691: [aarch64] Incorrect handling of + VM exceptions in C1 deopt stub/traps + +--- + src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp | 87 +++++++++++++++------ + 1 file changed, 65 insertions(+), 22 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp +index 1f58bde4df5..1f45fba9de0 100644 +--- a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp +@@ -581,37 +581,80 @@ OopMapSet* Runtime1::generate_patching(StubAssembler* sasm, address target) { + #endif + __ reset_last_Java_frame(true); + +-#ifdef ASSERT +- // Check that fields in JavaThread for exception oop and issuing pc are empty +- Label oop_empty; +- __ ld(t0, Address(xthread, Thread::pending_exception_offset())); +- __ beqz(t0, oop_empty); +- __ stop("exception oop must be empty"); +- __ bind(oop_empty); ++ // check for pending exceptions ++ { Label L; ++ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); ++ __ beqz(t0, L); ++ // exception pending => remove activation and forward to exception handler + +- Label pc_empty; +- __ ld(t0, Address(xthread, JavaThread::exception_pc_offset())); +- __ beqz(t0, pc_empty); +- __ stop("exception pc must be empty"); +- __ bind(pc_empty); ++ { Label L1; ++ __ bnez(x10, L1); // have we deoptimized? ++ __ far_jump(RuntimeAddress(Runtime1::entry_for(Runtime1::forward_exception_id))); ++ __ bind(L1); ++ } ++ ++ // the deopt blob expects exceptions in the special fields of ++ // JavaThread, so copy and clear pending exception. ++ ++ // load and clear pending exception ++ __ ld(x10, Address(xthread, Thread::pending_exception_offset())); ++ __ sd(zr, Address(xthread, Thread::pending_exception_offset())); ++ ++ // check that there is really a valid exception ++ __ verify_not_null_oop(x10); ++ ++ // load throwing pc: this is the return address of the stub ++ __ ld(x13, Address(fp, wordSize)); ++ ++#ifdef ASSERT ++ // Check that fields in JavaThread for exception oop and issuing pc are empty ++ Label oop_empty; ++ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); ++ __ beqz(t0, oop_empty); ++ __ stop("exception oop must be empty"); ++ __ bind(oop_empty); ++ ++ Label pc_empty; ++ __ ld(t0, Address(xthread, JavaThread::exception_pc_offset())); ++ __ beqz(t0, pc_empty); ++ __ stop("exception pc must be empty"); ++ __ bind(pc_empty); + #endif + +- // Runtime will return true if the nmethod has been deoptimized, this is the +- // expected scenario and anything else is an error. Note that we maintain a +- // check on the result purely as a defensive measure. +- Label no_deopt; +- __ beqz(x10, no_deopt); // Have we deoptimized? ++ // store exception oop and throwing pc to JavaThread ++ __ sd(x10, Address(xthread, JavaThread::exception_oop_offset())); ++ __ sd(x13, Address(xthread, JavaThread::exception_pc_offset())); ++ ++ restore_live_registers(sasm); + +- // Perform a re-execute. The proper return address is already on the stack, +- // we just need to restore registers, pop all of our frames but the return +- // address and jump to the deopt blob. ++ __ leave(); ++ ++ // Forward the exception directly to deopt blob. We can blow no ++ // registers and must leave throwing pc on the stack. A patch may ++ // have values live in registers so the entry point with the ++ // exception in tls. 
++ __ far_jump(RuntimeAddress(deopt_blob->unpack_with_exception_in_tls())); ++ ++ __ bind(L); ++ } ++ ++ // Runtime will return true if the nmethod has been deoptimized during ++ // the patching process. In that case we must do a deopt reexecute instead. ++ Label cont; ++ ++ __ beqz(x10, cont); // have we deoptimized? ++ ++ // Will reexecute. Proper return address is already on the stack we just restore ++ // registers, pop all of our frame but the return address and jump to the deopt blob + + restore_live_registers(sasm); + __ leave(); + __ far_jump(RuntimeAddress(deopt_blob->unpack_with_reexecution())); + +- __ bind(no_deopt); +- __ stop("deopt not performed"); ++ __ bind(cont); ++ restore_live_registers(sasm); ++ __ leave(); ++ __ ret(); + + return oop_maps; + } + +From 3fa279b459fffd1bd1ce158a7fdaa9d8704450a8 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 13 Apr 2023 18:29:27 +0800 +Subject: [PATCH 043/140] Revert JDK-8212681: Refactor IC locking to use a fine + grained CompiledICLocker + +--- + src/hotspot/cpu/riscv/compiledIC_riscv.cpp | 2 +- + src/hotspot/cpu/riscv/nativeInst_riscv.cpp | 3 +-- + 2 files changed, 2 insertions(+), 3 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp +index 75bc4be7840..4d1687301fc 100644 +--- a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp ++++ b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp +@@ -113,10 +113,10 @@ void CompiledDirectStaticCall::set_to_interpreted(const methodHandle& callee, ad + } + + void CompiledDirectStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub) { ++ assert (CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "mt unsafe call"); + // Reset stub. + address stub = static_stub->addr(); + assert(stub != NULL, "stub not found"); +- assert(CompiledICLocker::is_safe(stub), "mt unsafe call"); + // Creation also verifies the object. + NativeMovConstReg* method_holder + = nativeMovConstReg_at(stub + NativeFenceI::instruction_size()); +diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.cpp b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp +index 0a05c577860..459683735e9 100644 +--- a/src/hotspot/cpu/riscv/nativeInst_riscv.cpp ++++ b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp +@@ -146,8 +146,7 @@ address NativeCall::destination() const { + // during code generation, where no patching lock is needed. + void NativeCall::set_destination_mt_safe(address dest, bool assert_lock) { + assert(!assert_lock || +- (Patching_lock->is_locked() || SafepointSynchronize::is_at_safepoint()) || +- CompiledICLocker::is_safe(addr_at(0)), ++ (Patching_lock->is_locked() || SafepointSynchronize::is_at_safepoint()), + "concurrent code patching"); + + ResourceMark rm; + +From 727f1a8f9b4a6dfbb0cf2002f12b86b5d5f23362 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 13 Apr 2023 18:36:11 +0800 +Subject: [PATCH 044/140] Revert JDK-8225681: + vmTestbase/nsk/jvmti/RedefineClasses/StressRedefine fails due a) MT-unsafe + modification of inline cache + +--- + src/hotspot/cpu/riscv/compiledIC_riscv.cpp | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp +index 4d1687301fc..0b13e44c8d6 100644 +--- a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp ++++ b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp +@@ -99,10 +99,15 @@ void CompiledDirectStaticCall::set_to_interpreted(const methodHandle& callee, ad + // Creation also verifies the object. 
+ NativeMovConstReg* method_holder + = nativeMovConstReg_at(stub + NativeFenceI::instruction_size()); +-#ifdef ASSERT ++#ifndef PRODUCT + NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); + +- verify_mt_safe(callee, entry, method_holder, jump); ++ // read the value once ++ volatile intptr_t data = method_holder->data(); ++ assert(data == 0 || data == (intptr_t)callee(), ++ "a) MT-unsafe modification of inline cache"); ++ assert(data == 0 || jump->jump_destination() == entry, ++ "b) MT-unsafe modification of inline cache"); + #endif + // Update stub. + method_holder->set_data((intptr_t)callee()); + +From 26e37551ecc41db0cf8eeb775a5501b4f45b4ffa Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 13 Apr 2023 18:39:52 +0800 +Subject: [PATCH 045/140] Revert JDK-8232046: AArch64 build failure after + JDK-8225681 + +--- + src/hotspot/cpu/riscv/compiledIC_riscv.cpp | 2 -- + src/hotspot/cpu/riscv/nativeInst_riscv.cpp | 19 ++++--------------- + 2 files changed, 4 insertions(+), 17 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp +index 0b13e44c8d6..1cfc92b28fa 100644 +--- a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp ++++ b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp +@@ -126,8 +126,6 @@ void CompiledDirectStaticCall::set_stub_to_clean(static_stub_Relocation* static_ + NativeMovConstReg* method_holder + = nativeMovConstReg_at(stub + NativeFenceI::instruction_size()); + method_holder->set_data(0); +- NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); +- jump->set_jump_destination((address)-1); + } + + //----------------------------------------------------------------------------- +diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.cpp b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp +index 459683735e9..bfe84fa4e30 100644 +--- a/src/hotspot/cpu/riscv/nativeInst_riscv.cpp ++++ b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp +@@ -272,15 +272,9 @@ address NativeJump::jump_destination() const { + + // We use jump to self as the unresolved address which the inline + // cache code (and relocs) know about +- // As a special case we also use sequence movptr_with_offset(r,0), jalr(r,0) +- // i.e. jump to 0 when we need leave space for a wide immediate +- // load +- +- // return -1 if jump to self or to 0 +- if ((dest == (address) this) || dest == 0) { +- dest = (address) -1; +- } + ++ // return -1 if jump to self ++ dest = (dest == (address) this) ? (address) -1 : dest; + return dest; + }; + +@@ -302,14 +296,9 @@ address NativeGeneralJump::jump_destination() const { + + // We use jump to self as the unresolved address which the inline + // cache code (and relocs) know about +- // As a special case we also use jump to 0 when first generating +- // a general jump +- +- // return -1 if jump to self or to 0 +- if ((dest == (address) this) || dest == 0) { +- dest = (address) -1; +- } + ++ // return -1 if jump to self ++ dest = (dest == (address) this) ? 
(address) -1 : dest; + return dest; + } + + +From 4fc68bc3cd13e623276965947d6c8cb14da15873 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 13 Apr 2023 18:47:08 +0800 +Subject: [PATCH 046/140] Revert JDK-8213084: Rework and enhance + Print[Opto]Assembly output + +--- + src/hotspot/cpu/riscv/assembler_riscv.hpp | 8 -------- + src/hotspot/cpu/riscv/disassembler_riscv.hpp | 20 -------------------- + 2 files changed, 28 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp +index 44e8d4b4ff1..b4e7287ce08 100644 +--- a/src/hotspot/cpu/riscv/assembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp +@@ -268,14 +268,6 @@ class Assembler : public AbstractAssembler { + + enum { instruction_size = 4 }; + +- //---< calculate length of instruction >--- +- // We just use the values set above. +- // instruction must start at passed address +- static unsigned int instr_len(unsigned char *instr) { return instruction_size; } +- +- //---< longest instructions >--- +- static unsigned int instr_maxlen() { return instruction_size; } +- + enum RoundingMode { + rne = 0b000, // round to Nearest, ties to Even + rtz = 0b001, // round towards Zero +diff --git a/src/hotspot/cpu/riscv/disassembler_riscv.hpp b/src/hotspot/cpu/riscv/disassembler_riscv.hpp +index b0e5560c906..06bca5298cd 100644 +--- a/src/hotspot/cpu/riscv/disassembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/disassembler_riscv.hpp +@@ -35,24 +35,4 @@ static const char* pd_cpu_opts() { + return ""; + } + +-// Returns address of n-th instruction preceding addr, +-// NULL if no preceding instruction can be found. +-// On riscv, we assume a constant instruction length. +-// It might be beneficial to check "is_readable" as we do on ppc and s390. +-static address find_prev_instr(address addr, int n_instr) { +- return addr - Assembler::instruction_size * n_instr; +-} +- +-// special-case instruction decoding. +-// There may be cases where the binutils disassembler doesn't do +-// the perfect job. In those cases, decode_instruction0 may kick in +-// and do it right. +-// If nothing had to be done, just return "here", otherwise return "here + instr_len(here)" +-static address decode_instruction0(address here, outputStream* st, address virtual_begin = NULL) { +- return here; +-} +- +-// platform-specific instruction annotations (like value of loaded constants) +-static void annotate(address pc, outputStream* st) {} +- + #endif // CPU_RISCV_DISASSEMBLER_RISCV_HPP + +From f660c594eccb174c9779ebdc9ba40fe579aa50cc Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 13 Apr 2023 19:44:28 +0800 +Subject: [PATCH 047/140] Revert JDK-8241909: Remove useless code cache lookup + in frame::patch_pc + +--- + src/hotspot/cpu/riscv/frame_riscv.cpp | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp +index b056eb2488a..d03adc0bff4 100644 +--- a/src/hotspot/cpu/riscv/frame_riscv.cpp ++++ b/src/hotspot/cpu/riscv/frame_riscv.cpp +@@ -270,7 +270,6 @@ bool frame::safe_for_sender(JavaThread *thread) { + } + + void frame::patch_pc(Thread* thread, address pc) { +- assert(_cb == CodeCache::find_blob(pc), "unexpected pc"); + address* pc_addr = &(((address*) sp())[-1]); + if (TracePcPatching) { + tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "]", +@@ -280,6 +279,7 @@ void frame::patch_pc(Thread* thread, address pc) { + // patch in the same address that's already there. 
+ assert(_pc == *pc_addr || pc == *pc_addr, "must be"); + *pc_addr = pc; ++ _cb = CodeCache::find_blob(pc); + address original_pc = CompiledMethod::get_deopt_original_pc(this); + if (original_pc != NULL) { + assert(original_pc == _pc, "expected original PC to be stored before patching"); + +From 0d1ed436d9b70c9244c5de42fb492bbfa5e785e8 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 16 Apr 2023 21:10:06 +0800 +Subject: [PATCH 048/140] Revert JDK-8277411: C2 fast_unlock intrinsic on + AArch64 has unnecessary ownership check & JDK-8277180: Intrinsify recursive + ObjectMonitor locking for C2 x64 and A64 + +--- + src/hotspot/cpu/riscv/riscv.ad | 24 ++++-------------------- + 1 file changed, 4 insertions(+), 20 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 9da8a76c190..c0fbda4f3f9 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -2204,16 +2204,6 @@ encode %{ + __ mv(tmp, (address)markOopDesc::unused_mark()); + __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); + +- __ beqz(flag, cont); // CAS success means locking succeeded +- +- __ bne(flag, xthread, cont); // Check for recursive locking +- +- // Recursive lock case +- __ mv(flag, zr); +- __ ld(tmp, Address(disp_hdr, ObjectMonitor::recursions_offset_in_bytes() - markOopDesc::monitor_value)); +- __ add(tmp, tmp, 1u); +- __ sd(tmp, Address(disp_hdr, ObjectMonitor::recursions_offset_in_bytes() - markOopDesc::monitor_value)); +- + __ bind(cont); + %} + +@@ -2257,18 +2247,12 @@ encode %{ + __ bind(object_has_monitor); + STATIC_ASSERT(markOopDesc::monitor_value <= INT_MAX); + __ add(tmp, tmp, -(int)markOopDesc::monitor_value); // monitor ++ __ ld(flag, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); + __ ld(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes())); ++ __ xorr(flag, flag, xthread); // Will be 0 if we are the owner. ++ __ orr(flag, flag, disp_hdr); // Will be 0 if there are 0 recursions ++ __ bnez(flag, cont); + +- Label notRecursive; +- __ beqz(disp_hdr, notRecursive); // Will be 0 if not recursive. +- +- // Recursive lock +- __ addi(disp_hdr, disp_hdr, -1); +- __ sd(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes())); +- __ mv(flag, zr); +- __ j(cont); +- +- __ bind(notRecursive); + __ ld(flag, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes())); + __ ld(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes())); + __ orr(flag, flag, disp_hdr); // Will be 0 if both are 0. + +From cac7117dfc03023a81030e274944921df07bbead Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 16 Apr 2023 21:13:21 +0800 +Subject: [PATCH 049/140] Revert JDK-8210381: Obsolete EmitSync + +--- + src/hotspot/cpu/riscv/riscv.ad | 100 ++++++++++++++++++++------------- + 1 file changed, 60 insertions(+), 40 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index c0fbda4f3f9..c3ef648b21d 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -2150,9 +2150,17 @@ encode %{ + // Load markWord from object into displaced_header. + __ ld(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes())); + ++ // Always do locking in runtime. 
++ if (EmitSync & 0x01) { ++ __ mv(flag, 1); ++ return; ++ } ++ + // Check for existing monitor +- __ andi(t0, disp_hdr, markOopDesc::monitor_value); +- __ bnez(t0, object_has_monitor); ++ if ((EmitSync & 0x02) == 0) { ++ __ andi(t0, disp_hdr, markOopDesc::monitor_value); ++ __ bnez(t0, object_has_monitor); ++ } + + // Set tmp to be (markWord of object | UNLOCK_VALUE). + __ ori(tmp, disp_hdr, markOopDesc::unlocked_value); +@@ -2185,24 +2193,26 @@ encode %{ + __ sd(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes())); + __ mv(flag, tmp); // we can use the value of tmp as the result here + +- __ j(cont); +- +- // Handle existing monitor. +- __ bind(object_has_monitor); +- // The object's monitor m is unlocked iff m->owner == NULL, +- // otherwise m->owner may contain a thread or a stack address. +- // +- // Try to CAS m->owner from NULL to current thread. +- __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value)); +- __ cmpxchg(/*memory address*/tmp, /*expected value*/zr, /*new value*/xthread, Assembler::int64, Assembler::aq, +- Assembler::rl, /*result*/flag); // cas succeeds if flag == zr(expected) +- +- // Store a non-null value into the box to avoid looking like a re-entrant +- // lock. The fast-path monitor unlock code checks for +- // markOopDesc::monitor_value so use markOopDesc::unused_mark which has the +- // relevant bit set, and also matches ObjectSynchronizer::slow_enter. +- __ mv(tmp, (address)markOopDesc::unused_mark()); +- __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); ++ if ((EmitSync & 0x02) == 0) { ++ __ j(cont); ++ ++ // Handle existing monitor. ++ __ bind(object_has_monitor); ++ // The object's monitor m is unlocked iff m->owner == NULL, ++ // otherwise m->owner may contain a thread or a stack address. ++ // ++ // Try to CAS m->owner from NULL to current thread. ++ __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value)); ++ __ cmpxchg(/*memory address*/tmp, /*expected value*/zr, /*new value*/xthread, Assembler::int64, Assembler::aq, ++ Assembler::rl, /*result*/flag); // cas succeeds if flag == zr(expected) ++ ++ // Store a non-null value into the box to avoid looking like a re-entrant ++ // lock. The fast-path monitor unlock code checks for ++ // markOopDesc::monitor_value so use markOopDesc::unused_mark which has the ++ // relevant bit set, and also matches ObjectSynchronizer::slow_enter. ++ __ mv(tmp, (address)markOopDesc::unused_mark()); ++ __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); ++ } + + __ bind(cont); + %} +@@ -2220,6 +2230,12 @@ encode %{ + + assert_different_registers(oop, box, tmp, disp_hdr, flag); + ++ // Always do locking in runtime. ++ if (EmitSync & 0x01) { ++ __ mv(flag, 1); ++ return; ++ } ++ + // Find the lock address and load the displaced header from the stack. + __ ld(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes())); + +@@ -2228,9 +2244,11 @@ encode %{ + __ beqz(disp_hdr, cont); + + // Handle existing monitor. 
+- __ ld(tmp, Address(oop, oopDesc::mark_offset_in_bytes())); +- __ andi(t0, disp_hdr, markOopDesc::monitor_value); +- __ bnez(t0, object_has_monitor); ++ if ((EmitSync & 0x02) == 0) { ++ __ ld(tmp, Address(oop, oopDesc::mark_offset_in_bytes())); ++ __ andi(t0, disp_hdr, markOopDesc::monitor_value); ++ __ bnez(t0, object_has_monitor); ++ } + + // Check if it is still a light weight lock, this is true if we + // see the stack address of the basicLock in the markWord of the +@@ -2244,23 +2262,25 @@ encode %{ + assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); + + // Handle existing monitor. +- __ bind(object_has_monitor); +- STATIC_ASSERT(markOopDesc::monitor_value <= INT_MAX); +- __ add(tmp, tmp, -(int)markOopDesc::monitor_value); // monitor +- __ ld(flag, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); +- __ ld(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes())); +- __ xorr(flag, flag, xthread); // Will be 0 if we are the owner. +- __ orr(flag, flag, disp_hdr); // Will be 0 if there are 0 recursions +- __ bnez(flag, cont); +- +- __ ld(flag, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes())); +- __ ld(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes())); +- __ orr(flag, flag, disp_hdr); // Will be 0 if both are 0. +- __ bnez(flag, cont); +- // need a release store here +- __ la(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); +- __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); +- __ sd(zr, Address(tmp)); // set unowned ++ if ((EmitSync & 0x02) == 0) { ++ __ bind(object_has_monitor); ++ STATIC_ASSERT(markOopDesc::monitor_value <= INT_MAX); ++ __ add(tmp, tmp, -(int)markOopDesc::monitor_value); // monitor ++ __ ld(flag, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); ++ __ ld(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes())); ++ __ xorr(flag, flag, xthread); // Will be 0 if we are the owner. ++ __ orr(flag, flag, disp_hdr); // Will be 0 if there are 0 recursions ++ __ bnez(flag, cont); ++ ++ __ ld(flag, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes())); ++ __ ld(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes())); ++ __ orr(flag, flag, disp_hdr); // Will be 0 if both are 0. 
++ __ bnez(flag, cont); ++ // need a release store here ++ __ la(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); ++ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); ++ __ sd(zr, Address(tmp)); // set unowned ++ } + + __ bind(cont); + %} + +From ca7ab86ee886233651e1a79faff631fd7e226d57 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 16 Apr 2023 22:07:21 +0800 +Subject: [PATCH 050/140] Revert JDK-8256425: Obsolete Biased Locking in JDK 18 + +--- + src/hotspot/cpu/riscv/assembler_riscv.hpp | 2 + + .../cpu/riscv/c1_LIRAssembler_riscv.cpp | 6 +- + .../cpu/riscv/c1_LIRGenerator_riscv.cpp | 7 +- + .../cpu/riscv/c1_MacroAssembler_riscv.cpp | 35 ++- + .../cpu/riscv/c1_MacroAssembler_riscv.hpp | 3 +- + src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 27 ++- + .../cpu/riscv/macroAssembler_riscv.cpp | 217 ++++++++++++++++++ + .../cpu/riscv/macroAssembler_riscv.hpp | 28 +++ + src/hotspot/cpu/riscv/riscv.ad | 12 + + src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 8 + + src/hotspot/cpu/riscv/templateTable_riscv.cpp | 8 +- + 11 files changed, 341 insertions(+), 12 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp +index b4e7287ce08..51aa052a0c7 100644 +--- a/src/hotspot/cpu/riscv/assembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp +@@ -3043,4 +3043,6 @@ enum Nf { + virtual ~Assembler() {} + }; + ++class BiasedLockingCounters; ++ + #endif // CPU_RISCV_ASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +index 46a20a64194..6a961ee2307 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +@@ -1511,9 +1511,13 @@ void LIR_Assembler::emit_lock(LIR_OpLock* op) { + if (!UseFastLocking) { + __ j(*op->stub()->entry()); + } else if (op->code() == lir_lock) { ++ Register scratch = noreg; ++ if (UseBiasedLocking) { ++ scratch = op->scratch_opr()->as_register(); ++ } + assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header"); + // add debug info for NullPointerException only if one is possible +- int null_check_offset = __ lock_object(hdr, obj, lock, *op->stub()->entry()); ++ int null_check_offset = __ lock_object(hdr, obj, lock, scratch, *op->stub()->entry()); + if (op->info() != NULL) { + add_debug_info_for_null_check(null_check_offset, op->info()); + } +diff --git a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp +index e126f148cdf..c45a75b2301 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp +@@ -277,6 +277,11 @@ void LIRGenerator::do_MonitorEnter(MonitorEnter* x) { + + // "lock" stores the address of the monitor stack slot, so this is not an oop + LIR_Opr lock = new_register(T_INT); ++ // Need a scratch register for biased locking ++ LIR_Opr scratch = LIR_OprFact::illegalOpr; ++ if (UseBiasedLocking) { ++ scratch = new_register(T_INT); ++ } + + CodeEmitInfo* info_for_exception = NULL; + if (x->needs_null_check()) { +@@ -285,7 +290,7 @@ void LIRGenerator::do_MonitorEnter(MonitorEnter* x) { + // this CodeEmitInfo must not have the xhandlers because here the + // object is already locked (xhandlers expect object to be unlocked) + CodeEmitInfo* info = state_for(x, x->state(), true); +- monitor_enter(obj.result(), lock, syncTempOpr(), LIR_OprFact::illegalOpr, ++ monitor_enter(obj.result(), lock, 
syncTempOpr(), scratch, + x->monitor_no(), info_for_exception, info); + } + +diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp +index 2d52343587e..e486f41948e 100644 +--- a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp +@@ -35,6 +35,7 @@ + #include "oops/arrayOop.hpp" + #include "oops/markWord.hpp" + #include "runtime/basicLock.hpp" ++#include "runtime/biasedLocking.hpp" + #include "runtime/os.hpp" + #include "runtime/sharedRuntime.hpp" + #include "runtime/stubRoutines.hpp" +@@ -50,7 +51,7 @@ void C1_MacroAssembler::float_cmp(bool is_float, int unordered_result, + } + } + +-int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Label& slow_case) { ++int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Register scratch, Label& slow_case) { + const int aligned_mask = BytesPerWord - 1; + const int hdr_offset = oopDesc::mark_offset_in_bytes(); + assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); +@@ -62,7 +63,12 @@ int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr + // save object being locked into the BasicObjectLock + sd(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); + +- null_check_offset = offset(); ++ if (UseBiasedLocking) { ++ assert(scratch != noreg, "should have scratch register at this point"); ++ null_check_offset = biased_locking_enter(disp_hdr, obj, hdr, scratch, false, done, &slow_case); ++ } else { ++ null_check_offset = offset(); ++ } + + // Load object header + ld(hdr, Address(obj, hdr_offset)); +@@ -98,6 +104,10 @@ int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr + // otherwise we don't care about the result and handle locking via runtime call + bnez(hdr, slow_case, /* is_far */ true); + bind(done); ++ if (PrintBiasedLockingStatistics) { ++ la(t1, ExternalAddress((address)BiasedLocking::fast_path_entry_count_addr())); ++ add_memory_int32(Address(t1, 0), 1); ++ } + return null_check_offset; + } + +@@ -107,13 +117,21 @@ void C1_MacroAssembler::unlock_object(Register hdr, Register obj, Register disp_ + assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); + Label done; + ++ if (UseBiasedLocking) { ++ // load object ++ ld(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); ++ biased_locking_exit(obj, hdr, done); ++ } ++ + // load displaced header + ld(hdr, Address(disp_hdr, 0)); + // if the loaded hdr is NULL we had recursive locking + // if we had recursive locking, we are done + beqz(hdr, done); +- // load object +- ld(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); ++ if (!UseBiasedLocking) { ++ // load object ++ ld(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); ++ } + verify_oop(obj); + // test if object header is pointing to the displaced header, and if so, restore + // the displaced header in the object - if the object header is not pointing to +@@ -140,8 +158,13 @@ void C1_MacroAssembler::try_allocate(Register obj, Register var_size_in_bytes, i + + void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, Register tmp1, Register tmp2) { + assert_different_registers(obj, klass, len); +- // This assumes that all prototype bits fitr in an int32_t +- mv(tmp1, (int32_t)(intptr_t)markOopDesc::prototype()); ++ if (UseBiasedLocking && !len->is_valid()) { ++ 
assert_different_registers(obj, klass, len, tmp1, tmp2); ++ ld(tmp1, Address(klass, Klass::prototype_header_offset())); ++ } else { ++ // This assumes that all prototype bits fitr in an int32_t ++ mv(tmp1, (int32_t)(intptr_t)markOopDesc::prototype()); ++ } + sd(tmp1, Address(obj, oopDesc::mark_offset_in_bytes())); + + if (UseCompressedClassPointers) { // Take care not to kill klass +diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp +index dfd3c17d7c7..1950cee5dd5 100644 +--- a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp +@@ -59,8 +59,9 @@ using MacroAssembler::null_check; + // hdr : must be x10, contents destroyed + // obj : must point to the object to lock, contents preserved + // disp_hdr: must point to the displaced header location, contents preserved ++ // scratch : scratch register, contents destroyed + // returns code offset at which to add null check debug information +- int lock_object (Register swap, Register obj, Register disp_hdr, Label& slow_case); ++ int lock_object (Register swap, Register obj, Register disp_hdr, Register scratch, Label& slow_case); + + // unlocking + // hdr : contents destroyed +diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +index 4e642af87c4..f0c249f0d26 100644 +--- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp ++++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +@@ -39,6 +39,7 @@ + #include "prims/jvmtiExport.hpp" + #include "prims/jvmtiThreadState.hpp" + #include "runtime/basicLock.hpp" ++#include "runtime/biasedLocking.hpp" + #include "runtime/frame.inline.hpp" + #include "runtime/safepointMechanism.hpp" + #include "runtime/sharedRuntime.hpp" +@@ -782,6 +783,10 @@ void InterpreterMacroAssembler::lock_object(Register lock_reg) + // Load object pointer into obj_reg c_rarg3 + ld(obj_reg, Address(lock_reg, obj_offset)); + ++ if (UseBiasedLocking) { ++ biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp, false, done, &slow_case); ++ } ++ + // Load (object->mark() | 1) into swap_reg + ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + ori(swap_reg, t0, 1); +@@ -792,7 +797,17 @@ void InterpreterMacroAssembler::lock_object(Register lock_reg) + assert(lock_offset == 0, + "displached header must be first word in BasicObjectLock"); + +- cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, t0, done, /*fallthrough*/NULL); ++ if (PrintBiasedLockingStatistics) { ++ Label fail, fast; ++ cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, t0, fast, &fail); ++ bind(fast); ++ atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()), ++ t1, t0); ++ j(done); ++ bind(fail); ++ } else { ++ cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, t0, done, /*fallthrough*/NULL); ++ } + + // Test if the oopMark is an obvious stack pointer, i.e., + // 1) (mark & 7) == 0, and +@@ -809,6 +824,12 @@ void InterpreterMacroAssembler::lock_object(Register lock_reg) + + // Save the test result, for recursive case, the result is zero + sd(swap_reg, Address(lock_reg, mark_offset)); ++ ++ if (PrintBiasedLockingStatistics) { ++ bnez(swap_reg, slow_case); ++ atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()), ++ t1, t0); ++ } + beqz(swap_reg, done); + + bind(slow_case); +@@ -861,6 +882,10 @@ void InterpreterMacroAssembler::unlock_object(Register lock_reg) + // Free entry + sd(zr, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); + ++ if (UseBiasedLocking) { ++ 
biased_locking_exit(obj_reg, header_reg, done); ++ } ++ + // Load the old header from BasicLock structure + ld(header_reg, Address(swap_reg, + BasicLock::displaced_header_offset_in_bytes())); +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index 73629e3dba3..e557a134b5b 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -41,6 +41,7 @@ + #include "oops/compressedOops.inline.hpp" + #include "oops/klass.inline.hpp" + #include "oops/oop.hpp" ++#include "runtime/biasedLocking.hpp" + #include "runtime/interfaceSupport.inline.hpp" + #include "runtime/jniHandles.inline.hpp" + #include "runtime/sharedRuntime.hpp" +@@ -2791,6 +2792,222 @@ void MacroAssembler::reserved_stack_check() { + bind(no_reserved_zone_enabling); + } + ++void MacroAssembler::atomic_incw(Register counter_addr, Register tmp) { ++ Label retry_load; ++ bind(retry_load); ++ // flush and load exclusive from the memory location ++ lr_w(tmp, counter_addr); ++ addw(tmp, tmp, 1); ++ // if we store+flush with no intervening write tmp wil be zero ++ sc_w(tmp, tmp, counter_addr); ++ bnez(tmp, retry_load); ++} ++ ++void MacroAssembler::load_prototype_header(Register dst, Register src) { ++ load_klass(dst, src); ++ ld(dst, Address(dst, Klass::prototype_header_offset())); ++} ++ ++int MacroAssembler::biased_locking_enter(Register lock_reg, ++ Register obj_reg, ++ Register swap_reg, ++ Register tmp_reg, ++ bool swap_reg_contains_mark, ++ Label& done, ++ Label* slow_case, ++ BiasedLockingCounters* counters, ++ Register flag) { ++ assert(UseBiasedLocking, "why call this otherwise?"); ++ assert_different_registers(lock_reg, obj_reg, swap_reg); ++ ++ if (PrintBiasedLockingStatistics && counters == NULL) ++ counters = BiasedLocking::counters(); ++ ++ assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, t0); ++ assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); ++ Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); ++ ++ // Biased locking ++ // See whether the lock is currently biased toward our thread and ++ // whether the epoch is still valid ++ // Note that the runtime guarantees sufficient alignment of JavaThread ++ // pointers to allow age to be placed into low bits ++ // First check to see whether biasing is even enabled for this object ++ Label cas_label; ++ int null_check_offset = -1; ++ if (!swap_reg_contains_mark) { ++ null_check_offset = offset(); ++ ld(swap_reg, mark_addr); ++ } ++ andi(tmp_reg, swap_reg, markOopDesc::biased_lock_mask_in_place); ++ li(t0, markOopDesc::biased_lock_pattern); ++ bne(t0, tmp_reg, cas_label); ++ // The bias pattern is present in the object's header. Need to check ++ // whether the bias owner and the epoch are both still current. ++ load_prototype_header(tmp_reg, obj_reg); ++ orr(tmp_reg, tmp_reg, xthread); ++ xorr(tmp_reg, swap_reg, tmp_reg); ++ andi(tmp_reg, tmp_reg, ~((int) markOopDesc::age_mask_in_place)); ++ if (flag->is_valid()) { ++ mv(flag, tmp_reg); ++ } ++ if (counters != NULL) { ++ Label around; ++ bnez(tmp_reg, around); ++ atomic_incw(Address((address)counters->biased_lock_entry_count_addr()), tmp_reg, t0); ++ j(done); ++ bind(around); ++ } else { ++ beqz(tmp_reg, done); ++ } ++ ++ Label try_revoke_bias; ++ Label try_rebias; ++ ++ // At this point we know that the header has the bias pattern and ++ // that we are not the bias owner in the current epoch. 
We need to ++ // figure out more details about the state of the header in order to ++ // know what operations can be legally performed on the object's ++ // header. ++ ++ // If the low three bits in the xor result aren't clear, that means ++ // the prototype header is no longer biased and we have to revoke ++ // the bias on this object. ++ andi(t0, tmp_reg, markOopDesc::biased_lock_mask_in_place); ++ bnez(t0, try_revoke_bias); ++ ++ // Biasing is still enabled for this data type. See whether the ++ // epoch of the current bias is still valid, meaning that the epoch ++ // bits of the mark word are equal to the epoch bits of the ++ // prototype header. (Note that the prototype header's epoch bits ++ // only change at a safepoint.) If not, attempt to rebias the object ++ // toward the current thread. Note that we must be absolutely sure ++ // that the current epoch is invalid in order to do this because ++ // otherwise the manipulations it performs on the mark word are ++ // illegal. ++ andi(t0, tmp_reg, markOopDesc::epoch_mask_in_place); ++ bnez(t0, try_rebias); ++ ++ // The epoch of the current bias is still valid but we know nothing ++ // about the owner; it might be set or it might be clear. Try to ++ // acquire the bias of the object using an atomic operation. If this ++ // fails we will go in to the runtime to revoke the object's bias. ++ // Note that we first construct the presumed unbiased header so we ++ // don't accidentally blow away another thread's valid bias. ++ { ++ Label cas_success; ++ Label counter; ++ mv(t0, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); ++ andr(swap_reg, swap_reg, t0); ++ orr(tmp_reg, swap_reg, xthread); ++ cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, slow_case); ++ // cas failed here if slow_cass == NULL ++ if (flag->is_valid()) { ++ mv(flag, 1); ++ j(counter); ++ } ++ // If the biasing toward our thread failed, this means that ++ // another thread succeeded in biasing it toward itself and we ++ // need to revoke that bias. The revocation will occur in the ++ // interpreter runtime in the slow case. ++ bind(cas_success); ++ if (flag->is_valid()) { ++ mv(flag, 0); ++ bind(counter); ++ } ++ if (counters != NULL) { ++ atomic_incw(Address((address)counters->anonymously_biased_lock_entry_count_addr()), ++ tmp_reg, t0); ++ } ++ } ++ j(done); ++ ++ bind(try_rebias); ++ // At this point we know the epoch has expired, meaning that the ++ // current "bias owner", if any, is actually invalid. Under these ++ // circumstances _only_, we are allowed to use the current header's ++ // value as the comparison value when doing the cas to acquire the ++ // bias in the current epoch. In other words, we allow transfer of ++ // the bias from one thread to another directly in this situation. ++ // ++ // FIXME: due to a lack of registers we currently blow away the age ++ // bits in this situation. Should attempt to preserve them. ++ { ++ Label cas_success; ++ Label counter; ++ load_prototype_header(tmp_reg, obj_reg); ++ orr(tmp_reg, xthread, tmp_reg); ++ cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, slow_case); ++ // cas failed here if slow_cass == NULL ++ if (flag->is_valid()) { ++ mv(flag, 1); ++ j(counter); ++ } ++ ++ // If the biasing toward our thread failed, then another thread ++ // succeeded in biasing it toward itself and we need to revoke that ++ // bias. The revocation will occur in the runtime in the slow case. 
++ bind(cas_success); ++ if (flag->is_valid()) { ++ mv(flag, 0); ++ bind(counter); ++ } ++ if (counters != NULL) { ++ atomic_incw(Address((address)counters->rebiased_lock_entry_count_addr()), ++ tmp_reg, t0); ++ } ++ } ++ j(done); ++ ++ bind(try_revoke_bias); ++ // The prototype mark in the klass doesn't have the bias bit set any ++ // more, indicating that objects of this data type are not supposed ++ // to be biased any more. We are going to try to reset the mark of ++ // this object to the prototype value and fall through to the ++ // CAS-based locking scheme. Note that if our CAS fails, it means ++ // that another thread raced us for the privilege of revoking the ++ // bias of this particular object, so it's okay to continue in the ++ // normal locking code. ++ // ++ // FIXME: due to a lack of registers we currently blow away the age ++ // bits in this situation. Should attempt to preserve them. ++ { ++ Label cas_success, nope; ++ load_prototype_header(tmp_reg, obj_reg); ++ cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, &nope); ++ bind(cas_success); ++ ++ // Fall through to the normal CAS-based lock, because no matter what ++ // the result of the above CAS, some thread must have succeeded in ++ // removing the bias bit from the object's header. ++ if (counters != NULL) { ++ atomic_incw(Address((address)counters->revoked_lock_entry_count_addr()), tmp_reg, ++ t0); ++ } ++ bind(nope); ++ } ++ ++ bind(cas_label); ++ ++ return null_check_offset; ++} ++ ++void MacroAssembler::biased_locking_exit(Register obj_reg, Register tmp_reg, Label& done, Register flag) { ++ assert(UseBiasedLocking, "why call this otherwise?"); ++ ++ // Check for biased locking unlock case, which is a no-op ++ // Note: we do not have to check the thread ID for two reasons. ++ // First, the interpreter checks for IllegalMonitorStateException at ++ // a higher level. Second, if the bias was revoked while we held the ++ // lock, the object could not be rebiased toward another thread, so ++ // the bias bit would be clear. ++ ld(tmp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); ++ andi(tmp_reg, tmp_reg, markOopDesc::biased_lock_mask_in_place); ++ sub(tmp_reg, tmp_reg, markOopDesc::biased_lock_pattern); ++ if (flag->is_valid()) { mv(flag, tmp_reg); } ++ beqz(tmp_reg, done); ++} ++ + // Move the address of the polling page into dest. + void MacroAssembler::get_polling_page(Register dest, address page, int32_t &offset, relocInfo::relocType rtype) { + if (SafepointMechanism::uses_thread_local_poll()) { +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +index 8a2c6e07d88..c1ffa120774 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +@@ -47,6 +47,32 @@ class MacroAssembler: public Assembler { + void safepoint_poll(Label& slow_path); + void safepoint_poll_acquire(Label& slow_path); + ++ // Biased locking support ++ // lock_reg and obj_reg must be loaded up with the appropriate values. ++ // swap_reg is killed. ++ // tmp_reg must be supplied and must not be rscratch1 or rscratch2 ++ // Optional slow case is for implementations (interpreter and C1) which branch to ++ // slow case directly. Leaves condition codes set for C2's Fast_Lock node. ++ // Returns offset of first potentially-faulting instruction for null ++ // check info (currently consumed only by C1). 
If ++ // swap_reg_contains_mark is true then returns -1 as it is assumed ++ // the calling code has already passed any potential faults. ++ int biased_locking_enter(Register lock_reg, Register obj_reg, ++ Register swap_reg, Register tmp_reg, ++ bool swap_reg_contains_mark, ++ Label& done, Label* slow_case = NULL, ++ BiasedLockingCounters* counters = NULL, ++ Register flag = noreg); ++ void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done, Register flag = noreg); ++ ++ // Helper functions for statistics gathering. ++ // Unconditional atomic increment. ++ void atomic_incw(Register counter_addr, Register tmp); ++ void atomic_incw(Address counter_addr, Register tmp1, Register tmp2) { ++ la(tmp1, counter_addr); ++ atomic_incw(tmp1, tmp2); ++ } ++ + // Place a fence.i after code may have been modified due to a safepoint. + void safepoint_ifence(); + +@@ -225,6 +251,8 @@ class MacroAssembler: public Assembler { + // stored using routines that take a jobject. + void store_heap_oop_null(Address dst); + ++ void load_prototype_header(Register dst, Register src); ++ + // This dummy is to prevent a call to store_heap_oop from + // converting a zero (linke NULL) into a Register by giving + // the compiler two choices it can't resolve +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index c3ef648b21d..c2a0be140e9 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -2156,6 +2156,10 @@ encode %{ + return; + } + ++ if (UseBiasedLocking && !UseOptoBiasInlining) { ++ __ biased_locking_enter(box, oop, disp_hdr, tmp, true, cont, /*slow_case*/NULL, NULL, flag); ++ } ++ + // Check for existing monitor + if ((EmitSync & 0x02) == 0) { + __ andi(t0, disp_hdr, markOopDesc::monitor_value); +@@ -2236,6 +2240,10 @@ encode %{ + return; + } + ++ if (UseBiasedLocking && !UseOptoBiasInlining) { ++ __ biased_locking_exit(oop, tmp, cont, flag); ++ } ++ + // Find the lock address and load the displaced header from the stack. + __ ld(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes())); + +@@ -4961,6 +4969,10 @@ instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFla + ins_pipe(pipe_serial); + %} + ++// storeLConditional is used by PhaseMacroExpand::expand_lock_node ++// when attempting to rebias a lock towards the current thread. We ++// must use the acquire form of cmpxchg in order to guarantee acquire ++// semantics in this case. 
+ instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) + %{ + match(Set cr (StoreLConditional mem (Binary oldval newval))); +diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +index d740c99c979..eaefcc2b595 100644 +--- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp ++++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +@@ -1489,6 +1489,10 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, + // Load the oop from the handle + __ ld(obj_reg, Address(oop_handle_reg, 0)); + ++ if (UseBiasedLocking) { ++ __ biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp, false, lock_done, &slow_path_lock); ++ } ++ + // Load (object->mark() | 1) into swap_reg % x10 + __ ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + __ ori(swap_reg, t0, 1); +@@ -1597,6 +1601,10 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, + + Label done; + ++ if (UseBiasedLocking) { ++ __ biased_locking_exit(obj_reg, old_hdr, done); ++ } ++ + // Simple recursive lock? + __ ld(t0, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); + __ beqz(t0, done); +diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp +index c9d399ccdaf..1e23fb4dc09 100644 +--- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp ++++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp +@@ -3563,9 +3563,13 @@ void TemplateTable::_new() { + __ bnez(x13, loop); + } + +- // initialize object hader only. ++ // initialize object header only. + __ bind(initialize_header); +- __ mv(t0, (intptr_t)markOopDesc::prototype()); ++ if (UseBiasedLocking) { ++ __ ld(t0, Address(x14, Klass::prototype_header_offset())); ++ } else { ++ __ mv(t0, (intptr_t)markOopDesc::prototype()); ++ } + __ sd(t0, Address(x10, oopDesc::mark_offset_in_bytes())); + __ store_klass_gap(x10, zr); // zero klass gap for compressed oops + __ store_klass(x10, x14); // store klass last + +From 864e551505bb816f3dc8a3bd1b065328ba7b5d65 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Mon, 17 Apr 2023 19:52:44 +0800 +Subject: [PATCH 051/140] Revert JDK-8227680: FastJNIAccessors: Check for JVMTI + field access event requests at runtime + +--- + .../cpu/riscv/jniFastGetField_riscv.cpp | 32 ++++--------------- + 1 file changed, 6 insertions(+), 26 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp b/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp +index 814ed23e471..f6e7351c4fc 100644 +--- a/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp ++++ b/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp +@@ -83,28 +83,10 @@ address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { + // An even value means there are no ongoing safepoint operations + __ andi(t0, rcounter, 1); + __ bnez(t0, slow); +- +- if (JvmtiExport::can_post_field_access()) { +- // Using barrier to order wrt. JVMTI check and load of result. +- __ membar(MacroAssembler::LoadLoad); +- +- // Check to see if a field access watch has been set before we +- // take the fast path. +- int32_t offset2; +- __ la_patchable(result, +- ExternalAddress((address) JvmtiExport::get_field_access_count_addr()), +- offset2); +- __ lwu(result, Address(result, offset2)); +- __ bnez(result, slow); +- +- __ mv(robj, c_rarg1); +- } else { +- // Using address dependency to order wrt. load of result. 
+- __ xorr(robj, c_rarg1, rcounter); +- __ xorr(robj, robj, rcounter); // obj, since +- // robj ^ rcounter ^ rcounter == robj +- // robj is address dependent on rcounter. +- } ++ __ xorr(robj, c_rarg1, rcounter); ++ __ xorr(robj, robj, rcounter); // obj, since ++ // robj ^ rcounter ^ rcounter == robj ++ // robj is address dependent on rcounter. + + // Both robj and t0 are clobbered by try_resolve_jobject_in_native. + BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); +@@ -137,10 +119,8 @@ address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { + default: ShouldNotReachHere(); + } + +- // Using acquire: Order JVMTI check and load of result wrt. succeeding check +- // (LoadStore for volatile field). +- __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); +- ++ __ xorr(rcounter_addr, rcounter_addr, result); ++ __ xorr(rcounter_addr, rcounter_addr, result); + __ lw(t0, safepoint_counter_addr); + __ bne(rcounter, t0, slow); + + +From b822b64cb6be38cb7806fda3d56675674557c163 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 18 Apr 2023 16:34:32 +0800 +Subject: [PATCH 052/140] Revert JDK-8249768: Move static oops and + NullPointerException oops from Universe into OopStorage + +--- + src/hotspot/cpu/riscv/templateTable_riscv.cpp | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp +index 1e23fb4dc09..fbcdcf60d9c 100644 +--- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp ++++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp +@@ -411,7 +411,6 @@ void TemplateTable::fast_aldc(bool wide) + int32_t offset = 0; + __ movptr_with_offset(rarg, Universe::the_null_sentinel_addr(), offset); + __ ld(tmp, Address(rarg, offset)); +- __ resolve_oop_handle(tmp); + __ bne(result, tmp, notNull); + __ mv(result, zr); // NULL object reference + __ bind(notNull); + +From c82c482aa065ffd39eab6b87a0ad6c6cbca1e3af Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 18 Apr 2023 16:58:23 +0800 +Subject: [PATCH 053/140] Revert JDK-8217998: Remove method_type field + associated with the appendix field of an indy or method handle call + +--- + src/hotspot/cpu/riscv/templateTable_riscv.cpp | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp +index fbcdcf60d9c..158294f7436 100644 +--- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp ++++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp +@@ -3192,6 +3192,7 @@ void TemplateTable::prepare_invoke(int byte_no, + // since the parameter_size includes it. + __ push_reg(x9); + __ mv(x9, index); ++ assert(ConstantPoolCacheEntry::_indy_resolved_references_appendix_offset == 0, "appendix expected at index+0"); + __ load_resolved_reference_at_index(index, x9); + __ pop_reg(x9); + __ push_reg(index); // push appendix (MethodType, CallSite, etc.) 
+ +From 3e50d62dd06c3f8bc586e3ab2b00f2f587d950bf Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 16:04:31 +0800 +Subject: [PATCH 054/140] Revert JDK-8277372: Add getters for BOT and card + table members + +--- + src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp | 4 ++-- + .../riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp | 6 +++--- + 2 files changed, 5 insertions(+), 5 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp +index 1c46b3947d3..6b75bf63781 100644 +--- a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp +@@ -215,7 +215,7 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, + ExternalAddress cardtable((address) ct->byte_map_base()); + const Register card_addr = tmp; + +- __ srli(card_addr, store_addr, CardTable::card_shift()); ++ __ srli(card_addr, store_addr, CardTable::card_shift); + + // get the address of the card + __ load_byte_map_base(tmp2); +@@ -437,7 +437,7 @@ void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* + assert_different_registers(card_offset, byte_map_base, t0); + + __ load_parameter(0, card_offset); +- __ srli(card_offset, card_offset, CardTable::card_shift()); ++ __ srli(card_offset, card_offset, CardTable::card_shift); + __ load_byte_map_base(byte_map_base); + + // Convert card offset into an address in card_addr +diff --git a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp +index a419f92b5f6..868d022ac74 100644 +--- a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp +@@ -41,7 +41,7 @@ void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register ob + BarrierSet* bs = BarrierSet::barrier_set(); + assert(bs->kind() == BarrierSet::CardTableBarrierSet, "Wrong barrier set kind"); + +- __ srli(obj, obj, CardTable::card_shift()); ++ __ srli(obj, obj, CardTable::card_shift); + + assert(CardTable::dirty_card_val() == 0, "must be"); + +@@ -74,8 +74,8 @@ void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembl + __ shadd(end, count, start, count, LogBytesPerHeapOop); + __ sub(end, end, BytesPerHeapOop); // last element address to make inclusive + +- __ srli(start, start, CardTable::card_shift()); +- __ srli(end, end, CardTable::card_shift()); ++ __ srli(start, start, CardTable::card_shift); ++ __ srli(end, end, CardTable::card_shift); + __ sub(count, end, start); // number of bytes to copy + + __ load_byte_map_base(tmp); + +From 6a81a820e6c08cfdd8e29a835e953dabffdca98a Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Wed, 19 Apr 2023 11:30:58 +0800 +Subject: [PATCH 055/140] Revert JDK-8260941: Remove the conc_scan parameter + for CardTable + +--- + .../shared/cardTableBarrierSetAssembler_riscv.cpp | 13 +++++++++++++ + 1 file changed, 13 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp +index 868d022ac74..a476e5ec84d 100644 +--- a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp +@@ -41,6 +41,9 @@ void 
CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register ob + BarrierSet* bs = BarrierSet::barrier_set(); + assert(bs->kind() == BarrierSet::CardTableBarrierSet, "Wrong barrier set kind"); + ++ CardTableBarrierSet* ctbs = barrier_set_cast(bs); ++ CardTable* ct = ctbs->card_table(); ++ + __ srli(obj, obj, CardTable::card_shift); + + assert(CardTable::dirty_card_val() == 0, "must be"); +@@ -56,6 +59,9 @@ void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register ob + __ sb(zr, Address(tmp)); + __ bind(L_already_dirty); + } else { ++ if (ct->scanned_concurrently()) { ++ __ membar(MacroAssembler::StoreStore); ++ } + __ sb(zr, Address(tmp)); + } + } +@@ -66,6 +72,10 @@ void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembl + assert_different_registers(start, tmp); + assert_different_registers(count, tmp); + ++ BarrierSet* bs = BarrierSet::barrier_set(); ++ CardTableBarrierSet* ctbs = barrier_set_cast(bs); ++ CardTable* ct = ctbs->card_table(); ++ + Label L_loop, L_done; + const Register end = count; + +@@ -80,6 +90,9 @@ void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembl + + __ load_byte_map_base(tmp); + __ add(start, start, tmp); ++ if (ct->scanned_concurrently()) { ++ __ membar(MacroAssembler::StoreStore); ++ } + + __ bind(L_loop); + __ add(tmp, start, count); + +From 24688cb665b16331b491bed2566dc97582a3d73c Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Wed, 19 Apr 2023 11:32:54 +0800 +Subject: [PATCH 056/140] Revert JDK-8220301: Remove jbyte use in CardTable + +Note: An assertion in `CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier` is removed. See the jdk11u backport for AArch64: https://mail.openjdk.org/pipermail/jdk-updates-dev/2019-August/001746.html +--- + src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp | 3 +++ + .../cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp | 1 + + src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 2 +- + src/hotspot/cpu/riscv/riscv.ad | 3 +-- + 4 files changed, 6 insertions(+), 3 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp +index 6b75bf63781..b6786c6b327 100644 +--- a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp +@@ -196,6 +196,7 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, + BarrierSet* bs = BarrierSet::barrier_set(); + CardTableBarrierSet* ctbs = barrier_set_cast(bs); + CardTable* ct = ctbs->card_table(); ++ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); + + Label done; + Label runtime; +@@ -213,6 +214,7 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, + // storing region crossing non-NULL, is card already dirty? 
+ + ExternalAddress cardtable((address) ct->byte_map_base()); ++ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); + const Register card_addr = tmp; + + __ srli(card_addr, store_addr, CardTable::card_shift); +@@ -419,6 +421,7 @@ void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* + BarrierSet* bs = BarrierSet::barrier_set(); + CardTableBarrierSet* ctbs = barrier_set_cast(bs); + CardTable* ct = ctbs->card_table(); ++ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); + + Label done; + Label runtime; +diff --git a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp +index a476e5ec84d..81d47d61d4c 100644 +--- a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp +@@ -43,6 +43,7 @@ void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register ob + + CardTableBarrierSet* ctbs = barrier_set_cast(bs); + CardTable* ct = ctbs->card_table(); ++ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); + + __ srli(obj, obj, CardTable::card_shift); + +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index e557a134b5b..6e4d22db40f 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -2719,7 +2719,7 @@ void MacroAssembler::get_thread(Register thread) { + } + + void MacroAssembler::load_byte_map_base(Register reg) { +- CardTable::CardValue* byte_map_base = ++ jbyte *byte_map_base = + ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base(); + li(reg, (uint64_t)byte_map_base); + } +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index c2a0be140e9..ca6a232e1e0 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -2735,8 +2735,7 @@ operand immByteMapBase() + %{ + // Get base of card map + predicate(BarrierSet::barrier_set()->is_a(BarrierSet::CardTableBarrierSet) && +- (CardTable::CardValue*)n->get_ptr() == +- ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base()); ++ (jbyte*)n->get_ptr() == ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base()); + match(ConP); + + op_cost(0); + +From 6ee27261d406342a5378d4a404319866a9bae804 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Wed, 19 Apr 2023 11:51:20 +0800 +Subject: [PATCH 057/140] Revert JDK-8230486: + G1BarrierSetAssembler::g1_write_barrier_post unnecessarily pushes/pops + new_val + +--- + src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp +index b6786c6b327..d724876ec3a 100644 +--- a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp +@@ -250,7 +250,7 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, + + __ bind(runtime); + // save the live input values +- RegSet saved = RegSet::of(store_addr); ++ RegSet saved = RegSet::of(store_addr, new_val); + __ push_reg(saved, sp); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread); + __ 
pop_reg(saved, sp);
+
+From 57067a358ffc1b54edfb305549bd460b0fca47f0 Mon Sep 17 00:00:00 2001
+From: "yunyao.zxl"
+Date: Fri, 21 Apr 2023 12:10:22 +0800
+Subject: [PATCH 058/140] Revert JDK-8242449: AArch64: r27 can be allocated in
+ CompressedOops mode
+
+---
+ src/hotspot/cpu/riscv/riscv.ad | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad
+index ca6a232e1e0..e3f976faa0d 100644
+--- a/src/hotspot/cpu/riscv/riscv.ad
++++ b/src/hotspot/cpu/riscv/riscv.ad
+@@ -4846,6 +4846,8 @@ instruct storeN(iRegN src, memory mem)
+ instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem)
+ %{
+ match(Set mem (StoreN mem zero));
++ predicate(Universe::narrow_oop_base() == NULL &&
++ Universe::narrow_klass_base() == NULL);
+
+ ins_cost(STORE_COST);
+ format %{ "sw rheapbase, $mem\t# compressed ptr (rheapbase==0), #@storeImmN0" %}
+
+From 0db520768d4d268a9dc641e301df45653c52f6eb Mon Sep 17 00:00:00 2001
+From: "yunyao.zxl"
+Date: Sun, 23 Apr 2023 14:59:09 +0800
+Subject: [PATCH 059/140] A fix for interpreter frame verification code,
+ skipping the locals check if there are no locals. See one of the additional
+ commits in JDK-8286301, the RISC-V loom port.
+
+---
+ src/hotspot/cpu/riscv/frame_riscv.cpp | 11 ++++++++++-
+ 1 file changed, 10 insertions(+), 1 deletion(-)
+
+diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp
+index d03adc0bff4..13c482b610a 100644
+--- a/src/hotspot/cpu/riscv/frame_riscv.cpp
++++ b/src/hotspot/cpu/riscv/frame_riscv.cpp
+@@ -571,7 +571,16 @@ bool frame::is_interpreted_frame_valid(JavaThread* thread) const {
+
+ // validate locals
+ address locals = (address) *interpreter_frame_locals_addr();
+- if (locals > thread->stack_base() || locals < (address) fp()) {
++ if (locals > thread->stack_base()) {
++ return false;
++ }
++
++ if (m->max_locals() > 0 && locals < (address) fp()) {
++ // fp in interpreter frame on RISC-V is higher than that on AArch64,
++ // pointing to sender_sp and sender_sp-2 respectively.
++ // On RISC-V, if max_locals is 0, the 'locals' pointer may be below fp,
++ // pointing to sender_sp-1 (with one padding slot).
++ // So we verify the 'locals' pointer only if max_locals > 0.
+ return false; + } + + +From 795da5afe59658b4d89cd8501b4f4ec56471b14c Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 11 Apr 2023 11:45:40 +0800 +Subject: [PATCH 060/140] ShenandoahGC adaptations on JDK11 for RISC-V backend + +--- + .../cpu/riscv/c1_LIRAssembler_riscv.cpp | 4 +- + .../c1/shenandoahBarrierSetC1_riscv.cpp | 2 +- + .../shenandoahBarrierSetAssembler_riscv.cpp | 229 +++++++++--------- + .../shenandoahBarrierSetAssembler_riscv.hpp | 15 +- + .../riscv/gc/shenandoah/shenandoah_riscv64.ad | 88 ------- + src/hotspot/cpu/riscv/riscv.ad | 6 +- + .../templateInterpreterGenerator_riscv.cpp | 15 +- + 7 files changed, 146 insertions(+), 213 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +index 6a961ee2307..90c4af5d3b0 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +@@ -1817,10 +1817,12 @@ void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest, LIR_Opr tmp) { + + + void LIR_Assembler::leal(LIR_Opr addr, LIR_Opr dest, LIR_PatchCode patch_code, CodeEmitInfo* info) { +- if (patch_code != lir_patch_none) { ++#if INCLUDE_SHENANDOAHGC ++ if (UseShenandoahGC && patch_code != lir_patch_none) { + deoptimize_trap(info); + return; + } ++#endif + + assert(patch_code == lir_patch_none, "Patch code not supported"); + LIR_Address* adr = addr->as_address_ptr(); +diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp +index cd568cc723f..d19f5b859ce 100644 +--- a/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp ++++ b/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp +@@ -103,7 +103,7 @@ LIR_Opr ShenandoahBarrierSetC1::atomic_xchg_at_resolved(LIRAccess& access, LIRIt + __ xchg(access.resolved_addr(), value_opr, result, tmp); + + if (access.is_oop()) { +- result = load_reference_barrier(access.gen(), result, LIR_OprFact::addressConst(0), access.decorators()); ++ result = load_reference_barrier(access.gen(), result, LIR_OprFact::addressConst(0)); + LIR_Opr tmp_opr = gen->new_register(type); + __ move(result, tmp_opr); + result = tmp_opr; +diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp +index 84e1205bc25..b8534c52e77 100644 +--- a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp +@@ -27,7 +27,7 @@ + #include "gc/shenandoah/shenandoahBarrierSet.hpp" + #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" + #include "gc/shenandoah/shenandoahForwarding.hpp" +-#include "gc/shenandoah/shenandoahHeap.inline.hpp" ++#include "gc/shenandoah/shenandoahHeap.hpp" + #include "gc/shenandoah/shenandoahHeapRegion.hpp" + #include "gc/shenandoah/shenandoahRuntime.hpp" + #include "gc/shenandoah/shenandoahThreadLocalData.hpp" +@@ -44,6 +44,8 @@ + + #define __ masm-> + ++address ShenandoahBarrierSetAssembler::_shenandoah_lrb = NULL; ++ + void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, + Register src, Register dst, Register count, RegSet saved_regs) { + if (is_oop) { +@@ -116,10 +118,10 @@ void ShenandoahBarrierSetAssembler::satb_write_barrier_pre(MacroAssembler* masm, + Address buffer(thread, 
in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset())); + + // Is marking active? +- if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { ++ if (in_bytes(ShenandoahSATBMarkQueue::byte_width_of_active()) == 4) { + __ lwu(tmp, in_progress); + } else { +- assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); ++ assert(in_bytes(ShenandoahSATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); + __ lbu(tmp, in_progress); + } + __ beqz(tmp, done); +@@ -225,37 +227,21 @@ void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssemb + __ pop_reg(saved_regs, sp); + } + +-void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, +- Register dst, +- Address load_addr, +- DecoratorSet decorators) { ++void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembler* masm, ++ Register dst, ++ Address load_addr) { + assert(ShenandoahLoadRefBarrier, "Should be enabled"); + assert(dst != t1 && load_addr.base() != t1, "need t1"); + assert_different_registers(load_addr.base(), t0, t1); + +- bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators); +- bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators); +- bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators); +- bool is_native = ShenandoahBarrierSet::is_native_access(decorators); +- bool is_narrow = UseCompressedOops && !is_native; +- +- Label heap_stable, not_cset; ++ Label done; + __ enter(); + Address gc_state(xthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); + __ lbu(t1, gc_state); + + // Check for heap stability +- if (is_strong) { +- __ andi(t1, t1, ShenandoahHeap::HAS_FORWARDED); +- __ beqz(t1, heap_stable); +- } else { +- Label lrb; +- __ andi(t0, t1, ShenandoahHeap::WEAK_ROOTS); +- __ bnez(t0, lrb); +- __ andi(t0, t1, ShenandoahHeap::HAS_FORWARDED); +- __ beqz(t0, heap_stable); +- __ bind(lrb); +- } ++ __ andi(t1, t1, ShenandoahHeap::HAS_FORWARDED); ++ __ beqz(t1, done); + + // use x11 for load address + Register result_dst = dst; +@@ -270,43 +256,12 @@ void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, + __ la(x11, load_addr); + __ mv(x10, dst); + +- // Test for in-cset +- if (is_strong) { +- __ li(t1, (uint64_t)ShenandoahHeap::in_cset_fast_test_addr()); +- __ srli(t0, x10, ShenandoahHeapRegion::region_size_bytes_shift_jint()); +- __ add(t1, t1, t0); +- __ lbu(t1, Address(t1)); +- __ andi(t0, t1, 1); +- __ beqz(t0, not_cset); +- } ++ __ far_call(RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahBarrierSetAssembler::shenandoah_lrb()))); + +- __ push_call_clobbered_registers(); +- if (is_strong) { +- if (is_narrow) { +- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong_narrow); +- } else { +- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong); +- } +- } else if (is_weak) { +- if (is_narrow) { +- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak_narrow); +- } else { +- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak); +- } +- } else { +- assert(is_phantom, "only remaining strength"); +- assert(!is_narrow, "phantom access cannot be narrow"); +- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak); +- } +- __ jalr(ra); +- __ mv(t0, x10); +- __ pop_call_clobbered_registers(); +- __ mv(x10, t0); +- __ bind(not_cset); + __ mv(result_dst, x10); + __ pop_reg(saved_regs, sp); + +- __ bind(heap_stable); ++ __ bind(done); + __ 
leave(); + } + +@@ -320,6 +275,15 @@ void ShenandoahBarrierSetAssembler::iu_barrier(MacroAssembler* masm, Register ds + } + } + ++void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr) { ++ if (ShenandoahLoadRefBarrier) { ++ Label is_null; ++ __ beqz(dst, is_null); ++ load_reference_barrier_not_null(masm, dst, load_addr); ++ __ bind(is_null); ++ } ++} ++ + // + // Arguments: + // +@@ -363,7 +327,7 @@ void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, + + BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); + +- load_reference_barrier(masm, dst, src, decorators); ++ load_reference_barrier(masm, dst, src); + + if (dst != result_dst) { + __ mv(result_dst, dst); +@@ -555,7 +519,7 @@ void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, Shen + Register pre_val_reg = stub->pre_val()->as_register(); + + if (stub->do_load()) { +- ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /* wide */); ++ ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /* wide */, false /*unaligned*/); + } + __ beqz(pre_val_reg, *stub->continuation(), /* is_far */ true); + ce->store_parameter(stub->pre_val()->as_register(), 0); +@@ -568,12 +532,6 @@ void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assemble + ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); + __ bind(*stub->entry()); + +- DecoratorSet decorators = stub->decorators(); +- bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators); +- bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators); +- bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators); +- bool is_native = ShenandoahBarrierSet::is_native_access(decorators); +- + Register obj = stub->obj()->as_register(); + Register res = stub->result()->as_register(); + Register addr = stub->addr()->as_pointer_register(); +@@ -587,30 +545,32 @@ void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assemble + __ mv(res, obj); + } + +- if (is_strong) { +- // Check for object in cset. +- __ mv(tmp2, ShenandoahHeap::in_cset_fast_test_addr()); +- __ srli(tmp1, res, ShenandoahHeapRegion::region_size_bytes_shift_jint()); +- __ add(tmp2, tmp2, tmp1); +- __ lbu(tmp2, Address(tmp2)); +- __ beqz(tmp2, *stub->continuation(), true /* is_far */); +- } ++ // Check for null. ++ __ beqz(res, *stub->continuation(), /* is_far */ true); ++ ++ // Check for object in cset. ++ __ mv(tmp2, ShenandoahHeap::in_cset_fast_test_addr()); ++ __ srli(tmp1, res, ShenandoahHeapRegion::region_size_bytes_shift_jint()); ++ __ add(t0, tmp2, tmp1); ++ __ lb(tmp2, Address(t0)); ++ __ beqz(tmp2, *stub->continuation(), /* is_far */ true); ++ ++ // Check if object is already forwarded. ++ Label slow_path; ++ __ ld(tmp1, Address(res, oopDesc::mark_offset_in_bytes())); ++ __ xori(tmp1, tmp1, -1); ++ __ andi(t0, tmp1, markOopDesc::lock_mask_in_place); ++ __ bnez(t0, slow_path); ++ ++ // Decode forwarded object. 
++ __ ori(tmp1, tmp1, markOopDesc::marked_value); ++ __ xori(res, tmp1, -1); ++ __ j(*stub->continuation()); + ++ __ bind(slow_path); + ce->store_parameter(res, 0); + ce->store_parameter(addr, 1); +- +- if (is_strong) { +- if (is_native) { +- __ far_call(RuntimeAddress(bs->load_reference_barrier_strong_native_rt_code_blob()->code_begin())); +- } else { +- __ far_call(RuntimeAddress(bs->load_reference_barrier_strong_rt_code_blob()->code_begin())); +- } +- } else if (is_weak) { +- __ far_call(RuntimeAddress(bs->load_reference_barrier_weak_rt_code_blob()->code_begin())); +- } else { +- assert(is_phantom, "only remaining strength"); +- __ far_call(RuntimeAddress(bs->load_reference_barrier_phantom_rt_code_blob()->code_begin())); +- } ++ __ far_call(RuntimeAddress(bs->load_reference_barrier_rt_code_blob()->code_begin())); + + __ j(*stub->continuation()); + } +@@ -664,8 +624,7 @@ void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAss + __ epilogue(); + } + +-void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, +- DecoratorSet decorators) { ++void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm) { + __ prologue("shenandoah_load_reference_barrier", false); + // arg0 : object to be resolved + +@@ -673,31 +632,10 @@ void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_s + __ load_parameter(0, x10); + __ load_parameter(1, x11); + +- bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators); +- bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators); +- bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators); +- bool is_native = ShenandoahBarrierSet::is_native_access(decorators); +- if (is_strong) { +- if (is_native) { +- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong); +- } else { +- if (UseCompressedOops) { +- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong_narrow); +- } else { +- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong); +- } +- } +- } else if (is_weak) { +- assert(!is_native, "weak must not be called off-heap"); +- if (UseCompressedOops) { +- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak_narrow); +- } else { +- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak); +- } ++ if (UseCompressedOops) { ++ __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow)); + } else { +- assert(is_phantom, "only remaining strength"); +- assert(is_native, "phantom must only be called off-heap"); +- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_phantom); ++ __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier)); + } + __ jalr(ra); + __ mv(t0, x10); +@@ -710,3 +648,68 @@ void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_s + #undef __ + + #endif // COMPILER1 ++ ++address ShenandoahBarrierSetAssembler::shenandoah_lrb() { ++ assert(_shenandoah_lrb != NULL, "need load reference barrier stub"); ++ return _shenandoah_lrb; ++} ++ ++#define __ cgen->assembler()-> ++ ++// Shenandoah load reference barrier. ++// ++// Input: ++// x10: OOP to evacuate. Not null. ++// x11: load address ++// ++// Output: ++// x10: Pointer to evacuated OOP. ++// ++// Trash t0 t1 Preserve everything else. 
++address ShenandoahBarrierSetAssembler::generate_shenandoah_lrb(StubCodeGenerator* cgen) { ++ __ align(6); ++ StubCodeMark mark(cgen, "StubRoutines", "shenandoah_lrb"); ++ address start = __ pc(); ++ ++ Label slow_path; ++ __ mv(t1, ShenandoahHeap::in_cset_fast_test_addr()); ++ __ srli(t0, x10, ShenandoahHeapRegion::region_size_bytes_shift_jint()); ++ __ add(t1, t1, t0); ++ __ lbu(t1, Address(t1, 0)); ++ __ andi(t0, t1, 1); ++ __ bnez(t0, slow_path); ++ __ ret(); ++ ++ __ bind(slow_path); ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ ++ __ push_call_clobbered_registers(); ++ ++ if (UseCompressedOops) { ++ __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow)); ++ } else { ++ __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier)); ++ } ++ __ jalr(ra); ++ __ mv(t0, x10); ++ __ pop_call_clobbered_registers(); ++ __ mv(x10, t0); ++ ++ __ leave(); // required for proper stackwalking of RuntimeStub frame ++ __ ret(); ++ ++ return start; ++} ++ ++#undef __ ++ ++void ShenandoahBarrierSetAssembler::barrier_stubs_init() { ++ if (ShenandoahLoadRefBarrier) { ++ int stub_code_size = 2048; ++ ResourceMark rm; ++ BufferBlob* bb = BufferBlob::create("shenandoah_barrier_stubs", stub_code_size); ++ CodeBuffer buf(bb); ++ StubCodeGenerator cgen(&buf); ++ _shenandoah_lrb = generate_shenandoah_lrb(&cgen); ++ } ++} +diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp +index a705f497667..5d75035e9d4 100644 +--- a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp +@@ -40,6 +40,8 @@ class StubCodeGenerator; + class ShenandoahBarrierSetAssembler: public BarrierSetAssembler { + private: + ++ static address _shenandoah_lrb; ++ + void satb_write_barrier_pre(MacroAssembler* masm, + Register obj, + Register pre_val, +@@ -57,17 +59,22 @@ class ShenandoahBarrierSetAssembler: public BarrierSetAssembler { + + void resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp = noreg); + void resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp = noreg); +- void load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr, DecoratorSet decorators); ++ void load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr); ++ void load_reference_barrier_not_null(MacroAssembler* masm, Register dst, Address load_addr); ++ ++ address generate_shenandoah_lrb(StubCodeGenerator* cgen); + + public: + ++ static address shenandoah_lrb(); ++ + void iu_barrier(MacroAssembler* masm, Register dst, Register tmp); + + #ifdef COMPILER1 + void gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub); + void gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub); + void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); +- void generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, DecoratorSet decorators); ++ void generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm); + #endif + + virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, +@@ -81,8 +88,10 @@ class ShenandoahBarrierSetAssembler: public BarrierSetAssembler { + virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, + Register obj, Register tmp, Label& 
slowpath); + +- void cmpxchg_oop(MacroAssembler* masm, Register addr, Register expected, Register new_val, ++ virtual void cmpxchg_oop(MacroAssembler* masm, Register addr, Register expected, Register new_val, + Assembler::Aqrl acquire, Assembler::Aqrl release, bool is_cae, Register result); ++ ++ virtual void barrier_stubs_init(); + }; + + #endif // CPU_RISCV_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad +index 6c855f23c2a..bab407a8b76 100644 +--- a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad ++++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad +@@ -176,48 +176,6 @@ instruct weakCompareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldva + ins_pipe(pipe_slow); + %} + +-instruct compareAndExchangeNAcq_shenandoah(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ +- predicate(needs_acquiring_load_reserved(n)); +- match(Set res (ShenandoahCompareAndExchangeN mem (Binary oldval newval))); +- ins_cost(10 * DEFAULT_COST); +- +- effect(TEMP_DEF res, TEMP tmp, KILL cr); +- format %{ +- "cmpxchgw_acq_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeNAcq_shenandoah" +- %} +- +- ins_encode %{ +- Register tmp = $tmp$$Register; +- __ mv(tmp, $oldval$$Register); +- ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, +- Assembler::aq /* acquire */, Assembler::rl /* release */, +- true /* is_cae */, $res$$Register); +- %} +- +- ins_pipe(pipe_slow); +-%} +- +-instruct compareAndExchangePAcq_shenandoah(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ +- predicate(needs_acquiring_load_reserved(n)); +- match(Set res (ShenandoahCompareAndExchangeP mem (Binary oldval newval))); +- ins_cost(10 * DEFAULT_COST); +- +- effect(TEMP_DEF res, TEMP tmp, KILL cr); +- format %{ +- "cmpxchg_acq_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangePAcq_shenandoah" +- %} +- +- ins_encode %{ +- Register tmp = $tmp$$Register; +- __ mv(tmp, $oldval$$Register); +- ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, +- Assembler::aq /* acquire */, Assembler::rl /* release */, +- true /* is_cae */, $res$$Register); +- %} +- +- ins_pipe(pipe_slow); +-%} +- + instruct weakCompareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ + match(Set res (ShenandoahWeakCompareAndSwapP mem (Binary oldval newval))); + ins_cost(10 * DEFAULT_COST); +@@ -237,49 +195,3 @@ instruct weakCompareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldva + + ins_pipe(pipe_slow); + %} +- +-instruct weakCompareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ +- predicate(needs_acquiring_load_reserved(n)); +- match(Set res (ShenandoahWeakCompareAndSwapN mem (Binary oldval newval))); +- ins_cost(10 * DEFAULT_COST); +- +- effect(TEMP tmp, KILL cr); +- format %{ +- "cmpxchgw_acq_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapNAcq_shenandoah" +- "mv $res, EQ\t# $res <-- (EQ ? 
1 : 0)" +- %} +- +- ins_encode %{ +- Register tmp = $tmp$$Register; +- __ mv(tmp, $oldval$$Register); // Must not clobber oldval. +- // Weak is not current supported by ShenandoahBarrierSet::cmpxchg_oop +- ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, +- Assembler::aq /* acquire */, Assembler::rl /* release */, +- false /* is_cae */, $res$$Register); +- %} +- +- ins_pipe(pipe_slow); +-%} +- +-instruct weakCompareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ +- predicate(needs_acquiring_load_reserved(n)); +- match(Set res (ShenandoahWeakCompareAndSwapP mem (Binary oldval newval))); +- ins_cost(10 * DEFAULT_COST); +- +- effect(TEMP tmp, KILL cr); +- format %{ +- "cmpxchg_acq_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapPAcq_shenandoah" +- "mv $res, EQ\t# $res <-- (EQ ? 1 : 0)" +- %} +- +- ins_encode %{ +- Register tmp = $tmp$$Register; +- __ mv(tmp, $oldval$$Register); // Must not clobber oldval. +- // Weak is not current supported by ShenandoahBarrierSet::cmpxchg_oop +- ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, +- Assembler::aq /* acquire */, Assembler::rl /* release */, +- false /* is_cae */, $res$$Register); +- %} +- +- ins_pipe(pipe_slow); +-%} +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index e3f976faa0d..a6061de7a33 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -828,8 +828,10 @@ bool is_CAS(int opcode, bool maybe_volatile) + case Op_CompareAndSwapL: + case Op_CompareAndSwapP: + case Op_CompareAndSwapN: ++#if INCLUDE_SHENANDOAHGC + case Op_ShenandoahCompareAndSwapP: + case Op_ShenandoahCompareAndSwapN: ++#endif + case Op_CompareAndSwapB: + case Op_CompareAndSwapS: + case Op_GetAndSetI: +@@ -851,10 +853,6 @@ bool is_CAS(int opcode, bool maybe_volatile) + case Op_WeakCompareAndSwapL: + case Op_WeakCompareAndSwapP: + case Op_WeakCompareAndSwapN: +- case Op_ShenandoahWeakCompareAndSwapP: +- case Op_ShenandoahWeakCompareAndSwapN: +- case Op_ShenandoahCompareAndExchangeP: +- case Op_ShenandoahCompareAndExchangeN: + return maybe_volatile; + default: + return false; +diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp +index a07dea35b73..5a87c687cf7 100644 +--- a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp ++++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp +@@ -765,9 +765,18 @@ void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { + __ sd(ProfileInterpreter ? 
t0 : zr, Address(sp, 6 * wordSize)); + + // Get mirror and store it in the frame as GC root for this Method* +- __ load_mirror(t2, xmethod); +- __ sd(zr, Address(sp, 5 * wordSize)); +- __ sd(t2, Address(sp, 4 * wordSize)); ++#if INCLUDE_SHENANDOAHGC ++ if (UseShenandoahGC) { ++ __ load_mirror(x28, xmethod); ++ __ sd(zr, Address(sp, 5 * wordSize)); ++ __ sd(x28, Address(sp, 4 * wordSize)); ++ } else ++#endif ++ { ++ __ load_mirror(t2, xmethod); ++ __ sd(zr, Address(sp, 5 * wordSize)); ++ __ sd(t2, Address(sp, 4 * wordSize)); ++ } + + __ ld(xcpool, Address(xmethod, Method::const_offset())); + __ ld(xcpool, Address(xcpool, ConstMethod::constants_offset())); + +From d8b14fd5e6455b47cfcb02d13c0c24c74e824570 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 25 Apr 2023 14:42:07 +0800 +Subject: [PATCH 061/140] Revert JDK-8248404: AArch64: Remove uses of long and + unsigned long + +--- + src/hotspot/cpu/riscv/assembler_riscv.hpp | 19 +++++++++++++------ + .../cpu/riscv/macroAssembler_riscv.cpp | 6 ------ + .../cpu/riscv/macroAssembler_riscv.hpp | 13 ++++++++----- + 3 files changed, 21 insertions(+), 17 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp +index 51aa052a0c7..31aeeb9b425 100644 +--- a/src/hotspot/cpu/riscv/assembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp +@@ -183,13 +183,20 @@ class Address { + : _base(noreg), _index(noreg), _offset(0), _mode(no_mode), _target(NULL) { } + Address(Register r) + : _base(r), _index(noreg), _offset(0), _mode(base_plus_offset), _target(NULL) { } +- +- template::value)> +- Address(Register r, T o) +- : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) {} +- ++ Address(Register r, int o) ++ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } ++ Address(Register r, long o) ++ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } ++ Address(Register r, long long o) ++ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } ++ Address(Register r, unsigned int o) ++ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } ++ Address(Register r, unsigned long o) ++ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } ++ Address(Register r, unsigned long long o) ++ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } + Address(Register r, ByteSize disp) +- : Address(r, in_bytes(disp)) {} ++ : Address(r, in_bytes(disp)) { } + Address(address target, RelocationHolder const& rspec) + : _base(noreg), + _index(noreg), +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index 6e4d22db40f..b95f69cfcda 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -1364,12 +1364,6 @@ void MacroAssembler::mv(Register Rd, Address dest) { + movptr(Rd, dest.target()); + } + +-void MacroAssembler::mv(Register Rd, address addr) { +- // Here in case of use with relocation, use fix length instruciton +- // movptr instead of li +- movptr(Rd, addr); +-} +- + void MacroAssembler::mv(Register Rd, RegisterOrConstant src) { + if (src.is_register()) { + mv(Rd, src.as_register()); +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +index c1ffa120774..76b2716659b 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp ++++ 
b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +@@ -543,15 +543,18 @@ class MacroAssembler: public Assembler { + } + + // mv +- template::value)> +- inline void mv(Register Rd, T o) { +- li(Rd, (int64_t)o); +- } ++ void mv(Register Rd, address addr) { li(Rd, (int64_t)addr); } ++ ++ inline void mv(Register Rd, int imm64) { li(Rd, (int64_t)imm64); } ++ inline void mv(Register Rd, long imm64) { li(Rd, (int64_t)imm64); } ++ inline void mv(Register Rd, long long imm64) { li(Rd, (int64_t)imm64); } ++ inline void mv(Register Rd, unsigned int imm64) { li(Rd, (int64_t)imm64); } ++ inline void mv(Register Rd, unsigned long imm64) { li(Rd, (int64_t)imm64); } ++ inline void mv(Register Rd, unsigned long long imm64) { li(Rd, (int64_t)imm64); } + + inline void mvw(Register Rd, int32_t imm32) { mv(Rd, imm32); } + + void mv(Register Rd, Address dest); +- void mv(Register Rd, address addr); + void mv(Register Rd, RegisterOrConstant src); + + // logic + +From 94c1c9c01e61d0cb7c32596ef19b347c32406546 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 25 Apr 2023 16:54:36 +0800 +Subject: [PATCH 062/140] Revert JDK-8280503: Use allStatic.hpp instead of + allocation.hpp where possible + +--- + src/hotspot/cpu/riscv/bytes_riscv.hpp | 2 -- + src/hotspot/cpu/riscv/jniTypes_riscv.hpp | 1 - + 2 files changed, 3 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/bytes_riscv.hpp b/src/hotspot/cpu/riscv/bytes_riscv.hpp +index 23d982f9abd..f60e0e38ae8 100644 +--- a/src/hotspot/cpu/riscv/bytes_riscv.hpp ++++ b/src/hotspot/cpu/riscv/bytes_riscv.hpp +@@ -27,8 +27,6 @@ + #ifndef CPU_RISCV_BYTES_RISCV_HPP + #define CPU_RISCV_BYTES_RISCV_HPP + +-#include "memory/allStatic.hpp" +- + class Bytes: AllStatic { + public: + // Efficient reading and writing of unaligned unsigned data in platform-specific byte ordering +diff --git a/src/hotspot/cpu/riscv/jniTypes_riscv.hpp b/src/hotspot/cpu/riscv/jniTypes_riscv.hpp +index 83ffcc55d83..bc4e5758256 100644 +--- a/src/hotspot/cpu/riscv/jniTypes_riscv.hpp ++++ b/src/hotspot/cpu/riscv/jniTypes_riscv.hpp +@@ -27,7 +27,6 @@ + #define CPU_RISCV_JNITYPES_RISCV_HPP + + #include "jni.h" +-#include "memory/allStatic.hpp" + #include "oops/oop.hpp" + + // This file holds platform-dependent routines used to write primitive jni + +From 49e6399009b51edafa6904164528e1d051aeae6c Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 25 Apr 2023 17:07:31 +0800 +Subject: [PATCH 063/140] Revert JDK-8276453: Undefined behavior in C1 + LIR_OprDesc causes SEGV in fastdebug build + +--- + src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp | 4 ++-- + src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp | 4 ++-- + src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp | 1 + + 3 files changed, 5 insertions(+), 4 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp +index af7bd067f33..6057d43296b 100644 +--- a/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp +@@ -58,7 +58,7 @@ RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index, LIR_Opr array) + } + + RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index) +- : _index(index), _array(), _throw_index_out_of_bounds_exception(true) { ++ : _index(index), _array(NULL), _throw_index_out_of_bounds_exception(true) { + assert(info != NULL, "must have info"); + _info = new CodeEmitInfo(info); + } +@@ -83,7 +83,7 @@ void RangeCheckStub::emit_code(LIR_Assembler* ce) { + if (_throw_index_out_of_bounds_exception) { + stub_id = 
Runtime1::throw_index_exception_id; + } else { +- assert(_array != LIR_Opr::nullOpr(), "sanity"); ++ assert(_array != NULL, "sanity"); + __ mv(t1, _array->as_pointer_register()); + stub_id = Runtime1::throw_range_check_failed_id; + } +diff --git a/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp +index 172031941b2..1f8b2b55100 100644 +--- a/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp +@@ -156,8 +156,8 @@ LIR_Opr FrameMap::long11_opr; + LIR_Opr FrameMap::fpu10_float_opr; + LIR_Opr FrameMap::fpu10_double_opr; + +-LIR_Opr FrameMap::_caller_save_cpu_regs[] = {}; +-LIR_Opr FrameMap::_caller_save_fpu_regs[] = {}; ++LIR_Opr FrameMap::_caller_save_cpu_regs[] = { 0, }; ++LIR_Opr FrameMap::_caller_save_fpu_regs[] = { 0, }; + + //-------------------------------------------------------- + // FrameMap +diff --git a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp +index c45a75b2301..227e7664225 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp +@@ -206,6 +206,7 @@ LIR_Opr LIRGenerator::load_immediate(int x, BasicType type) { + break; + default: + ShouldNotReachHere(); ++ r = NULL; + } + return r; + } + +From b94bda9d1a2c12fa379f8fe813460c498344f543 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 25 Apr 2023 17:19:19 +0800 +Subject: [PATCH 064/140] Revert JDK-8256205: Simplify compiler calling + convention handling + +--- + src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp | 2 +- + src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp | 2 +- + src/hotspot/cpu/riscv/riscv.ad | 25 +++++++++++++++++++ + src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 11 ++------ + 4 files changed, 29 insertions(+), 11 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp +index 6057d43296b..12980c12de6 100644 +--- a/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp +@@ -290,7 +290,7 @@ void ArrayCopyStub::emit_code(LIR_Assembler* ce) { + const int args_num = 5; + VMRegPair args[args_num]; + BasicType signature[args_num] = { T_OBJECT, T_INT, T_OBJECT, T_INT, T_INT }; +- SharedRuntime::java_calling_convention(signature, args, args_num); ++ SharedRuntime::java_calling_convention(signature, args, args_num, true); + + // push parameters + Register r[args_num]; +diff --git a/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp +index 1f8b2b55100..682ebe82627 100644 +--- a/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp +@@ -314,7 +314,7 @@ void FrameMap::initialize() { + + VMRegPair regs; + BasicType sig_bt = T_OBJECT; +- SharedRuntime::java_calling_convention(&sig_bt, ®s, 1); ++ SharedRuntime::java_calling_convention(&sig_bt, ®s, 1, true); + receiver_opr = as_oop_opr(regs.first()->as_Register()); + + for (i = 0; i < nof_caller_save_fpu_regs; i++) { +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index a6061de7a33..1667994699f 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -2443,6 +2443,12 @@ frame %{ + // Stack alignment requirement + stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes) + ++ // Number of stack slots between incoming argument block and the start of ++ // a new frame. 
The PROLOG must add this many slots to the stack. The ++ // EPILOG must remove this many slots. RISC-V needs two slots for ++ // return address and fp. ++ in_preserve_stack_slots(2 * VMRegImpl::slots_per_word); ++ + // Number of outgoing stack slots killed above the out_preserve_stack_slots + // for calls to C. Supports the var-args backing area for register parms. + varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes / BytesPerInt); +@@ -2461,6 +2467,25 @@ frame %{ + Compile::current()->fixed_slots()), + stack_alignment_in_slots())); + ++ // Body of function which returns an integer array locating ++ // arguments either in registers or in stack slots. Passed an array ++ // of ideal registers called "sig" and a "length" count. Stack-slot ++ // offsets are based on outgoing arguments, i.e. a CALLER setting up ++ // arguments for a CALLEE. Incoming stack arguments are ++ // automatically biased by the preserve_stack_slots field above. ++ ++ calling_convention ++ %{ ++ // No difference between ingoing/outgoing just pass false ++ SharedRuntime::java_calling_convention(sig_bt, regs, length, false); ++ %} ++ ++ c_calling_convention ++ %{ ++ // This is obviously always outgoing ++ (void) SharedRuntime::c_calling_convention(sig_bt, regs, NULL, length); ++ %} ++ + // Location of compiled Java return values. Same as C for now. + return_value + %{ +diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +index eaefcc2b595..411bddd2ace 100644 +--- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp ++++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +@@ -233,7 +233,8 @@ static int reg2offset_out(VMReg r) { + + int SharedRuntime::java_calling_convention(const BasicType *sig_bt, + VMRegPair *regs, +- int total_args_passed) { ++ int total_args_passed, ++ int is_outgoing) { + // Create the mapping between argument positions and + // registers. + static const Register INT_ArgReg[Argument::n_int_register_parameters_j] = { +@@ -2155,14 +2156,6 @@ void SharedRuntime::generate_deopt_blob() { + _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); + } + +-// Number of stack slots between incoming argument block and the start of +-// a new frame. The PROLOG must add this many slots to the stack. The +-// EPILOG must remove this many slots. +-// RISCV needs two words for RA (return address) and FP (frame pointer). 
+-uint SharedRuntime::in_preserve_stack_slots() { +- return 2 * VMRegImpl::slots_per_word; +-} +- + uint SharedRuntime::out_preserve_stack_slots() { + return 0; + } + +From 3fc948472c4a0918b967646b45c8886103b839d2 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 25 Apr 2023 17:27:57 +0800 +Subject: [PATCH 065/140] Revert JDK-8183574: Unify the is_power_of_2 functions + +--- + src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp | 4 ++-- + src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp | 1 - + src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp | 3 +-- + src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp | 1 - + src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 1 - + src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 1 - + src/hotspot/cpu/riscv/macroAssembler_riscv.hpp | 1 - + src/hotspot/cpu/riscv/stubGenerator_riscv.cpp | 1 - + src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp | 1 - + src/hotspot/cpu/riscv/templateTable_riscv.cpp | 1 - + 10 files changed, 3 insertions(+), 12 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp +index 4c1c13dc290..65d0eda62ef 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp +@@ -190,7 +190,7 @@ void LIR_Assembler::arith_op_double_cpu(LIR_Code code, LIR_Opr left, LIR_Opr rig + code == lir_add ? __ add(dreg, lreg_lo, c) : __ sub(dreg, lreg_lo, c); + break; + case lir_div: +- assert(c > 0 && is_power_of_2(c), "divisor must be power-of-2 constant"); ++ assert(c > 0 && is_power_of_2_long(c), "divisor must be power-of-2 constant"); + if (c == 1) { + // move lreg_lo to dreg if divisor is 1 + __ mv(dreg, lreg_lo); +@@ -208,7 +208,7 @@ void LIR_Assembler::arith_op_double_cpu(LIR_Code code, LIR_Opr left, LIR_Opr rig + } + break; + case lir_rem: +- assert(c > 0 && is_power_of_2(c), "divisor must be power-of-2 constant"); ++ assert(c > 0 && is_power_of_2_long(c), "divisor must be power-of-2 constant"); + if (c == 1) { + // move 0 to dreg if divisor is 1 + __ mv(dreg, zr); +diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +index 90c4af5d3b0..9de89a3b026 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +@@ -41,7 +41,6 @@ + #include "oops/objArrayKlass.hpp" + #include "runtime/frame.inline.hpp" + #include "runtime/sharedRuntime.hpp" +-#include "utilities/powerOfTwo.hpp" + #include "vmreg_riscv.inline.hpp" + + #ifndef PRODUCT +diff --git a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp +index 227e7664225..a9345158749 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp +@@ -38,7 +38,6 @@ + #include "ci/ciTypeArrayKlass.hpp" + #include "runtime/sharedRuntime.hpp" + #include "runtime/stubRoutines.hpp" +-#include "utilities/powerOfTwo.hpp" + #include "vmreg_riscv.inline.hpp" + + #ifdef ASSERT +@@ -383,7 +382,7 @@ void LIRGenerator::do_ArithmeticOp_Long(ArithmeticOp* x) { + // no need to do div-by-zero check if the divisor is a non-zero constant + if (c != 0) { need_zero_check = false; } + // do not load right if the divisor is a power-of-2 constant +- if (c > 0 && is_power_of_2(c)) { ++ if (c > 0 && is_power_of_2_long(c)) { + right.dont_load_item(); + } else { + right.load_item(); +diff --git a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp 
b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp +index 1f45fba9de0..fc88d5c180e 100644 +--- a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp +@@ -46,7 +46,6 @@ + #include "runtime/stubRoutines.hpp" + #include "runtime/vframe.hpp" + #include "runtime/vframeArray.hpp" +-#include "utilities/powerOfTwo.hpp" + #include "vmreg_riscv.inline.hpp" + + +diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +index f0c249f0d26..2fc0b00e2cb 100644 +--- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp ++++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +@@ -44,7 +44,6 @@ + #include "runtime/safepointMechanism.hpp" + #include "runtime/sharedRuntime.hpp" + #include "runtime/thread.inline.hpp" +-#include "utilities/powerOfTwo.hpp" + + void InterpreterMacroAssembler::narrow(Register result) { + // Get method->_constMethod->_result_type +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index b95f69cfcda..41a415ef2cf 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -47,7 +47,6 @@ + #include "runtime/sharedRuntime.hpp" + #include "runtime/stubRoutines.hpp" + #include "runtime/thread.hpp" +-#include "utilities/powerOfTwo.hpp" + #ifdef COMPILER2 + #include "opto/compile.hpp" + #include "opto/node.hpp" +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +index 76b2716659b..dd39f67d507 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +@@ -30,7 +30,6 @@ + #include "asm/assembler.hpp" + #include "metaprogramming/enableIf.hpp" + #include "oops/compressedOops.hpp" +-#include "utilities/powerOfTwo.hpp" + + // MacroAssembler extends Assembler by frequently used macros. 
+ // +diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp +index 8392b768847..0c5b0e001ee 100644 +--- a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp ++++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp +@@ -45,7 +45,6 @@ + #include "runtime/stubRoutines.hpp" + #include "runtime/thread.inline.hpp" + #include "utilities/align.hpp" +-#include "utilities/powerOfTwo.hpp" + #ifdef COMPILER2 + #include "opto/runtime.hpp" + #endif +diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp +index 5a87c687cf7..a10677bf650 100644 +--- a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp ++++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp +@@ -51,7 +51,6 @@ + #include "runtime/timer.hpp" + #include "runtime/vframeArray.hpp" + #include "utilities/debug.hpp" +-#include "utilities/powerOfTwo.hpp" + #include + + #ifndef PRODUCT +diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp +index 158294f7436..2a92fb9dd49 100644 +--- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp ++++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp +@@ -44,7 +44,6 @@ + #include "runtime/sharedRuntime.hpp" + #include "runtime/stubRoutines.hpp" + #include "runtime/synchronizer.hpp" +-#include "utilities/powerOfTwo.hpp" + + #define __ _masm-> + + +From 31b18aa6a29b83e2cae7ea76c5d4759b2596eca0 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 25 Apr 2023 17:34:39 +0800 +Subject: [PATCH 066/140] Revert JDK-8276976: Rename LIR_OprDesc to LIR_Opr + +--- + src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp | 2 +- + src/hotspot/cpu/riscv/c1_LIR_riscv.cpp | 14 +++++++------- + 2 files changed, 8 insertions(+), 8 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +index 9de89a3b026..70ee6295bfb 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +@@ -1261,7 +1261,7 @@ void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { + assert(op->addr()->is_address(), "what else?"); + LIR_Address* addr_ptr = op->addr()->as_address_ptr(); + assert(addr_ptr->disp() == 0, "need 0 disp"); +- assert(addr_ptr->index() == LIR_Opr::illegalOpr(), "need 0 index"); ++ assert(addr_ptr->index() == LIR_OprDesc::illegalOpr(), "need 0 index"); + addr = as_reg(addr_ptr->base()); + } + Register newval = as_reg(op->new_value()); +diff --git a/src/hotspot/cpu/riscv/c1_LIR_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIR_riscv.cpp +index 5f1c394ab3d..0317ed9003e 100644 +--- a/src/hotspot/cpu/riscv/c1_LIR_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIR_riscv.cpp +@@ -27,22 +27,22 @@ + #include "asm/register.hpp" + #include "c1/c1_LIR.hpp" + +-FloatRegister LIR_Opr::as_float_reg() const { ++FloatRegister LIR_OprDesc::as_float_reg() const { + return as_FloatRegister(fpu_regnr()); + } + +-FloatRegister LIR_Opr::as_double_reg() const { ++FloatRegister LIR_OprDesc::as_double_reg() const { + return as_FloatRegister(fpu_regnrLo()); + } + + // Reg2 unused. 
+ LIR_Opr LIR_OprFact::double_fpu(int reg1, int reg2) { + assert(as_FloatRegister(reg2) == fnoreg, "Not used on this platform"); +- return (LIR_Opr)(intptr_t)((reg1 << LIR_Opr::reg1_shift) | +- (reg1 << LIR_Opr::reg2_shift) | +- LIR_Opr::double_type | +- LIR_Opr::fpu_register | +- LIR_Opr::double_size); ++ return (LIR_Opr)(intptr_t)((reg1 << LIR_OprDesc::reg1_shift) | ++ (reg1 << LIR_OprDesc::reg2_shift) | ++ LIR_OprDesc::double_type | ++ LIR_OprDesc::fpu_register | ++ LIR_OprDesc::double_size); + } + + #ifndef PRODUCT + +From 2e64fa47eddc271d32b136ace4f062cfb9648b25 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 25 Apr 2023 17:39:16 +0800 +Subject: [PATCH 067/140] Revert JDK-8269672: C1: Remove unaligned move on all + architectures + +--- + src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp | 8 +++++--- + .../cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp | 2 +- + 2 files changed, 6 insertions(+), 4 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +index 70ee6295bfb..e29c0df5f8b 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +@@ -673,7 +673,7 @@ void LIR_Assembler::reg2stack(LIR_Opr src, LIR_Opr dest, BasicType type, bool po + } + } + +-void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool pop_fpu_stack, bool wide) { ++void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool pop_fpu_stack, bool wide, bool /* unaligned */) { + LIR_Address* to_addr = dest->as_address_ptr(); + // t0 was used as tmp reg in as_Address, so we use t1 as compressed_src + Register compressed_src = t1; +@@ -795,7 +795,7 @@ void LIR_Assembler::stack2stack(LIR_Opr src, LIR_Opr dest, BasicType type) { + reg2stack(temp, dest, dest->type(), false); + } + +-void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool wide) { ++void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool wide, bool /* unaligned */) { + assert(src->is_address(), "should not call otherwise"); + assert(dest->is_register(), "should not call otherwise"); + +@@ -910,11 +910,13 @@ void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, L + Label done; + move_op(opr2, result, type, lir_patch_none, NULL, + false, // pop_fpu_stack ++ false, // unaligned + false); // wide + __ j(done); + __ bind(label); + move_op(opr1, result, type, lir_patch_none, NULL, + false, // pop_fpu_stack ++ false, // unaligned + false); // wide + __ bind(done); + } +@@ -1866,7 +1868,7 @@ void LIR_Assembler::rt_call(LIR_Opr result, address dest, const LIR_OprList* arg + + void LIR_Assembler::volatile_move_op(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info) { + if (dest->is_address() || src->is_address()) { +- move_op(src, dest, type, lir_patch_none, info, /* pop_fpu_stack */ false, /* wide */ false); ++ move_op(src, dest, type, lir_patch_none, info, /* pop_fpu_stack */ false, /*unaligned*/ false, /* wide */ false); + } else { + ShouldNotReachHere(); + } +diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp +index d724876ec3a..bc847388f68 100644 +--- a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp ++++ 
b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp +@@ -340,7 +340,7 @@ void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrier + Register pre_val_reg = stub->pre_val()->as_register(); + + if (stub->do_load()) { +- ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /* wide */); ++ ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /* wide */, false /*unaligned*/); + } + __ beqz(pre_val_reg, *stub->continuation(), /* is_far */ true); + ce->store_parameter(stub->pre_val()->as_register(), 0); + +From 5f15abe61c700cbf59805530c52e8e558354d552 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 25 Apr 2023 17:54:05 +0800 +Subject: [PATCH 068/140] Revert JDK-8264805: Remove the experimental + Ahead-of-Time Compiler + +--- + src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp | 1 + + src/hotspot/cpu/riscv/compiledIC_riscv.cpp | 4 ++-- + 2 files changed, 3 insertions(+), 2 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp +index 051328c3a8a..5c81f1c704c 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp +@@ -73,6 +73,7 @@ friend class ArrayCopyStub; + // CompiledStaticCall::to_interp_stub_size() (14) + CompiledStaticCall::to_trampoline_stub_size() (1 + 3 + address) + _call_stub_size = 14 * NativeInstruction::instruction_size + + (NativeInstruction::instruction_size + NativeCallTrampolineStub::instruction_size), ++ _call_aot_stub_size = 0, + // See emit_exception_handler for detail + // verify_not_null_oop + far_call + should_not_reach_here + invalidate_registers(DEBUG_ONLY) + _exception_handler_size = DEBUG_ONLY(584) NOT_DEBUG(548), // or smaller +diff --git a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp +index 1cfc92b28fa..a29e5be9dbb 100644 +--- a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp ++++ b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp +@@ -86,7 +86,7 @@ int CompiledStaticCall::reloc_to_interp_stub() { + } + + void CompiledDirectStaticCall::set_to_interpreted(const methodHandle& callee, address entry) { +- address stub = find_stub(); ++ address stub = find_stub(false /* is_aot */); + guarantee(stub != NULL, "stub not found"); + + if (TraceICs) { +@@ -138,7 +138,7 @@ void CompiledDirectStaticCall::verify() { + _call->verify_alignment(); + + // Verify stub. +- address stub = find_stub(); ++ address stub = find_stub(false /* is_aot */); + assert(stub != NULL, "no stub found for static call"); + // Creation also verifies the object. 
+ NativeMovConstReg* method_holder + +From 4cfd20c7d163188a1a4e63ffaa19708e15be9d96 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 25 Apr 2023 17:59:45 +0800 +Subject: [PATCH 069/140] Revert JDK-8277417: C1 LIR instruction for load-klass + +--- + .../cpu/riscv/c1_LIRAssembler_riscv.cpp | 30 ++++++++----------- + 1 file changed, 12 insertions(+), 18 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +index e29c0df5f8b..49653d04d81 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +@@ -840,7 +840,14 @@ void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_Patch + __ ld(dest->as_register(), as_Address(from_addr)); + break; + case T_ADDRESS: +- __ ld(dest->as_register(), as_Address(from_addr)); ++ // FIXME: OMG this is a horrible kludge. Any offset from an ++ // address that matches klass_offset_in_bytes() will be loaded ++ // as a word, not a long. ++ if (UseCompressedClassPointers && addr->disp() == oopDesc::klass_offset_in_bytes()) { ++ __ lwu(dest->as_register(), as_Address(from_addr)); ++ } else { ++ __ ld(dest->as_register(), as_Address(from_addr)); ++ } + break; + case T_INT: + __ lw(dest->as_register(), as_Address(from_addr)); +@@ -869,6 +876,10 @@ void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_Patch + __ decode_heap_oop(dest->as_register()); + } + __ verify_oop(dest->as_register()); ++ } else if (type == T_ADDRESS && addr->disp() == oopDesc::klass_offset_in_bytes()) { ++ if (UseCompressedClassPointers) { ++ __ decode_klass_not_null(dest->as_register()); ++ } + } + } + +@@ -1531,23 +1542,6 @@ void LIR_Assembler::emit_lock(LIR_OpLock* op) { + __ bind(*op->stub()->continuation()); + } + +-void LIR_Assembler::emit_load_klass(LIR_OpLoadKlass* op) { +- Register obj = op->obj()->as_pointer_register(); +- Register result = op->result_opr()->as_pointer_register(); +- +- CodeEmitInfo* info = op->info(); +- if (info != NULL) { +- add_debug_info_for_null_check_here(info); +- } +- +- if (UseCompressedClassPointers) { +- __ lwu(result, Address(obj, oopDesc::klass_offset_in_bytes())); +- __ decode_klass_not_null(result); +- } else { +- __ ld(result, Address(obj, oopDesc::klass_offset_in_bytes())); +- } +-} +- + void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { + ciMethod* method = op->profiled_method(); + int bci = op->profiled_bci(); + +From eb4de6fc8f9b6192d16343382ebbe4035ce71702 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 25 Apr 2023 18:09:31 +0800 +Subject: [PATCH 070/140] Revert JDK-8245957: Remove unused LIR_OpBranch::type + after SPARC port removal + +--- + src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp +index a9345158749..2aba4f4974f 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp +@@ -393,7 +393,7 @@ void LIRGenerator::do_ArithmeticOp_Long(ArithmeticOp* x) { + if (need_zero_check) { + CodeEmitInfo* info = state_for(x); + __ cmp(lir_cond_equal, right.result(), LIR_OprFact::longConst(0)); +- __ branch(lir_cond_equal, new DivByZeroStub(info)); ++ __ branch(lir_cond_equal, T_LONG, new DivByZeroStub(info)); + } + + rlock_result(x); +@@ -467,7 +467,7 @@ void LIRGenerator::do_ArithmeticOp_Int(ArithmeticOp* x) { + if 
(need_zero_check) { + CodeEmitInfo* info = state_for(x); + __ cmp(lir_cond_equal, right_arg->result(), LIR_OprFact::longConst(0)); +- __ branch(lir_cond_equal, new DivByZeroStub(info)); ++ __ branch(lir_cond_equal, T_INT, new DivByZeroStub(info)); + } + + LIR_Opr ill = LIR_OprFact::illegalOpr; +@@ -1055,9 +1055,9 @@ void LIRGenerator::do_If(If* x) { + profile_branch(x, cond); + move_to_phi(x->state()); + if (x->x()->type()->is_float_kind()) { +- __ branch(lir_cond(cond), x->tsux(), x->usux()); ++ __ branch(lir_cond(cond), right->type(), x->tsux(), x->usux()); + } else { +- __ branch(lir_cond(cond), x->tsux()); ++ __ branch(lir_cond(cond), right->type(), x->tsux()); + } + assert(x->default_sux() == x->fsux(), "wrong destination above"); + __ jump(x->default_sux()); + +From d34f25c618982d3ac79e6ab2a47b3a199434d01b Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 25 Apr 2023 18:14:10 +0800 +Subject: [PATCH 071/140] Revert JDK-8266950: Remove vestigial support for + non-strict floating-point execution + +--- + src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp | 4 ++++ + src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp | 7 ++++++- + 2 files changed, 10 insertions(+), 1 deletion(-) + +diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp +index 65d0eda62ef..2a99d49c94b 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp +@@ -238,7 +238,9 @@ void LIR_Assembler::arith_op_single_fpu(LIR_Code code, LIR_Opr left, LIR_Opr rig + switch (code) { + case lir_add: __ fadd_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; + case lir_sub: __ fsub_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; ++ case lir_mul_strictfp: // fall through + case lir_mul: __ fmul_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; ++ case lir_div_strictfp: // fall through + case lir_div: __ fdiv_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; + default: + ShouldNotReachHere(); +@@ -251,7 +253,9 @@ void LIR_Assembler::arith_op_double_fpu(LIR_Code code, LIR_Opr left, LIR_Opr rig + switch (code) { + case lir_add: __ fadd_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; + case lir_sub: __ fsub_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; ++ case lir_mul_strictfp: // fall through + case lir_mul: __ fmul_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; ++ case lir_div_strictfp: // fall through + case lir_div: __ fdiv_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; + default: + ShouldNotReachHere(); +diff --git a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp +index 2aba4f4974f..21ae066e9ab 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp +@@ -360,7 +360,12 @@ void LIRGenerator::do_ArithmeticOp_FPU(ArithmeticOp* x) { + right.load_item(); + + LIR_Opr reg = rlock(x); +- arithmetic_op_fpu(x->op(), reg, left.result(), right.result()); ++ LIR_Opr tmp = LIR_OprFact::illegalOpr; ++ if (x->is_strictfp() && (x->op() == Bytecodes::_dmul || x->op() == Bytecodes::_ddiv)) { ++ tmp = new_register(T_DOUBLE); ++ } ++ ++ arithmetic_op_fpu(x->op(), reg, left.result(), right.result(), x->is_strictfp()); + + set_result(x, round_item(reg)); + } + +From 
02c0a84d52417d4aeddbdd10c07df446ee45c5de Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 25 Apr 2023 18:19:51 +0800 +Subject: [PATCH 072/140] Revert JDK-8276217: Harmonize StrictMath intrinsics + handling + +--- + src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp | 6 ++---- + 1 file changed, 2 insertions(+), 4 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp +index 21ae066e9ab..f9242251491 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp +@@ -651,16 +651,14 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) { + do_LibmIntrinsic(x); + break; + case vmIntrinsics::_dabs: // fall through +- case vmIntrinsics::_dsqrt: // fall through +- case vmIntrinsics::_dsqrt_strict: { ++ case vmIntrinsics::_dsqrt: { + assert(x->number_of_arguments() == 1, "wrong type"); + LIRItem value(x->argument_at(0), this); + value.load_item(); + LIR_Opr dst = rlock_result(x); + + switch (x->id()) { +- case vmIntrinsics::_dsqrt: // fall through +- case vmIntrinsics::_dsqrt_strict: { ++ case vmIntrinsics::_dsqrt: { + __ sqrt(value.result(), dst, LIR_OprFact::illegalOpr); + break; + } + +From 8dbace163d42cbb41ff49463b34f8971437fe82f Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 25 Apr 2023 18:35:08 +0800 +Subject: [PATCH 073/140] Revert JDK-8276209: Some call sites doesn't pass the + parameter 'size' to SharedRuntime::dtrace_object_alloc(_base) + +--- + src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp | 2 +- + src/hotspot/cpu/riscv/templateTable_riscv.cpp | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp +index fc88d5c180e..329df2e1ca7 100644 +--- a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp +@@ -1186,7 +1186,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { + StubFrame f(sasm, "dtrace_object_alloc", dont_gc_arguments); + save_live_registers(sasm); + +- __ call_VM_leaf(CAST_FROM_FN_PTR(address, static_cast(SharedRuntime::dtrace_object_alloc)), c_rarg0); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), c_rarg0); + + restore_live_registers(sasm); + } +diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp +index 2a92fb9dd49..ddc9498dddc 100644 +--- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp ++++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp +@@ -3577,7 +3577,7 @@ void TemplateTable::_new() { + SkipIfEqual skip(_masm, &DTraceAllocProbes, false); + // Trigger dtrace event for fastpath + __ push(atos); // save the return value +- __ call_VM_leaf(CAST_FROM_FN_PTR(address, static_cast(SharedRuntime::dtrace_object_alloc)), x10); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), x10); + __ pop(atos); // restore the return value + } + __ j(done); + +From 8930b6049a5b6e31ec9409c167b0e58d24cf6821 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 25 Apr 2023 18:38:51 +0800 +Subject: [PATCH 074/140] Revert JDK-8229838: Rename markOop files to markWord + +--- + src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp | 1 - + src/hotspot/cpu/riscv/frame_riscv.cpp | 1 - + src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 1 - + 3 files changed, 3 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp 
b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp +index e486f41948e..44ceccd8bd1 100644 +--- a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp +@@ -33,7 +33,6 @@ + #include "gc/shared/collectedHeap.hpp" + #include "interpreter/interpreter.hpp" + #include "oops/arrayOop.hpp" +-#include "oops/markWord.hpp" + #include "runtime/basicLock.hpp" + #include "runtime/biasedLocking.hpp" + #include "runtime/os.hpp" +diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp +index 13c482b610a..050595389e9 100644 +--- a/src/hotspot/cpu/riscv/frame_riscv.cpp ++++ b/src/hotspot/cpu/riscv/frame_riscv.cpp +@@ -29,7 +29,6 @@ + #include "interpreter/interpreter.hpp" + #include "memory/resourceArea.hpp" + #include "memory/universe.hpp" +-#include "oops/markWord.hpp" + #include "oops/method.hpp" + #include "oops/oop.inline.hpp" + #include "prims/methodHandles.hpp" +diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +index 2fc0b00e2cb..006fe49b155 100644 +--- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp ++++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +@@ -33,7 +33,6 @@ + #include "interpreter/interpreterRuntime.hpp" + #include "logging/log.hpp" + #include "oops/arrayOop.hpp" +-#include "oops/markWord.hpp" + #include "oops/method.hpp" + #include "oops/methodData.hpp" + #include "prims/jvmtiExport.hpp" + +From f11c5a2beca94c8248c30899fef90947d478e10c Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 25 Apr 2023 18:42:33 +0800 +Subject: [PATCH 075/140] Revert JDK-8235673: [C1, C2] Split inlining control + flags + +--- + src/hotspot/cpu/riscv/c1_globals_riscv.hpp | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp +index fe46f7b21c8..fd25f8f9afd 100644 +--- a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp ++++ b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp +@@ -42,6 +42,7 @@ define_pd_global(bool, TieredCompilation, false); + define_pd_global(intx, CompileThreshold, 1500 ); + + define_pd_global(intx, OnStackReplacePercentage, 933 ); ++define_pd_global(intx, FreqInlineSize, 325 ); + define_pd_global(intx, NewSizeThreadIncrease, 4*K ); + define_pd_global(intx, InitialCodeCacheSize, 160*K); + define_pd_global(intx, ReservedCodeCacheSize, 32*M ); + +From 6908dc58f2c66ca6a5adf4444a7ec2a91a80b9c8 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 25 Apr 2023 18:45:00 +0800 +Subject: [PATCH 076/140] Revert JDK-8262074: Consolidate the default value of + MetaspaceSize + +--- + src/hotspot/cpu/riscv/c1_globals_riscv.hpp | 1 + + src/hotspot/cpu/riscv/c2_globals_riscv.hpp | 3 +++ + 2 files changed, 4 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp +index fd25f8f9afd..1c55a23eecf 100644 +--- a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp ++++ b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp +@@ -53,6 +53,7 @@ define_pd_global(bool, ProfileInterpreter, false); + define_pd_global(intx, CodeCacheExpansionSize, 32*K ); + define_pd_global(uintx, CodeCacheMinBlockLength, 1); + define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); ++define_pd_global(uintx, MetaspaceSize, 12*M ); + define_pd_global(bool, NeverActAsServerClassMachine, true ); + define_pd_global(uint64_t, MaxRAM, 1ULL*G); + define_pd_global(bool, CICompileOSR, true ); +diff --git a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp 
b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp +index 53a41665f4b..d9e5fcc1bb0 100644 +--- a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp ++++ b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp +@@ -75,6 +75,9 @@ define_pd_global(intx, NonNMethodCodeHeapSize, 5*M ); + define_pd_global(uintx, CodeCacheMinBlockLength, 6); + define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); + ++// Heap related flags ++define_pd_global(uintx,MetaspaceSize, ScaleForWordSize(16*M)); ++ + // Ergonomics related flags + define_pd_global(bool, NeverActAsServerClassMachine, false); + + +From a3e991b37781d90c822471b54ace915622bee0da Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 25 Apr 2023 18:48:15 +0800 +Subject: [PATCH 077/140] Revert JDK-8246023: Obsolete LIRFillDelaySlot + +--- + src/hotspot/cpu/riscv/c1_globals_riscv.hpp | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp +index 1c55a23eecf..bd8d039de03 100644 +--- a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp ++++ b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp +@@ -60,6 +60,7 @@ define_pd_global(bool, CICompileOSR, true ); + #endif // !COMPILER2 + define_pd_global(bool, UseTypeProfile, false); + ++define_pd_global(bool, LIRFillDelaySlots, false); + define_pd_global(bool, OptimizeSinglePrecision, true ); + define_pd_global(bool, CSEArrayLength, false); + define_pd_global(bool, TwoOperandLIRForm, false); + +From 9f6082ae9810e6a26c6803cb37cce62297d15a74 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 25 Apr 2023 18:50:27 +0800 +Subject: [PATCH 078/140] Revert JDK-8136414: Large performance penalty + declaring a method strictfp on strict-only platforms + +--- + src/hotspot/cpu/riscv/c1_globals_riscv.hpp | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp +index bd8d039de03..16a87b7aced 100644 +--- a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp ++++ b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp +@@ -59,6 +59,7 @@ define_pd_global(uint64_t, MaxRAM, 1ULL*G); + define_pd_global(bool, CICompileOSR, true ); + #endif // !COMPILER2 + define_pd_global(bool, UseTypeProfile, false); ++define_pd_global(bool, RoundFPResults, true ); + + define_pd_global(bool, LIRFillDelaySlots, false); + define_pd_global(bool, OptimizeSinglePrecision, true ); + +From fbf03fc61be068f7f7c8ca1ab3854cc05519c5a3 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 25 Apr 2023 18:58:36 +0800 +Subject: [PATCH 079/140] Revert JDK-8251462: Simplify compilation policy + +--- + src/hotspot/cpu/riscv/c1_globals_riscv.hpp | 4 +- + src/hotspot/cpu/riscv/c2_globals_riscv.hpp | 2 +- + src/hotspot/cpu/riscv/globals_riscv.hpp | 2 +- + .../templateInterpreterGenerator_riscv.cpp | 114 +++++++++--- + src/hotspot/cpu/riscv/templateTable_riscv.cpp | 176 ++++++++++++------ + 5 files changed, 210 insertions(+), 88 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp +index 16a87b7aced..8f2f4e0e81d 100644 +--- a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp ++++ b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp +@@ -32,7 +32,7 @@ + // Sets the default values for platform dependent flags used by the client compiler. 
+ // (see c1_globals.hpp) + +-#ifndef COMPILER2 ++#ifndef TIERED + define_pd_global(bool, BackgroundCompilation, true ); + define_pd_global(bool, InlineIntrinsics, true ); + define_pd_global(bool, PreferInterpreterNativeStubs, false); +@@ -57,7 +57,7 @@ define_pd_global(uintx, MetaspaceSize, 12*M ); + define_pd_global(bool, NeverActAsServerClassMachine, true ); + define_pd_global(uint64_t, MaxRAM, 1ULL*G); + define_pd_global(bool, CICompileOSR, true ); +-#endif // !COMPILER2 ++#endif // !TIERED + define_pd_global(bool, UseTypeProfile, false); + define_pd_global(bool, RoundFPResults, true ); + +diff --git a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp +index d9e5fcc1bb0..6c301cdae04 100644 +--- a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp ++++ b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp +@@ -39,7 +39,7 @@ define_pd_global(bool, PreferInterpreterNativeStubs, false); + define_pd_global(bool, ProfileTraps, true); + define_pd_global(bool, UseOnStackReplacement, true); + define_pd_global(bool, ProfileInterpreter, true); +-define_pd_global(bool, TieredCompilation, COMPILER1_PRESENT(true) NOT_COMPILER1(false)); ++define_pd_global(bool, TieredCompilation, trueInTiered); + define_pd_global(intx, CompileThreshold, 10000); + + define_pd_global(intx, OnStackReplacePercentage, 140); +diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp +index 50bbb6a77b8..b78f258a764 100644 +--- a/src/hotspot/cpu/riscv/globals_riscv.hpp ++++ b/src/hotspot/cpu/riscv/globals_riscv.hpp +@@ -36,7 +36,7 @@ define_pd_global(bool, ImplicitNullChecks, true); // Generate code for im + define_pd_global(bool, TrapBasedNullChecks, false); + define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs past to check cast + +-define_pd_global(uintx, CodeCacheSegmentSize, 64 COMPILER1_AND_COMPILER2_PRESENT(+64)); // Tiered compilation has large code-entry alignment. ++define_pd_global(uintx, CodeCacheSegmentSize, 64 TIERED_ONLY(+64)); // Tiered compilation has large code-entry alignment. + define_pd_global(intx, CodeEntryAlignment, 64); + define_pd_global(intx, OptoLoopAlignment, 16); + +diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp +index a10677bf650..8aea4eca048 100644 +--- a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp ++++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp +@@ -556,31 +556,81 @@ address TemplateInterpreterGenerator::generate_safept_entry_for(TosState state, + // + // xmethod: method + // +-void TemplateInterpreterGenerator::generate_counter_incr(Label* overflow) { ++void TemplateInterpreterGenerator::generate_counter_incr( ++ Label* overflow, ++ Label* profile_method, ++ Label* profile_method_continue) { + Label done; + // Note: In tiered we increment either counters in Method* or in MDO depending if we're profiling or not. +- int increment = InvocationCounter::count_increment; +- Label no_mdo; +- if (ProfileInterpreter) { +- // Are we profiling? 
+- __ ld(x10, Address(xmethod, Method::method_data_offset())); +- __ beqz(x10, no_mdo); +- // Increment counter in the MDO +- const Address mdo_invocation_counter(x10, in_bytes(MethodData::invocation_counter_offset()) + +- in_bytes(InvocationCounter::counter_offset())); +- const Address mask(x10, in_bytes(MethodData::invoke_mask_offset())); +- __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, t0, t1, false, overflow); +- __ j(done); ++ if (TieredCompilation) { ++ int increment = InvocationCounter::count_increment; ++ Label no_mdo; ++ if (ProfileInterpreter) { ++ // Are we profiling? ++ __ ld(x10, Address(xmethod, Method::method_data_offset())); ++ __ beqz(x10, no_mdo); ++ // Increment counter in the MDO ++ const Address mdo_invocation_counter(x10, in_bytes(MethodData::invocation_counter_offset()) + ++ in_bytes(InvocationCounter::counter_offset())); ++ const Address mask(x10, in_bytes(MethodData::invoke_mask_offset())); ++ __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, t0, t1, false, overflow); ++ __ j(done); ++ } ++ __ bind(no_mdo); ++ // Increment counter in MethodCounters ++ const Address invocation_counter(t1, ++ MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset()); ++ __ get_method_counters(xmethod, t1, done); ++ const Address mask(t1, in_bytes(MethodCounters::invoke_mask_offset())); ++ __ increment_mask_and_jump(invocation_counter, increment, mask, t0, x11, false, overflow); ++ __ bind(done); ++ } else { // not TieredCompilation ++ const Address backedge_counter(t1, ++ MethodCounters::backedge_counter_offset() + ++ InvocationCounter::counter_offset()); ++ const Address invocation_counter(t1, ++ MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset()); ++ ++ __ get_method_counters(xmethod, t1, done); ++ ++ if (ProfileInterpreter) { // %%% Merge this into MethodData* ++ __ lwu(x11, Address(t1, MethodCounters::interpreter_invocation_counter_offset())); ++ __ addw(x11, x11, 1); ++ __ sw(x11, Address(t1, MethodCounters::interpreter_invocation_counter_offset())); ++ } ++ // Update standard invocation counters ++ __ lwu(x11, invocation_counter); ++ __ lwu(x10, backedge_counter); ++ ++ __ addw(x11, x11, InvocationCounter::count_increment); ++ __ andi(x10, x10, InvocationCounter::count_mask_value); ++ ++ __ sw(x11, invocation_counter); ++ __ addw(x10, x10, x11); // add both counters ++ ++ // profile_method is non-null only for interpreted method so ++ // profile_method != NULL == !native_call ++ ++ if (ProfileInterpreter && profile_method != NULL) { ++ // Test to see if we should create a method data oop ++ __ ld(t1, Address(xmethod, Method::method_counters_offset())); ++ __ lwu(t1, Address(t1, in_bytes(MethodCounters::interpreter_profile_limit_offset()))); ++ __ blt(x10, t1, *profile_method_continue); ++ ++ // if no method data exists, go to profile_method ++ __ test_method_data_pointer(t1, *profile_method); ++ } ++ ++ { ++ __ ld(t1, Address(xmethod, Method::method_counters_offset())); ++ __ lwu(t1, Address(t1, in_bytes(MethodCounters::interpreter_invocation_limit_offset()))); ++ __ bltu(x10, t1, done); ++ __ j(*overflow); ++ } ++ __ bind(done); + } +- __ bind(no_mdo); +- // Increment counter in MethodCounters +- const Address invocation_counter(t1, +- MethodCounters::invocation_counter_offset() + +- InvocationCounter::counter_offset()); +- __ get_method_counters(xmethod, t1, done); +- const Address mask(t1, in_bytes(MethodCounters::invoke_mask_offset())); +- __ 
increment_mask_and_jump(invocation_counter, increment, mask, t0, x11, false, overflow); +- __ bind(done); + } + + void TemplateInterpreterGenerator::generate_counter_overflow(Label& do_continue) { +@@ -977,7 +1027,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { + // increment invocation count & check for overflow + Label invocation_counter_overflow; + if (inc_counter) { +- generate_counter_incr(&invocation_counter_overflow); ++ generate_counter_incr(&invocation_counter_overflow, NULL, NULL); + } + + Label continue_after_compile; +@@ -1389,8 +1439,15 @@ address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) { + + // increment invocation count & check for overflow + Label invocation_counter_overflow; ++ Label profile_method; ++ Label profile_method_continue; + if (inc_counter) { +- generate_counter_incr(&invocation_counter_overflow); ++ generate_counter_incr(&invocation_counter_overflow, ++ &profile_method, ++ &profile_method_continue); ++ if (ProfileInterpreter) { ++ __ bind(profile_method_continue); ++ } + } + + Label continue_after_compile; +@@ -1427,6 +1484,15 @@ address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) { + + // invocation counter overflow + if (inc_counter) { ++ if (ProfileInterpreter) { ++ // We have decided to profile this method in the interpreter ++ __ bind(profile_method); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method)); ++ __ set_method_data_pointer_for_bcp(); ++ // don't think we need this ++ __ get_method(x11); ++ __ j(profile_method_continue); ++ } + // Handle overflow of counter and compile method + __ bind(invocation_counter_overflow); + generate_counter_overflow(continue_after_compile); +diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp +index ddc9498dddc..bb20f228447 100644 +--- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp ++++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp +@@ -1745,6 +1745,7 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) + assert(UseLoopCounter || !UseOnStackReplacement, + "on-stack-replacement requires loop counters"); + Label backedge_counter_overflow; ++ Label profile_method; + Label dispatch; + if (UseLoopCounter) { + // increment backedge counter for backward branches +@@ -1769,31 +1770,75 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) + __ beqz(t0, dispatch); // No MethodCounters allocated, OutOfMemory + __ bind(has_counters); + +- Label no_mdo; +- int increment = InvocationCounter::count_increment; +- if (ProfileInterpreter) { +- // Are we profiling? +- __ ld(x11, Address(xmethod, in_bytes(Method::method_data_offset()))); +- __ beqz(x11, no_mdo); +- // Increment the MDO backedge counter +- const Address mdo_backedge_counter(x11, in_bytes(MethodData::backedge_counter_offset()) + +- in_bytes(InvocationCounter::counter_offset())); +- const Address mask(x11, in_bytes(MethodData::backedge_mask_offset())); +- __ increment_mask_and_jump(mdo_backedge_counter, increment, mask, +- x10, t0, false, ++ if (TieredCompilation) { ++ Label no_mdo; ++ int increment = InvocationCounter::count_increment; ++ if (ProfileInterpreter) { ++ // Are we profiling? 
++ __ ld(x11, Address(xmethod, in_bytes(Method::method_data_offset()))); ++ __ beqz(x11, no_mdo); ++ // Increment the MDO backedge counter ++ const Address mdo_backedge_counter(x11, in_bytes(MethodData::backedge_counter_offset()) + ++ in_bytes(InvocationCounter::counter_offset())); ++ const Address mask(x11, in_bytes(MethodData::backedge_mask_offset())); ++ __ increment_mask_and_jump(mdo_backedge_counter, increment, mask, ++ x10, t0, false, ++ UseOnStackReplacement ? &backedge_counter_overflow : &dispatch); ++ __ j(dispatch); ++ } ++ __ bind(no_mdo); ++ // Increment backedge counter in MethodCounters* ++ __ ld(t0, Address(xmethod, Method::method_counters_offset())); ++ const Address mask(t0, in_bytes(MethodCounters::backedge_mask_offset())); ++ __ increment_mask_and_jump(Address(t0, be_offset), increment, mask, ++ x10, t1, false, + UseOnStackReplacement ? &backedge_counter_overflow : &dispatch); +- __ j(dispatch); ++ } else { // not TieredCompilation ++ // increment counter ++ __ ld(t1, Address(xmethod, Method::method_counters_offset())); ++ __ lwu(x10, Address(t1, be_offset)); // load backedge counter ++ __ addw(t0, x10, InvocationCounter::count_increment); // increment counter ++ __ sw(t0, Address(t1, be_offset)); // store counter ++ ++ __ lwu(x10, Address(t1, inv_offset)); // load invocation counter ++ __ andi(x10, x10, (unsigned)InvocationCounter::count_mask_value, x13); // and the status bits ++ __ addw(x10, x10, t0); // add both counters ++ ++ if (ProfileInterpreter) { ++ // Test to see if we should create a method data oop ++ __ lwu(t0, Address(t1, in_bytes(MethodCounters::interpreter_profile_limit_offset()))); ++ __ blt(x10, t0, dispatch); ++ ++ // if no method data exists, go to profile method ++ __ test_method_data_pointer(x10, profile_method); ++ ++ if (UseOnStackReplacement) { ++ // check for overflow against x11 which is the MDO taken count ++ __ lwu(t0, Address(t1, in_bytes(MethodCounters::interpreter_backward_branch_limit_offset()))); ++ __ bltu(x11, t0, dispatch); // Intel == Assembler::below, lo:unsigned lower ++ ++ // When ProfileInterpreter is on, the backedge_count comes ++ // from the MethodData*, which value does not get reset on ++ // the call to frequency_counter_overflow(). To avoid ++ // excessive calls to the overflow routine while the method is ++ // being compiled, add a second test to make sure the overflow ++ // function is called only once every overflow_frequency. ++ const int overflow_frequency = 1024; ++ __ andi(x11, x11, overflow_frequency - 1); ++ __ beqz(x11, backedge_counter_overflow); ++ ++ } ++ } else { ++ if (UseOnStackReplacement) { ++ // check for overflow against x10, which is the sum of the ++ // counters ++ __ lwu(t0, Address(t1, in_bytes(MethodCounters::interpreter_backward_branch_limit_offset()))); ++ __ bgeu(x10, t0, backedge_counter_overflow); // Intel == Assembler::aboveEqual ++ } ++ } + } +- __ bind(no_mdo); +- // Increment backedge counter in MethodCounters* +- __ ld(t0, Address(xmethod, Method::method_counters_offset())); +- const Address mask(t0, in_bytes(MethodCounters::backedge_mask_offset())); +- __ increment_mask_and_jump(Address(t0, be_offset), increment, mask, +- x10, t1, false, +- UseOnStackReplacement ? 
&backedge_counter_overflow : &dispatch); + __ bind(dispatch); + } +- + // Pre-load the next target bytecode into t0 + __ load_unsigned_byte(t0, Address(xbcp, 0)); + +@@ -1802,52 +1847,63 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) + // xbcp: target bcp + __ dispatch_only(vtos, /*generate_poll*/true); + +- if (UseLoopCounter && UseOnStackReplacement) { +- // invocation counter overflow +- __ bind(backedge_counter_overflow); +- __ neg(x12, x12); +- __ add(x12, x12, xbcp); // branch xbcp +- // IcoResult frequency_counter_overflow([JavaThread*], address branch_bcp) +- __ call_VM(noreg, +- CAST_FROM_FN_PTR(address, +- InterpreterRuntime::frequency_counter_overflow), +- x12); +- __ load_unsigned_byte(x11, Address(xbcp, 0)); // restore target bytecode +- +- // x10: osr nmethod (osr ok) or NULL (osr not possible) +- // w11: target bytecode +- // x12: temporary +- __ beqz(x10, dispatch); // test result -- no osr if null +- // nmethod may have been invalidated (VM may block upon call_VM return) +- __ lbu(x12, Address(x10, nmethod::state_offset())); +- if (nmethod::in_use != 0) { +- __ sub(x12, x12, nmethod::in_use); ++ if (UseLoopCounter) { ++ if (ProfileInterpreter && !TieredCompilation) { ++ // Out-of-line code to allocate method data oop. ++ __ bind(profile_method); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method)); ++ __ load_unsigned_byte(x11, Address(xbcp, 0)); // restore target bytecode ++ __ set_method_data_pointer_for_bcp(); ++ __ j(dispatch); + } +- __ bnez(x12, dispatch); + +- // We have the address of an on stack replacement routine in x10 +- // We need to prepare to execute the OSR method. First we must +- // migrate the locals and monitors off of the stack. ++ if (UseOnStackReplacement) { ++ // invocation counter overflow ++ __ bind(backedge_counter_overflow); ++ __ neg(x12, x12); ++ __ add(x12, x12, xbcp); // branch xbcp ++ // IcoResult frequency_counter_overflow([JavaThread*], address branch_bcp) ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::frequency_counter_overflow), ++ x12); ++ __ load_unsigned_byte(x11, Address(xbcp, 0)); // restore target bytecode ++ ++ // x10: osr nmethod (osr ok) or NULL (osr not possible) ++ // w11: target bytecode ++ // x12: temporary ++ __ beqz(x10, dispatch); // test result -- no osr if null ++ // nmethod may have been invalidated (VM may block upon call_VM return) ++ __ lbu(x12, Address(x10, nmethod::state_offset())); ++ if (nmethod::in_use != 0) { ++ __ sub(x12, x12, nmethod::in_use); ++ } ++ __ bnez(x12, dispatch); ++ ++ // We have the address of an on stack replacement routine in x10 ++ // We need to prepare to execute the OSR method. First we must ++ // migrate the locals and monitors off of the stack. 
+ +- __ mv(x9, x10); // save the nmethod ++ __ mv(x9, x10); // save the nmethod + +- call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin)); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin)); + +- // x10 is OSR buffer, move it to expected parameter location +- __ mv(j_rarg0, x10); ++ // x10 is OSR buffer, move it to expected parameter location ++ __ mv(j_rarg0, x10); + +- // remove activation +- // get sender esp +- __ ld(esp, +- Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); +- // remove frame anchor +- __ leave(); +- // Ensure compiled code always sees stack at proper alignment +- __ andi(sp, esp, -16); ++ // remove activation ++ // get sender esp ++ __ ld(esp, ++ Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); ++ // remove frame anchor ++ __ leave(); ++ // Ensure compiled code always sees stack at proper alignment ++ __ andi(sp, esp, -16); + +- // and begin the OSR nmethod +- __ ld(t0, Address(x9, nmethod::osr_entry_point_offset())); +- __ jr(t0); ++ // and begin the OSR nmethod ++ __ ld(t0, Address(x9, nmethod::osr_entry_point_offset())); ++ __ jr(t0); ++ } + } + } + + +From b1f3fd0510681324d70028443a3532d6084be504 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 27 Apr 2023 11:37:05 +0800 +Subject: [PATCH 080/140] Revert JDK-8250902: Implement MD5 Intrinsics on x86 + +--- + src/hotspot/cpu/riscv/vm_version_riscv.cpp | 5 ---- + ...nericTestCaseForUnsupportedRISCV64CPU.java | 30 +++++++++---------- + 2 files changed, 15 insertions(+), 20 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp +index c0491d23fa6..d4b79162d84 100644 +--- a/src/hotspot/cpu/riscv/vm_version_riscv.cpp ++++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp +@@ -97,11 +97,6 @@ void VM_Version::initialize() { + FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false); + } + +- if (UseMD5Intrinsics) { +- warning("MD5 intrinsics are not available on this CPU."); +- FLAG_SET_DEFAULT(UseMD5Intrinsics, false); +- } +- + if (UseRVV) { + if (!(_features & CPU_V)) { + warning("RVV is not supported on this CPU"); +diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java +index 2ecfec07a4c..8566d57c391 100644 +--- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java ++++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java +@@ -24,7 +24,7 @@ + + package compiler.intrinsics.sha.cli.testcases; + +-import compiler.intrinsics.sha.cli.DigestOptionsBase; ++import compiler.intrinsics.sha.cli.SHAOptionsBase; + import jdk.test.lib.process.ExitCode; + import jdk.test.lib.Platform; + import jdk.test.lib.cli.CommandLineOptionTest; +@@ -36,7 +36,7 @@ + * which don't support instruction required by the tested option. 
+ */ + public class GenericTestCaseForUnsupportedRISCV64CPU extends +- DigestOptionsBase.TestCase { ++ SHAOptionsBase.TestCase { + + final private boolean checkUseSHA; + +@@ -46,7 +46,7 @@ public GenericTestCaseForUnsupportedRISCV64CPU(String optionName) { + + public GenericTestCaseForUnsupportedRISCV64CPU(String optionName, boolean checkUseSHA) { + super(optionName, new AndPredicate(Platform::isRISCV64, +- new NotPredicate(DigestOptionsBase.getPredicateForOption( ++ new NotPredicate(SHAOptionsBase.getPredicateForOption( + optionName)))); + + this.checkUseSHA = checkUseSHA; +@@ -58,27 +58,27 @@ protected void verifyWarnings() throws Throwable { + + "option '-XX:-%s' without any warnings", optionName); + //Verify that option could be disabled without any warnings. + CommandLineOptionTest.verifySameJVMStartup(null, new String[] { +- DigestOptionsBase.getWarningForUnsupportedCPU(optionName) ++ SHAOptionsBase.getWarningForUnsupportedCPU(optionName) + }, shouldPassMessage, shouldPassMessage, ExitCode.OK, +- DigestOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, ++ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, + CommandLineOptionTest.prepareBooleanFlag(optionName, false)); + + if (checkUseSHA) { + shouldPassMessage = String.format("If JVM is started with '-XX:-" + + "%s' '-XX:+%s', output should contain warning.", +- DigestOptionsBase.USE_SHA_OPTION, optionName); ++ SHAOptionsBase.USE_SHA_OPTION, optionName); + + // Verify that when the tested option is enabled, then + // a warning will occur in VM output if UseSHA is disabled. +- if (!optionName.equals(DigestOptionsBase.USE_SHA_OPTION)) { ++ if (!optionName.equals(SHAOptionsBase.USE_SHA_OPTION)) { + CommandLineOptionTest.verifySameJVMStartup( +- new String[] { DigestOptionsBase.getWarningForUnsupportedCPU(optionName) }, ++ new String[] { SHAOptionsBase.getWarningForUnsupportedCPU(optionName) }, + null, + shouldPassMessage, + shouldPassMessage, + ExitCode.OK, +- DigestOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, +- CommandLineOptionTest.prepareBooleanFlag(DigestOptionsBase.USE_SHA_OPTION, false), ++ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, ++ CommandLineOptionTest.prepareBooleanFlag(SHAOptionsBase.USE_SHA_OPTION, false), + CommandLineOptionTest.prepareBooleanFlag(optionName, true)); + } + } +@@ -90,7 +90,7 @@ protected void verifyOptionValues() throws Throwable { + CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", + String.format("Option '%s' should be disabled by default", + optionName), +- DigestOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS); ++ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS); + + if (checkUseSHA) { + // Verify that option is disabled even if it was explicitly enabled +@@ -98,7 +98,7 @@ protected void verifyOptionValues() throws Throwable { + CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", + String.format("Option '%s' should be off on unsupported " + + "RISCV64CPU even if set to true directly", optionName), +- DigestOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, ++ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, + CommandLineOptionTest.prepareBooleanFlag(optionName, true)); + + // Verify that option is disabled when +UseSHA was passed to JVM. 
+@@ -106,10 +106,10 @@ protected void verifyOptionValues() throws Throwable { + String.format("Option '%s' should be off on unsupported " + + "RISCV64CPU even if %s flag set to JVM", + optionName, CommandLineOptionTest.prepareBooleanFlag( +- DigestOptionsBase.USE_SHA_OPTION, true)), +- DigestOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, ++ SHAOptionsBase.USE_SHA_OPTION, true)), ++ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, + CommandLineOptionTest.prepareBooleanFlag( +- DigestOptionsBase.USE_SHA_OPTION, true)); ++ SHAOptionsBase.USE_SHA_OPTION, true)); + } + } + } + +From b5e96cb7663b2def3a064b9aede7209fb0c5eeda Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 27 Apr 2023 15:41:48 +0800 +Subject: [PATCH 081/140] Revert JDK-8253555: Make ByteSize and WordSize typed + scoped enums + +--- + src/hotspot/cpu/riscv/assembler_riscv.hpp | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp +index 31aeeb9b425..9959ac1d02c 100644 +--- a/src/hotspot/cpu/riscv/assembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp +@@ -195,8 +195,10 @@ class Address { + : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } + Address(Register r, unsigned long long o) + : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } ++#ifdef ASSERT + Address(Register r, ByteSize disp) +- : Address(r, in_bytes(disp)) { } ++ : _base(r), _index(noreg), _offset(in_bytes(disp)), _mode(base_plus_offset), _target(0) { } ++#endif + Address(address target, RelocationHolder const& rspec) + : _base(noreg), + _index(noreg), + +From 592afab705a4d4c8b2773a0808e47efc2a14517d Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sat, 29 Apr 2023 15:18:12 +0800 +Subject: [PATCH 082/140] Revert JDK-8253457: Remove unimplemented register + stack functions + +--- + .../os_cpu/linux_riscv/thread_linux_riscv.hpp | 16 ++++++++++++++++ + 1 file changed, 16 insertions(+) + +diff --git a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp +index 61e2cf85b63..313a7b932c3 100644 +--- a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp ++++ b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp +@@ -34,15 +34,31 @@ + frame pd_last_frame(); + + public: ++ ++ void set_base_of_stack_pointer(intptr_t* base_sp) { ++ } ++ + static ByteSize last_Java_fp_offset() { + return byte_offset_of(JavaThread, _anchor) + JavaFrameAnchor::last_Java_fp_offset(); + } + ++ intptr_t* base_of_stack_pointer() { ++ return NULL; ++ } ++ void record_base_of_stack_pointer() { ++ } ++ + bool pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext, + bool isInJava); + + bool pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava); + private: + bool pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava); ++public: ++ // These routines are only used on cpu architectures that ++ // have separate register stacks (Itanium). 
++ static bool register_stack_overflow() { return false; } ++ static void enable_register_stack_guard() {} ++ static void disable_register_stack_guard() {} + + #endif // OS_CPU_LINUX_RISCV_THREAD_LINUX_RISCV_HPP + +From 28238cf776bd25c9805d9dd686c08fe8d3a1500b Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sat, 29 Apr 2023 15:22:30 +0800 +Subject: [PATCH 083/140] Revert JDK-8253539: Remove unused JavaThread + functions for set_last_Java_fp/pc + +--- + src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp | 3 +++ + src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp | 3 +++ + 2 files changed, 6 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp b/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp +index 9a6084afa1d..5a0c9b812fc 100644 +--- a/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp ++++ b/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp +@@ -83,4 +83,7 @@ + + intptr_t* last_Java_fp(void) { return _last_Java_fp; } + ++ // Assert (last_Java_sp == NULL || fp == NULL) ++ void set_last_Java_fp(intptr_t* fp) { OrderAccess::release(); _last_Java_fp = fp; } ++ + #endif // CPU_RISCV_JAVAFRAMEANCHOR_RISCV_HPP +diff --git a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp +index 313a7b932c3..4b91fa855ae 100644 +--- a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp ++++ b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp +@@ -34,6 +34,9 @@ + frame pd_last_frame(); + + public: ++ // Mutators are highly dangerous.... ++ intptr_t* last_Java_fp() { return _anchor.last_Java_fp(); } ++ void set_last_Java_fp(intptr_t* fp) { _anchor.set_last_Java_fp(fp); } + + void set_base_of_stack_pointer(intptr_t* base_sp) { + } + +From f9322bb6235b603eac825c6e6751093ada1e6cfe Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sat, 29 Apr 2023 15:45:56 +0800 +Subject: [PATCH 084/140] Revert JDK-8269853: Prefetch::read should accept + pointer to const + +--- + src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp b/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp +index a6432c84ec7..2bd48e09c34 100644 +--- a/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp ++++ b/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp +@@ -29,7 +29,7 @@ + #include "runtime/prefetch.hpp" + + +-inline void Prefetch::read (const void *loc, intx interval) { ++inline void Prefetch::read (void *loc, intx interval) { + } + + inline void Prefetch::write(void *loc, intx interval) { + +From aa6f7320d8d849b8e47b6e77a20257e3d99fd14f Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sat, 29 Apr 2023 16:14:55 +0800 +Subject: [PATCH 085/140] Revert: JDK-8254231: Implementation of Foreign Linker + API (Incubator) JDK-8264774: Implementation of Foreign Function and Memory + API (Incubator) + +--- + .../cpu/riscv/foreign_globals_riscv.cpp | 44 ------------------- + .../cpu/riscv/foreign_globals_riscv.hpp | 32 -------------- + src/hotspot/cpu/riscv/frame_riscv.cpp | 15 ------- + src/hotspot/cpu/riscv/methodHandles_riscv.cpp | 12 +---- + src/hotspot/cpu/riscv/riscv.ad | 5 --- + src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 10 +---- + .../riscv/universalNativeInvoker_riscv.cpp | 33 -------------- + .../cpu/riscv/universalUpcallHandle_riscv.cpp | 42 ------------------ + src/hotspot/cpu/riscv/vmreg_riscv.cpp | 5 --- + 9 files changed, 2 insertions(+), 196 deletions(-) + delete mode 
100644 src/hotspot/cpu/riscv/foreign_globals_riscv.cpp + delete mode 100644 src/hotspot/cpu/riscv/foreign_globals_riscv.hpp + delete mode 100644 src/hotspot/cpu/riscv/universalNativeInvoker_riscv.cpp + delete mode 100644 src/hotspot/cpu/riscv/universalUpcallHandle_riscv.cpp + +diff --git a/src/hotspot/cpu/riscv/foreign_globals_riscv.cpp b/src/hotspot/cpu/riscv/foreign_globals_riscv.cpp +deleted file mode 100644 +index 5c700be9c91..00000000000 +--- a/src/hotspot/cpu/riscv/foreign_globals_riscv.cpp ++++ /dev/null +@@ -1,44 +0,0 @@ +-/* +- * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. +- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +- * +- * This code is free software; you can redistribute it and/or modify it +- * under the terms of the GNU General Public License version 2 only, as +- * published by the Free Software Foundation. +- * +- * This code is distributed in the hope that it will be useful, but WITHOUT +- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +- * version 2 for more details (a copy is included in the LICENSE file that +- * accompanied this code). +- * +- * You should have received a copy of the GNU General Public License version +- * 2 along with this work; if not, write to the Free Software Foundation, +- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +- * +- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +- * or visit www.oracle.com if you need additional information or have any +- * questions. +- * +- */ +- +-#include "precompiled.hpp" +-#include "prims/foreign_globals.hpp" +-#include "utilities/debug.hpp" +- +-// Stubbed out, implement later +-const ABIDescriptor ForeignGlobals::parse_abi_descriptor_impl(jobject jabi) const { +- Unimplemented(); +- return {}; +-} +- +-const BufferLayout ForeignGlobals::parse_buffer_layout_impl(jobject jlayout) const { +- Unimplemented(); +- return {}; +-} +- +-const CallRegs ForeignGlobals::parse_call_regs_impl(jobject jconv) const { +- ShouldNotCallThis(); +- return {}; +-} +diff --git a/src/hotspot/cpu/riscv/foreign_globals_riscv.hpp b/src/hotspot/cpu/riscv/foreign_globals_riscv.hpp +deleted file mode 100644 +index 3ac89752c27..00000000000 +--- a/src/hotspot/cpu/riscv/foreign_globals_riscv.hpp ++++ /dev/null +@@ -1,32 +0,0 @@ +-/* +- * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. +- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +- * +- * This code is free software; you can redistribute it and/or modify it +- * under the terms of the GNU General Public License version 2 only, as +- * published by the Free Software Foundation. +- * +- * This code is distributed in the hope that it will be useful, but WITHOUT +- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +- * version 2 for more details (a copy is included in the LICENSE file that +- * accompanied this code). +- * +- * You should have received a copy of the GNU General Public License version +- * 2 along with this work; if not, write to the Free Software Foundation, +- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+- * +- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +- * or visit www.oracle.com if you need additional information or have any +- * questions. +- * +- */ +- +-#ifndef CPU_RISCV_FOREIGN_GLOBALS_RISCV_HPP +-#define CPU_RISCV_FOREIGN_GLOBALS_RISCV_HPP +- +-class ABIDescriptor {}; +-class BufferLayout {}; +- +-#endif // CPU_RISCV_FOREIGN_GLOBALS_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp +index 050595389e9..40ec584b994 100644 +--- a/src/hotspot/cpu/riscv/frame_riscv.cpp ++++ b/src/hotspot/cpu/riscv/frame_riscv.cpp +@@ -361,21 +361,6 @@ frame frame::sender_for_entry_frame(RegisterMap* map) const { + return fr; + } + +-OptimizedEntryBlob::FrameData* OptimizedEntryBlob::frame_data_for_frame(const frame& frame) const { +- ShouldNotCallThis(); +- return nullptr; +-} +- +-bool frame::optimized_entry_frame_is_first() const { +- ShouldNotCallThis(); +- return false; +-} +- +-frame frame::sender_for_optimized_entry_frame(RegisterMap* map) const { +- ShouldNotCallThis(); +- return {}; +-} +- + //------------------------------------------------------------------------------ + // frame::verify_deopt_original_pc + // +diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp +index 1f7c0c87c21..3bf5cfb16c3 100644 +--- a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp ++++ b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp +@@ -181,13 +181,6 @@ address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* + return NULL; + } + +- // No need in interpreter entry for linkToNative for now. +- // Interpreter calls compiled entry through i2c. +- if (iid == vmIntrinsics::_linkToNative) { +- __ ebreak(); +- return NULL; +- } +- + // x30: sender SP (must preserve; see prepare_to_jump_from_interpreted) + // xmethod: Method* + // x13: argument locator (parameter slot count, added to sp) +@@ -280,10 +273,7 @@ void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, + assert_different_registers(temp1, temp2, temp3, receiver_reg); + assert_different_registers(temp1, temp2, temp3, member_reg); + +- if (iid == vmIntrinsics::_invokeBasic || iid == vmIntrinsics::_linkToNative) { +- if (iid == vmIntrinsics::_linkToNative) { +- assert(for_compiler_entry, "only compiler entry is supported"); +- } ++ if (iid == vmIntrinsics::_invokeBasic) { + // indirect through MH.form.vmentry.vmtarget + jump_to_lambda_form(_masm, receiver_reg, xmethod, temp1, for_compiler_entry); + } else { +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 1667994699f..7ec76e72ff0 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -945,11 +945,6 @@ int MachCallRuntimeNode::ret_addr_offset() { + } + } + +-int MachCallNativeNode::ret_addr_offset() { +- Unimplemented(); +- return -1; +-} +- + // + // Compute padding required for nodes which need alignment + // +diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +index 411bddd2ace..897dafcc99c 100644 +--- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp ++++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +@@ -1037,7 +1037,7 @@ static void gen_special_dispatch(MacroAssembler* masm, + member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument + member_reg = x9; // known to be free at this point + has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); +- } else if (iid == 
vmIntrinsics::_invokeBasic || iid == vmIntrinsics::_linkToNative) { ++ } else if (iid == vmIntrinsics::_invokeBasic) { + has_receiver = true; + } else { + fatal("unexpected intrinsic id %d", vmIntrinsics::as_int(iid)); +@@ -2566,14 +2566,6 @@ RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const cha + } + + #ifdef COMPILER2 +-RuntimeStub* SharedRuntime::make_native_invoker(address call_target, +- int shadow_space_bytes, +- const GrowableArray& input_registers, +- const GrowableArray& output_registers) { +- Unimplemented(); +- return nullptr; +-} +- + //------------------------------generate_exception_blob--------------------------- + // creates exception blob at the end + // Using exception blob, this code is jumped from a compiled method. +diff --git a/src/hotspot/cpu/riscv/universalNativeInvoker_riscv.cpp b/src/hotspot/cpu/riscv/universalNativeInvoker_riscv.cpp +deleted file mode 100644 +index 4f50adb05c3..00000000000 +--- a/src/hotspot/cpu/riscv/universalNativeInvoker_riscv.cpp ++++ /dev/null +@@ -1,33 +0,0 @@ +-/* +- * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. +- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +- * +- * This code is free software; you can redistribute it and/or modify it +- * under the terms of the GNU General Public License version 2 only, as +- * published by the Free Software Foundation. +- * +- * This code is distributed in the hope that it will be useful, but WITHOUT +- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +- * version 2 for more details (a copy is included in the LICENSE file that +- * accompanied this code). +- * +- * You should have received a copy of the GNU General Public License version +- * 2 along with this work; if not, write to the Free Software Foundation, +- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +- * +- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +- * or visit www.oracle.com if you need additional information or have any +- * questions. +- * +- */ +- +-#include "precompiled.hpp" +-#include "prims/universalNativeInvoker.hpp" +-#include "utilities/debug.hpp" +- +-address ProgrammableInvoker::generate_adapter(jobject jabi, jobject jlayout) { +- Unimplemented(); +- return nullptr; +-} +diff --git a/src/hotspot/cpu/riscv/universalUpcallHandle_riscv.cpp b/src/hotspot/cpu/riscv/universalUpcallHandle_riscv.cpp +deleted file mode 100644 +index ce70da72f2e..00000000000 +--- a/src/hotspot/cpu/riscv/universalUpcallHandle_riscv.cpp ++++ /dev/null +@@ -1,42 +0,0 @@ +-/* +- * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. +- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +- * +- * This code is free software; you can redistribute it and/or modify it +- * under the terms of the GNU General Public License version 2 only, as +- * published by the Free Software Foundation. +- * +- * This code is distributed in the hope that it will be useful, but WITHOUT +- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +- * version 2 for more details (a copy is included in the LICENSE file that +- * accompanied this code). 
+- * +- * You should have received a copy of the GNU General Public License version +- * 2 along with this work; if not, write to the Free Software Foundation, +- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +- * +- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +- * or visit www.oracle.com if you need additional information or have any +- * questions. +- * +- */ +- +-#include "precompiled.hpp" +-#include "prims/universalUpcallHandler.hpp" +-#include "utilities/debug.hpp" +- +-address ProgrammableUpcallHandler::generate_upcall_stub(jobject jrec, jobject jabi, jobject jlayout) { +- Unimplemented(); +- return nullptr; +-} +- +-address ProgrammableUpcallHandler::generate_optimized_upcall_stub(jobject mh, Method* entry, jobject jabi, jobject jconv) { +- ShouldNotCallThis(); +- return nullptr; +-} +- +-bool ProgrammableUpcallHandler::supports_optimized_upcalls() { +- return false; +-} +diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.cpp b/src/hotspot/cpu/riscv/vmreg_riscv.cpp +index 1f6eff96cba..5d1187c2a27 100644 +--- a/src/hotspot/cpu/riscv/vmreg_riscv.cpp ++++ b/src/hotspot/cpu/riscv/vmreg_riscv.cpp +@@ -49,8 +49,3 @@ void VMRegImpl::set_regName() { + regName[i] = "NON-GPR-FPR"; + } + } +- +-VMReg VMRegImpl::vmStorageToVMReg(int type, int index) { +- Unimplemented(); +- return VMRegImpl::Bad(); +-} + +From a5889735a97f3712bb649c454dee192d75457f96 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sat, 29 Apr 2023 17:35:20 +0800 +Subject: [PATCH 086/140] Revert JDK-8256254: Convert vmIntrinsics::ID to enum + class + +--- + src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 2 +- + src/hotspot/cpu/riscv/methodHandles_riscv.cpp | 2 +- + src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 2 +- + 3 files changed, 3 insertions(+), 3 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +index 006fe49b155..1133e80a210 100644 +--- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp ++++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +@@ -1841,7 +1841,7 @@ void InterpreterMacroAssembler::profile_return_type(Register mdp, Register ret, + beq(t0, tmp, do_profile); + get_method(tmp); + lhu(t0, Address(tmp, Method::intrinsic_id_offset_in_bytes())); +- li(t1, static_cast(vmIntrinsics::_compiledLambdaForm)); ++ li(t1, vmIntrinsics::_compiledLambdaForm); + bne(t0, t1, profile_continue); + bind(do_profile); + } +diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp +index 3bf5cfb16c3..4442b5991b1 100644 +--- a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp ++++ b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp +@@ -411,7 +411,7 @@ void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, + } + + default: +- fatal("unexpected intrinsic %d: %s", vmIntrinsics::as_int(iid), vmIntrinsics::name_at(iid)); ++ fatal("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid)); + break; + } + +diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +index 897dafcc99c..5b934b04e8e 100644 +--- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp ++++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +@@ -1040,7 +1040,7 @@ static void gen_special_dispatch(MacroAssembler* masm, + } else if (iid == vmIntrinsics::_invokeBasic) { + has_receiver = true; + } else { +- fatal("unexpected intrinsic id %d", vmIntrinsics::as_int(iid)); ++ fatal("unexpected intrinsic id %d", iid); + } + + if (member_reg != noreg) { + +From 
245d01e2cae27e41b875450f5f92751e4f36a095 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 6 Apr 2023 20:27:58 +0800 +Subject: [PATCH 087/140] Revert JDK-8216557: Aarch64: Add support for + Concurrent Class Unloading + +--- + .../cpu/riscv/c1_MacroAssembler_riscv.cpp | 4 - + .../gc/shared/barrierSetAssembler_riscv.cpp | 71 -------- + .../gc/shared/barrierSetAssembler_riscv.hpp | 3 - + .../gc/shared/barrierSetNMethod_riscv.cpp | 171 ------------------ + .../cpu/riscv/macroAssembler_riscv.cpp | 35 +--- + .../cpu/riscv/macroAssembler_riscv.hpp | 2 - + src/hotspot/cpu/riscv/relocInfo_riscv.cpp | 1 - + src/hotspot/cpu/riscv/riscv.ad | 16 -- + src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 7 - + src/hotspot/cpu/riscv/stubGenerator_riscv.cpp | 49 ----- + src/hotspot/cpu/riscv/stubRoutines_riscv.cpp | 1 - + src/hotspot/cpu/riscv/stubRoutines_riscv.hpp | 6 - + 12 files changed, 5 insertions(+), 361 deletions(-) + delete mode 100644 src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp + +diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp +index 44ceccd8bd1..a6d1b1470f9 100644 +--- a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp +@@ -322,10 +322,6 @@ void C1_MacroAssembler::build_frame(int framesize, int bang_size_in_bytes) { + // Note that we do this before creating a frame. + generate_stack_overflow_check(bang_size_in_bytes); + MacroAssembler::build_frame(framesize); +- +- // Insert nmethod entry barrier into frame. +- BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); +- bs->nmethod_entry_barrier(this); + } + + void C1_MacroAssembler::remove_frame(int framesize) { +diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp +index 3c115a2ea02..2b556b95d71 100644 +--- a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp +@@ -27,7 +27,6 @@ + #include "classfile/classLoaderData.hpp" + #include "gc/shared/barrierSet.hpp" + #include "gc/shared/barrierSetAssembler.hpp" +-#include "gc/shared/barrierSetNMethod.hpp" + #include "gc/shared/collectedHeap.hpp" + #include "interpreter/interp_masm.hpp" + #include "memory/universe.hpp" +@@ -230,73 +229,3 @@ void BarrierSetAssembler::incr_allocated_bytes(MacroAssembler* masm, + } + __ sd(tmp1, Address(xthread, in_bytes(JavaThread::allocated_bytes_offset()))); + } +- +-void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm) { +- BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod(); +- +- if (bs_nm == NULL) { +- return; +- } +- +- // RISCV atomic operations require that the memory address be naturally aligned. +- __ align(4); +- +- Label skip, guard; +- Address thread_disarmed_addr(xthread, in_bytes(bs_nm->thread_disarmed_offset())); +- +- __ lwu(t0, guard); +- +- // Subsequent loads of oops must occur after load of guard value. +- // BarrierSetNMethod::disarm sets guard with release semantics. +- __ membar(MacroAssembler::LoadLoad); +- __ lwu(t1, thread_disarmed_addr); +- __ beq(t0, t1, skip); +- +- int32_t offset = 0; +- __ movptr_with_offset(t0, StubRoutines::riscv::method_entry_barrier(), offset); +- __ jalr(ra, t0, offset); +- __ j(skip); +- +- __ bind(guard); +- +- assert(__ offset() % 4 == 0, "bad alignment"); +- __ emit_int32(0); // nmethod guard value. Skipped over in common case. 
+- +- __ bind(skip); +-} +- +-void BarrierSetAssembler::c2i_entry_barrier(MacroAssembler* masm) { +- BarrierSetNMethod* bs = BarrierSet::barrier_set()->barrier_set_nmethod(); +- if (bs == NULL) { +- return; +- } +- +- Label bad_call; +- __ beqz(xmethod, bad_call); +- +- // Pointer chase to the method holder to find out if the method is concurrently unloading. +- Label method_live; +- __ load_method_holder_cld(t0, xmethod); +- +- // Is it a strong CLD? +- __ lwu(t1, Address(t0, ClassLoaderData::keep_alive_offset())); +- __ bnez(t1, method_live); +- +- // Is it a weak but alive CLD? +- __ push_reg(RegSet::of(x28, x29), sp); +- +- __ ld(x28, Address(t0, ClassLoaderData::holder_offset())); +- +- // Uses x28 & x29, so we must pass new temporaries. +- __ resolve_weak_handle(x28, x29); +- __ mv(t0, x28); +- +- __ pop_reg(RegSet::of(x28, x29), sp); +- +- __ bnez(t0, method_live); +- +- __ bind(bad_call); +- +- __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); +- __ bind(method_live); +-} +diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp +index b85f7f5582b..984d94f4c3d 100644 +--- a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp +@@ -28,7 +28,6 @@ + + #include "asm/macroAssembler.hpp" + #include "gc/shared/barrierSet.hpp" +-#include "gc/shared/barrierSetNMethod.hpp" + #include "memory/allocation.hpp" + #include "oops/access.hpp" + +@@ -71,8 +70,6 @@ class BarrierSetAssembler: public CHeapObj { + ); + virtual void barrier_stubs_init() {} + +- virtual void nmethod_entry_barrier(MacroAssembler* masm); +- virtual void c2i_entry_barrier(MacroAssembler* masm); + virtual ~BarrierSetAssembler() {} + }; + +diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp +deleted file mode 100644 +index ae7ee4c5a44..00000000000 +--- a/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp ++++ /dev/null +@@ -1,171 +0,0 @@ +-/* +- * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. +- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +- * +- * This code is free software; you can redistribute it and/or modify it +- * under the terms of the GNU General Public License version 2 only, as +- * published by the Free Software Foundation. +- * +- * This code is distributed in the hope that it will be useful, but WITHOUT +- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +- * version 2 for more details (a copy is included in the LICENSE file that +- * accompanied this code). +- * +- * You should have received a copy of the GNU General Public License version +- * 2 along with this work; if not, write to the Free Software Foundation, +- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +- * +- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +- * or visit www.oracle.com if you need additional information or have any +- * questions. 
+- * +- */ +- +-#include "precompiled.hpp" +-#include "code/codeCache.hpp" +-#include "code/nativeInst.hpp" +-#include "gc/shared/barrierSetNMethod.hpp" +-#include "logging/log.hpp" +-#include "memory/resourceArea.hpp" +-#include "runtime/sharedRuntime.hpp" +-#include "runtime/registerMap.hpp" +-#include "runtime/thread.hpp" +-#include "utilities/align.hpp" +-#include "utilities/debug.hpp" +- +-class NativeNMethodBarrier: public NativeInstruction { +- address instruction_address() const { return addr_at(0); } +- +- int *guard_addr() { +- /* auipc + lwu + fence + lwu + beq + lui + addi + slli + addi + slli + jalr + j */ +- return reinterpret_cast(instruction_address() + 12 * 4); +- } +- +-public: +- int get_value() { +- return Atomic::load_acquire(guard_addr()); +- } +- +- void set_value(int value) { +- Atomic::release_store(guard_addr(), value); +- } +- +- void verify() const; +-}; +- +-// Store the instruction bitmask, bits and name for checking the barrier. +-struct CheckInsn { +- uint32_t mask; +- uint32_t bits; +- const char *name; +-}; +- +-static const struct CheckInsn barrierInsn[] = { +- { 0x00000fff, 0x00000297, "auipc t0, 0 "}, +- { 0x000fffff, 0x0002e283, "lwu t0, 48(t0) "}, +- { 0xffffffff, 0x0aa0000f, "fence ir, ir "}, +- { 0x000fffff, 0x000be303, "lwu t1, 112(xthread)"}, +- { 0x01fff07f, 0x00628063, "beq t0, t1, skip "}, +- { 0x00000fff, 0x000002b7, "lui t0, imm0 "}, +- { 0x000fffff, 0x00028293, "addi t0, t0, imm1 "}, +- { 0xffffffff, 0x00b29293, "slli t0, t0, 11 "}, +- { 0x000fffff, 0x00028293, "addi t0, t0, imm2 "}, +- { 0xffffffff, 0x00529293, "slli t0, t0, 5 "}, +- { 0x000fffff, 0x000280e7, "jalr ra, imm3(t0) "}, +- { 0x00000fff, 0x0000006f, "j skip "} +- /* guard: */ +- /* 32bit nmethod guard value */ +- /* skip: */ +-}; +- +-// The encodings must match the instructions emitted by +-// BarrierSetAssembler::nmethod_entry_barrier. The matching ignores the specific +-// register numbers and immediate values in the encoding. +-void NativeNMethodBarrier::verify() const { +- intptr_t addr = (intptr_t) instruction_address(); +- for(unsigned int i = 0; i < sizeof(barrierInsn)/sizeof(struct CheckInsn); i++ ) { +- uint32_t inst = *((uint32_t*) addr); +- if ((inst & barrierInsn[i].mask) != barrierInsn[i].bits) { +- tty->print_cr("Addr: " INTPTR_FORMAT " Code: 0x%x", addr, inst); +- fatal("not an %s instruction.", barrierInsn[i].name); +- } +- addr += 4; +- } +-} +- +- +-/* We're called from an nmethod when we need to deoptimize it. We do +- this by throwing away the nmethod's frame and jumping to the +- ic_miss stub. This looks like there has been an IC miss at the +- entry of the nmethod, so we resolve the call, which will fall back +- to the interpreter if the nmethod has been unloaded. 
*/ +-void BarrierSetNMethod::deoptimize(nmethod* nm, address* return_address_ptr) { +- +- typedef struct { +- intptr_t *sp; intptr_t *fp; address ra; address pc; +- } frame_pointers_t; +- +- frame_pointers_t *new_frame = (frame_pointers_t *)(return_address_ptr - 5); +- +- JavaThread *thread = JavaThread::current(); +- RegisterMap reg_map(thread, false); +- frame frame = thread->last_frame(); +- +- assert(frame.is_compiled_frame() || frame.is_native_frame(), "must be"); +- assert(frame.cb() == nm, "must be"); +- frame = frame.sender(®_map); +- +- LogTarget(Trace, nmethod, barrier) out; +- if (out.is_enabled()) { +- ResourceMark mark; +- log_trace(nmethod, barrier)("deoptimize(nmethod: %s(%p), return_addr: %p, osr: %d, thread: %p(%s), making rsp: %p) -> %p", +- nm->method()->name_and_sig_as_C_string(), +- nm, *(address *) return_address_ptr, nm->is_osr_method(), thread, +- thread->name(), frame.sp(), nm->verified_entry_point()); +- } +- +- new_frame->sp = frame.sp(); +- new_frame->fp = frame.fp(); +- new_frame->ra = frame.pc(); +- new_frame->pc = SharedRuntime::get_handle_wrong_method_stub(); +-} +- +-// This is the offset of the entry barrier from where the frame is completed. +-// If any code changes between the end of the verified entry where the entry +-// barrier resides, and the completion of the frame, then +-// NativeNMethodCmpBarrier::verify() will immediately complain when it does +-// not find the expected native instruction at this offset, which needs updating. +-// Note that this offset is invariant of PreserveFramePointer. +- +-// see BarrierSetAssembler::nmethod_entry_barrier +-// auipc + lwu + fence + lwu + beq + movptr_with_offset(5 instructions) + jalr + j + int32 +-static const int entry_barrier_offset = -4 * 13; +- +-static NativeNMethodBarrier* native_nmethod_barrier(nmethod* nm) { +- address barrier_address = nm->code_begin() + nm->frame_complete_offset() + entry_barrier_offset; +- NativeNMethodBarrier* barrier = reinterpret_cast(barrier_address); +- debug_only(barrier->verify()); +- return barrier; +-} +- +-void BarrierSetNMethod::disarm(nmethod* nm) { +- if (!supports_entry_barrier(nm)) { +- return; +- } +- +- // Disarms the nmethod guard emitted by BarrierSetAssembler::nmethod_entry_barrier. +- NativeNMethodBarrier* barrier = native_nmethod_barrier(nm); +- +- barrier->set_value(disarmed_value()); +-} +- +-bool BarrierSetNMethod::is_armed(nmethod* nm) { +- if (!supports_entry_barrier(nm)) { +- return false; +- } +- +- NativeNMethodBarrier* barrier = native_nmethod_barrier(nm); +- return barrier->get_value() != disarmed_value(); +-} +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index 41a415ef2cf..a75bd9dfa89 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -1638,10 +1638,10 @@ void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp, + beq(trial_klass, tmp, L); + } + +-// Move an oop into a register. immediate is true if we want +-// immediate instructions and nmethod entry barriers are not enabled. +-// i.e. we are not going to patch this instruction while the code is being +-// executed by another thread. ++// Move an oop into a register. immediate is true if we want ++// immediate instructions, i.e. we are not going to patch this ++// instruction while the code is being executed by another thread. In ++// that case we can use move immediates rather than the constant pool. 
+ void MacroAssembler::movoop(Register dst, jobject obj, bool immediate) { + int oop_index; + if (obj == NULL) { +@@ -1656,11 +1656,7 @@ void MacroAssembler::movoop(Register dst, jobject obj, bool immediate) { + oop_index = oop_recorder()->find_index(obj); + } + RelocationHolder rspec = oop_Relocation::spec(oop_index); +- +- // nmethod entry barrier necessitate using the constant pool. They have to be +- // ordered with respected to oop access. +- // Using immediate literals would necessitate fence.i. +- if (BarrierSet::barrier_set()->barrier_set_nmethod() != NULL || !immediate) { ++ if (!immediate) { + address dummy = address(uintptr_t(pc()) & -wordSize); // A nearby aligned address + ld_constant(dst, Address(dummy, rspec)); + } else +@@ -1738,22 +1734,6 @@ void MacroAssembler::resolve_oop_handle(Register result, Register tmp) { + access_load_at(T_OBJECT, IN_NATIVE, result, Address(result, 0), tmp, noreg); + } + +-// ((WeakHandle)result).resolve() +-void MacroAssembler::resolve_weak_handle(Register result, Register tmp) { +- assert_different_registers(result, tmp); +- Label resolved; +- +- // A null weak handle resolves to null. +- beqz(result, resolved); +- +- // Only 64 bit platforms support GCs that require a tmp register +- // Only IN_HEAP loads require a thread_tmp register +- // WeakHandle::resolve is an indirection like jweak. +- access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, +- result, Address(result), tmp, noreg /* tmp_thread */); +- bind(resolved); +-} +- + void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, + Register dst, Address src, + Register tmp1, Register thread_tmp) { +@@ -3195,11 +3175,6 @@ void MacroAssembler::cmpptr(Register src1, Address src2, Label& equal) { + beq(src1, t0, equal); + } + +-void MacroAssembler::load_method_holder_cld(Register result, Register method) { +- load_method_holder(result, method); +- ld(result, Address(result, InstanceKlass::class_loader_data_offset())); +-} +- + void MacroAssembler::load_method_holder(Register holder, Register method) { + ld(holder, Address(method, Method::const_offset())); // ConstMethod* + ld(holder, Address(holder, ConstMethod::constants_offset())); // ConstantPool* +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +index dd39f67d507..b16fe904888 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +@@ -207,7 +207,6 @@ class MacroAssembler: public Assembler { + virtual void check_and_handle_earlyret(Register java_thread); + virtual void check_and_handle_popframe(Register java_thread); + +- void resolve_weak_handle(Register result, Register tmp); + void resolve_oop_handle(Register result, Register tmp = x15); + void resolve_jobject(Register value, Register thread, Register tmp); + +@@ -673,7 +672,6 @@ class MacroAssembler: public Assembler { + void cmpptr(Register src1, Address src2, Label& equal); + + void clinit_barrier(Register klass, Register tmp, Label* L_fast_path = NULL, Label* L_slow_path = NULL); +- void load_method_holder_cld(Register result, Register method); + void load_method_holder(Register holder, Register method); + + void compute_index(Register str1, Register trailing_zeros, Register match_mask, +diff --git a/src/hotspot/cpu/riscv/relocInfo_riscv.cpp b/src/hotspot/cpu/riscv/relocInfo_riscv.cpp +index 228a64eae2c..047ea2276ca 100644 +--- a/src/hotspot/cpu/riscv/relocInfo_riscv.cpp ++++ b/src/hotspot/cpu/riscv/relocInfo_riscv.cpp +@@ -41,7 +41,6 @@ 
void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) { + switch (type()) { + case relocInfo::oop_type: { + oop_Relocation *reloc = (oop_Relocation *)this; +- // in movoop when BarrierSet::barrier_set()->barrier_set_nmethod() != NULL || !immediate + if (NativeInstruction::is_load_pc_relative_at(addr())) { + address constptr = (address)code()->oop_addr_at(reloc->oop_index()); + bytes = MacroAssembler::pd_patch_instruction_size(addr(), constptr); +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 7ec76e72ff0..0a1838695e1 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -1068,17 +1068,6 @@ void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const { + st->print("sd ra, [sp, #%d]\n\t", - wordSize); + if (PreserveFramePointer) { st->print("sub fp, sp, #%d\n\t", 2 * wordSize); } + st->print("sub sp, sp, #%d\n\t", framesize); +- +- if (C->stub_function() == NULL && BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) { +- st->print("ld t0, [guard]\n\t"); +- st->print("membar LoadLoad\n\t"); +- st->print("ld t1, [xthread, #thread_disarmed_offset]\n\t"); +- st->print("beq t0, t1, skip\n\t"); +- st->print("jalr #nmethod_entry_barrier_stub\n\t"); +- st->print("j skip\n\t"); +- st->print("guard: int\n\t"); +- st->print("skip:\n\t"); +- } + } + #endif + +@@ -1114,11 +1103,6 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + + __ build_frame(framesize); + +- if (C->stub_function() == NULL) { +- BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); +- bs->nmethod_entry_barrier(&_masm); +- } +- + if (VerifyStackAtCalls) { + Unimplemented(); + } +diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +index 5b934b04e8e..326ba62fcb0 100644 +--- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp ++++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +@@ -642,9 +642,6 @@ AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm + c2i_no_clinit_check_entry = __ pc(); + } + +- BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); +- bs->c2i_entry_barrier(masm); +- + gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); + + __ flush(); +@@ -1290,10 +1287,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, + // -2 because return address is already present and so is saved fp + __ sub(sp, sp, stack_size - 2 * wordSize); + +- BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); +- assert_cond(bs != NULL); +- bs->nmethod_entry_barrier(masm); +- + // Frame is now completed as far as size and linkage. 
+ int frame_complete = ((intptr_t)__ pc()) - start; + +diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp +index 0c5b0e001ee..74c38c3d044 100644 +--- a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp ++++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp +@@ -2352,50 +2352,6 @@ class StubGenerator: public StubCodeGenerator { + return entry; + } + +- address generate_method_entry_barrier() { +- __ align(CodeEntryAlignment); +- StubCodeMark mark(this, "StubRoutines", "nmethod_entry_barrier"); +- +- Label deoptimize_label; +- +- address start = __ pc(); +- +- __ set_last_Java_frame(sp, fp, ra, t0); +- +- __ enter(); +- __ add(t1, sp, wordSize); +- +- __ sub(sp, sp, 4 * wordSize); +- +- __ push_call_clobbered_registers(); +- +- __ mv(c_rarg0, t1); +- __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSetNMethod::nmethod_stub_entry_barrier), 1); +- +- __ reset_last_Java_frame(true); +- +- __ mv(t0, x10); +- +- __ pop_call_clobbered_registers(); +- +- __ bnez(t0, deoptimize_label); +- +- __ leave(); +- __ ret(); +- +- __ BIND(deoptimize_label); +- +- __ ld(t0, Address(sp, 0)); +- __ ld(fp, Address(sp, wordSize)); +- __ ld(ra, Address(sp, wordSize * 2)); +- __ ld(t1, Address(sp, wordSize * 3)); +- +- __ mv(sp, t0); +- __ jr(t1); +- +- return start; +- } +- + // x10 = result + // x11 = str1 + // x12 = cnt1 +@@ -3703,11 +3659,6 @@ class StubGenerator: public StubCodeGenerator { + + generate_string_indexof_stubs(); + +- BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod(); +- if (bs_nm != NULL) { +- StubRoutines::riscv::_method_entry_barrier = generate_method_entry_barrier(); +- } +- + StubRoutines::riscv::set_completed(); + } + +diff --git a/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp b/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp +index 395a2d338e4..9202d9ec4b0 100644 +--- a/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp ++++ b/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp +@@ -53,6 +53,5 @@ address StubRoutines::riscv::_string_indexof_linear_ll = NULL; + address StubRoutines::riscv::_string_indexof_linear_uu = NULL; + address StubRoutines::riscv::_string_indexof_linear_ul = NULL; + address StubRoutines::riscv::_large_byte_array_inflate = NULL; +-address StubRoutines::riscv::_method_entry_barrier = NULL; + + bool StubRoutines::riscv::_completed = false; +diff --git a/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp b/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp +index 51f07819c33..0c9445e18a7 100644 +--- a/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp ++++ b/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp +@@ -67,8 +67,6 @@ class riscv { + static address _string_indexof_linear_ul; + static address _large_byte_array_inflate; + +- static address _method_entry_barrier; +- + static bool _completed; + + public: +@@ -145,10 +143,6 @@ class riscv { + return _large_byte_array_inflate; + } + +- static address method_entry_barrier() { +- return _method_entry_barrier; +- } +- + static bool complete() { + return _completed; + } + +From aee31440dde84c54449b5c0dbdfb43b4d3826f5a Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sat, 29 Apr 2023 17:59:40 +0800 +Subject: [PATCH 088/140] Revert JDK-8223173: Implement fast class + initialization checks on AARCH64 && JDK-8227260: JNI upcalls should bypass + class initialization barrier in c2i adapter + +--- + .../cpu/riscv/c1_LIRAssembler_riscv.cpp | 12 ------- + .../cpu/riscv/c1_MacroAssembler_riscv.cpp | 12 +++---- + src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 12 ------- + 
src/hotspot/cpu/riscv/interp_masm_riscv.hpp | 2 -- + .../cpu/riscv/macroAssembler_riscv.cpp | 36 ------------------- + .../cpu/riscv/macroAssembler_riscv.hpp | 3 -- + src/hotspot/cpu/riscv/riscv.ad | 11 ------ + src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 30 +--------------- + src/hotspot/cpu/riscv/templateTable_riscv.cpp | 17 +++------ + 9 files changed, 11 insertions(+), 124 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +index 49653d04d81..1e482d7cc2b 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +@@ -90,18 +90,6 @@ static void select_different_registers(Register preserve, + + bool LIR_Assembler::is_small_constant(LIR_Opr opr) { Unimplemented(); return false; } + +-void LIR_Assembler::clinit_barrier(ciMethod* method) { +- assert(VM_Version::supports_fast_class_init_checks(), "sanity"); +- assert(!method->holder()->is_not_initialized(), "initialization should have been started"); +- +- Label L_skip_barrier; +- +- __ mov_metadata(t1, method->holder()->constant_encoding()); +- __ clinit_barrier(t1, t0, &L_skip_barrier /* L_fast_path */); +- __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); +- __ bind(L_skip_barrier); +-} +- + LIR_Opr LIR_Assembler::receiverOpr() { + return FrameMap::receiver_opr; + } +diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp +index a6d1b1470f9..99d981f97f4 100644 +--- a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp +@@ -317,6 +317,12 @@ void C1_MacroAssembler::inline_cache_check(Register receiver, Register iCache, L + } + + void C1_MacroAssembler::build_frame(int framesize, int bang_size_in_bytes) { ++ // If we have to make this method not-entrant we'll overwrite its ++ // first instruction with a jump. For this action to be legal we ++ // must ensure that this first instruction is a J, JAL or NOP. ++ // Make it a NOP. ++ nop(); ++ + assert(bang_size_in_bytes >= framesize, "stack bang size incorrect"); + // Make sure there is enough stack space for this method's activation. + // Note that we do this before creating a frame. +@@ -330,12 +336,6 @@ void C1_MacroAssembler::remove_frame(int framesize) { + + + void C1_MacroAssembler::verified_entry() { +- // If we have to make this method not-entrant we'll overwrite its +- // first instruction with a jump. For this action to be legal we +- // must ensure that this first instruction is a J, JAL or NOP. +- // Make it a NOP. +- +- nop(); + } + + void C1_MacroAssembler::load_parameter(int offset_in_words, Register reg) { +diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +index 1133e80a210..b50be7e726c 100644 +--- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp ++++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +@@ -295,18 +295,6 @@ void InterpreterMacroAssembler::load_resolved_klass_at_offset( + ld(klass, Address(klass, Array::base_offset_in_bytes())); + } + +-void InterpreterMacroAssembler::load_resolved_method_at_index(int byte_no, +- Register method, +- Register cache) { +- const int method_offset = in_bytes( +- ConstantPoolCache::base_offset() + +- ((byte_no == TemplateTable::f2_byte) +- ? 
ConstantPoolCacheEntry::f2_offset() +- : ConstantPoolCacheEntry::f1_offset())); +- +- ld(method, Address(cache, method_offset)); // get f1 Method* +-} +- + // Generate a subtype check: branch to ok_is_subtype if sub_klass is a + // subtype of super_klass. + // +diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.hpp b/src/hotspot/cpu/riscv/interp_masm_riscv.hpp +index 4d8cb086f82..4126e8ee70f 100644 +--- a/src/hotspot/cpu/riscv/interp_masm_riscv.hpp ++++ b/src/hotspot/cpu/riscv/interp_masm_riscv.hpp +@@ -122,8 +122,6 @@ class InterpreterMacroAssembler: public MacroAssembler { + // Load cpool->resolved_klass_at(index). + void load_resolved_klass_at_offset(Register cpool, Register index, Register klass, Register temp); + +- void load_resolved_method_at_index(int byte_no, Register method, Register cache); +- + void pop_ptr(Register r = x10); + void pop_i(Register r = x10); + void pop_l(Register r = x10); +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index a75bd9dfa89..304b6f2b06c 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -372,36 +372,6 @@ void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thr + sd(zr, Address(java_thread, JavaThread::vm_result_2_offset())); + } + +-void MacroAssembler::clinit_barrier(Register klass, Register tmp, Label* L_fast_path, Label* L_slow_path) { +- assert(L_fast_path != NULL || L_slow_path != NULL, "at least one is required"); +- assert_different_registers(klass, xthread, tmp); +- +- Label L_fallthrough, L_tmp; +- if (L_fast_path == NULL) { +- L_fast_path = &L_fallthrough; +- } else if (L_slow_path == NULL) { +- L_slow_path = &L_fallthrough; +- } +- +- // Fast path check: class is fully initialized +- lbu(tmp, Address(klass, InstanceKlass::init_state_offset())); +- sub(tmp, tmp, InstanceKlass::fully_initialized); +- beqz(tmp, *L_fast_path); +- +- // Fast path check: current thread is initializer thread +- ld(tmp, Address(klass, InstanceKlass::init_thread_offset())); +- +- if (L_slow_path == &L_fallthrough) { +- beq(xthread, tmp, *L_fast_path); +- bind(*L_slow_path); +- } else if (L_fast_path == &L_fallthrough) { +- bne(xthread, tmp, *L_slow_path); +- bind(*L_fast_path); +- } else { +- Unimplemented(); +- } +-} +- + void MacroAssembler::verify_oop(Register reg, const char* s) { + if (!VerifyOops) { return; } + +@@ -3175,12 +3145,6 @@ void MacroAssembler::cmpptr(Register src1, Address src2, Label& equal) { + beq(src1, t0, equal); + } + +-void MacroAssembler::load_method_holder(Register holder, Register method) { +- ld(holder, Address(method, Method::const_offset())); // ConstMethod* +- ld(holder, Address(holder, ConstMethod::constants_offset())); // ConstantPool* +- ld(holder, Address(holder, ConstantPool::pool_holder_offset_in_bytes())); // InstanceKlass* +-} +- + // string indexof + // compute index by trailing zeros + void MacroAssembler::compute_index(Register haystack, Register trailing_zeros, +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +index b16fe904888..c6b71bdbc3c 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +@@ -671,9 +671,6 @@ class MacroAssembler: public Assembler { + + void cmpptr(Register src1, Address src2, Label& equal); + +- void clinit_barrier(Register klass, Register tmp, Label* L_fast_path = NULL, Label* L_slow_path = NULL); +- void 
load_method_holder(Register holder, Register method); +- + void compute_index(Register str1, Register trailing_zeros, Register match_mask, + Register result, Register char_tmp, Register tmp, + bool haystack_isL); +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 0a1838695e1..13546ab328b 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -1085,17 +1085,6 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + + assert_cond(C != NULL); + +- if (C->clinit_barrier_on_entry()) { +- assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started"); +- +- Label L_skip_barrier; +- +- __ mov_metadata(t1, C->method()->holder()->constant_encoding()); +- __ clinit_barrier(t1, t0, &L_skip_barrier); +- __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); +- __ bind(L_skip_barrier); +- } +- + int bangsize = C->output()->bang_size_in_bytes(); + if (C->output()->need_stack_bang(bangsize)) { + __ generate_stack_overflow_check(bangsize); +diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +index 326ba62fcb0..ae414224c5b 100644 +--- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp ++++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +@@ -623,29 +623,10 @@ AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm + + address c2i_entry = __ pc(); + +- // Class initialization barrier for static methods +- address c2i_no_clinit_check_entry = NULL; +- if (VM_Version::supports_fast_class_init_checks()) { +- Label L_skip_barrier; +- +- { // Bypass the barrier for non-static methods +- __ lwu(t0, Address(xmethod, Method::access_flags_offset())); +- __ andi(t1, t0, JVM_ACC_STATIC); +- __ beqz(t1, L_skip_barrier); // non-static +- } +- +- __ load_method_holder(t1, xmethod); +- __ clinit_barrier(t1, t0, &L_skip_barrier); +- __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); +- +- __ bind(L_skip_barrier); +- c2i_no_clinit_check_entry = __ pc(); +- } +- + gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); + + __ flush(); +- return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry, c2i_no_clinit_check_entry); ++ return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry); + } + + int SharedRuntime::c_calling_convention(const BasicType *sig_bt, +@@ -1270,15 +1251,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, + // first instruction with a jump. 
+ __ nop(); + +- if (VM_Version::supports_fast_class_init_checks() && method->needs_clinit_barrier()) { +- Label L_skip_barrier; +- __ mov_metadata(t1, method->method_holder()); // InstanceKlass* +- __ clinit_barrier(t1, t0, &L_skip_barrier); +- __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); +- +- __ bind(L_skip_barrier); +- } +- + // Generate stack overflow check + __ bang_stack_with_offset(checked_cast(StackOverflow::stack_shadow_zone_size())); + +diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp +index bb20f228447..1f4409a9c9a 100644 +--- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp ++++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp +@@ -2307,7 +2307,7 @@ void TemplateTable::resolve_cache_and_index(int byte_no, + const Register temp = x9; + assert_different_registers(Rcache, index, temp); + +- Label resolved, clinit_barrier_slow; ++ Label resolved; + + Bytecodes::Code code = bytecode(); + switch (code) { +@@ -2321,10 +2321,6 @@ void TemplateTable::resolve_cache_and_index(int byte_no, + __ mv(t0, (int) code); + __ beq(temp, t0, resolved); + +- // resolve first time through +- // Class initialization barrier slow path lands here as well. +- __ bind(clinit_barrier_slow); +- + address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache); + __ mv(temp, (int) code); + __ call_VM(noreg, entry, temp); +@@ -2334,13 +2330,6 @@ void TemplateTable::resolve_cache_and_index(int byte_no, + // n.b. unlike x86 Rcache is now rcpool plus the indexed offset + // so all clients ofthis method must be modified accordingly + __ bind(resolved); +- +- // Class initialization barrier for static methods +- if (VM_Version::supports_fast_class_init_checks() && bytecode() == Bytecodes::_invokestatic) { +- __ load_resolved_method_at_index(byte_no, temp, Rcache); +- __ load_method_holder(temp, temp); +- __ clinit_barrier(temp, t0, NULL, &clinit_barrier_slow); +- } + } + + // The Rcache and index registers must be set before call +@@ -3431,7 +3420,9 @@ void TemplateTable::invokeinterface(int byte_no) { + __ profile_virtual_call(x13, x30, x9); + + // Get declaring interface class from method, and itable index +- __ load_method_holder(x10, xmethod); ++ __ ld(x10, Address(xmethod, Method::const_offset())); ++ __ ld(x10, Address(x10, ConstMethod::constants_offset())); ++ __ ld(x10, Address(x10, ConstantPool::pool_holder_offset_in_bytes())); + __ lwu(xmethod, Address(xmethod, Method::itable_index_offset())); + __ subw(xmethod, xmethod, Method::itable_index_max); + __ negw(xmethod, xmethod); + +From c259a42eac0a11e080d28dabe7f745ee79a53663 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sat, 29 Apr 2023 18:36:13 +0800 +Subject: [PATCH 089/140] Revert JDK-8268119: Rename copy_os_cpu.inline.hpp + files to copy_os_cpu.hpp && JDK-8142362: Lots of code duplication in Copy + class + +--- + src/hotspot/cpu/riscv/copy_riscv.hpp | 85 +----------- + .../os_cpu/linux_riscv/copy_linux_riscv.hpp | 31 ----- + .../linux_riscv/copy_linux_riscv.inline.hpp | 124 ++++++++++++++++++ + 3 files changed, 128 insertions(+), 112 deletions(-) + delete mode 100644 src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.hpp + create mode 100644 src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp + +diff --git a/src/hotspot/cpu/riscv/copy_riscv.hpp b/src/hotspot/cpu/riscv/copy_riscv.hpp +index bceadcc5dcc..05da242e354 100644 +--- a/src/hotspot/cpu/riscv/copy_riscv.hpp ++++ b/src/hotspot/cpu/riscv/copy_riscv.hpp +@@ -27,7 +27,10 @@ + #ifndef 
CPU_RISCV_COPY_RISCV_HPP + #define CPU_RISCV_COPY_RISCV_HPP + +-#include OS_CPU_HEADER(copy) ++// Inline functions for memory copy and fill. ++ ++// Contains inline asm implementations ++#include OS_CPU_HEADER_INLINE(copy) + + static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) { + julong* to = (julong*) tohw; +@@ -53,84 +56,4 @@ static void pd_zero_to_bytes(void* to, size_t count) { + (void)memset(to, 0, count); + } + +-static void pd_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { +- (void)memmove(to, from, count * HeapWordSize); +-} +- +-static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { +- switch (count) { +- case 8: to[7] = from[7]; // fall through +- case 7: to[6] = from[6]; // fall through +- case 6: to[5] = from[5]; // fall through +- case 5: to[4] = from[4]; // fall through +- case 4: to[3] = from[3]; // fall through +- case 3: to[2] = from[2]; // fall through +- case 2: to[1] = from[1]; // fall through +- case 1: to[0] = from[0]; // fall through +- case 0: break; +- default: +- memcpy(to, from, count * HeapWordSize); +- break; +- } +-} +- +-static void pd_disjoint_words_atomic(const HeapWord* from, HeapWord* to, size_t count) { +- shared_disjoint_words_atomic(from, to, count); +-} +- +-static void pd_aligned_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { +- pd_conjoint_words(from, to, count); +-} +- +-static void pd_aligned_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { +- pd_disjoint_words(from, to, count); +-} +- +-static void pd_conjoint_bytes(const void* from, void* to, size_t count) { +- (void)memmove(to, from, count); +-} +- +-static void pd_conjoint_bytes_atomic(const void* from, void* to, size_t count) { +- pd_conjoint_bytes(from, to, count); +-} +- +-static void pd_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) { +- _Copy_conjoint_jshorts_atomic(from, to, count); +-} +- +-static void pd_conjoint_jints_atomic(const jint* from, jint* to, size_t count) { +- _Copy_conjoint_jints_atomic(from, to, count); +-} +- +-static void pd_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) { +- _Copy_conjoint_jlongs_atomic(from, to, count); +-} +- +-static void pd_conjoint_oops_atomic(const oop* from, oop* to, size_t count) { +- assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size."); +- _Copy_conjoint_jlongs_atomic((const jlong*)from, (jlong*)to, count); +-} +- +-static void pd_arrayof_conjoint_bytes(const HeapWord* from, HeapWord* to, size_t count) { +- _Copy_arrayof_conjoint_bytes(from, to, count); +-} +- +-static void pd_arrayof_conjoint_jshorts(const HeapWord* from, HeapWord* to, size_t count) { +- _Copy_arrayof_conjoint_jshorts(from, to, count); +-} +- +-static void pd_arrayof_conjoint_jints(const HeapWord* from, HeapWord* to, size_t count) { +- _Copy_arrayof_conjoint_jints(from, to, count); +-} +- +-static void pd_arrayof_conjoint_jlongs(const HeapWord* from, HeapWord* to, size_t count) { +- _Copy_arrayof_conjoint_jlongs(from, to, count); +-} +- +-static void pd_arrayof_conjoint_oops(const HeapWord* from, HeapWord* to, size_t count) { +- assert(!UseCompressedOops, "foo!"); +- assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size"); +- _Copy_arrayof_conjoint_jlongs(from, to, count); +-} +- + #endif // CPU_RISCV_COPY_RISCV_HPP +diff --git a/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.hpp +deleted file mode 100644 +index 
147cfdf3c10..00000000000 +--- a/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.hpp ++++ /dev/null +@@ -1,31 +0,0 @@ +-/* +- * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. +- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +- * +- * This code is free software; you can redistribute it and/or modify it +- * under the terms of the GNU General Public License version 2 only, as +- * published by the Free Software Foundation. +- * +- * This code is distributed in the hope that it will be useful, but WITHOUT +- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +- * version 2 for more details (a copy is included in the LICENSE file that +- * accompanied this code). +- * +- * You should have received a copy of the GNU General Public License version +- * 2 along with this work; if not, write to the Free Software Foundation, +- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +- * +- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +- * or visit www.oracle.com if you need additional information or have any +- * questions. +- * +- */ +- +-#ifndef OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_HPP +-#define OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_HPP +- +-// Empty for build system +- +-#endif // OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_HPP +diff --git a/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp b/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp +new file mode 100644 +index 00000000000..bdf36d6b4c3 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp +@@ -0,0 +1,124 @@ ++/* ++ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_INLINE_HPP ++#define OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_INLINE_HPP ++ ++static void pd_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ (void)memmove(to, from, count * HeapWordSize); ++} ++ ++static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ switch (count) { ++ case 8: to[7] = from[7]; // fall through ++ case 7: to[6] = from[6]; // fall through ++ case 6: to[5] = from[5]; // fall through ++ case 5: to[4] = from[4]; // fall through ++ case 4: to[3] = from[3]; // fall through ++ case 3: to[2] = from[2]; // fall through ++ case 2: to[1] = from[1]; // fall through ++ case 1: to[0] = from[0]; // fall through ++ case 0: break; ++ default: ++ memcpy(to, from, count * HeapWordSize); ++ break; ++ } ++} ++ ++static void pd_disjoint_words_atomic(const HeapWord* from, HeapWord* to, size_t count) { ++ switch (count) { ++ case 8: to[7] = from[7]; ++ case 7: to[6] = from[6]; ++ case 6: to[5] = from[5]; ++ case 5: to[4] = from[4]; ++ case 4: to[3] = from[3]; ++ case 3: to[2] = from[2]; ++ case 2: to[1] = from[1]; ++ case 1: to[0] = from[0]; ++ case 0: break; ++ default: ++ while (count-- > 0) { ++ *to++ = *from++; ++ } ++ break; ++ } ++} ++ ++static void pd_aligned_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_words(from, to, count); ++} ++ ++static void pd_aligned_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_disjoint_words(from, to, count); ++} ++ ++static void pd_conjoint_bytes(const void* from, void* to, size_t count) { ++ (void)memmove(to, from, count); ++} ++ ++static void pd_conjoint_bytes_atomic(const void* from, void* to, size_t count) { ++ pd_conjoint_bytes(from, to, count); ++} ++ ++static void pd_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) { ++ _Copy_conjoint_jshorts_atomic(from, to, count); ++} ++ ++static void pd_conjoint_jints_atomic(const jint* from, jint* to, size_t count) { ++ _Copy_conjoint_jints_atomic(from, to, count); ++} ++ ++static void pd_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) { ++ _Copy_conjoint_jlongs_atomic(from, to, count); ++} ++ ++static void pd_conjoint_oops_atomic(const oop* from, oop* to, size_t count) { ++ assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size."); ++ _Copy_conjoint_jlongs_atomic((const jlong*)from, (jlong*)to, count); ++} ++ ++static void pd_arrayof_conjoint_bytes(const HeapWord* from, HeapWord* to, size_t count) { ++ _Copy_arrayof_conjoint_bytes(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_jshorts(const HeapWord* from, HeapWord* to, size_t count) { ++ _Copy_arrayof_conjoint_jshorts(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_jints(const HeapWord* from, HeapWord* to, size_t count) { ++ _Copy_arrayof_conjoint_jints(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_jlongs(const HeapWord* from, HeapWord* to, size_t count) { ++ _Copy_arrayof_conjoint_jlongs(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_oops(const HeapWord* from, HeapWord* to, size_t count) { ++ assert(!UseCompressedOops, "foo!"); ++ assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size"); ++ _Copy_arrayof_conjoint_jlongs(from, to, count); ++} ++ ++#endif // OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_INLINE_HPP + +From 6033e30ebd94f2315bf809a42ef00c85bdbc780e Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sat, 29 Apr 2023 19:33:21 +0800 +Subject: [PATCH 090/140] 
Revert JDK-8241436: C2: Factor out C2-specific code + from MacroAssembler + +--- + .../cpu/riscv/c2_MacroAssembler_riscv.cpp | 1321 ----------------- + .../cpu/riscv/c2_MacroAssembler_riscv.hpp | 141 -- + .../cpu/riscv/macroAssembler_riscv.cpp | 1282 ++++++++++++++++ + .../cpu/riscv/macroAssembler_riscv.hpp | 103 ++ + src/hotspot/cpu/riscv/riscv.ad | 124 +- + 5 files changed, 1447 insertions(+), 1524 deletions(-) + delete mode 100644 src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp + delete mode 100644 src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp + +diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp +deleted file mode 100644 +index 73f84a724ca..00000000000 +--- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp ++++ /dev/null +@@ -1,1321 +0,0 @@ +-/* +- * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. +- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +- * +- * This code is free software; you can redistribute it and/or modify it +- * under the terms of the GNU General Public License version 2 only, as +- * published by the Free Software Foundation. +- * +- * This code is distributed in the hope that it will be useful, but WITHOUT +- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +- * version 2 for more details (a copy is included in the LICENSE file that +- * accompanied this code). +- * +- * You should have received a copy of the GNU General Public License version +- * 2 along with this work; if not, write to the Free Software Foundation, +- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +- * +- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +- * or visit www.oracle.com if you need additional information or have any +- * questions. +- * +- */ +- +-#include "precompiled.hpp" +-#include "asm/assembler.hpp" +-#include "asm/assembler.inline.hpp" +-#include "opto/c2_MacroAssembler.hpp" +-#include "opto/intrinsicnode.hpp" +-#include "opto/subnode.hpp" +-#include "runtime/stubRoutines.hpp" +- +-#ifdef PRODUCT +-#define BLOCK_COMMENT(str) /* nothing */ +-#define STOP(error) stop(error) +-#else +-#define BLOCK_COMMENT(str) block_comment(str) +-#define STOP(error) block_comment(error); stop(error) +-#endif +- +-#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") +- +-// short string +-// StringUTF16.indexOfChar +-// StringLatin1.indexOfChar +-void C2_MacroAssembler::string_indexof_char_short(Register str1, Register cnt1, +- Register ch, Register result, +- bool isL) +-{ +- Register ch1 = t0; +- Register index = t1; +- +- BLOCK_COMMENT("string_indexof_char_short {"); +- +- Label LOOP, LOOP1, LOOP4, LOOP8; +- Label MATCH, MATCH1, MATCH2, MATCH3, +- MATCH4, MATCH5, MATCH6, MATCH7, NOMATCH; +- +- mv(result, -1); +- mv(index, zr); +- +- bind(LOOP); +- addi(t0, index, 8); +- ble(t0, cnt1, LOOP8); +- addi(t0, index, 4); +- ble(t0, cnt1, LOOP4); +- j(LOOP1); +- +- bind(LOOP8); +- isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); +- beq(ch, ch1, MATCH); +- isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); +- beq(ch, ch1, MATCH1); +- isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); +- beq(ch, ch1, MATCH2); +- isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); +- beq(ch, ch1, MATCH3); +- isL ? 
lbu(ch1, Address(str1, 4)) : lhu(ch1, Address(str1, 8)); +- beq(ch, ch1, MATCH4); +- isL ? lbu(ch1, Address(str1, 5)) : lhu(ch1, Address(str1, 10)); +- beq(ch, ch1, MATCH5); +- isL ? lbu(ch1, Address(str1, 6)) : lhu(ch1, Address(str1, 12)); +- beq(ch, ch1, MATCH6); +- isL ? lbu(ch1, Address(str1, 7)) : lhu(ch1, Address(str1, 14)); +- beq(ch, ch1, MATCH7); +- addi(index, index, 8); +- addi(str1, str1, isL ? 8 : 16); +- blt(index, cnt1, LOOP); +- j(NOMATCH); +- +- bind(LOOP4); +- isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); +- beq(ch, ch1, MATCH); +- isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); +- beq(ch, ch1, MATCH1); +- isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); +- beq(ch, ch1, MATCH2); +- isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); +- beq(ch, ch1, MATCH3); +- addi(index, index, 4); +- addi(str1, str1, isL ? 4 : 8); +- bge(index, cnt1, NOMATCH); +- +- bind(LOOP1); +- isL ? lbu(ch1, Address(str1)) : lhu(ch1, Address(str1)); +- beq(ch, ch1, MATCH); +- addi(index, index, 1); +- addi(str1, str1, isL ? 1 : 2); +- blt(index, cnt1, LOOP1); +- j(NOMATCH); +- +- bind(MATCH1); +- addi(index, index, 1); +- j(MATCH); +- +- bind(MATCH2); +- addi(index, index, 2); +- j(MATCH); +- +- bind(MATCH3); +- addi(index, index, 3); +- j(MATCH); +- +- bind(MATCH4); +- addi(index, index, 4); +- j(MATCH); +- +- bind(MATCH5); +- addi(index, index, 5); +- j(MATCH); +- +- bind(MATCH6); +- addi(index, index, 6); +- j(MATCH); +- +- bind(MATCH7); +- addi(index, index, 7); +- +- bind(MATCH); +- mv(result, index); +- bind(NOMATCH); +- BLOCK_COMMENT("} string_indexof_char_short"); +-} +- +-// StringUTF16.indexOfChar +-// StringLatin1.indexOfChar +-void C2_MacroAssembler::string_indexof_char(Register str1, Register cnt1, +- Register ch, Register result, +- Register tmp1, Register tmp2, +- Register tmp3, Register tmp4, +- bool isL) +-{ +- Label CH1_LOOP, HIT, NOMATCH, DONE, DO_LONG; +- Register ch1 = t0; +- Register orig_cnt = t1; +- Register mask1 = tmp3; +- Register mask2 = tmp2; +- Register match_mask = tmp1; +- Register trailing_char = tmp4; +- Register unaligned_elems = tmp4; +- +- BLOCK_COMMENT("string_indexof_char {"); +- beqz(cnt1, NOMATCH); +- +- addi(t0, cnt1, isL ? -32 : -16); +- bgtz(t0, DO_LONG); +- string_indexof_char_short(str1, cnt1, ch, result, isL); +- j(DONE); +- +- bind(DO_LONG); +- mv(orig_cnt, cnt1); +- if (AvoidUnalignedAccesses) { +- Label ALIGNED; +- andi(unaligned_elems, str1, 0x7); +- beqz(unaligned_elems, ALIGNED); +- sub(unaligned_elems, unaligned_elems, 8); +- neg(unaligned_elems, unaligned_elems); +- if (!isL) { +- srli(unaligned_elems, unaligned_elems, 1); +- } +- // do unaligned part per element +- string_indexof_char_short(str1, unaligned_elems, ch, result, isL); +- bgez(result, DONE); +- mv(orig_cnt, cnt1); +- sub(cnt1, cnt1, unaligned_elems); +- bind(ALIGNED); +- } +- +- // duplicate ch +- if (isL) { +- slli(ch1, ch, 8); +- orr(ch, ch1, ch); +- } +- slli(ch1, ch, 16); +- orr(ch, ch1, ch); +- slli(ch1, ch, 32); +- orr(ch, ch1, ch); +- +- if (!isL) { +- slli(cnt1, cnt1, 1); +- } +- +- uint64_t mask0101 = UCONST64(0x0101010101010101); +- uint64_t mask0001 = UCONST64(0x0001000100010001); +- mv(mask1, isL ? mask0101 : mask0001); +- uint64_t mask7f7f = UCONST64(0x7f7f7f7f7f7f7f7f); +- uint64_t mask7fff = UCONST64(0x7fff7fff7fff7fff); +- mv(mask2, isL ? 
mask7f7f : mask7fff); +- +- bind(CH1_LOOP); +- ld(ch1, Address(str1)); +- addi(str1, str1, 8); +- addi(cnt1, cnt1, -8); +- compute_match_mask(ch1, ch, match_mask, mask1, mask2); +- bnez(match_mask, HIT); +- bgtz(cnt1, CH1_LOOP); +- j(NOMATCH); +- +- bind(HIT); +- ctzc_bit(trailing_char, match_mask, isL, ch1, result); +- srli(trailing_char, trailing_char, 3); +- addi(cnt1, cnt1, 8); +- ble(cnt1, trailing_char, NOMATCH); +- // match case +- if (!isL) { +- srli(cnt1, cnt1, 1); +- srli(trailing_char, trailing_char, 1); +- } +- +- sub(result, orig_cnt, cnt1); +- add(result, result, trailing_char); +- j(DONE); +- +- bind(NOMATCH); +- mv(result, -1); +- +- bind(DONE); +- BLOCK_COMMENT("} string_indexof_char"); +-} +- +-typedef void (MacroAssembler::* load_chr_insn)(Register rd, const Address &adr, Register temp); +- +-// Search for needle in haystack and return index or -1 +-// x10: result +-// x11: haystack +-// x12: haystack_len +-// x13: needle +-// x14: needle_len +-void C2_MacroAssembler::string_indexof(Register haystack, Register needle, +- Register haystack_len, Register needle_len, +- Register tmp1, Register tmp2, +- Register tmp3, Register tmp4, +- Register tmp5, Register tmp6, +- Register result, int ae) +-{ +- assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); +- +- Label LINEARSEARCH, LINEARSTUB, DONE, NOMATCH; +- +- Register ch1 = t0; +- Register ch2 = t1; +- Register nlen_tmp = tmp1; // needle len tmp +- Register hlen_tmp = tmp2; // haystack len tmp +- Register result_tmp = tmp4; +- +- bool isLL = ae == StrIntrinsicNode::LL; +- +- bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; +- bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; +- int needle_chr_shift = needle_isL ? 0 : 1; +- int haystack_chr_shift = haystack_isL ? 0 : 1; +- int needle_chr_size = needle_isL ? 1 : 2; +- int haystack_chr_size = haystack_isL ? 1 : 2; +- load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu : +- (load_chr_insn)&MacroAssembler::lhu; +- load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu : +- (load_chr_insn)&MacroAssembler::lhu; +- +- BLOCK_COMMENT("string_indexof {"); +- +- // Note, inline_string_indexOf() generates checks: +- // if (pattern.count > src.count) return -1; +- // if (pattern.count == 0) return 0; +- +- // We have two strings, a source string in haystack, haystack_len and a pattern string +- // in needle, needle_len. Find the first occurence of pattern in source or return -1. +- +- // For larger pattern and source we use a simplified Boyer Moore algorithm. +- // With a small pattern and source we use linear scan. +- +- // needle_len >=8 && needle_len < 256 && needle_len < haystack_len/4, use bmh algorithm. +- sub(result_tmp, haystack_len, needle_len); +- // needle_len < 8, use linear scan +- sub(t0, needle_len, 8); +- bltz(t0, LINEARSEARCH); +- // needle_len >= 256, use linear scan +- sub(t0, needle_len, 256); +- bgez(t0, LINEARSTUB); +- // needle_len >= haystack_len/4, use linear scan +- srli(t0, haystack_len, 2); +- bge(needle_len, t0, LINEARSTUB); +- +- // Boyer-Moore-Horspool introduction: +- // The Boyer Moore alogorithm is based on the description here:- +- // +- // http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm +- // +- // This describes and algorithm with 2 shift rules. The 'Bad Character' rule +- // and the 'Good Suffix' rule. 
+- // +- // These rules are essentially heuristics for how far we can shift the +- // pattern along the search string. +- // +- // The implementation here uses the 'Bad Character' rule only because of the +- // complexity of initialisation for the 'Good Suffix' rule. +- // +- // This is also known as the Boyer-Moore-Horspool algorithm: +- // +- // http://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm +- // +- // #define ASIZE 256 +- // +- // int bm(unsigned char *pattern, int m, unsigned char *src, int n) { +- // int i, j; +- // unsigned c; +- // unsigned char bc[ASIZE]; +- // +- // /* Preprocessing */ +- // for (i = 0; i < ASIZE; ++i) +- // bc[i] = m; +- // for (i = 0; i < m - 1; ) { +- // c = pattern[i]; +- // ++i; +- // // c < 256 for Latin1 string, so, no need for branch +- // #ifdef PATTERN_STRING_IS_LATIN1 +- // bc[c] = m - i; +- // #else +- // if (c < ASIZE) bc[c] = m - i; +- // #endif +- // } +- // +- // /* Searching */ +- // j = 0; +- // while (j <= n - m) { +- // c = src[i+j]; +- // if (pattern[m-1] == c) +- // int k; +- // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); +- // if (k < 0) return j; +- // // c < 256 for Latin1 string, so, no need for branch +- // #ifdef SOURCE_STRING_IS_LATIN1_AND_PATTERN_STRING_IS_LATIN1 +- // // LL case: (c< 256) always true. Remove branch +- // j += bc[pattern[j+m-1]]; +- // #endif +- // #ifdef SOURCE_STRING_IS_UTF_AND_PATTERN_STRING_IS_UTF +- // // UU case: need if (c if not. +- // if (c < ASIZE) +- // j += bc[pattern[j+m-1]]; +- // else +- // j += m +- // #endif +- // } +- // return -1; +- // } +- +- // temp register:t0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, result +- Label BCLOOP, BCSKIP, BMLOOPSTR2, BMLOOPSTR1, BMSKIP, BMADV, BMMATCH, +- BMLOOPSTR1_LASTCMP, BMLOOPSTR1_CMP, BMLOOPSTR1_AFTER_LOAD, BM_INIT_LOOP; +- +- Register haystack_end = haystack_len; +- Register skipch = tmp2; +- +- // pattern length is >=8, so, we can read at least 1 register for cases when +- // UTF->Latin1 conversion is not needed(8 LL or 4UU) and half register for +- // UL case. We'll re-read last character in inner pre-loop code to have +- // single outer pre-loop load +- const int firstStep = isLL ? 
7 : 3; +- +- const int ASIZE = 256; +- const int STORE_BYTES = 8; // 8 bytes stored per instruction(sd) +- +- sub(sp, sp, ASIZE); +- +- // init BC offset table with default value: needle_len +- slli(t0, needle_len, 8); +- orr(t0, t0, needle_len); // [63...16][needle_len][needle_len] +- slli(tmp1, t0, 16); +- orr(t0, tmp1, t0); // [63...32][needle_len][needle_len][needle_len][needle_len] +- slli(tmp1, t0, 32); +- orr(tmp5, tmp1, t0); // tmp5: 8 elements [needle_len] +- +- mv(ch1, sp); // ch1 is t0 +- mv(tmp6, ASIZE / STORE_BYTES); // loop iterations +- +- bind(BM_INIT_LOOP); +- // for (i = 0; i < ASIZE; ++i) +- // bc[i] = m; +- for (int i = 0; i < 4; i++) { +- sd(tmp5, Address(ch1, i * wordSize)); +- } +- add(ch1, ch1, 32); +- sub(tmp6, tmp6, 4); +- bgtz(tmp6, BM_INIT_LOOP); +- +- sub(nlen_tmp, needle_len, 1); // m - 1, index of the last element in pattern +- Register orig_haystack = tmp5; +- mv(orig_haystack, haystack); +- // result_tmp = tmp4 +- shadd(haystack_end, result_tmp, haystack, haystack_end, haystack_chr_shift); +- sub(ch2, needle_len, 1); // bc offset init value, ch2 is t1 +- mv(tmp3, needle); +- +- // for (i = 0; i < m - 1; ) { +- // c = pattern[i]; +- // ++i; +- // // c < 256 for Latin1 string, so, no need for branch +- // #ifdef PATTERN_STRING_IS_LATIN1 +- // bc[c] = m - i; +- // #else +- // if (c < ASIZE) bc[c] = m - i; +- // #endif +- // } +- bind(BCLOOP); +- (this->*needle_load_1chr)(ch1, Address(tmp3), noreg); +- add(tmp3, tmp3, needle_chr_size); +- if (!needle_isL) { +- // ae == StrIntrinsicNode::UU +- mv(tmp6, ASIZE); +- bgeu(ch1, tmp6, BCSKIP); +- } +- add(tmp4, sp, ch1); +- sb(ch2, Address(tmp4)); // store skip offset to BC offset table +- +- bind(BCSKIP); +- sub(ch2, ch2, 1); // for next pattern element, skip distance -1 +- bgtz(ch2, BCLOOP); +- +- // tmp6: pattern end, address after needle +- shadd(tmp6, needle_len, needle, tmp6, needle_chr_shift); +- if (needle_isL == haystack_isL) { +- // load last 8 bytes (8LL/4UU symbols) +- ld(tmp6, Address(tmp6, -wordSize)); +- } else { +- // UL: from UTF-16(source) search Latin1(pattern) +- lwu(tmp6, Address(tmp6, -wordSize / 2)); // load last 4 bytes(4 symbols) +- // convert Latin1 to UTF. eg: 0x0000abcd -> 0x0a0b0c0d +- // We'll have to wait until load completed, but it's still faster than per-character loads+checks +- srli(tmp3, tmp6, BitsPerByte * (wordSize / 2 - needle_chr_size)); // pattern[m-1], eg:0x0000000a +- slli(ch2, tmp6, XLEN - 24); +- srli(ch2, ch2, XLEN - 8); // pattern[m-2], 0x0000000b +- slli(ch1, tmp6, XLEN - 16); +- srli(ch1, ch1, XLEN - 8); // pattern[m-3], 0x0000000c +- andi(tmp6, tmp6, 0xff); // pattern[m-4], 0x0000000d +- slli(ch2, ch2, 16); +- orr(ch2, ch2, ch1); // 0x00000b0c +- slli(result, tmp3, 48); // use result as temp register +- orr(tmp6, tmp6, result); // 0x0a00000d +- slli(result, ch2, 16); +- orr(tmp6, tmp6, result); // UTF-16:0x0a0b0c0d +- } +- +- // i = m - 1; +- // skipch = j + i; +- // if (skipch == pattern[m - 1] +- // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); +- // else +- // move j with bad char offset table +- bind(BMLOOPSTR2); +- // compare pattern to source string backward +- shadd(result, nlen_tmp, haystack, result, haystack_chr_shift); +- (this->*haystack_load_1chr)(skipch, Address(result), noreg); +- sub(nlen_tmp, nlen_tmp, firstStep); // nlen_tmp is positive here, because needle_len >= 8 +- if (needle_isL == haystack_isL) { +- // re-init tmp3. It's for free because it's executed in parallel with +- // load above. 
Alternative is to initialize it before loop, but it'll +- // affect performance on in-order systems with 2 or more ld/st pipelines +- srli(tmp3, tmp6, BitsPerByte * (wordSize - needle_chr_size)); // UU/LL: pattern[m-1] +- } +- if (!isLL) { // UU/UL case +- slli(ch2, nlen_tmp, 1); // offsets in bytes +- } +- bne(tmp3, skipch, BMSKIP); // if not equal, skipch is bad char +- add(result, haystack, isLL ? nlen_tmp : ch2); +- ld(ch2, Address(result)); // load 8 bytes from source string +- mv(ch1, tmp6); +- if (isLL) { +- j(BMLOOPSTR1_AFTER_LOAD); +- } else { +- sub(nlen_tmp, nlen_tmp, 1); // no need to branch for UU/UL case. cnt1 >= 8 +- j(BMLOOPSTR1_CMP); +- } +- +- bind(BMLOOPSTR1); +- shadd(ch1, nlen_tmp, needle, ch1, needle_chr_shift); +- (this->*needle_load_1chr)(ch1, Address(ch1), noreg); +- shadd(ch2, nlen_tmp, haystack, ch2, haystack_chr_shift); +- (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); +- +- bind(BMLOOPSTR1_AFTER_LOAD); +- sub(nlen_tmp, nlen_tmp, 1); +- bltz(nlen_tmp, BMLOOPSTR1_LASTCMP); +- +- bind(BMLOOPSTR1_CMP); +- beq(ch1, ch2, BMLOOPSTR1); +- +- bind(BMSKIP); +- if (!isLL) { +- // if we've met UTF symbol while searching Latin1 pattern, then we can +- // skip needle_len symbols +- if (needle_isL != haystack_isL) { +- mv(result_tmp, needle_len); +- } else { +- mv(result_tmp, 1); +- } +- mv(t0, ASIZE); +- bgeu(skipch, t0, BMADV); +- } +- add(result_tmp, sp, skipch); +- lbu(result_tmp, Address(result_tmp)); // load skip offset +- +- bind(BMADV); +- sub(nlen_tmp, needle_len, 1); +- // move haystack after bad char skip offset +- shadd(haystack, result_tmp, haystack, result, haystack_chr_shift); +- ble(haystack, haystack_end, BMLOOPSTR2); +- add(sp, sp, ASIZE); +- j(NOMATCH); +- +- bind(BMLOOPSTR1_LASTCMP); +- bne(ch1, ch2, BMSKIP); +- +- bind(BMMATCH); +- sub(result, haystack, orig_haystack); +- if (!haystack_isL) { +- srli(result, result, 1); +- } +- add(sp, sp, ASIZE); +- j(DONE); +- +- bind(LINEARSTUB); +- sub(t0, needle_len, 16); // small patterns still should be handled by simple algorithm +- bltz(t0, LINEARSEARCH); +- mv(result, zr); +- RuntimeAddress stub = NULL; +- if (isLL) { +- stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ll()); +- assert(stub.target() != NULL, "string_indexof_linear_ll stub has not been generated"); +- } else if (needle_isL) { +- stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ul()); +- assert(stub.target() != NULL, "string_indexof_linear_ul stub has not been generated"); +- } else { +- stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_uu()); +- assert(stub.target() != NULL, "string_indexof_linear_uu stub has not been generated"); +- } +- trampoline_call(stub); +- j(DONE); +- +- bind(NOMATCH); +- mv(result, -1); +- j(DONE); +- +- bind(LINEARSEARCH); +- string_indexof_linearscan(haystack, needle, haystack_len, needle_len, tmp1, tmp2, tmp3, tmp4, -1, result, ae); +- +- bind(DONE); +- BLOCK_COMMENT("} string_indexof"); +-} +- +-// string_indexof +-// result: x10 +-// src: x11 +-// src_count: x12 +-// pattern: x13 +-// pattern_count: x14 or 1/2/3/4 +-void C2_MacroAssembler::string_indexof_linearscan(Register haystack, Register needle, +- Register haystack_len, Register needle_len, +- Register tmp1, Register tmp2, +- Register tmp3, Register tmp4, +- int needle_con_cnt, Register result, int ae) +-{ +- // Note: +- // needle_con_cnt > 0 means needle_len register is invalid, needle length is constant +- // for UU/LL: needle_con_cnt[1, 4], UL: needle_con_cnt = 1 +- assert(needle_con_cnt <= 4, "Invalid 
needle constant count"); +- assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); +- +- Register ch1 = t0; +- Register ch2 = t1; +- Register hlen_neg = haystack_len, nlen_neg = needle_len; +- Register nlen_tmp = tmp1, hlen_tmp = tmp2, result_tmp = tmp4; +- +- bool isLL = ae == StrIntrinsicNode::LL; +- +- bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; +- bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; +- int needle_chr_shift = needle_isL ? 0 : 1; +- int haystack_chr_shift = haystack_isL ? 0 : 1; +- int needle_chr_size = needle_isL ? 1 : 2; +- int haystack_chr_size = haystack_isL ? 1 : 2; +- +- load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu : +- (load_chr_insn)&MacroAssembler::lhu; +- load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu : +- (load_chr_insn)&MacroAssembler::lhu; +- load_chr_insn load_2chr = isLL ? (load_chr_insn)&MacroAssembler::lhu : (load_chr_insn)&MacroAssembler::lwu; +- load_chr_insn load_4chr = isLL ? (load_chr_insn)&MacroAssembler::lwu : (load_chr_insn)&MacroAssembler::ld; +- +- Label DO1, DO2, DO3, MATCH, NOMATCH, DONE; +- +- Register first = tmp3; +- +- if (needle_con_cnt == -1) { +- Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT; +- +- sub(t0, needle_len, needle_isL == haystack_isL ? 4 : 2); +- bltz(t0, DOSHORT); +- +- (this->*needle_load_1chr)(first, Address(needle), noreg); +- slli(t0, needle_len, needle_chr_shift); +- add(needle, needle, t0); +- neg(nlen_neg, t0); +- slli(t0, result_tmp, haystack_chr_shift); +- add(haystack, haystack, t0); +- neg(hlen_neg, t0); +- +- bind(FIRST_LOOP); +- add(t0, haystack, hlen_neg); +- (this->*haystack_load_1chr)(ch2, Address(t0), noreg); +- beq(first, ch2, STR1_LOOP); +- +- bind(STR2_NEXT); +- add(hlen_neg, hlen_neg, haystack_chr_size); +- blez(hlen_neg, FIRST_LOOP); +- j(NOMATCH); +- +- bind(STR1_LOOP); +- add(nlen_tmp, nlen_neg, needle_chr_size); +- add(hlen_tmp, hlen_neg, haystack_chr_size); +- bgez(nlen_tmp, MATCH); +- +- bind(STR1_NEXT); +- add(ch1, needle, nlen_tmp); +- (this->*needle_load_1chr)(ch1, Address(ch1), noreg); +- add(ch2, haystack, hlen_tmp); +- (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); +- bne(ch1, ch2, STR2_NEXT); +- add(nlen_tmp, nlen_tmp, needle_chr_size); +- add(hlen_tmp, hlen_tmp, haystack_chr_size); +- bltz(nlen_tmp, STR1_NEXT); +- j(MATCH); +- +- bind(DOSHORT); +- if (needle_isL == haystack_isL) { +- sub(t0, needle_len, 2); +- bltz(t0, DO1); +- bgtz(t0, DO3); +- } +- } +- +- if (needle_con_cnt == 4) { +- Label CH1_LOOP; +- (this->*load_4chr)(ch1, Address(needle), noreg); +- sub(result_tmp, haystack_len, 4); +- slli(tmp3, result_tmp, haystack_chr_shift); // result as tmp +- add(haystack, haystack, tmp3); +- neg(hlen_neg, tmp3); +- +- bind(CH1_LOOP); +- add(ch2, haystack, hlen_neg); +- (this->*load_4chr)(ch2, Address(ch2), noreg); +- beq(ch1, ch2, MATCH); +- add(hlen_neg, hlen_neg, haystack_chr_size); +- blez(hlen_neg, CH1_LOOP); +- j(NOMATCH); +- } +- +- if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 2) { +- Label CH1_LOOP; +- BLOCK_COMMENT("string_indexof DO2 {"); +- bind(DO2); +- (this->*load_2chr)(ch1, Address(needle), noreg); +- if (needle_con_cnt == 2) { +- sub(result_tmp, haystack_len, 2); +- } +- slli(tmp3, result_tmp, haystack_chr_shift); +- add(haystack, haystack, tmp3); +- neg(hlen_neg, tmp3); +- +- bind(CH1_LOOP); +- add(tmp3, haystack, hlen_neg); +- (this->*load_2chr)(ch2, Address(tmp3), noreg); +- beq(ch1, ch2, 
MATCH); +- add(hlen_neg, hlen_neg, haystack_chr_size); +- blez(hlen_neg, CH1_LOOP); +- j(NOMATCH); +- BLOCK_COMMENT("} string_indexof DO2"); +- } +- +- if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 3) { +- Label FIRST_LOOP, STR2_NEXT, STR1_LOOP; +- BLOCK_COMMENT("string_indexof DO3 {"); +- +- bind(DO3); +- (this->*load_2chr)(first, Address(needle), noreg); +- (this->*needle_load_1chr)(ch1, Address(needle, 2 * needle_chr_size), noreg); +- if (needle_con_cnt == 3) { +- sub(result_tmp, haystack_len, 3); +- } +- slli(hlen_tmp, result_tmp, haystack_chr_shift); +- add(haystack, haystack, hlen_tmp); +- neg(hlen_neg, hlen_tmp); +- +- bind(FIRST_LOOP); +- add(ch2, haystack, hlen_neg); +- (this->*load_2chr)(ch2, Address(ch2), noreg); +- beq(first, ch2, STR1_LOOP); +- +- bind(STR2_NEXT); +- add(hlen_neg, hlen_neg, haystack_chr_size); +- blez(hlen_neg, FIRST_LOOP); +- j(NOMATCH); +- +- bind(STR1_LOOP); +- add(hlen_tmp, hlen_neg, 2 * haystack_chr_size); +- add(ch2, haystack, hlen_tmp); +- (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); +- bne(ch1, ch2, STR2_NEXT); +- j(MATCH); +- BLOCK_COMMENT("} string_indexof DO3"); +- } +- +- if (needle_con_cnt == -1 || needle_con_cnt == 1) { +- Label DO1_LOOP; +- +- BLOCK_COMMENT("string_indexof DO1 {"); +- bind(DO1); +- (this->*needle_load_1chr)(ch1, Address(needle), noreg); +- sub(result_tmp, haystack_len, 1); +- mv(tmp3, result_tmp); +- if (haystack_chr_shift) { +- slli(tmp3, result_tmp, haystack_chr_shift); +- } +- add(haystack, haystack, tmp3); +- neg(hlen_neg, tmp3); +- +- bind(DO1_LOOP); +- add(tmp3, haystack, hlen_neg); +- (this->*haystack_load_1chr)(ch2, Address(tmp3), noreg); +- beq(ch1, ch2, MATCH); +- add(hlen_neg, hlen_neg, haystack_chr_size); +- blez(hlen_neg, DO1_LOOP); +- BLOCK_COMMENT("} string_indexof DO1"); +- } +- +- bind(NOMATCH); +- mv(result, -1); +- j(DONE); +- +- bind(MATCH); +- srai(t0, hlen_neg, haystack_chr_shift); +- add(result, result_tmp, t0); +- +- bind(DONE); +-} +- +-// Compare strings. +-void C2_MacroAssembler::string_compare(Register str1, Register str2, +- Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2, +- Register tmp3, int ae) +-{ +- Label DONE, SHORT_LOOP, SHORT_STRING, SHORT_LAST, TAIL, STUB, +- DIFFERENCE, NEXT_WORD, SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT, +- SHORT_LOOP_START, TAIL_CHECK, L; +- +- const int STUB_THRESHOLD = 64 + 8; +- bool isLL = ae == StrIntrinsicNode::LL; +- bool isLU = ae == StrIntrinsicNode::LU; +- bool isUL = ae == StrIntrinsicNode::UL; +- +- bool str1_isL = isLL || isLU; +- bool str2_isL = isLL || isUL; +- +- // for L strings, 1 byte for 1 character +- // for U strings, 2 bytes for 1 character +- int str1_chr_size = str1_isL ? 1 : 2; +- int str2_chr_size = str2_isL ? 1 : 2; +- int minCharsInWord = isLL ? wordSize : wordSize / 2; +- +- load_chr_insn str1_load_chr = str1_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; +- load_chr_insn str2_load_chr = str2_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; +- +- BLOCK_COMMENT("string_compare {"); +- +- // Bizzarely, the counts are passed in bytes, regardless of whether they +- // are L or U strings, however the result is always in characters. +- if (!str1_isL) { +- sraiw(cnt1, cnt1, 1); +- } +- if (!str2_isL) { +- sraiw(cnt2, cnt2, 1); +- } +- +- // Compute the minimum of the string lengths and save the difference in result. 
+- sub(result, cnt1, cnt2); +- bgt(cnt1, cnt2, L); +- mv(cnt2, cnt1); +- bind(L); +- +- // A very short string +- li(t0, minCharsInWord); +- ble(cnt2, t0, SHORT_STRING); +- +- // Compare longwords +- // load first parts of strings and finish initialization while loading +- { +- if (str1_isL == str2_isL) { // LL or UU +- // load 8 bytes once to compare +- ld(tmp1, Address(str1)); +- beq(str1, str2, DONE); +- ld(tmp2, Address(str2)); +- li(t0, STUB_THRESHOLD); +- bge(cnt2, t0, STUB); +- sub(cnt2, cnt2, minCharsInWord); +- beqz(cnt2, TAIL_CHECK); +- // convert cnt2 from characters to bytes +- if (!str1_isL) { +- slli(cnt2, cnt2, 1); +- } +- add(str2, str2, cnt2); +- add(str1, str1, cnt2); +- sub(cnt2, zr, cnt2); +- } else if (isLU) { // LU case +- lwu(tmp1, Address(str1)); +- ld(tmp2, Address(str2)); +- li(t0, STUB_THRESHOLD); +- bge(cnt2, t0, STUB); +- addi(cnt2, cnt2, -4); +- add(str1, str1, cnt2); +- sub(cnt1, zr, cnt2); +- slli(cnt2, cnt2, 1); +- add(str2, str2, cnt2); +- inflate_lo32(tmp3, tmp1); +- mv(tmp1, tmp3); +- sub(cnt2, zr, cnt2); +- addi(cnt1, cnt1, 4); +- } else { // UL case +- ld(tmp1, Address(str1)); +- lwu(tmp2, Address(str2)); +- li(t0, STUB_THRESHOLD); +- bge(cnt2, t0, STUB); +- addi(cnt2, cnt2, -4); +- slli(t0, cnt2, 1); +- sub(cnt1, zr, t0); +- add(str1, str1, t0); +- add(str2, str2, cnt2); +- inflate_lo32(tmp3, tmp2); +- mv(tmp2, tmp3); +- sub(cnt2, zr, cnt2); +- addi(cnt1, cnt1, 8); +- } +- addi(cnt2, cnt2, isUL ? 4 : 8); +- bgez(cnt2, TAIL); +- xorr(tmp3, tmp1, tmp2); +- bnez(tmp3, DIFFERENCE); +- +- // main loop +- bind(NEXT_WORD); +- if (str1_isL == str2_isL) { // LL or UU +- add(t0, str1, cnt2); +- ld(tmp1, Address(t0)); +- add(t0, str2, cnt2); +- ld(tmp2, Address(t0)); +- addi(cnt2, cnt2, 8); +- } else if (isLU) { // LU case +- add(t0, str1, cnt1); +- lwu(tmp1, Address(t0)); +- add(t0, str2, cnt2); +- ld(tmp2, Address(t0)); +- addi(cnt1, cnt1, 4); +- inflate_lo32(tmp3, tmp1); +- mv(tmp1, tmp3); +- addi(cnt2, cnt2, 8); +- } else { // UL case +- add(t0, str2, cnt2); +- lwu(tmp2, Address(t0)); +- add(t0, str1, cnt1); +- ld(tmp1, Address(t0)); +- inflate_lo32(tmp3, tmp2); +- mv(tmp2, tmp3); +- addi(cnt1, cnt1, 8); +- addi(cnt2, cnt2, 4); +- } +- bgez(cnt2, TAIL); +- +- xorr(tmp3, tmp1, tmp2); +- beqz(tmp3, NEXT_WORD); +- j(DIFFERENCE); +- bind(TAIL); +- xorr(tmp3, tmp1, tmp2); +- bnez(tmp3, DIFFERENCE); +- // Last longword. In the case where length == 4 we compare the +- // same longword twice, but that's still faster than another +- // conditional branch. +- if (str1_isL == str2_isL) { // LL or UU +- ld(tmp1, Address(str1)); +- ld(tmp2, Address(str2)); +- } else if (isLU) { // LU case +- lwu(tmp1, Address(str1)); +- ld(tmp2, Address(str2)); +- inflate_lo32(tmp3, tmp1); +- mv(tmp1, tmp3); +- } else { // UL case +- lwu(tmp2, Address(str2)); +- ld(tmp1, Address(str1)); +- inflate_lo32(tmp3, tmp2); +- mv(tmp2, tmp3); +- } +- bind(TAIL_CHECK); +- xorr(tmp3, tmp1, tmp2); +- beqz(tmp3, DONE); +- +- // Find the first different characters in the longwords and +- // compute their difference. 
+- bind(DIFFERENCE); +- ctzc_bit(result, tmp3, isLL); // count zero from lsb to msb +- srl(tmp1, tmp1, result); +- srl(tmp2, tmp2, result); +- if (isLL) { +- andi(tmp1, tmp1, 0xFF); +- andi(tmp2, tmp2, 0xFF); +- } else { +- andi(tmp1, tmp1, 0xFFFF); +- andi(tmp2, tmp2, 0xFFFF); +- } +- sub(result, tmp1, tmp2); +- j(DONE); +- } +- +- bind(STUB); +- RuntimeAddress stub = NULL; +- switch (ae) { +- case StrIntrinsicNode::LL: +- stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LL()); +- break; +- case StrIntrinsicNode::UU: +- stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UU()); +- break; +- case StrIntrinsicNode::LU: +- stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LU()); +- break; +- case StrIntrinsicNode::UL: +- stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UL()); +- break; +- default: +- ShouldNotReachHere(); +- } +- assert(stub.target() != NULL, "compare_long_string stub has not been generated"); +- trampoline_call(stub); +- j(DONE); +- +- bind(SHORT_STRING); +- // Is the minimum length zero? +- beqz(cnt2, DONE); +- // arrange code to do most branches while loading and loading next characters +- // while comparing previous +- (this->*str1_load_chr)(tmp1, Address(str1), t0); +- addi(str1, str1, str1_chr_size); +- addi(cnt2, cnt2, -1); +- beqz(cnt2, SHORT_LAST_INIT); +- (this->*str2_load_chr)(cnt1, Address(str2), t0); +- addi(str2, str2, str2_chr_size); +- j(SHORT_LOOP_START); +- bind(SHORT_LOOP); +- addi(cnt2, cnt2, -1); +- beqz(cnt2, SHORT_LAST); +- bind(SHORT_LOOP_START); +- (this->*str1_load_chr)(tmp2, Address(str1), t0); +- addi(str1, str1, str1_chr_size); +- (this->*str2_load_chr)(t0, Address(str2), t0); +- addi(str2, str2, str2_chr_size); +- bne(tmp1, cnt1, SHORT_LOOP_TAIL); +- addi(cnt2, cnt2, -1); +- beqz(cnt2, SHORT_LAST2); +- (this->*str1_load_chr)(tmp1, Address(str1), t0); +- addi(str1, str1, str1_chr_size); +- (this->*str2_load_chr)(cnt1, Address(str2), t0); +- addi(str2, str2, str2_chr_size); +- beq(tmp2, t0, SHORT_LOOP); +- sub(result, tmp2, t0); +- j(DONE); +- bind(SHORT_LOOP_TAIL); +- sub(result, tmp1, cnt1); +- j(DONE); +- bind(SHORT_LAST2); +- beq(tmp2, t0, DONE); +- sub(result, tmp2, t0); +- +- j(DONE); +- bind(SHORT_LAST_INIT); +- (this->*str2_load_chr)(cnt1, Address(str2), t0); +- addi(str2, str2, str2_chr_size); +- bind(SHORT_LAST); +- beq(tmp1, cnt1, DONE); +- sub(result, tmp1, cnt1); +- +- bind(DONE); +- +- BLOCK_COMMENT("} string_compare"); +-} +- +-void C2_MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3, +- Register tmp4, Register tmp5, Register tmp6, Register result, +- Register cnt1, int elem_size) { +- Label DONE, SAME, NEXT_DWORD, SHORT, TAIL, TAIL2, IS_TMP5_ZR; +- Register tmp1 = t0; +- Register tmp2 = t1; +- Register cnt2 = tmp2; // cnt2 only used in array length compare +- Register elem_per_word = tmp6; +- int log_elem_size = exact_log2(elem_size); +- int length_offset = arrayOopDesc::length_offset_in_bytes(); +- int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? 
T_CHAR : T_BYTE); +- +- assert(elem_size == 1 || elem_size == 2, "must be char or byte"); +- assert_different_registers(a1, a2, result, cnt1, t0, t1, tmp3, tmp4, tmp5, tmp6); +- li(elem_per_word, wordSize / elem_size); +- +- BLOCK_COMMENT("arrays_equals {"); +- +- // if (a1 == a2), return true +- beq(a1, a2, SAME); +- +- mv(result, false); +- beqz(a1, DONE); +- beqz(a2, DONE); +- lwu(cnt1, Address(a1, length_offset)); +- lwu(cnt2, Address(a2, length_offset)); +- bne(cnt2, cnt1, DONE); +- beqz(cnt1, SAME); +- +- slli(tmp5, cnt1, 3 + log_elem_size); +- sub(tmp5, zr, tmp5); +- add(a1, a1, base_offset); +- add(a2, a2, base_offset); +- ld(tmp3, Address(a1, 0)); +- ld(tmp4, Address(a2, 0)); +- ble(cnt1, elem_per_word, SHORT); // short or same +- +- // Main 16 byte comparison loop with 2 exits +- bind(NEXT_DWORD); { +- ld(tmp1, Address(a1, wordSize)); +- ld(tmp2, Address(a2, wordSize)); +- sub(cnt1, cnt1, 2 * wordSize / elem_size); +- blez(cnt1, TAIL); +- bne(tmp3, tmp4, DONE); +- ld(tmp3, Address(a1, 2 * wordSize)); +- ld(tmp4, Address(a2, 2 * wordSize)); +- add(a1, a1, 2 * wordSize); +- add(a2, a2, 2 * wordSize); +- ble(cnt1, elem_per_word, TAIL2); +- } beq(tmp1, tmp2, NEXT_DWORD); +- j(DONE); +- +- bind(TAIL); +- xorr(tmp4, tmp3, tmp4); +- xorr(tmp2, tmp1, tmp2); +- sll(tmp2, tmp2, tmp5); +- orr(tmp5, tmp4, tmp2); +- j(IS_TMP5_ZR); +- +- bind(TAIL2); +- bne(tmp1, tmp2, DONE); +- +- bind(SHORT); +- xorr(tmp4, tmp3, tmp4); +- sll(tmp5, tmp4, tmp5); +- +- bind(IS_TMP5_ZR); +- bnez(tmp5, DONE); +- +- bind(SAME); +- mv(result, true); +- // That's it. +- bind(DONE); +- +- BLOCK_COMMENT("} array_equals"); +-} +- +-// Compare Strings +- +-// For Strings we're passed the address of the first characters in a1 +-// and a2 and the length in cnt1. +-// elem_size is the element size in bytes: either 1 or 2. +-// There are two implementations. For arrays >= 8 bytes, all +-// comparisons (including the final one, which may overlap) are +-// performed 8 bytes at a time. For strings < 8 bytes, we compare a +-// halfword, then a short, and then a byte. +- +-void C2_MacroAssembler::string_equals(Register a1, Register a2, +- Register result, Register cnt1, int elem_size) +-{ +- Label SAME, DONE, SHORT, NEXT_WORD; +- Register tmp1 = t0; +- Register tmp2 = t1; +- +- assert(elem_size == 1 || elem_size == 2, "must be 2 or 1 byte"); +- assert_different_registers(a1, a2, result, cnt1, t0, t1); +- +- BLOCK_COMMENT("string_equals {"); +- +- mv(result, false); +- +- // Check for short strings, i.e. smaller than wordSize. +- sub(cnt1, cnt1, wordSize); +- bltz(cnt1, SHORT); +- +- // Main 8 byte comparison loop. +- bind(NEXT_WORD); { +- ld(tmp1, Address(a1, 0)); +- add(a1, a1, wordSize); +- ld(tmp2, Address(a2, 0)); +- add(a2, a2, wordSize); +- sub(cnt1, cnt1, wordSize); +- bne(tmp1, tmp2, DONE); +- } bgtz(cnt1, NEXT_WORD); +- +- // Last longword. In the case where length == 4 we compare the +- // same longword twice, but that's still faster than another +- // conditional branch. +- // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when +- // length == 4. +- add(tmp1, a1, cnt1); +- ld(tmp1, Address(tmp1, 0)); +- add(tmp2, a2, cnt1); +- ld(tmp2, Address(tmp2, 0)); +- bne(tmp1, tmp2, DONE); +- j(SAME); +- +- bind(SHORT); +- Label TAIL03, TAIL01; +- +- // 0-7 bytes left. +- andi(t0, cnt1, 4); +- beqz(t0, TAIL03); +- { +- lwu(tmp1, Address(a1, 0)); +- add(a1, a1, 4); +- lwu(tmp2, Address(a2, 0)); +- add(a2, a2, 4); +- bne(tmp1, tmp2, DONE); +- } +- +- bind(TAIL03); +- // 0-3 bytes left. 
+- andi(t0, cnt1, 2); +- beqz(t0, TAIL01); +- { +- lhu(tmp1, Address(a1, 0)); +- add(a1, a1, 2); +- lhu(tmp2, Address(a2, 0)); +- add(a2, a2, 2); +- bne(tmp1, tmp2, DONE); +- } +- +- bind(TAIL01); +- if (elem_size == 1) { // Only needed when comparing 1-byte elements +- // 0-1 bytes left. +- andi(t0, cnt1, 1); +- beqz(t0, SAME); +- { +- lbu(tmp1, a1, 0); +- lbu(tmp2, a2, 0); +- bne(tmp1, tmp2, DONE); +- } +- } +- +- // Arrays are equal. +- bind(SAME); +- mv(result, true); +- +- // That's it. +- bind(DONE); +- BLOCK_COMMENT("} string_equals"); +-} +- +-typedef void (Assembler::*conditional_branch_insn)(Register op1, Register op2, Label& label, bool is_far); +-typedef void (MacroAssembler::*float_conditional_branch_insn)(FloatRegister op1, FloatRegister op2, Label& label, +- bool is_far, bool is_unordered); +- +-static conditional_branch_insn conditional_branches[] = +-{ +- /* SHORT branches */ +- (conditional_branch_insn)&Assembler::beq, +- (conditional_branch_insn)&Assembler::bgt, +- NULL, // BoolTest::overflow +- (conditional_branch_insn)&Assembler::blt, +- (conditional_branch_insn)&Assembler::bne, +- (conditional_branch_insn)&Assembler::ble, +- NULL, // BoolTest::no_overflow +- (conditional_branch_insn)&Assembler::bge, +- +- /* UNSIGNED branches */ +- (conditional_branch_insn)&Assembler::beq, +- (conditional_branch_insn)&Assembler::bgtu, +- NULL, +- (conditional_branch_insn)&Assembler::bltu, +- (conditional_branch_insn)&Assembler::bne, +- (conditional_branch_insn)&Assembler::bleu, +- NULL, +- (conditional_branch_insn)&Assembler::bgeu +-}; +- +-static float_conditional_branch_insn float_conditional_branches[] = +-{ +- /* FLOAT SHORT branches */ +- (float_conditional_branch_insn)&MacroAssembler::float_beq, +- (float_conditional_branch_insn)&MacroAssembler::float_bgt, +- NULL, // BoolTest::overflow +- (float_conditional_branch_insn)&MacroAssembler::float_blt, +- (float_conditional_branch_insn)&MacroAssembler::float_bne, +- (float_conditional_branch_insn)&MacroAssembler::float_ble, +- NULL, // BoolTest::no_overflow +- (float_conditional_branch_insn)&MacroAssembler::float_bge, +- +- /* DOUBLE SHORT branches */ +- (float_conditional_branch_insn)&MacroAssembler::double_beq, +- (float_conditional_branch_insn)&MacroAssembler::double_bgt, +- NULL, +- (float_conditional_branch_insn)&MacroAssembler::double_blt, +- (float_conditional_branch_insn)&MacroAssembler::double_bne, +- (float_conditional_branch_insn)&MacroAssembler::double_ble, +- NULL, +- (float_conditional_branch_insn)&MacroAssembler::double_bge +-}; +- +-void C2_MacroAssembler::cmp_branch(int cmpFlag, Register op1, Register op2, Label& label, bool is_far) { +- assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(conditional_branches) / sizeof(conditional_branches[0])), +- "invalid conditional branch index"); +- (this->*conditional_branches[cmpFlag])(op1, op2, label, is_far); +-} +- +-// This is a function should only be used by C2. Flip the unordered when unordered-greater, C2 would use +-// unordered-lesser instead of unordered-greater. Finally, commute the result bits at function do_one_bytecode(). 
+-void C2_MacroAssembler::float_cmp_branch(int cmpFlag, FloatRegister op1, FloatRegister op2, Label& label, bool is_far) { +- assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(float_conditional_branches) / sizeof(float_conditional_branches[0])), +- "invalid float conditional branch index"); +- int booltest_flag = cmpFlag & ~(C2_MacroAssembler::double_branch_mask); +- (this->*float_conditional_branches[cmpFlag])(op1, op2, label, is_far, +- (booltest_flag == (BoolTest::ge) || booltest_flag == (BoolTest::gt)) ? false : true); +-} +- +-void C2_MacroAssembler::enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { +- switch (cmpFlag) { +- case BoolTest::eq: +- case BoolTest::le: +- beqz(op1, L, is_far); +- break; +- case BoolTest::ne: +- case BoolTest::gt: +- bnez(op1, L, is_far); +- break; +- default: +- ShouldNotReachHere(); +- } +-} +- +-void C2_MacroAssembler::enc_cmpEqNe_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { +- switch (cmpFlag) { +- case BoolTest::eq: +- beqz(op1, L, is_far); +- break; +- case BoolTest::ne: +- bnez(op1, L, is_far); +- break; +- default: +- ShouldNotReachHere(); +- } +-} +- +-void C2_MacroAssembler::enc_cmove(int cmpFlag, Register op1, Register op2, Register dst, Register src) { +- Label L; +- cmp_branch(cmpFlag ^ (1 << neg_cond_bits), op1, op2, L); +- mv(dst, src); +- bind(L); +-} +- +-// Set dst to NaN if any NaN input. +-void C2_MacroAssembler::minmax_FD(FloatRegister dst, FloatRegister src1, FloatRegister src2, +- bool is_double, bool is_min) { +- assert_different_registers(dst, src1, src2); +- +- Label Done; +- fsflags(zr); +- if (is_double) { +- is_min ? fmin_d(dst, src1, src2) +- : fmax_d(dst, src1, src2); +- // Checking NaNs +- flt_d(zr, src1, src2); +- } else { +- is_min ? fmin_s(dst, src1, src2) +- : fmax_s(dst, src1, src2); +- // Checking NaNs +- flt_s(zr, src1, src2); +- } +- +- frflags(t0); +- beqz(t0, Done); +- +- // In case of NaNs +- is_double ? fadd_d(dst, src1, src2) +- : fadd_s(dst, src1, src2); +- +- bind(Done); +-} +diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp +deleted file mode 100644 +index 90b6554af02..00000000000 +--- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp ++++ /dev/null +@@ -1,141 +0,0 @@ +-/* +- * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. +- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +- * +- * This code is free software; you can redistribute it and/or modify it +- * under the terms of the GNU General Public License version 2 only, as +- * published by the Free Software Foundation. +- * +- * This code is distributed in the hope that it will be useful, but WITHOUT +- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +- * version 2 for more details (a copy is included in the LICENSE file that +- * accompanied this code). +- * +- * You should have received a copy of the GNU General Public License version +- * 2 along with this work; if not, write to the Free Software Foundation, +- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +- * +- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +- * or visit www.oracle.com if you need additional information or have any +- * questions. 
+- * +- */ +- +-#ifndef CPU_RISCV_C2_MACROASSEMBLER_RISCV_HPP +-#define CPU_RISCV_C2_MACROASSEMBLER_RISCV_HPP +- +-// C2_MacroAssembler contains high-level macros for C2 +- +- public: +- +- void string_compare(Register str1, Register str2, +- Register cnt1, Register cnt2, Register result, +- Register tmp1, Register tmp2, Register tmp3, +- int ae); +- +- void string_indexof_char_short(Register str1, Register cnt1, +- Register ch, Register result, +- bool isL); +- +- void string_indexof_char(Register str1, Register cnt1, +- Register ch, Register result, +- Register tmp1, Register tmp2, +- Register tmp3, Register tmp4, +- bool isL); +- +- void string_indexof(Register str1, Register str2, +- Register cnt1, Register cnt2, +- Register tmp1, Register tmp2, +- Register tmp3, Register tmp4, +- Register tmp5, Register tmp6, +- Register result, int ae); +- +- void string_indexof_linearscan(Register haystack, Register needle, +- Register haystack_len, Register needle_len, +- Register tmp1, Register tmp2, +- Register tmp3, Register tmp4, +- int needle_con_cnt, Register result, int ae); +- +- void arrays_equals(Register r1, Register r2, +- Register tmp3, Register tmp4, +- Register tmp5, Register tmp6, +- Register result, Register cnt1, +- int elem_size); +- +- void string_equals(Register r1, Register r2, +- Register result, Register cnt1, +- int elem_size); +- +- // refer to conditional_branches and float_conditional_branches +- static const int bool_test_bits = 3; +- static const int neg_cond_bits = 2; +- static const int unsigned_branch_mask = 1 << bool_test_bits; +- static const int double_branch_mask = 1 << bool_test_bits; +- +- // cmp +- void cmp_branch(int cmpFlag, +- Register op1, Register op2, +- Label& label, bool is_far = false); +- +- void float_cmp_branch(int cmpFlag, +- FloatRegister op1, FloatRegister op2, +- Label& label, bool is_far = false); +- +- void enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op, +- Label& L, bool is_far = false); +- +- void enc_cmpEqNe_imm0_branch(int cmpFlag, Register op, +- Label& L, bool is_far = false); +- +- void enc_cmove(int cmpFlag, +- Register op1, Register op2, +- Register dst, Register src); +- +- void spill(Register r, bool is64, int offset) { +- is64 ? sd(r, Address(sp, offset)) +- : sw(r, Address(sp, offset)); +- } +- +- void spill(FloatRegister f, bool is64, int offset) { +- is64 ? fsd(f, Address(sp, offset)) +- : fsw(f, Address(sp, offset)); +- } +- +- void spill(VectorRegister v, int offset) { +- add(t0, sp, offset); +- vs1r_v(v, t0); +- } +- +- void unspill(Register r, bool is64, int offset) { +- is64 ? ld(r, Address(sp, offset)) +- : lw(r, Address(sp, offset)); +- } +- +- void unspillu(Register r, bool is64, int offset) { +- is64 ? ld(r, Address(sp, offset)) +- : lwu(r, Address(sp, offset)); +- } +- +- void unspill(FloatRegister f, bool is64, int offset) { +- is64 ? 
fld(f, Address(sp, offset)) +- : flw(f, Address(sp, offset)); +- } +- +- void unspill(VectorRegister v, int offset) { +- add(t0, sp, offset); +- vl1r_v(v, t0); +- } +- +- void spill_copy_vector_stack_to_stack(int src_offset, int dst_offset, int vec_reg_size_in_bytes) { +- assert(vec_reg_size_in_bytes % 16 == 0, "unexpected vector reg size"); +- unspill(v0, src_offset); +- spill(v0, dst_offset); +- } +- +- void minmax_FD(FloatRegister dst, +- FloatRegister src1, FloatRegister src2, +- bool is_double, bool is_min); +- +-#endif // CPU_RISCV_C2_MACROASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index 304b6f2b06c..d175a62aeeb 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -4125,3 +4125,1285 @@ void MacroAssembler::safepoint_ifence() { + ifence(); + } + ++#ifdef COMPILER2 ++// short string ++// StringUTF16.indexOfChar ++// StringLatin1.indexOfChar ++void MacroAssembler::string_indexof_char_short(Register str1, Register cnt1, ++ Register ch, Register result, ++ bool isL) ++{ ++ Register ch1 = t0; ++ Register index = t1; ++ ++ BLOCK_COMMENT("string_indexof_char_short {"); ++ ++ Label LOOP, LOOP1, LOOP4, LOOP8; ++ Label MATCH, MATCH1, MATCH2, MATCH3, ++ MATCH4, MATCH5, MATCH6, MATCH7, NOMATCH; ++ ++ mv(result, -1); ++ mv(index, zr); ++ ++ bind(LOOP); ++ addi(t0, index, 8); ++ ble(t0, cnt1, LOOP8); ++ addi(t0, index, 4); ++ ble(t0, cnt1, LOOP4); ++ j(LOOP1); ++ ++ bind(LOOP8); ++ isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); ++ beq(ch, ch1, MATCH); ++ isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); ++ beq(ch, ch1, MATCH1); ++ isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); ++ beq(ch, ch1, MATCH2); ++ isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); ++ beq(ch, ch1, MATCH3); ++ isL ? lbu(ch1, Address(str1, 4)) : lhu(ch1, Address(str1, 8)); ++ beq(ch, ch1, MATCH4); ++ isL ? lbu(ch1, Address(str1, 5)) : lhu(ch1, Address(str1, 10)); ++ beq(ch, ch1, MATCH5); ++ isL ? lbu(ch1, Address(str1, 6)) : lhu(ch1, Address(str1, 12)); ++ beq(ch, ch1, MATCH6); ++ isL ? lbu(ch1, Address(str1, 7)) : lhu(ch1, Address(str1, 14)); ++ beq(ch, ch1, MATCH7); ++ addi(index, index, 8); ++ addi(str1, str1, isL ? 8 : 16); ++ blt(index, cnt1, LOOP); ++ j(NOMATCH); ++ ++ bind(LOOP4); ++ isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); ++ beq(ch, ch1, MATCH); ++ isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); ++ beq(ch, ch1, MATCH1); ++ isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); ++ beq(ch, ch1, MATCH2); ++ isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); ++ beq(ch, ch1, MATCH3); ++ addi(index, index, 4); ++ addi(str1, str1, isL ? 4 : 8); ++ bge(index, cnt1, NOMATCH); ++ ++ bind(LOOP1); ++ isL ? lbu(ch1, Address(str1)) : lhu(ch1, Address(str1)); ++ beq(ch, ch1, MATCH); ++ addi(index, index, 1); ++ addi(str1, str1, isL ? 
1 : 2); ++ blt(index, cnt1, LOOP1); ++ j(NOMATCH); ++ ++ bind(MATCH1); ++ addi(index, index, 1); ++ j(MATCH); ++ ++ bind(MATCH2); ++ addi(index, index, 2); ++ j(MATCH); ++ ++ bind(MATCH3); ++ addi(index, index, 3); ++ j(MATCH); ++ ++ bind(MATCH4); ++ addi(index, index, 4); ++ j(MATCH); ++ ++ bind(MATCH5); ++ addi(index, index, 5); ++ j(MATCH); ++ ++ bind(MATCH6); ++ addi(index, index, 6); ++ j(MATCH); ++ ++ bind(MATCH7); ++ addi(index, index, 7); ++ ++ bind(MATCH); ++ mv(result, index); ++ bind(NOMATCH); ++ BLOCK_COMMENT("} string_indexof_char_short"); ++} ++ ++// StringUTF16.indexOfChar ++// StringLatin1.indexOfChar ++void MacroAssembler::string_indexof_char(Register str1, Register cnt1, ++ Register ch, Register result, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ bool isL) ++{ ++ Label CH1_LOOP, HIT, NOMATCH, DONE, DO_LONG; ++ Register ch1 = t0; ++ Register orig_cnt = t1; ++ Register mask1 = tmp3; ++ Register mask2 = tmp2; ++ Register match_mask = tmp1; ++ Register trailing_char = tmp4; ++ Register unaligned_elems = tmp4; ++ ++ BLOCK_COMMENT("string_indexof_char {"); ++ beqz(cnt1, NOMATCH); ++ ++ addi(t0, cnt1, isL ? -32 : -16); ++ bgtz(t0, DO_LONG); ++ string_indexof_char_short(str1, cnt1, ch, result, isL); ++ j(DONE); ++ ++ bind(DO_LONG); ++ mv(orig_cnt, cnt1); ++ if (AvoidUnalignedAccesses) { ++ Label ALIGNED; ++ andi(unaligned_elems, str1, 0x7); ++ beqz(unaligned_elems, ALIGNED); ++ sub(unaligned_elems, unaligned_elems, 8); ++ neg(unaligned_elems, unaligned_elems); ++ if (!isL) { ++ srli(unaligned_elems, unaligned_elems, 1); ++ } ++ // do unaligned part per element ++ string_indexof_char_short(str1, unaligned_elems, ch, result, isL); ++ bgez(result, DONE); ++ mv(orig_cnt, cnt1); ++ sub(cnt1, cnt1, unaligned_elems); ++ bind(ALIGNED); ++ } ++ ++ // duplicate ch ++ if (isL) { ++ slli(ch1, ch, 8); ++ orr(ch, ch1, ch); ++ } ++ slli(ch1, ch, 16); ++ orr(ch, ch1, ch); ++ slli(ch1, ch, 32); ++ orr(ch, ch1, ch); ++ ++ if (!isL) { ++ slli(cnt1, cnt1, 1); ++ } ++ ++ uint64_t mask0101 = UCONST64(0x0101010101010101); ++ uint64_t mask0001 = UCONST64(0x0001000100010001); ++ mv(mask1, isL ? mask0101 : mask0001); ++ uint64_t mask7f7f = UCONST64(0x7f7f7f7f7f7f7f7f); ++ uint64_t mask7fff = UCONST64(0x7fff7fff7fff7fff); ++ mv(mask2, isL ? 
mask7f7f : mask7fff); ++ ++ bind(CH1_LOOP); ++ ld(ch1, Address(str1)); ++ addi(str1, str1, 8); ++ addi(cnt1, cnt1, -8); ++ compute_match_mask(ch1, ch, match_mask, mask1, mask2); ++ bnez(match_mask, HIT); ++ bgtz(cnt1, CH1_LOOP); ++ j(NOMATCH); ++ ++ bind(HIT); ++ ctzc_bit(trailing_char, match_mask, isL, ch1, result); ++ srli(trailing_char, trailing_char, 3); ++ addi(cnt1, cnt1, 8); ++ ble(cnt1, trailing_char, NOMATCH); ++ // match case ++ if (!isL) { ++ srli(cnt1, cnt1, 1); ++ srli(trailing_char, trailing_char, 1); ++ } ++ ++ sub(result, orig_cnt, cnt1); ++ add(result, result, trailing_char); ++ j(DONE); ++ ++ bind(NOMATCH); ++ mv(result, -1); ++ ++ bind(DONE); ++ BLOCK_COMMENT("} string_indexof_char"); ++} ++ ++typedef void (MacroAssembler::* load_chr_insn)(Register rd, const Address &adr, Register temp); ++ ++// Search for needle in haystack and return index or -1 ++// x10: result ++// x11: haystack ++// x12: haystack_len ++// x13: needle ++// x14: needle_len ++void MacroAssembler::string_indexof(Register haystack, Register needle, ++ Register haystack_len, Register needle_len, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ Register tmp5, Register tmp6, ++ Register result, int ae) ++{ ++ assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); ++ ++ Label LINEARSEARCH, LINEARSTUB, DONE, NOMATCH; ++ ++ Register ch1 = t0; ++ Register ch2 = t1; ++ Register nlen_tmp = tmp1; // needle len tmp ++ Register hlen_tmp = tmp2; // haystack len tmp ++ Register result_tmp = tmp4; ++ ++ bool isLL = ae == StrIntrinsicNode::LL; ++ ++ bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; ++ bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; ++ int needle_chr_shift = needle_isL ? 0 : 1; ++ int haystack_chr_shift = haystack_isL ? 0 : 1; ++ int needle_chr_size = needle_isL ? 1 : 2; ++ int haystack_chr_size = haystack_isL ? 1 : 2; ++ load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu : ++ (load_chr_insn)&MacroAssembler::lhu; ++ load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu : ++ (load_chr_insn)&MacroAssembler::lhu; ++ ++ BLOCK_COMMENT("string_indexof {"); ++ ++ // Note, inline_string_indexOf() generates checks: ++ // if (pattern.count > src.count) return -1; ++ // if (pattern.count == 0) return 0; ++ ++ // We have two strings, a source string in haystack, haystack_len and a pattern string ++ // in needle, needle_len. Find the first occurence of pattern in source or return -1. ++ ++ // For larger pattern and source we use a simplified Boyer Moore algorithm. ++ // With a small pattern and source we use linear scan. ++ ++ // needle_len >=8 && needle_len < 256 && needle_len < haystack_len/4, use bmh algorithm. ++ sub(result_tmp, haystack_len, needle_len); ++ // needle_len < 8, use linear scan ++ sub(t0, needle_len, 8); ++ bltz(t0, LINEARSEARCH); ++ // needle_len >= 256, use linear scan ++ sub(t0, needle_len, 256); ++ bgez(t0, LINEARSTUB); ++ // needle_len >= haystack_len/4, use linear scan ++ srli(t0, haystack_len, 2); ++ bge(needle_len, t0, LINEARSTUB); ++ ++ // Boyer-Moore-Horspool introduction: ++ // The Boyer Moore alogorithm is based on the description here:- ++ // ++ // http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm ++ // ++ // This describes and algorithm with 2 shift rules. The 'Bad Character' rule ++ // and the 'Good Suffix' rule. 
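Before the Boyer-Moore comment continues below, a note on the CH1_LOOP of string_indexof_char above: it relies on the classic SWAR "zero byte" test, with mask1/mask2 playing the roles of the per-lane ones mask and (complemented) high-bit mask. A minimal scalar sketch, assuming 8-bit Latin1 lanes and a hypothetical helper name that is not part of the patch:

#include <cstdint>

// Returns a mask with the high bit set in every byte lane of 'word' that equals 'ch'.
static inline uint64_t swar_has_char(uint64_t word, uint8_t ch) {
  const uint64_t ones  = 0x0101010101010101ULL;   // mask1 above
  const uint64_t highs = 0x8080808080808080ULL;   // complement of mask2 above
  uint64_t x = word ^ (ones * ch);                // matching lanes become zero
  return (x - ones) & ~x & highs;                 // classic "has zero byte" test
}
// __builtin_ctzll(mask) >> 3 then yields the index of the first matching byte,
// mirroring ctzc_bit followed by the srli-by-3 in the HIT path above.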
++ // ++ // These rules are essentially heuristics for how far we can shift the ++ // pattern along the search string. ++ // ++ // The implementation here uses the 'Bad Character' rule only because of the ++ // complexity of initialisation for the 'Good Suffix' rule. ++ // ++ // This is also known as the Boyer-Moore-Horspool algorithm: ++ // ++ // http://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm ++ // ++ // #define ASIZE 256 ++ // ++ // int bm(unsigned char *pattern, int m, unsigned char *src, int n) { ++ // int i, j; ++ // unsigned c; ++ // unsigned char bc[ASIZE]; ++ // ++ // /* Preprocessing */ ++ // for (i = 0; i < ASIZE; ++i) ++ // bc[i] = m; ++ // for (i = 0; i < m - 1; ) { ++ // c = pattern[i]; ++ // ++i; ++ // // c < 256 for Latin1 string, so, no need for branch ++ // #ifdef PATTERN_STRING_IS_LATIN1 ++ // bc[c] = m - i; ++ // #else ++ // if (c < ASIZE) bc[c] = m - i; ++ // #endif ++ // } ++ // ++ // /* Searching */ ++ // j = 0; ++ // while (j <= n - m) { ++ // c = src[i+j]; ++ // if (pattern[m-1] == c) ++ // int k; ++ // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); ++ // if (k < 0) return j; ++ // // c < 256 for Latin1 string, so, no need for branch ++ // #ifdef SOURCE_STRING_IS_LATIN1_AND_PATTERN_STRING_IS_LATIN1 ++ // // LL case: (c< 256) always true. Remove branch ++ // j += bc[pattern[j+m-1]]; ++ // #endif ++ // #ifdef SOURCE_STRING_IS_UTF_AND_PATTERN_STRING_IS_UTF ++ // // UU case: need if (c if not. ++ // if (c < ASIZE) ++ // j += bc[pattern[j+m-1]]; ++ // else ++ // j += m ++ // #endif ++ // } ++ // return -1; ++ // } ++ ++ // temp register:t0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, result ++ Label BCLOOP, BCSKIP, BMLOOPSTR2, BMLOOPSTR1, BMSKIP, BMADV, BMMATCH, ++ BMLOOPSTR1_LASTCMP, BMLOOPSTR1_CMP, BMLOOPSTR1_AFTER_LOAD, BM_INIT_LOOP; ++ ++ Register haystack_end = haystack_len; ++ Register skipch = tmp2; ++ ++ // pattern length is >=8, so, we can read at least 1 register for cases when ++ // UTF->Latin1 conversion is not needed(8 LL or 4UU) and half register for ++ // UL case. We'll re-read last character in inner pre-loop code to have ++ // single outer pre-loop load ++ const int firstStep = isLL ? 
7 : 3; ++ ++ const int ASIZE = 256; ++ const int STORE_BYTES = 8; // 8 bytes stored per instruction(sd) ++ ++ sub(sp, sp, ASIZE); ++ ++ // init BC offset table with default value: needle_len ++ slli(t0, needle_len, 8); ++ orr(t0, t0, needle_len); // [63...16][needle_len][needle_len] ++ slli(tmp1, t0, 16); ++ orr(t0, tmp1, t0); // [63...32][needle_len][needle_len][needle_len][needle_len] ++ slli(tmp1, t0, 32); ++ orr(tmp5, tmp1, t0); // tmp5: 8 elements [needle_len] ++ ++ mv(ch1, sp); // ch1 is t0 ++ mv(tmp6, ASIZE / STORE_BYTES); // loop iterations ++ ++ bind(BM_INIT_LOOP); ++ // for (i = 0; i < ASIZE; ++i) ++ // bc[i] = m; ++ for (int i = 0; i < 4; i++) { ++ sd(tmp5, Address(ch1, i * wordSize)); ++ } ++ add(ch1, ch1, 32); ++ sub(tmp6, tmp6, 4); ++ bgtz(tmp6, BM_INIT_LOOP); ++ ++ sub(nlen_tmp, needle_len, 1); // m - 1, index of the last element in pattern ++ Register orig_haystack = tmp5; ++ mv(orig_haystack, haystack); ++ // result_tmp = tmp4 ++ shadd(haystack_end, result_tmp, haystack, haystack_end, haystack_chr_shift); ++ sub(ch2, needle_len, 1); // bc offset init value, ch2 is t1 ++ mv(tmp3, needle); ++ ++ // for (i = 0; i < m - 1; ) { ++ // c = pattern[i]; ++ // ++i; ++ // // c < 256 for Latin1 string, so, no need for branch ++ // #ifdef PATTERN_STRING_IS_LATIN1 ++ // bc[c] = m - i; ++ // #else ++ // if (c < ASIZE) bc[c] = m - i; ++ // #endif ++ // } ++ bind(BCLOOP); ++ (this->*needle_load_1chr)(ch1, Address(tmp3), noreg); ++ add(tmp3, tmp3, needle_chr_size); ++ if (!needle_isL) { ++ // ae == StrIntrinsicNode::UU ++ mv(tmp6, ASIZE); ++ bgeu(ch1, tmp6, BCSKIP); ++ } ++ add(tmp4, sp, ch1); ++ sb(ch2, Address(tmp4)); // store skip offset to BC offset table ++ ++ bind(BCSKIP); ++ sub(ch2, ch2, 1); // for next pattern element, skip distance -1 ++ bgtz(ch2, BCLOOP); ++ ++ // tmp6: pattern end, address after needle ++ shadd(tmp6, needle_len, needle, tmp6, needle_chr_shift); ++ if (needle_isL == haystack_isL) { ++ // load last 8 bytes (8LL/4UU symbols) ++ ld(tmp6, Address(tmp6, -wordSize)); ++ } else { ++ // UL: from UTF-16(source) search Latin1(pattern) ++ lwu(tmp6, Address(tmp6, -wordSize / 2)); // load last 4 bytes(4 symbols) ++ // convert Latin1 to UTF. eg: 0x0000abcd -> 0x0a0b0c0d ++ // We'll have to wait until load completed, but it's still faster than per-character loads+checks ++ srli(tmp3, tmp6, BitsPerByte * (wordSize / 2 - needle_chr_size)); // pattern[m-1], eg:0x0000000a ++ slli(ch2, tmp6, XLEN - 24); ++ srli(ch2, ch2, XLEN - 8); // pattern[m-2], 0x0000000b ++ slli(ch1, tmp6, XLEN - 16); ++ srli(ch1, ch1, XLEN - 8); // pattern[m-3], 0x0000000c ++ andi(tmp6, tmp6, 0xff); // pattern[m-4], 0x0000000d ++ slli(ch2, ch2, 16); ++ orr(ch2, ch2, ch1); // 0x00000b0c ++ slli(result, tmp3, 48); // use result as temp register ++ orr(tmp6, tmp6, result); // 0x0a00000d ++ slli(result, ch2, 16); ++ orr(tmp6, tmp6, result); // UTF-16:0x0a0b0c0d ++ } ++ ++ // i = m - 1; ++ // skipch = j + i; ++ // if (skipch == pattern[m - 1] ++ // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); ++ // else ++ // move j with bad char offset table ++ bind(BMLOOPSTR2); ++ // compare pattern to source string backward ++ shadd(result, nlen_tmp, haystack, result, haystack_chr_shift); ++ (this->*haystack_load_1chr)(skipch, Address(result), noreg); ++ sub(nlen_tmp, nlen_tmp, firstStep); // nlen_tmp is positive here, because needle_len >= 8 ++ if (needle_isL == haystack_isL) { ++ // re-init tmp3. It's for free because it's executed in parallel with ++ // load above. 
Alternative is to initialize it before loop, but it'll ++ // affect performance on in-order systems with 2 or more ld/st pipelines ++ srli(tmp3, tmp6, BitsPerByte * (wordSize - needle_chr_size)); // UU/LL: pattern[m-1] ++ } ++ if (!isLL) { // UU/UL case ++ slli(ch2, nlen_tmp, 1); // offsets in bytes ++ } ++ bne(tmp3, skipch, BMSKIP); // if not equal, skipch is bad char ++ add(result, haystack, isLL ? nlen_tmp : ch2); ++ ld(ch2, Address(result)); // load 8 bytes from source string ++ mv(ch1, tmp6); ++ if (isLL) { ++ j(BMLOOPSTR1_AFTER_LOAD); ++ } else { ++ sub(nlen_tmp, nlen_tmp, 1); // no need to branch for UU/UL case. cnt1 >= 8 ++ j(BMLOOPSTR1_CMP); ++ } ++ ++ bind(BMLOOPSTR1); ++ shadd(ch1, nlen_tmp, needle, ch1, needle_chr_shift); ++ (this->*needle_load_1chr)(ch1, Address(ch1), noreg); ++ shadd(ch2, nlen_tmp, haystack, ch2, haystack_chr_shift); ++ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); ++ ++ bind(BMLOOPSTR1_AFTER_LOAD); ++ sub(nlen_tmp, nlen_tmp, 1); ++ bltz(nlen_tmp, BMLOOPSTR1_LASTCMP); ++ ++ bind(BMLOOPSTR1_CMP); ++ beq(ch1, ch2, BMLOOPSTR1); ++ ++ bind(BMSKIP); ++ if (!isLL) { ++ // if we've met UTF symbol while searching Latin1 pattern, then we can ++ // skip needle_len symbols ++ if (needle_isL != haystack_isL) { ++ mv(result_tmp, needle_len); ++ } else { ++ mv(result_tmp, 1); ++ } ++ mv(t0, ASIZE); ++ bgeu(skipch, t0, BMADV); ++ } ++ add(result_tmp, sp, skipch); ++ lbu(result_tmp, Address(result_tmp)); // load skip offset ++ ++ bind(BMADV); ++ sub(nlen_tmp, needle_len, 1); ++ // move haystack after bad char skip offset ++ shadd(haystack, result_tmp, haystack, result, haystack_chr_shift); ++ ble(haystack, haystack_end, BMLOOPSTR2); ++ add(sp, sp, ASIZE); ++ j(NOMATCH); ++ ++ bind(BMLOOPSTR1_LASTCMP); ++ bne(ch1, ch2, BMSKIP); ++ ++ bind(BMMATCH); ++ sub(result, haystack, orig_haystack); ++ if (!haystack_isL) { ++ srli(result, result, 1); ++ } ++ add(sp, sp, ASIZE); ++ j(DONE); ++ ++ bind(LINEARSTUB); ++ sub(t0, needle_len, 16); // small patterns still should be handled by simple algorithm ++ bltz(t0, LINEARSEARCH); ++ mv(result, zr); ++ RuntimeAddress stub = NULL; ++ if (isLL) { ++ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ll()); ++ assert(stub.target() != NULL, "string_indexof_linear_ll stub has not been generated"); ++ } else if (needle_isL) { ++ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ul()); ++ assert(stub.target() != NULL, "string_indexof_linear_ul stub has not been generated"); ++ } else { ++ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_uu()); ++ assert(stub.target() != NULL, "string_indexof_linear_uu stub has not been generated"); ++ } ++ trampoline_call(stub); ++ j(DONE); ++ ++ bind(NOMATCH); ++ mv(result, -1); ++ j(DONE); ++ ++ bind(LINEARSEARCH); ++ string_indexof_linearscan(haystack, needle, haystack_len, needle_len, tmp1, tmp2, tmp3, tmp4, -1, result, ae); ++ ++ bind(DONE); ++ BLOCK_COMMENT("} string_indexof"); ++} ++ ++// string_indexof ++// result: x10 ++// src: x11 ++// src_count: x12 ++// pattern: x13 ++// pattern_count: x14 or 1/2/3/4 ++void MacroAssembler::string_indexof_linearscan(Register haystack, Register needle, ++ Register haystack_len, Register needle_len, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ int needle_con_cnt, Register result, int ae) ++{ ++ // Note: ++ // needle_con_cnt > 0 means needle_len register is invalid, needle length is constant ++ // for UU/LL: needle_con_cnt[1, 4], UL: needle_con_cnt = 1 ++ assert(needle_con_cnt <= 4, "Invalid 
needle constant count"); ++ assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); ++ ++ Register ch1 = t0; ++ Register ch2 = t1; ++ Register hlen_neg = haystack_len, nlen_neg = needle_len; ++ Register nlen_tmp = tmp1, hlen_tmp = tmp2, result_tmp = tmp4; ++ ++ bool isLL = ae == StrIntrinsicNode::LL; ++ ++ bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; ++ bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; ++ int needle_chr_shift = needle_isL ? 0 : 1; ++ int haystack_chr_shift = haystack_isL ? 0 : 1; ++ int needle_chr_size = needle_isL ? 1 : 2; ++ int haystack_chr_size = haystack_isL ? 1 : 2; ++ ++ load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu : ++ (load_chr_insn)&MacroAssembler::lhu; ++ load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu : ++ (load_chr_insn)&MacroAssembler::lhu; ++ load_chr_insn load_2chr = isLL ? (load_chr_insn)&MacroAssembler::lhu : (load_chr_insn)&MacroAssembler::lwu; ++ load_chr_insn load_4chr = isLL ? (load_chr_insn)&MacroAssembler::lwu : (load_chr_insn)&MacroAssembler::ld; ++ ++ Label DO1, DO2, DO3, MATCH, NOMATCH, DONE; ++ ++ Register first = tmp3; ++ ++ if (needle_con_cnt == -1) { ++ Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT; ++ ++ sub(t0, needle_len, needle_isL == haystack_isL ? 4 : 2); ++ bltz(t0, DOSHORT); ++ ++ (this->*needle_load_1chr)(first, Address(needle), noreg); ++ slli(t0, needle_len, needle_chr_shift); ++ add(needle, needle, t0); ++ neg(nlen_neg, t0); ++ slli(t0, result_tmp, haystack_chr_shift); ++ add(haystack, haystack, t0); ++ neg(hlen_neg, t0); ++ ++ bind(FIRST_LOOP); ++ add(t0, haystack, hlen_neg); ++ (this->*haystack_load_1chr)(ch2, Address(t0), noreg); ++ beq(first, ch2, STR1_LOOP); ++ ++ bind(STR2_NEXT); ++ add(hlen_neg, hlen_neg, haystack_chr_size); ++ blez(hlen_neg, FIRST_LOOP); ++ j(NOMATCH); ++ ++ bind(STR1_LOOP); ++ add(nlen_tmp, nlen_neg, needle_chr_size); ++ add(hlen_tmp, hlen_neg, haystack_chr_size); ++ bgez(nlen_tmp, MATCH); ++ ++ bind(STR1_NEXT); ++ add(ch1, needle, nlen_tmp); ++ (this->*needle_load_1chr)(ch1, Address(ch1), noreg); ++ add(ch2, haystack, hlen_tmp); ++ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); ++ bne(ch1, ch2, STR2_NEXT); ++ add(nlen_tmp, nlen_tmp, needle_chr_size); ++ add(hlen_tmp, hlen_tmp, haystack_chr_size); ++ bltz(nlen_tmp, STR1_NEXT); ++ j(MATCH); ++ ++ bind(DOSHORT); ++ if (needle_isL == haystack_isL) { ++ sub(t0, needle_len, 2); ++ bltz(t0, DO1); ++ bgtz(t0, DO3); ++ } ++ } ++ ++ if (needle_con_cnt == 4) { ++ Label CH1_LOOP; ++ (this->*load_4chr)(ch1, Address(needle), noreg); ++ sub(result_tmp, haystack_len, 4); ++ slli(tmp3, result_tmp, haystack_chr_shift); // result as tmp ++ add(haystack, haystack, tmp3); ++ neg(hlen_neg, tmp3); ++ ++ bind(CH1_LOOP); ++ add(ch2, haystack, hlen_neg); ++ (this->*load_4chr)(ch2, Address(ch2), noreg); ++ beq(ch1, ch2, MATCH); ++ add(hlen_neg, hlen_neg, haystack_chr_size); ++ blez(hlen_neg, CH1_LOOP); ++ j(NOMATCH); ++ } ++ ++ if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 2) { ++ Label CH1_LOOP; ++ BLOCK_COMMENT("string_indexof DO2 {"); ++ bind(DO2); ++ (this->*load_2chr)(ch1, Address(needle), noreg); ++ if (needle_con_cnt == 2) { ++ sub(result_tmp, haystack_len, 2); ++ } ++ slli(tmp3, result_tmp, haystack_chr_shift); ++ add(haystack, haystack, tmp3); ++ neg(hlen_neg, tmp3); ++ ++ bind(CH1_LOOP); ++ add(tmp3, haystack, hlen_neg); ++ (this->*load_2chr)(ch2, Address(tmp3), noreg); ++ beq(ch1, ch2, 
MATCH); ++ add(hlen_neg, hlen_neg, haystack_chr_size); ++ blez(hlen_neg, CH1_LOOP); ++ j(NOMATCH); ++ BLOCK_COMMENT("} string_indexof DO2"); ++ } ++ ++ if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 3) { ++ Label FIRST_LOOP, STR2_NEXT, STR1_LOOP; ++ BLOCK_COMMENT("string_indexof DO3 {"); ++ ++ bind(DO3); ++ (this->*load_2chr)(first, Address(needle), noreg); ++ (this->*needle_load_1chr)(ch1, Address(needle, 2 * needle_chr_size), noreg); ++ if (needle_con_cnt == 3) { ++ sub(result_tmp, haystack_len, 3); ++ } ++ slli(hlen_tmp, result_tmp, haystack_chr_shift); ++ add(haystack, haystack, hlen_tmp); ++ neg(hlen_neg, hlen_tmp); ++ ++ bind(FIRST_LOOP); ++ add(ch2, haystack, hlen_neg); ++ (this->*load_2chr)(ch2, Address(ch2), noreg); ++ beq(first, ch2, STR1_LOOP); ++ ++ bind(STR2_NEXT); ++ add(hlen_neg, hlen_neg, haystack_chr_size); ++ blez(hlen_neg, FIRST_LOOP); ++ j(NOMATCH); ++ ++ bind(STR1_LOOP); ++ add(hlen_tmp, hlen_neg, 2 * haystack_chr_size); ++ add(ch2, haystack, hlen_tmp); ++ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); ++ bne(ch1, ch2, STR2_NEXT); ++ j(MATCH); ++ BLOCK_COMMENT("} string_indexof DO3"); ++ } ++ ++ if (needle_con_cnt == -1 || needle_con_cnt == 1) { ++ Label DO1_LOOP; ++ ++ BLOCK_COMMENT("string_indexof DO1 {"); ++ bind(DO1); ++ (this->*needle_load_1chr)(ch1, Address(needle), noreg); ++ sub(result_tmp, haystack_len, 1); ++ mv(tmp3, result_tmp); ++ if (haystack_chr_shift) { ++ slli(tmp3, result_tmp, haystack_chr_shift); ++ } ++ add(haystack, haystack, tmp3); ++ neg(hlen_neg, tmp3); ++ ++ bind(DO1_LOOP); ++ add(tmp3, haystack, hlen_neg); ++ (this->*haystack_load_1chr)(ch2, Address(tmp3), noreg); ++ beq(ch1, ch2, MATCH); ++ add(hlen_neg, hlen_neg, haystack_chr_size); ++ blez(hlen_neg, DO1_LOOP); ++ BLOCK_COMMENT("} string_indexof DO1"); ++ } ++ ++ bind(NOMATCH); ++ mv(result, -1); ++ j(DONE); ++ ++ bind(MATCH); ++ srai(t0, hlen_neg, haystack_chr_shift); ++ add(result, result_tmp, t0); ++ ++ bind(DONE); ++} ++ ++// Compare strings. ++void MacroAssembler::string_compare(Register str1, Register str2, ++ Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2, ++ Register tmp3, int ae) ++{ ++ Label DONE, SHORT_LOOP, SHORT_STRING, SHORT_LAST, TAIL, STUB, ++ DIFFERENCE, NEXT_WORD, SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT, ++ SHORT_LOOP_START, TAIL_CHECK, L; ++ ++ const int STUB_THRESHOLD = 64 + 8; ++ bool isLL = ae == StrIntrinsicNode::LL; ++ bool isLU = ae == StrIntrinsicNode::LU; ++ bool isUL = ae == StrIntrinsicNode::UL; ++ ++ bool str1_isL = isLL || isLU; ++ bool str2_isL = isLL || isUL; ++ ++ // for L strings, 1 byte for 1 character ++ // for U strings, 2 bytes for 1 character ++ int str1_chr_size = str1_isL ? 1 : 2; ++ int str2_chr_size = str2_isL ? 1 : 2; ++ int minCharsInWord = isLL ? wordSize : wordSize / 2; ++ ++ load_chr_insn str1_load_chr = str1_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; ++ load_chr_insn str2_load_chr = str2_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; ++ ++ BLOCK_COMMENT("string_compare {"); ++ ++ // Bizzarely, the counts are passed in bytes, regardless of whether they ++ // are L or U strings, however the result is always in characters. ++ if (!str1_isL) { ++ sraiw(cnt1, cnt1, 1); ++ } ++ if (!str2_isL) { ++ sraiw(cnt2, cnt2, 1); ++ } ++ ++ // Compute the minimum of the string lengths and save the difference in result. 
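The comparison below follows the usual compareTo contract: compare the first min(cnt1, cnt2) characters, let the first difference decide, and otherwise fall back to the length difference saved in result. A scalar reference for the UU case (illustrative only; ref_string_compare is not part of the patch, and the L/U variants differ only in element width):

#include <cstdint>

static int ref_string_compare(const uint16_t* s1, int cnt1,
                              const uint16_t* s2, int cnt2) {
  int n = cnt1 < cnt2 ? cnt1 : cnt2;   // counts are already in characters here
  for (int i = 0; i < n; i++) {
    if (s1[i] != s2[i]) {
      return (int)s1[i] - (int)s2[i];  // corresponds to the DIFFERENCE path
    }
  }
  return cnt1 - cnt2;                  // all compared characters equal
}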
++ sub(result, cnt1, cnt2); ++ bgt(cnt1, cnt2, L); ++ mv(cnt2, cnt1); ++ bind(L); ++ ++ // A very short string ++ li(t0, minCharsInWord); ++ ble(cnt2, t0, SHORT_STRING); ++ ++ // Compare longwords ++ // load first parts of strings and finish initialization while loading ++ { ++ if (str1_isL == str2_isL) { // LL or UU ++ // load 8 bytes once to compare ++ ld(tmp1, Address(str1)); ++ beq(str1, str2, DONE); ++ ld(tmp2, Address(str2)); ++ li(t0, STUB_THRESHOLD); ++ bge(cnt2, t0, STUB); ++ sub(cnt2, cnt2, minCharsInWord); ++ beqz(cnt2, TAIL_CHECK); ++ // convert cnt2 from characters to bytes ++ if (!str1_isL) { ++ slli(cnt2, cnt2, 1); ++ } ++ add(str2, str2, cnt2); ++ add(str1, str1, cnt2); ++ sub(cnt2, zr, cnt2); ++ } else if (isLU) { // LU case ++ lwu(tmp1, Address(str1)); ++ ld(tmp2, Address(str2)); ++ li(t0, STUB_THRESHOLD); ++ bge(cnt2, t0, STUB); ++ addi(cnt2, cnt2, -4); ++ add(str1, str1, cnt2); ++ sub(cnt1, zr, cnt2); ++ slli(cnt2, cnt2, 1); ++ add(str2, str2, cnt2); ++ inflate_lo32(tmp3, tmp1); ++ mv(tmp1, tmp3); ++ sub(cnt2, zr, cnt2); ++ addi(cnt1, cnt1, 4); ++ } else { // UL case ++ ld(tmp1, Address(str1)); ++ lwu(tmp2, Address(str2)); ++ li(t0, STUB_THRESHOLD); ++ bge(cnt2, t0, STUB); ++ addi(cnt2, cnt2, -4); ++ slli(t0, cnt2, 1); ++ sub(cnt1, zr, t0); ++ add(str1, str1, t0); ++ add(str2, str2, cnt2); ++ inflate_lo32(tmp3, tmp2); ++ mv(tmp2, tmp3); ++ sub(cnt2, zr, cnt2); ++ addi(cnt1, cnt1, 8); ++ } ++ addi(cnt2, cnt2, isUL ? 4 : 8); ++ bgez(cnt2, TAIL); ++ xorr(tmp3, tmp1, tmp2); ++ bnez(tmp3, DIFFERENCE); ++ ++ // main loop ++ bind(NEXT_WORD); ++ if (str1_isL == str2_isL) { // LL or UU ++ add(t0, str1, cnt2); ++ ld(tmp1, Address(t0)); ++ add(t0, str2, cnt2); ++ ld(tmp2, Address(t0)); ++ addi(cnt2, cnt2, 8); ++ } else if (isLU) { // LU case ++ add(t0, str1, cnt1); ++ lwu(tmp1, Address(t0)); ++ add(t0, str2, cnt2); ++ ld(tmp2, Address(t0)); ++ addi(cnt1, cnt1, 4); ++ inflate_lo32(tmp3, tmp1); ++ mv(tmp1, tmp3); ++ addi(cnt2, cnt2, 8); ++ } else { // UL case ++ add(t0, str2, cnt2); ++ lwu(tmp2, Address(t0)); ++ add(t0, str1, cnt1); ++ ld(tmp1, Address(t0)); ++ inflate_lo32(tmp3, tmp2); ++ mv(tmp2, tmp3); ++ addi(cnt1, cnt1, 8); ++ addi(cnt2, cnt2, 4); ++ } ++ bgez(cnt2, TAIL); ++ ++ xorr(tmp3, tmp1, tmp2); ++ beqz(tmp3, NEXT_WORD); ++ j(DIFFERENCE); ++ bind(TAIL); ++ xorr(tmp3, tmp1, tmp2); ++ bnez(tmp3, DIFFERENCE); ++ // Last longword. In the case where length == 4 we compare the ++ // same longword twice, but that's still faster than another ++ // conditional branch. ++ if (str1_isL == str2_isL) { // LL or UU ++ ld(tmp1, Address(str1)); ++ ld(tmp2, Address(str2)); ++ } else if (isLU) { // LU case ++ lwu(tmp1, Address(str1)); ++ ld(tmp2, Address(str2)); ++ inflate_lo32(tmp3, tmp1); ++ mv(tmp1, tmp3); ++ } else { // UL case ++ lwu(tmp2, Address(str2)); ++ ld(tmp1, Address(str1)); ++ inflate_lo32(tmp3, tmp2); ++ mv(tmp2, tmp3); ++ } ++ bind(TAIL_CHECK); ++ xorr(tmp3, tmp1, tmp2); ++ beqz(tmp3, DONE); ++ ++ // Find the first different characters in the longwords and ++ // compute their difference. 
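The DIFFERENCE block below locates that character with a count-trailing-zeros on the XOR of the two longwords, rounded down to an element boundary, then extracts and subtracts the two elements. A scalar sketch (illustrative; it assumes the little-endian loads RISC-V performs, so the lowest-addressed character sits in the least-significant bits):

#include <cstdint>

// Precondition: a != b (the caller only branches here when the XOR is non-zero).
static int first_char_difference(uint64_t a, uint64_t b, bool latin1) {
  uint64_t x = a ^ b;
  int bit = __builtin_ctzll(x);        // lowest differing bit
  int elem_bits = latin1 ? 8 : 16;
  bit -= bit % elem_bits;              // round down to an element boundary
  uint64_t mask = latin1 ? 0xFFu : 0xFFFFu;
  return (int)((a >> bit) & mask) - (int)((b >> bit) & mask);
}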
++ bind(DIFFERENCE); ++ ctzc_bit(result, tmp3, isLL); // count zero from lsb to msb ++ srl(tmp1, tmp1, result); ++ srl(tmp2, tmp2, result); ++ if (isLL) { ++ andi(tmp1, tmp1, 0xFF); ++ andi(tmp2, tmp2, 0xFF); ++ } else { ++ andi(tmp1, tmp1, 0xFFFF); ++ andi(tmp2, tmp2, 0xFFFF); ++ } ++ sub(result, tmp1, tmp2); ++ j(DONE); ++ } ++ ++ bind(STUB); ++ RuntimeAddress stub = NULL; ++ switch (ae) { ++ case StrIntrinsicNode::LL: ++ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LL()); ++ break; ++ case StrIntrinsicNode::UU: ++ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UU()); ++ break; ++ case StrIntrinsicNode::LU: ++ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LU()); ++ break; ++ case StrIntrinsicNode::UL: ++ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UL()); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ assert(stub.target() != NULL, "compare_long_string stub has not been generated"); ++ trampoline_call(stub); ++ j(DONE); ++ ++ bind(SHORT_STRING); ++ // Is the minimum length zero? ++ beqz(cnt2, DONE); ++ // arrange code to do most branches while loading and loading next characters ++ // while comparing previous ++ (this->*str1_load_chr)(tmp1, Address(str1), t0); ++ addi(str1, str1, str1_chr_size); ++ addi(cnt2, cnt2, -1); ++ beqz(cnt2, SHORT_LAST_INIT); ++ (this->*str2_load_chr)(cnt1, Address(str2), t0); ++ addi(str2, str2, str2_chr_size); ++ j(SHORT_LOOP_START); ++ bind(SHORT_LOOP); ++ addi(cnt2, cnt2, -1); ++ beqz(cnt2, SHORT_LAST); ++ bind(SHORT_LOOP_START); ++ (this->*str1_load_chr)(tmp2, Address(str1), t0); ++ addi(str1, str1, str1_chr_size); ++ (this->*str2_load_chr)(t0, Address(str2), t0); ++ addi(str2, str2, str2_chr_size); ++ bne(tmp1, cnt1, SHORT_LOOP_TAIL); ++ addi(cnt2, cnt2, -1); ++ beqz(cnt2, SHORT_LAST2); ++ (this->*str1_load_chr)(tmp1, Address(str1), t0); ++ addi(str1, str1, str1_chr_size); ++ (this->*str2_load_chr)(cnt1, Address(str2), t0); ++ addi(str2, str2, str2_chr_size); ++ beq(tmp2, t0, SHORT_LOOP); ++ sub(result, tmp2, t0); ++ j(DONE); ++ bind(SHORT_LOOP_TAIL); ++ sub(result, tmp1, cnt1); ++ j(DONE); ++ bind(SHORT_LAST2); ++ beq(tmp2, t0, DONE); ++ sub(result, tmp2, t0); ++ ++ j(DONE); ++ bind(SHORT_LAST_INIT); ++ (this->*str2_load_chr)(cnt1, Address(str2), t0); ++ addi(str2, str2, str2_chr_size); ++ bind(SHORT_LAST); ++ beq(tmp1, cnt1, DONE); ++ sub(result, tmp1, cnt1); ++ ++ bind(DONE); ++ ++ BLOCK_COMMENT("} string_compare"); ++} ++ ++void MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3, ++ Register tmp4, Register tmp5, Register tmp6, Register result, ++ Register cnt1, int elem_size) { ++ Label DONE, SAME, NEXT_DWORD, SHORT, TAIL, TAIL2, IS_TMP5_ZR; ++ Register tmp1 = t0; ++ Register tmp2 = t1; ++ Register cnt2 = tmp2; // cnt2 only used in array length compare ++ Register elem_per_word = tmp6; ++ int log_elem_size = exact_log2(elem_size); ++ int length_offset = arrayOopDesc::length_offset_in_bytes(); ++ int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? 
T_CHAR : T_BYTE); ++ ++ assert(elem_size == 1 || elem_size == 2, "must be char or byte"); ++ assert_different_registers(a1, a2, result, cnt1, t0, t1, tmp3, tmp4, tmp5, tmp6); ++ li(elem_per_word, wordSize / elem_size); ++ ++ BLOCK_COMMENT("arrays_equals {"); ++ ++ // if (a1 == a2), return true ++ beq(a1, a2, SAME); ++ ++ mv(result, false); ++ beqz(a1, DONE); ++ beqz(a2, DONE); ++ lwu(cnt1, Address(a1, length_offset)); ++ lwu(cnt2, Address(a2, length_offset)); ++ bne(cnt2, cnt1, DONE); ++ beqz(cnt1, SAME); ++ ++ slli(tmp5, cnt1, 3 + log_elem_size); ++ sub(tmp5, zr, tmp5); ++ add(a1, a1, base_offset); ++ add(a2, a2, base_offset); ++ ld(tmp3, Address(a1, 0)); ++ ld(tmp4, Address(a2, 0)); ++ ble(cnt1, elem_per_word, SHORT); // short or same ++ ++ // Main 16 byte comparison loop with 2 exits ++ bind(NEXT_DWORD); { ++ ld(tmp1, Address(a1, wordSize)); ++ ld(tmp2, Address(a2, wordSize)); ++ sub(cnt1, cnt1, 2 * wordSize / elem_size); ++ blez(cnt1, TAIL); ++ bne(tmp3, tmp4, DONE); ++ ld(tmp3, Address(a1, 2 * wordSize)); ++ ld(tmp4, Address(a2, 2 * wordSize)); ++ add(a1, a1, 2 * wordSize); ++ add(a2, a2, 2 * wordSize); ++ ble(cnt1, elem_per_word, TAIL2); ++ } beq(tmp1, tmp2, NEXT_DWORD); ++ j(DONE); ++ ++ bind(TAIL); ++ xorr(tmp4, tmp3, tmp4); ++ xorr(tmp2, tmp1, tmp2); ++ sll(tmp2, tmp2, tmp5); ++ orr(tmp5, tmp4, tmp2); ++ j(IS_TMP5_ZR); ++ ++ bind(TAIL2); ++ bne(tmp1, tmp2, DONE); ++ ++ bind(SHORT); ++ xorr(tmp4, tmp3, tmp4); ++ sll(tmp5, tmp4, tmp5); ++ ++ bind(IS_TMP5_ZR); ++ bnez(tmp5, DONE); ++ ++ bind(SAME); ++ mv(result, true); ++ // That's it. ++ bind(DONE); ++ ++ BLOCK_COMMENT("} array_equals"); ++} ++ ++// Compare Strings ++ ++// For Strings we're passed the address of the first characters in a1 ++// and a2 and the length in cnt1. ++// elem_size is the element size in bytes: either 1 or 2. ++// There are two implementations. For arrays >= 8 bytes, all ++// comparisons (including the final one, which may overlap) are ++// performed 8 bytes at a time. For strings < 8 bytes, we compare a ++// halfword, then a short, and then a byte. ++ ++void MacroAssembler::string_equals(Register a1, Register a2, ++ Register result, Register cnt1, int elem_size) ++{ ++ Label SAME, DONE, SHORT, NEXT_WORD; ++ Register tmp1 = t0; ++ Register tmp2 = t1; ++ ++ assert(elem_size == 1 || elem_size == 2, "must be 2 or 1 byte"); ++ assert_different_registers(a1, a2, result, cnt1, t0, t1); ++ ++ BLOCK_COMMENT("string_equals {"); ++ ++ mv(result, false); ++ ++ // Check for short strings, i.e. smaller than wordSize. ++ sub(cnt1, cnt1, wordSize); ++ bltz(cnt1, SHORT); ++ ++ // Main 8 byte comparison loop. ++ bind(NEXT_WORD); { ++ ld(tmp1, Address(a1, 0)); ++ add(a1, a1, wordSize); ++ ld(tmp2, Address(a2, 0)); ++ add(a2, a2, wordSize); ++ sub(cnt1, cnt1, wordSize); ++ bne(tmp1, tmp2, DONE); ++ } bgtz(cnt1, NEXT_WORD); ++ ++ // Last longword. In the case where length == 4 we compare the ++ // same longword twice, but that's still faster than another ++ // conditional branch. ++ // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when ++ // length == 4. ++ add(tmp1, a1, cnt1); ++ ld(tmp1, Address(tmp1, 0)); ++ add(tmp2, a2, cnt1); ++ ld(tmp2, Address(tmp2, 0)); ++ bne(tmp1, tmp2, DONE); ++ j(SAME); ++ ++ bind(SHORT); ++ Label TAIL03, TAIL01; ++ ++ // 0-7 bytes left. ++ andi(t0, cnt1, 4); ++ beqz(t0, TAIL03); ++ { ++ lwu(tmp1, Address(a1, 0)); ++ add(a1, a1, 4); ++ lwu(tmp2, Address(a2, 0)); ++ add(a2, a2, 4); ++ bne(tmp1, tmp2, DONE); ++ } ++ ++ bind(TAIL03); ++ // 0-3 bytes left. 
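For short strings, the tail here tests individual bits of the remaining byte count: bit 2 selected the 4-byte compare above, bit 1 selects a 2-byte compare below, and bit 0 a final single byte (Latin1 only). Because cnt1 was pre-decremented by wordSize, its low three bits still equal the number of leftover bytes. A scalar sketch of the same decomposition (illustrative; ref_tail_equal is not part of the patch):

#include <cstdint>
#include <cstring>

static bool ref_tail_equal(const uint8_t* a, const uint8_t* b, unsigned rem /* 0..7 */) {
  if (rem & 4) { if (memcmp(a, b, 4) != 0) return false; a += 4; b += 4; }
  if (rem & 2) { if (memcmp(a, b, 2) != 0) return false; a += 2; b += 2; }
  if (rem & 1) { if (*a != *b)             return false; }
  return true;
}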
++ andi(t0, cnt1, 2); ++ beqz(t0, TAIL01); ++ { ++ lhu(tmp1, Address(a1, 0)); ++ add(a1, a1, 2); ++ lhu(tmp2, Address(a2, 0)); ++ add(a2, a2, 2); ++ bne(tmp1, tmp2, DONE); ++ } ++ ++ bind(TAIL01); ++ if (elem_size == 1) { // Only needed when comparing 1-byte elements ++ // 0-1 bytes left. ++ andi(t0, cnt1, 1); ++ beqz(t0, SAME); ++ { ++ lbu(tmp1, a1, 0); ++ lbu(tmp2, a2, 0); ++ bne(tmp1, tmp2, DONE); ++ } ++ } ++ ++ // Arrays are equal. ++ bind(SAME); ++ mv(result, true); ++ ++ // That's it. ++ bind(DONE); ++ BLOCK_COMMENT("} string_equals"); ++} ++ ++typedef void (Assembler::*conditional_branch_insn)(Register op1, Register op2, Label& label, bool is_far); ++typedef void (MacroAssembler::*float_conditional_branch_insn)(FloatRegister op1, FloatRegister op2, Label& label, ++ bool is_far, bool is_unordered); ++ ++static conditional_branch_insn conditional_branches[] = ++{ ++ /* SHORT branches */ ++ (conditional_branch_insn)&Assembler::beq, ++ (conditional_branch_insn)&Assembler::bgt, ++ NULL, // BoolTest::overflow ++ (conditional_branch_insn)&Assembler::blt, ++ (conditional_branch_insn)&Assembler::bne, ++ (conditional_branch_insn)&Assembler::ble, ++ NULL, // BoolTest::no_overflow ++ (conditional_branch_insn)&Assembler::bge, ++ ++ /* UNSIGNED branches */ ++ (conditional_branch_insn)&Assembler::beq, ++ (conditional_branch_insn)&Assembler::bgtu, ++ NULL, ++ (conditional_branch_insn)&Assembler::bltu, ++ (conditional_branch_insn)&Assembler::bne, ++ (conditional_branch_insn)&Assembler::bleu, ++ NULL, ++ (conditional_branch_insn)&Assembler::bgeu ++}; ++ ++static float_conditional_branch_insn float_conditional_branches[] = ++{ ++ /* FLOAT SHORT branches */ ++ (float_conditional_branch_insn)&MacroAssembler::float_beq, ++ (float_conditional_branch_insn)&MacroAssembler::float_bgt, ++ NULL, // BoolTest::overflow ++ (float_conditional_branch_insn)&MacroAssembler::float_blt, ++ (float_conditional_branch_insn)&MacroAssembler::float_bne, ++ (float_conditional_branch_insn)&MacroAssembler::float_ble, ++ NULL, // BoolTest::no_overflow ++ (float_conditional_branch_insn)&MacroAssembler::float_bge, ++ ++ /* DOUBLE SHORT branches */ ++ (float_conditional_branch_insn)&MacroAssembler::double_beq, ++ (float_conditional_branch_insn)&MacroAssembler::double_bgt, ++ NULL, ++ (float_conditional_branch_insn)&MacroAssembler::double_blt, ++ (float_conditional_branch_insn)&MacroAssembler::double_bne, ++ (float_conditional_branch_insn)&MacroAssembler::double_ble, ++ NULL, ++ (float_conditional_branch_insn)&MacroAssembler::double_bge ++}; ++ ++void MacroAssembler::cmp_branch(int cmpFlag, Register op1, Register op2, Label& label, bool is_far) { ++ assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(conditional_branches) / sizeof(conditional_branches[0])), ++ "invalid conditional branch index"); ++ (this->*conditional_branches[cmpFlag])(op1, op2, label, is_far); ++} ++ ++// This is a function should only be used by C2. Flip the unordered when unordered-greater, C2 would use ++// unordered-lesser instead of unordered-greater. Finally, commute the result bits at function do_one_bytecode(). 
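float_cmp_branch below, like cmp_branch above, indexes these tables with a composed flag: the low three bits are the BoolTest condition (in the order implied by the table entries and their overflow/no_overflow comments: eq, gt, overflow, lt, ne, le, no_overflow, ge), and bit 3 — unsigned_branch_mask for the integer table, double_branch_mask for the float table — selects the second half. enc_cmove negates a condition by XOR-ing bit 2 (1 << neg_cond_bits), e.g. eq <-> ne, lt <-> ge. A standalone illustration of that indexing (the names here are illustrative, not HotSpot's):

#include <cstdio>

enum Cond { EQ = 0, GT = 1, /* overflow = 2 */ LT = 3, NE = 4, LE = 5, /* no_overflow = 6 */ GE = 7 };

static const char* const branch_name[] = {
  "beq", "bgt", nullptr, "blt", "bne", "ble", nullptr, "bge",      // signed half
  "beq", "bgtu", nullptr, "bltu", "bne", "bleu", nullptr, "bgeu"   // unsigned half
};

int main() {
  const int unsigned_mask = 1 << 3;                 // unsigned_branch_mask
  const int negate        = 1 << 2;                 // 1 << neg_cond_bits
  printf("%s\n", branch_name[LT | unsigned_mask]);  // prints "bltu"
  printf("%s\n", branch_name[GE ^ negate]);         // negated ge -> lt, prints "blt"
  return 0;
}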
++void MacroAssembler::float_cmp_branch(int cmpFlag, FloatRegister op1, FloatRegister op2, Label& label, bool is_far) { ++ assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(float_conditional_branches) / sizeof(float_conditional_branches[0])), ++ "invalid float conditional branch index"); ++ int booltest_flag = cmpFlag & ~(MacroAssembler::double_branch_mask); ++ (this->*float_conditional_branches[cmpFlag])(op1, op2, label, is_far, ++ (booltest_flag == (BoolTest::ge) || booltest_flag == (BoolTest::gt)) ? false : true); ++} ++ ++void MacroAssembler::enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { ++ switch (cmpFlag) { ++ case BoolTest::eq: ++ case BoolTest::le: ++ beqz(op1, L, is_far); ++ break; ++ case BoolTest::ne: ++ case BoolTest::gt: ++ bnez(op1, L, is_far); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::enc_cmpEqNe_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { ++ switch (cmpFlag) { ++ case BoolTest::eq: ++ beqz(op1, L, is_far); ++ break; ++ case BoolTest::ne: ++ bnez(op1, L, is_far); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::enc_cmove(int cmpFlag, Register op1, Register op2, Register dst, Register src) { ++ Label L; ++ cmp_branch(cmpFlag ^ (1 << neg_cond_bits), op1, op2, L); ++ mv(dst, src); ++ bind(L); ++} ++ ++// Set dst to NaN if any NaN input. ++void MacroAssembler::minmax_FD(FloatRegister dst, FloatRegister src1, FloatRegister src2, ++ bool is_double, bool is_min) { ++ assert_different_registers(dst, src1, src2); ++ ++ Label Done; ++ fsflags(zr); ++ if (is_double) { ++ is_min ? fmin_d(dst, src1, src2) ++ : fmax_d(dst, src1, src2); ++ // Checking NaNs ++ flt_d(zr, src1, src2); ++ } else { ++ is_min ? fmin_s(dst, src1, src2) ++ : fmax_s(dst, src1, src2); ++ // Checking NaNs ++ flt_s(zr, src1, src2); ++ } ++ ++ frflags(t0); ++ beqz(t0, Done); ++ ++ // In case of NaNs ++ is_double ? 
fadd_d(dst, src1, src2) ++ : fadd_s(dst, src1, src2); ++ ++ bind(Done); ++} ++ ++#endif // COMPILER2 ++ +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +index c6b71bdbc3c..2ef28771e2e 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +@@ -851,6 +851,109 @@ class MacroAssembler: public Assembler { + void load_reserved(Register addr, enum operand_size size, Assembler::Aqrl acquire); + void store_conditional(Register addr, Register new_val, enum operand_size size, Assembler::Aqrl release); + ++public: ++ void string_compare(Register str1, Register str2, ++ Register cnt1, Register cnt2, Register result, ++ Register tmp1, Register tmp2, Register tmp3, ++ int ae); ++ ++ void string_indexof_char_short(Register str1, Register cnt1, ++ Register ch, Register result, ++ bool isL); ++ ++ void string_indexof_char(Register str1, Register cnt1, ++ Register ch, Register result, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ bool isL); ++ ++ void string_indexof(Register str1, Register str2, ++ Register cnt1, Register cnt2, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ Register tmp5, Register tmp6, ++ Register result, int ae); ++ ++ void string_indexof_linearscan(Register haystack, Register needle, ++ Register haystack_len, Register needle_len, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ int needle_con_cnt, Register result, int ae); ++ ++ void arrays_equals(Register r1, Register r2, ++ Register tmp3, Register tmp4, ++ Register tmp5, Register tmp6, ++ Register result, Register cnt1, ++ int elem_size); ++ ++ void string_equals(Register r1, Register r2, ++ Register result, Register cnt1, ++ int elem_size); ++ ++ // refer to conditional_branches and float_conditional_branches ++ static const int bool_test_bits = 3; ++ static const int neg_cond_bits = 2; ++ static const int unsigned_branch_mask = 1 << bool_test_bits; ++ static const int double_branch_mask = 1 << bool_test_bits; + -+ // Return address: -+ public Address getSenderPCAddr() { return addressOfStackSlot(RETURN_ADDR_OFFSET); } -+ public Address getSenderPC() { return getSenderPCAddr().getAddressAt(0); } ++ // cmp ++ void cmp_branch(int cmpFlag, ++ Register op1, Register op2, ++ Label& label, bool is_far = false); + -+ // return address of param, zero origin index. -+ public Address getNativeParamAddr(int idx) { -+ return addressOfStackSlot(NATIVE_FRAME_INITIAL_PARAM_OFFSET + idx); ++ void float_cmp_branch(int cmpFlag, ++ FloatRegister op1, FloatRegister op2, ++ Label& label, bool is_far = false); ++ ++ void enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op, ++ Label& L, bool is_far = false); ++ ++ void enc_cmpEqNe_imm0_branch(int cmpFlag, Register op, ++ Label& L, bool is_far = false); ++ ++ void enc_cmove(int cmpFlag, ++ Register op1, Register op2, ++ Register dst, Register src); ++ ++ void spill(Register r, bool is64, int offset) { ++ is64 ? sd(r, Address(sp, offset)) ++ : sw(r, Address(sp, offset)); + } + -+ public Address getSenderSP() { return addressOfStackSlot(SENDER_SP_OFFSET); } ++ void spill(FloatRegister f, bool is64, int offset) { ++ is64 ? 
fsd(f, Address(sp, offset)) ++ : fsw(f, Address(sp, offset)); ++ } + -+ public Address addressOfInterpreterFrameLocals() { -+ return addressOfStackSlot(INTERPRETER_FRAME_LOCALS_OFFSET); ++ void spill(VectorRegister v, int offset) { ++ add(t0, sp, offset); ++ vs1r_v(v, t0); ++ } ++ ++ void unspill(Register r, bool is64, int offset) { ++ is64 ? ld(r, Address(sp, offset)) ++ : lw(r, Address(sp, offset)); ++ } ++ ++ void unspillu(Register r, bool is64, int offset) { ++ is64 ? ld(r, Address(sp, offset)) ++ : lwu(r, Address(sp, offset)); + } + -+ private Address addressOfInterpreterFrameBCX() { -+ return addressOfStackSlot(INTERPRETER_FRAME_BCX_OFFSET); -+ } ++ void unspill(FloatRegister f, bool is64, int offset) { ++ is64 ? fld(f, Address(sp, offset)) ++ : flw(f, Address(sp, offset)); ++ } ++ ++ void unspill(VectorRegister v, int offset) { ++ add(t0, sp, offset); ++ vl1r_v(v, t0); ++ } ++ ++ void minmax_FD(FloatRegister dst, ++ FloatRegister src1, FloatRegister src2, ++ bool is_double, bool is_min); ++ + }; + + #ifdef ASSERT +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 13546ab328b..2e7eed8fb52 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -997,7 +997,7 @@ void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const { + #endif + + void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Assembler::CompressibleRegion cr(&_masm); + __ ebreak(); + } +@@ -1015,7 +1015,7 @@ uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const { + #endif + + void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Assembler::CompressibleRegion cr(&_masm); // nops shall be 2-byte under RVC for alignment purposes. + for (int i = 0; i < _count; i++) { + __ nop(); +@@ -1074,7 +1074,7 @@ void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const { + void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + assert_cond(ra_ != NULL); + Compile* C = ra_->C; +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + + // n.b. 
frame size includes space for return pc and fp + const int framesize = C->output()->frame_size_in_bytes(); +@@ -1150,7 +1150,7 @@ void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const { + void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + assert_cond(ra_ != NULL); + Compile* C = ra_->C; +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + assert_cond(C != NULL); + int framesize = C->output()->frame_size_in_bytes(); + +@@ -1251,7 +1251,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo + int dst_offset = ra_->reg2offset(dst_lo); + + if (cbuf != NULL) { +- C2_MacroAssembler _masm(cbuf); ++ MacroAssembler _masm(cbuf); + Assembler::CompressibleRegion cr(&_masm); + switch (src_lo_rc) { + case rc_int: +@@ -1371,7 +1371,7 @@ void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const { + #endif + + void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + + assert_cond(ra_ != NULL); + int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); +@@ -1422,7 +1422,7 @@ void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const + void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const + { + // This is the unverified entry point. +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + + Label skip; + __ cmp_klass(j_rarg0, t1, t0, skip); +@@ -1449,7 +1449,7 @@ int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) + // j #exception_blob_entry_point + // Note that the code buffer's insts_mark is always relative to insts. + // That's why we must use the macroassembler to generate a handler. +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + address base = __ start_a_stub(size_exception_handler()); + if (base == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); +@@ -1467,7 +1467,7 @@ int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) + { + // Note that the code buffer's insts_mark is always relative to insts. + // That's why we must use the macroassembler to generate a handler. 
+- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + address base = __ start_a_stub(size_deopt_handler()); + if (base == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); +@@ -1848,7 +1848,7 @@ encode %{ + // BEGIN Non-volatile memory access + + enc_class riscv_enc_li_imm(iRegIorL dst, immIorL src) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Assembler::CompressibleRegion cr(&_masm); + int64_t con = (int64_t)$src$$constant; + Register dst_reg = as_Register($dst$$reg); +@@ -1856,7 +1856,7 @@ encode %{ + %} + + enc_class riscv_enc_mov_p(iRegP dst, immP src) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Register dst_reg = as_Register($dst$$reg); + address con = (address)$src$$constant; + if (con == NULL || con == (address)1) { +@@ -1875,7 +1875,7 @@ encode %{ + %} + + enc_class riscv_enc_mov_p1(iRegP dst) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Assembler::CompressibleRegion cr(&_masm); + Register dst_reg = as_Register($dst$$reg); + __ li(dst_reg, 1); +@@ -1893,12 +1893,12 @@ encode %{ + %} + + enc_class riscv_enc_mov_byte_map_base(iRegP dst) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + __ load_byte_map_base($dst$$Register); + %} + + enc_class riscv_enc_mov_n(iRegN dst, immN src) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Register dst_reg = as_Register($dst$$reg); + address con = (address)$src$$constant; + if (con == NULL) { +@@ -1911,13 +1911,13 @@ encode %{ + %} + + enc_class riscv_enc_mov_zero(iRegNorP dst) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Register dst_reg = as_Register($dst$$reg); + __ mv(dst_reg, zr); + %} + + enc_class riscv_enc_mov_nk(iRegN dst, immNKlass src) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Register dst_reg = as_Register($dst$$reg); + address con = (address)$src$$constant; + if (con == NULL) { +@@ -1930,42 +1930,42 @@ encode %{ + %} + + enc_class riscv_enc_cmpxchgw(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, + /*result as bool*/ true); + %} + + enc_class riscv_enc_cmpxchgn(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, + /*result as bool*/ true); + %} + + enc_class riscv_enc_cmpxchg(iRegINoSp res, memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, + /*result as bool*/ true); + %} + + enc_class riscv_enc_cmpxchgw_acq(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, + /*result as bool*/ true); + %} + + enc_class riscv_enc_cmpxchgn_acq(iRegINoSp res, 
memory mem, iRegINoSp oldval, iRegINoSp newval) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, + /*result as bool*/ true); + %} + + enc_class riscv_enc_cmpxchg_acq(iRegINoSp res, memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, + /*result as bool*/ true); +@@ -1974,13 +1974,13 @@ encode %{ + // compare and branch instruction encodings + + enc_class riscv_enc_j(label lbl) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Label* L = $lbl$$label; + __ j(*L); + %} + + enc_class riscv_enc_far_cmpULtGe_imm0_branch(cmpOpULtGe cmp, iRegIorL op1, label lbl) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Label* L = $lbl$$label; + switch ($cmp$$cmpcode) { + case(BoolTest::ge): +@@ -2004,7 +2004,7 @@ encode %{ + + Label miss; + Label done; +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg, + NULL, &miss); + if ($primary) { +@@ -2023,7 +2023,7 @@ encode %{ + %} + + enc_class riscv_enc_java_static_call(method meth) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + + address addr = (address)$meth$$method; + address call = NULL; +@@ -2055,7 +2055,7 @@ encode %{ + %} + + enc_class riscv_enc_java_dynamic_call(method meth) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + int method_index = resolved_method_index(cbuf); + address call = __ ic_call((address)$meth$$method, method_index); + if (call == NULL) { +@@ -2065,7 +2065,7 @@ encode %{ + %} + + enc_class riscv_enc_call_epilog() %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + if (VerifyStackAtCalls) { + // Check that stack depth is unchanged: find majik cookie on stack + __ call_Unimplemented(); +@@ -2073,7 +2073,7 @@ encode %{ + %} + + enc_class riscv_enc_java_to_runtime(method meth) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + + // some calls to generated routines (arraycopy code) are scheduled + // by C2 as runtime calls. if so we can call them using a jr (they +@@ -2102,7 +2102,7 @@ encode %{ + + // using the cr register as the bool result: 0 for success; others failed. + enc_class riscv_enc_fast_lock(iRegP object, iRegP box, iRegP tmp1, iRegP tmp2) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Register flag = t1; + Register oop = as_Register($object$$reg); + Register box = as_Register($box$$reg); +@@ -2189,7 +2189,7 @@ encode %{ + + // using cr flag to indicate the fast_unlock result: 0 for success; others failed. 
+ enc_class riscv_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp1, iRegP tmp2) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Register flag = t1; + Register oop = as_Register($object$$reg); + Register box = as_Register($box$$reg); +@@ -2262,7 +2262,7 @@ encode %{ + // arithmetic encodings + + enc_class riscv_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = as_Register($src1$$reg); + Register src2_reg = as_Register($src2$$reg); +@@ -2270,7 +2270,7 @@ encode %{ + %} + + enc_class riscv_enc_div(iRegI dst, iRegI src1, iRegI src2) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = as_Register($src1$$reg); + Register src2_reg = as_Register($src2$$reg); +@@ -2278,7 +2278,7 @@ encode %{ + %} + + enc_class riscv_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = as_Register($src1$$reg); + Register src2_reg = as_Register($src2$$reg); +@@ -2286,7 +2286,7 @@ encode %{ + %} + + enc_class riscv_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = as_Register($src1$$reg); + Register src2_reg = as_Register($src2$$reg); +@@ -2294,14 +2294,14 @@ encode %{ + %} + + enc_class riscv_enc_tail_call(iRegP jump_target) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Assembler::CompressibleRegion cr(&_masm); + Register target_reg = as_Register($jump_target$$reg); + __ jr(target_reg); + %} + + enc_class riscv_enc_tail_jmp(iRegP jump_target) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Assembler::CompressibleRegion cr(&_masm); + Register target_reg = as_Register($jump_target$$reg); + // exception oop should be in x10 +@@ -2312,12 +2312,12 @@ encode %{ + %} + + enc_class riscv_enc_rethrow() %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub())); + %} + + enc_class riscv_enc_ret() %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Assembler::CompressibleRegion cr(&_masm); + __ ret(); + %} +@@ -8506,7 +8506,7 @@ instruct cmpU_branch(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) + format %{ "b$cmp $op1, $op2, $lbl\t#@cmpU_branch" %} + + ins_encode %{ +- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label)); + %} + +@@ -8526,7 +8526,7 @@ instruct cmpU_loop(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) + format %{ "b$cmp $op1, $op2, $lbl\t#@cmpU_loop" %} + + ins_encode %{ +- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label)); + %} + +@@ -8585,7 +8585,7 @@ instruct cmpUL_branch(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) + format %{ "b$cmp $op1, $op2, $lbl\t#@cmpUL_branch" %} + + ins_encode %{ +- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | 
MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label)); + %} + +@@ -8604,7 +8604,7 @@ instruct cmpUL_loop(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) + format %{ "b$cmp $op1, $op2, $lbl\t#@cmpUL_loop" %} + + ins_encode %{ +- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label)); + %} + +@@ -8625,7 +8625,7 @@ instruct cmpP_branch(cmpOpU cmp, iRegP op1, iRegP op2, label lbl) + format %{ "b$cmp $op1, $op2, $lbl\t#@cmpP_branch" %} + + ins_encode %{ +- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label)); + %} + +@@ -8645,7 +8645,7 @@ instruct cmpP_loop(cmpOpU cmp, iRegP op1, iRegP op2, label lbl) + format %{ "b$cmp $op1, $op2, $lbl\t#@cmpP_loop" %} + + ins_encode %{ +- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label)); + %} + +@@ -8666,7 +8666,7 @@ instruct cmpN_branch(cmpOpU cmp, iRegN op1, iRegN op2, label lbl) + format %{ "b$cmp $op1, $op2, $lbl\t#@cmpN_branch" %} + + ins_encode %{ +- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label)); + %} + +@@ -8686,7 +8686,7 @@ instruct cmpN_loop(cmpOpU cmp, iRegN op1, iRegN op2, label lbl) + format %{ "b$cmp $op1, $op2, $lbl\t#@cmpN_loop" %} + + ins_encode %{ +- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label)); + %} + +@@ -8741,7 +8741,7 @@ instruct cmpD_branch(cmpOp cmp, fRegD op1, fRegD op2, label lbl) + format %{ "double_b$cmp $op1, $op2\t#@cmpD_branch"%} + + ins_encode %{ +- __ float_cmp_branch($cmp$$cmpcode | C2_MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), ++ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), + as_FloatRegister($op2$$reg), *($lbl$$label)); + %} + +@@ -8759,7 +8759,7 @@ instruct cmpD_loop(cmpOp cmp, fRegD op1, fRegD op2, label lbl) + format %{ "double_b$cmp $op1, $op2\t#@cmpD_loop"%} + + ins_encode %{ +- __ float_cmp_branch($cmp$$cmpcode | C2_MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), ++ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), + as_FloatRegister($op2$$reg), *($lbl$$label)); + %} + +@@ -9080,7 +9080,7 @@ instruct far_cmpU_branch(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) %{ + format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpU_branch" %} + + ins_encode %{ +- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + +@@ -9095,7 +9095,7 @@ instruct far_cmpU_loop(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) %{ + format %{ "far_b$cmp $op1, $op2, 
$lbl\t#@far_cmpU_loop" %} + + ins_encode %{ +- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + +@@ -9138,7 +9138,7 @@ instruct far_cmpUL_branch(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) %{ + format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpUL_branch" %} + + ins_encode %{ +- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + +@@ -9153,7 +9153,7 @@ instruct far_cmpUL_loop(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) %{ + format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpUL_loop" %} + + ins_encode %{ +- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + +@@ -9171,7 +9171,7 @@ instruct far_cmpP_branch(cmpOpU cmp, iRegP op1, iRegP op2, label lbl) + format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpP_branch" %} + + ins_encode %{ +- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + +@@ -9189,7 +9189,7 @@ instruct far_cmpP_loop(cmpOpU cmp, iRegP op1, iRegP op2, label lbl) + format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpP_loop" %} + + ins_encode %{ +- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + +@@ -9207,7 +9207,7 @@ instruct far_cmpN_branch(cmpOpU cmp, iRegN op1, iRegN op2, label lbl) + format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpN_branch" %} + + ins_encode %{ +- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + +@@ -9225,7 +9225,7 @@ instruct far_cmpN_loop(cmpOpU cmp, iRegN op1, iRegN op2, label lbl) + format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpN_loop" %} + + ins_encode %{ +- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + +@@ -9276,7 +9276,7 @@ instruct far_cmpD_branch(cmpOp cmp, fRegD op1, fRegD op2, label lbl) + format %{ "far_double_b$cmp $op1, $op2\t#@far_cmpD_branch"%} + + ins_encode %{ +- __ float_cmp_branch($cmp$$cmpcode | C2_MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), ++ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), + as_FloatRegister($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + +@@ -9292,7 +9292,7 @@ instruct far_cmpD_loop(cmpOp cmp, fRegD op1, fRegD op2, label lbl) + format %{ "far_double_b$cmp $op1, $op2\t#@far_cmpD_loop"%} + + ins_encode %{ +- __ 
float_cmp_branch($cmp$$cmpcode | C2_MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), ++ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), + as_FloatRegister($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + +@@ -9616,7 +9616,7 @@ instruct cmovI_cmpU(iRegINoSp dst, iRegI src, iRegI op1, iRegI op2, cmpOpU cop) + %} + + ins_encode %{ +- __ enc_cmove($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, ++ __ enc_cmove($cop$$cmpcode | MacroAssembler::unsigned_branch_mask, + as_Register($op1$$reg), as_Register($op2$$reg), + as_Register($dst$$reg), as_Register($src$$reg)); + %} +@@ -9673,7 +9673,7 @@ instruct cmovL_cmpUL(iRegLNoSp dst, iRegL src, iRegL op1, iRegL op2, cmpOpU cop) + %} + + ins_encode %{ +- __ enc_cmove($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, ++ __ enc_cmove($cop$$cmpcode | MacroAssembler::unsigned_branch_mask, + as_Register($op1$$reg), as_Register($op2$$reg), + as_Register($dst$$reg), as_Register($src$$reg)); + %} +@@ -9691,7 +9691,7 @@ instruct cmovI_cmpUL(iRegINoSp dst, iRegI src, iRegL op1, iRegL op2, cmpOpU cop) + %} + + ins_encode %{ +- __ enc_cmove($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, ++ __ enc_cmove($cop$$cmpcode | MacroAssembler::unsigned_branch_mask, + as_Register($op1$$reg), as_Register($op2$$reg), + as_Register($dst$$reg), as_Register($src$$reg)); + %} + +From 115cd21290080b157d0ca8b7080e66ebd814fbdb Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 16:15:18 +0800 +Subject: [PATCH 091/140] Revert JDK-8222297: IRT_ENTRY/IRT_LEAF etc are the + same as JRT && JDK-8263709: Cleanup THREAD/TRAPS/CHECK usage in JRT_ENTRY + routines + +--- + src/hotspot/cpu/riscv/interpreterRT_riscv.cpp | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp b/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp +index d93530d8564..776b0787238 100644 +--- a/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp ++++ b/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp +@@ -278,12 +278,12 @@ class SlowSignatureHandler + }; + + +-JRT_ENTRY(address, +- InterpreterRuntime::slow_signature_handler(JavaThread* current, ++IRT_ENTRY(address, ++ InterpreterRuntime::slow_signature_handler(JavaThread* thread, + Method* method, + intptr_t* from, + intptr_t* to)) +- methodHandle m(current, (Method*)method); ++ methodHandle m(thread, (Method*)method); + assert(m->is_native(), "sanity check"); + + // handle arguments +@@ -292,4 +292,4 @@ JRT_ENTRY(address, + + // return result handler + return Interpreter::result_handler(m->result_type()); +-JRT_END ++IRT_END + +From 6cbf43d5f095aef93ef0bf595f51019a03cc1989 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 16:20:06 +0800 +Subject: [PATCH 092/140] Revert JDK-8245289: Clean up offset code in + JavaClasses + +--- + src/hotspot/cpu/riscv/methodHandles_riscv.cpp | 18 +++++++++--------- + .../templateInterpreterGenerator_riscv.cpp | 2 +- + 2 files changed, 10 insertions(+), 10 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp +index 4442b5991b1..e070b8096a6 100644 +--- a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp ++++ b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp +@@ -53,7 +53,7 @@ void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_ + verify_klass(_masm, klass_reg, VM_CLASS_ID(java_lang_Class), + "MH argument is a Class"); + } +- __ ld(klass_reg, Address(klass_reg, 
java_lang_Class::klass_offset())); ++ __ ld(klass_reg, Address(klass_reg, java_lang_Class::klass_offset_in_bytes())); + } + + #ifdef ASSERT +@@ -140,13 +140,13 @@ void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm, + + // Load the invoker, as MH -> MH.form -> LF.vmentry + __ verify_oop(recv); +- __ load_heap_oop(method_temp, Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset())), temp2); ++ __ load_heap_oop(method_temp, Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset_in_bytes())), temp2); + __ verify_oop(method_temp); +- __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset())), temp2); ++ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset_in_bytes())), temp2); + __ verify_oop(method_temp); +- __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::method_offset())), temp2); ++ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::method_offset_in_bytes())), temp2); + __ verify_oop(method_temp); +- __ access_load_at(T_ADDRESS, IN_HEAP, method_temp, Address(method_temp, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset())), noreg, noreg); ++ __ access_load_at(T_ADDRESS, IN_HEAP, method_temp, Address(method_temp, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset_in_bytes())), noreg, noreg); + + if (VerifyMethodHandles && !for_compiler_entry) { + // make sure recv is already on stack +@@ -284,10 +284,10 @@ void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, + "MemberName required for invokeVirtual etc."); + } + +- Address member_clazz( member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset())); +- Address member_vmindex( member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset())); +- Address member_vmtarget( member_reg, NONZERO(java_lang_invoke_MemberName::method_offset())); +- Address vmtarget_method( xmethod, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset())); ++ Address member_clazz( member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset_in_bytes())); ++ Address member_vmindex( member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset_in_bytes())); ++ Address member_vmtarget( member_reg, NONZERO(java_lang_invoke_MemberName::method_offset_in_bytes())); ++ Address vmtarget_method( xmethod, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset_in_bytes())); + + Register temp1_recv_klass = temp1; + if (iid != vmIntrinsics::_linkToStatic) { +diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp +index 8aea4eca048..ce6166030b4 100644 +--- a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp ++++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp +@@ -894,7 +894,7 @@ address TemplateInterpreterGenerator::generate_Reference_get_entry(void) { + + address entry = __ pc(); + +- const int referent_offset = java_lang_ref_Reference::referent_offset(); ++ const int referent_offset = java_lang_ref_Reference::referent_offset; + guarantee(referent_offset > 0, "referent offset not initialized"); + + Label slow_path; + +From 8c9b9f4246f4ede3c31f59749f9d4bc625f106b3 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 16:30:35 +0800 +Subject: [PATCH 093/140] Revert JDK-8242629: Remove references to deprecated + java.util.Observer and Observable + +--- + 
.../runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java | 2 -- + .../classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java | 2 -- + .../sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java | 2 -- + 3 files changed, 6 deletions(-) + +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java +index f2e224f28ee..5c2b6e0e3ea 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java +@@ -34,8 +34,6 @@ + import sun.jvm.hotspot.runtime.riscv64.*; + import sun.jvm.hotspot.types.*; + import sun.jvm.hotspot.utilities.*; +-import sun.jvm.hotspot.utilities.Observable; +-import sun.jvm.hotspot.utilities.Observer; + + public class LinuxRISCV64JavaThreadPDAccess implements JavaThreadPDAccess { + private static AddressField lastJavaFPField; +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java +index df280005d72..e372bc5f7be 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java +@@ -34,8 +34,6 @@ + import sun.jvm.hotspot.runtime.*; + import sun.jvm.hotspot.types.*; + import sun.jvm.hotspot.utilities.*; +-import sun.jvm.hotspot.utilities.Observable; +-import sun.jvm.hotspot.utilities.Observer; + + /** Specialization of and implementation of abstract methods of the + Frame class for the riscv64 family of CPUs. 
*/ +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java +index d0ad2b559a6..850758a7ed4 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java +@@ -31,8 +31,6 @@ + import sun.jvm.hotspot.types.*; + import sun.jvm.hotspot.runtime.*; + import sun.jvm.hotspot.utilities.*; +-import sun.jvm.hotspot.utilities.Observable; +-import sun.jvm.hotspot.utilities.Observer; + + public class RISCV64JavaCallWrapper extends JavaCallWrapper { + private static AddressField lastJavaFPField; + +From 43f2a4fec6b4922fa8c187deda310ad636aeed2e Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 16:33:56 +0800 +Subject: [PATCH 094/140] Revert JDK-8256155: Allow multiple large page sizes + to be used on Linux + +--- + src/hotspot/os/linux/os_linux.cpp | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/src/hotspot/os/linux/os_linux.cpp b/src/hotspot/os/linux/os_linux.cpp +index 6f75e623a9a..7fc9588301b 100644 +--- a/src/hotspot/os/linux/os_linux.cpp ++++ b/src/hotspot/os/linux/os_linux.cpp +@@ -4078,7 +4078,8 @@ size_t os::Linux::find_large_page_size() { + IA64_ONLY(256 * M) + PPC_ONLY(4 * M) + S390_ONLY(1 * M) +- SPARC_ONLY(4 * M); ++ SPARC_ONLY(4 * M) ++ RISCV64_ONLY(2 * M); + #endif // ZERO + + FILE *fp = fopen("/proc/meminfo", "r"); + +From a93191be0155882a0f4d92bba4de9fdf4f508a4a Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 16:38:53 +0800 +Subject: [PATCH 095/140] Revert JDK-8252204: AArch64: Implement SHA3 + accelerator/intrinsic + +--- + src/hotspot/cpu/riscv/vm_version_riscv.cpp | 5 ----- + 1 file changed, 5 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp +index d4b79162d84..50ee7edb708 100644 +--- a/src/hotspot/cpu/riscv/vm_version_riscv.cpp ++++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp +@@ -82,11 +82,6 @@ void VM_Version::initialize() { + FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); + } + +- if (UseSHA3Intrinsics) { +- warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU."); +- FLAG_SET_DEFAULT(UseSHA3Intrinsics, false); +- } +- + if (UseCRC32Intrinsics) { + warning("CRC32 intrinsics are not available on this CPU."); + FLAG_SET_DEFAULT(UseCRC32Intrinsics, false); + +From 29acd4f1bb99e856418f7d9d3da4f205812b1663 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 16:43:23 +0800 +Subject: [PATCH 096/140] Revert JDK-8253717: Relocate stack overflow code out + of thread.hpp/cpp && JDK-8255766: Fix linux+arm64 build after 8254072 + +--- + src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 2 +- + src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 4 ++-- + src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp | 2 +- + 3 files changed, 4 insertions(+), 4 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index d175a62aeeb..d94074b4a3c 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -1666,7 +1666,7 @@ void MacroAssembler::bang_stack_size(Register size, Register tmp) { + // was post-decremented.) 
Skip this address by starting at i=1, and + // touch a few more pages below. N.B. It is important to touch all + // the way down to and including i=StackShadowPages. +- for (int i = 0; i < (int)(StackOverflow::stack_shadow_zone_size() / os::vm_page_size()) - 1; i++) { ++ for (int i = 0; i < (int)(JavaThread::stack_shadow_zone_size() / os::vm_page_size()) - 1; i++) { + // this could be any sized move but this is can be a debugging crumb + // so the bigger the better. + sub(tmp, tmp, os::vm_page_size()); +diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +index ae414224c5b..dc3ac548d73 100644 +--- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp ++++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +@@ -1252,7 +1252,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, + __ nop(); + + // Generate stack overflow check +- __ bang_stack_with_offset(checked_cast(StackOverflow::stack_shadow_zone_size())); ++ __ bang_stack_with_offset((int)JavaThread::stack_shadow_zone_size()); + + // Generate a new frame for the wrapper. + __ enter(); +@@ -1551,7 +1551,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, + Label reguard; + Label reguard_done; + __ lbu(t0, Address(xthread, JavaThread::stack_guard_state_offset())); +- __ mv(t1, StackOverflow::stack_guard_yellow_reserved_disabled); ++ __ mv(t1, JavaThread::stack_guard_yellow_reserved_disabled); + __ beq(t0, t1, reguard); + __ bind(reguard_done); + +diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp +index ce6166030b4..e639fa7e12f 100644 +--- a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp ++++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp +@@ -1248,7 +1248,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { + { + Label no_reguard; + __ lwu(t0, Address(xthread, in_bytes(JavaThread::stack_guard_state_offset()))); +- __ addi(t1, zr, (u1)StackOverflow::stack_guard_yellow_reserved_disabled); ++ __ addi(t1, zr, (u1)JavaThread::stack_guard_yellow_reserved_disabled); + __ bne(t0, t1, no_reguard); + + __ pusha(); // only save smashed registers + +From 6fa17c662dd2488108809e77dcff921bb475813c Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 16:50:51 +0800 +Subject: [PATCH 097/140] Revert JDK-8258459: Decouple gc_globals.hpp from + globals.hpp + +--- + src/hotspot/cpu/riscv/templateTable_riscv.cpp | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp +index 1f4409a9c9a..84b1afc7dc6 100644 +--- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp ++++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp +@@ -28,7 +28,6 @@ + #include "asm/macroAssembler.inline.hpp" + #include "gc/shared/barrierSetAssembler.hpp" + #include "gc/shared/collectedHeap.hpp" +-#include "gc/shared/tlab_globals.hpp" + #include "interpreter/interp_masm.hpp" + #include "interpreter/interpreter.hpp" + #include "interpreter/interpreterRuntime.hpp" + +From bcc26e749ccc20db5a4ba51c2cf8740a908a8a74 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 16:56:58 +0800 +Subject: [PATCH 098/140] Revert JDK-8223136: Move compressed oops functions to + CompressedOops class + +--- + .../cpu/riscv/macroAssembler_riscv.cpp | 64 +++++++++---------- + .../cpu/riscv/macroAssembler_riscv.hpp | 1 - + src/hotspot/cpu/riscv/riscv.ad | 10 +-- + 3 
files changed, 37 insertions(+), 38 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index d94074b4a3c..becc1656358 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -1318,10 +1318,10 @@ int MacroAssembler::patch_oop(address insn_addr, address o) { + void MacroAssembler::reinit_heapbase() { + if (UseCompressedOops) { + if (Universe::is_fully_initialized()) { +- mv(xheapbase, CompressedOops::ptrs_base()); ++ mv(xheapbase, Universe::narrow_ptrs_base()); + } else { + int32_t offset = 0; +- la_patchable(xheapbase, ExternalAddress((address)CompressedOops::ptrs_base_addr()), offset); ++ la_patchable(xheapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr()), offset); + ld(xheapbase, Address(xheapbase, offset)); + } + } +@@ -1596,8 +1596,8 @@ void MacroAssembler::orptr(Address adr, RegisterOrConstant src, Register tmp1, R + void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp, Label &L) { + if (UseCompressedClassPointers) { + lwu(tmp, Address(oop, oopDesc::klass_offset_in_bytes())); +- if (CompressedKlassPointers::base() == NULL) { +- slli(tmp, tmp, CompressedKlassPointers::shift()); ++ if (Universe::narrow_klass_base() == NULL) { ++ slli(tmp, tmp, Universe::narrow_klass_shift()); + beq(trial_klass, tmp, L); + return; + } +@@ -1745,9 +1745,9 @@ void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, + // Algorithm must match CompressedOops::encode. + void MacroAssembler::encode_heap_oop(Register d, Register s) { + verify_oop(s, "broken oop in encode_heap_oop"); +- if (CompressedOops::base() == NULL) { +- if (CompressedOops::shift() != 0) { +- assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + srli(d, s, LogMinObjAlignmentInBytes); + } else { + mv(d, s); +@@ -1758,9 +1758,9 @@ void MacroAssembler::encode_heap_oop(Register d, Register s) { + bgez(d, notNull); + mv(d, zr); + bind(notNull); +- if (CompressedOops::shift() != 0) { +- assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); +- srli(d, d, CompressedOops::shift()); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ srli(d, d, Universe::narrow_oop_shift()); + } + } + } +@@ -1799,9 +1799,9 @@ void MacroAssembler::decode_klass_not_null(Register r) { + void MacroAssembler::decode_klass_not_null(Register dst, Register src, Register tmp) { + assert(UseCompressedClassPointers, "should only be used for compressed headers"); + +- if (CompressedKlassPointers::base() == NULL) { +- if (CompressedKlassPointers::shift() != 0) { +- assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); ++ if (Universe::narrow_klass_base() == NULL) { ++ if (Universe::narrow_klass_shift() != 0) { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); + slli(dst, src, LogKlassAlignmentInBytes); + } else { + mv(dst, src); +@@ -1815,10 +1815,10 @@ void MacroAssembler::decode_klass_not_null(Register dst, Register src, Register + } + + assert_different_registers(src, xbase); +- li(xbase, (uintptr_t)CompressedKlassPointers::base()); ++ li(xbase, 
(uintptr_t)Universe::narrow_klass_base()); + +- if (CompressedKlassPointers::shift() != 0) { +- assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); ++ if (Universe::narrow_klass_shift() != 0) { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); + assert_different_registers(t0, xbase); + shadd(dst, src, xbase, t0, LogKlassAlignmentInBytes); + } else { +@@ -1835,9 +1835,9 @@ void MacroAssembler::encode_klass_not_null(Register r) { + void MacroAssembler::encode_klass_not_null(Register dst, Register src, Register tmp) { + assert(UseCompressedClassPointers, "should only be used for compressed headers"); + +- if (CompressedKlassPointers::base() == NULL) { +- if (CompressedKlassPointers::shift() != 0) { +- assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); ++ if (Universe::narrow_klass_base() == NULL) { ++ if (Universe::narrow_klass_shift() != 0) { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); + srli(dst, src, LogKlassAlignmentInBytes); + } else { + mv(dst, src); +@@ -1845,8 +1845,8 @@ void MacroAssembler::encode_klass_not_null(Register dst, Register src, Register + return; + } + +- if (((uint64_t)(uintptr_t)CompressedKlassPointers::base() & 0xffffffff) == 0 && +- CompressedKlassPointers::shift() == 0) { ++ if (((uint64_t)(uintptr_t)Universe::narrow_klass_base() & 0xffffffff) == 0 && ++ Universe::narrow_klass_shift() == 0) { + zero_extend(dst, src, 32); + return; + } +@@ -1857,10 +1857,10 @@ void MacroAssembler::encode_klass_not_null(Register dst, Register src, Register + } + + assert_different_registers(src, xbase); +- li(xbase, (intptr_t)CompressedKlassPointers::base()); ++ li(xbase, (intptr_t)Universe::narrow_klass_base()); + sub(dst, src, xbase); +- if (CompressedKlassPointers::shift() != 0) { +- assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); ++ if (Universe::narrow_klass_shift() != 0) { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); + srli(dst, dst, LogKlassAlignmentInBytes); + } + if (xbase == xheapbase) { +@@ -1878,22 +1878,22 @@ void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { + // Cannot assert, unverified entry point counts instructions (see .ad file) + // vtableStubs also counts instructions in pd_code_size_limit. + // Also do not verify_oop as this is called by verify_oop. 
+- if (CompressedOops::shift() != 0) { +- assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + slli(dst, src, LogMinObjAlignmentInBytes); +- if (CompressedOops::base() != NULL) { ++ if (Universe::narrow_oop_base() != NULL) { + add(dst, xheapbase, dst); + } + } else { +- assert(CompressedOops::base() == NULL, "sanity"); ++ assert(Universe::narrow_oop_base() == NULL, "sanity"); + mv(dst, src); + } + } + + void MacroAssembler::decode_heap_oop(Register d, Register s) { +- if (CompressedOops::base() == NULL) { +- if (CompressedOops::shift() != 0 || d != s) { +- slli(d, s, CompressedOops::shift()); ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0 || d != s) { ++ slli(d, s, Universe::narrow_oop_shift()); + } + } else { + Label done; +@@ -3004,7 +3004,7 @@ void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { + InstructionMark im(this); + RelocationHolder rspec = metadata_Relocation::spec(index); + code_section()->relocate(inst_mark(), rspec); +- narrowKlass nk = CompressedKlassPointers::encode(k); ++ narrowKlass nk = Klass::encode_klass(k); + li32(dst, nk); + zero_extend(dst, dst, 32); + } +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +index 2ef28771e2e..953bca3cbd8 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +@@ -29,7 +29,6 @@ + + #include "asm/assembler.hpp" + #include "metaprogramming/enableIf.hpp" +-#include "oops/compressedOops.hpp" + + // MacroAssembler extends Assembler by frequently used macros. + // +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 2e7eed8fb52..24214964243 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -1407,7 +1407,7 @@ void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const + st->print_cr("# MachUEPNode"); + if (UseCompressedClassPointers) { + st->print_cr("\tlwu t0, [j_rarg0, oopDesc::klass_offset_in_bytes()]\t# compressed klass"); +- if (CompressedKlassPointers::shift() != 0) { ++ if (Universe::narrow_klass_shift() != 0) { + st->print_cr("\tdecode_klass_not_null t0, t0"); + } + } else { +@@ -3255,7 +3255,7 @@ operand indOffL(iRegP reg, immLOffset off) + + operand indirectN(iRegN reg) + %{ +- predicate(CompressedOops::shift() == 0); ++ predicate(Universe::narrow_oop_shift() == 0); + constraint(ALLOC_IN_RC(ptr_reg)); + match(DecodeN reg); + op_cost(0); +@@ -3270,7 +3270,7 @@ operand indirectN(iRegN reg) + + operand indOffIN(iRegN reg, immIOffset off) + %{ +- predicate(CompressedOops::shift() == 0); ++ predicate(Universe::narrow_oop_shift() == 0); + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP (DecodeN reg) off); + op_cost(0); +@@ -3285,7 +3285,7 @@ operand indOffIN(iRegN reg, immIOffset off) + + operand indOffLN(iRegN reg, immLOffset off) + %{ +- predicate(CompressedOops::shift() == 0); ++ predicate(Universe::narrow_oop_shift() == 0); + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP (DecodeN reg) off); + op_cost(0); +@@ -7947,7 +7947,7 @@ instruct convP2I(iRegINoSp dst, iRegP src) %{ + // in case of 32bit oops (heap < 4Gb). 
+ instruct convN2I(iRegINoSp dst, iRegN src) + %{ +- predicate(CompressedOops::shift() == 0); ++ predicate(Universe::narrow_oop_shift() == 0); + match(Set dst (ConvL2I (CastP2X (DecodeN src)))); + + ins_cost(ALU_COST); + +From 81d8ea9077484f1dd20033390cbd3c1844b1b966 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 17:11:20 +0800 +Subject: [PATCH 099/140] Revert JDK-8247912: Make narrowOop a scoped enum + +--- + src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index becc1656358..e2841c28c37 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -1305,7 +1305,7 @@ int MacroAssembler::patch_oop(address insn_addr, address o) { + // instruction. + if (NativeInstruction::is_li32_at(insn_addr)) { + // Move narrow OOP +- uint32_t n = CompressedOops::narrow_oop_value(cast_to_oop(o)); ++ narrowOop n = CompressedOops::encode((oop)o); + return patch_imm_in_li32(insn_addr, (int32_t)n); + } else if (NativeInstruction::is_movptr_at(insn_addr)) { + // Move wide OOP + +From f980e03cb17804ff72958dd13505058048c04da8 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 17:20:05 +0800 +Subject: [PATCH 100/140] Revert JDK-8260467: Move well-known classes from + systemDictionary.hpp to vmClasses.hpp + +--- + src/hotspot/cpu/riscv/methodHandles_riscv.cpp | 11 +++++------ + src/hotspot/cpu/riscv/methodHandles_riscv.hpp | 4 ++-- + 2 files changed, 7 insertions(+), 8 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp +index e070b8096a6..fd907f77afb 100644 +--- a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp ++++ b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp +@@ -27,7 +27,6 @@ + #include "precompiled.hpp" + #include "asm/macroAssembler.hpp" + #include "classfile/javaClasses.inline.hpp" +-#include "classfile/vmClasses.hpp" + #include "interpreter/interpreter.hpp" + #include "interpreter/interpreterRuntime.hpp" + #include "memory/allocation.inline.hpp" +@@ -50,7 +49,7 @@ + void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg) { + assert_cond(_masm != NULL); + if (VerifyMethodHandles) { +- verify_klass(_masm, klass_reg, VM_CLASS_ID(java_lang_Class), ++ verify_klass(_masm, klass_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_Class), + "MH argument is a Class"); + } + __ ld(klass_reg, Address(klass_reg, java_lang_Class::klass_offset_in_bytes())); +@@ -68,11 +67,11 @@ static int check_nonzero(const char* xname, int x) { + + #ifdef ASSERT + void MethodHandles::verify_klass(MacroAssembler* _masm, +- Register obj, vmClassID klass_id, ++ Register obj, SystemDictionary::WKID klass_id, + const char* error_message) { + assert_cond(_masm != NULL); +- InstanceKlass** klass_addr = vmClasses::klass_addr_at(klass_id); +- Klass* klass = vmClasses::klass_at(klass_id); ++ InstanceKlass** klass_addr = SystemDictionary::well_known_klass_addr(klass_id); ++ Klass* klass = SystemDictionary::well_known_klass(klass_id); + Register temp = t1; + Register temp2 = t0; // used by MacroAssembler::cmpptr + Label L_ok, L_bad; +@@ -280,7 +279,7 @@ void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, + // The method is a member invoker used by direct method handles. 
+ if (VerifyMethodHandles) { + // make sure the trailing argument really is a MemberName (caller responsibility) +- verify_klass(_masm, member_reg, VM_CLASS_ID(java_lang_invoke_MemberName), ++ verify_klass(_masm, member_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MemberName), + "MemberName required for invokeVirtual etc."); + } + +diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.hpp b/src/hotspot/cpu/riscv/methodHandles_riscv.hpp +index f73aba29d67..65493eba764 100644 +--- a/src/hotspot/cpu/riscv/methodHandles_riscv.hpp ++++ b/src/hotspot/cpu/riscv/methodHandles_riscv.hpp +@@ -36,11 +36,11 @@ enum /* platform_dependent_constants */ { + static void load_klass_from_Class(MacroAssembler* _masm, Register klass_reg); + + static void verify_klass(MacroAssembler* _masm, +- Register obj, vmClassID klass_id, ++ Register obj, SystemDictionary::WKID klass_id, + const char* error_message = "wrong klass") NOT_DEBUG_RETURN; + + static void verify_method_handle(MacroAssembler* _masm, Register mh_reg) { +- verify_klass(_masm, mh_reg, VM_CLASS_ID(java_lang_invoke_MethodHandle), ++ verify_klass(_masm, mh_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MethodHandle), + "reference is a MH"); + } + + +From 2c68b064100b5abaca80926e213280ea82ff161a Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 17:32:15 +0800 +Subject: [PATCH 101/140] Revert JDK-8268858: Determine register pressure + automatically by the number of available registers for allocation + +--- + src/hotspot/cpu/riscv/c2_globals_riscv.hpp | 2 ++ + src/hotspot/cpu/riscv/riscv.ad | 27 ---------------------- + 2 files changed, 2 insertions(+), 27 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp +index 6c301cdae04..33d78fb2f6f 100644 +--- a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp ++++ b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp +@@ -44,8 +44,10 @@ define_pd_global(intx, CompileThreshold, 10000); + + define_pd_global(intx, OnStackReplacePercentage, 140); + define_pd_global(intx, ConditionalMoveLimit, 0); ++define_pd_global(intx, FLOATPRESSURE, 32); + define_pd_global(intx, FreqInlineSize, 325); + define_pd_global(intx, MinJumpTableSize, 10); ++define_pd_global(intx, INTPRESSURE, 24); + define_pd_global(intx, InteriorEntryAlignment, 16); + define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K)); + define_pd_global(intx, LoopUnrollLimit, 60); +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 24214964243..c5e0ae23029 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -1719,33 +1719,6 @@ bool Matcher::is_spillable_arg(int reg) + return can_be_java_arg(reg); + } + +-uint Matcher::int_pressure_limit() +-{ +- // A derived pointer is live at CallNode and then is flagged by RA +- // as a spilled LRG. Spilling heuristics(Spill-USE) explicitly skip +- // derived pointers and lastly fail to spill after reaching maximum +- // number of iterations. Lowering the default pressure threshold to +- // (_NO_SPECIAL_REG32_mask.Size() minus 1) forces CallNode to become +- // a high register pressure area of the code so that split_DEF can +- // generate DefinitionSpillCopy for the derived pointer. 
+- uint default_int_pressure_threshold = _NO_SPECIAL_REG32_mask.Size() - 1; +- if (!PreserveFramePointer) { +- // When PreserveFramePointer is off, frame pointer is allocatable, +- // but different from other SOC registers, it is excluded from +- // fatproj's mask because its save type is No-Save. Decrease 1 to +- // ensure high pressure at fatproj when PreserveFramePointer is off. +- // See check_pressure_at_fatproj(). +- default_int_pressure_threshold--; +- } +- return (INTPRESSURE == -1) ? default_int_pressure_threshold : INTPRESSURE; +-} +- +-uint Matcher::float_pressure_limit() +-{ +- // _FLOAT_REG_mask is generated by adlc from the float_reg register class. +- return (FLOATPRESSURE == -1) ? _FLOAT_REG_mask.Size() : FLOATPRESSURE; +-} +- + bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) { + return false; + } + +From 932ebd6238ea7703dc3164e4506af332f6847592 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 17:51:12 +0800 +Subject: [PATCH 102/140] Revert JDK-8276563: Undefined Behaviour in class + Assembler && 8257882: Implement linkToNative intrinsic on AArch64 (the + register part) + +--- + .../cpu/riscv/globalDefinitions_riscv.hpp | 2 - + src/hotspot/cpu/riscv/register_riscv.cpp | 4 - + src/hotspot/cpu/riscv/register_riscv.hpp | 123 +++++++++++++----- + 3 files changed, 91 insertions(+), 38 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp +index 2936837d951..ffd420da024 100644 +--- a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp ++++ b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp +@@ -47,6 +47,4 @@ const bool CCallingConventionRequiresIntsAsLongs = false; + + #define COMPRESSED_CLASS_POINTERS_DEPENDS_ON_COMPRESSED_OOPS false + +-#define USE_POINTERS_TO_REGISTER_IMPL_ARRAY +- + #endif // CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/register_riscv.cpp b/src/hotspot/cpu/riscv/register_riscv.cpp +index 96cf1996a83..ef60cb3bb05 100644 +--- a/src/hotspot/cpu/riscv/register_riscv.cpp ++++ b/src/hotspot/cpu/riscv/register_riscv.cpp +@@ -26,10 +26,6 @@ + #include "precompiled.hpp" + #include "register_riscv.hpp" + +-REGISTER_IMPL_DEFINITION(Register, RegisterImpl, RegisterImpl::number_of_registers); +-REGISTER_IMPL_DEFINITION(FloatRegister, FloatRegisterImpl, FloatRegisterImpl::number_of_registers); +-REGISTER_IMPL_DEFINITION(VectorRegister, VectorRegisterImpl, VectorRegisterImpl::number_of_registers); +- + const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers * + RegisterImpl::max_slots_per_register; + +diff --git a/src/hotspot/cpu/riscv/register_riscv.hpp b/src/hotspot/cpu/riscv/register_riscv.hpp +index d697751f55f..f64a06eb89f 100644 +--- a/src/hotspot/cpu/riscv/register_riscv.hpp ++++ b/src/hotspot/cpu/riscv/register_riscv.hpp +@@ -47,13 +47,13 @@ typedef VMRegImpl* VMReg; + + // Use Register as shortcut + class RegisterImpl; +-typedef const RegisterImpl* Register; ++typedef RegisterImpl* Register; + +-inline constexpr Register as_Register(int encoding); ++inline Register as_Register(int encoding) { ++ return (Register)(intptr_t) encoding; ++} + + class RegisterImpl: public AbstractRegisterImpl { +- static constexpr Register first(); +- + public: + enum { + number_of_registers = 32, +@@ -66,16 +66,16 @@ class RegisterImpl: public AbstractRegisterImpl { + }; + + // derived registers, offsets, and addresses +- const Register successor() const { return this + 1; } ++ const Register successor() const { return 
as_Register(encoding() + 1); } + + // construction +- inline friend constexpr Register as_Register(int encoding); ++ inline friend Register as_Register(int encoding); + + VMReg as_VMReg() const; + + // accessors + int encoding() const { assert(is_valid(), "invalid register"); return encoding_nocheck(); } +- int encoding_nocheck() const { return this - first(); } ++ int encoding_nocheck() const { return (intptr_t)this; } + bool is_valid() const { return (unsigned)encoding_nocheck() < number_of_registers; } + const char* name() const; + +@@ -93,9 +93,11 @@ class RegisterImpl: public AbstractRegisterImpl { + return encoding_nocheck() >= compressed_register_base && + encoding_nocheck() <= compressed_register_top; + } +-}; + +-REGISTER_IMPL_DECLARATION(Register, RegisterImpl, RegisterImpl::number_of_registers); ++ // Return the bit which represents this register. This is intended ++ // to be ORed into a bitmask: for usage see class RegSet below. ++ uint64_t bit(bool should_set = true) const { return should_set ? 1 << encoding() : 0; } ++}; + + // The integer registers of the RISCV architecture + +@@ -136,14 +138,14 @@ CONSTANT_REGISTER_DECLARATION(Register, x31, (31)); + + // Use FloatRegister as shortcut + class FloatRegisterImpl; +-typedef const FloatRegisterImpl* FloatRegister; ++typedef FloatRegisterImpl* FloatRegister; + +-inline constexpr FloatRegister as_FloatRegister(int encoding); ++inline FloatRegister as_FloatRegister(int encoding) { ++ return (FloatRegister)(intptr_t) encoding; ++} + + // The implementation of floating point registers for the architecture + class FloatRegisterImpl: public AbstractRegisterImpl { +- static constexpr FloatRegister first(); +- + public: + enum { + number_of_registers = 32, +@@ -155,18 +157,16 @@ class FloatRegisterImpl: public AbstractRegisterImpl { + }; + + // construction +- inline friend constexpr FloatRegister as_FloatRegister(int encoding); ++ inline friend FloatRegister as_FloatRegister(int encoding); + + VMReg as_VMReg() const; + + // derived registers, offsets, and addresses +- FloatRegister successor() const { +- return as_FloatRegister((encoding() + 1) % (unsigned)number_of_registers); +- } ++ FloatRegister successor() const { return as_FloatRegister(encoding() + 1); } + + // accessors + int encoding() const { assert(is_valid(), "invalid register"); return encoding_nocheck(); } +- int encoding_nocheck() const { return this - first(); } ++ int encoding_nocheck() const { return (intptr_t)this; } + int is_valid() const { return (unsigned)encoding_nocheck() < number_of_registers; } + const char* name() const; + +@@ -186,8 +186,6 @@ class FloatRegisterImpl: public AbstractRegisterImpl { + } + }; + +-REGISTER_IMPL_DECLARATION(FloatRegister, FloatRegisterImpl, FloatRegisterImpl::number_of_registers); +- + // The float registers of the RISCV architecture + + CONSTANT_REGISTER_DECLARATION(FloatRegister, fnoreg , (-1)); +@@ -227,14 +225,14 @@ CONSTANT_REGISTER_DECLARATION(FloatRegister, f31 , (31)); + + // Use VectorRegister as shortcut + class VectorRegisterImpl; +-typedef const VectorRegisterImpl* VectorRegister; ++typedef VectorRegisterImpl* VectorRegister; + +-inline constexpr VectorRegister as_VectorRegister(int encoding); ++inline VectorRegister as_VectorRegister(int encoding) { ++ return (VectorRegister)(intptr_t) encoding; ++} + + // The implementation of vector registers for RVV + class VectorRegisterImpl: public AbstractRegisterImpl { +- static constexpr VectorRegister first(); +- + public: + enum { + number_of_registers = 32, +@@ -242,23 
+240,21 @@ class VectorRegisterImpl: public AbstractRegisterImpl { + }; + + // construction +- inline friend constexpr VectorRegister as_VectorRegister(int encoding); ++ inline friend VectorRegister as_VectorRegister(int encoding); + + VMReg as_VMReg() const; + + // derived registers, offsets, and addresses +- VectorRegister successor() const { return this + 1; } ++ VectorRegister successor() const { return as_VectorRegister(encoding() + 1); } + + // accessors + int encoding() const { assert(is_valid(), "invalid register"); return encoding_nocheck(); } +- int encoding_nocheck() const { return this - first(); } ++ int encoding_nocheck() const { return (intptr_t)this; } + bool is_valid() const { return (unsigned)encoding_nocheck() < number_of_registers; } + const char* name() const; + + }; + +-REGISTER_IMPL_DECLARATION(VectorRegister, VectorRegisterImpl, VectorRegisterImpl::number_of_registers); +- + // The vector registers of RVV + CONSTANT_REGISTER_DECLARATION(VectorRegister, vnoreg , (-1)); + +@@ -315,8 +311,71 @@ class ConcreteRegisterImpl : public AbstractRegisterImpl { + static const int max_fpr; + }; + +-typedef AbstractRegSet RegSet; +-typedef AbstractRegSet FloatRegSet; +-typedef AbstractRegSet VectorRegSet; ++// A set of registers ++class RegSet { ++ uint32_t _bitset; ++ ++ RegSet(uint32_t bitset) : _bitset(bitset) { } + -+ public int getInterpreterFrameBCI() { -+ // FIXME: this is not atomic with respect to GC and is unsuitable -+ // for use in a non-debugging, or reflective, system. Need to -+ // figure out how to express this. -+ Address bcp = addressOfInterpreterFrameBCX().getAddressAt(0); -+ Address methodHandle = addressOfInterpreterFrameMethod().getAddressAt(0); -+ Method method = (Method)Metadata.instantiateWrapperFor(methodHandle); -+ return bcpToBci(bcp, method); -+ } ++public: + -+ public Address addressOfInterpreterFrameMDX() { -+ return addressOfStackSlot(INTERPRETER_FRAME_MDX_OFFSET); -+ } ++ RegSet() : _bitset(0) { } + -+ // expression stack -+ // (the max_stack arguments are used by the GC; see class FrameClosure) ++ RegSet(Register r1) : _bitset(r1->bit()) { } + -+ public Address addressOfInterpreterFrameExpressionStack() { -+ Address monitorEnd = interpreterFrameMonitorEnd().address(); -+ return monitorEnd.addOffsetTo(-1 * VM.getVM().getAddressSize()); ++ RegSet operator+(const RegSet aSet) const { ++ RegSet result(_bitset | aSet._bitset); ++ return result; + } + -+ public int getInterpreterFrameExpressionStackDirection() { return -1; } -+ -+ // top of expression stack -+ public Address addressOfInterpreterFrameTOS() { -+ return getSP(); ++ RegSet operator-(const RegSet aSet) const { ++ RegSet result(_bitset & ~aSet._bitset); ++ return result; + } + -+ /** Expression stack from top down */ -+ public Address addressOfInterpreterFrameTOSAt(int slot) { -+ return addressOfInterpreterFrameTOS().addOffsetTo(slot * VM.getVM().getAddressSize()); ++ RegSet &operator+=(const RegSet aSet) { ++ *this = *this + aSet; ++ return *this; + } + -+ public Address getInterpreterFrameSenderSP() { -+ if (Assert.ASSERTS_ENABLED) { -+ Assert.that(isInterpretedFrame(), "interpreted frame expected"); -+ } -+ return addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); ++ RegSet &operator-=(const RegSet aSet) { ++ *this = *this - aSet; ++ return *this; + } + -+ // Monitors -+ public BasicObjectLock interpreterFrameMonitorBegin() { -+ return new BasicObjectLock(addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET)); ++ static RegSet of(Register r1) { ++ return 
RegSet(r1); + } + -+ public BasicObjectLock interpreterFrameMonitorEnd() { -+ Address result = addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET).getAddressAt(0); -+ if (Assert.ASSERTS_ENABLED) { -+ // make sure the pointer points inside the frame -+ Assert.that(AddressOps.gt(getFP(), result), "result must < than frame pointer"); -+ Assert.that(AddressOps.lte(getSP(), result), "result must >= than stack pointer"); -+ } -+ return new BasicObjectLock(result); ++ static RegSet of(Register r1, Register r2) { ++ return of(r1) + r2; + } + -+ public int interpreterFrameMonitorSize() { -+ return BasicObjectLock.size(); ++ static RegSet of(Register r1, Register r2, Register r3) { ++ return of(r1, r2) + r3; + } + -+ // Method -+ public Address addressOfInterpreterFrameMethod() { -+ return addressOfStackSlot(INTERPRETER_FRAME_METHOD_OFFSET); ++ static RegSet of(Register r1, Register r2, Register r3, Register r4) { ++ return of(r1, r2, r3) + r4; + } + -+ // Constant pool cache -+ public Address addressOfInterpreterFrameCPCache() { -+ return addressOfStackSlot(INTERPRETER_FRAME_CACHE_OFFSET); -+ } ++ static RegSet range(Register start, Register end) { ++ uint32_t bits = ~0; ++ bits <<= start->encoding(); ++ bits <<= 31 - end->encoding(); ++ bits >>= 31 - end->encoding(); + -+ // Entry frames -+ public JavaCallWrapper getEntryFrameCallWrapper() { -+ return new RISCV64JavaCallWrapper(addressOfStackSlot(ENTRY_FRAME_CALL_WRAPPER_OFFSET).getAddressAt(0)); ++ return RegSet(bits); + } + -+ protected Address addressOfSavedOopResult() { -+ // offset is 2 for compiler2 and 3 for compiler1 -+ return getSP().addOffsetTo((VM.getVM().isClientCompiler() ? 2 : 3) * -+ VM.getVM().getAddressSize()); -+ } ++ uint32_t bits() const { return _bitset; } + -+ protected Address addressOfSavedReceiver() { -+ return getSP().addOffsetTo(-4 * VM.getVM().getAddressSize()); -+ } ++private: + -+ private void dumpStack() { -+ for (Address addr = getSP().addOffsetTo(-4 * VM.getVM().getAddressSize()); -+ AddressOps.lt(addr, getSP()); -+ addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { -+ System.out.println(addr + ": " + addr.getAddressAt(0)); -+ } -+ System.out.println("-----------------------"); -+ for (Address addr = getSP(); -+ AddressOps.lte(addr, getSP().addOffsetTo(20 * VM.getVM().getAddressSize())); -+ addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { -+ System.out.println(addr + ": " + addr.getAddressAt(0)); -+ } ++ Register first() { ++ uint32_t first = _bitset & -_bitset; ++ return first ? 
as_Register(exact_log2(first)) : noreg; + } -+} -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java ++}; + + #endif // CPU_RISCV_REGISTER_RISCV_HPP + +From 9c85aa8d3387d795f9c2f4795ffc7f9d7f814d92 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 19:24:49 +0800 +Subject: [PATCH 103/140] Revert JDK-8240363: Refactor Compile::Output() to its + own Phase + +--- + .../cpu/riscv/macroAssembler_riscv.cpp | 2 +- + src/hotspot/cpu/riscv/riscv.ad | 20 +++++++++---------- + 2 files changed, 11 insertions(+), 11 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index e2841c28c37..656334f326b 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -3027,7 +3027,7 @@ address MacroAssembler::trampoline_call(Address entry, CodeBuffer* cbuf) { + CompileTask* task = ciEnv::current()->task(); + in_scratch_emit_size = + (task != NULL && is_c2_compile(task->comp_level()) && +- Compile::current()->output()->in_scratch_emit_size()); ++ Compile::current()->in_scratch_emit_size()); + #endif + if (!in_scratch_emit_size) { + address stub = emit_trampoline_stub(offset(), entry.target()); +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index c5e0ae23029..d736750d02d 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -1029,7 +1029,7 @@ uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const { + //============================================================================= + const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty; + +-int ConstantTable::calculate_table_base_offset() const { ++int Compile::ConstantTable::calculate_table_base_offset() const { + return 0; // absolute addressing, no offset + } + +@@ -1058,9 +1058,9 @@ void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const { + assert_cond(st != NULL && ra_ != NULL); + Compile* C = ra_->C; + +- int framesize = C->output()->frame_slots() << LogBytesPerInt; ++ int framesize = C->frame_slots() << LogBytesPerInt; + +- if (C->output()->need_stack_bang(framesize)) { ++ if (C->need_stack_bang(framesize)) { + st->print("# stack bang size=%d\n\t", framesize); + } + +@@ -1077,7 +1077,7 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + MacroAssembler _masm(&cbuf); + + // n.b. 
frame size includes space for return pc and fp +- const int framesize = C->output()->frame_size_in_bytes(); ++ const int framesize = C->frame_size_in_bytes(); + + // insert a nop at the start of the prolog so we can patch in a + // branch if we need to invalidate the method later +@@ -1085,8 +1085,8 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + + assert_cond(C != NULL); + +- int bangsize = C->output()->bang_size_in_bytes(); +- if (C->output()->need_stack_bang(bangsize)) { ++ int bangsize = C->bang_size_in_bytes(); ++ if (C->need_stack_bang(bangsize)) { + __ generate_stack_overflow_check(bangsize); + } + +@@ -1096,12 +1096,12 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + Unimplemented(); + } + +- C->output()->set_frame_complete(cbuf.insts_size()); ++ C->set_frame_complete(cbuf.insts_size()); + + if (C->has_mach_constant_base_node()) { + // NOTE: We set the table base offset here because users might be + // emitted before MachConstantBaseNode. +- ConstantTable& constant_table = C->output()->constant_table(); ++ Compile::ConstantTable& constant_table = C->constant_table(); + constant_table.set_table_base_offset(constant_table.calculate_table_base_offset()); + } + } +@@ -1125,7 +1125,7 @@ void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const { + assert_cond(st != NULL && ra_ != NULL); + Compile* C = ra_->C; + assert_cond(C != NULL); +- int framesize = C->output()->frame_size_in_bytes(); ++ int framesize = C->frame_size_in_bytes(); + + st->print("# pop frame %d\n\t", framesize); + +@@ -1152,7 +1152,7 @@ void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + Compile* C = ra_->C; + MacroAssembler _masm(&cbuf); + assert_cond(C != NULL); +- int framesize = C->output()->frame_size_in_bytes(); ++ int framesize = C->frame_size_in_bytes(); + + __ remove_frame(framesize); + + +From 3a58114310a56ebca04ba44b4883d205096eb844 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 19:36:09 +0800 +Subject: [PATCH 104/140] Revert RotateLeft && RotateRight matching rules + +--- + src/hotspot/cpu/riscv/riscv.ad | 2 - + src/hotspot/cpu/riscv/riscv_b.ad | 76 -------------------------------- + 2 files changed, 78 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index d736750d02d..1e6495692da 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -1505,8 +1505,6 @@ const bool Matcher::match_rule_supported(int opcode) { + case Op_PopCountL: + return UsePopCountInstruction; + +- case Op_RotateRight: +- case Op_RotateLeft: + case Op_CountLeadingZerosI: + case Op_CountLeadingZerosL: + case Op_CountTrailingZerosI: +diff --git a/src/hotspot/cpu/riscv/riscv_b.ad b/src/hotspot/cpu/riscv/riscv_b.ad +index 4488c1c4031..b9e04c432e1 100644 +--- a/src/hotspot/cpu/riscv/riscv_b.ad ++++ b/src/hotspot/cpu/riscv/riscv_b.ad +@@ -25,82 +25,6 @@ + + // RISCV Bit-Manipulation Extension Architecture Description File + +-instruct rorI_imm_rvb(iRegINoSp dst, iRegI src, immI shift) %{ +- predicate(UseRVB); +- match(Set dst (RotateRight src shift)); +- +- format %{ "roriw $dst, $src, ($shift & 0x1f)\t#@rorI_imm_rvb" %} +- +- ins_cost(ALU_COST); +- ins_encode %{ +- __ roriw(as_Register($dst$$reg), as_Register($src$$reg), $shift$$constant & 0x1f); +- %} +- +- ins_pipe(ialu_reg_shift); +-%} +- +-instruct rorL_imm_rvb(iRegLNoSp dst, iRegL src, immI shift) %{ +- predicate(UseRVB); +- match(Set dst (RotateRight src shift)); +- +- format %{ "rori $dst, $src, 
($shift & 0x3f)\t#@rorL_imm_rvb" %} +- +- ins_cost(ALU_COST); +- ins_encode %{ +- __ rori(as_Register($dst$$reg), as_Register($src$$reg), $shift$$constant & 0x3f); +- %} +- +- ins_pipe(ialu_reg_shift); +-%} +- +-instruct rorI_reg_rvb(iRegINoSp dst, iRegI src, iRegI shift) %{ +- predicate(UseRVB); +- match(Set dst (RotateRight src shift)); +- +- format %{ "rorw $dst, $src, $shift\t#@rorI_reg_rvb" %} +- ins_cost(ALU_COST); +- ins_encode %{ +- __ rorw(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); +- %} +- ins_pipe(ialu_reg_reg); +-%} +- +-instruct rorL_reg_rvb(iRegLNoSp dst, iRegL src, iRegI shift) %{ +- predicate(UseRVB); +- match(Set dst (RotateRight src shift)); +- +- format %{ "ror $dst, $src, $shift\t#@rorL_reg_rvb" %} +- ins_cost(ALU_COST); +- ins_encode %{ +- __ ror(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); +- %} +- ins_pipe(ialu_reg_reg); +-%} +- +-instruct rolI_reg_rvb(iRegINoSp dst, iRegI src, iRegI shift) %{ +- predicate(UseRVB); +- match(Set dst (RotateLeft src shift)); +- +- format %{ "rolw $dst, $src, $shift\t#@rolI_reg_rvb" %} +- ins_cost(ALU_COST); +- ins_encode %{ +- __ rolw(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); +- %} +- ins_pipe(ialu_reg_reg); +-%} +- +-instruct rolL_reg_rvb(iRegLNoSp dst, iRegL src, iRegI shift) %{ +- predicate(UseRVB); +- match(Set dst (RotateLeft src shift)); +- +- format %{ "rol $dst, $src, $shift\t#@rolL_reg_rvb" %} +- ins_cost(ALU_COST); +- ins_encode %{ +- __ rol(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); +- %} +- ins_pipe(ialu_reg_reg); +-%} +- + // Convert oop into int for vectors alignment masking + instruct convP2I_rvb(iRegINoSp dst, iRegP src) %{ + predicate(UseRVB); + +From 21577388eda0218eeb4b28bc71ecf5737d40639e Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 19:49:28 +0800 +Subject: [PATCH 105/140] Revert JDK-8230565: ZGC: Redesign C2 load barrier to + expand on the MachNode level + +--- + src/hotspot/cpu/riscv/riscv.ad | 14 ++++---------- + 1 file changed, 4 insertions(+), 10 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 1e6495692da..533eaf843e3 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -4324,7 +4324,6 @@ instruct loadRange(iRegINoSp dst, memory mem) + instruct loadP(iRegPNoSp dst, memory mem) + %{ + match(Set dst (LoadP mem)); +- predicate(n->as_Load()->barrier_data() == 0); + + ins_cost(LOAD_COST); + format %{ "ld $dst, $mem\t# ptr, #@loadP" %} +@@ -5060,8 +5059,6 @@ instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoS + + instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) + %{ +- predicate(n->as_LoadStore()->barrier_data() == 0); +- + match(Set res (CompareAndSwapP mem (Binary oldval newval))); + + ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); +@@ -5181,7 +5178,7 @@ instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegL + + instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) + %{ +- predicate(needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == 0)); ++ predicate(needs_acquiring_load_reserved(n)); + + match(Set res (CompareAndSwapP mem (Binary oldval newval))); + +@@ -5327,7 +5324,6 @@ instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN ne + + instruct compareAndExchangeP(iRegPNoSp res, indirect mem, iRegP 
oldval, iRegP newval) + %{ +- predicate(n->as_LoadStore()->barrier_data() == 0); + match(Set res (CompareAndExchangeP mem (Binary oldval newval))); + + ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); +@@ -5462,7 +5458,7 @@ instruct compareAndExchangeNAcq(iRegNNoSp res, indirect mem, iRegN oldval, iRegN + + instruct compareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) + %{ +- predicate(needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == 0)); ++ predicate(needs_acquiring_load_reserved(n)); + + match(Set res (CompareAndExchangeP mem (Binary oldval newval))); + +@@ -5592,7 +5588,6 @@ instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN ne + + instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) + %{ +- predicate(n->as_LoadStore()->barrier_data() == 0); + match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); + + ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); +@@ -5731,7 +5726,7 @@ instruct weakCompareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN + + instruct weakCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) + %{ +- predicate(needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == 0)); ++ predicate(needs_acquiring_load_reserved(n)); + + match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); + +@@ -5798,7 +5793,6 @@ instruct get_and_setN(indirect mem, iRegN newv, iRegINoSp prev) + + instruct get_and_setP(indirect mem, iRegP newv, iRegPNoSp prev) + %{ +- predicate(n->as_LoadStore()->barrier_data() == 0); + match(Set prev (GetAndSetP mem newv)); + + ins_cost(ALU_COST); +@@ -5865,7 +5859,7 @@ instruct get_and_setNAcq(indirect mem, iRegN newv, iRegINoSp prev) + + instruct get_and_setPAcq(indirect mem, iRegP newv, iRegPNoSp prev) + %{ +- predicate(needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == 0)); ++ predicate(needs_acquiring_load_reserved(n)); + + match(Set prev (GetAndSetP mem newv)); + + +From 4673921af60f4779d4322256f92bb60a850cb035 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 19:51:09 +0800 +Subject: [PATCH 106/140] Revert JDK-8252990: Intrinsify Unsafe.storeStoreFence + +--- + src/hotspot/cpu/riscv/riscv.ad | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 533eaf843e3..5fa3b85c001 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -7537,7 +7537,6 @@ instruct membar_release() %{ + + instruct membar_storestore() %{ + match(MemBarStoreStore); +- match(StoreStoreFence); + ins_cost(ALU_COST); + + format %{ "MEMBAR-store-store\t#@membar_storestore" %} + +From e254a03e87ffc6d8f563dbd7db1b607a95657263 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 19:54:02 +0800 +Subject: [PATCH 107/140] Revert JDK-8255150: Add utility methods to check long + indexes and ranges && JDK-8252372: Check if cloning is required to move loads + out of loops in PhaseIdealLoop::split_if_with_blocks_post() + +--- + src/hotspot/cpu/riscv/riscv.ad | 33 --------------------------------- + 1 file changed, 33 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 5fa3b85c001..388e65f623d 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -7621,17 +7621,6 @@ instruct castPP(iRegPNoSp dst) + ins_pipe(pipe_class_empty); + %} + +-instruct castLL(iRegL dst) 
+-%{ +- match(Set dst (CastLL dst)); +- +- size(0); +- format %{ "# castLL of $dst, #@castLL" %} +- ins_encode(/* empty encoding */); +- ins_cost(0); +- ins_pipe(pipe_class_empty); +-%} +- + instruct castII(iRegI dst) + %{ + match(Set dst (CastII dst)); +@@ -7654,28 +7643,6 @@ instruct checkCastPP(iRegPNoSp dst) + ins_pipe(pipe_class_empty); + %} + +-instruct castFF(fRegF dst) +-%{ +- match(Set dst (CastFF dst)); +- +- size(0); +- format %{ "# castFF of $dst" %} +- ins_encode(/* empty encoding */); +- ins_cost(0); +- ins_pipe(pipe_class_empty); +-%} +- +-instruct castDD(fRegD dst) +-%{ +- match(Set dst (CastDD dst)); +- +- size(0); +- format %{ "# castDD of $dst" %} +- ins_encode(/* empty encoding */); +- ins_cost(0); +- ins_pipe(pipe_class_empty); +-%} +- + // ============================================================================ + // Convert Instructions + + +From 2c1820363992d09ef0cd2ed2553c04e0f7afd91f Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 20:02:14 +0800 +Subject: [PATCH 108/140] Revert reset_label part of JDK-8248411: [aarch64] + Insufficient error handling when CodeBuffer is exhausted + +--- + src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 2 +- + src/hotspot/cpu/riscv/macroAssembler_riscv.hpp | 14 +++++--------- + 2 files changed, 6 insertions(+), 10 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index 656334f326b..37ccf132986 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -3784,7 +3784,7 @@ address MacroAssembler::zero_words(Register ptr, Register cnt) + if (StubRoutines::riscv::complete()) { + address tpc = trampoline_call(zero_blocks); + if (tpc == NULL) { +- DEBUG_ONLY(reset_labels(around)); ++ DEBUG_ONLY(reset_labels1(around)); + postcond(pc() == badAddress); + return NULL; + } +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +index 953bca3cbd8..45ffc663963 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +@@ -815,17 +815,13 @@ class MacroAssembler: public Assembler { + private: + + #ifdef ASSERT +- // Template short-hand support to clean-up after a failed call to trampoline ++ // Macro short-hand support to clean-up after a failed call to trampoline + // call generation (see trampoline_call() below), when a set of Labels must + // be reset (before returning). +- template +- void reset_labels(Label& lbl, More&... 
more) { +- lbl.reset(); reset_labels(more...); +- } +- template +- void reset_labels(Label& lbl) { +- lbl.reset(); +- } ++#define reset_labels1(L1) L1.reset() ++#define reset_labels2(L1, L2) L1.reset(); L2.reset() ++#define reset_labels3(L1, L2, L3) L1.reset(); reset_labels2(L2, L3) ++#define reset_labels5(L1, L2, L3, L4, L5) reset_labels2(L1, L2); reset_labels3(L3, L4, L5) + #endif + void repne_scan(Register addr, Register value, Register count, Register tmp); + + +From 014972a0778b8c5568fae9e92d286b634cb44674 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 20:30:49 +0800 +Subject: [PATCH 109/140] Revert JDK-8242289: C2: Support platform-specific + node cloning in Matcher + +--- + src/hotspot/cpu/riscv/riscv.ad | 12 +----------- + 1 file changed, 1 insertion(+), 11 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 388e65f623d..7cd6c2995ba 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -1765,20 +1765,10 @@ bool size_fits_all_mem_uses(AddPNode* addp, int shift) { + + const bool Matcher::convi2l_type_required = false; + +-// Should the Matcher clone input 'm' of node 'n'? +-bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) { +- assert_cond(m != NULL); +- if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con) +- mstack.push(m, Visit); // m = ShiftCntV +- return true; +- } +- return false; +-} +- + // Should the Matcher clone shifts on addressing modes, expecting them + // to be subsumed into complex addressing expressions or compute them + // into registers? +-bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { ++bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { + return clone_base_plus_offset_address(m, mstack, address_visited); + } + + +From d15e155e9b84f4789cfbb1cf75382be859b0a8ca Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 20:40:00 +0800 +Subject: [PATCH 110/140] Revert JDK-8255782: Turn UseTLAB and ResizeTLAB from + product_pd to product, defaulting to "true" + +--- + src/hotspot/cpu/riscv/c1_globals_riscv.hpp | 2 ++ + src/hotspot/cpu/riscv/c2_globals_riscv.hpp | 2 ++ + 2 files changed, 4 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp +index 8f2f4e0e81d..25e00bea901 100644 +--- a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp ++++ b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp +@@ -34,6 +34,8 @@ + + #ifndef TIERED + define_pd_global(bool, BackgroundCompilation, true ); ++define_pd_global(bool, UseTLAB, true ); ++define_pd_global(bool, ResizeTLAB, true ); + define_pd_global(bool, InlineIntrinsics, true ); + define_pd_global(bool, PreferInterpreterNativeStubs, false); + define_pd_global(bool, ProfileTraps, false); +diff --git a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp +index 33d78fb2f6f..3da1f1c6d86 100644 +--- a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp ++++ b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp +@@ -33,6 +33,8 @@ + // (see c2_globals.hpp). Alpha-sorted. 
+ + define_pd_global(bool, BackgroundCompilation, true); ++define_pd_global(bool, UseTLAB, true); ++define_pd_global(bool, ResizeTLAB, true); + define_pd_global(bool, CICompileOSR, true); + define_pd_global(bool, InlineIntrinsics, true); + define_pd_global(bool, PreferInterpreterNativeStubs, false); + +From f3fa0cfa987743b4ee83332ddf71add421561908 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 20:49:57 +0800 +Subject: [PATCH 111/140] Revert JDK-8265245: depChecker_ don't have any + functionalities + +--- + src/hotspot/cpu/riscv/depChecker_riscv.hpp | 32 ++++++++++++++++++++++ + 1 file changed, 32 insertions(+) + create mode 100644 src/hotspot/cpu/riscv/depChecker_riscv.hpp + +diff --git a/src/hotspot/cpu/riscv/depChecker_riscv.hpp b/src/hotspot/cpu/riscv/depChecker_riscv.hpp new file mode 100644 -index 000000000..4d79e3ee4 +index 00000000000..e9ff307b647 --- /dev/null -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java -@@ -0,0 +1,58 @@ ++++ b/src/hotspot/cpu/riscv/depChecker_riscv.hpp +@@ -0,0 +1,32 @@ +/* -+ * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, Red Hat Inc. -+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -59934,435 +80563,1447 @@ index 000000000..4d79e3ee4 + * + */ + -+package sun.jvm.hotspot.runtime.riscv64; -+ -+import java.util.*; -+import sun.jvm.hotspot.debugger.*; -+import sun.jvm.hotspot.types.*; -+import sun.jvm.hotspot.runtime.*; -+ -+public class RISCV64JavaCallWrapper extends JavaCallWrapper { -+ private static AddressField lastJavaFPField; -+ -+ static { -+ VM.registerVMInitializedObserver(new Observer() { -+ public void update(Observable o, Object data) { -+ initialize(VM.getVM().getTypeDataBase()); -+ } -+ }); -+ } -+ -+ private static synchronized void initialize(TypeDataBase db) { -+ Type type = db.lookupType("JavaFrameAnchor"); -+ -+ lastJavaFPField = type.getAddressField("_last_Java_fp"); -+ } -+ -+ public RISCV64JavaCallWrapper(Address addr) { -+ super(addr); -+ } ++#ifndef CPU_RISCV_VM_DEPCHECKER_RISCV_HPP ++#define CPU_RISCV_VM_DEPCHECKER_RISCV_HPP + -+ public Address getLastJavaFP() { -+ return lastJavaFPField.getValue(addr.addOffsetTo(anchorField.getOffset())); -+ } -+} -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java -new file mode 100644 -index 000000000..d7187a5f8 ---- /dev/null -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java -@@ -0,0 +1,53 @@ -+/* -+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, Red Hat Inc. -+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. 
-+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++// Nothing to do on riscv + -+package sun.jvm.hotspot.runtime.riscv64; ++#endif // CPU_RISCV_VM_DEPCHECKER_RISCV_HPP + +From 97a3d4d3b98a450aa316eaa94103cf8473d12d50 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 20:58:34 +0800 +Subject: [PATCH 112/140] Revert JDK-8241438: Move IntelJccErratum mitigation + code to platform-specific code + +--- + src/hotspot/cpu/riscv/riscv.ad | 18 ------------------ + 1 file changed, 18 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 7cd6c2995ba..fc6823daf8b 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -740,13 +740,6 @@ class HandlerImpl { + } + }; + +-class Node::PD { +-public: +- enum NodeFlags { +- _last_flag = Node::_last_flag +- }; +-}; +- + bool is_CAS(int opcode, bool maybe_volatile); + + // predicate controlling translation of CompareAndSwapX +@@ -805,17 +798,6 @@ void reg_mask_init() { + } + } + +-void PhaseOutput::pd_perform_mach_node_analysis() { +-} +- +-int MachNode::pd_alignment_required() const { +- return 1; +-} +- +-int MachNode::compute_padding(int current_offset) const { +- return 0; +-} +- + // is_CAS(int opcode, bool maybe_volatile) + // + // return true if opcode is one of the possible CompareAndSwapX + +From 8a3e7b81b79918a4f2feb4d9226ab8be6c43c28a Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 21:03:47 +0800 +Subject: [PATCH 113/140] Revert JDK-8260355: AArch64: deoptimization stub + should save vector registers + +--- + src/hotspot/cpu/riscv/registerMap_riscv.cpp | 45 --------------------- + src/hotspot/cpu/riscv/registerMap_riscv.hpp | 1 - + 2 files changed, 46 deletions(-) + delete mode 100644 src/hotspot/cpu/riscv/registerMap_riscv.cpp + +diff --git a/src/hotspot/cpu/riscv/registerMap_riscv.cpp b/src/hotspot/cpu/riscv/registerMap_riscv.cpp +deleted file mode 100644 +index 26c1edc36ff..00000000000 +--- a/src/hotspot/cpu/riscv/registerMap_riscv.cpp ++++ /dev/null +@@ -1,45 +0,0 @@ +-/* +- * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. +- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +- * +- * This code is free software; you can redistribute it and/or modify it +- * under the terms of the GNU General Public License version 2 only, as +- * published by the Free Software Foundation. +- * +- * This code is distributed in the hope that it will be useful, but WITHOUT +- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +- * version 2 for more details (a copy is included in the LICENSE file that +- * accompanied this code). 
+- * +- * You should have received a copy of the GNU General Public License version +- * 2 along with this work; if not, write to the Free Software Foundation, +- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +- * +- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +- * or visit www.oracle.com if you need additional information or have any +- * questions. +- * +- */ +- +-#include "precompiled.hpp" +-#include "runtime/registerMap.hpp" +-#include "vmreg_riscv.inline.hpp" +- +-address RegisterMap::pd_location(VMReg base_reg, int slot_idx) const { +- if (base_reg->is_VectorRegister()) { +- assert(base_reg->is_concrete(), "must pass base reg"); +- int base_reg_enc = (base_reg->value() - ConcreteRegisterImpl::max_fpr) / +- VectorRegisterImpl::max_slots_per_register; +- intptr_t offset_in_bytes = slot_idx * VMRegImpl::stack_slot_size; +- address base_location = location(base_reg); +- if (base_location != NULL) { +- return base_location + offset_in_bytes; +- } else { +- return NULL; +- } +- } else { +- return location(base_reg->next(slot_idx)); +- } +-} +diff --git a/src/hotspot/cpu/riscv/registerMap_riscv.hpp b/src/hotspot/cpu/riscv/registerMap_riscv.hpp +index f34349811a9..fef8ca9b64e 100644 +--- a/src/hotspot/cpu/riscv/registerMap_riscv.hpp ++++ b/src/hotspot/cpu/riscv/registerMap_riscv.hpp +@@ -33,7 +33,6 @@ + // This is the hook for finding a register in an "well-known" location, + // such as a register block of a predetermined format. + address pd_location(VMReg reg) const { return NULL; } +- address pd_location(VMReg base_reg, int slot_idx) const; + + // no PD state to clear or copy: + void pd_clear() {} + +From 5fc20f93a312f9189b55c5236c15a55b3da10cf9 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 21:05:37 +0800 +Subject: [PATCH 114/140] Revert JDK-8250914: Matcher::stack_direction() is + unused + +--- + src/hotspot/cpu/riscv/riscv.ad | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index fc6823daf8b..c21508b6e7c 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -2326,6 +2326,9 @@ encode %{ + // SP meets the minimum alignment. + + frame %{ ++ // What direction does stack grow in (assumed to be same for C & Java) ++ stack_direction(TOWARDS_LOW); + -+import sun.jvm.hotspot.debugger.*; -+import sun.jvm.hotspot.runtime.*; + // These three registers define part of the calling convention + // between compiled code and the interpreter. 
+ + +From aab3322fd2507a3aeae39c69ba871400dd342834 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 21:15:45 +0800 +Subject: [PATCH 115/140] Revert CacheWB*Node matching rules + +--- + src/hotspot/cpu/riscv/riscv.ad | 8 -------- + 1 file changed, 8 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index c21508b6e7c..e410bd06aa6 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -1475,14 +1475,6 @@ const bool Matcher::match_rule_supported(int opcode) { + } + + switch (opcode) { +- case Op_CacheWB: // fall through +- case Op_CacheWBPreSync: // fall through +- case Op_CacheWBPostSync: +- if (!VM_Version::supports_data_cache_line_flush()) { +- return false; +- } +- break; +- + case Op_PopCountI: + case Op_PopCountL: + return UsePopCountInstruction; + +From 705981aaff19b442b55df8a038aab9c61133bc3a Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 21:21:10 +0800 +Subject: [PATCH 116/140] Revert JDK-8263595: Remove oop type punning in + JavaCallArguments + +--- + src/hotspot/cpu/riscv/jniTypes_riscv.hpp | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/jniTypes_riscv.hpp b/src/hotspot/cpu/riscv/jniTypes_riscv.hpp +index bc4e5758256..df3c0267eea 100644 +--- a/src/hotspot/cpu/riscv/jniTypes_riscv.hpp ++++ b/src/hotspot/cpu/riscv/jniTypes_riscv.hpp +@@ -65,8 +65,9 @@ class JNITypes : private AllStatic { + } + + // Oops are stored in native format in one JavaCallArgument slot at *to. +- static inline void put_obj(const Handle& from_handle, intptr_t *to, int& pos) { *(to + pos++) = (intptr_t)from_handle.raw_value(); } +- static inline void put_obj(jobject from_handle, intptr_t *to, int& pos) { *(to + pos++) = (intptr_t)from_handle; } ++ static inline void put_obj(oop from, intptr_t *to) { *(oop *)(to + 0 ) = from; } ++ static inline void put_obj(oop from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = from; } ++ static inline void put_obj(oop *from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = *from; } + + // Floats are stored in native format in one JavaCallArgument slot at *to. + static inline void put_float(jfloat from, intptr_t *to) { *(jfloat *)(to + 0 ) = from; } + +From bba22725b9f1386d8899941ccee3e8dc7f9a4a6f Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 21:33:01 +0800 +Subject: [PATCH 117/140] Revert JDK-8260012: Reduce inclusion of + collectedHeap.hpp and heapInspection.hpp + +--- + src/hotspot/cpu/riscv/frame_riscv.cpp | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp +index 40ec584b994..d4fcbdcbbde 100644 +--- a/src/hotspot/cpu/riscv/frame_riscv.cpp ++++ b/src/hotspot/cpu/riscv/frame_riscv.cpp +@@ -598,7 +598,7 @@ BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) + oop* obj_p = (oop*)tos_addr; + obj = (obj_p == NULL) ? 
(oop)NULL : *obj_p; + } +- assert(Universe::is_in_heap_or_null(obj), "sanity check"); ++ assert(obj == NULL || Universe::heap()->is_in(obj), "sanity check"); + *oop_result = obj; + break; + } + +From 49000a43408aba29d3dc9ee4e03219e6f85be602 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 21:35:21 +0800 +Subject: [PATCH 118/140] Revert JDK-8271869: AArch64: build errors with GCC11 + in frame::saved_oop_result + +--- + src/hotspot/cpu/riscv/frame_riscv.inline.hpp | 3 --- + 1 file changed, 3 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/frame_riscv.inline.hpp b/src/hotspot/cpu/riscv/frame_riscv.inline.hpp +index 5ac1bf57f57..abd5bda7e49 100644 +--- a/src/hotspot/cpu/riscv/frame_riscv.inline.hpp ++++ b/src/hotspot/cpu/riscv/frame_riscv.inline.hpp +@@ -230,8 +230,6 @@ inline JavaCallWrapper** frame::entry_frame_call_wrapper_addr() const { + + + // Compiled frames +-PRAGMA_DIAG_PUSH +-PRAGMA_NONNULL_IGNORED + inline oop frame::saved_oop_result(RegisterMap* map) const { + oop* result_adr = (oop *)map->location(x10->as_VMReg()); + guarantee(result_adr != NULL, "bad register save location"); +@@ -243,6 +241,5 @@ inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) { + guarantee(result_adr != NULL, "bad register save location"); + *result_adr = obj; + } +-PRAGMA_DIAG_POP + + #endif // CPU_RISCV_FRAME_RISCV_INLINE_HPP + +From 14a46a85e65f6fec09ac566d49a6232216881adb Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 21:40:43 +0800 +Subject: [PATCH 119/140] Revert JDK-8230392: Define AArch64 as + MULTI_COPY_ATOMIC + +--- + src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp | 4 ---- + 1 file changed, 4 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp +index ffd420da024..606f0fa0da3 100644 +--- a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp ++++ b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp +@@ -33,10 +33,6 @@ const int StackAlignmentInBytes = 16; + // 32-bit integer argument values are extended to 64 bits. + const bool CCallingConventionRequiresIntsAsLongs = false; + +-// RISCV has adopted a multicopy atomic model closely following +-// that of ARMv8. +-#define CPU_MULTI_COPY_ATOMIC +- + // To be safe, we deoptimize when we come across an access that needs + // patching. This is similar to what is done on aarch64. 
+ #define DEOPTIMIZE_WHEN_PATCHING + +From 8740928267a831c62f1deb20c910e3c27716bc40 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 21:42:20 +0800 +Subject: [PATCH 120/140] Revert: JDK-8246689: Enable independent compressed + oops/class ptrs on Aarch64 JDK-8241825: Make compressed oops and compressed + class pointers independent (x86_64, PPC, S390) + +--- + src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp | 2 -- + 1 file changed, 2 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp +index 606f0fa0da3..acdf75d324e 100644 +--- a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp ++++ b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp +@@ -41,6 +41,4 @@ const bool CCallingConventionRequiresIntsAsLongs = false; + + #define SUPPORT_RESERVED_STACK_AREA + +-#define COMPRESSED_CLASS_POINTERS_DEPENDS_ON_COMPRESSED_OOPS false +- + #endif // CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP + +From 94b40f4efccc19c8ac66eda6c57381a222b02d2d Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 21:50:49 +0800 +Subject: [PATCH 121/140] Revert JDK-8222637: Obsolete NeedsDeoptSuspend + +--- + src/hotspot/cpu/riscv/globals_riscv.hpp | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp +index b78f258a764..a838a377829 100644 +--- a/src/hotspot/cpu/riscv/globals_riscv.hpp ++++ b/src/hotspot/cpu/riscv/globals_riscv.hpp +@@ -32,6 +32,8 @@ + // Sets the default values for platform dependent flags used by the runtime system. + // (see globals.hpp) + ++define_pd_global(bool, NeedsDeoptSuspend, false); // only register window machines need this + -+public class RISCV64RegisterMap extends RegisterMap { + define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks + define_pd_global(bool, TrapBasedNullChecks, false); + define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs past to check cast + +From 09968c9fc102fd32bc628d3e6fd9d9adcbec4373 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 21:52:44 +0800 +Subject: [PATCH 122/140] Revert JDK-8220051: Remove global safepoint code + +--- + src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp +index acdf75d324e..d6ce8da07b8 100644 +--- a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp ++++ b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp +@@ -41,4 +41,6 @@ const bool CCallingConventionRequiresIntsAsLongs = false; + + #define SUPPORT_RESERVED_STACK_AREA + ++#define THREAD_LOCAL_POLL + -+ /** This is the only public constructor */ -+ public RISCV64RegisterMap(JavaThread thread, boolean updateMap) { -+ super(thread, updateMap); -+ } + #endif // CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP + +From 2f4fb2b5ac420d456421592dc09b81244636ba4d Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 22:00:52 +0800 +Subject: [PATCH 123/140] Revert JDK-8272873: C2: Inlining should not depend on + absolute call site counts + +--- + src/hotspot/cpu/riscv/globals_riscv.hpp | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp +index a838a377829..b4f71c45ec1 100644 +--- a/src/hotspot/cpu/riscv/globals_riscv.hpp ++++ b/src/hotspot/cpu/riscv/globals_riscv.hpp +@@ -41,6 +41,7 @@ 
define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs + define_pd_global(uintx, CodeCacheSegmentSize, 64 TIERED_ONLY(+64)); // Tiered compilation has large code-entry alignment. + define_pd_global(intx, CodeEntryAlignment, 64); + define_pd_global(intx, OptoLoopAlignment, 16); ++define_pd_global(intx, InlineFrequencyCount, 100); + + #define DEFAULT_STACK_YELLOW_PAGES (2) + #define DEFAULT_STACK_RED_PAGES (1) + +From 2df3625eea16fc0d45c0e4cf12c9433f0ec070fd Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 22:02:13 +0800 +Subject: [PATCH 124/140] Revert JDK-8220049: Obsolete ThreadLocalHandshakes + +--- + src/hotspot/cpu/riscv/globals_riscv.hpp | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp +index b4f71c45ec1..b7d85373c4a 100644 +--- a/src/hotspot/cpu/riscv/globals_riscv.hpp ++++ b/src/hotspot/cpu/riscv/globals_riscv.hpp +@@ -76,6 +76,8 @@ define_pd_global(bool, CompactStrings, true); + // Clear short arrays bigger than one word in an arch-specific way + define_pd_global(intx, InitArrayShortSize, BytesPerLong); + ++define_pd_global(bool, ThreadLocalHandshakes, true); + -+ protected RISCV64RegisterMap(RegisterMap map) { -+ super(map); -+ } + define_pd_global(intx, InlineSmallCode, 1000); + + #define ARCH_FLAGS(develop, \ + +From a875c4caa423dd727cea1c891b17f4ded97e57d1 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 22:04:32 +0800 +Subject: [PATCH 125/140] Revert: JDK-8243208: Clean up JVMFlag implementation + JDK-8236625: Remove writeable macro from JVM flags declaration + +--- + src/hotspot/cpu/riscv/globals_riscv.hpp | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp +index b7d85373c4a..0becd9efd35 100644 +--- a/src/hotspot/cpu/riscv/globals_riscv.hpp ++++ b/src/hotspot/cpu/riscv/globals_riscv.hpp +@@ -82,9 +82,12 @@ define_pd_global(intx, InlineSmallCode, 1000); + + #define ARCH_FLAGS(develop, \ + product, \ ++ diagnostic, \ ++ experimental, \ + notproduct, \ + range, \ +- constraint) \ ++ constraint, \ ++ writeable) \ + \ + product(bool, NearCpool, true, \ + "constant pool is close to instructions") \ + +From 19a9e6e8c3dba77cf8be0f25b1aec394aeca0b25 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Wed, 10 May 2023 09:44:12 +0800 +Subject: [PATCH 126/140] Revert JDK-8213436: Obsolete UseMembar && + JDK-8188764: Obsolete AssumeMP and then remove all support for non-MP builds, + always enabled + +--- + src/hotspot/cpu/riscv/globals_riscv.hpp | 2 ++ + src/hotspot/cpu/riscv/vm_version_riscv.cpp | 2 ++ + 2 files changed, 4 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp +index 0becd9efd35..e820898d87f 100644 +--- a/src/hotspot/cpu/riscv/globals_riscv.hpp ++++ b/src/hotspot/cpu/riscv/globals_riscv.hpp +@@ -64,6 +64,8 @@ define_pd_global(intx, StackReservedPages, DEFAULT_STACK_RESERVED_PAGES); + define_pd_global(bool, RewriteBytecodes, true); + define_pd_global(bool, RewriteFrequentPairs, true); + ++define_pd_global(bool, UseMembar, true); + -+ public Object clone() { -+ RISCV64RegisterMap retval = new RISCV64RegisterMap(this); -+ return retval; -+ } + define_pd_global(bool, PreserveFramePointer, false); + + // GC Ergo Flags +diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp +index 50ee7edb708..f13e4269b77 100644 +--- 
a/src/hotspot/cpu/riscv/vm_version_riscv.cpp ++++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp +@@ -139,6 +139,8 @@ void VM_Version::initialize() { + #endif // COMPILER2 + + UNSUPPORTED_OPTION(CriticalJNINatives); + -+ // no PD state to clear or copy: -+ protected void clearPD() {} -+ protected void initializePD() {} -+ protected void initializeFromPD(RegisterMap map) {} -+ protected Address getLocationPD(VMReg reg) { return null; } -+} -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java -index 7d7a6107c..948eabcab 100644 ---- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java -@@ -54,7 +54,7 @@ public class PlatformInfo { ++ FLAG_SET_DEFAULT(UseMembar, true); + } - public static boolean knownCPU(String cpu) { - final String[] KNOWN = -- new String[] {"i386", "x86", "x86_64", "amd64", "sparc", "sparcv9", "ppc64", "ppc64le", "aarch64"}; -+ new String[] {"i386", "x86", "x86_64", "amd64", "sparc", "sparcv9", "ppc64", "ppc64le", "aarch64", "riscv64"}; + #ifdef COMPILER2 + +From 0c4a9d1b6b3b3b31a1c105ff311414ae542764bb Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Mon, 1 May 2023 16:04:15 +0800 +Subject: [PATCH 127/140] Misc adaptations to jdk11u + +--- + src/hotspot/cpu/riscv/c1_globals_riscv.hpp | 2 +- + .../linux_riscv/vm_version_linux_riscv.cpp | 16 ++++++++-------- + 2 files changed, 9 insertions(+), 9 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp +index 25e00bea901..9316d4be02e 100644 +--- a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp ++++ b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp +@@ -57,7 +57,7 @@ define_pd_global(uintx, CodeCacheMinBlockLength, 1); + define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); + define_pd_global(uintx, MetaspaceSize, 12*M ); + define_pd_global(bool, NeverActAsServerClassMachine, true ); +-define_pd_global(uint64_t, MaxRAM, 1ULL*G); ++define_pd_global(uint64_t, MaxRAM, 1ULL*G); + define_pd_global(bool, CICompileOSR, true ); + #endif // !TIERED + define_pd_global(bool, UseTypeProfile, false); +diff --git a/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp +index 4623dbfad42..60260854db6 100644 +--- a/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp ++++ b/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp +@@ -83,14 +83,14 @@ void VM_Version::get_os_cpu_info() { - for(String s : KNOWN) { - if(s.equals(cpu)) -diff --git a/src/utils/hsdis/hsdis.c b/src/utils/hsdis/hsdis.c -index d0a6f4ea8..a29c7bf8b 100644 ---- a/src/utils/hsdis/hsdis.c -+++ b/src/utils/hsdis/hsdis.c -@@ -28,9 +28,6 @@ - */ - - #include /* required by bfd.h */ --#include --#include --#include + uint64_t auxv = getauxval(AT_HWCAP); - #include - #include -@@ -479,6 +476,9 @@ static const char* native_arch_name() { - #endif - #ifdef LIBARCH_s390x - res = "s390:64-bit"; -+#endif -+#ifdef LIBARCH_riscv64 -+ res = "riscv:rv64"; +- static_assert(CPU_I == HWCAP_ISA_I, "Flag CPU_I must follow Linux HWCAP"); +- static_assert(CPU_M == HWCAP_ISA_M, "Flag CPU_M must follow Linux HWCAP"); +- static_assert(CPU_A == HWCAP_ISA_A, "Flag CPU_A must follow Linux HWCAP"); +- static_assert(CPU_F == HWCAP_ISA_F, "Flag CPU_F must follow Linux HWCAP"); +- static_assert(CPU_D == HWCAP_ISA_D, "Flag CPU_D must follow Linux 
HWCAP"); +- static_assert(CPU_C == HWCAP_ISA_C, "Flag CPU_C must follow Linux HWCAP"); +- static_assert(CPU_V == HWCAP_ISA_V, "Flag CPU_V must follow Linux HWCAP"); +- static_assert(CPU_B == HWCAP_ISA_B, "Flag CPU_B must follow Linux HWCAP"); ++ STATIC_ASSERT(CPU_I == HWCAP_ISA_I); ++ STATIC_ASSERT(CPU_M == HWCAP_ISA_M); ++ STATIC_ASSERT(CPU_A == HWCAP_ISA_A); ++ STATIC_ASSERT(CPU_F == HWCAP_ISA_F); ++ STATIC_ASSERT(CPU_D == HWCAP_ISA_D); ++ STATIC_ASSERT(CPU_C == HWCAP_ISA_C); ++ STATIC_ASSERT(CPU_V == HWCAP_ISA_V); ++ STATIC_ASSERT(CPU_B == HWCAP_ISA_B); + _features = auxv & ( + HWCAP_ISA_I | + HWCAP_ISA_M | + +From 4ce5e05526029360ad15eb9639c9c05fac77ac8e Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sat, 20 May 2023 17:51:52 +0800 +Subject: [PATCH 128/140] Save all call-clobbered registers for spark tests may + crash + +--- + .../cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp | 10 ++-------- + 1 file changed, 2 insertions(+), 8 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp +index bc847388f68..e191cbcee2a 100644 +--- a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp +@@ -157,21 +157,15 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, + __ j(done); + + __ bind(runtime); +- // save the live input values +- RegSet saved = RegSet::of(pre_val); +- if (tosca_live) { saved += RegSet::of(x10); } +- if (obj != noreg) { saved += RegSet::of(obj); } +- +- __ push_reg(saved, sp); + ++ __ push_call_clobbered_registers(); + if (expand_call) { + assert(pre_val != c_rarg1, "smashed arg"); + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); + } else { + __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); + } +- +- __ pop_reg(saved, sp); ++ __ pop_call_clobbered_registers(); + + __ bind(done); + + +From 1b8778b0831571e9ac688bbd22afca4cf8f62407 Mon Sep 17 00:00:00 2001 +From: Kuai Wei +Date: Tue, 22 Aug 2023 16:17:31 +0800 +Subject: [PATCH 129/140] Build with gcc 13 + +--- + src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 1 + + src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp | 1 + + 2 files changed, 2 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index 37ccf132986..fd18bb77058 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -49,6 +49,7 @@ + #include "runtime/thread.hpp" + #ifdef COMPILER2 + #include "opto/compile.hpp" ++#include "opto/intrinsicnode.hpp" + #include "opto/node.hpp" + #include "opto/output.hpp" #endif - if (res == NULL) - res = "architecture not set in Makefile!"; -diff --git a/test/hotspot/jtreg/compiler/c2/TestBit.java b/test/hotspot/jtreg/compiler/c2/TestBit.java -index 7805918c2..a21307083 100644 ---- a/test/hotspot/jtreg/compiler/c2/TestBit.java -+++ b/test/hotspot/jtreg/compiler/c2/TestBit.java -@@ -1,5 +1,6 @@ - /* - * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This code is free software; you can redistribute it and/or modify it -@@ -34,7 +35,7 @@ import jdk.test.lib.process.ProcessTools; - * - * @run driver compiler.c2.TestBit - * -- * @requires os.arch=="aarch64" | os.arch=="amd64" | os.arch == "ppc64le" -+ * @requires os.arch=="aarch64" | os.arch=="amd64" | os.arch == "ppc64le" | os.arch == "riscv64" - * @requires vm.debug == true & vm.compiler2.enabled +diff --git a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp +index 31d9254d8ad..ccceed643ed 100644 +--- a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp ++++ b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp +@@ -24,6 +24,7 @@ */ - public class TestBit { -@@ -54,7 +55,8 @@ public class TestBit { - String expectedTestBitInstruction = - "ppc64le".equals(System.getProperty("os.arch")) ? "ANDI" : - "aarch64".equals(System.getProperty("os.arch")) ? "tb" : -- "amd64".equals(System.getProperty("os.arch")) ? "test" : null; -+ "amd64".equals(System.getProperty("os.arch")) ? "test" : -+ "riscv64".equals(System.getProperty("os.arch")) ? "andi" : null; - if (expectedTestBitInstruction != null) { - output.shouldContain(expectedTestBitInstruction); -diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java -index 558b4218f..9d875e33f 100644 ---- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java -+++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java -@@ -1,5 +1,6 @@ - /* - * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This code is free software; you can redistribute it and/or modify it -@@ -41,6 +42,7 @@ package compiler.intrinsics.sha.cli; + #include "precompiled.hpp" ++#include "memory/metaspaceShared.hpp" + #include "runtime/frame.inline.hpp" + #include "runtime/thread.inline.hpp" - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; -+import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU; - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; - import compiler.intrinsics.sha.cli.testcases.UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU; -@@ -54,6 +56,8 @@ public class TestUseSHA1IntrinsicsOptionOnUnsupportedCPU { - SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), - new GenericTestCaseForUnsupportedAArch64CPU( - SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), -+ new GenericTestCaseForUnsupportedRISCV64CPU( -+ SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), - new UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU( - SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), - new GenericTestCaseForOtherCPU( -diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java -index 3ed72bf0a..a7e277060 100644 ---- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java -+++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java -@@ -1,5 +1,6 @@ + +From 4c23be6665aec94462e82e3b4adcf7abb5b23981 Mon Sep 17 00:00:00 2001 +From: Kuai Wei +Date: Tue, 5 Sep 2023 15:37:43 +0800 +Subject: [PATCH 130/140] Fix copyright information + +--- + make/autoconf/build-aux/config.guess | 2 +- + .../MyPackage/HeapMonitorEventsForTwoThreadsTest.java | 1 + + 2 files changed, 2 insertions(+), 1 deletion(-) + +diff --git a/make/autoconf/build-aux/config.guess b/make/autoconf/build-aux/config.guess +index 15111d827ab..a88a9adec3f 100644 +--- a/make/autoconf/build-aux/config.guess ++++ b/make/autoconf/build-aux/config.guess +@@ -1,6 +1,6 @@ + #!/bin/sh + # +-# Copyright (c) 2012, 2022, Oracle and/or its affiliates. All rights reserved. ++# Copyright (c) 2012, 2021, Oracle and/or its affiliates. All rights reserved. + # Copyright (c) 2021, Azul Systems, Inc. All rights reserved. + # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + # +diff --git a/test/hotspot/jtreg/serviceability/jvmti/HeapMonitor/MyPackage/HeapMonitorEventsForTwoThreadsTest.java b/test/hotspot/jtreg/serviceability/jvmti/HeapMonitor/MyPackage/HeapMonitorEventsForTwoThreadsTest.java +index f0b7aed5ceb..54640b245f8 100644 +--- a/test/hotspot/jtreg/serviceability/jvmti/HeapMonitor/MyPackage/HeapMonitorEventsForTwoThreadsTest.java ++++ b/test/hotspot/jtreg/serviceability/jvmti/HeapMonitor/MyPackage/HeapMonitorEventsForTwoThreadsTest.java +@@ -1,4 +1,5 @@ /* - * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2018, Google and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it -@@ -41,6 +42,7 @@ package compiler.intrinsics.sha.cli; + +From 70a060f73c3617e58f881bcee19f1a3ce43f54ff Mon Sep 17 00:00:00 2001 +From: Chris Plummer +Date: Thu, 2 Jul 2020 13:13:10 -0700 +Subject: [PATCH 131/140] 8247533: SA stack walking sometimes fails with + sun.jvm.hotspot.debugger.DebuggerException: get_thread_regs failed for a lwp + +Reviewed-by: sspitsyn, ysuenaga, dtitov +--- + .../native/libsaproc/LinuxDebuggerLocal.c | 8 ++++++- + .../linux/native/libsaproc/ps_proc.c | 3 ++- + .../native/libsaproc/MacosxDebuggerLocal.m | 24 ++++++++++++------- + .../debugger/bsd/BsdDebuggerLocal.java | 2 +- + .../jvm/hotspot/debugger/bsd/BsdThread.java | 10 +++++--- + .../debugger/linux/LinuxDebuggerLocal.java | 2 +- + .../hotspot/debugger/linux/LinuxThread.java | 10 +++++--- + .../windbg/amd64/WindbgAMD64Thread.java | 15 ++++++++---- + .../windows/native/libsaproc/sawindbg.cpp | 14 ++++++++--- + 9 files changed, 61 insertions(+), 27 deletions(-) + +diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c +index 45a927fb5ee..6f1887f8113 100644 +--- a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c ++++ b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c +@@ -413,7 +413,13 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; -+import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU; - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; - import compiler.intrinsics.sha.cli.testcases.UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU; -@@ -54,6 +56,8 @@ public class TestUseSHA256IntrinsicsOptionOnUnsupportedCPU { - SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), - new GenericTestCaseForUnsupportedAArch64CPU( - SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), -+ new GenericTestCaseForUnsupportedRISCV64CPU( -+ SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), - new UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU( - SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), - new GenericTestCaseForOtherCPU( -diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java -index c05cf309d..e714fcc59 100644 ---- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java -+++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java -@@ -1,5 +1,6 @@ + struct ps_prochandle* ph = get_proc_handle(env, this_obj); + if (get_lwp_regs(ph, lwp_id, &gregs) != true) { +- THROW_NEW_DEBUGGER_EXCEPTION_("get_thread_regs failed for a lwp", 0); ++ // This is not considered fatal and does happen on occassion, usually with an ++ // ESRCH error. The root cause is not fully understood, but by ignoring this error ++ // and returning NULL, stacking walking code will get null registers and fallback ++ // to using the "last java frame" if setup. 
++ fprintf(stdout, "WARNING: getThreadIntegerRegisterSet0: get_lwp_regs failed for lwp (%d)\n", lwp_id); ++ fflush(stdout); ++ return NULL; + } + + #undef NPRGREG +diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c +index de5254d859e..691c3f6684a 100644 +--- a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c ++++ b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c +@@ -144,7 +144,8 @@ static bool process_get_lwp_regs(struct ps_prochandle* ph, pid_t pid, struct use + + #ifdef PTRACE_GETREGS_REQ + if (ptrace_getregs(PTRACE_GETREGS_REQ, pid, user, NULL) < 0) { +- print_debug("ptrace(PTRACE_GETREGS, ...) failed for lwp %d\n", pid); ++ print_debug("ptrace(PTRACE_GETREGS, ...) failed for lwp(%d) errno(%d) \"%s\"\n", pid, ++ errno, strerror(errno)); + return false; + } + return true; +diff --git a/src/jdk.hotspot.agent/macosx/native/libsaproc/MacosxDebuggerLocal.m b/src/jdk.hotspot.agent/macosx/native/libsaproc/MacosxDebuggerLocal.m +index 18b8b4282fe..e46370a1f18 100644 +--- a/src/jdk.hotspot.agent/macosx/native/libsaproc/MacosxDebuggerLocal.m ++++ b/src/jdk.hotspot.agent/macosx/native/libsaproc/MacosxDebuggerLocal.m +@@ -685,7 +685,7 @@ jlongArray getThreadIntegerRegisterSetFromCore(JNIEnv *env, jobject this_obj, lo + JNIEnv *env, jobject this_obj, + jlong thread_id) + { +- print_debug("getThreadRegisterSet0 called\n"); ++ print_debug("getThreadIntegerRegisterSet0 called\n"); + + struct ps_prochandle* ph = get_proc_handle(env, this_obj); + if (ph != NULL && ph->core != NULL) { +@@ -705,7 +705,13 @@ jlongArray getThreadIntegerRegisterSetFromCore(JNIEnv *env, jobject this_obj, lo + result = thread_get_state(tid, HSDB_THREAD_STATE, (thread_state_t)&state, &count); + + if (result != KERN_SUCCESS) { +- print_error("getregs: thread_get_state(%d) failed (%d)\n", tid, result); ++ // This is not considered fatal. Unlike on Linux and Windows, we haven't seen a ++ // failure to get thread registers, but if it were to fail the response should ++ // be the same. By ignoring this error and returning NULL, stacking walking code ++ // will get null registers and fallback to using the "last java frame" if setup. 
++ fprintf(stdout, "WARNING: getThreadIntegerRegisterSet0: thread_get_state failed (%d) for thread (%d)\n", ++ result, tid); ++ fflush(stdout); + return NULL; + } + +@@ -808,25 +814,25 @@ jlongArray getThreadIntegerRegisterSetFromCore(JNIEnv *env, jobject this_obj, lo + */ + JNIEXPORT jint JNICALL + Java_sun_jvm_hotspot_debugger_macosx_MacOSXDebuggerLocal_translateTID0( +- JNIEnv *env, jobject this_obj, jint tid) ++ JNIEnv *env, jobject this_obj, jint tid) + { + print_debug("translateTID0 called on tid = 0x%x\n", (int)tid); + + kern_return_t result; + thread_t foreign_tid, usable_tid; + mach_msg_type_name_t type; +- ++ + foreign_tid = tid; +- ++ + task_t gTask = getTask(env, this_obj); +- result = mach_port_extract_right(gTask, foreign_tid, +- MACH_MSG_TYPE_COPY_SEND, ++ result = mach_port_extract_right(gTask, foreign_tid, ++ MACH_MSG_TYPE_COPY_SEND, + &usable_tid, &type); + if (result != KERN_SUCCESS) + return -1; +- ++ + print_debug("translateTID0: 0x%x -> 0x%x\n", foreign_tid, usable_tid); +- ++ + return (jint) usable_tid; + } + +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdDebuggerLocal.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdDebuggerLocal.java +index 655b450c3fc..d0557a7d254 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdDebuggerLocal.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdDebuggerLocal.java +@@ -166,7 +166,7 @@ public WorkerThreadTask execute(WorkerThreadTask task) throws DebuggerException + } catch (InterruptedException x) {} + } + if (lastException != null) { +- throw new DebuggerException(lastException); ++ throw new DebuggerException(lastException.getMessage(), lastException); + } else { + return task; + } +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdThread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdThread.java +index 0d637f30f14..c52d3a51d54 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdThread.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdThread.java +@@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. +- * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2002, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it -@@ -41,6 +42,7 @@ package compiler.intrinsics.sha.cli; - - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; -+import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU; - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; - import compiler.intrinsics.sha.cli.testcases.UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU; -@@ -54,6 +56,8 @@ public class TestUseSHA512IntrinsicsOptionOnUnsupportedCPU { - SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), - new GenericTestCaseForUnsupportedAArch64CPU( - SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), -+ new GenericTestCaseForUnsupportedRISCV64CPU( -+ SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), - new UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU( - SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), - new GenericTestCaseForOtherCPU( -diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java -index 58ce5366b..d52d81e26 100644 ---- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java -+++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java -@@ -1,5 +1,6 @@ +@@ -67,8 +67,12 @@ public String toString() { + public ThreadContext getContext() throws IllegalThreadStateException { + long[] data = debugger.getThreadIntegerRegisterSet(unique_thread_id); + ThreadContext context = BsdThreadContextFactory.createThreadContext(debugger); +- for (int i = 0; i < data.length; i++) { +- context.setRegister(i, data[i]); ++ // null means we failed to get the register set for some reason. The caller ++ // is responsible for dealing with the set of null registers in that case. ++ if (data != null) { ++ for (int i = 0; i < data.length; i++) { ++ context.setRegister(i, data[i]); ++ } + } + return context; + } +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxDebuggerLocal.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxDebuggerLocal.java +index cb6712b58ee..6a0648f508a 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxDebuggerLocal.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxDebuggerLocal.java +@@ -173,7 +173,7 @@ public WorkerThreadTask execute(WorkerThreadTask task) throws DebuggerException + } catch (InterruptedException x) {} + } + if (lastException != null) { +- throw new DebuggerException(lastException); ++ throw new DebuggerException(lastException.getMessage(), lastException); + } else { + return task; + } +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThread.java +index 52307b9cdcf..3fe795d34bc 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThread.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThread.java +@@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. 
+- * Copyright (c) 2002, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2002, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it -@@ -40,6 +41,7 @@ package compiler.intrinsics.sha.cli; - - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; -+import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU; - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; - import compiler.intrinsics.sha.cli.testcases.UseSHASpecificTestCaseForUnsupportedCPU; -@@ -53,6 +55,8 @@ public class TestUseSHAOptionOnUnsupportedCPU { - SHAOptionsBase.USE_SHA_OPTION), - new GenericTestCaseForUnsupportedAArch64CPU( - SHAOptionsBase.USE_SHA_OPTION), -+ new GenericTestCaseForUnsupportedRISCV64CPU( -+ SHAOptionsBase.USE_SHA_OPTION), - new UseSHASpecificTestCaseForUnsupportedCPU( - SHAOptionsBase.USE_SHA_OPTION), - new GenericTestCaseForOtherCPU( -diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java -index faa9fdbae..50e549069 100644 ---- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java -+++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java -@@ -1,5 +1,6 @@ +@@ -73,8 +73,12 @@ public String toString() { + public ThreadContext getContext() throws IllegalThreadStateException { + long[] data = debugger.getThreadIntegerRegisterSet(lwp_id); + ThreadContext context = LinuxThreadContextFactory.createThreadContext(debugger); +- for (int i = 0; i < data.length; i++) { +- context.setRegister(i, data[i]); ++ // null means we failed to get the register set for some reason. The caller ++ // is responsible for dealing with the set of null registers in that case. ++ if (data != null) { ++ for (int i = 0; i < data.length; i++) { ++ context.setRegister(i, data[i]); ++ } + } + return context; + } +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/windbg/amd64/WindbgAMD64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/windbg/amd64/WindbgAMD64Thread.java +index ec5aea35e8c..377650a0a1c 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/windbg/amd64/WindbgAMD64Thread.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/windbg/amd64/WindbgAMD64Thread.java +@@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. +- * Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it -@@ -32,26 +33,27 @@ import jdk.test.lib.cli.predicate.OrPredicate; +@@ -30,9 +30,9 @@ - /** - * Generic test case for SHA-related options targeted to any CPU except -- * AArch64, PPC, S390x, SPARC and X86. -+ * AArch64, RISCV64, PPC, S390x, SPARC and X86. 
- */ - public class GenericTestCaseForOtherCPU extends - SHAOptionsBase.TestCase { - public GenericTestCaseForOtherCPU(String optionName) { -- // Execute the test case on any CPU except AArch64, PPC, S390x, SPARC and X86. -+ // Execute the test case on any CPU except AArch64, RISCV64, PPC, S390x, SPARC and X86. - super(optionName, new NotPredicate( - new OrPredicate(Platform::isAArch64, -+ new OrPredicate(Platform::isRISCV64, - new OrPredicate(Platform::isS390x, - new OrPredicate(Platform::isSparc, - new OrPredicate(Platform::isPPC, - new OrPredicate(Platform::isX64, -- Platform::isX86))))))); -+ Platform::isX86)))))))); + class WindbgAMD64Thread implements ThreadProxy { + private WindbgDebugger debugger; +- private long sysId; ++ private long sysId; // SystemID for Windows thread, stored in OSThread::_thread_id + private boolean gotID; +- private long id; ++ private long id; // ThreadID for Windows thread, returned by GetThreadIdBySystemId + + // The address argument must be the address of the OSThread::_thread_id + WindbgAMD64Thread(WindbgDebugger debugger, Address addr) { +@@ -50,8 +50,12 @@ class WindbgAMD64Thread implements ThreadProxy { + public ThreadContext getContext() throws IllegalThreadStateException { + long[] data = debugger.getThreadIntegerRegisterSet(getThreadID()); + WindbgAMD64ThreadContext context = new WindbgAMD64ThreadContext(debugger); +- for (int i = 0; i < data.length; i++) { +- context.setRegister(i, data[i]); ++ // null means we failed to get the register set for some reason. The caller ++ // is responsible for dealing with the set of null registers in that case. ++ if (data != null) { ++ for (int i = 0; i < data.length; i++) { ++ context.setRegister(i, data[i]); ++ } + } + return context; + } +@@ -86,6 +90,7 @@ public String toString() { + private long getThreadID() { + if (!gotID) { + id = debugger.getThreadIdFromSysId(sysId); ++ gotID = true; } - @Override - protected void verifyWarnings() throws Throwable { - String shouldPassMessage = String.format("JVM should start with " - + "option '%s' without any warnings", optionName); -- // Verify that on non-x86, non-SPARC and non-AArch64 CPU usage of -+ // Verify that on non-x86, non-SPARC, non-AArch64 CPU and non-RISCV64 usage of - // SHA-related options will not cause any warnings. - CommandLineOptionTest.verifySameJVMStartup(null, - new String[] { ".*" + optionName + ".*" }, shouldPassMessage, -diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java -new file mode 100644 -index 000000000..d81b5b53f ---- /dev/null -+++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java -@@ -0,0 +1,102 @@ -+/* -+ * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ */ + return id; +diff --git a/src/jdk.hotspot.agent/windows/native/libsaproc/sawindbg.cpp b/src/jdk.hotspot.agent/windows/native/libsaproc/sawindbg.cpp +index 314cf69c957..e3b218b4dae 100644 +--- a/src/jdk.hotspot.agent/windows/native/libsaproc/sawindbg.cpp ++++ b/src/jdk.hotspot.agent/windows/native/libsaproc/sawindbg.cpp +@@ -45,6 +45,7 @@ + + #include + #include ++#include + + #define DEBUG_NO_IMPLEMENTATION + #include +@@ -765,9 +766,16 @@ JNIEXPORT jlong JNICALL Java_sun_jvm_hotspot_debugger_windbg_WindbgDebuggerLocal + CHECK_EXCEPTION_(0); + + ULONG id = 0; +- COM_VERIFY_OK_(ptrIDebugSystemObjects->GetThreadIdBySystemId((ULONG)sysId, &id), +- "Windbg Error: GetThreadIdBySystemId failed!", 0); +- ++ HRESULT hr = ptrIDebugSystemObjects->GetThreadIdBySystemId((ULONG)sysId, &id); ++ if (hr != S_OK) { ++ // This is not considered fatal and does happen on occassion, usually with an ++ // 0x80004002 "No such interface supported". The root cause is not fully understood, ++ // but by ignoring this error and returning NULL, stacking walking code will get ++ // null registers and fallback to using the "last java frame" if setup. ++ printf("WARNING: GetThreadIdBySystemId failed with 0x%x for sysId (%" PRIu64 ")\n", ++ hr, sysId); ++ return -1; ++ } + return (jlong) id; + } + + +From 2cadd133d25e05be6ab9b16024a37bed79af1f15 Mon Sep 17 00:00:00 2001 +From: Xiaolin Zheng +Date: Wed, 30 Mar 2022 09:04:55 +0000 +Subject: [PATCH 132/140] 8283737: riscv: MacroAssembler::stop() should emit + fixed-length instruction sequence + +Reviewed-by: fyang, shade +--- + src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index fd18bb77058..b72a553da2f 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -542,8 +542,11 @@ void MacroAssembler::resolve_jobject(Register value, Register thread, Register t + void MacroAssembler::stop(const char* msg) { + address ip = pc(); + pusha(); +- li(c_rarg0, (uintptr_t)(address)msg); +- li(c_rarg1, (uintptr_t)(address)ip); ++ // The length of the instruction sequence emitted should be independent ++ // of the values of msg and ip so that the size of mach nodes for scratch ++ // emit and normal emit matches. 
++ mv(c_rarg0, (address)msg); ++ mv(c_rarg1, (address)ip); + mv(c_rarg2, sp); + mv(c_rarg3, CAST_FROM_FN_PTR(address, MacroAssembler::debug64)); + jalr(c_rarg3); + +From 729e0db14cb320aedf1f12051e667513bddbb8e8 Mon Sep 17 00:00:00 2001 +From: Xiaolin Zheng +Date: Sun, 24 Apr 2022 02:17:03 +0000 +Subject: [PATCH 133/140] 8285437: riscv: Fix MachNode size mismatch for + MacroAssembler::verify_oops* + +Reviewed-by: shade, fyang +--- + src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index b72a553da2f..9f80f7e2650 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -389,7 +389,10 @@ void MacroAssembler::verify_oop(Register reg, const char* s) { + push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); + + mv(c_rarg0, reg); // c_rarg0 : x10 +- li(t0, (uintptr_t)(address)b); ++ // The length of the instruction sequence emitted should be independent ++ // of the values of the local char buffer address so that the size of mach ++ // nodes for scratch emit and normal emit matches. ++ mv(t0, (address)b); + + // call indirectly to solve generation ordering problem + int32_t offset = 0; +@@ -425,7 +428,10 @@ void MacroAssembler::verify_oop_addr(Address addr, const char* s) { + ld(x10, addr); + } + +- li(t0, (uintptr_t)(address)b); ++ // The length of the instruction sequence emitted should be independent ++ // of the values of the local char buffer address so that the size of mach ++ // nodes for scratch emit and normal emit matches. ++ mv(t0, (address)b); + + // call indirectly to solve generation ordering problem + int32_t offset = 0; + +From 5cab06c6f09f4b62d54d8d291b1a23f796a085c1 Mon Sep 17 00:00:00 2001 +From: Xiaolin Zheng +Date: Mon, 30 May 2022 07:45:50 +0000 +Subject: [PATCH 134/140] 8287418: riscv: Fix correctness issue of + MacroAssembler::movptr + +Reviewed-by: fjiang, yadongwang, fyang +--- + src/hotspot/cpu/riscv/assembler_riscv.cpp | 14 +++++++------- + src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 18 +++++++++--------- + src/hotspot/cpu/riscv/macroAssembler_riscv.hpp | 3 ++- + src/hotspot/cpu/riscv/nativeInst_riscv.cpp | 2 +- + 4 files changed, 19 insertions(+), 18 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/assembler_riscv.cpp b/src/hotspot/cpu/riscv/assembler_riscv.cpp +index f15ef5304c5..a5f688cda1f 100644 +--- a/src/hotspot/cpu/riscv/assembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/assembler_riscv.cpp +@@ -282,9 +282,9 @@ void Assembler::movptr_with_offset(Register Rd, address addr, int32_t &offset) { + } + #endif + assert(is_unsigned_imm_in_range(imm64, 47, 0) || (imm64 == (uintptr_t)-1), +- "48-bit overflow in address constant"); +- // Load upper 32 bits +- int32_t imm = imm64 >> 16; ++ "bit 47 overflows in address constant"); ++ // Load upper 31 bits ++ int32_t imm = imm64 >> 17; + int64_t upper = imm, lower = imm; + lower = (lower << 52) >> 52; + upper -= lower; +@@ -292,13 +292,13 @@ void Assembler::movptr_with_offset(Register Rd, address addr, int32_t &offset) { + lui(Rd, upper); + addi(Rd, Rd, lower); + +- // Load the rest 16 bits. ++ // Load the rest 17 bits. + slli(Rd, Rd, 11); +- addi(Rd, Rd, (imm64 >> 5) & 0x7ff); +- slli(Rd, Rd, 5); ++ addi(Rd, Rd, (imm64 >> 6) & 0x7ff); ++ slli(Rd, Rd, 6); + + // This offset will be used by following jalr/ld. 
+- offset = imm64 & 0x1f; ++ offset = imm64 & 0x3f; + } + + void Assembler::movptr(Register Rd, uintptr_t imm64) { +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index 9f80f7e2650..f592d7585da 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -1158,12 +1158,12 @@ static int patch_offset_in_pc_relative(address branch, int64_t offset) { + + static int patch_addr_in_movptr(address branch, address target) { + const int MOVPTR_INSTRUCTIONS_NUM = 6; // lui + addi + slli + addi + slli + addi/jalr/load +- int32_t lower = ((intptr_t)target << 36) >> 36; +- int64_t upper = ((intptr_t)target - lower) >> 28; +- Assembler::patch(branch + 0, 31, 12, upper & 0xfffff); // Lui. target[47:28] + target[27] ==> branch[31:12] +- Assembler::patch(branch + 4, 31, 20, (lower >> 16) & 0xfff); // Addi. target[27:16] ==> branch[31:20] +- Assembler::patch(branch + 12, 31, 20, (lower >> 5) & 0x7ff); // Addi. target[15: 5] ==> branch[31:20] +- Assembler::patch(branch + 20, 31, 20, lower & 0x1f); // Addi/Jalr/Load. target[ 4: 0] ==> branch[31:20] ++ int32_t lower = ((intptr_t)target << 35) >> 35; ++ int64_t upper = ((intptr_t)target - lower) >> 29; ++ Assembler::patch(branch + 0, 31, 12, upper & 0xfffff); // Lui. target[48:29] + target[28] ==> branch[31:12] ++ Assembler::patch(branch + 4, 31, 20, (lower >> 17) & 0xfff); // Addi. target[28:17] ==> branch[31:20] ++ Assembler::patch(branch + 12, 31, 20, (lower >> 6) & 0x7ff); // Addi. target[16: 6] ==> branch[31:20] ++ Assembler::patch(branch + 20, 31, 20, lower & 0x3f); // Addi/Jalr/Load. target[ 5: 0] ==> branch[31:20] + return MOVPTR_INSTRUCTIONS_NUM * NativeInstruction::instruction_size; + } + +@@ -1235,9 +1235,9 @@ static long get_offset_of_pc_relative(address insn_addr) { + + static address get_target_of_movptr(address insn_addr) { + assert_cond(insn_addr != NULL); +- intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 28; // Lui. +- target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)) << 16; // Addi. +- target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[3], 31, 20)) << 5; // Addi. ++ intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 29; // Lui. ++ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)) << 17; // Addi. ++ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[3], 31, 20)) << 6; // Addi. + target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[5], 31, 20)); // Addi/Jalr/Load. + return (address) target_address; + } +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +index 45ffc663963..792c1fc2103 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +@@ -827,7 +827,8 @@ class MacroAssembler: public Assembler { + + // Return true if an address is within the 48-bit RISCV64 address space. 
+ bool is_valid_riscv64_address(address addr) { +- return ((uintptr_t)addr >> 48) == 0; ++ // sv48: must have bits 63–48 all equal to bit 47 ++ return ((uintptr_t)addr >> 47) == 0; + } + + void ld_constant(Register dest, const Address &const_addr) { +diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.cpp b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp +index bfe84fa4e30..27011ad1283 100644 +--- a/src/hotspot/cpu/riscv/nativeInst_riscv.cpp ++++ b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp +@@ -89,7 +89,7 @@ bool NativeInstruction::is_movptr_at(address instr) { + is_addi_at(instr + instruction_size) && // Addi + is_slli_shift_at(instr + instruction_size * 2, 11) && // Slli Rd, Rs, 11 + is_addi_at(instr + instruction_size * 3) && // Addi +- is_slli_shift_at(instr + instruction_size * 4, 5) && // Slli Rd, Rs, 5 ++ is_slli_shift_at(instr + instruction_size * 4, 6) && // Slli Rd, Rs, 6 + (is_addi_at(instr + instruction_size * 5) || + is_jalr_at(instr + instruction_size * 5) || + is_load_at(instr + instruction_size * 5)) && // Addi/Jalr/Load + +From 41d73298bf28473b3ba2483e61a39c188eddfde3 Mon Sep 17 00:00:00 2001 +From: Kuai Wei +Date: Fri, 22 Sep 2023 16:57:56 +0800 +Subject: [PATCH 135/140] Fix: Fixed-length mv() mistakenly redirected to li() + during reshaping + +--- + src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 6 ++++++ + src/hotspot/cpu/riscv/macroAssembler_riscv.hpp | 3 +-- + 2 files changed, 7 insertions(+), 2 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index f592d7585da..f851cc1e413 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -1343,6 +1343,12 @@ void MacroAssembler::mv(Register Rd, Address dest) { + movptr(Rd, dest.target()); + } + ++void MacroAssembler::mv(Register Rd, address addr) { ++ // Here in case of use with relocation, use fix length instruction ++ // movptr instead of li ++ movptr(Rd, addr); ++} ++ + void MacroAssembler::mv(Register Rd, RegisterOrConstant src) { + if (src.is_register()) { + mv(Rd, src.as_register()); +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +index 792c1fc2103..65f91532661 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +@@ -540,8 +540,6 @@ class MacroAssembler: public Assembler { + } + + // mv +- void mv(Register Rd, address addr) { li(Rd, (int64_t)addr); } +- + inline void mv(Register Rd, int imm64) { li(Rd, (int64_t)imm64); } + inline void mv(Register Rd, long imm64) { li(Rd, (int64_t)imm64); } + inline void mv(Register Rd, long long imm64) { li(Rd, (int64_t)imm64); } +@@ -552,6 +550,7 @@ class MacroAssembler: public Assembler { + inline void mvw(Register Rd, int32_t imm32) { mv(Rd, imm32); } + + void mv(Register Rd, Address dest); ++ void mv(Register Rd, address dest); + void mv(Register Rd, RegisterOrConstant src); + + // logic + +From 26f4b26a98507ec03a2329bfcbaab393247fe83f Mon Sep 17 00:00:00 2001 +From: Xiaolin Zheng +Date: Fri, 2 Sep 2022 07:01:02 +0000 +Subject: [PATCH 136/140] 8293100: RISC-V: Need to save and restore + callee-saved FloatRegisters in StubGenerator::generate_call_stub + +Reviewed-by: yadongwang, fjiang, shade, vkempik +--- + src/hotspot/cpu/riscv/frame_riscv.hpp | 2 +- + src/hotspot/cpu/riscv/riscv.ad | 18 ++--- + src/hotspot/cpu/riscv/stubGenerator_riscv.cpp | 74 +++++++++++++++++-- + src/hotspot/cpu/riscv/vmreg_riscv.cpp | 2 +- + 4 files 
changed, 80 insertions(+), 16 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/frame_riscv.hpp b/src/hotspot/cpu/riscv/frame_riscv.hpp +index 3b88f6d5a1a..18e021dcb94 100644 +--- a/src/hotspot/cpu/riscv/frame_riscv.hpp ++++ b/src/hotspot/cpu/riscv/frame_riscv.hpp +@@ -131,7 +131,7 @@ + // Entry frames + // n.b. these values are determined by the layout defined in + // stubGenerator for the Java call stub +- entry_frame_after_call_words = 22, ++ entry_frame_after_call_words = 34, + entry_frame_call_wrapper_offset = -10, + + // we don't need a save area +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index e410bd06aa6..69696b272a5 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -8601,7 +8601,7 @@ instruct cmpF_branch(cmpOp cmp, fRegF op1, fRegF op2, label lbl) + effect(USE lbl); + + ins_cost(XFER_COST + BRANCH_COST); +- format %{ "float_b$cmp $op1, $op2 \t#@cmpF_branch"%} ++ format %{ "float_b$cmp $op1, $op2, $lbl \t#@cmpF_branch"%} + + ins_encode %{ + __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), *($lbl$$label)); +@@ -8618,7 +8618,7 @@ instruct cmpF_loop(cmpOp cmp, fRegF op1, fRegF op2, label lbl) + effect(USE lbl); + + ins_cost(XFER_COST + BRANCH_COST); +- format %{ "float_b$cmp $op1, $op2\t#@cmpF_loop"%} ++ format %{ "float_b$cmp $op1, $op2, $lbl\t#@cmpF_loop"%} + + ins_encode %{ + __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), *($lbl$$label)); +@@ -8636,7 +8636,7 @@ instruct cmpD_branch(cmpOp cmp, fRegD op1, fRegD op2, label lbl) + effect(USE lbl); + + ins_cost(XFER_COST + BRANCH_COST); +- format %{ "double_b$cmp $op1, $op2\t#@cmpD_branch"%} ++ format %{ "double_b$cmp $op1, $op2, $lbl\t#@cmpD_branch"%} + + ins_encode %{ + __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), +@@ -8654,7 +8654,7 @@ instruct cmpD_loop(cmpOp cmp, fRegD op1, fRegD op2, label lbl) + effect(USE lbl); + + ins_cost(XFER_COST + BRANCH_COST); +- format %{ "double_b$cmp $op1, $op2\t#@cmpD_loop"%} ++ format %{ "double_b$cmp $op1, $op2, $lbl\t#@cmpD_loop"%} + + ins_encode %{ + __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), +@@ -8929,7 +8929,7 @@ instruct far_cmpFlag_branch(cmpOp cmp, rFlagsReg cr, label lbl) %{ + effect(USE lbl); + + ins_cost(BRANCH_COST); +- format %{ "far_b$cmp $cr, zr, L\t#@far_cmpFlag_branch"%} ++ format %{ "far_b$cmp $cr, zr, $lbl\t#@far_cmpFlag_branch"%} + + ins_encode %{ + __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($cr$$reg), *($lbl$$label), /* is_far */ true); +@@ -9138,7 +9138,7 @@ instruct far_cmpF_branch(cmpOp cmp, fRegF op1, fRegF op2, label lbl) + effect(USE lbl); + + ins_cost(XFER_COST + BRANCH_COST * 2); +- format %{ "far_float_b$cmp $op1, $op2\t#@far_cmpF_branch"%} ++ format %{ "far_float_b$cmp $op1, $op2, $lbl\t#@far_cmpF_branch"%} + + ins_encode %{ + __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), +@@ -9154,7 +9154,7 @@ instruct far_cmpF_loop(cmpOp cmp, fRegF op1, fRegF op2, label lbl) + effect(USE lbl); + + ins_cost(XFER_COST + BRANCH_COST * 2); +- format %{ "far_float_b$cmp $op1, $op2\t#@far_cmpF_loop"%} ++ format %{ "far_float_b$cmp $op1, $op2, $lbl\t#@far_cmpF_loop"%} + + ins_encode %{ + __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), +@@ -9171,7 +9171,7 @@ instruct far_cmpD_branch(cmpOp cmp, fRegD op1, fRegD op2, 
label lbl) + effect(USE lbl); + + ins_cost(XFER_COST + BRANCH_COST * 2); +- format %{ "far_double_b$cmp $op1, $op2\t#@far_cmpD_branch"%} ++ format %{ "far_double_b$cmp $op1, $op2, $lbl\t#@far_cmpD_branch"%} + + ins_encode %{ + __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), +@@ -9187,7 +9187,7 @@ instruct far_cmpD_loop(cmpOp cmp, fRegD op1, fRegD op2, label lbl) + effect(USE lbl); + + ins_cost(XFER_COST + BRANCH_COST * 2); +- format %{ "far_double_b$cmp $op1, $op2\t#@far_cmpD_loop"%} ++ format %{ "far_double_b$cmp $op1, $op2, $lbl\t#@far_cmpD_loop"%} + + ins_encode %{ + __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), +diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp +index 74c38c3d044..9970229c5c5 100644 +--- a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp ++++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp +@@ -118,16 +118,28 @@ class StubGenerator: public StubCodeGenerator { + // we don't need to save x6-x7 and x28-x31 which both C and Java treat as + // volatile + // +- // we save x18-x27 which Java uses as temporary registers and C +- // expects to be callee-save ++ // we save x9, x18-x27, f8-f9, and f18-f27 which Java uses as temporary ++ // registers and C expects to be callee-save + // + // so the stub frame looks like this when we enter Java code + // + // [ return_from_Java ] <--- sp + // [ argument word n ] + // ... +- // -22 [ argument word 1 ] +- // -21 [ saved x27 ] <--- sp_after_call ++ // -34 [ argument word 1 ] ++ // -33 [ saved f27 ] <--- sp_after_call ++ // -32 [ saved f26 ] ++ // -31 [ saved f25 ] ++ // -30 [ saved f24 ] ++ // -29 [ saved f23 ] ++ // -28 [ saved f22 ] ++ // -27 [ saved f21 ] ++ // -26 [ saved f20 ] ++ // -25 [ saved f19 ] ++ // -24 [ saved f18 ] ++ // -23 [ saved f9 ] ++ // -22 [ saved f8 ] ++ // -21 [ saved x27 ] + // -20 [ saved x26 ] + // -19 [ saved x25 ] + // -18 [ saved x24 ] +@@ -152,7 +164,20 @@ class StubGenerator: public StubCodeGenerator { + + // Call stub stack layout word offsets from fp + enum call_stub_layout { +- sp_after_call_off = -21, ++ sp_after_call_off = -33, ++ ++ f27_off = -33, ++ f26_off = -32, ++ f25_off = -31, ++ f24_off = -30, ++ f23_off = -29, ++ f22_off = -28, ++ f21_off = -27, ++ f20_off = -26, ++ f19_off = -25, ++ f18_off = -24, ++ f9_off = -23, ++ f8_off = -22, + + x27_off = -21, + x26_off = -20, +@@ -198,6 +223,19 @@ class StubGenerator: public StubCodeGenerator { + + const Address thread (fp, thread_off * wordSize); + ++ const Address f27_save (fp, f27_off * wordSize); ++ const Address f26_save (fp, f26_off * wordSize); ++ const Address f25_save (fp, f25_off * wordSize); ++ const Address f24_save (fp, f24_off * wordSize); ++ const Address f23_save (fp, f23_off * wordSize); ++ const Address f22_save (fp, f22_off * wordSize); ++ const Address f21_save (fp, f21_off * wordSize); ++ const Address f20_save (fp, f20_off * wordSize); ++ const Address f19_save (fp, f19_off * wordSize); ++ const Address f18_save (fp, f18_off * wordSize); ++ const Address f9_save (fp, f9_off * wordSize); ++ const Address f8_save (fp, f8_off * wordSize); + -+package compiler.intrinsics.sha.cli.testcases; + const Address x27_save (fp, x27_off * wordSize); + const Address x26_save (fp, x26_off * wordSize); + const Address x25_save (fp, x25_off * wordSize); +@@ -244,6 +282,19 @@ class StubGenerator: public StubCodeGenerator { + __ sd(x26, x26_save); + __ sd(x27, x27_save); + ++ __ fsd(f8, 
f8_save); ++ __ fsd(f9, f9_save); ++ __ fsd(f18, f18_save); ++ __ fsd(f19, f19_save); ++ __ fsd(f20, f20_save); ++ __ fsd(f21, f21_save); ++ __ fsd(f22, f22_save); ++ __ fsd(f23, f23_save); ++ __ fsd(f24, f24_save); ++ __ fsd(f25, f25_save); ++ __ fsd(f26, f26_save); ++ __ fsd(f27, f27_save); + -+import compiler.intrinsics.sha.cli.SHAOptionsBase; -+import jdk.test.lib.process.ExitCode; -+import jdk.test.lib.Platform; -+import jdk.test.lib.cli.CommandLineOptionTest; -+import jdk.test.lib.cli.predicate.AndPredicate; -+import jdk.test.lib.cli.predicate.NotPredicate; + // install Java thread in global register now we have saved + // whatever value it held + __ mv(xthread, c_rarg7); +@@ -335,6 +386,19 @@ class StubGenerator: public StubCodeGenerator { + #endif + + // restore callee-save registers ++ __ fld(f27, f27_save); ++ __ fld(f26, f26_save); ++ __ fld(f25, f25_save); ++ __ fld(f24, f24_save); ++ __ fld(f23, f23_save); ++ __ fld(f22, f22_save); ++ __ fld(f21, f21_save); ++ __ fld(f20, f20_save); ++ __ fld(f19, f19_save); ++ __ fld(f18, f18_save); ++ __ fld(f9, f9_save); ++ __ fld(f8, f8_save); + -+/** -+ * Generic test case for SHA-related options targeted to RISCV64 CPUs -+ * which don't support instruction required by the tested option. -+ */ -+public class GenericTestCaseForUnsupportedRISCV64CPU extends -+ SHAOptionsBase.TestCase { -+ public GenericTestCaseForUnsupportedRISCV64CPU(String optionName) { -+ super(optionName, new AndPredicate(Platform::isRISCV64, -+ new NotPredicate(SHAOptionsBase.getPredicateForOption( -+ optionName)))); -+ } + __ ld(x27, x27_save); + __ ld(x26, x26_save); + __ ld(x25, x25_save); +diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.cpp b/src/hotspot/cpu/riscv/vmreg_riscv.cpp +index 5d1187c2a27..c4338715f95 100644 +--- a/src/hotspot/cpu/riscv/vmreg_riscv.cpp ++++ b/src/hotspot/cpu/riscv/vmreg_riscv.cpp +@@ -40,7 +40,7 @@ void VMRegImpl::set_regName() { + FloatRegister freg = ::as_FloatRegister(0); + for ( ; i < ConcreteRegisterImpl::max_fpr ; ) { + for (int j = 0 ; j < FloatRegisterImpl::max_slots_per_register ; j++) { +- regName[i++] = reg->name(); ++ regName[i++] = freg->name(); + } + freg = freg->successor(); + } + +From 69ea557c320ad7b2f35fc0e986af9b485f95addf Mon Sep 17 00:00:00 2001 +From: Xiaolin Zheng +Date: Fri, 28 Oct 2022 11:56:21 +0000 +Subject: [PATCH 137/140] 8295926: RISC-V: C1: Fix + LIRGenerator::do_LibmIntrinsic + +Reviewed-by: yadongwang, fyang +--- + .../cpu/riscv/c1_LIRGenerator_riscv.cpp | 21 +++-- + .../floatingpoint/TestLibmIntrinsics.java | 80 +++++++++++++++++++ + 2 files changed, 96 insertions(+), 5 deletions(-) + create mode 100644 test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java + +diff --git a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp +index f9242251491..c41819fc2ae 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp +@@ -679,19 +679,30 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) { + void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) { + LIRItem value(x->argument_at(0), this); + value.set_destroys_register(); + -+ @Override -+ protected void verifyWarnings() throws Throwable { -+ String shouldPassMessage = String.format("JVM startup should pass with" -+ + "option '-XX:-%s' without any warnings", optionName); -+ //Verify that option could be disabled without any warnings. 
-+ CommandLineOptionTest.verifySameJVMStartup(null, new String[] { -+ SHAOptionsBase.getWarningForUnsupportedCPU(optionName) -+ }, shouldPassMessage, shouldPassMessage, ExitCode.OK, -+ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, -+ CommandLineOptionTest.prepareBooleanFlag(optionName, false)); + LIR_Opr calc_result = rlock_result(x); + LIR_Opr result_reg = result_register_for(x->type()); + -+ shouldPassMessage = String.format("If JVM is started with '-XX:-" -+ + "%s' '-XX:+%s', output should contain warning.", -+ SHAOptionsBase.USE_SHA_OPTION, optionName); -+ -+ // Verify that when the tested option is enabled, then -+ // a warning will occur in VM output if UseSHA is disabled. -+ if (!optionName.equals(SHAOptionsBase.USE_SHA_OPTION)) { -+ CommandLineOptionTest.verifySameJVMStartup( -+ new String[] { SHAOptionsBase.getWarningForUnsupportedCPU(optionName) }, -+ null, -+ shouldPassMessage, -+ shouldPassMessage, -+ ExitCode.OK, -+ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, -+ CommandLineOptionTest.prepareBooleanFlag(SHAOptionsBase.USE_SHA_OPTION, false), -+ CommandLineOptionTest.prepareBooleanFlag(optionName, true)); -+ } -+ } + CallingConvention* cc = NULL; +- BasicTypeList signature(1); +- signature.append(T_DOUBLE); +- if (x->id() == vmIntrinsics::_dpow) { signature.append(T_DOUBLE); } +- cc = frame_map()->c_calling_convention(&signature); +- value.load_item_force(cc->at(0)); + -+ @Override -+ protected void verifyOptionValues() throws Throwable { -+ // Verify that option is disabled by default. -+ CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", -+ String.format("Option '%s' should be disabled by default", -+ optionName), -+ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS); + if (x->id() == vmIntrinsics::_dpow) { + LIRItem value1(x->argument_at(1), this); + -+ // Verify that option is disabled even if it was explicitly enabled -+ // using CLI options. -+ CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", -+ String.format("Option '%s' should be off on unsupported " -+ + "RISCV64CPU even if set to true directly", optionName), -+ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, -+ CommandLineOptionTest.prepareBooleanFlag(optionName, true)); + value1.set_destroys_register(); + -+ // Verify that option is disabled when +UseSHA was passed to JVM. 
-+ CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", -+ String.format("Option '%s' should be off on unsupported " -+ + "RISCV64CPU even if %s flag set to JVM", -+ optionName, CommandLineOptionTest.prepareBooleanFlag( -+ SHAOptionsBase.USE_SHA_OPTION, true)), -+ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, -+ CommandLineOptionTest.prepareBooleanFlag( -+ SHAOptionsBase.USE_SHA_OPTION, true)); -+ } -+} -diff --git a/test/hotspot/jtreg/compiler/intrinsics/string/TestStringLatin1IndexOfChar.java b/test/hotspot/jtreg/compiler/intrinsics/string/TestStringLatin1IndexOfChar.java ++ BasicTypeList signature(2); ++ signature.append(T_DOUBLE); ++ signature.append(T_DOUBLE); ++ cc = frame_map()->c_calling_convention(&signature); ++ value.load_item_force(cc->at(0)); + value1.load_item_force(cc->at(1)); ++ } else { ++ BasicTypeList signature(1); ++ signature.append(T_DOUBLE); ++ cc = frame_map()->c_calling_convention(&signature); ++ value.load_item_force(cc->at(0)); + } ++ + switch (x->id()) { + case vmIntrinsics::_dexp: + if (StubRoutines::dexp() != NULL) { __ call_runtime_leaf(StubRoutines::dexp(), getThreadTemp(), result_reg, cc->args()); } +diff --git a/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java b/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java new file mode 100644 -index 000000000..d3aafec8e +index 00000000000..5c711efddea --- /dev/null -+++ b/test/hotspot/jtreg/compiler/intrinsics/string/TestStringLatin1IndexOfChar.java -@@ -0,0 +1,153 @@ ++++ b/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java +@@ -0,0 +1,80 @@ +/* -+ * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. ++ * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Alibaba Group Holding Limited. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -60386,701 +82027,420 @@ index 000000000..d3aafec8e + +/* + * @test -+ * @bug 8173585 -+ * @summary Test intrinsification of StringLatin1.indexOf(char). Note that -+ * differing code paths are taken contingent upon the length of the input String. -+ * Hence we must test against differing string lengths in order to validate -+ * correct functionality. We also ensure the strings are long enough to trigger -+ * the looping conditions of the individual code paths. 
-+ * -+ * Run with varing levels of AVX and SSE support, also without the intrinsic at all -+ * -+ * @library /compiler/patches /test/lib -+ * @run main/othervm -Xbatch -XX:Tier4InvocationThreshold=200 -XX:CompileThreshold=100 compiler.intrinsics.string.TestStringLatin1IndexOfChar -+ * @run main/othervm -Xbatch -XX:Tier4InvocationThreshold=200 -XX:CompileThreshold=100 -XX:+UnlockDiagnosticVMOptions -XX:DisableIntrinsic=_indexOfL_char compiler.intrinsics.string.TestStringLatin1IndexOfChar -+ * @run main/othervm -Xbatch -XX:Tier4InvocationThreshold=200 -XX:CompileThreshold=100 -XX:+IgnoreUnrecognizedVMOptions -XX:UseSSE=0 compiler.intrinsics.string.TestStringLatin1IndexOfChar -+ * @run main/othervm -Xbatch -XX:Tier4InvocationThreshold=200 -XX:CompileThreshold=100 -XX:+IgnoreUnrecognizedVMOptions -XX:UseAVX=1 compiler.intrinsics.string.TestStringLatin1IndexOfChar -+ * @run main/othervm -Xbatch -XX:Tier4InvocationThreshold=200 -XX:CompileThreshold=100 -XX:+IgnoreUnrecognizedVMOptions -XX:UseAVX=2 compiler.intrinsics.string.TestStringLatin1IndexOfChar -+ * @run main/othervm -Xbatch -XX:Tier4InvocationThreshold=200 -XX:CompileThreshold=100 -XX:+IgnoreUnrecognizedVMOptions -XX:UseAVX=3 compiler.intrinsics.string.TestStringLatin1IndexOfChar ++ * @summary Test libm intrinsics ++ * @library /test/lib / ++ * ++ * @build jdk.test.whitebox.WhiteBox ++ * @run driver jdk.test.lib.helpers.ClassFileInstaller jdk.test.whitebox.WhiteBox ++ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI ++ * -XX:-BackgroundCompilation -XX:-UseOnStackReplacement ++ * compiler.floatingpoint.TestLibmIntrinsics + */ + -+package compiler.intrinsics.string; -+ -+import jdk.test.lib.Asserts; -+ -+public class TestStringLatin1IndexOfChar{ -+ private final static int MAX_LENGTH = 2048;//future proof for AVX-512 instructions -+ -+ public static void main(String[] args) throws Exception { -+ for (int i = 0; i < 1_000; ++i) {//repeat such that we enter into C2 code... -+ findOneItem(); -+ withOffsetTest(); -+ testEmpty(); -+ } -+ } ++package compiler.floatingpoint; + -+ private static void testEmpty(){ -+ Asserts.assertEQ("".indexOf('a'), -1); -+ } ++import compiler.whitebox.CompilerWhiteBoxTest; ++import jdk.test.whitebox.WhiteBox; + -+ private final static char SEARCH_CHAR = 'z'; -+ private final static char INVERLEAVING_CHAR = 'a'; -+ private final static char MISSING_CHAR = 'd'; ++import java.lang.reflect.Method; + -+ private static void findOneItem(){ -+ //test strings of varying length ensuring that for all lengths one instance of the -+ //search char can be found. 
We check what happens when the search character is in -+ //each position of the search string (including first and last positions) -+ for(int strLength : new int[]{1, 15, 31, 32, 79}){ -+ for(int searchPos = 0; searchPos < strLength; searchPos++){ -+ String totest = makeOneItemStringLatin1(strLength, searchPos); ++public class TestLibmIntrinsics { + -+ int intri = totest.indexOf(SEARCH_CHAR); -+ int nonintri = indexOfCharNonIntrinsic(totest, SEARCH_CHAR, 0); -+ Asserts.assertEQ(intri, nonintri); -+ } -+ } -+ } ++ private static final WhiteBox WHITE_BOX = WhiteBox.getWhiteBox(); + -+ private static String makeOneItemStringLatin1(int length, int searchPos){ -+ StringBuilder sb = new StringBuilder(length); ++ private static final double pi = 3.1415926; + -+ for(int n =0; n < length; n++){ -+ sb.append(searchPos==n?SEARCH_CHAR:INVERLEAVING_CHAR); -+ } ++ private static final double expected = 2.5355263553695413; + -+ return sb.toString(); ++ static double m() { ++ return Math.pow(pi, Math.sin(Math.cos(Math.tan(Math.log(Math.log10(Math.exp(pi))))))); + } + -+ private static void withOffsetTest(){ -+ //progressivly move through string checking indexes and starting offset correctly processed -+ //string is of form azaza, aazaazaa, aaazaaazaaa, etc -+ //we find n s.t. maxlength = (n*3) + 2 -+ int maxaInstances = (MAX_LENGTH-2)/3; -+ -+ for(int aInstances = 5; aInstances < MAX_LENGTH; aInstances++){ -+ String totest = makeWithOffsetStringLatin1(aInstances); -+ -+ int startoffset; -+ { -+ int intri = totest.indexOf(SEARCH_CHAR); -+ int nonintri = indexOfCharNonIntrinsic(totest, SEARCH_CHAR, 0); -+ -+ Asserts.assertEQ(intri, nonintri); -+ startoffset = intri+1; -+ } -+ -+ { -+ int intri = totest.indexOf(SEARCH_CHAR, startoffset); -+ int nonintri = indexOfCharNonIntrinsic(totest, SEARCH_CHAR, startoffset); -+ -+ Asserts.assertEQ(intri, nonintri); -+ startoffset = intri+1; -+ } ++ static public void main(String[] args) throws NoSuchMethodException { ++ Method test_method = compiler.floatingpoint.TestLibmIntrinsics.class.getDeclaredMethod("m"); + -+ Asserts.assertEQ(totest.indexOf(SEARCH_CHAR, startoffset), -1);//only two SEARCH_CHAR per string -+ Asserts.assertEQ(totest.indexOf(MISSING_CHAR), -1); -+ } -+ } ++ double interpreter_result = m(); + -+ private static String makeWithOffsetStringLatin1(int aInstances){ -+ StringBuilder sb = new StringBuilder((aInstances*3) + 2); -+ for(int n =0; n < aInstances; n++){ -+ sb.append(INVERLEAVING_CHAR); -+ } ++ // Compile with C1 if possible ++ WHITE_BOX.enqueueMethodForCompilation(test_method, CompilerWhiteBoxTest.COMP_LEVEL_SIMPLE); + -+ sb.append(SEARCH_CHAR); ++ double c1_result = m(); + -+ for(int n =0; n < aInstances; n++){ -+ sb.append(INVERLEAVING_CHAR); -+ } ++ WHITE_BOX.deoptimizeMethod(test_method); + -+ sb.append(SEARCH_CHAR); ++ // Compile it with C2 if possible ++ WHITE_BOX.enqueueMethodForCompilation(test_method, CompilerWhiteBoxTest.COMP_LEVEL_FULL_OPTIMIZATION); + -+ for(int n =0; n < aInstances; n++){ -+ sb.append(INVERLEAVING_CHAR); -+ } -+ return sb.toString(); -+ } ++ double c2_result = m(); + -+ private static int indexOfCharNonIntrinsic(String value, int ch, int fromIndex) { -+ //non intrinsic version of indexOfChar -+ byte c = (byte)ch; -+ for (int i = fromIndex; i < value.length(); i++) { -+ if (value.charAt(i) == c) { -+ return i; -+ } ++ if (interpreter_result != c1_result || ++ interpreter_result != c2_result || ++ c1_result != c2_result) { ++ System.out.println("interpreter = " + interpreter_result + " c1 = " + c1_result + " c2 = " + 
c2_result); ++ throw new RuntimeException("Test Failed"); + } -+ return -1; + } +} -diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java -index 2e3e2717a..8093d6598 100644 ---- a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java -+++ b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java -@@ -25,7 +25,7 @@ - * @test - * @bug 8074981 - * @summary Add C2 x86 Superword support for scalar product reduction optimizations : float test -- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" -+ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" - * - * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 - * -XX:CompileThresholdScaling=0.1 -diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java -index 0e06a9e43..1ff9f36e1 100644 ---- a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java -+++ b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java -@@ -25,7 +25,7 @@ - * @test - * @bug 8074981 - * @summary Add C2 x86 Superword support for scalar product reduction optimizations : float test -- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" -+ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" - * - * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 - * -XX:CompileThresholdScaling=0.1 -diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java -index c3cdbf374..f3531ea74 100644 ---- a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java -+++ b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java -@@ -25,7 +25,7 @@ - * @test - * @bug 8074981 - * @summary Add C2 x86 Superword support for scalar product reduction optimizations : int test -- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" -+ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" - * - * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 - * -XX:CompileThresholdScaling=0.1 -diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java b/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java -index d33bd411f..589209447 100644 ---- a/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java -+++ b/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java -@@ -25,7 +25,7 @@ - * @test - * @bug 8074981 - * @summary Add C2 x86 Superword support for scalar product reduction optimizations : int test -- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" -+ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" - * - * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions - * -XX:LoopUnrollLimit=250 -XX:CompileThresholdScaling=0.1 -diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java 
-index 992fa4b51..907e21371 100644 ---- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java -+++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java -@@ -25,7 +25,7 @@ - * @test - * @bug 8138583 - * @summary Add C2 AArch64 Superword support for scalar sum reduction optimizations : double abs & neg test -- * @requires os.arch=="aarch64" -+ * @requires os.arch=="aarch64" | os.arch=="riscv64" - * - * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 - * -XX:CompileThresholdScaling=0.1 -diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java -index 3e79b3528..c41c0b606 100644 ---- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java -+++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java -@@ -25,7 +25,7 @@ - * @test - * @bug 8138583 - * @summary Add C2 AArch64 Superword support for scalar sum reduction optimizations : float abs & neg test -- * @requires os.arch=="aarch64" -+ * @requires os.arch=="aarch64" | os.arch=="riscv64" - * - * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 - * -XX:CompileThresholdScaling=0.1 -diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java -index 6603dd224..b626da40d 100644 ---- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java -+++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java -@@ -25,7 +25,7 @@ - * @test - * @bug 8135028 - * @summary Add C2 x86 Superword support for scalar sum reduction optimizations : double sqrt test -- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" -+ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" - * - * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 - * -XX:CompileThresholdScaling=0.1 -diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java -index d9a0c9880..92cd84a2f 100644 ---- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java -+++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java -@@ -25,7 +25,7 @@ - * @test - * @bug 8074981 - * @summary Add C2 x86 Superword support for scalar sum reduction optimizations : double test -- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" -+ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" - * - * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 - * -XX:CompileThresholdScaling=0.1 -diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java -index 722db95ae..e72345799 100644 ---- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java -+++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java -@@ -25,7 +25,7 @@ - * @test - * @bug 8074981 - * @summary Add C2 x86 Superword support for scalar sum reduction optimizations : float test -- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" -+ * @requires os.arch=="x86" | 
os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" - * - * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 - * -XX:CompileThresholdScaling=0.1 -diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java -index f58f21feb..f4f67cf52 100644 ---- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java -+++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java -@@ -25,7 +25,7 @@ - * @test - * @bug 8074981 - * @summary Add C2 x86 Superword support for scalar sum reduction optimizations : int test -- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" -+ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" + +From ec57f23aa4001315a030cacd55aa5ef7c3269fbb Mon Sep 17 00:00:00 2001 +From: Kuai Wei +Date: Mon, 9 Oct 2023 11:07:34 +0800 +Subject: [PATCH 138/140] Fix test error after port 8295926 + +--- + .../jtreg/compiler/floatingpoint/TestLibmIntrinsics.java | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java b/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java +index 5c711efddea..5a1b659bbe0 100644 +--- a/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java ++++ b/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java +@@ -27,8 +27,8 @@ + * @summary Test libm intrinsics + * @library /test/lib / * - * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 - * -XX:CompileThresholdScaling=0.1 -diff --git a/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java b/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java -index acb86812d..c5e38ba72 100644 ---- a/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java -+++ b/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java -@@ -24,7 +24,7 @@ +- * @build jdk.test.whitebox.WhiteBox +- * @run driver jdk.test.lib.helpers.ClassFileInstaller jdk.test.whitebox.WhiteBox ++ * @build sun.hotspot.WhiteBox ++ * @run driver ClassFileInstaller sun.hotspot.WhiteBox + * @run main/othervm -Xbootclasspath/a:. 
-XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI + * -XX:-BackgroundCompilation -XX:-UseOnStackReplacement + * compiler.floatingpoint.TestLibmIntrinsics +@@ -37,7 +37,7 @@ + package compiler.floatingpoint; - /* @test - * @bug 8167409 -- * @requires (os.arch != "aarch64") & (os.arch != "arm") -+ * @requires (os.arch != "aarch64") & (os.arch != "arm") & (os.arch != "riscv64") - * @run main/othervm/native -Xcomp -XX:+CriticalJNINatives compiler.runtime.criticalnatives.argumentcorruption.CheckLongArgs - */ - package compiler.runtime.criticalnatives.argumentcorruption; -diff --git a/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java b/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java -index eab36f931..4437367b6 100644 ---- a/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java -+++ b/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java -@@ -24,7 +24,7 @@ + import compiler.whitebox.CompilerWhiteBoxTest; +-import jdk.test.whitebox.WhiteBox; ++import sun.hotspot.WhiteBox; - /* @test - * @bug 8167408 -- * @requires (os.arch != "aarch64") & (os.arch != "arm") -+ * @requires (os.arch != "aarch64") & (os.arch != "arm") & (os.arch != "riscv64") - * @run main/othervm/native -Xcomp -XX:+CriticalJNINatives compiler.runtime.criticalnatives.lookup.LookUp - */ - package compiler.runtime.criticalnatives.lookup; -diff --git a/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java b/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java -index 7774dabcb..284b51019 100644 ---- a/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java -+++ b/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java -@@ -61,15 +61,17 @@ public class IntrinsicPredicates { + import java.lang.reflect.Method; - public static final BooleanSupplier SHA1_INSTRUCTION_AVAILABLE - = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha1" }, null), -+ new OrPredicate(new CPUSpecificPredicate("riscv64.*", new String[] { "sha1" }, null), - new OrPredicate(new CPUSpecificPredicate("s390.*", new String[] { "sha1" }, null), - new OrPredicate(new CPUSpecificPredicate("sparc.*", new String[] { "sha1" }, null), - // x86 variants - new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "sha" }, null), - new OrPredicate(new CPUSpecificPredicate("i386.*", new String[] { "sha" }, null), -- new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null)))))); -+ new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null))))))); + +From b115ec4381ad3ad8cbe9ca3d225cb438538916ac Mon Sep 17 00:00:00 2001 +From: Kuai Wei +Date: Tue, 17 Oct 2023 14:22:49 +0800 +Subject: [PATCH 139/140] Revert JDK-8247533: SA stack walking sometimes fails + with sun.jvm.hotspot.debugger.DebuggerException: get_thread_regs failed for a + lwp + +--- + .../native/libsaproc/LinuxDebuggerLocal.c | 8 +------ + .../linux/native/libsaproc/ps_proc.c | 3 +-- + .../native/libsaproc/MacosxDebuggerLocal.m | 24 +++++++------------ + .../debugger/bsd/BsdDebuggerLocal.java | 2 +- + .../jvm/hotspot/debugger/bsd/BsdThread.java | 10 +++----- + .../debugger/linux/LinuxDebuggerLocal.java | 2 +- + .../hotspot/debugger/linux/LinuxThread.java | 10 +++----- + .../windbg/amd64/WindbgAMD64Thread.java | 15 ++++-------- + .../windows/native/libsaproc/sawindbg.cpp | 14 +++-------- + 9 files changed, 27 insertions(+), 61 deletions(-) + +diff --git 
a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c +index 6f1887f8113..45a927fb5ee 100644 +--- a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c ++++ b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c +@@ -413,13 +413,7 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo - public static final BooleanSupplier SHA256_INSTRUCTION_AVAILABLE - = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha256" }, null), -+ new OrPredicate(new CPUSpecificPredicate("riscv64.*", new String[] { "sha256" }, null), - new OrPredicate(new CPUSpecificPredicate("s390.*", new String[] { "sha256" }, null), - new OrPredicate(new CPUSpecificPredicate("sparc.*", new String[] { "sha256" }, null), - new OrPredicate(new CPUSpecificPredicate("ppc64.*", new String[] { "sha" }, null), -@@ -79,10 +81,11 @@ public class IntrinsicPredicates { - new OrPredicate(new CPUSpecificPredicate("i386.*", new String[] { "sha" }, null), - new OrPredicate(new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null), - new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "avx2", "bmi2" }, null), -- new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null)))))))))); -+ new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null))))))))))); + struct ps_prochandle* ph = get_proc_handle(env, this_obj); + if (get_lwp_regs(ph, lwp_id, &gregs) != true) { +- // This is not considered fatal and does happen on occassion, usually with an +- // ESRCH error. The root cause is not fully understood, but by ignoring this error +- // and returning NULL, stacking walking code will get null registers and fallback +- // to using the "last java frame" if setup. 
+- fprintf(stdout, "WARNING: getThreadIntegerRegisterSet0: get_lwp_regs failed for lwp (%d)\n", lwp_id); +- fflush(stdout); +- return NULL; ++ THROW_NEW_DEBUGGER_EXCEPTION_("get_thread_regs failed for a lwp", 0); + } - public static final BooleanSupplier SHA512_INSTRUCTION_AVAILABLE - = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha512" }, null), -+ new OrPredicate(new CPUSpecificPredicate("riscv64.*", new String[] { "sha512" }, null), - new OrPredicate(new CPUSpecificPredicate("s390.*", new String[] { "sha512" }, null), - new OrPredicate(new CPUSpecificPredicate("sparc.*", new String[] { "sha512" }, null), - new OrPredicate(new CPUSpecificPredicate("ppc64.*", new String[] { "sha" }, null), -@@ -92,7 +95,7 @@ public class IntrinsicPredicates { - new OrPredicate(new CPUSpecificPredicate("i386.*", new String[] { "sha" }, null), - new OrPredicate(new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null), - new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "avx2", "bmi2" }, null), -- new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null)))))))))); -+ new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null))))))))))); + #undef NPRGREG +diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c +index 691c3f6684a..de5254d859e 100644 +--- a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c ++++ b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c +@@ -144,8 +144,7 @@ static bool process_get_lwp_regs(struct ps_prochandle* ph, pid_t pid, struct use - public static final BooleanSupplier ANY_SHA_INSTRUCTION_AVAILABLE - = new OrPredicate(IntrinsicPredicates.SHA1_INSTRUCTION_AVAILABLE, -diff --git a/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java b/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java -index 57256aa5a..16c199e37 100644 ---- a/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java -+++ b/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java -@@ -1,5 +1,6 @@ - /* - * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -112,7 +113,7 @@ public class CheckForProperDetailStackTrace { - // It's ok for ARM not to have symbols, because it does not support NMT detail - // when targeting thumb2. It's also ok for Windows not to have symbols, because - // they are only available if the symbols file is included with the build. -- if (Platform.isWindows() || Platform.isARM()) { -+ if (Platform.isWindows() || Platform.isARM() || Platform.isRISCV64()) { - return; // we are done - } - output.reportDiagnosticSummary(); -diff --git a/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java b/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java -index 127bb6abc..46be4dc98 100644 ---- a/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java -+++ b/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java -@@ -1,5 +1,6 @@ + #ifdef PTRACE_GETREGS_REQ + if (ptrace_getregs(PTRACE_GETREGS_REQ, pid, user, NULL) < 0) { +- print_debug("ptrace(PTRACE_GETREGS, ...) failed for lwp(%d) errno(%d) \"%s\"\n", pid, +- errno, strerror(errno)); ++ print_debug("ptrace(PTRACE_GETREGS, ...) 
failed for lwp %d\n", pid); + return false; + } + return true; +diff --git a/src/jdk.hotspot.agent/macosx/native/libsaproc/MacosxDebuggerLocal.m b/src/jdk.hotspot.agent/macosx/native/libsaproc/MacosxDebuggerLocal.m +index e46370a1f18..18b8b4282fe 100644 +--- a/src/jdk.hotspot.agent/macosx/native/libsaproc/MacosxDebuggerLocal.m ++++ b/src/jdk.hotspot.agent/macosx/native/libsaproc/MacosxDebuggerLocal.m +@@ -685,7 +685,7 @@ jlongArray getThreadIntegerRegisterSetFromCore(JNIEnv *env, jobject this_obj, lo + JNIEnv *env, jobject this_obj, + jlong thread_id) + { +- print_debug("getThreadIntegerRegisterSet0 called\n"); ++ print_debug("getThreadRegisterSet0 called\n"); + + struct ps_prochandle* ph = get_proc_handle(env, this_obj); + if (ph != NULL && ph->core != NULL) { +@@ -705,13 +705,7 @@ jlongArray getThreadIntegerRegisterSetFromCore(JNIEnv *env, jobject this_obj, lo + result = thread_get_state(tid, HSDB_THREAD_STATE, (thread_state_t)&state, &count); + + if (result != KERN_SUCCESS) { +- // This is not considered fatal. Unlike on Linux and Windows, we haven't seen a +- // failure to get thread registers, but if it were to fail the response should +- // be the same. By ignoring this error and returning NULL, stacking walking code +- // will get null registers and fallback to using the "last java frame" if setup. +- fprintf(stdout, "WARNING: getThreadIntegerRegisterSet0: thread_get_state failed (%d) for thread (%d)\n", +- result, tid); +- fflush(stdout); ++ print_error("getregs: thread_get_state(%d) failed (%d)\n", tid, result); + return NULL; + } + +@@ -814,25 +808,25 @@ jlongArray getThreadIntegerRegisterSetFromCore(JNIEnv *env, jobject this_obj, lo + */ + JNIEXPORT jint JNICALL + Java_sun_jvm_hotspot_debugger_macosx_MacOSXDebuggerLocal_translateTID0( +- JNIEnv *env, jobject this_obj, jint tid) ++ JNIEnv *env, jobject this_obj, jint tid) + { + print_debug("translateTID0 called on tid = 0x%x\n", (int)tid); + + kern_return_t result; + thread_t foreign_tid, usable_tid; + mach_msg_type_name_t type; +- ++ + foreign_tid = tid; +- ++ + task_t gTask = getTask(env, this_obj); +- result = mach_port_extract_right(gTask, foreign_tid, +- MACH_MSG_TYPE_COPY_SEND, ++ result = mach_port_extract_right(gTask, foreign_tid, ++ MACH_MSG_TYPE_COPY_SEND, + &usable_tid, &type); + if (result != KERN_SUCCESS) + return -1; +- ++ + print_debug("translateTID0: 0x%x -> 0x%x\n", foreign_tid, usable_tid); +- ++ + return (jint) usable_tid; + } + +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdDebuggerLocal.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdDebuggerLocal.java +index d0557a7d254..655b450c3fc 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdDebuggerLocal.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdDebuggerLocal.java +@@ -166,7 +166,7 @@ public WorkerThreadTask execute(WorkerThreadTask task) throws DebuggerException + } catch (InterruptedException x) {} + } + if (lastException != null) { +- throw new DebuggerException(lastException.getMessage(), lastException); ++ throw new DebuggerException(lastException); + } else { + return task; + } +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdThread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdThread.java +index c52d3a51d54..0d637f30f14 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdThread.java ++++ 
b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdThread.java +@@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. +- * Copyright (c) 2002, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it -@@ -239,7 +240,7 @@ public class ReservedStackTest { - return Platform.isAix() || - (Platform.isLinux() && - (Platform.isPPC() || Platform.isS390x() || Platform.isX64() || -- Platform.isX86())) || -+ Platform.isX86() || Platform.isRISCV64())) || - Platform.isOSX() || - Platform.isSolaris(); +@@ -67,12 +67,8 @@ public String toString() { + public ThreadContext getContext() throws IllegalThreadStateException { + long[] data = debugger.getThreadIntegerRegisterSet(unique_thread_id); + ThreadContext context = BsdThreadContextFactory.createThreadContext(debugger); +- // null means we failed to get the register set for some reason. The caller +- // is responsible for dealing with the set of null registers in that case. +- if (data != null) { +- for (int i = 0; i < data.length; i++) { +- context.setRegister(i, data[i]); +- } ++ for (int i = 0; i < data.length; i++) { ++ context.setRegister(i, data[i]); + } + return context; } -diff --git a/test/hotspot/jtreg/test_env.sh b/test/hotspot/jtreg/test_env.sh -index 0c300d4fd..7f3698c47 100644 ---- a/test/hotspot/jtreg/test_env.sh -+++ b/test/hotspot/jtreg/test_env.sh -@@ -185,6 +185,11 @@ if [ $? = 0 ] - then - VM_CPU="arm" - fi -+grep "riscv64" vm_version.out > ${NULL} -+if [ $? = 0 ] -+then -+ VM_CPU="riscv64" -+fi - grep "ppc" vm_version.out > ${NULL} - if [ $? = 0 ] - then -diff --git a/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java b/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java -index 77458554b..73e92855d 100644 ---- a/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java -+++ b/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java -@@ -1,5 +1,6 @@ - /* - * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This code is free software; you can redistribute it and/or modify it -@@ -45,7 +46,7 @@ import java.util.Set; - */ - public class TestMutuallyExclusivePlatformPredicates { - private static enum MethodGroup { -- ARCH("isAArch64", "isARM", "isPPC", "isS390x", "isSparc", "isX64", "isX86"), -+ ARCH("isRISCV64", "isAArch64", "isARM", "isPPC", "isS390x", "isSparc", "isX64", "isX86"), - BITNESS("is32bit", "is64bit"), - OS("isAix", "isLinux", "isOSX", "isSolaris", "isWindows"), - VM_TYPE("isClient", "isServer", "isGraal", "isMinimal", "isZero", "isEmbedded"), -diff --git a/test/hotspot/jtreg/vmTestbase/nsk/jvmti/GetThreadInfo/thrinfo001.java b/test/hotspot/jtreg/vmTestbase/nsk/jvmti/GetThreadInfo/thrinfo001.java -index cb3348a0f..bc0d1a743 100644 ---- a/test/hotspot/jtreg/vmTestbase/nsk/jvmti/GetThreadInfo/thrinfo001.java -+++ b/test/hotspot/jtreg/vmTestbase/nsk/jvmti/GetThreadInfo/thrinfo001.java -@@ -63,13 +63,13 @@ public class thrinfo001 { - try { - t_a.join(); - } catch (InterruptedException e) {} -+ checkInfo(t_a, t_a.getThreadGroup(), 1); - - thrinfo001b t_b = new thrinfo001b(); - t_b.setPriority(Thread.MIN_PRIORITY); - t_b.setDaemon(true); - checkInfo(t_b, t_b.getThreadGroup(), 2); - t_b.start(); -- checkInfo(t_b, t_b.getThreadGroup(), 2); - try { - t_b.join(); - } catch (InterruptedException e) {} -diff --git a/test/jdk/jdk/jfr/event/os/TestCPUInformation.java b/test/jdk/jdk/jfr/event/os/TestCPUInformation.java -index 7990c49a1..bb8c79cdd 100644 ---- a/test/jdk/jdk/jfr/event/os/TestCPUInformation.java -+++ b/test/jdk/jdk/jfr/event/os/TestCPUInformation.java -@@ -1,5 +1,6 @@ +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxDebuggerLocal.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxDebuggerLocal.java +index 6a0648f508a..cb6712b58ee 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxDebuggerLocal.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxDebuggerLocal.java +@@ -173,7 +173,7 @@ public WorkerThreadTask execute(WorkerThreadTask task) throws DebuggerException + } catch (InterruptedException x) {} + } + if (lastException != null) { +- throw new DebuggerException(lastException.getMessage(), lastException); ++ throw new DebuggerException(lastException); + } else { + return task; + } +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThread.java +index 3fe795d34bc..52307b9cdcf 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThread.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThread.java +@@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. +- * Copyright (c) 2002, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2002, 2018, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it -@@ -54,8 +55,8 @@ public class TestCPUInformation { - Events.assertField(event, "hwThreads").atLeast(1); - Events.assertField(event, "cores").atLeast(1); - Events.assertField(event, "sockets").atLeast(1); -- Events.assertField(event, "cpu").containsAny("Intel", "AMD", "Unknown x86", "sparc", "ARM", "PPC", "PowerPC", "AArch64", "s390"); -- Events.assertField(event, "description").containsAny("Intel", "AMD", "Unknown x86", "SPARC", "ARM", "PPC", "PowerPC", "AArch64", "s390"); -+ Events.assertField(event, "cpu").containsAny("Intel", "AMD", "Unknown x86", "sparc", "ARM", "PPC", "PowerPC", "AArch64", "s390", "RISCV64"); -+ Events.assertField(event, "description").containsAny("Intel", "AMD", "Unknown x86", "SPARC", "ARM", "PPC", "PowerPC", "AArch64", "s390", "RISCV64"); +@@ -73,12 +73,8 @@ public String toString() { + public ThreadContext getContext() throws IllegalThreadStateException { + long[] data = debugger.getThreadIntegerRegisterSet(lwp_id); + ThreadContext context = LinuxThreadContextFactory.createThreadContext(debugger); +- // null means we failed to get the register set for some reason. The caller +- // is responsible for dealing with the set of null registers in that case. +- if (data != null) { +- for (int i = 0; i < data.length; i++) { +- context.setRegister(i, data[i]); +- } ++ for (int i = 0; i < data.length; i++) { ++ context.setRegister(i, data[i]); } + return context; } - } -diff --git a/test/lib/jdk/test/lib/Platform.java b/test/lib/jdk/test/lib/Platform.java -index f4ee0546c..a9cd63db9 100644 ---- a/test/lib/jdk/test/lib/Platform.java -+++ b/test/lib/jdk/test/lib/Platform.java -@@ -1,5 +1,6 @@ +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/windbg/amd64/WindbgAMD64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/windbg/amd64/WindbgAMD64Thread.java +index 377650a0a1c..ec5aea35e8c 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/windbg/amd64/WindbgAMD64Thread.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/windbg/amd64/WindbgAMD64Thread.java +@@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. +- * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it -@@ -202,6 +203,10 @@ public class Platform { - return isArch("arm.*"); - } +@@ -30,9 +30,9 @@ -+ public static boolean isRISCV64() { -+ return isArch("riscv64"); -+ } -+ - public static boolean isPPC() { - return isArch("ppc.*"); + class WindbgAMD64Thread implements ThreadProxy { + private WindbgDebugger debugger; +- private long sysId; // SystemID for Windows thread, stored in OSThread::_thread_id ++ private long sysId; + private boolean gotID; +- private long id; // ThreadID for Windows thread, returned by GetThreadIdBySystemId ++ private long id; + + // The address argument must be the address of the OSThread::_thread_id + WindbgAMD64Thread(WindbgDebugger debugger, Address addr) { +@@ -50,12 +50,8 @@ class WindbgAMD64Thread implements ThreadProxy { + public ThreadContext getContext() throws IllegalThreadStateException { + long[] data = debugger.getThreadIntegerRegisterSet(getThreadID()); + WindbgAMD64ThreadContext context = new WindbgAMD64ThreadContext(debugger); +- // null means we failed to get the register set for some reason. The caller +- // is responsible for dealing with the set of null registers in that case. +- if (data != null) { +- for (int i = 0; i < data.length; i++) { +- context.setRegister(i, data[i]); +- } ++ for (int i = 0; i < data.length; i++) { ++ context.setRegister(i, data[i]); } -diff --git a/test/micro/org/openjdk/bench/java/lang/StringIndexOfChar.java b/test/micro/org/openjdk/bench/java/lang/StringIndexOfChar.java -new file mode 100644 -index 000000000..6852c0540 ---- /dev/null -+++ b/test/micro/org/openjdk/bench/java/lang/StringIndexOfChar.java -@@ -0,0 +1,221 @@ -+/* -+ * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ */ -+package org.openjdk.bench.java.lang; -+ -+import java.util.Random; -+import org.openjdk.jmh.annotations.Benchmark; -+import org.openjdk.jmh.annotations.BenchmarkMode; -+import org.openjdk.jmh.annotations.OutputTimeUnit; -+import org.openjdk.jmh.annotations.Mode; -+import org.openjdk.jmh.annotations.Scope; -+import org.openjdk.jmh.annotations.State; -+ -+import java.util.concurrent.TimeUnit; -+ -+/** -+ * This benchmark can be used to measure performance between StringLatin1 and StringUTF16 in terms of -+ * performance of the indexOf(char) and indexOf(String) methods which are intrinsified. 
-+ * On x86 the behaviour of the indexOf method is contingent upon the length of the string -+ */ -+@BenchmarkMode(Mode.AverageTime) -+@OutputTimeUnit(TimeUnit.NANOSECONDS) -+@State(Scope.Thread) -+public class IndexOfBenchmark { -+ private static final int loops = 100000; -+ private static final Random rng = new Random(1999); -+ private static final int pathCnt = 1000; -+ private static final String [] latn1_short = new String[pathCnt]; -+ private static final String [] latn1_sse4 = new String[pathCnt]; -+ private static final String [] latn1_avx2 = new String[pathCnt]; -+ private static final String [] latn1_mixedLength = new String[pathCnt]; -+ private static final String [] utf16_short = new String[pathCnt]; -+ private static final String [] utf16_sse4 = new String[pathCnt]; -+ private static final String [] utf16_avx2 = new String[pathCnt]; -+ private static final String [] utf16_mixedLength = new String[pathCnt]; -+ static { -+ for (int i = 0; i < pathCnt; i++) { -+ latn1_short[i] = makeRndString(false, 15); -+ latn1_sse4[i] = makeRndString(false, 16); -+ latn1_avx2[i] = makeRndString(false, 32); -+ utf16_short[i] = makeRndString(true, 7); -+ utf16_sse4[i] = makeRndString(true, 8); -+ utf16_avx2[i] = makeRndString(true, 16); -+ latn1_mixedLength[i] = makeRndString(false, rng.nextInt(65)); -+ utf16_mixedLength[i] = makeRndString(true, rng.nextInt(65)); -+ } -+ } -+ -+ private static String makeRndString(boolean isUtf16, int length) { -+ StringBuilder sb = new StringBuilder(length); -+ if(length > 0){ -+ sb.append(isUtf16?'☺':'b'); -+ -+ for (int i = 1; i < length-1; i++) { -+ sb.append((char)('b' + rng.nextInt(26))); -+ } -+ -+ sb.append(rng.nextInt(3) >= 1?'a':'b');//66.6% of time 'a' is in string -+ } -+ return sb.toString(); -+ } -+ -+ -+ @Benchmark -+ public static void latin1_mixed_char() { -+ int ret = 0; -+ for (String what : latn1_mixedLength) { -+ ret += what.indexOf('a'); -+ } -+ } -+ -+ @Benchmark -+ public static void utf16_mixed_char() { -+ int ret = 0; -+ for (String what : utf16_mixedLength) { -+ ret += what.indexOf('a'); -+ } -+ } -+ -+ @Benchmark -+ public static void latin1_mixed_String() { -+ int ret = 0; -+ for (String what : latn1_mixedLength) { -+ ret += what.indexOf("a"); -+ } -+ } -+ -+ @Benchmark -+ public static void utf16_mixed_String() { -+ int ret = 0; -+ for (String what : utf16_mixedLength) { -+ ret += what.indexOf("a"); -+ } -+ } -+ -+ ////////// more detailed code path dependent tests ////////// -+ -+ @Benchmark -+ public static void latin1_Short_char() { -+ int ret = 0; -+ for (String what : latn1_short) { -+ ret += what.indexOf('a'); -+ } -+ } -+ -+ @Benchmark -+ public static void latin1_SSE4_char() { -+ int ret = 0; -+ for (String what : latn1_sse4) { -+ ret += what.indexOf('a'); -+ } -+ } -+ -+ @Benchmark -+ public static void latin1_AVX2_char() { -+ int ret = 0; -+ for (String what : latn1_avx2) { -+ ret += what.indexOf('a'); -+ } -+ } -+ -+ @Benchmark -+ public static int utf16_Short_char() { -+ int ret = 0; -+ for (String what : utf16_short) { -+ ret += what.indexOf('a'); -+ } -+ return ret; -+ } -+ -+ @Benchmark -+ public static int utf16_SSE4_char() { -+ int ret = 0; -+ for (String what : utf16_sse4) { -+ ret += what.indexOf('a'); -+ } -+ return ret; -+ } -+ -+ @Benchmark -+ public static int utf16_AVX2_char() { -+ int ret = 0; -+ for (String what : utf16_avx2) { -+ ret += what.indexOf('a'); -+ } -+ return ret; -+ } -+ -+ @Benchmark -+ public static int latin1_Short_String() { -+ int ret = 0; -+ for (String what : latn1_short) { -+ ret += 
what.indexOf("a"); -+ } -+ return ret; -+ } -+ -+ @Benchmark -+ public static int latin1_SSE4_String() { -+ int ret = 0; -+ for (String what : latn1_sse4) { -+ ret += what.indexOf("a"); -+ } -+ return ret; -+ } -+ -+ @Benchmark -+ public static int latin1_AVX2_String() { -+ int ret = 0; -+ for (String what : latn1_avx2) { -+ ret += what.indexOf("a"); -+ } -+ return ret; -+ } -+ -+ @Benchmark -+ public static int utf16_Short_String() { -+ int ret = 0; -+ for (String what : utf16_short) { -+ ret += what.indexOf("a"); -+ } -+ return ret; -+ } -+ -+ @Benchmark -+ public static int utf16_SSE4_String() { -+ int ret = 0; -+ for (String what : utf16_sse4) { -+ ret += what.indexOf("a"); -+ } -+ return ret; -+ } -+ -+ @Benchmark -+ public static int utf16_AVX2_String() { -+ int ret = 0; -+ for (String what : utf16_avx2) { -+ ret += what.indexOf("a"); -+ } -+ return ret; -+ } -+} --- -2.40.0.windows.1 + return context; + } +@@ -90,7 +86,6 @@ public String toString() { + private long getThreadID() { + if (!gotID) { + id = debugger.getThreadIdFromSysId(sysId); +- gotID = true; + } + + return id; +diff --git a/src/jdk.hotspot.agent/windows/native/libsaproc/sawindbg.cpp b/src/jdk.hotspot.agent/windows/native/libsaproc/sawindbg.cpp +index e3b218b4dae..314cf69c957 100644 +--- a/src/jdk.hotspot.agent/windows/native/libsaproc/sawindbg.cpp ++++ b/src/jdk.hotspot.agent/windows/native/libsaproc/sawindbg.cpp +@@ -45,7 +45,6 @@ + + #include + #include +-#include + + #define DEBUG_NO_IMPLEMENTATION + #include +@@ -766,16 +765,9 @@ JNIEXPORT jlong JNICALL Java_sun_jvm_hotspot_debugger_windbg_WindbgDebuggerLocal + CHECK_EXCEPTION_(0); + + ULONG id = 0; +- HRESULT hr = ptrIDebugSystemObjects->GetThreadIdBySystemId((ULONG)sysId, &id); +- if (hr != S_OK) { +- // This is not considered fatal and does happen on occassion, usually with an +- // 0x80004002 "No such interface supported". The root cause is not fully understood, +- // but by ignoring this error and returning NULL, stacking walking code will get +- // null registers and fallback to using the "last java frame" if setup. +- printf("WARNING: GetThreadIdBySystemId failed with 0x%x for sysId (%" PRIu64 ")\n", +- hr, sysId); +- return -1; +- } ++ COM_VERIFY_OK_(ptrIDebugSystemObjects->GetThreadIdBySystemId((ULONG)sysId, &id), ++ "Windbg Error: GetThreadIdBySystemId failed!", 0); ++ + return (jlong) id; + } + +From 4b01e13731fc330ca3d57a5cd532c91bc66579c8 Mon Sep 17 00:00:00 2001 +From: Kuai Wei +Date: Wed, 31 Jan 2024 17:26:31 +0800 +Subject: [PATCH 140/140] Remove unused zSyscall_linux_riscv.hpp + +--- + .../linux_riscv/gc/z/zSyscall_linux_riscv.hpp | 42 ------------------- + 1 file changed, 42 deletions(-) + delete mode 100644 src/hotspot/os_cpu/linux_riscv/gc/z/zSyscall_linux_riscv.hpp + +diff --git a/src/hotspot/os_cpu/linux_riscv/gc/z/zSyscall_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/gc/z/zSyscall_linux_riscv.hpp +deleted file mode 100644 +index 1aa58f27871..00000000000 +--- a/src/hotspot/os_cpu/linux_riscv/gc/z/zSyscall_linux_riscv.hpp ++++ /dev/null +@@ -1,42 +0,0 @@ +-/* +- * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. +- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +- * +- * This code is free software; you can redistribute it and/or modify it +- * under the terms of the GNU General Public License version 2 only, as +- * published by the Free Software Foundation. 
+- * +- * This code is distributed in the hope that it will be useful, but WITHOUT +- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +- * version 2 for more details (a copy is included in the LICENSE file that +- * accompanied this code). +- * +- * You should have received a copy of the GNU General Public License version +- * 2 along with this work; if not, write to the Free Software Foundation, +- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +- * +- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +- * or visit www.oracle.com if you need additional information or have any +- * questions. +- * +- */ +- +-#ifndef OS_CPU_LINUX_RISCV_GC_Z_ZSYSCALL_LINUX_RISCV_HPP +-#define OS_CPU_LINUX_RISCV_GC_Z_ZSYSCALL_LINUX_RISCV_HPP +- +-#include +- +-// +-// Support for building on older Linux systems +-// +- +-#ifndef SYS_memfd_create +-#define SYS_memfd_create 279 +-#endif +-#ifndef SYS_fallocate +-#define SYS_fallocate 47 +-#endif +- +-#endif // OS_CPU_LINUX_RISCV_GC_Z_ZSYSCALL_LINUX_RISCV_HPP diff --git a/LoongArch64-support.patch b/LoongArch64-support.patch new file mode 100644 index 0000000000000000000000000000000000000000..029b5085ced97aa3f49b7f4c9f71956193358025 --- /dev/null +++ b/LoongArch64-support.patch @@ -0,0 +1,116372 @@ +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/make/autoconf/hotspot.m4 b/make/autoconf/hotspot.m4 +--- a/make/autoconf/hotspot.m4 2024-01-10 05:19:49.000000000 +0800 ++++ b/make/autoconf/hotspot.m4 2024-01-30 10:00:11.621434355 +0800 +@@ -34,6 +34,12 @@ + # All valid JVM variants + VALID_JVM_VARIANTS="server client minimal core zero custom" + ++# ++# This file has been modified by Loongson Technology in 2021. These ++# modifications are Copyright (c) 2020, 2021, Loongson Technology, and are made ++# available on the same license terms set forth above. ++# ++ + ############################################################################### + # Check if the specified JVM variant should be built. To be used in shell if + # constructs, like this: +@@ -340,6 +346,26 @@ + HOTSPOT_TARGET_CPU_ARCH=arm + fi + ++ # Override hotspot cpu definitions for MIPS and LOONGARCH platforms ++ if test "x$OPENJDK_TARGET_CPU" = xmips64el && test "x$HOTSPOT_TARGET_CPU" != xzero; then ++ HOTSPOT_TARGET_CPU=mips_64 ++ HOTSPOT_TARGET_CPU_ARCH=mips ++ elif test "x$OPENJDK_TARGET_CPU" = xloongarch64 && test "x$HOTSPOT_TARGET_CPU" != xzero; then ++ HOTSPOT_TARGET_CPU=loongarch_64 ++ HOTSPOT_TARGET_CPU_ARCH=loongarch ++ fi ++ ++ # Disable compiler1 on linux-mips and linux-loongarch ++ if ! (HOTSPOT_CHECK_JVM_FEATURE(compiler1)); then ++ AC_MSG_CHECKING([if compiler1 should be built, $JVM_FEATURES]) ++ if test "x$OPENJDK_TARGET_OS" = "xlinux" && test "x$HOTSPOT_TARGET_CPU_ARCH" = "xmips"; then ++ DISABLED_JVM_FEATURES="$DISABLED_JVM_FEATURES compiler1" ++ AC_MSG_RESULT([no, platform not supported]) ++ else ++ AC_MSG_RESULT([yes]) ++ fi ++ fi ++ + # Verify that dependencies are met for explicitly set features. + if HOTSPOT_CHECK_JVM_FEATURE(jvmti) && ! 
HOTSPOT_CHECK_JVM_FEATURE(services); then + AC_MSG_ERROR([Specified JVM feature 'jvmti' requires feature 'services']) +@@ -424,10 +450,11 @@ + JVM_FEATURES_jvmci="" + INCLUDE_JVMCI="false" + else +- # Only enable jvmci on x86_64, sparcv9 and aarch64 ++ # Only enable jvmci on x86_64, sparcv9, aarch64 and loongarch64 + if test "x$OPENJDK_TARGET_CPU" = "xx86_64" || \ + test "x$OPENJDK_TARGET_CPU" = "xsparcv9" || \ +- test "x$OPENJDK_TARGET_CPU" = "xaarch64" ; then ++ test "x$OPENJDK_TARGET_CPU" = "xaarch64" || \ ++ test "x$OPENJDK_TARGET_CPU" = "xloongarch64" ; then + AC_MSG_RESULT([yes]) + JVM_FEATURES_jvmci="jvmci" + INCLUDE_JVMCI="true" +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/make/autoconf/platform.m4 b/make/autoconf/platform.m4 +--- a/make/autoconf/platform.m4 2024-01-10 05:19:49.000000000 +0800 ++++ b/make/autoconf/platform.m4 2024-01-30 10:00:11.621434355 +0800 +@@ -23,6 +23,12 @@ + # questions. + # + ++# ++# This file has been modified by Loongson Technology in 2021. These ++# modifications are Copyright (c) 2018, 2021, Loongson Technology, and are made ++# available on the same license terms set forth above. ++# ++ + # Support macro for PLATFORM_EXTRACT_TARGET_AND_BUILD. + # Converts autoconf style CPU name to OpenJDK style, into + # VAR_CPU, VAR_CPU_ARCH, VAR_CPU_BITS and VAR_CPU_ENDIAN. +@@ -554,6 +560,12 @@ + HOTSPOT_$1_CPU_DEFINE=PPC64 + elif test "x$OPENJDK_$1_CPU" = xppc64le; then + HOTSPOT_$1_CPU_DEFINE=PPC64 ++ elif test "x$OPENJDK_$1_CPU" = xmips64; then ++ HOTSPOT_$1_CPU_DEFINE=MIPS64 ++ elif test "x$OPENJDK_$1_CPU" = xmips64el; then ++ HOTSPOT_$1_CPU_DEFINE=MIPS64 ++ elif test "x$OPENJDK_$1_CPU" = xloongarch64; then ++ HOTSPOT_$1_CPU_DEFINE=LOONGARCH64 + + # The cpu defines below are for zero, we don't support them directly. 
+ elif test "x$OPENJDK_$1_CPU" = xsparc; then +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/make/CompileJavaModules.gmk b/make/CompileJavaModules.gmk +--- a/make/CompileJavaModules.gmk 2024-01-10 05:19:49.000000000 +0800 ++++ b/make/CompileJavaModules.gmk 2024-01-30 10:00:11.614767768 +0800 +@@ -430,6 +430,7 @@ + + jdk.internal.vm.compiler_ADD_JAVAC_FLAGS += -parameters -XDstringConcat=inline \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.aarch64=jdk.internal.vm.compiler \ ++ --add-exports jdk.internal.vm.ci/jdk.vm.ci.loongarch64=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.amd64=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.code=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.code.site=jdk.internal.vm.compiler \ +@@ -437,6 +438,7 @@ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.common=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.hotspot=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.hotspot.aarch64=jdk.internal.vm.compiler \ ++ --add-exports jdk.internal.vm.ci/jdk.vm.ci.hotspot.loongarch64=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.hotspot.amd64=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.hotspot.sparc=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.meta=jdk.internal.vm.compiler \ +@@ -456,6 +458,7 @@ + org.graalvm.compiler.api.directives.test \ + org.graalvm.compiler.api.test \ + org.graalvm.compiler.asm.aarch64.test \ ++ org.graalvm.compiler.asm.loongarch64.test \ + org.graalvm.compiler.asm.amd64.test \ + org.graalvm.compiler.asm.sparc.test \ + org.graalvm.compiler.asm.test \ +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/aarch64/c1_LIR_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIR_aarch64.cpp +--- a/src/hotspot/cpu/aarch64/c1_LIR_aarch64.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/cpu/aarch64/c1_LIR_aarch64.cpp 2024-01-30 10:00:11.801432207 +0800 +@@ -52,3 +52,24 @@ + "wrong type for addresses"); + } + #endif // PRODUCT ++ ++template ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, T tgt, CodeEmitInfo* info) { ++ cmp(condition, left, right, info); ++ branch(condition, type, tgt); ++} ++ ++// Explicit instantiation for all supported types. 
++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, Label*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, BlockBegin*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, CodeStub*, CodeEmitInfo*); ++ ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* unordered) { ++ cmp(condition, left, right); ++ branch(condition, type, block, unordered); ++} ++ ++void LIR_List::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { ++ cmp(condition, left, right); ++ cmove(condition, src1, src2, dst, type); ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp +--- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp 2024-01-30 10:00:11.801432207 +0800 +@@ -1123,7 +1123,9 @@ + } + } + +- ++void LIR_Assembler::emit_opCmpBranch(LIR_OpCmpBranch* op) { ++ ShouldNotReachHere(); ++} + + void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { + LIR_Opr src = op->in_opr(); +@@ -1663,6 +1665,10 @@ + __ csel(result->as_register(), opr1->as_register(), opr2->as_register(), acond); + } + ++void LIR_Assembler::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type) { ++ ShouldNotReachHere(); ++} ++ + void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, CodeEmitInfo* info, bool pop_fpu_stack) { + assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method"); + +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp +--- a/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp 2024-01-30 10:00:11.801432207 +0800 +@@ -260,18 +260,29 @@ + __ store(reg, addr); + } + +-void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) { ++template ++void LIRGenerator::cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, int disp, int c, T tgt, CodeEmitInfo* info) { + LIR_Opr reg = new_register(T_INT); + __ load(generate_address(base, disp, T_INT), reg, info); +- __ cmp(condition, reg, LIR_OprFact::intConst(c)); ++ __ cmp_branch(condition, reg, LIR_OprFact::intConst(c), T_INT, tgt); + } + +-void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info) { ++// Explicit instantiation for all supported types. 
++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, CodeStub*, CodeEmitInfo*); ++ ++template ++void LIRGenerator::cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, T tgt, CodeEmitInfo* info) { + LIR_Opr reg1 = new_register(T_INT); + __ load(generate_address(base, disp, type), reg1, info); +- __ cmp(condition, reg, reg1); ++ __ cmp_branch(condition, reg, reg1, type, tgt); + } + ++// Explicit instantiation for all supported types. ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, CodeStub*, CodeEmitInfo*); + + bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, jint c, LIR_Opr result, LIR_Opr tmp) { + +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/arm/c1_LIR_arm.cpp b/src/hotspot/cpu/arm/c1_LIR_arm.cpp +--- a/src/hotspot/cpu/arm/c1_LIR_arm.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/cpu/arm/c1_LIR_arm.cpp 2024-01-30 10:00:11.821431969 +0800 +@@ -84,3 +84,24 @@ + #endif // AARCH64 + } + #endif // PRODUCT ++ ++template ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, T tgt, CodeEmitInfo* info) { ++ cmp(condition, left, right, info); ++ branch(condition, type, tgt); ++} ++ ++// Explicit instantiation for all supported types. 
++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, Label*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, BlockBegin*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, CodeStub*, CodeEmitInfo*); ++ ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* unordered) { ++ cmp(condition, left, right); ++ branch(condition, type, block, unordered); ++} ++ ++void LIR_List::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { ++ cmp(condition, left, right); ++ cmove(condition, src1, src2, dst, type); ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp +--- a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp 2024-01-30 10:00:11.818098676 +0800 +@@ -1150,6 +1150,9 @@ + __ b(*(op->label()), acond); + } + ++void LIR_Assembler::emit_opCmpBranch(LIR_OpCmpBranch* op) { ++ ShouldNotReachHere(); ++} + + void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { + LIR_Opr src = op->in_opr(); +@@ -3082,6 +3085,10 @@ + __ bind(*stub->continuation()); + } + ++void LIR_Assembler::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type) { ++ ShouldNotReachHere(); ++} ++ + #ifdef ASSERT + // emit run-time assertion + void LIR_Assembler::emit_assert(LIR_OpAssert* op) { +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/arm/c1_LIRGenerator_arm.cpp b/src/hotspot/cpu/arm/c1_LIRGenerator_arm.cpp +--- a/src/hotspot/cpu/arm/c1_LIRGenerator_arm.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/cpu/arm/c1_LIRGenerator_arm.cpp 2024-01-30 10:00:11.818098676 +0800 +@@ -423,18 +423,27 @@ + __ move(temp, addr); + } + +- +-void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) { ++template ++void LIRGenerator::cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, int disp, int c, T tgt, CodeEmitInfo* info) { + __ load(new LIR_Address(base, disp, T_INT), FrameMap::LR_opr, info); +- __ cmp(condition, FrameMap::LR_opr, c); ++ __ cmp_branch(condition, FrameMap::LR_opr, c, T_INT, tgt); + } + ++// Explicit instantiation for all supported types. ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, CodeStub*, CodeEmitInfo*); + +-void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info) { ++template ++void LIRGenerator::cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, T tgt, CodeEmitInfo* info) { + __ load(new LIR_Address(base, disp, type), FrameMap::LR_opr, info); +- __ cmp(condition, reg, FrameMap::LR_opr); ++ __ cmp_branch(condition, reg, FrameMap::LR_opr, type, tgt); + } + ++// Explicit instantiation for all supported types. 
++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, CodeStub*, CodeEmitInfo*); + + bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, jint c, LIR_Opr result, LIR_Opr tmp) { + assert(left != result, "should be different registers"); +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/abstractInterpreter_loongarch.cpp b/src/hotspot/cpu/loongarch/abstractInterpreter_loongarch.cpp +--- a/src/hotspot/cpu/loongarch/abstractInterpreter_loongarch.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/abstractInterpreter_loongarch.cpp 2024-01-30 10:00:11.834765144 +0800 +@@ -0,0 +1,132 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "ci/ciMethod.hpp" ++#include "interpreter/interpreter.hpp" ++#include "runtime/frame.inline.hpp" ++ ++// asm based interpreter deoptimization helpers ++int AbstractInterpreter::size_activation(int max_stack, ++ int temps, ++ int extra_args, ++ int monitors, ++ int callee_params, ++ int callee_locals, ++ bool is_top_frame) { ++ // Note: This calculation must exactly parallel the frame setup ++ // in AbstractInterpreterGenerator::generate_method_entry. ++ ++ // fixed size of an interpreter frame: ++ int overhead = frame::java_frame_sender_sp_offset - ++ frame::interpreter_frame_initial_sp_offset; ++ // Our locals were accounted for by the caller (or last_frame_adjust ++ // on the transistion) Since the callee parameters already account ++ // for the callee's params we only need to account for the extra ++ // locals. ++ int size = overhead + ++ (callee_locals - callee_params)*Interpreter::stackElementWords + ++ monitors * frame::interpreter_frame_monitor_size() + ++ temps* Interpreter::stackElementWords + extra_args; ++ ++ return size; ++} ++ ++// How much stack a method activation needs in words. ++int AbstractInterpreter::size_top_interpreter_activation(Method* method) { ++ ++ const int entry_size = frame::interpreter_frame_monitor_size(); ++ ++ // total overhead size: entry_size + (saved ebp thru expr stack bottom). 
++ // be sure to change this if you add/subtract anything to/from the overhead area ++ const int overhead_size = -(frame::interpreter_frame_initial_sp_offset) + entry_size; ++ ++ const int stub_code = 6; // see generate_call_stub ++ // return overhead_size + method->max_locals() + method->max_stack() + stub_code; ++ const int method_stack = (method->max_locals() + method->max_stack()) * ++ Interpreter::stackElementWords; ++ return overhead_size + method_stack + stub_code; ++} ++ ++void AbstractInterpreter::layout_activation(Method* method, ++ int tempcount, ++ int popframe_extra_args, ++ int moncount, ++ int caller_actual_parameters, ++ int callee_param_count, ++ int callee_locals, ++ frame* caller, ++ frame* interpreter_frame, ++ bool is_top_frame, ++ bool is_bottom_frame) { ++ // Note: This calculation must exactly parallel the frame setup ++ // in AbstractInterpreterGenerator::generate_method_entry. ++ // If interpreter_frame!=NULL, set up the method, locals, and monitors. ++ // The frame interpreter_frame, if not NULL, is guaranteed to be the ++ // right size, as determined by a previous call to this method. ++ // It is also guaranteed to be walkable even though it is in a skeletal state ++ ++ // fixed size of an interpreter frame: ++ ++ int max_locals = method->max_locals() * Interpreter::stackElementWords; ++ int extra_locals = (method->max_locals() - method->size_of_parameters()) * Interpreter::stackElementWords; ++ ++#ifdef ASSERT ++ assert(caller->sp() == interpreter_frame->sender_sp(), "Frame not properly walkable(2)"); ++#endif ++ ++ interpreter_frame->interpreter_frame_set_method(method); ++ // NOTE the difference in using sender_sp and interpreter_frame_sender_sp ++ // interpreter_frame_sender_sp is the original sp of the caller (the unextended_sp) ++ // and sender_sp is fp+8 ++ intptr_t* locals = interpreter_frame->sender_sp() + max_locals - 1; ++ ++#ifdef ASSERT ++ if (caller->is_interpreted_frame()) { ++ assert(locals < caller->fp() + frame::interpreter_frame_initial_sp_offset, "bad placement"); ++ } ++#endif ++ ++ interpreter_frame->interpreter_frame_set_locals(locals); ++ BasicObjectLock* montop = interpreter_frame->interpreter_frame_monitor_begin(); ++ BasicObjectLock* monbot = montop - moncount; ++ interpreter_frame->interpreter_frame_set_monitor_end(montop - moncount); ++ ++ //set last sp; ++ intptr_t* esp = (intptr_t*) monbot - tempcount*Interpreter::stackElementWords - ++ popframe_extra_args; ++ interpreter_frame->interpreter_frame_set_last_sp(esp); ++ // All frames but the initial interpreter frame we fill in have a ++ // value for sender_sp that allows walking the stack but isn't ++ // truly correct. Correct the value here. ++ // ++ if (extra_locals != 0 && ++ interpreter_frame->sender_sp() == interpreter_frame->interpreter_frame_sender_sp() ) { ++ interpreter_frame->set_interpreter_frame_sender_sp(caller->sp() + extra_locals); ++ } ++ *interpreter_frame->interpreter_frame_cache_addr() = method->constants()->cache(); ++ *interpreter_frame->interpreter_frame_mirror_addr() = method->method_holder()->java_mirror(); ++} ++ +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/assembler_loongarch.cpp b/src/hotspot/cpu/loongarch/assembler_loongarch.cpp +--- a/src/hotspot/cpu/loongarch/assembler_loongarch.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/assembler_loongarch.cpp 2024-01-30 10:00:11.834765144 +0800 +@@ -0,0 +1,849 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. 
All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/assembler.inline.hpp" ++#include "gc/shared/cardTableBarrierSet.hpp" ++#include "gc/shared/collectedHeap.inline.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/resourceArea.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/objectMonitor.hpp" ++#include "runtime/os.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "utilities/macros.hpp" ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#define STOP(error) stop(error) ++#else ++#define BLOCK_COMMENT(str) block_comment(str) ++#define STOP(error) block_comment(error); stop(error) ++#endif ++ ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++// Implementation of AddressLiteral ++ ++AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) { ++ _is_lval = false; ++ _target = target; ++ _rspec = rspec_from_rtype(rtype, target); ++} ++ ++// Implementation of Address ++ ++Address Address::make_array(ArrayAddress adr) { ++ AddressLiteral base = adr.base(); ++ Address index = adr.index(); ++ assert(index._disp == 0, "must not have disp"); // maybe it can? 
++ Address array(index._base, index._index, index._scale, (intptr_t) base.target()); ++ array._rspec = base._rspec; ++ return array; ++} ++ ++// exceedingly dangerous constructor ++Address::Address(address loc, RelocationHolder spec) { ++ _base = noreg; ++ _index = noreg; ++ _scale = no_scale; ++ _disp = (intptr_t) loc; ++ _rspec = spec; ++} ++ ++ ++int Assembler::is_int_mask(int x) { ++ int xx = x; ++ int count = 0; ++ ++ while (x != 0) { ++ x &= (x - 1); ++ count++; ++ } ++ ++ if ((1<> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ ldx_b(dst, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ ld_b(dst, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ldx_b(dst, base, AT); ++ } ++ } ++} ++ ++void Assembler::ld_bu(Register rd, Address src) { ++ Register dst = rd; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ ldx_bu(dst, base, index); ++ } else { ++ add_d(AT, base, index); ++ ld_bu(dst, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ ld_bu(dst, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ ldx_bu(dst, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ ld_bu(dst, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ldx_bu(dst, base, AT); ++ } ++ } ++} ++ ++void Assembler::ld_d(Register rd, Address src){ ++ Register dst = rd; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ ldx_d(dst, base, index); ++ } else { ++ add_d(AT, base, index); ++ ld_d(dst, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ ld_d(dst, AT, disp); ++ } ++ } else if (is_simm(disp, 16) && !(disp & 3)) { ++ if (scale == 0) { ++ add_d(AT, base, index); ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ } ++ ldptr_d(dst, AT, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ ldx_d(dst, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ ld_d(dst, base, disp); ++ } else if (is_simm(disp, 16) && !(disp & 3)) { ++ ldptr_d(dst, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ldx_d(dst, base, AT); ++ } ++ } ++} ++ ++void Assembler::ld_h(Register rd, Address src){ ++ Register dst = rd; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ ldx_h(dst, base, index); ++ } else { ++ add_d(AT, base, index); ++ ld_h(dst, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ ld_h(dst, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, 
split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ ldx_h(dst, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ ld_h(dst, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ldx_h(dst, base, AT); ++ } ++ } ++} ++ ++void Assembler::ld_hu(Register rd, Address src){ ++ Register dst = rd; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ ldx_hu(dst, base, index); ++ } else { ++ add_d(AT, base, index); ++ ld_hu(dst, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ ld_hu(dst, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ ldx_hu(dst, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ ld_hu(dst, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ldx_hu(dst, base, AT); ++ } ++ } ++} ++ ++void Assembler::ll_w(Register rd, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ ll_w(rd, src.base(), src.disp()); ++} ++ ++void Assembler::ll_d(Register rd, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ ll_d(rd, src.base(), src.disp()); ++} ++ ++void Assembler::ld_w(Register rd, Address src){ ++ Register dst = rd; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ ldx_w(dst, base, index); ++ } else { ++ add_d(AT, base, index); ++ ld_w(dst, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ ld_w(dst, AT, disp); ++ } ++ } else if (is_simm(disp, 16) && !(disp & 3)) { ++ if (scale == 0) { ++ add_d(AT, base, index); ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ } ++ ldptr_w(dst, AT, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ ldx_w(dst, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ ld_w(dst, base, disp); ++ } else if (is_simm(disp, 16) && !(disp & 3)) { ++ ldptr_w(dst, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ldx_w(dst, base, AT); ++ } ++ } ++} ++ ++void Assembler::ld_wu(Register rd, Address src){ ++ Register dst = rd; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ ldx_wu(dst, base, index); ++ } else { ++ add_d(AT, base, index); ++ ld_wu(dst, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ ld_wu(dst, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, 
scale - 1); ++ } ++ ldx_wu(dst, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ ld_wu(dst, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ldx_wu(dst, base, AT); ++ } ++ } ++} ++ ++void Assembler::st_b(Register rd, Address dst) { ++ Register src = rd; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ assert_different_registers(src, AT); ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ stx_b(src, base, index); ++ } else { ++ add_d(AT, base, index); ++ st_b(src, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ st_b(src, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ stx_b(src, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ st_b(src, base, disp); ++ } else { ++ assert_different_registers(src, AT); ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ stx_b(src, base, AT); ++ } ++ } ++} ++ ++void Assembler::sc_w(Register rd, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sc_w(rd, dst.base(), dst.disp()); ++} ++ ++void Assembler::sc_d(Register rd, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sc_d(rd, dst.base(), dst.disp()); ++} ++ ++void Assembler::st_d(Register rd, Address dst) { ++ Register src = rd; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ assert_different_registers(src, AT); ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ stx_d(src, base, index); ++ } else { ++ add_d(AT, base, index); ++ st_d(src, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ st_d(src, AT, disp); ++ } ++ } else if (is_simm(disp, 16) && !(disp & 3)) { ++ if (scale == 0) { ++ add_d(AT, base, index); ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ } ++ stptr_d(src, AT, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ stx_d(src, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ st_d(src, base, disp); ++ } else if (is_simm(disp, 16) && !(disp & 3)) { ++ stptr_d(src, base, disp); ++ } else { ++ assert_different_registers(src, AT); ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ stx_d(src, base, AT); ++ } ++ } ++} ++ ++void Assembler::st_h(Register rd, Address dst) { ++ Register src = rd; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ assert_different_registers(src, AT); ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ stx_h(src, base, index); ++ } else { ++ add_d(AT, base, index); ++ st_h(src, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ st_h(src, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { 
++ alsl_d(AT, index, AT, scale - 1); ++ } ++ stx_h(src, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ st_h(src, base, disp); ++ } else { ++ assert_different_registers(src, AT); ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ stx_h(src, base, AT); ++ } ++ } ++} ++ ++void Assembler::st_w(Register rd, Address dst) { ++ Register src = rd; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ assert_different_registers(src, AT); ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ stx_w(src, base, index); ++ } else { ++ add_d(AT, base, index); ++ st_w(src, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ st_w(src, AT, disp); ++ } ++ } else if (is_simm(disp, 16) && !(disp & 3)) { ++ if (scale == 0) { ++ add_d(AT, base, index); ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ } ++ stptr_w(src, AT, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ stx_w(src, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ st_w(src, base, disp); ++ } else if (is_simm(disp, 16) && !(disp & 3)) { ++ stptr_w(src, base, disp); ++ } else { ++ assert_different_registers(src, AT); ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ stx_w(src, base, AT); ++ } ++ } ++} ++ ++void Assembler::fld_s(FloatRegister fd, Address src) { ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ fldx_s(fd, base, index); ++ } else { ++ add_d(AT, base, index); ++ fld_s(fd, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ fld_s(fd, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ fldx_s(fd, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ fld_s(fd, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ fldx_s(fd, base, AT); ++ } ++ } ++} ++ ++void Assembler::fld_d(FloatRegister fd, Address src) { ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ fldx_d(fd, base, index); ++ } else { ++ add_d(AT, base, index); ++ fld_d(fd, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ fld_d(fd, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ fldx_d(fd, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ fld_d(fd, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ fldx_d(fd, base, AT); ++ } ++ } ++} ++ ++void Assembler::fst_s(FloatRegister fd, Address dst) { ++ Register base = dst.base(); ++ Register 
index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ fstx_s(fd, base, index); ++ } else { ++ add_d(AT, base, index); ++ fst_s(fd, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ fst_s(fd, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ fstx_s(fd, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ fst_s(fd, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ fstx_s(fd, base, AT); ++ } ++ } ++} ++ ++void Assembler::fst_d(FloatRegister fd, Address dst) { ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ fstx_d(fd, base, index); ++ } else { ++ add_d(AT, base, index); ++ fst_d(fd, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ fst_d(fd, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ fstx_d(fd, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ fst_d(fd, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ fstx_d(fd, base, AT); ++ } ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/assembler_loongarch.hpp b/src/hotspot/cpu/loongarch/assembler_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/assembler_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/assembler_loongarch.hpp 2024-01-30 10:00:11.834765144 +0800 +@@ -0,0 +1,2827 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_ASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_ASSEMBLER_LOONGARCH_HPP ++ ++#include "asm/register.hpp" ++#include "runtime/vm_version.hpp" ++ ++class BiasedLockingCounters; ++ ++ ++// Note: A register location is represented via a Register, not ++// via an address for efficiency & simplicity reasons. ++ ++class ArrayAddress; ++ ++class Address { ++ public: ++ enum ScaleFactor { ++ no_scale = -1, ++ times_1 = 0, ++ times_2 = 1, ++ times_4 = 2, ++ times_8 = 3, ++ times_ptr = times_8 ++ }; ++ static ScaleFactor times(int size) { ++ assert(size >= 1 && size <= 8 && is_power_of_2(size), "bad scale size"); ++ if (size == 8) return times_8; ++ if (size == 4) return times_4; ++ if (size == 2) return times_2; ++ return times_1; ++ } ++ ++ private: ++ Register _base; ++ Register _index; ++ ScaleFactor _scale; ++ int _disp; ++ RelocationHolder _rspec; ++ ++ // Easily misused constructors make them private ++ Address(address loc, RelocationHolder spec); ++ Address(int disp, address loc, relocInfo::relocType rtype); ++ Address(int disp, address loc, RelocationHolder spec); ++ ++ public: ++ ++ // creation ++ Address() ++ : _base(noreg), ++ _index(noreg), ++ _scale(no_scale), ++ _disp(0) { ++ } ++ ++ // No default displacement otherwise Register can be implicitly ++ // converted to 0(Register) which is quite a different animal. ++ ++ Address(Register base, int disp = 0) ++ : _base(base), ++ _index(noreg), ++ _scale(no_scale), ++ _disp(disp) { ++ assert_different_registers(_base, AT); ++ } ++ ++ Address(Register base, Register index, ScaleFactor scale, int disp = 0) ++ : _base (base), ++ _index(index), ++ _scale(scale), ++ _disp (disp) { ++ assert(!index->is_valid() == (scale == Address::no_scale), "inconsistent address"); ++ assert_different_registers(_base, _index, AT); ++ } ++ ++ // The following two overloads are used in connection with the ++ // ByteSize type (see sizes.hpp). They simplify the use of ++ // ByteSize'd arguments in assembly code. Note that their equivalent ++ // for the optimized build are the member functions with int disp ++ // argument since ByteSize is mapped to an int type in that case. ++ // ++ // Note: DO NOT introduce similar overloaded functions for WordSize ++ // arguments as in the optimized mode, both ByteSize and WordSize ++ // are mapped to the same type and thus the compiler cannot make a ++ // distinction anymore (=> compiler errors). 
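// Illustrative sketch (assumes simplified stand-in types, not the HotSpot Register/Address
// classes defined in this header): an Address here is just (base, index, scale, disp), and the
// effective address is base + (index << scale) + disp, which the st_w()/fld_s() style helpers
// earlier in this file materialize with add_d/alsl_d when the displacement does not fit an
// instruction's immediate field.
#include <cstdint>

struct ExampleAddress {
  uint64_t base;   // value held in the base register
  uint64_t index;  // value held in the index register (0 when no index is used)
  unsigned scale;  // log2 of the element size, i.e. times_1 .. times_8 above
  int32_t  disp;   // signed byte displacement
};

inline uint64_t effective_address(const ExampleAddress& a) {
  // base + (index << scale) + sign-extended displacement
  return a.base + (a.index << a.scale) + static_cast<int64_t>(a.disp);
}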
++ ++#ifdef ASSERT ++ Address(Register base, ByteSize disp) ++ : _base(base), ++ _index(noreg), ++ _scale(no_scale), ++ _disp(in_bytes(disp)) { ++ assert_different_registers(_base, AT); ++ } ++ ++ Address(Register base, Register index, ScaleFactor scale, ByteSize disp) ++ : _base(base), ++ _index(index), ++ _scale(scale), ++ _disp(in_bytes(disp)) { ++ assert(!index->is_valid() == (scale == Address::no_scale), "inconsistent address"); ++ assert_different_registers(_base, _index, AT); ++ } ++#endif // ASSERT ++ ++ // accessors ++ bool uses(Register reg) const { return _base == reg || _index == reg; } ++ Register base() const { return _base; } ++ Register index() const { return _index; } ++ ScaleFactor scale() const { return _scale; } ++ int disp() const { return _disp; } ++ ++ static Address make_array(ArrayAddress); ++ ++ friend class Assembler; ++ friend class MacroAssembler; ++ friend class LIR_Assembler; // base/index/scale/disp ++}; ++ ++// Calling convention ++class Argument { ++ public: ++ enum { ++ n_register_parameters = 8, // 8 integer registers used to pass parameters ++ n_float_register_parameters = 8 // 8 float registers used to pass parameters ++ }; ++}; ++ ++// ++// AddressLiteral has been split out from Address because operands of this type ++// need to be treated specially on 32bit vs. 64bit platforms. By splitting it out ++// the few instructions that need to deal with address literals are unique and the ++// MacroAssembler does not have to implement every instruction in the Assembler ++// in order to search for address literals that may need special handling depending ++// on the instruction and the platform. As small step on the way to merging i486/amd64 ++// directories. ++// ++class AddressLiteral { ++ friend class ArrayAddress; ++ RelocationHolder _rspec; ++ // Typically we use AddressLiterals we want to use their rval ++ // However in some situations we want the lval (effect address) of the item. ++ // We provide a special factory for making those lvals. ++ bool _is_lval; ++ ++ // If the target is far we'll need to load the ea of this to ++ // a register to reach it. Otherwise if near we can do rip ++ // relative addressing. 
++ ++ address _target; ++ ++ protected: ++ // creation ++ AddressLiteral() ++ : _is_lval(false), ++ _target(NULL) ++ {} ++ ++ public: ++ ++ ++ AddressLiteral(address target, relocInfo::relocType rtype); ++ ++ AddressLiteral(address target, RelocationHolder const& rspec) ++ : _rspec(rspec), ++ _is_lval(false), ++ _target(target) ++ {} ++ ++ AddressLiteral addr() { ++ AddressLiteral ret = *this; ++ ret._is_lval = true; ++ return ret; ++ } ++ ++ ++ private: ++ ++ address target() { return _target; } ++ bool is_lval() { return _is_lval; } ++ ++ relocInfo::relocType reloc() const { return _rspec.type(); } ++ const RelocationHolder& rspec() const { return _rspec; } ++ ++ friend class Assembler; ++ friend class MacroAssembler; ++ friend class Address; ++ friend class LIR_Assembler; ++ RelocationHolder rspec_from_rtype(relocInfo::relocType rtype, address addr) { ++ switch (rtype) { ++ case relocInfo::external_word_type: ++ return external_word_Relocation::spec(addr); ++ case relocInfo::internal_word_type: ++ return internal_word_Relocation::spec(addr); ++ case relocInfo::opt_virtual_call_type: ++ return opt_virtual_call_Relocation::spec(); ++ case relocInfo::static_call_type: ++ return static_call_Relocation::spec(); ++ case relocInfo::runtime_call_type: ++ return runtime_call_Relocation::spec(); ++ case relocInfo::poll_type: ++ case relocInfo::poll_return_type: ++ return Relocation::spec_simple(rtype); ++ case relocInfo::none: ++ case relocInfo::oop_type: ++ // Oops are a special case. Normally they would be their own section ++ // but in cases like icBuffer they are literals in the code stream that ++ // we don't have a section for. We use none so that we get a literal address ++ // which is always patchable. ++ return RelocationHolder(); ++ default: ++ ShouldNotReachHere(); ++ return RelocationHolder(); ++ } ++ } ++ ++}; ++ ++// Convience classes ++class RuntimeAddress: public AddressLiteral { ++ ++ public: ++ ++ RuntimeAddress(address target) : AddressLiteral(target, relocInfo::runtime_call_type) {} ++ ++}; ++ ++class OopAddress: public AddressLiteral { ++ ++ public: ++ ++ OopAddress(address target) : AddressLiteral(target, relocInfo::oop_type){} ++ ++}; ++ ++class ExternalAddress: public AddressLiteral { ++ ++ public: ++ ++ ExternalAddress(address target) : AddressLiteral(target, relocInfo::external_word_type){} ++ ++}; ++ ++class InternalAddress: public AddressLiteral { ++ ++ public: ++ ++ InternalAddress(address target) : AddressLiteral(target, relocInfo::internal_word_type) {} ++ ++}; ++ ++// x86 can do array addressing as a single operation since disp can be an absolute ++// address amd64 can't. We create a class that expresses the concept but does extra ++// magic on amd64 to get the final result ++ ++class ArrayAddress { ++ private: ++ ++ AddressLiteral _base; ++ Address _index; ++ ++ public: ++ ++ ArrayAddress() {}; ++ ArrayAddress(AddressLiteral base, Address index): _base(base), _index(index) {}; ++ AddressLiteral base() { return _base; } ++ Address index() { return _index; } ++ ++}; ++ ++// The LoongArch Assembler: Pure assembler doing NO optimizations on the instruction ++// level ; i.e., what you write is what you get. The Assembler is generating code into ++// a CodeBuffer. 
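// Illustrative sketch (uses plain integer register numbers rather than the HotSpot Register
// type): every LoongArch instruction is a fixed 32-bit word. For the 3R-type format used by
// insn_RRR() below, a 17-bit opcode sits in bits[31..15] and rk/rj/rd occupy three 5-bit
// fields, so Assembler::add_d(rd, rj, rk) emits insn_RRR(add_d_op, rk, rj, rd) via a single
// emit_int32().
#include <cassert>
#include <cstdint>

inline uint32_t encode_3r(uint32_t op17, uint32_t rk, uint32_t rj, uint32_t rd) {
  assert(op17 < (1u << 17) && rk < 32 && rj < 32 && rd < 32);
  return (op17 << 15) | (rk << 10) | (rj << 5) | rd;
}

// Example: add.d with rd=4, rj=5, rk=6, using add_d_op = 0b00000000000100001 from the ops17 table.
static const uint32_t add_d_example = encode_3r(0b00000000000100001u, 6, 5, 4);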
++ ++class Assembler : public AbstractAssembler { ++ friend class AbstractAssembler; // for the non-virtual hack ++ friend class LIR_Assembler; // as_Address() ++ friend class StubGenerator; ++ ++ public: ++ // 22-bit opcode, highest 22 bits: bits[31...10] ++ enum ops22 { ++ clo_w_op = 0b0000000000000000000100, ++ clz_w_op = 0b0000000000000000000101, ++ cto_w_op = 0b0000000000000000000110, ++ ctz_w_op = 0b0000000000000000000111, ++ clo_d_op = 0b0000000000000000001000, ++ clz_d_op = 0b0000000000000000001001, ++ cto_d_op = 0b0000000000000000001010, ++ ctz_d_op = 0b0000000000000000001011, ++ revb_2h_op = 0b0000000000000000001100, ++ revb_4h_op = 0b0000000000000000001101, ++ revb_2w_op = 0b0000000000000000001110, ++ revb_d_op = 0b0000000000000000001111, ++ revh_2w_op = 0b0000000000000000010000, ++ revh_d_op = 0b0000000000000000010001, ++ bitrev_4b_op = 0b0000000000000000010010, ++ bitrev_8b_op = 0b0000000000000000010011, ++ bitrev_w_op = 0b0000000000000000010100, ++ bitrev_d_op = 0b0000000000000000010101, ++ ext_w_h_op = 0b0000000000000000010110, ++ ext_w_b_op = 0b0000000000000000010111, ++ rdtimel_w_op = 0b0000000000000000011000, ++ rdtimeh_w_op = 0b0000000000000000011001, ++ rdtime_d_op = 0b0000000000000000011010, ++ cpucfg_op = 0b0000000000000000011011, ++ fabs_s_op = 0b0000000100010100000001, ++ fabs_d_op = 0b0000000100010100000010, ++ fneg_s_op = 0b0000000100010100000101, ++ fneg_d_op = 0b0000000100010100000110, ++ flogb_s_op = 0b0000000100010100001001, ++ flogb_d_op = 0b0000000100010100001010, ++ fclass_s_op = 0b0000000100010100001101, ++ fclass_d_op = 0b0000000100010100001110, ++ fsqrt_s_op = 0b0000000100010100010001, ++ fsqrt_d_op = 0b0000000100010100010010, ++ frecip_s_op = 0b0000000100010100010101, ++ frecip_d_op = 0b0000000100010100010110, ++ frsqrt_s_op = 0b0000000100010100011001, ++ frsqrt_d_op = 0b0000000100010100011010, ++ fmov_s_op = 0b0000000100010100100101, ++ fmov_d_op = 0b0000000100010100100110, ++ movgr2fr_w_op = 0b0000000100010100101001, ++ movgr2fr_d_op = 0b0000000100010100101010, ++ movgr2frh_w_op = 0b0000000100010100101011, ++ movfr2gr_s_op = 0b0000000100010100101101, ++ movfr2gr_d_op = 0b0000000100010100101110, ++ movfrh2gr_s_op = 0b0000000100010100101111, ++ movgr2fcsr_op = 0b0000000100010100110000, ++ movfcsr2gr_op = 0b0000000100010100110010, ++ movfr2cf_op = 0b0000000100010100110100, ++ movcf2fr_op = 0b0000000100010100110101, ++ movgr2cf_op = 0b0000000100010100110110, ++ movcf2gr_op = 0b0000000100010100110111, ++ fcvt_s_d_op = 0b0000000100011001000110, ++ fcvt_d_s_op = 0b0000000100011001001001, ++ ftintrm_w_s_op = 0b0000000100011010000001, ++ ftintrm_w_d_op = 0b0000000100011010000010, ++ ftintrm_l_s_op = 0b0000000100011010001001, ++ ftintrm_l_d_op = 0b0000000100011010001010, ++ ftintrp_w_s_op = 0b0000000100011010010001, ++ ftintrp_w_d_op = 0b0000000100011010010010, ++ ftintrp_l_s_op = 0b0000000100011010011001, ++ ftintrp_l_d_op = 0b0000000100011010011010, ++ ftintrz_w_s_op = 0b0000000100011010100001, ++ ftintrz_w_d_op = 0b0000000100011010100010, ++ ftintrz_l_s_op = 0b0000000100011010101001, ++ ftintrz_l_d_op = 0b0000000100011010101010, ++ ftintrne_w_s_op = 0b0000000100011010110001, ++ ftintrne_w_d_op = 0b0000000100011010110010, ++ ftintrne_l_s_op = 0b0000000100011010111001, ++ ftintrne_l_d_op = 0b0000000100011010111010, ++ ftint_w_s_op = 0b0000000100011011000001, ++ ftint_w_d_op = 0b0000000100011011000010, ++ ftint_l_s_op = 0b0000000100011011001001, ++ ftint_l_d_op = 0b0000000100011011001010, ++ ffint_s_w_op = 0b0000000100011101000100, ++ ffint_s_l_op = 
0b0000000100011101000110, ++ ffint_d_w_op = 0b0000000100011101001000, ++ ffint_d_l_op = 0b0000000100011101001010, ++ frint_s_op = 0b0000000100011110010001, ++ frint_d_op = 0b0000000100011110010010, ++ iocsrrd_b_op = 0b0000011001001000000000, ++ iocsrrd_h_op = 0b0000011001001000000001, ++ iocsrrd_w_op = 0b0000011001001000000010, ++ iocsrrd_d_op = 0b0000011001001000000011, ++ iocsrwr_b_op = 0b0000011001001000000100, ++ iocsrwr_h_op = 0b0000011001001000000101, ++ iocsrwr_w_op = 0b0000011001001000000110, ++ iocsrwr_d_op = 0b0000011001001000000111, ++ vpcnt_b_op = 0b0111001010011100001000, ++ vpcnt_h_op = 0b0111001010011100001001, ++ vpcnt_w_op = 0b0111001010011100001010, ++ vpcnt_d_op = 0b0111001010011100001011, ++ vneg_b_op = 0b0111001010011100001100, ++ vneg_h_op = 0b0111001010011100001101, ++ vneg_w_op = 0b0111001010011100001110, ++ vneg_d_op = 0b0111001010011100001111, ++ vfclass_s_op = 0b0111001010011100110101, ++ vfclass_d_op = 0b0111001010011100110110, ++ vfsqrt_s_op = 0b0111001010011100111001, ++ vfsqrt_d_op = 0b0111001010011100111010, ++ vfrint_s_op = 0b0111001010011101001101, ++ vfrint_d_op = 0b0111001010011101001110, ++ vfrintrm_s_op = 0b0111001010011101010001, ++ vfrintrm_d_op = 0b0111001010011101010010, ++ vfrintrp_s_op = 0b0111001010011101010101, ++ vfrintrp_d_op = 0b0111001010011101010110, ++ vfrintrz_s_op = 0b0111001010011101011001, ++ vfrintrz_d_op = 0b0111001010011101011010, ++ vfrintrne_s_op = 0b0111001010011101011101, ++ vfrintrne_d_op = 0b0111001010011101011110, ++ vfcvtl_s_h_op = 0b0111001010011101111010, ++ vfcvth_s_h_op = 0b0111001010011101111011, ++ vfcvtl_d_s_op = 0b0111001010011101111100, ++ vfcvth_d_s_op = 0b0111001010011101111101, ++ vffint_s_w_op = 0b0111001010011110000000, ++ vffint_s_wu_op = 0b0111001010011110000001, ++ vffint_d_l_op = 0b0111001010011110000010, ++ vffint_d_lu_op = 0b0111001010011110000011, ++ vffintl_d_w_op = 0b0111001010011110000100, ++ vffinth_d_w_op = 0b0111001010011110000101, ++ vftint_w_s_op = 0b0111001010011110001100, ++ vftint_l_d_op = 0b0111001010011110001101, ++ vftintrm_w_s_op = 0b0111001010011110001110, ++ vftintrm_l_d_op = 0b0111001010011110001111, ++ vftintrp_w_s_op = 0b0111001010011110010000, ++ vftintrp_l_d_op = 0b0111001010011110010001, ++ vftintrz_w_s_op = 0b0111001010011110010010, ++ vftintrz_l_d_op = 0b0111001010011110010011, ++ vftintrne_w_s_op = 0b0111001010011110010100, ++ vftintrne_l_d_op = 0b0111001010011110010101, ++ vftint_wu_s = 0b0111001010011110010110, ++ vftint_lu_d = 0b0111001010011110010111, ++ vftintrz_wu_f = 0b0111001010011110011100, ++ vftintrz_lu_d = 0b0111001010011110011101, ++ vftintl_l_s_op = 0b0111001010011110100000, ++ vftinth_l_s_op = 0b0111001010011110100001, ++ vftintrml_l_s_op = 0b0111001010011110100010, ++ vftintrmh_l_s_op = 0b0111001010011110100011, ++ vftintrpl_l_s_op = 0b0111001010011110100100, ++ vftintrph_l_s_op = 0b0111001010011110100101, ++ vftintrzl_l_s_op = 0b0111001010011110100110, ++ vftintrzh_l_s_op = 0b0111001010011110100111, ++ vftintrnel_l_s_op = 0b0111001010011110101000, ++ vftintrneh_l_s_op = 0b0111001010011110101001, ++ vreplgr2vr_b_op = 0b0111001010011111000000, ++ vreplgr2vr_h_op = 0b0111001010011111000001, ++ vreplgr2vr_w_op = 0b0111001010011111000010, ++ vreplgr2vr_d_op = 0b0111001010011111000011, ++ xvpcnt_b_op = 0b0111011010011100001000, ++ xvpcnt_h_op = 0b0111011010011100001001, ++ xvpcnt_w_op = 0b0111011010011100001010, ++ xvpcnt_d_op = 0b0111011010011100001011, ++ xvneg_b_op = 0b0111011010011100001100, ++ xvneg_h_op = 0b0111011010011100001101, ++ xvneg_w_op = 
0b0111011010011100001110, ++ xvneg_d_op = 0b0111011010011100001111, ++ xvfclass_s_op = 0b0111011010011100110101, ++ xvfclass_d_op = 0b0111011010011100110110, ++ xvfsqrt_s_op = 0b0111011010011100111001, ++ xvfsqrt_d_op = 0b0111011010011100111010, ++ xvfrint_s_op = 0b0111011010011101001101, ++ xvfrint_d_op = 0b0111011010011101001110, ++ xvfrintrm_s_op = 0b0111011010011101010001, ++ xvfrintrm_d_op = 0b0111011010011101010010, ++ xvfrintrp_s_op = 0b0111011010011101010101, ++ xvfrintrp_d_op = 0b0111011010011101010110, ++ xvfrintrz_s_op = 0b0111011010011101011001, ++ xvfrintrz_d_op = 0b0111011010011101011010, ++ xvfrintrne_s_op = 0b0111011010011101011101, ++ xvfrintrne_d_op = 0b0111011010011101011110, ++ xvfcvtl_s_h_op = 0b0111011010011101111010, ++ xvfcvth_s_h_op = 0b0111011010011101111011, ++ xvfcvtl_d_s_op = 0b0111011010011101111100, ++ xvfcvth_d_s_op = 0b0111011010011101111101, ++ xvffint_s_w_op = 0b0111011010011110000000, ++ xvffint_s_wu_op = 0b0111011010011110000001, ++ xvffint_d_l_op = 0b0111011010011110000010, ++ xvffint_d_lu_op = 0b0111011010011110000011, ++ xvffintl_d_w_op = 0b0111011010011110000100, ++ xvffinth_d_w_op = 0b0111011010011110000101, ++ xvftint_w_s_op = 0b0111011010011110001100, ++ xvftint_l_d_op = 0b0111011010011110001101, ++ xvftintrm_w_s_op = 0b0111011010011110001110, ++ xvftintrm_l_d_op = 0b0111011010011110001111, ++ xvftintrp_w_s_op = 0b0111011010011110010000, ++ xvftintrp_l_d_op = 0b0111011010011110010001, ++ xvftintrz_w_s_op = 0b0111011010011110010010, ++ xvftintrz_l_d_op = 0b0111011010011110010011, ++ xvftintrne_w_s_op = 0b0111011010011110010100, ++ xvftintrne_l_d_op = 0b0111011010011110010101, ++ xvftint_wu_s = 0b0111011010011110010110, ++ xvftint_lu_d = 0b0111011010011110010111, ++ xvftintrz_wu_f = 0b0111011010011110011100, ++ xvftintrz_lu_d = 0b0111011010011110011101, ++ xvftintl_l_s_op = 0b0111011010011110100000, ++ xvftinth_l_s_op = 0b0111011010011110100001, ++ xvftintrml_l_s_op = 0b0111011010011110100010, ++ xvftintrmh_l_s_op = 0b0111011010011110100011, ++ xvftintrpl_l_s_op = 0b0111011010011110100100, ++ xvftintrph_l_s_op = 0b0111011010011110100101, ++ xvftintrzl_l_s_op = 0b0111011010011110100110, ++ xvftintrzh_l_s_op = 0b0111011010011110100111, ++ xvftintrnel_l_s_op = 0b0111011010011110101000, ++ xvftintrneh_l_s_op = 0b0111011010011110101001, ++ xvreplgr2vr_b_op = 0b0111011010011111000000, ++ xvreplgr2vr_h_op = 0b0111011010011111000001, ++ xvreplgr2vr_w_op = 0b0111011010011111000010, ++ xvreplgr2vr_d_op = 0b0111011010011111000011, ++ vext2xv_h_b_op = 0b0111011010011111000100, ++ vext2xv_w_b_op = 0b0111011010011111000101, ++ vext2xv_d_b_op = 0b0111011010011111000110, ++ vext2xv_w_h_op = 0b0111011010011111000111, ++ vext2xv_d_h_op = 0b0111011010011111001000, ++ vext2xv_d_w_op = 0b0111011010011111001001, ++ vext2xv_hu_bu_op = 0b0111011010011111001010, ++ vext2xv_wu_bu_op = 0b0111011010011111001011, ++ vext2xv_du_bu_op = 0b0111011010011111001100, ++ vext2xv_wu_hu_op = 0b0111011010011111001101, ++ vext2xv_du_hu_op = 0b0111011010011111001110, ++ vext2xv_du_wu_op = 0b0111011010011111001111, ++ xvreplve0_b_op = 0b0111011100000111000000, ++ xvreplve0_h_op = 0b0111011100000111100000, ++ xvreplve0_w_op = 0b0111011100000111110000, ++ xvreplve0_d_op = 0b0111011100000111111000, ++ xvreplve0_q_op = 0b0111011100000111111100, ++ ++ unknow_ops22 = 0b1111111111111111111111 ++ }; ++ ++ // 21-bit opcode, highest 21 bits: bits[31...11] ++ enum ops21 { ++ vinsgr2vr_d_op = 0b011100101110101111110, ++ vpickve2gr_d_op = 0b011100101110111111110, ++ vpickve2gr_du_op = 
0b011100101111001111110, ++ vreplvei_d_op = 0b011100101111011111110, ++ ++ unknow_ops21 = 0b111111111111111111111 ++ }; ++ ++ // 20-bit opcode, highest 20 bits: bits[31...12] ++ enum ops20 { ++ vinsgr2vr_w_op = 0b01110010111010111110, ++ vpickve2gr_w_op = 0b01110010111011111110, ++ vpickve2gr_wu_op = 0b01110010111100111110, ++ vreplvei_w_op = 0b01110010111101111110, ++ xvinsgr2vr_d_op = 0b01110110111010111110, ++ xvpickve2gr_d_op = 0b01110110111011111110, ++ xvpickve2gr_du_op = 0b01110110111100111110, ++ xvinsve0_d_op = 0b01110110111111111110, ++ xvpickve_d_op = 0b01110111000000111110, ++ ++ unknow_ops20 = 0b11111111111111111111 ++ }; ++ ++ // 19-bit opcode, highest 19 bits: bits[31...13] ++ enum ops19 { ++ vrotri_b_op = 0b0111001010100000001, ++ vinsgr2vr_h_op = 0b0111001011101011110, ++ vpickve2gr_h_op = 0b0111001011101111110, ++ vpickve2gr_hu_op = 0b0111001011110011110, ++ vreplvei_h_op = 0b0111001011110111110, ++ vbitclri_b_op = 0b0111001100010000001, ++ vbitseti_b_op = 0b0111001100010100001, ++ vbitrevi_b_op = 0b0111001100011000001, ++ vslli_b_op = 0b0111001100101100001, ++ vsrli_b_op = 0b0111001100110000001, ++ vsrai_b_op = 0b0111001100110100001, ++ xvrotri_b_op = 0b0111011010100000001, ++ xvinsgr2vr_w_op = 0b0111011011101011110, ++ xvpickve2gr_w_op = 0b0111011011101111110, ++ xvpickve2gr_wu_op = 0b0111011011110011110, ++ xvinsve0_w_op = 0b0111011011111111110, ++ xvpickve_w_op = 0b0111011100000011110, ++ xvbitclri_b_op = 0b0111011100010000001, ++ xvbitseti_b_op = 0b0111011100010100001, ++ xvbitrevi_b_op = 0b0111011100011000001, ++ xvslli_b_op = 0b0111011100101100001, ++ xvsrli_b_op = 0b0111011100110000001, ++ xvsrai_b_op = 0b0111011100110100001, ++ ++ unknow_ops19 = 0b1111111111111111111 ++ }; ++ ++ // 18-bit opcode, highest 18 bits: bits[31...14] ++ enum ops18 { ++ vrotri_h_op = 0b011100101010000001, ++ vinsgr2vr_b_op = 0b011100101110101110, ++ vpickve2gr_b_op = 0b011100101110111110, ++ vpickve2gr_bu_op = 0b011100101111001110, ++ vreplvei_b_op = 0b011100101111011110, ++ vbitclri_h_op = 0b011100110001000001, ++ vbitseti_h_op = 0b011100110001010001, ++ vbitrevi_h_op = 0b011100110001100001, ++ vslli_h_op = 0b011100110010110001, ++ vsrli_h_op = 0b011100110011000001, ++ vsrai_h_op = 0b011100110011010001, ++ vsrlni_b_h_op = 0b011100110100000001, ++ xvrotri_h_op = 0b011101101010000001, ++ xvbitclri_h_op = 0b011101110001000001, ++ xvbitseti_h_op = 0b011101110001010001, ++ xvbitrevi_h_op = 0b011101110001100001, ++ xvslli_h_op = 0b011101110010110001, ++ xvsrli_h_op = 0b011101110011000001, ++ xvsrai_h_op = 0b011101110011010001, ++ ++ unknow_ops18 = 0b111111111111111111 ++ }; ++ ++ // 17-bit opcode, highest 17 bits: bits[31...15] ++ enum ops17 { ++ asrtle_d_op = 0b00000000000000010, ++ asrtgt_d_op = 0b00000000000000011, ++ add_w_op = 0b00000000000100000, ++ add_d_op = 0b00000000000100001, ++ sub_w_op = 0b00000000000100010, ++ sub_d_op = 0b00000000000100011, ++ slt_op = 0b00000000000100100, ++ sltu_op = 0b00000000000100101, ++ maskeqz_op = 0b00000000000100110, ++ masknez_op = 0b00000000000100111, ++ nor_op = 0b00000000000101000, ++ and_op = 0b00000000000101001, ++ or_op = 0b00000000000101010, ++ xor_op = 0b00000000000101011, ++ orn_op = 0b00000000000101100, ++ andn_op = 0b00000000000101101, ++ sll_w_op = 0b00000000000101110, ++ srl_w_op = 0b00000000000101111, ++ sra_w_op = 0b00000000000110000, ++ sll_d_op = 0b00000000000110001, ++ srl_d_op = 0b00000000000110010, ++ sra_d_op = 0b00000000000110011, ++ rotr_w_op = 0b00000000000110110, ++ rotr_d_op = 0b00000000000110111, ++ mul_w_op = 
0b00000000000111000, ++ mulh_w_op = 0b00000000000111001, ++ mulh_wu_op = 0b00000000000111010, ++ mul_d_op = 0b00000000000111011, ++ mulh_d_op = 0b00000000000111100, ++ mulh_du_op = 0b00000000000111101, ++ mulw_d_w_op = 0b00000000000111110, ++ mulw_d_wu_op = 0b00000000000111111, ++ div_w_op = 0b00000000001000000, ++ mod_w_op = 0b00000000001000001, ++ div_wu_op = 0b00000000001000010, ++ mod_wu_op = 0b00000000001000011, ++ div_d_op = 0b00000000001000100, ++ mod_d_op = 0b00000000001000101, ++ div_du_op = 0b00000000001000110, ++ mod_du_op = 0b00000000001000111, ++ crc_w_b_w_op = 0b00000000001001000, ++ crc_w_h_w_op = 0b00000000001001001, ++ crc_w_w_w_op = 0b00000000001001010, ++ crc_w_d_w_op = 0b00000000001001011, ++ crcc_w_b_w_op = 0b00000000001001100, ++ crcc_w_h_w_op = 0b00000000001001101, ++ crcc_w_w_w_op = 0b00000000001001110, ++ crcc_w_d_w_op = 0b00000000001001111, ++ break_op = 0b00000000001010100, ++ fadd_s_op = 0b00000001000000001, ++ fadd_d_op = 0b00000001000000010, ++ fsub_s_op = 0b00000001000000101, ++ fsub_d_op = 0b00000001000000110, ++ fmul_s_op = 0b00000001000001001, ++ fmul_d_op = 0b00000001000001010, ++ fdiv_s_op = 0b00000001000001101, ++ fdiv_d_op = 0b00000001000001110, ++ fmax_s_op = 0b00000001000010001, ++ fmax_d_op = 0b00000001000010010, ++ fmin_s_op = 0b00000001000010101, ++ fmin_d_op = 0b00000001000010110, ++ fmaxa_s_op = 0b00000001000011001, ++ fmaxa_d_op = 0b00000001000011010, ++ fmina_s_op = 0b00000001000011101, ++ fmina_d_op = 0b00000001000011110, ++ fscaleb_s_op = 0b00000001000100001, ++ fscaleb_d_op = 0b00000001000100010, ++ fcopysign_s_op = 0b00000001000100101, ++ fcopysign_d_op = 0b00000001000100110, ++ ldx_b_op = 0b00111000000000000, ++ ldx_h_op = 0b00111000000001000, ++ ldx_w_op = 0b00111000000010000, ++ ldx_d_op = 0b00111000000011000, ++ stx_b_op = 0b00111000000100000, ++ stx_h_op = 0b00111000000101000, ++ stx_w_op = 0b00111000000110000, ++ stx_d_op = 0b00111000000111000, ++ ldx_bu_op = 0b00111000001000000, ++ ldx_hu_op = 0b00111000001001000, ++ ldx_wu_op = 0b00111000001010000, ++ fldx_s_op = 0b00111000001100000, ++ fldx_d_op = 0b00111000001101000, ++ fstx_s_op = 0b00111000001110000, ++ fstx_d_op = 0b00111000001111000, ++ vldx_op = 0b00111000010000000, ++ vstx_op = 0b00111000010001000, ++ xvldx_op = 0b00111000010010000, ++ xvstx_op = 0b00111000010011000, ++ amswap_w_op = 0b00111000011000000, ++ amswap_d_op = 0b00111000011000001, ++ amadd_w_op = 0b00111000011000010, ++ amadd_d_op = 0b00111000011000011, ++ amand_w_op = 0b00111000011000100, ++ amand_d_op = 0b00111000011000101, ++ amor_w_op = 0b00111000011000110, ++ amor_d_op = 0b00111000011000111, ++ amxor_w_op = 0b00111000011001000, ++ amxor_d_op = 0b00111000011001001, ++ ammax_w_op = 0b00111000011001010, ++ ammax_d_op = 0b00111000011001011, ++ ammin_w_op = 0b00111000011001100, ++ ammin_d_op = 0b00111000011001101, ++ ammax_wu_op = 0b00111000011001110, ++ ammax_du_op = 0b00111000011001111, ++ ammin_wu_op = 0b00111000011010000, ++ ammin_du_op = 0b00111000011010001, ++ amswap_db_w_op = 0b00111000011010010, ++ amswap_db_d_op = 0b00111000011010011, ++ amadd_db_w_op = 0b00111000011010100, ++ amadd_db_d_op = 0b00111000011010101, ++ amand_db_w_op = 0b00111000011010110, ++ amand_db_d_op = 0b00111000011010111, ++ amor_db_w_op = 0b00111000011011000, ++ amor_db_d_op = 0b00111000011011001, ++ amxor_db_w_op = 0b00111000011011010, ++ amxor_db_d_op = 0b00111000011011011, ++ ammax_db_w_op = 0b00111000011011100, ++ ammax_db_d_op = 0b00111000011011101, ++ ammin_db_w_op = 0b00111000011011110, ++ ammin_db_d_op = 0b00111000011011111, 
++ ammax_db_wu_op = 0b00111000011100000, ++ ammax_db_du_op = 0b00111000011100001, ++ ammin_db_wu_op = 0b00111000011100010, ++ ammin_db_du_op = 0b00111000011100011, ++ dbar_op = 0b00111000011100100, ++ ibar_op = 0b00111000011100101, ++ fldgt_s_op = 0b00111000011101000, ++ fldgt_d_op = 0b00111000011101001, ++ fldle_s_op = 0b00111000011101010, ++ fldle_d_op = 0b00111000011101011, ++ fstgt_s_op = 0b00111000011101100, ++ fstgt_d_op = 0b00111000011101101, ++ fstle_s_op = 0b00111000011101110, ++ fstle_d_op = 0b00111000011101111, ++ ldgt_b_op = 0b00111000011110000, ++ ldgt_h_op = 0b00111000011110001, ++ ldgt_w_op = 0b00111000011110010, ++ ldgt_d_op = 0b00111000011110011, ++ ldle_b_op = 0b00111000011110100, ++ ldle_h_op = 0b00111000011110101, ++ ldle_w_op = 0b00111000011110110, ++ ldle_d_op = 0b00111000011110111, ++ stgt_b_op = 0b00111000011111000, ++ stgt_h_op = 0b00111000011111001, ++ stgt_w_op = 0b00111000011111010, ++ stgt_d_op = 0b00111000011111011, ++ stle_b_op = 0b00111000011111100, ++ stle_h_op = 0b00111000011111101, ++ stle_w_op = 0b00111000011111110, ++ stle_d_op = 0b00111000011111111, ++ vseq_b_op = 0b01110000000000000, ++ vseq_h_op = 0b01110000000000001, ++ vseq_w_op = 0b01110000000000010, ++ vseq_d_op = 0b01110000000000011, ++ vsle_b_op = 0b01110000000000100, ++ vsle_h_op = 0b01110000000000101, ++ vsle_w_op = 0b01110000000000110, ++ vsle_d_op = 0b01110000000000111, ++ vsle_bu_op = 0b01110000000001000, ++ vsle_hu_op = 0b01110000000001001, ++ vsle_wu_op = 0b01110000000001010, ++ vsle_du_op = 0b01110000000001011, ++ vslt_b_op = 0b01110000000001100, ++ vslt_h_op = 0b01110000000001101, ++ vslt_w_op = 0b01110000000001110, ++ vslt_d_op = 0b01110000000001111, ++ vslt_bu_op = 0b01110000000010000, ++ vslt_hu_op = 0b01110000000010001, ++ vslt_wu_op = 0b01110000000010010, ++ vslt_du_op = 0b01110000000010011, ++ vadd_b_op = 0b01110000000010100, ++ vadd_h_op = 0b01110000000010101, ++ vadd_w_op = 0b01110000000010110, ++ vadd_d_op = 0b01110000000010111, ++ vsub_b_op = 0b01110000000011000, ++ vsub_h_op = 0b01110000000011001, ++ vsub_w_op = 0b01110000000011010, ++ vsub_d_op = 0b01110000000011011, ++ vabsd_b_op = 0b01110000011000000, ++ vabsd_h_op = 0b01110000011000001, ++ vabsd_w_op = 0b01110000011000010, ++ vabsd_d_op = 0b01110000011000011, ++ vmax_b_op = 0b01110000011100000, ++ vmax_h_op = 0b01110000011100001, ++ vmax_w_op = 0b01110000011100010, ++ vmax_d_op = 0b01110000011100011, ++ vmin_b_op = 0b01110000011100100, ++ vmin_h_op = 0b01110000011100101, ++ vmin_w_op = 0b01110000011100110, ++ vmin_d_op = 0b01110000011100111, ++ vmul_b_op = 0b01110000100001000, ++ vmul_h_op = 0b01110000100001001, ++ vmul_w_op = 0b01110000100001010, ++ vmul_d_op = 0b01110000100001011, ++ vmuh_b_op = 0b01110000100001100, ++ vmuh_h_op = 0b01110000100001101, ++ vmuh_w_op = 0b01110000100001110, ++ vmuh_d_op = 0b01110000100001111, ++ vmuh_bu_op = 0b01110000100010000, ++ vmuh_hu_op = 0b01110000100010001, ++ vmuh_wu_op = 0b01110000100010010, ++ vmuh_du_op = 0b01110000100010011, ++ vmulwev_h_b_op = 0b01110000100100000, ++ vmulwev_w_h_op = 0b01110000100100001, ++ vmulwev_d_w_op = 0b01110000100100010, ++ vmulwev_q_d_op = 0b01110000100100011, ++ vmulwod_h_b_op = 0b01110000100100100, ++ vmulwod_w_h_op = 0b01110000100100101, ++ vmulwod_d_w_op = 0b01110000100100110, ++ vmulwod_q_d_op = 0b01110000100100111, ++ vmadd_b_op = 0b01110000101010000, ++ vmadd_h_op = 0b01110000101010001, ++ vmadd_w_op = 0b01110000101010010, ++ vmadd_d_op = 0b01110000101010011, ++ vmsub_b_op = 0b01110000101010100, ++ vmsub_h_op = 0b01110000101010101, ++ 
vmsub_w_op = 0b01110000101010110, ++ vmsub_d_op = 0b01110000101010111, ++ vsll_b_op = 0b01110000111010000, ++ vsll_h_op = 0b01110000111010001, ++ vsll_w_op = 0b01110000111010010, ++ vsll_d_op = 0b01110000111010011, ++ vsrl_b_op = 0b01110000111010100, ++ vsrl_h_op = 0b01110000111010101, ++ vsrl_w_op = 0b01110000111010110, ++ vsrl_d_op = 0b01110000111010111, ++ vsra_b_op = 0b01110000111011000, ++ vsra_h_op = 0b01110000111011001, ++ vsra_w_op = 0b01110000111011010, ++ vsra_d_op = 0b01110000111011011, ++ vrotr_b_op = 0b01110000111011100, ++ vrotr_h_op = 0b01110000111011101, ++ vrotr_w_op = 0b01110000111011110, ++ vrotr_d_op = 0b01110000111011111, ++ vbitclr_b_op = 0b01110001000011000, ++ vbitclr_h_op = 0b01110001000011001, ++ vbitclr_w_op = 0b01110001000011010, ++ vbitclr_d_op = 0b01110001000011011, ++ vbitset_b_op = 0b01110001000011100, ++ vbitset_h_op = 0b01110001000011101, ++ vbitset_w_op = 0b01110001000011110, ++ vbitset_d_op = 0b01110001000011111, ++ vbitrev_b_op = 0b01110001000100000, ++ vbitrev_h_op = 0b01110001000100001, ++ vbitrev_w_op = 0b01110001000100010, ++ vbitrev_d_op = 0b01110001000100011, ++ vand_v_op = 0b01110001001001100, ++ vor_v_op = 0b01110001001001101, ++ vxor_v_op = 0b01110001001001110, ++ vnor_v_op = 0b01110001001001111, ++ vandn_v_op = 0b01110001001010000, ++ vorn_v_op = 0b01110001001010001, ++ vadd_q_op = 0b01110001001011010, ++ vsub_q_op = 0b01110001001011011, ++ vfadd_s_op = 0b01110001001100001, ++ vfadd_d_op = 0b01110001001100010, ++ vfsub_s_op = 0b01110001001100101, ++ vfsub_d_op = 0b01110001001100110, ++ vfmul_s_op = 0b01110001001110001, ++ vfmul_d_op = 0b01110001001110010, ++ vfdiv_s_op = 0b01110001001110101, ++ vfdiv_d_op = 0b01110001001110110, ++ vfmax_s_op = 0b01110001001111001, ++ vfmax_d_op = 0b01110001001111010, ++ vfmin_s_op = 0b01110001001111101, ++ vfmin_d_op = 0b01110001001111110, ++ vfcvt_h_s_op = 0b01110001010001100, ++ vfcvt_s_d_op = 0b01110001010001101, ++ vffint_s_l_op = 0b01110001010010000, ++ vftint_w_d_op = 0b01110001010010011, ++ vftintrm_w_d_op = 0b01110001010010100, ++ vftintrp_w_d_op = 0b01110001010010101, ++ vftintrz_w_d_op = 0b01110001010010110, ++ vftintrne_w_d_op = 0b01110001010010111, ++ vshuf_h_op = 0b01110001011110101, ++ vshuf_w_op = 0b01110001011110110, ++ vshuf_d_op = 0b01110001011110111, ++ vslti_bu_op = 0b01110010100010000, ++ vslti_hu_op = 0b01110010100010001, ++ vslti_wu_op = 0b01110010100010010, ++ vslti_du_op = 0b01110010100010011, ++ vaddi_bu_op = 0b01110010100010100, ++ vaddi_hu_op = 0b01110010100010101, ++ vaddi_wu_op = 0b01110010100010110, ++ vaddi_du_op = 0b01110010100010111, ++ vsubi_bu_op = 0b01110010100011000, ++ vsubi_hu_op = 0b01110010100011001, ++ vsubi_wu_op = 0b01110010100011010, ++ vsubi_du_op = 0b01110010100011011, ++ vrotri_w_op = 0b01110010101000001, ++ vbitclri_w_op = 0b01110011000100001, ++ vbitseti_w_op = 0b01110011000101001, ++ vbitrevi_w_op = 0b01110011000110001, ++ vslli_w_op = 0b01110011001011001, ++ vsrli_w_op = 0b01110011001100001, ++ vsrai_w_op = 0b01110011001101001, ++ vsrlni_h_w_op = 0b01110011010000001, ++ xvseq_b_op = 0b01110100000000000, ++ xvseq_h_op = 0b01110100000000001, ++ xvseq_w_op = 0b01110100000000010, ++ xvseq_d_op = 0b01110100000000011, ++ xvsle_b_op = 0b01110100000000100, ++ xvsle_h_op = 0b01110100000000101, ++ xvsle_w_op = 0b01110100000000110, ++ xvsle_d_op = 0b01110100000000111, ++ xvsle_bu_op = 0b01110100000001000, ++ xvsle_hu_op = 0b01110100000001001, ++ xvsle_wu_op = 0b01110100000001010, ++ xvsle_du_op = 0b01110100000001011, ++ xvslt_b_op = 0b01110100000001100, ++ xvslt_h_op = 
0b01110100000001101, ++ xvslt_w_op = 0b01110100000001110, ++ xvslt_d_op = 0b01110100000001111, ++ xvslt_bu_op = 0b01110100000010000, ++ xvslt_hu_op = 0b01110100000010001, ++ xvslt_wu_op = 0b01110100000010010, ++ xvslt_du_op = 0b01110100000010011, ++ xvadd_b_op = 0b01110100000010100, ++ xvadd_h_op = 0b01110100000010101, ++ xvadd_w_op = 0b01110100000010110, ++ xvadd_d_op = 0b01110100000010111, ++ xvsub_b_op = 0b01110100000011000, ++ xvsub_h_op = 0b01110100000011001, ++ xvsub_w_op = 0b01110100000011010, ++ xvsub_d_op = 0b01110100000011011, ++ xvabsd_b_op = 0b01110100011000000, ++ xvabsd_h_op = 0b01110100011000001, ++ xvabsd_w_op = 0b01110100011000010, ++ xvabsd_d_op = 0b01110100011000011, ++ xvmax_b_op = 0b01110100011100000, ++ xvmax_h_op = 0b01110100011100001, ++ xvmax_w_op = 0b01110100011100010, ++ xvmax_d_op = 0b01110100011100011, ++ xvmin_b_op = 0b01110100011100100, ++ xvmin_h_op = 0b01110100011100101, ++ xvmin_w_op = 0b01110100011100110, ++ xvmin_d_op = 0b01110100011100111, ++ xvmul_b_op = 0b01110100100001000, ++ xvmul_h_op = 0b01110100100001001, ++ xvmul_w_op = 0b01110100100001010, ++ xvmul_d_op = 0b01110100100001011, ++ xvmuh_b_op = 0b01110100100001100, ++ xvmuh_h_op = 0b01110100100001101, ++ xvmuh_w_op = 0b01110100100001110, ++ xvmuh_d_op = 0b01110100100001111, ++ xvmuh_bu_op = 0b01110100100010000, ++ xvmuh_hu_op = 0b01110100100010001, ++ xvmuh_wu_op = 0b01110100100010010, ++ xvmuh_du_op = 0b01110100100010011, ++ xvmulwev_h_b_op = 0b01110100100100000, ++ xvmulwev_w_h_op = 0b01110100100100001, ++ xvmulwev_d_w_op = 0b01110100100100010, ++ xvmulwev_q_d_op = 0b01110100100100011, ++ xvmulwod_h_b_op = 0b01110100100100100, ++ xvmulwod_w_h_op = 0b01110100100100101, ++ xvmulwod_d_w_op = 0b01110100100100110, ++ xvmulwod_q_d_op = 0b01110100100100111, ++ xvmadd_b_op = 0b01110100101010000, ++ xvmadd_h_op = 0b01110100101010001, ++ xvmadd_w_op = 0b01110100101010010, ++ xvmadd_d_op = 0b01110100101010011, ++ xvmsub_b_op = 0b01110100101010100, ++ xvmsub_h_op = 0b01110100101010101, ++ xvmsub_w_op = 0b01110100101010110, ++ xvmsub_d_op = 0b01110100101010111, ++ xvsll_b_op = 0b01110100111010000, ++ xvsll_h_op = 0b01110100111010001, ++ xvsll_w_op = 0b01110100111010010, ++ xvsll_d_op = 0b01110100111010011, ++ xvsrl_b_op = 0b01110100111010100, ++ xvsrl_h_op = 0b01110100111010101, ++ xvsrl_w_op = 0b01110100111010110, ++ xvsrl_d_op = 0b01110100111010111, ++ xvsra_b_op = 0b01110100111011000, ++ xvsra_h_op = 0b01110100111011001, ++ xvsra_w_op = 0b01110100111011010, ++ xvsra_d_op = 0b01110100111011011, ++ xvrotr_b_op = 0b01110100111011100, ++ xvrotr_h_op = 0b01110100111011101, ++ xvrotr_w_op = 0b01110100111011110, ++ xvrotr_d_op = 0b01110100111011111, ++ xvbitclr_b_op = 0b01110101000011000, ++ xvbitclr_h_op = 0b01110101000011001, ++ xvbitclr_w_op = 0b01110101000011010, ++ xvbitclr_d_op = 0b01110101000011011, ++ xvbitset_b_op = 0b01110101000011100, ++ xvbitset_h_op = 0b01110101000011101, ++ xvbitset_w_op = 0b01110101000011110, ++ xvbitset_d_op = 0b01110101000011111, ++ xvbitrev_b_op = 0b01110101000100000, ++ xvbitrev_h_op = 0b01110101000100001, ++ xvbitrev_w_op = 0b01110101000100010, ++ xvbitrev_d_op = 0b01110101000100011, ++ xvand_v_op = 0b01110101001001100, ++ xvor_v_op = 0b01110101001001101, ++ xvxor_v_op = 0b01110101001001110, ++ xvnor_v_op = 0b01110101001001111, ++ xvandn_v_op = 0b01110101001010000, ++ xvorn_v_op = 0b01110101001010001, ++ xvadd_q_op = 0b01110101001011010, ++ xvsub_q_op = 0b01110101001011011, ++ xvfadd_s_op = 0b01110101001100001, ++ xvfadd_d_op = 0b01110101001100010, ++ xvfsub_s_op = 
0b01110101001100101, ++ xvfsub_d_op = 0b01110101001100110, ++ xvfmul_s_op = 0b01110101001110001, ++ xvfmul_d_op = 0b01110101001110010, ++ xvfdiv_s_op = 0b01110101001110101, ++ xvfdiv_d_op = 0b01110101001110110, ++ xvfmax_s_op = 0b01110101001111001, ++ xvfmax_d_op = 0b01110101001111010, ++ xvfmin_s_op = 0b01110101001111101, ++ xvfmin_d_op = 0b01110101001111110, ++ xvfcvt_h_s_op = 0b01110101010001100, ++ xvfcvt_s_d_op = 0b01110101010001101, ++ xvffint_s_l_op = 0b01110101010010000, ++ xvftint_w_d_op = 0b01110101010010011, ++ xvftintrm_w_d_op = 0b01110101010010100, ++ xvftintrp_w_d_op = 0b01110101010010101, ++ xvftintrz_w_d_op = 0b01110101010010110, ++ xvftintrne_w_d_op = 0b01110101010010111, ++ xvshuf_h_op = 0b01110101011110101, ++ xvshuf_w_op = 0b01110101011110110, ++ xvshuf_d_op = 0b01110101011110111, ++ xvperm_w_op = 0b01110101011111010, ++ xvslti_bu_op = 0b01110110100010000, ++ xvslti_hu_op = 0b01110110100010001, ++ xvslti_wu_op = 0b01110110100010010, ++ xvslti_du_op = 0b01110110100010011, ++ xvaddi_bu_op = 0b01110110100010100, ++ xvaddi_hu_op = 0b01110110100010101, ++ xvaddi_wu_op = 0b01110110100010110, ++ xvaddi_du_op = 0b01110110100010111, ++ xvsubi_bu_op = 0b01110110100011000, ++ xvsubi_hu_op = 0b01110110100011001, ++ xvsubi_wu_op = 0b01110110100011010, ++ xvsubi_du_op = 0b01110110100011011, ++ xvrotri_w_op = 0b01110110101000001, ++ xvbitclri_w_op = 0b01110111000100001, ++ xvbitseti_w_op = 0b01110111000101001, ++ xvbitrevi_w_op = 0b01110111000110001, ++ xvslli_w_op = 0b01110111001011001, ++ xvsrli_w_op = 0b01110111001100001, ++ xvsrai_w_op = 0b01110111001101001, ++ ++ unknow_ops17 = 0b11111111111111111 ++ }; ++ ++ // 16-bit opcode, highest 16 bits: bits[31...16] ++ enum ops16 { ++ vrotri_d_op = 0b0111001010100001, ++ vbitclri_d_op = 0b0111001100010001, ++ vbitseti_d_op = 0b0111001100010101, ++ vbitrevi_d_op = 0b0111001100011001, ++ vslli_d_op = 0b0111001100101101, ++ vsrli_d_op = 0b0111001100110001, ++ vsrai_d_op = 0b0111001100110101, ++ vsrlni_w_d_op = 0b0111001101000001, ++ xvrotri_d_op = 0b0111011010100001, ++ xvbitclri_d_op = 0b0111011100010001, ++ xvbitseti_d_op = 0b0111011100010101, ++ xvbitrevi_d_op = 0b0111011100011001, ++ xvslli_d_op = 0b0111011100101101, ++ xvsrli_d_op = 0b0111011100110001, ++ xvsrai_d_op = 0b0111011100110101, ++ ++ unknow_ops16 = 0b1111111111111111 ++ }; ++ ++ // 15-bit opcode, highest 15 bits: bits[31...17] ++ enum ops15 { ++ vsrlni_d_q_op = 0b011100110100001, ++ ++ unknow_ops15 = 0b111111111111111 ++ }; ++ ++ // 14-bit opcode, highest 14 bits: bits[31...18] ++ enum ops14 { ++ alsl_w_op = 0b00000000000001, ++ bytepick_w_op = 0b00000000000010, ++ bytepick_d_op = 0b00000000000011, ++ alsl_d_op = 0b00000000001011, ++ slli_op = 0b00000000010000, ++ srli_op = 0b00000000010001, ++ srai_op = 0b00000000010010, ++ rotri_op = 0b00000000010011, ++ lddir_op = 0b00000110010000, ++ ldpte_op = 0b00000110010001, ++ vshuf4i_b_op = 0b01110011100100, ++ vshuf4i_h_op = 0b01110011100101, ++ vshuf4i_w_op = 0b01110011100110, ++ vshuf4i_d_op = 0b01110011100111, ++ vandi_b_op = 0b01110011110100, ++ vori_b_op = 0b01110011110101, ++ vxori_b_op = 0b01110011110110, ++ vnori_b_op = 0b01110011110111, ++ vldi_op = 0b01110011111000, ++ vpermi_w_op = 0b01110011111001, ++ xvshuf4i_b_op = 0b01110111100100, ++ xvshuf4i_h_op = 0b01110111100101, ++ xvshuf4i_w_op = 0b01110111100110, ++ xvshuf4i_d_op = 0b01110111100111, ++ xvandi_b_op = 0b01110111110100, ++ xvori_b_op = 0b01110111110101, ++ xvxori_b_op = 0b01110111110110, ++ xvnori_b_op = 0b01110111110111, ++ xvldi_op = 0b01110111111000, ++ 
xvpermi_w_op = 0b01110111111001, ++ xvpermi_d_op = 0b01110111111010, ++ xvpermi_q_op = 0b01110111111011, ++ ++ unknow_ops14 = 0b11111111111111 ++ }; ++ ++ // 12-bit opcode, highest 12 bits: bits[31...20] ++ enum ops12 { ++ fmadd_s_op = 0b000010000001, ++ fmadd_d_op = 0b000010000010, ++ fmsub_s_op = 0b000010000101, ++ fmsub_d_op = 0b000010000110, ++ fnmadd_s_op = 0b000010001001, ++ fnmadd_d_op = 0b000010001010, ++ fnmsub_s_op = 0b000010001101, ++ fnmsub_d_op = 0b000010001110, ++ vfmadd_s_op = 0b000010010001, ++ vfmadd_d_op = 0b000010010010, ++ vfmsub_s_op = 0b000010010101, ++ vfmsub_d_op = 0b000010010110, ++ vfnmadd_s_op = 0b000010011001, ++ vfnmadd_d_op = 0b000010011010, ++ vfnmsub_s_op = 0b000010011101, ++ vfnmsub_d_op = 0b000010011110, ++ xvfmadd_s_op = 0b000010100001, ++ xvfmadd_d_op = 0b000010100010, ++ xvfmsub_s_op = 0b000010100101, ++ xvfmsub_d_op = 0b000010100110, ++ xvfnmadd_s_op = 0b000010101001, ++ xvfnmadd_d_op = 0b000010101010, ++ xvfnmsub_s_op = 0b000010101101, ++ xvfnmsub_d_op = 0b000010101110, ++ fcmp_cond_s_op = 0b000011000001, ++ fcmp_cond_d_op = 0b000011000010, ++ vfcmp_cond_s_op = 0b000011000101, ++ vfcmp_cond_d_op = 0b000011000110, ++ xvfcmp_cond_s_op = 0b000011001001, ++ xvfcmp_cond_d_op = 0b000011001010, ++ fsel_op = 0b000011010000, ++ vbitsel_v_op = 0b000011010001, ++ xvbitsel_v_op = 0b000011010010, ++ vshuf_b_op = 0b000011010101, ++ xvshuf_b_op = 0b000011010110, ++ ++ unknow_ops12 = 0b111111111111 ++ }; ++ ++ // 10-bit opcode, highest 10 bits: bits[31...22] ++ enum ops10 { ++ bstr_w_op = 0b0000000001, ++ bstrins_d_op = 0b0000000010, ++ bstrpick_d_op = 0b0000000011, ++ slti_op = 0b0000001000, ++ sltui_op = 0b0000001001, ++ addi_w_op = 0b0000001010, ++ addi_d_op = 0b0000001011, ++ lu52i_d_op = 0b0000001100, ++ andi_op = 0b0000001101, ++ ori_op = 0b0000001110, ++ xori_op = 0b0000001111, ++ ld_b_op = 0b0010100000, ++ ld_h_op = 0b0010100001, ++ ld_w_op = 0b0010100010, ++ ld_d_op = 0b0010100011, ++ st_b_op = 0b0010100100, ++ st_h_op = 0b0010100101, ++ st_w_op = 0b0010100110, ++ st_d_op = 0b0010100111, ++ ld_bu_op = 0b0010101000, ++ ld_hu_op = 0b0010101001, ++ ld_wu_op = 0b0010101010, ++ preld_op = 0b0010101011, ++ fld_s_op = 0b0010101100, ++ fst_s_op = 0b0010101101, ++ fld_d_op = 0b0010101110, ++ fst_d_op = 0b0010101111, ++ vld_op = 0b0010110000, ++ vst_op = 0b0010110001, ++ xvld_op = 0b0010110010, ++ xvst_op = 0b0010110011, ++ ldl_w_op = 0b0010111000, ++ ldr_w_op = 0b0010111001, ++ ++ unknow_ops10 = 0b1111111111 ++ }; ++ ++ // 8-bit opcode, highest 8 bits: bits[31...22] ++ enum ops8 { ++ ll_w_op = 0b00100000, ++ sc_w_op = 0b00100001, ++ ll_d_op = 0b00100010, ++ sc_d_op = 0b00100011, ++ ldptr_w_op = 0b00100100, ++ stptr_w_op = 0b00100101, ++ ldptr_d_op = 0b00100110, ++ stptr_d_op = 0b00100111, ++ ++ unknow_ops8 = 0b11111111 ++ }; ++ ++ // 7-bit opcode, highest 7 bits: bits[31...25] ++ enum ops7 { ++ lu12i_w_op = 0b0001010, ++ lu32i_d_op = 0b0001011, ++ pcaddi_op = 0b0001100, ++ pcalau12i_op = 0b0001101, ++ pcaddu12i_op = 0b0001110, ++ pcaddu18i_op = 0b0001111, ++ ++ unknow_ops7 = 0b1111111 ++ }; ++ ++ // 6-bit opcode, highest 6 bits: bits[31...25] ++ enum ops6 { ++ addu16i_d_op = 0b000100, ++ beqz_op = 0b010000, ++ bnez_op = 0b010001, ++ bccondz_op = 0b010010, ++ jirl_op = 0b010011, ++ b_op = 0b010100, ++ bl_op = 0b010101, ++ beq_op = 0b010110, ++ bne_op = 0b010111, ++ blt_op = 0b011000, ++ bge_op = 0b011001, ++ bltu_op = 0b011010, ++ bgeu_op = 0b011011, ++ ++ unknow_ops6 = 0b111111 ++ }; ++ ++ enum fcmp_cond { ++ fcmp_caf = 0x00, ++ fcmp_cun = 0x08, ++ fcmp_ceq = 0x04, 
++ fcmp_cueq = 0x0c, ++ fcmp_clt = 0x02, ++ fcmp_cult = 0x0a, ++ fcmp_cle = 0x06, ++ fcmp_cule = 0x0e, ++ fcmp_cne = 0x10, ++ fcmp_cor = 0x14, ++ fcmp_cune = 0x18, ++ fcmp_saf = 0x01, ++ fcmp_sun = 0x09, ++ fcmp_seq = 0x05, ++ fcmp_sueq = 0x0d, ++ fcmp_slt = 0x03, ++ fcmp_sult = 0x0b, ++ fcmp_sle = 0x07, ++ fcmp_sule = 0x0f, ++ fcmp_sne = 0x11, ++ fcmp_sor = 0x15, ++ fcmp_sune = 0x19 ++ }; ++ ++ enum Condition { ++ zero , ++ notZero , ++ equal , ++ notEqual , ++ less , ++ lessEqual , ++ greater , ++ greaterEqual , ++ below , ++ belowEqual , ++ above , ++ aboveEqual ++ }; ++ ++ static const int LogInstructionSize = 2; ++ static const int InstructionSize = 1 << LogInstructionSize; ++ ++ enum WhichOperand { ++ // input to locate_operand, and format code for relocations ++ imm_operand = 0, // embedded 32-bit|64-bit immediate operand ++ disp32_operand = 1, // embedded 32-bit displacement or address ++ call32_operand = 2, // embedded 32-bit self-relative displacement ++ narrow_oop_operand = 3, // embedded 32-bit immediate narrow oop ++ _WhichOperand_limit = 4 ++ }; ++ ++ static int low (int x, int l) { return bitfield(x, 0, l); } ++ static int low16(int x) { return low(x, 16); } ++ static int low26(int x) { return low(x, 26); } ++ ++ static int high (int x, int l) { return bitfield(x, 32-l, l); } ++ static int high16(int x) { return high(x, 16); } ++ static int high6 (int x) { return high(x, 6); } ++ ++ ++ static ALWAYSINLINE void patch(address a, int length, uint32_t val) { ++ guarantee(val < (1ULL << length), "Field too big for insn"); ++ guarantee(length > 0, "length > 0"); ++ unsigned target = *(unsigned *)a; ++ target = (target >> length) << length; ++ target |= val; ++ *(unsigned *)a = target; ++ } ++ ++ protected: ++ // help methods for instruction ejection ++ ++ // 2R-type ++ // 31 10 9 5 4 0 ++ // | opcode | rj | rd | ++ static inline int insn_RR (int op, int rj, int rd) { return (op<<10) | (rj<<5) | rd; } ++ ++ // 3R-type ++ // 31 15 14 10 9 5 4 0 ++ // | opcode | rk | rj | rd | ++ static inline int insn_RRR (int op, int rk, int rj, int rd) { return (op<<15) | (rk<<10) | (rj<<5) | rd; } ++ ++ // 4R-type ++ // 31 20 19 15 14 10 9 5 4 0 ++ // | opcode | ra | rk | rj | rd | ++ static inline int insn_RRRR (int op, int ra, int rk, int rj, int rd) { return (op<<20) | (ra << 15) | (rk<<10) | (rj<<5) | rd; } ++ ++ // 2RI1-type ++ // 31 11 10 9 5 4 0 ++ // | opcode | I1 | vj | rd | ++ static inline int insn_I1RR (int op, int ui1, int vj, int rd) { assert(is_uimm(ui1, 1), "not a unsigned 1-bit int"); return (op<<11) | (low(ui1, 1)<<10) | (vj<<5) | rd; } ++ ++ // 2RI2-type ++ // 31 12 11 10 9 5 4 0 ++ // | opcode | I2 | vj | rd | ++ static inline int insn_I2RR (int op, int ui2, int vj, int rd) { assert(is_uimm(ui2, 2), "not a unsigned 2-bit int"); return (op<<12) | (low(ui2, 2)<<10) | (vj<<5) | rd; } ++ ++ // 2RI3-type ++ // 31 13 12 10 9 5 4 0 ++ // | opcode | I3 | vj | vd | ++ static inline int insn_I3RR (int op, int ui3, int vj, int vd) { assert(is_uimm(ui3, 3), "not a unsigned 3-bit int"); return (op<<13) | (low(ui3, 3)<<10) | (vj<<5) | vd; } ++ ++ // 2RI4-type ++ // 31 14 13 10 9 5 4 0 ++ // | opcode | I4 | vj | vd | ++ static inline int insn_I4RR (int op, int ui4, int vj, int vd) { assert(is_uimm(ui4, 4), "not a unsigned 4-bit int"); return (op<<14) | (low(ui4, 4)<<10) | (vj<<5) | vd; } ++ ++ // 2RI5-type ++ // 31 15 14 10 9 5 4 0 ++ // | opcode | I5 | vj | vd | ++ static inline int insn_I5RR (int op, int ui5, int vj, int vd) { assert(is_uimm(ui5, 5), "not a unsigned 5-bit int"); return 
(op<<15) | (low(ui5, 5)<<10) | (vj<<5) | vd; } ++ ++ // 2RI6-type ++ // 31 16 15 10 9 5 4 0 ++ // | opcode | I6 | vj | vd | ++ static inline int insn_I6RR (int op, int ui6, int vj, int vd) { assert(is_uimm(ui6, 6), "not a unsigned 6-bit int"); return (op<<16) | (low(ui6, 6)<<10) | (vj<<5) | vd; } ++ ++ // 2RI7-type ++ // 31 17 16 10 9 5 4 0 ++ // | opcode | I7 | vj | vd | ++ static inline int insn_I7RR (int op, int ui7, int vj, int vd) { assert(is_uimm(ui7, 7), "not a unsigned 7-bit int"); return (op<<17) | (low(ui7, 6)<<10) | (vj<<5) | vd; } ++ ++ // 2RI8-type ++ // 31 18 17 10 9 5 4 0 ++ // | opcode | I8 | rj | rd | ++ static inline int insn_I8RR (int op, int imm8, int rj, int rd) { /*assert(is_simm(imm8, 8), "not a signed 8-bit int");*/ return (op<<18) | (low(imm8, 8)<<10) | (rj<<5) | rd; } ++ ++ // 2RI12-type ++ // 31 22 21 10 9 5 4 0 ++ // | opcode | I12 | rj | rd | ++ static inline int insn_I12RR(int op, int imm12, int rj, int rd) { /* assert(is_simm(imm12, 12), "not a signed 12-bit int");*/ return (op<<22) | (low(imm12, 12)<<10) | (rj<<5) | rd; } ++ ++ ++ // 2RI14-type ++ // 31 24 23 10 9 5 4 0 ++ // | opcode | I14 | rj | rd | ++ static inline int insn_I14RR(int op, int imm14, int rj, int rd) { assert(is_simm(imm14, 14), "not a signed 14-bit int"); return (op<<24) | (low(imm14, 14)<<10) | (rj<<5) | rd; } ++ ++ // 2RI16-type ++ // 31 26 25 10 9 5 4 0 ++ // | opcode | I16 | rj | rd | ++ static inline int insn_I16RR(int op, int imm16, int rj, int rd) { assert(is_simm16(imm16), "not a signed 16-bit int"); return (op<<26) | (low16(imm16)<<10) | (rj<<5) | rd; } ++ ++ // 1RI13-type (?) ++ // 31 18 17 5 4 0 ++ // | opcode | I13 | vd | ++ static inline int insn_I13R (int op, int imm13, int vd) { assert(is_simm(imm13, 13), "not a signed 13-bit int"); return (op<<18) | (low(imm13, 13)<<5) | vd; } ++ ++ // 1RI20-type (?) ++ // 31 25 24 5 4 0 ++ // | opcode | I20 | rd | ++ static inline int insn_I20R (int op, int imm20, int rd) { assert(is_simm(imm20, 20), "not a signed 20-bit int"); return (op<<25) | (low(imm20, 20)<<5) | rd; } ++ ++ // 1RI21-type ++ // 31 26 25 10 9 5 4 0 ++ // | opcode | I21[15:0] | rj |I21[20:16]| ++ static inline int insn_IRI(int op, int imm21, int rj) { assert(is_simm(imm21, 21), "not a signed 21-bit int"); return (op << 26) | (low16(imm21) << 10) | (rj << 5) | low(imm21 >> 16, 5); } ++ ++ // I26-type ++ // 31 26 25 10 9 0 ++ // | opcode | I26[15:0] | I26[25:16] | ++ static inline int insn_I26(int op, int imm26) { assert(is_simm(imm26, 26), "not a signed 26-bit int"); return (op << 26) | (low16(imm26) << 10) | low(imm26 >> 16, 10); } ++ ++ // imm15 ++ // 31 15 14 0 ++ // | opcode | I15 | ++ static inline int insn_I15 (int op, int imm15) { assert(is_uimm(imm15, 15), "not a unsigned 15-bit int"); return (op<<15) | low(imm15, 15); } ++ ++ ++ // get the offset field of beq, bne, blt[u], bge[u] instruction ++ int offset16(address entry) { ++ assert(is_simm16((entry - pc()) / 4), "change this code"); ++ if (!is_simm16((entry - pc()) / 4)) { ++ tty->print_cr("!!! is_simm16: %lx", (entry - pc()) / 4); ++ } ++ return (entry - pc()) / 4; ++ } ++ ++ // get the offset field of beqz, bnez instruction ++ int offset21(address entry) { ++ assert(is_simm((int)(entry - pc()) / 4, 21), "change this code"); ++ if (!is_simm((int)(entry - pc()) / 4, 21)) { ++ tty->print_cr("!!! 
is_simm21: %lx", (entry - pc()) / 4); ++ } ++ return (entry - pc()) / 4; ++ } ++ ++ // get the offset field of b instruction ++ int offset26(address entry) { ++ assert(is_simm((int)(entry - pc()) / 4, 26), "change this code"); ++ if (!is_simm((int)(entry - pc()) / 4, 26)) { ++ tty->print_cr("!!! is_simm26: %lx", (entry - pc()) / 4); ++ } ++ return (entry - pc()) / 4; ++ } ++ ++public: ++ using AbstractAssembler::offset; ++ ++ //sign expand with the sign bit is h ++ static int expand(int x, int h) { return -(x & (1<> 16; ++ } ++ ++ static int split_high16(int x) { ++ return ( (x >> 16) + ((x & 0x8000) != 0) ) & 0xffff; ++ } ++ ++ static int split_low20(int x) { ++ return (x & 0xfffff); ++ } ++ ++ // Convert 20-bit x to a sign-extended 20-bit integer ++ static int simm20(int x) { ++ assert(x == (x & 0xFFFFF), "must be 20-bit only"); ++ return (x << 12) >> 12; ++ } ++ ++ static int split_low12(int x) { ++ return (x & 0xfff); ++ } ++ ++ static inline void split_simm32(jlong si32, jint& si12, jint& si20) { ++ si12 = ((jint)(si32 & 0xfff) << 20) >> 20; ++ si32 += (si32 & 0x800) << 1; ++ si20 = si32 >> 12; ++ } ++ ++ static inline void split_simm38(jlong si38, jint& si18, jint& si20) { ++ si18 = ((jint)(si38 & 0x3ffff) << 14) >> 14; ++ si38 += (si38 & 0x20000) << 1; ++ si20 = si38 >> 18; ++ } ++ ++ // Convert 12-bit x to a sign-extended 12-bit integer ++ static int simm12(int x) { ++ assert(x == (x & 0xFFF), "must be 12-bit only"); ++ return (x << 20) >> 20; ++ } ++ ++ // Convert 26-bit x to a sign-extended 26-bit integer ++ static int simm26(int x) { ++ assert(x == (x & 0x3FFFFFF), "must be 26-bit only"); ++ return (x << 6) >> 6; ++ } ++ ++ static intptr_t merge(intptr_t x0, intptr_t x12) { ++ //lu12i, ori ++ return (((x12 << 12) | x0) << 32) >> 32; ++ } ++ ++ static intptr_t merge(intptr_t x0, intptr_t x12, intptr_t x32) { ++ //lu32i, lu12i, ori ++ return (((x32 << 32) | (x12 << 12) | x0) << 12) >> 12; ++ } ++ ++ static intptr_t merge(intptr_t x0, intptr_t x12, intptr_t x32, intptr_t x52) { ++ //lu52i, lu32i, lu12i, ori ++ return (x52 << 52) | (x32 << 32) | (x12 << 12) | x0; ++ } ++ ++ // Test if x is within signed immediate range for nbits. 
++ static bool is_simm (int x, unsigned int nbits) { ++ assert(0 < nbits && nbits < 32, "out of bounds"); ++ const int min = -( ((int)1) << nbits-1 ); ++ const int maxplus1 = ( ((int)1) << nbits-1 ); ++ return min <= x && x < maxplus1; ++ } ++ ++ static bool is_simm(jlong x, unsigned int nbits) { ++ assert(0 < nbits && nbits < 64, "out of bounds"); ++ const jlong min = -( ((jlong)1) << nbits-1 ); ++ const jlong maxplus1 = ( ((jlong)1) << nbits-1 ); ++ return min <= x && x < maxplus1; ++ } ++ ++ static bool is_simm16(int x) { return is_simm(x, 16); } ++ static bool is_simm16(long x) { return is_simm((jlong)x, (unsigned int)16); } ++ ++ // Test if x is within unsigned immediate range for nbits ++ static bool is_uimm(int x, unsigned int nbits) { ++ assert(0 < nbits && nbits < 32, "out of bounds"); ++ const int maxplus1 = ( ((int)1) << nbits ); ++ return 0 <= x && x < maxplus1; ++ } ++ ++ static bool is_uimm(jlong x, unsigned int nbits) { ++ assert(0 < nbits && nbits < 64, "out of bounds"); ++ const jlong maxplus1 = ( ((jlong)1) << nbits ); ++ return 0 <= x && x < maxplus1; ++ } ++ ++public: ++ ++ void flush() { ++ AbstractAssembler::flush(); ++ } ++ ++ inline void emit_int32(int x) { ++ AbstractAssembler::emit_int32(x); ++ } ++ ++ inline void emit_data(int x) { emit_int32(x); } ++ inline void emit_data(int x, relocInfo::relocType rtype) { ++ relocate(rtype); ++ emit_int32(x); ++ } ++ ++ inline void emit_data(int x, RelocationHolder const& rspec) { ++ relocate(rspec); ++ emit_int32(x); ++ } ++ ++ ++ // Generic instructions ++ // Does 32bit or 64bit as needed for the platform. In some sense these ++ // belong in macro assembler but there is no need for both varieties to exist ++ ++ void clo_w (Register rd, Register rj) { emit_int32(insn_RR(clo_w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void clz_w (Register rd, Register rj) { emit_int32(insn_RR(clz_w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void cto_w (Register rd, Register rj) { emit_int32(insn_RR(cto_w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void ctz_w (Register rd, Register rj) { emit_int32(insn_RR(ctz_w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void clo_d (Register rd, Register rj) { emit_int32(insn_RR(clo_d_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void clz_d (Register rd, Register rj) { emit_int32(insn_RR(clz_d_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void cto_d (Register rd, Register rj) { emit_int32(insn_RR(cto_d_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void ctz_d (Register rd, Register rj) { emit_int32(insn_RR(ctz_d_op, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void revb_2h(Register rd, Register rj) { emit_int32(insn_RR(revb_2h_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void revb_4h(Register rd, Register rj) { emit_int32(insn_RR(revb_4h_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void revb_2w(Register rd, Register rj) { emit_int32(insn_RR(revb_2w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void revb_d (Register rd, Register rj) { emit_int32(insn_RR( revb_d_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void revh_2w(Register rd, Register rj) { emit_int32(insn_RR(revh_2w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void revh_d (Register rd, Register rj) { emit_int32(insn_RR( revh_d_op, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void bitrev_4b(Register rd, Register rj) { emit_int32(insn_RR(bitrev_4b_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void bitrev_8b(Register rd, Register rj) { 
emit_int32(insn_RR(bitrev_8b_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void bitrev_w (Register rd, Register rj) { emit_int32(insn_RR(bitrev_w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void bitrev_d (Register rd, Register rj) { emit_int32(insn_RR(bitrev_d_op, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void ext_w_h(Register rd, Register rj) { emit_int32(insn_RR(ext_w_h_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void ext_w_b(Register rd, Register rj) { emit_int32(insn_RR(ext_w_b_op, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void rdtimel_w(Register rd, Register rj) { emit_int32(insn_RR(rdtimel_w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void rdtimeh_w(Register rd, Register rj) { emit_int32(insn_RR(rdtimeh_w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void rdtime_d(Register rd, Register rj) { emit_int32(insn_RR(rdtime_d_op, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void cpucfg(Register rd, Register rj) { emit_int32(insn_RR(cpucfg_op, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void asrtle_d (Register rj, Register rk) { emit_int32(insn_RRR(asrtle_d_op , (int)rk->encoding(), (int)rj->encoding(), 0)); } ++ void asrtgt_d (Register rj, Register rk) { emit_int32(insn_RRR(asrtgt_d_op , (int)rk->encoding(), (int)rj->encoding(), 0)); } ++ ++ void alsl_w(Register rd, Register rj, Register rk, int sa2) { assert(is_uimm(sa2, 2), "not a unsigned 2-bit int"); emit_int32(insn_I8RR(alsl_w_op, ( (0 << 7) | (sa2 << 5) | (int)rk->encoding() ), (int)rj->encoding(), (int)rd->encoding())); } ++ void alsl_wu(Register rd, Register rj, Register rk, int sa2) { assert(is_uimm(sa2, 2), "not a unsigned 2-bit int"); emit_int32(insn_I8RR(alsl_w_op, ( (1 << 7) | (sa2 << 5) | (int)rk->encoding() ), (int)rj->encoding(), (int)rd->encoding())); } ++ void bytepick_w(Register rd, Register rj, Register rk, int sa2) { assert(is_uimm(sa2, 2), "not a unsigned 2-bit int"); emit_int32(insn_I8RR(bytepick_w_op, ( (0 << 7) | (sa2 << 5) | (int)rk->encoding() ), (int)rj->encoding(), (int)rd->encoding())); } ++ void bytepick_d(Register rd, Register rj, Register rk, int sa3) { assert(is_uimm(sa3, 3), "not a unsigned 3-bit int"); emit_int32(insn_I8RR(bytepick_d_op, ( (sa3 << 5) | (int)rk->encoding() ), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void add_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(add_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void add_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(add_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void sub_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sub_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void sub_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sub_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void slt (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(slt_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void sltu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sltu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void maskeqz (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(maskeqz_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void masknez (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(masknez_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ 
++ void nor (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(nor_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void AND (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(and_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void OR (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(or_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void XOR (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(xor_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void orn (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(orn_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void andn(Register rd, Register rj, Register rk) { emit_int32(insn_RRR(andn_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++
++ void sll_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sll_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void srl_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(srl_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void sra_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sra_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void sll_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sll_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void srl_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(srl_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void sra_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sra_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++
++ void rotr_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(rotr_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void rotr_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(rotr_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++
++ void mul_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mul_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void mulh_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulh_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void mulh_wu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulh_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void mul_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mul_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void mulh_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulh_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void mulh_du (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulh_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void mulw_d_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulw_d_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void mulw_d_wu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulw_d_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++
++ void div_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(div_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void mod_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mod_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void div_wu(Register rd, Register rj, Register rk) { emit_int32(insn_RRR(div_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void mod_wu(Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mod_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void div_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(div_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void mod_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mod_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void div_du(Register rd, Register rj, Register rk) { emit_int32(insn_RRR(div_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void mod_du(Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mod_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++
++ void crc_w_b_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crc_w_b_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void crc_w_h_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crc_w_h_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void crc_w_w_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crc_w_w_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void crc_w_d_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crc_w_d_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void crcc_w_b_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crcc_w_b_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void crcc_w_h_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crcc_w_h_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void crcc_w_w_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crcc_w_w_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void crcc_w_d_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crcc_w_d_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++
++ void brk(int code) { assert(is_uimm(code, 15), "not a unsigned 15-bit int"); emit_int32(insn_I15(break_op, code)); }
++
++ void alsl_d(Register rd, Register rj, Register rk, int sa2) { assert(is_uimm(sa2, 2), "not a unsigned 2-bit int"); emit_int32(insn_I8RR(alsl_d_op, ( (sa2 << 5) | (int)rk->encoding() ), (int)rj->encoding(), (int)rd->encoding())); }
++
++ void slli_w(Register rd, Register rj, int ui5) { assert(is_uimm(ui5, 5), "not a unsigned 5-bit int"); emit_int32(insn_I8RR(slli_op, ( (0b001 << 5) | ui5 ), (int)rj->encoding(), (int)rd->encoding())); }
++ void slli_d(Register rd, Register rj, int ui6) { assert(is_uimm(ui6, 6), "not a unsigned 6-bit int"); emit_int32(insn_I8RR(slli_op, ( (0b01 << 6) | ui6 ), (int)rj->encoding(), (int)rd->encoding())); }
++ void srli_w(Register rd, Register rj, int ui5) { assert(is_uimm(ui5, 5), "not a unsigned 5-bit int"); emit_int32(insn_I8RR(srli_op, ( (0b001 << 5) | ui5 ), (int)rj->encoding(), (int)rd->encoding())); }
++ void srli_d(Register rd, Register rj, int ui6) { assert(is_uimm(ui6, 6), "not a unsigned 6-bit int"); emit_int32(insn_I8RR(srli_op, ( (0b01 << 6) | ui6 ), (int)rj->encoding(), (int)rd->encoding())); }
++ void srai_w(Register rd, Register rj, int ui5) { assert(is_uimm(ui5, 5), "not a unsigned 5-bit int"); emit_int32(insn_I8RR(srai_op, ( (0b001 << 5) | ui5 ), (int)rj->encoding(), (int)rd->encoding())); }
++ void srai_d(Register rd, Register rj, int ui6) { assert(is_uimm(ui6, 6), "not a unsigned 6-bit int"); emit_int32(insn_I8RR(srai_op, ( (0b01 << 6) | ui6 ), (int)rj->encoding(), (int)rd->encoding())); }
++ void rotri_w(Register rd, Register rj, int ui5) { assert(is_uimm(ui5, 5), "not a unsigned 5-bit int"); emit_int32(insn_I8RR(rotri_op, ( (0b001 << 5) | ui5 ), (int)rj->encoding(), (int)rd->encoding())); }
++ void rotri_d(Register rd, Register rj, int ui6) { assert(is_uimm(ui6, 6), "not a unsigned 6-bit int"); emit_int32(insn_I8RR(rotri_op, ( (0b01 << 6) | ui6 ), (int)rj->encoding(), (int)rd->encoding())); }
++
++ void bstrins_w (Register rd, Register rj, int msbw, int lsbw) { assert(is_uimm(msbw, 5) && is_uimm(lsbw, 5), "not a unsigned 5-bit int"); emit_int32(insn_I12RR(bstr_w_op, ( (1<<11) | (low(msbw, 5)<<6) | (0<<5) | low(lsbw, 5) ), (int)rj->encoding(), (int)rd->encoding())); }
++ void bstrpick_w (Register rd, Register rj, int msbw, int lsbw) { assert(is_uimm(msbw, 5) && is_uimm(lsbw, 5), "not a unsigned 5-bit int"); emit_int32(insn_I12RR(bstr_w_op, ( (1<<11) | (low(msbw, 5)<<6) | (1<<5) | low(lsbw, 5) ), (int)rj->encoding(), (int)rd->encoding())); }
++ void bstrins_d (Register rd, Register rj, int msbd, int lsbd) { assert(is_uimm(msbd, 6) && is_uimm(lsbd, 6), "not a unsigned 6-bit int"); emit_int32(insn_I12RR(bstrins_d_op, ( (low(msbd, 6)<<6) | low(lsbd, 6) ), (int)rj->encoding(), (int)rd->encoding())); }
++ void bstrpick_d (Register rd, Register rj, int msbd, int lsbd) { assert(is_uimm(msbd, 6) && is_uimm(lsbd, 6), "not a unsigned 6-bit int"); emit_int32(insn_I12RR(bstrpick_d_op, ( (low(msbd, 6)<<6) | low(lsbd, 6) ), (int)rj->encoding(), (int)rd->encoding())); }
++
++ void fadd_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fadd_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); }
++ void fadd_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fadd_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); }
++ void fsub_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fsub_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); }
++ void fsub_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fsub_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); }
++ void fmul_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmul_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); }
++ void fmul_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmul_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); }
++ void fdiv_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fdiv_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); }
++ void fdiv_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fdiv_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); }
++ void fmax_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmax_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); }
++ void fmax_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmax_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); }
++ void fmin_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmin_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); }
++ void fmin_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmin_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); }
++ void fmaxa_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmaxa_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); }
++ void fmaxa_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmaxa_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); }
++ void fmina_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmina_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); }
++ void fmina_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmina_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); }
++
++ void fscaleb_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fscaleb_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); }
++ void fscaleb_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fscaleb_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); }
++ void fcopysign_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fcopysign_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); }
++ void fcopysign_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fcopysign_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); }
++
++ void fabs_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fabs_s_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void fabs_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fabs_d_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void fneg_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fneg_s_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void fneg_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fneg_d_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void flogb_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(flogb_s_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void flogb_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(flogb_d_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void fclass_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fclass_s_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void fclass_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fclass_d_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void fsqrt_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fsqrt_s_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void fsqrt_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fsqrt_d_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void frecip_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(frecip_s_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void frecip_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(frecip_d_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void frsqrt_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(frsqrt_s_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void frsqrt_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(frsqrt_d_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void fmov_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fmov_s_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void fmov_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fmov_d_op, (int)fj->encoding(), (int)fd->encoding())); }
++
++ void movgr2fr_w (FloatRegister fd, Register rj) { emit_int32(insn_RR(movgr2fr_w_op, (int)rj->encoding(), (int)fd->encoding())); }
++ void movgr2fr_d (FloatRegister fd, Register rj) { emit_int32(insn_RR(movgr2fr_d_op, (int)rj->encoding(), (int)fd->encoding())); }
++ void movgr2frh_w(FloatRegister fd, Register rj) { emit_int32(insn_RR(movgr2frh_w_op, (int)rj->encoding(), (int)fd->encoding())); }
++ void movfr2gr_s (Register rd, FloatRegister fj) { emit_int32(insn_RR(movfr2gr_s_op, (int)fj->encoding(), (int)rd->encoding())); }
++ void movfr2gr_d (Register rd, FloatRegister fj) { emit_int32(insn_RR(movfr2gr_d_op, (int)fj->encoding(), (int)rd->encoding())); }
++ void movfrh2gr_s(Register rd, FloatRegister fj) { emit_int32(insn_RR(movfrh2gr_s_op, (int)fj->encoding(), (int)rd->encoding())); }
++ void movgr2fcsr (int fcsr, Register rj) { assert(is_uimm(fcsr, 2), "not a unsigned 2-bit init: fcsr0-fcsr3"); emit_int32(insn_RR(movgr2fcsr_op, (int)rj->encoding(), fcsr)); }
++ void movfcsr2gr (Register rd, int fcsr) { assert(is_uimm(fcsr, 2), "not a unsigned 2-bit init: fcsr0-fcsr3"); emit_int32(insn_RR(movfcsr2gr_op, fcsr, (int)rd->encoding())); }
++ void movfr2cf (ConditionalFlagRegister cd, FloatRegister fj) { emit_int32(insn_RR(movfr2cf_op, (int)fj->encoding(), (int)cd->encoding())); }
++ void movcf2fr (FloatRegister fd, ConditionalFlagRegister cj) { emit_int32(insn_RR(movcf2fr_op, (int)cj->encoding(), (int)fd->encoding())); }
++ void movgr2cf (ConditionalFlagRegister cd, Register rj) { emit_int32(insn_RR(movgr2cf_op, (int)rj->encoding(), (int)cd->encoding())); }
++ void movcf2gr (Register rd, ConditionalFlagRegister cj) { emit_int32(insn_RR(movcf2gr_op, (int)cj->encoding(), (int)rd->encoding())); }
++
++ void fcvt_s_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fcvt_s_d_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void fcvt_d_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fcvt_d_s_op, (int)fj->encoding(), (int)fd->encoding())); }
++
++ void ftintrm_w_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrm_w_s_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void ftintrm_w_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrm_w_d_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void ftintrm_l_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrm_l_s_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void ftintrm_l_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrm_l_d_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void ftintrp_w_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrp_w_s_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void ftintrp_w_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrp_w_d_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void ftintrp_l_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrp_l_s_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void ftintrp_l_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrp_l_d_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void ftintrz_w_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrz_w_s_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void ftintrz_w_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrz_w_d_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void ftintrz_l_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrz_l_s_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void ftintrz_l_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrz_l_d_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void ftintrne_w_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrne_w_s_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void ftintrne_w_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrne_w_d_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void ftintrne_l_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrne_l_s_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void ftintrne_l_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrne_l_d_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void ftint_w_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftint_w_s_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void ftint_w_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftint_w_d_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void ftint_l_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftint_l_s_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void ftint_l_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftint_l_d_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void ffint_s_w(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ffint_s_w_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void ffint_s_l(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ffint_s_l_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void ffint_d_w(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ffint_d_w_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void ffint_d_l(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ffint_d_l_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void frint_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(frint_s_op, (int)fj->encoding(), (int)fd->encoding())); }
++ void frint_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(frint_d_op, (int)fj->encoding(), (int)fd->encoding())); }
++
++ void slti (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(slti_op, si12, (int)rj->encoding(), (int)rd->encoding())); }
++ void sltui (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(sltui_op, si12, (int)rj->encoding(), (int)rd->encoding())); }
++ void addi_w(Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(addi_w_op, si12, (int)rj->encoding(), (int)rd->encoding())); }
++ void addi_d(Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(addi_d_op, si12, (int)rj->encoding(), (int)rd->encoding())); }
++ void lu52i_d(Register rd, Register rj, int si12) { /*assert(is_simm(si12, 12), "not a signed 12-bit int");*/ emit_int32(insn_I12RR(lu52i_d_op, simm12(si12), (int)rj->encoding(), (int)rd->encoding())); }
++ void andi (Register rd, Register rj, int ui12) { assert(is_uimm(ui12, 12), "not a unsigned 12-bit int"); emit_int32(insn_I12RR(andi_op, ui12, (int)rj->encoding(), (int)rd->encoding())); }
++ void ori (Register rd, Register rj, int ui12) { assert(is_uimm(ui12, 12), "not a unsigned 12-bit int"); emit_int32(insn_I12RR(ori_op, ui12, (int)rj->encoding(), (int)rd->encoding())); }
++ void xori (Register rd, Register rj, int ui12) { assert(is_uimm(ui12, 12), "not a unsigned 12-bit int"); emit_int32(insn_I12RR(xori_op, ui12, (int)rj->encoding(), (int)rd->encoding())); }
++
++ void fmadd_s (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fmadd_s_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); }
++ void fmadd_d (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fmadd_d_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); }
++ void fmsub_s (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fmsub_s_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); }
++ void fmsub_d (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fmsub_d_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); }
++ void fnmadd_s (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fnmadd_s_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); }
++ void fnmadd_d (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fnmadd_d_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); }
++ void fnmsub_s (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fnmsub_s_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); }
++ void fnmsub_d (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fnmsub_d_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); }
++
++ void fcmp_caf_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_caf, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_cun_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cun , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_ceq_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_ceq , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_cueq_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cueq, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_clt_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_clt , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_cult_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cult, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_cle_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cle , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_cule_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cule, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_cne_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cne , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_cor_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cor , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_cune_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cune, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_saf_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_saf , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_sun_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sun , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_seq_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_seq , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_sueq_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sueq, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_slt_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_slt , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_sult_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sult, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_sle_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sle , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_sule_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sule, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_sne_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sne , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_sor_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sor , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_sune_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sune, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++
++ void fcmp_caf_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_caf, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_cun_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cun , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_ceq_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_ceq , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_cueq_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cueq, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_clt_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_clt , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_cult_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cult, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_cle_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cle , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_cule_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cule, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_cne_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cne , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_cor_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cor , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_cune_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cune, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_saf_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_saf , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_sun_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sun , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_seq_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_seq , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_sueq_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sueq, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_slt_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_slt , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_sult_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sult, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_sle_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sle , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_sule_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sule, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_sne_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sne , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_sor_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sor , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++ void fcmp_sune_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sune, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); }
++
++ void fsel (FloatRegister fd, FloatRegister fj, FloatRegister fk, ConditionalFlagRegister ca) { emit_int32(insn_RRRR(fsel_op, (int)ca->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); }
++
++ void addu16i_d(Register rj, Register rd, int si16) { assert(is_simm(si16, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(addu16i_d_op, si16, (int)rj->encoding(), (int)rd->encoding())); }
++
++ void lu12i_w(Register rj, int si20) { /*assert(is_simm(si20, 20), "not a signed 20-bit int");*/ emit_int32(insn_I20R(lu12i_w_op, simm20(si20), (int)rj->encoding())); }
++ void lu32i_d(Register rj, int si20) { /*assert(is_simm(si20, 20), "not a signed 20-bit int");*/ emit_int32(insn_I20R(lu32i_d_op, simm20(si20), (int)rj->encoding())); }
++ void pcaddi(Register rj, int si20) { assert(is_simm(si20, 20), "not a signed 20-bit int"); emit_int32(insn_I20R(pcaddi_op, si20, (int)rj->encoding())); }
++ void pcalau12i(Register rj, int si20) { assert(is_simm(si20, 20), "not a signed 20-bit int"); emit_int32(insn_I20R(pcalau12i_op, si20, (int)rj->encoding())); }
++ void pcaddu12i(Register rj, int si20) { assert(is_simm(si20, 20), "not a signed 20-bit int"); emit_int32(insn_I20R(pcaddu12i_op, si20, (int)rj->encoding())); }
++ void pcaddu18i(Register rj, int si20) { assert(is_simm(si20, 20), "not a signed 20-bit int"); emit_int32(insn_I20R(pcaddu18i_op, si20, (int)rj->encoding())); }
++
++ void ll_w (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(ll_w_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); }
++ void sc_w (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(sc_w_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); }
++ void ll_d (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(ll_d_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); }
++ void sc_d (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(sc_d_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); }
++ void ldptr_w (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(ldptr_w_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); }
++ void stptr_w (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(stptr_w_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); }
++ void ldptr_d (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(ldptr_d_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); }
++ void stptr_d (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(stptr_d_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); }
++
++ void ld_b (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_b_op, si12, (int)rj->encoding(), (int)rd->encoding())); }
++ void ld_h (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_h_op, si12, (int)rj->encoding(), (int)rd->encoding())); }
++ void ld_w (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_w_op, si12, (int)rj->encoding(), (int)rd->encoding())); }
++ void ld_d (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_d_op, si12, (int)rj->encoding(), (int)rd->encoding())); }
++ void st_b (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(st_b_op, si12, (int)rj->encoding(), (int)rd->encoding())); }
++ void st_h (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(st_h_op, si12, (int)rj->encoding(), (int)rd->encoding())); }
++ void st_w (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(st_w_op, si12, (int)rj->encoding(), (int)rd->encoding())); }
++ void st_d (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(st_d_op, si12, (int)rj->encoding(), (int)rd->encoding())); }
++ void ld_bu (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_bu_op, si12, (int)rj->encoding(), (int)rd->encoding())); }
++ void ld_hu (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_hu_op, si12, (int)rj->encoding(), (int)rd->encoding())); }
++ void ld_wu (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_wu_op, si12, (int)rj->encoding(), (int)rd->encoding())); }
++ void preld (int hint, Register rj, int si12) { assert(is_uimm(hint, 5), "not a unsigned 5-bit int"); assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(preld_op, si12, (int)rj->encoding(), hint)); }
++ void fld_s (FloatRegister fd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(fld_s_op, si12, (int)rj->encoding(), (int)fd->encoding())); }
++ void fst_s (FloatRegister fd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(fst_s_op, si12, (int)rj->encoding(), (int)fd->encoding())); }
++ void fld_d (FloatRegister fd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(fld_d_op, si12, (int)rj->encoding(), (int)fd->encoding())); }
++ void fst_d (FloatRegister fd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(fst_d_op, si12, (int)rj->encoding(), (int)fd->encoding())); }
++ void ldl_w (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ldl_w_op, si12, (int)rj->encoding(), (int)rd->encoding())); }
++ void ldr_w (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ldr_w_op, si12, (int)rj->encoding(), (int)rd->encoding())); }
++
++ void ldx_b (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_b_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void ldx_h (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void ldx_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void ldx_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void stx_b (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stx_b_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void stx_h (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stx_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void stx_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stx_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void stx_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stx_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void ldx_bu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_bu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void ldx_hu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_hu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void ldx_wu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void fldx_s (FloatRegister fd, Register rj, Register rk) { emit_int32(insn_RRR(fldx_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)fd->encoding())); }
++ void fldx_d (FloatRegister fd, Register rj, Register rk) { emit_int32(insn_RRR(fldx_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)fd->encoding())); }
++ void fstx_s (FloatRegister fd, Register rj, Register rk) { emit_int32(insn_RRR(fstx_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)fd->encoding())); }
++ void fstx_d (FloatRegister fd, Register rj, Register rk) { emit_int32(insn_RRR(fstx_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)fd->encoding())); }
++
++ void ld_b (Register rd, Address src);
++ void ld_bu (Register rd, Address src);
++ void ld_d (Register rd, Address src);
++ void ld_h (Register rd, Address src);
++ void ld_hu (Register rd, Address src);
++ void ll_w (Register rd, Address src);
++ void ll_d (Register rd, Address src);
++ void ld_wu (Register rd, Address src);
++ void ld_w (Register rd, Address src);
++ void st_b (Register rd, Address dst);
++ void st_d (Register rd, Address dst);
++ void st_w (Register rd, Address dst);
++ void sc_w (Register rd, Address dst);
++ void sc_d (Register rd, Address dst);
++ void st_h (Register rd, Address dst);
++ void fld_s (FloatRegister fd, Address src);
++ void fld_d (FloatRegister fd, Address src);
++ void fst_s (FloatRegister fd, Address dst);
++ void fst_d (FloatRegister fd, Address dst);
++
++ void amswap_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amswap_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void amswap_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amswap_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void amadd_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amadd_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void amadd_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amadd_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void amand_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amand_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void amand_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amand_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void amor_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amor_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void amor_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amor_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void amxor_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amxor_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void amxor_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amxor_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void ammax_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void ammax_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void ammin_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void ammin_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void ammax_wu (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void ammax_du (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void ammin_wu (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void ammin_du (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void amswap_db_w(Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amswap_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void amswap_db_d(Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amswap_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void amadd_db_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amadd_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void amadd_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amadd_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void amand_db_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amand_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void amand_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amand_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void amor_db_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amor_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void amor_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amor_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void amxor_db_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amxor_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void amxor_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amxor_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void ammax_db_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void ammax_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void ammin_db_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void ammin_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void ammax_db_wu(Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_db_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void ammax_db_du(Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_db_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void ammin_db_wu(Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_db_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void ammin_db_du(Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_db_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++
++ void dbar(int hint) {
++   assert(is_uimm(hint, 15), "not a unsigned 15-bit int");
++
++   if (os::is_ActiveCoresMP())
++     andi(R0, R0, 0);
++   else
++     emit_int32(insn_I15(dbar_op, hint));
++ }
++ void ibar(int hint) { assert(is_uimm(hint, 15), "not a unsigned 15-bit int"); emit_int32(insn_I15(ibar_op, hint)); }
++
++ void fldgt_s (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fldgt_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void fldgt_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fldgt_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void fldle_s (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fldle_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void fldle_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fldle_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void fstgt_s (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fstgt_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void fstgt_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fstgt_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void fstle_s (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fstle_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void fstle_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fstle_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++
++ void ldgt_b (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldgt_b_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void ldgt_h (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldgt_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void ldgt_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldgt_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void ldgt_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldgt_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void ldle_b (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldle_b_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void ldle_h (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldle_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void ldle_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldle_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void ldle_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldle_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void stgt_b (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stgt_b_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void stgt_h (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stgt_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void stgt_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stgt_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void stgt_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stgt_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void stle_b (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stle_b_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void stle_h (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stle_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void stle_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stle_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void stle_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stle_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++
++ void beqz(Register rj, int offs) { assert(is_simm(offs, 21), "not a signed 21-bit int"); emit_int32(insn_IRI(beqz_op, offs, (int)rj->encoding())); }
++ void bnez(Register rj, int offs) { assert(is_simm(offs, 21), "not a signed 21-bit int"); emit_int32(insn_IRI(bnez_op, offs, (int)rj->encoding())); }
++ void bceqz(ConditionalFlagRegister cj, int offs) { assert(is_simm(offs, 21), "not a signed 21-bit int"); emit_int32(insn_IRI(bccondz_op, offs, ( (0b00<<3) | (int)cj->encoding()))); }
++ void bcnez(ConditionalFlagRegister cj, int offs) { assert(is_simm(offs, 21), "not a signed 21-bit int"); emit_int32(insn_IRI(bccondz_op, offs, ( (0b01<<3) | (int)cj->encoding()))); }
++
++ void jirl(Register rd, Register rj, int offs) { assert(is_simm(offs, 18) && ((offs & 3) == 0), "not a signed 18-bit int"); emit_int32(insn_I16RR(jirl_op, offs >> 2, (int)rj->encoding(), (int)rd->encoding())); }
++
++ void b(int offs) { assert(is_simm(offs, 26), "not a signed 26-bit int"); emit_int32(insn_I26(b_op, offs)); }
++ void bl(int offs) { assert(is_simm(offs, 26), "not a signed 26-bit int"); emit_int32(insn_I26(bl_op, offs)); }
++
++
++ void beq(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(beq_op, offs, (int)rj->encoding(), (int)rd->encoding())); }
++ void bne(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(bne_op, offs, (int)rj->encoding(), (int)rd->encoding())); }
++ void blt(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(blt_op, offs, (int)rj->encoding(), (int)rd->encoding())); }
++ void bge(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(bge_op, offs, (int)rj->encoding(), (int)rd->encoding())); }
++ void bltu(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(bltu_op, offs, (int)rj->encoding(), (int)rd->encoding())); }
++ void bgeu(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(bgeu_op, offs, (int)rj->encoding(), (int)rd->encoding())); }
++
++ void beq (Register rj, Register rd, address entry) { beq (rj, rd, offset16(entry)); }
++ void bne (Register rj, Register rd, address entry) { bne (rj, rd, offset16(entry)); }
++ void blt (Register rj, Register rd, address entry) { blt (rj, rd, offset16(entry)); }
++ void bge (Register rj, Register rd, address entry) { bge (rj, rd, offset16(entry)); }
++ void bltu (Register rj, Register rd, address entry) { bltu (rj, rd, offset16(entry)); }
++ void bgeu (Register rj, Register rd, address entry) { bgeu (rj, rd, offset16(entry)); }
++ void beqz (Register rj, address entry) { beqz (rj, offset21(entry)); }
++ void bnez (Register rj, address entry) { bnez (rj, offset21(entry)); }
++ void b(address entry) { b(offset26(entry)); }
++ void bl(address entry) { bl(offset26(entry)); }
++ void bceqz(ConditionalFlagRegister cj, address entry) { bceqz(cj, offset21(entry)); }
++ void bcnez(ConditionalFlagRegister cj, address entry) { bcnez(cj, offset21(entry)); }
++
++ void beq (Register rj, Register rd, Label& L) { beq (rj, rd, target(L)); }
++ void bne (Register rj, Register rd, Label& L) { bne (rj, rd, target(L)); }
++ void blt (Register rj, Register rd, Label& L) { blt (rj, rd, target(L)); }
++ void bge (Register rj, Register rd, Label& L) { bge (rj, rd, target(L)); }
++ void bltu (Register rj, Register rd, Label& L) { bltu (rj, rd, target(L)); }
++ void bgeu (Register rj, Register rd, Label& L) { bgeu (rj, rd, target(L)); }
++ void beqz (Register rj, Label& L) { beqz (rj, target(L)); }
++ void bnez (Register rj, Label& L) { bnez (rj, target(L)); }
++ void b(Label& L) { b(target(L)); }
++ void bl(Label& L) { bl(target(L)); }
++ void bceqz(ConditionalFlagRegister cj, Label& L) { bceqz(cj, target(L)); }
++ void bcnez(ConditionalFlagRegister cj, Label& L) { bcnez(cj, target(L)); }
++
++ typedef enum {
++   // hint[4]
++   Completion = 0,
++   Ordering = (1 << 4),
++
++   // The bitwise-not of the below constants is corresponding to the hint. This is convenient for OR operation.
++ // hint[3:2] and hint[1:0] ++ LoadLoad = ((1 << 3) | (1 << 1)), ++ LoadStore = ((1 << 3) | (1 << 0)), ++ StoreLoad = ((1 << 2) | (1 << 1)), ++ StoreStore = ((1 << 2) | (1 << 0)), ++ AnyAny = ((3 << 2) | (3 << 0)), ++ } Membar_mask_bits; ++ ++ // Serializes memory and blows flags ++ void membar(Membar_mask_bits hint) { ++ assert((hint & (3 << 0)) != 0, "membar mask unsupported!"); ++ assert((hint & (3 << 2)) != 0, "membar mask unsupported!"); ++ dbar(Ordering | (~hint & 0xf)); ++ } ++ ++ // LSX and LASX ++#define ASSERT_LSX assert(UseLSX, ""); ++#define ASSERT_LASX assert(UseLASX, ""); ++ ++ void vadd_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vadd_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vadd_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vadd_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vadd_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vadd_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vadd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vadd_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vadd_q(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vadd_q_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvadd_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvadd_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvadd_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvadd_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvadd_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvadd_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvadd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvadd_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvadd_q(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvadd_q_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsub_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsub_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsub_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsub_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsub_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsub_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsub_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsub_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsub_q(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsub_q_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsub_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsub_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsub_h(FloatRegister xd, 
FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsub_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsub_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsub_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsub_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsub_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsub_q(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsub_q_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vaddi_bu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vaddi_bu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vaddi_hu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vaddi_hu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vaddi_wu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vaddi_wu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vaddi_du(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vaddi_du_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvaddi_bu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvaddi_bu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvaddi_hu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvaddi_hu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvaddi_wu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvaddi_wu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvaddi_du(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvaddi_du_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsubi_bu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsubi_bu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsubi_hu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsubi_hu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsubi_wu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsubi_wu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsubi_du(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsubi_du_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsubi_bu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsubi_bu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsubi_hu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsubi_hu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsubi_wu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsubi_wu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsubi_du(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsubi_du_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vneg_b(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vneg_b_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vneg_h(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vneg_h_op, 
(int)vj->encoding(), (int)vd->encoding())); } ++ void vneg_w(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vneg_w_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vneg_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vneg_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvneg_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvneg_b_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvneg_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvneg_h_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvneg_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvneg_w_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvneg_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvneg_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vabsd_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vabsd_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vabsd_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vabsd_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vabsd_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vabsd_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vabsd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vabsd_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvabsd_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvabsd_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvabsd_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvabsd_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvabsd_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvabsd_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvabsd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvabsd_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmax_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmax_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmax_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmax_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmax_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmax_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmax_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmax_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmax_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmax_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmax_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmax_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmax_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX 
emit_int32(insn_RRR(xvmax_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmax_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmax_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmin_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmin_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmin_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmin_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmin_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmin_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmin_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmin_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmin_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmin_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmin_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmin_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmin_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmin_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmin_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmin_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmul_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmul_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmul_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmul_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmul_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmul_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmul_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmul_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmul_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmul_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmul_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmul_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmul_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmul_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmul_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmul_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmuh_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmuh_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_h_op, (int)vk->encoding(), (int)vj->encoding(), 
(int)vd->encoding())); } ++ void vmuh_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmuh_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmuh_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmuh_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmuh_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmuh_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmuh_bu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_bu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmuh_hu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_hu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmuh_wu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_wu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmuh_du(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_du_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmuh_bu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_bu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmuh_hu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_hu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmuh_wu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_wu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmuh_du(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_du_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmulwev_h_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwev_h_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmulwev_w_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwev_w_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmulwev_d_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwev_d_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmulwev_q_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwev_q_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmulwev_h_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwev_h_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } 
++ void xvmulwev_w_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwev_w_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmulwev_d_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwev_d_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmulwev_q_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwev_q_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmulwod_h_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwod_h_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmulwod_w_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwod_w_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmulwod_d_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwod_d_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmulwod_q_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwod_q_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmulwod_h_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwod_h_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmulwod_w_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwod_w_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmulwod_d_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwod_d_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmulwod_q_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwod_q_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmadd_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmadd_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmadd_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmadd_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmadd_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmadd_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmadd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmadd_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmadd_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmadd_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmadd_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmadd_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmadd_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmadd_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmadd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmadd_d_op, (int)xk->encoding(), 
(int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmsub_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmsub_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmsub_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmsub_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmsub_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmsub_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmsub_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmsub_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmsub_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmsub_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmsub_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmsub_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmsub_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmsub_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmsub_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmsub_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vext2xv_h_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_h_b_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_w_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_w_b_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_d_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_d_b_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_w_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_w_h_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_d_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_d_h_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_d_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_d_w_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vext2xv_hu_bu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_hu_bu_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_wu_bu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_wu_bu_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_du_bu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_du_bu_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_wu_hu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_wu_hu_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_du_hu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_du_hu_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_du_wu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_du_wu_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vldi(FloatRegister vd, int i13) { ASSERT_LSX emit_int32(insn_I13R( vldi_op, i13, (int)vd->encoding())); } ++ void xvldi(FloatRegister xd, int i13) { ASSERT_LASX 
emit_int32(insn_I13R(xvldi_op, i13, (int)xd->encoding())); } ++ ++ void vand_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vand_v_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvand_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvand_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vor_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vor_v_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvor_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvor_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vxor_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vxor_v_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvxor_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvxor_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vnor_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vnor_v_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvnor_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvnor_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vandn_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vandn_v_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvandn_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvandn_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vorn_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vorn_v_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvorn_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvorn_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vandi_b(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vandi_b_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvandi_b(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvandi_b_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vori_b(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vori_b_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvori_b(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvori_b_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vxori_b(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vxori_b_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvxori_b(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvxori_b_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ 
void vnori_b(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vnori_b_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvnori_b(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvnori_b_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsll_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsll_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsll_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsll_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsll_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsll_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsll_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsll_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsll_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsll_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsll_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsll_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsll_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsll_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsll_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsll_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vslli_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vslli_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } ++ void vslli_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vslli_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vslli_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vslli_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vslli_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vslli_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvslli_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvslli_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslli_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvslli_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslli_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvslli_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslli_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvslli_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsrl_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsrl_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrl_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsrl_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void 
vsrl_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsrl_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrl_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsrl_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsrl_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsrl_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrl_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsrl_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrl_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsrl_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrl_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsrl_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsrli_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vsrli_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrli_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vsrli_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrli_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsrli_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrli_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vsrli_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsrli_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvsrli_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrli_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvsrli_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrli_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsrli_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrli_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvsrli_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsra_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsra_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsra_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsra_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsra_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsra_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsra_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsra_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsra_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsra_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsra_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsra_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsra_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX 
emit_int32(insn_RRR(xvsra_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsra_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsra_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsrai_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vsrai_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrai_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vsrai_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrai_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsrai_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrai_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vsrai_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsrai_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvsrai_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrai_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvsrai_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrai_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsrai_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrai_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvsrai_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vrotr_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vrotr_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vrotr_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vrotr_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vrotr_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vrotr_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vrotr_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vrotr_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvrotr_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvrotr_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvrotr_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvrotr_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvrotr_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvrotr_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvrotr_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvrotr_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vrotri_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vrotri_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } ++ void vrotri_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vrotri_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vrotri_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vrotri_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void 
vrotri_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vrotri_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvrotri_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvrotri_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvrotri_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvrotri_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvrotri_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvrotri_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvrotri_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvrotri_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsrlni_b_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vsrlni_b_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrlni_h_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsrlni_h_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrlni_w_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vsrlni_w_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrlni_d_q(FloatRegister vd, FloatRegister vj, int ui7) { ASSERT_LSX emit_int32(insn_I7RR( vsrlni_d_q_op, ui7, (int)vj->encoding(), (int)vd->encoding())); } ++ ++ void vpcnt_b(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vpcnt_b_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vpcnt_h(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vpcnt_h_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vpcnt_w(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vpcnt_w_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vpcnt_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vpcnt_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvpcnt_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvpcnt_b_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvpcnt_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvpcnt_h_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvpcnt_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvpcnt_w_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvpcnt_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvpcnt_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vbitclr_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitclr_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitclr_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitclr_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitclr_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitclr_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitclr_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitclr_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvbitclr_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitclr_b_op, (int)xk->encoding(), (int)xj->encoding(), 
(int)xd->encoding())); } ++ void xvbitclr_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitclr_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitclr_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitclr_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitclr_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitclr_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vbitclri_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vbitclri_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitclri_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vbitclri_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitclri_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vbitclri_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitclri_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vbitclri_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvbitclri_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvbitclri_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitclri_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvbitclri_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitclri_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvbitclri_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitclri_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvbitclri_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vbitset_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitset_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitset_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitset_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitset_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitset_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitset_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitset_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvbitset_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitset_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitset_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitset_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitset_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitset_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitset_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitset_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vbitseti_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vbitseti_b_op, ui3, 
(int)vj->encoding(), (int)vd->encoding())); } ++ void vbitseti_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vbitseti_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitseti_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vbitseti_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitseti_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vbitseti_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvbitseti_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvbitseti_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitseti_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvbitseti_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitseti_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvbitseti_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitseti_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvbitseti_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vbitrev_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitrev_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitrev_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitrev_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitrev_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitrev_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitrev_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitrev_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvbitrev_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitrev_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitrev_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitrev_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitrev_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitrev_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitrev_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitrev_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vbitrevi_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vbitrevi_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitrevi_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vbitrevi_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitrevi_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vbitrevi_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitrevi_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vbitrevi_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvbitrevi_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvbitrevi_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void 
xvbitrevi_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvbitrevi_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitrevi_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvbitrevi_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitrevi_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvbitrevi_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfadd_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfadd_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfadd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfadd_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfadd_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfadd_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfadd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfadd_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfsub_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfsub_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfsub_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfsub_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfsub_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfsub_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfsub_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfsub_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfmul_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmul_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfmul_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmul_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfmul_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfmul_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfmul_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfmul_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfdiv_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfdiv_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfdiv_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfdiv_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfdiv_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfdiv_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfdiv_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfdiv_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfmadd_s(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX 
emit_int32(insn_RRRR( vfmadd_s_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfmadd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfmadd_d_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfmadd_s(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfmadd_s_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfmadd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfmadd_d_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfmsub_s(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfmsub_s_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfmsub_d(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfmsub_d_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfmsub_s(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfmsub_s_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfmsub_d(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfmsub_d_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfnmadd_s(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfnmadd_s_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfnmadd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfnmadd_d_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfnmadd_s(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfnmadd_s_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfnmadd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfnmadd_d_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfnmsub_s(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfnmsub_s_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfnmsub_d(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfnmsub_d_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfnmsub_s(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfnmsub_s_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfnmsub_d(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfnmsub_d_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void 
vfmax_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmax_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfmax_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmax_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfmax_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfmax_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfmax_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfmax_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfmin_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmin_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfmin_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmin_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfmin_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfmin_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfmin_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfmin_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfclass_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfclass_s_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vfclass_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfclass_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfclass_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfclass_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfclass_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfclass_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfsqrt_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfsqrt_s_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vfsqrt_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfsqrt_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfsqrt_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfsqrt_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfsqrt_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfsqrt_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfcvtl_s_h(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vfcvtl_s_h_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vfcvtl_d_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vfcvtl_d_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvfcvtl_s_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfcvtl_s_h_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcvtl_d_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfcvtl_d_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfcvth_s_h(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vfcvth_s_h_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vfcvth_d_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vfcvth_d_s_op, (int)rj->encoding(), (int)vd->encoding())); } 
++ void xvfcvth_s_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfcvth_s_h_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcvth_d_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfcvth_d_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfcvt_h_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfcvt_h_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcvt_s_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfcvt_s_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfcvt_h_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfcvt_h_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcvt_s_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfcvt_s_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfrintrne_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrne_s_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vfrintrne_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrne_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfrintrne_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrne_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfrintrne_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrne_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfrintrz_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrz_s_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vfrintrz_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrz_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfrintrz_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrz_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfrintrz_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrz_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfrintrp_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrp_s_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vfrintrp_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrp_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfrintrp_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrp_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfrintrp_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrp_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfrintrm_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrm_s_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vfrintrm_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrm_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfrintrm_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrm_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfrintrm_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrm_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfrint_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX 
emit_int32(insn_RR( vfrint_s_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vfrint_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrint_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfrint_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrint_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfrint_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrint_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrne_w_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrne_w_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vftintrne_l_d(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrne_l_d_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrne_w_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrne_w_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvftintrne_l_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrne_l_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrz_w_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrz_w_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vftintrz_l_d(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrz_l_d_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrz_w_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrz_w_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvftintrz_l_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrz_l_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrp_w_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrp_w_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vftintrp_l_d(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrp_l_d_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrp_w_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrp_w_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvftintrp_l_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrp_l_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrm_w_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrm_w_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vftintrm_l_d(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrm_l_d_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrm_w_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrm_w_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvftintrm_l_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrm_l_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftint_w_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftint_w_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vftint_l_d(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftint_l_d_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftint_w_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftint_w_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvftint_l_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX 
emit_int32(insn_RR(xvftint_l_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrne_w_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vftintrne_w_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvftintrne_w_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvftintrne_w_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrz_w_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vftintrz_w_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvftintrz_w_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvftintrz_w_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrp_w_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vftintrp_w_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvftintrp_w_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvftintrp_w_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrm_w_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vftintrm_w_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvftintrm_w_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvftintrm_w_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftint_w_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vftint_w_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvftint_w_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvftint_w_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrnel_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrnel_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrnel_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrnel_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrneh_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrneh_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrneh_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrneh_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrzl_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrzl_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrzl_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrzl_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrzh_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrzh_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrzh_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrzh_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrpl_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrpl_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrpl_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX 
emit_int32(insn_RR(xvftintrpl_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrph_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrph_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrph_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrph_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrml_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrml_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrml_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrml_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrmh_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrmh_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrmh_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrmh_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintl_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintl_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintl_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintl_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftinth_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftinth_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftinth_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftinth_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vffint_s_w(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vffint_s_w_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vffint_d_l(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vffint_d_l_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvffint_s_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvffint_s_w_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvffint_d_l(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvffint_d_l_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vffint_s_l(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vffint_s_l_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvffint_s_l(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvffint_s_l_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vffintl_d_w(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vffintl_d_w_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvffintl_d_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvffintl_d_w_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vffinth_d_w(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vffinth_d_w_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvffinth_d_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvffinth_d_w_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vseq_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vseq_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vseq_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vseq_h_op, 
(int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vseq_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vseq_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vseq_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vseq_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvseq_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvseq_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvseq_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvseq_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvseq_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvseq_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvseq_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvseq_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsle_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsle_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsle_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsle_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsle_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsle_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsle_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsle_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsle_bu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_bu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsle_hu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_hu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsle_wu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_wu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsle_du(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_du_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsle_bu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_bu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void 
xvsle_hu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_hu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsle_wu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_wu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsle_du(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_du_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vslt_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vslt_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vslt_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vslt_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvslt_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslt_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslt_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslt_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vslt_bu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_bu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vslt_hu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_hu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vslt_wu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_wu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vslt_du(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_du_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvslt_bu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_bu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslt_hu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_hu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslt_wu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_wu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslt_du(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_du_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vslti_bu(FloatRegister vd, FloatRegister vj, int ui5) { 
ASSERT_LSX emit_int32(insn_I5RR( vslti_bu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vslti_hu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vslti_hu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vslti_wu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vslti_wu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vslti_du(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vslti_du_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvslti_bu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvslti_bu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslti_hu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvslti_hu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslti_wu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvslti_wu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslti_du(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvslti_du_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfcmp_caf_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_caf , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cun_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cun , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_ceq_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_ceq , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cueq_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cueq, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_clt_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_clt , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cult_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cult, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cle_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cle , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cule_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cule, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cne_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cne , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cor_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cor , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cune_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cune, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_saf_s (FloatRegister vd, FloatRegister vj, 
FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_saf , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sun_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sun , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_seq_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_seq , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sueq_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sueq, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_slt_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_slt , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sult_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sult, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sle_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sle , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sule_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sule, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sne_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sne , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sor_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sor , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sune_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sune, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ ++ void vfcmp_caf_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_caf , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cun_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cun , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_ceq_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_ceq , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cueq_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cueq, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_clt_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_clt , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cult_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cult, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cle_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( 
vfcmp_cond_d_op, fcmp_cle , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cule_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cule, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cne_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cne , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cor_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cor , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cune_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cune, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_saf_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_saf , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sun_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sun , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_seq_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_seq , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sueq_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sueq, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_slt_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_slt , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sult_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sult, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sle_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sle , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sule_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sule, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sne_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sne , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sor_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sor , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sune_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sune, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ ++ void xvfcmp_caf_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_caf , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cun_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cun , (int)xk->encoding(), 
(int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_ceq_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_ceq , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cueq_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cueq, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_clt_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_clt , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cult_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cult, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cle_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cle , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cule_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cule, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cne_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cne , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cor_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cor , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cune_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cune, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_saf_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_saf , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sun_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sun , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_seq_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_seq , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sueq_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sueq, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_slt_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_slt , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sult_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sult, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sle_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sle , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sule_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sule, (int)xk->encoding(), (int)xj->encoding(), 
(int)xd->encoding())); } ++ void xvfcmp_sne_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sne , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sor_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sor , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sune_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sune, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void xvfcmp_caf_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_caf , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cun_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cun , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_ceq_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_ceq , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cueq_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cueq, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_clt_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_clt , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cult_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cult, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cle_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cle , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cule_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cule, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cne_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cne , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cor_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cor , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cune_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cune, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_saf_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_saf , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sun_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sun , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_seq_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_seq , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ 
void xvfcmp_sueq_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sueq, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_slt_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_slt , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sult_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sult, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sle_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sle , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sule_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sule, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sne_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sne , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sor_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sor , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sune_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sune, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vbitsel_v(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vbitsel_v_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvbitsel_v(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvbitsel_v_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vinsgr2vr_b(FloatRegister vd, Register rj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vinsgr2vr_b_op, ui4, (int)rj->encoding(), (int)vd->encoding())); } ++ void vinsgr2vr_h(FloatRegister vd, Register rj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vinsgr2vr_h_op, ui3, (int)rj->encoding(), (int)vd->encoding())); } ++ void vinsgr2vr_w(FloatRegister vd, Register rj, int ui2) { ASSERT_LSX emit_int32(insn_I2RR( vinsgr2vr_w_op, ui2, (int)rj->encoding(), (int)vd->encoding())); } ++ void vinsgr2vr_d(FloatRegister vd, Register rj, int ui1) { ASSERT_LSX emit_int32(insn_I1RR( vinsgr2vr_d_op, ui1, (int)rj->encoding(), (int)vd->encoding())); } ++ ++ void xvinsgr2vr_w(FloatRegister xd, Register rj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvinsgr2vr_w_op, ui3, (int)rj->encoding(), (int)xd->encoding())); } ++ void xvinsgr2vr_d(FloatRegister xd, Register rj, int ui2) { ASSERT_LASX emit_int32(insn_I2RR(xvinsgr2vr_d_op, ui2, (int)rj->encoding(), (int)xd->encoding())); } ++ ++ void vpickve2gr_b(Register rd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vpickve2gr_b_op, ui4, (int)vj->encoding(), (int)rd->encoding())); } ++ void vpickve2gr_h(Register rd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vpickve2gr_h_op, ui3, (int)vj->encoding(), (int)rd->encoding())); } ++ void vpickve2gr_w(Register rd, FloatRegister vj, int ui2) { ASSERT_LSX emit_int32(insn_I2RR( vpickve2gr_w_op, ui2, 
(int)vj->encoding(), (int)rd->encoding())); } ++ void vpickve2gr_d(Register rd, FloatRegister vj, int ui1) { ASSERT_LSX emit_int32(insn_I1RR( vpickve2gr_d_op, ui1, (int)vj->encoding(), (int)rd->encoding())); } ++ ++ void vpickve2gr_bu(Register rd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vpickve2gr_bu_op, ui4, (int)vj->encoding(), (int)rd->encoding())); } ++ void vpickve2gr_hu(Register rd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vpickve2gr_hu_op, ui3, (int)vj->encoding(), (int)rd->encoding())); } ++ void vpickve2gr_wu(Register rd, FloatRegister vj, int ui2) { ASSERT_LSX emit_int32(insn_I2RR( vpickve2gr_wu_op, ui2, (int)vj->encoding(), (int)rd->encoding())); } ++ void vpickve2gr_du(Register rd, FloatRegister vj, int ui1) { ASSERT_LSX emit_int32(insn_I1RR( vpickve2gr_du_op, ui1, (int)vj->encoding(), (int)rd->encoding())); } ++ ++ void xvpickve2gr_w(Register rd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvpickve2gr_w_op, ui3, (int)xj->encoding(), (int)rd->encoding())); } ++ void xvpickve2gr_d(Register rd, FloatRegister xj, int ui2) { ASSERT_LASX emit_int32(insn_I2RR(xvpickve2gr_d_op, ui2, (int)xj->encoding(), (int)rd->encoding())); } ++ ++ void xvpickve2gr_wu(Register rd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvpickve2gr_wu_op, ui3, (int)xj->encoding(), (int)rd->encoding())); } ++ void xvpickve2gr_du(Register rd, FloatRegister xj, int ui2) { ASSERT_LASX emit_int32(insn_I2RR(xvpickve2gr_du_op, ui2, (int)xj->encoding(), (int)rd->encoding())); } ++ ++ void vreplgr2vr_b(FloatRegister vd, Register rj) { ASSERT_LSX emit_int32(insn_RR( vreplgr2vr_b_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vreplgr2vr_h(FloatRegister vd, Register rj) { ASSERT_LSX emit_int32(insn_RR( vreplgr2vr_h_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vreplgr2vr_w(FloatRegister vd, Register rj) { ASSERT_LSX emit_int32(insn_RR( vreplgr2vr_w_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vreplgr2vr_d(FloatRegister vd, Register rj) { ASSERT_LSX emit_int32(insn_RR( vreplgr2vr_d_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvreplgr2vr_b(FloatRegister xd, Register rj) { ASSERT_LASX emit_int32(insn_RR(xvreplgr2vr_b_op, (int)rj->encoding(), (int)xd->encoding())); } ++ void xvreplgr2vr_h(FloatRegister xd, Register rj) { ASSERT_LASX emit_int32(insn_RR(xvreplgr2vr_h_op, (int)rj->encoding(), (int)xd->encoding())); } ++ void xvreplgr2vr_w(FloatRegister xd, Register rj) { ASSERT_LASX emit_int32(insn_RR(xvreplgr2vr_w_op, (int)rj->encoding(), (int)xd->encoding())); } ++ void xvreplgr2vr_d(FloatRegister xd, Register rj) { ASSERT_LASX emit_int32(insn_RR(xvreplgr2vr_d_op, (int)rj->encoding(), (int)xd->encoding())); } ++ ++ void vreplvei_b(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR(vreplvei_b_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vreplvei_h(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR(vreplvei_h_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } ++ void vreplvei_w(FloatRegister vd, FloatRegister vj, int ui2) { ASSERT_LSX emit_int32(insn_I2RR(vreplvei_w_op, ui2, (int)vj->encoding(), (int)vd->encoding())); } ++ void vreplvei_d(FloatRegister vd, FloatRegister vj, int ui1) { ASSERT_LSX emit_int32(insn_I1RR(vreplvei_d_op, ui1, (int)vj->encoding(), (int)vd->encoding())); } ++ ++ void xvreplve0_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvreplve0_b_op, (int)xj->encoding(), 
(int)xd->encoding())); } ++ void xvreplve0_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvreplve0_h_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvreplve0_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvreplve0_w_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvreplve0_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvreplve0_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvreplve0_q(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvreplve0_q_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void xvinsve0_w(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvinsve0_w_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvinsve0_d(FloatRegister xd, FloatRegister xj, int ui2) { ASSERT_LASX emit_int32(insn_I2RR(xvinsve0_d_op, ui2, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void xvpickve_w(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvpickve_w_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvpickve_d(FloatRegister xd, FloatRegister xj, int ui2) { ASSERT_LASX emit_int32(insn_I2RR(xvpickve_d_op, ui2, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vshuf_b(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vshuf_b_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvshuf_b(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvshuf_b_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vshuf_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vshuf_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vshuf_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vshuf_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vshuf_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vshuf_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ ++ void xvshuf_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvshuf_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvshuf_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvshuf_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvshuf_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvshuf_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void xvperm_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvperm_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vshuf4i_b(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vshuf4i_b_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void vshuf4i_h(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vshuf4i_h_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void vshuf4i_w(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX 
assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vshuf4i_w_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvshuf4i_b(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvshuf4i_b_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvshuf4i_h(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvshuf4i_h_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvshuf4i_w(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvshuf4i_w_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vshuf4i_d(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vshuf4i_d_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvshuf4i_d(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvshuf4i_d_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vpermi_w(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vpermi_w_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvpermi_w(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvpermi_w_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void xvpermi_d(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvpermi_d_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void xvpermi_q(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvpermi_q_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vld(FloatRegister vd, Register rj, int si12) { ASSERT_LSX assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR( vld_op, si12, (int)rj->encoding(), (int)vd->encoding()));} ++ void xvld(FloatRegister xd, Register rj, int si12) { ASSERT_LASX assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(xvld_op, si12, (int)rj->encoding(), (int)xd->encoding()));} ++ ++ void vst(FloatRegister vd, Register rj, int si12) { ASSERT_LSX assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR( vst_op, si12, (int)rj->encoding(), (int)vd->encoding()));} ++ void xvst(FloatRegister xd, Register rj, int si12) { ASSERT_LASX assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(xvst_op, si12, (int)rj->encoding(), (int)xd->encoding()));} ++ ++ void vldx(FloatRegister vd, Register rj, Register rk) { ASSERT_LSX emit_int32(insn_RRR( vldx_op, (int)rk->encoding(), (int)rj->encoding(), (int)vd->encoding())); } ++ void xvldx(FloatRegister xd, Register rj, Register rk) { ASSERT_LASX emit_int32(insn_RRR(xvldx_op, (int)rk->encoding(), (int)rj->encoding(), (int)xd->encoding())); } ++ ++ void vstx(FloatRegister vd, Register rj, Register rk) { ASSERT_LSX emit_int32(insn_RRR( vstx_op, (int)rk->encoding(), (int)rj->encoding(), (int)vd->encoding())); } ++ void xvstx(FloatRegister xd, Register rj, Register rk) { ASSERT_LASX emit_int32(insn_RRR(xvstx_op, (int)rk->encoding(), 
(int)rj->encoding(), (int)xd->encoding())); } ++ ++#undef ASSERT_LSX ++#undef ASSERT_LASX ++ ++public: ++ // Creation ++ Assembler(CodeBuffer* code) : AbstractAssembler(code) {} ++ ++ // Decoding ++ static address locate_operand(address inst, WhichOperand which); ++ static address locate_next_instruction(address inst); ++}; ++ ++#endif // CPU_LOONGARCH_ASSEMBLER_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/assembler_loongarch.inline.hpp b/src/hotspot/cpu/loongarch/assembler_loongarch.inline.hpp +--- a/src/hotspot/cpu/loongarch/assembler_loongarch.inline.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/assembler_loongarch.inline.hpp 2024-01-30 10:00:11.834765144 +0800 +@@ -0,0 +1,33 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_ASSEMBLER_LOONGARCH_INLINE_HPP ++#define CPU_LOONGARCH_ASSEMBLER_LOONGARCH_INLINE_HPP ++ ++#include "asm/assembler.inline.hpp" ++#include "asm/codeBuffer.hpp" ++#include "code/codeCache.hpp" ++ ++#endif // CPU_LOONGARCH_ASSEMBLER_LOONGARCH_INLINE_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/bytes_loongarch.hpp b/src/hotspot/cpu/loongarch/bytes_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/bytes_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/bytes_loongarch.hpp 2024-01-30 10:00:11.834765144 +0800 +@@ -0,0 +1,73 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
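For readers skimming the LSX/LASX emitters above: each wrapper only packs an opcode plus the 5-bit register encodings into a 32-bit instruction word and hands it to emit_int32(). The standalone C++ sketch below illustrates that packing for the 3-register form, assuming the standard LoongArch 3R field layout (rd in bits 0-4, rj in 5-9, rk in 10-14, opcode above); the helper name and opcode value are illustrative only and are not the insn_RRR/opcode definitions from this patch.

#include <cstdint>
#include <cstdio>

// Illustrative packing of a 3-register (RRR) LoongArch instruction word.
// Field layout is assumed from the architecture's 3R format; the patch's
// real insn_RRR helper is defined elsewhere in assembler_loongarch.hpp.
static inline uint32_t pack_rrr(uint32_t op, int rk, int rj, int rd) {
  return (op << 15) | ((rk & 0x1f) << 10) | ((rj & 0x1f) << 5) | (rd & 0x1f);
}

int main() {
  const uint32_t fake_op = 0x0e3c1;  // hypothetical opcode, not a real encoding
  printf("0x%08x\n", pack_rrr(fake_op, /*rk=*/2, /*rj=*/1, /*rd=*/0));
  return 0;
}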
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_BYTES_LOONGARCH_HPP ++#define CPU_LOONGARCH_BYTES_LOONGARCH_HPP ++ ++#include "memory/allocation.hpp" ++ ++class Bytes: AllStatic { ++ public: ++ // Returns true if the byte ordering used by Java is different from the native byte ordering ++ // of the underlying machine. For example, this is true for Intel x86, but false for Solaris ++ // on Sparc. ++ // we use LoongArch, so return true ++ static inline bool is_Java_byte_ordering_different(){ return true; } ++ ++ ++ // Efficient reading and writing of unaligned unsigned data in platform-specific byte ordering ++ // (no special code is needed since LoongArch CPUs can access unaligned data) ++ static inline u2 get_native_u2(address p) { return *(u2*)p; } ++ static inline u4 get_native_u4(address p) { return *(u4*)p; } ++ static inline u8 get_native_u8(address p) { return *(u8*)p; } ++ ++ static inline void put_native_u2(address p, u2 x) { *(u2*)p = x; } ++ static inline void put_native_u4(address p, u4 x) { *(u4*)p = x; } ++ static inline void put_native_u8(address p, u8 x) { *(u8*)p = x; } ++ ++ ++ // Efficient reading and writing of unaligned unsigned data in Java ++ // byte ordering (i.e. big-endian ordering). Byte-order reversal is ++ // needed since LoongArch64 CPUs use little-endian format. ++ static inline u2 get_Java_u2(address p) { return swap_u2(get_native_u2(p)); } ++ static inline u4 get_Java_u4(address p) { return swap_u4(get_native_u4(p)); } ++ static inline u8 get_Java_u8(address p) { return swap_u8(get_native_u8(p)); } ++ ++ static inline void put_Java_u2(address p, u2 x) { put_native_u2(p, swap_u2(x)); } ++ static inline void put_Java_u4(address p, u4 x) { put_native_u4(p, swap_u4(x)); } ++ static inline void put_Java_u8(address p, u8 x) { put_native_u8(p, swap_u8(x)); } ++ ++ ++ // Efficient swapping of byte ordering ++ static inline u2 swap_u2(u2 x); // compiler-dependent implementation ++ static inline u4 swap_u4(u4 x); // compiler-dependent implementation ++ static inline u8 swap_u8(u8 x); ++}; ++ ++ ++// The following header contains the implementations of swap_u2, swap_u4, and swap_u8[_base] ++#include OS_CPU_HEADER_INLINE(bytes) ++ ++#endif // CPU_LOONGARCH_BYTES_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/c1_CodeStubs_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_CodeStubs_loongarch_64.cpp +--- a/src/hotspot/cpu/loongarch/c1_CodeStubs_loongarch_64.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/c1_CodeStubs_loongarch_64.cpp 2024-01-30 10:00:11.834765144 +0800 +@@ -0,0 +1,344 @@ ++/* ++ * Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
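The Bytes helpers above exist because Java class-file and bytecode data are big-endian while LoongArch64 is little-endian, so each get_Java_* / put_Java_* accessor is an unaligned native access combined with a byte reversal. A minimal standalone sketch of that combination follows; it uses the GCC/Clang __builtin_bswap32 intrinsic, and the *_sketch names are illustrative stand-ins, not the patch's swap_u4/get_Java_u4 (the real swap_u4 comes from the OS_CPU inline header).

#include <cstdint>
#include <cstdio>
#include <cstring>

// Illustrative stand-ins for swap_u4 / get_Java_u4; not the patch's code.
static inline uint32_t swap_u4_sketch(uint32_t x) {
  return __builtin_bswap32(x);   // HotSpot's swap_u4 is compiler-dependent
}

static inline uint32_t get_Java_u4_sketch(const unsigned char* p) {
  uint32_t v;
  memcpy(&v, p, sizeof(v));      // unaligned-safe little-endian load
  return swap_u4_sketch(v);      // reverse bytes -> big-endian Java value
}

int main() {
  const unsigned char magic[] = { 0xCA, 0xFE, 0xBA, 0xBE };  // class-file magic, big-endian
  printf("0x%08X\n", get_Java_u4_sketch(magic));             // prints 0xCAFEBABE
  return 0;
}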
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "c1/c1_CodeStubs.hpp" ++#include "c1/c1_FrameMap.hpp" ++#include "c1/c1_LIRAssembler.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "c1/c1_Runtime1.hpp" ++#include "classfile/javaClasses.hpp" ++#include "nativeInst_loongarch.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "vmreg_loongarch.inline.hpp" ++ ++#define __ ce->masm()-> ++ ++void CounterOverflowStub::emit_code(LIR_Assembler* ce) { ++ __ bind(_entry); ++ Metadata *m = _method->as_constant_ptr()->as_metadata(); ++ __ mov_metadata(SCR2, m); ++ ce->store_parameter(SCR2, 1); ++ ce->store_parameter(_bci, 0); ++ __ call(Runtime1::entry_for(Runtime1::counter_overflow_id), relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ __ b(_continuation); ++} ++ ++RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index, LIR_Opr array) ++ : _index(index), _array(array), _throw_index_out_of_bounds_exception(false) { ++ assert(info != NULL, "must have info"); ++ _info = new CodeEmitInfo(info); ++} ++ ++RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index) ++ : _index(index), _array(NULL), _throw_index_out_of_bounds_exception(true) { ++ assert(info != NULL, "must have info"); ++ _info = new CodeEmitInfo(info); ++} ++ ++void RangeCheckStub::emit_code(LIR_Assembler* ce) { ++ __ bind(_entry); ++ if (_info->deoptimize_on_exception()) { ++ address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id); ++ __ call(a, relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ debug_only(__ should_not_reach_here()); ++ return; ++ } ++ ++ if (_index->is_cpu_register()) { ++ __ move(SCR1, _index->as_register()); ++ } else { ++ __ li(SCR1, _index->as_jint()); ++ } ++ Runtime1::StubID stub_id; ++ if (_throw_index_out_of_bounds_exception) { ++ stub_id = Runtime1::throw_index_exception_id; ++ } else { ++ assert(_array != NULL, "sanity"); ++ __ move(SCR2, _array->as_pointer_register()); ++ stub_id = Runtime1::throw_range_check_failed_id; ++ } ++ __ call(Runtime1::entry_for(stub_id), relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ debug_only(__ should_not_reach_here()); ++} ++ ++PredicateFailedStub::PredicateFailedStub(CodeEmitInfo* info) { ++ _info = new CodeEmitInfo(info); ++} ++ ++void PredicateFailedStub::emit_code(LIR_Assembler* ce) { ++ __ bind(_entry); ++ address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id); ++ __ call(a, relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ debug_only(__ should_not_reach_here()); ++} ++ ++void DivByZeroStub::emit_code(LIR_Assembler* ce) { ++ if (_offset != -1) { ++ 
ce->compilation()->implicit_exception_table()->append(_offset, __ offset()); ++ } ++ __ bind(_entry); ++ __ call(Runtime1::entry_for(Runtime1::throw_div0_exception_id), relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++#ifdef ASSERT ++ __ should_not_reach_here(); ++#endif ++} ++ ++// Implementation of NewInstanceStub ++ ++NewInstanceStub::NewInstanceStub(LIR_Opr klass_reg, LIR_Opr result, ciInstanceKlass* klass, ++ CodeEmitInfo* info, Runtime1::StubID stub_id) { ++ _result = result; ++ _klass = klass; ++ _klass_reg = klass_reg; ++ _info = new CodeEmitInfo(info); ++ assert(stub_id == Runtime1::new_instance_id || ++ stub_id == Runtime1::fast_new_instance_id || ++ stub_id == Runtime1::fast_new_instance_init_check_id, ++ "need new_instance id"); ++ _stub_id = stub_id; ++} ++ ++void NewInstanceStub::emit_code(LIR_Assembler* ce) { ++ assert(__ rsp_offset() == 0, "frame size should be fixed"); ++ __ bind(_entry); ++ __ move(A3, _klass_reg->as_register()); ++ __ call(Runtime1::entry_for(_stub_id), relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ assert(_result->as_register() == A0, "result must in A0"); ++ __ b(_continuation); ++} ++ ++// Implementation of NewTypeArrayStub ++ ++NewTypeArrayStub::NewTypeArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, ++ CodeEmitInfo* info) { ++ _klass_reg = klass_reg; ++ _length = length; ++ _result = result; ++ _info = new CodeEmitInfo(info); ++} ++ ++void NewTypeArrayStub::emit_code(LIR_Assembler* ce) { ++ assert(__ rsp_offset() == 0, "frame size should be fixed"); ++ __ bind(_entry); ++ assert(_length->as_register() == S0, "length must in S0,"); ++ assert(_klass_reg->as_register() == A3, "klass_reg must in A3"); ++ __ call(Runtime1::entry_for(Runtime1::new_type_array_id), relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ assert(_result->as_register() == A0, "result must in A0"); ++ __ b(_continuation); ++} ++ ++// Implementation of NewObjectArrayStub ++ ++NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, ++ CodeEmitInfo* info) { ++ _klass_reg = klass_reg; ++ _result = result; ++ _length = length; ++ _info = new CodeEmitInfo(info); ++} ++ ++void NewObjectArrayStub::emit_code(LIR_Assembler* ce) { ++ assert(__ rsp_offset() == 0, "frame size should be fixed"); ++ __ bind(_entry); ++ assert(_length->as_register() == S0, "length must in S0,"); ++ assert(_klass_reg->as_register() == A3, "klass_reg must in A3"); ++ __ call(Runtime1::entry_for(Runtime1::new_object_array_id), relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ assert(_result->as_register() == A0, "result must in A0"); ++ __ b(_continuation); ++} ++ ++// Implementation of MonitorAccessStubs ++ ++MonitorEnterStub::MonitorEnterStub(LIR_Opr obj_reg, LIR_Opr lock_reg, CodeEmitInfo* info) ++ : MonitorAccessStub(obj_reg, lock_reg) { ++ _info = new CodeEmitInfo(info); ++} ++ ++void MonitorEnterStub::emit_code(LIR_Assembler* ce) { ++ assert(__ rsp_offset() == 0, "frame size should be fixed"); ++ __ bind(_entry); ++ ce->store_parameter(_obj_reg->as_register(), 1); ++ ce->store_parameter(_lock_reg->as_register(), 0); ++ Runtime1::StubID enter_id; ++ if (ce->compilation()->has_fpu_code()) { ++ enter_id = Runtime1::monitorenter_id; ++ } else { ++ enter_id = Runtime1::monitorenter_nofpu_id; ++ } ++ __ call(Runtime1::entry_for(enter_id), relocInfo::runtime_call_type); ++ 
ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ __ b(_continuation); ++} ++ ++void MonitorExitStub::emit_code(LIR_Assembler* ce) { ++ __ bind(_entry); ++ if (_compute_lock) { ++ // lock_reg was destroyed by fast unlocking attempt => recompute it ++ ce->monitor_address(_monitor_ix, _lock_reg); ++ } ++ ce->store_parameter(_lock_reg->as_register(), 0); ++ // note: non-blocking leaf routine => no call info needed ++ Runtime1::StubID exit_id; ++ if (ce->compilation()->has_fpu_code()) { ++ exit_id = Runtime1::monitorexit_id; ++ } else { ++ exit_id = Runtime1::monitorexit_nofpu_id; ++ } ++ __ lipc(RA, _continuation); ++ __ jmp(Runtime1::entry_for(exit_id), relocInfo::runtime_call_type); ++} ++ ++// Implementation of patching: ++// - Copy the code at given offset to an inlined buffer (first the bytes, then the number of bytes) ++// - Replace original code with a call to the stub ++// At Runtime: ++// - call to stub, jump to runtime ++// - in runtime: preserve all registers (especially objects, i.e., source and destination object) ++// - in runtime: after initializing class, restore original code, reexecute instruction ++ ++int PatchingStub::_patch_info_offset = -NativeGeneralJump::instruction_size; ++ ++void PatchingStub::align_patch_site(MacroAssembler* masm) { ++} ++ ++void PatchingStub::emit_code(LIR_Assembler* ce) { ++ assert(false, "LoongArch64 should not use C1 runtime patching"); ++} ++ ++void DeoptimizeStub::emit_code(LIR_Assembler* ce) { ++ __ bind(_entry); ++ ce->store_parameter(_trap_request, 0); ++ __ call(Runtime1::entry_for(Runtime1::deoptimize_id), relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ DEBUG_ONLY(__ should_not_reach_here()); ++} ++ ++void ImplicitNullCheckStub::emit_code(LIR_Assembler* ce) { ++ address a; ++ if (_info->deoptimize_on_exception()) { ++ // Deoptimize, do not throw the exception, because it is probably wrong to do it here. ++ a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id); ++ } else { ++ a = Runtime1::entry_for(Runtime1::throw_null_pointer_exception_id); ++ } ++ ++ ce->compilation()->implicit_exception_table()->append(_offset, __ offset()); ++ __ bind(_entry); ++ __ call(a, relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ debug_only(__ should_not_reach_here()); ++} ++ ++void SimpleExceptionStub::emit_code(LIR_Assembler* ce) { ++ assert(__ rsp_offset() == 0, "frame size should be fixed"); ++ ++ __ bind(_entry); ++ // pass the object in a scratch register because all other registers ++ // must be preserved ++ if (_obj->is_cpu_register()) { ++ __ move(SCR1, _obj->as_register()); ++ } ++ __ call(Runtime1::entry_for(_stub), relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ debug_only(__ should_not_reach_here()); ++} ++ ++void ArrayCopyStub::emit_code(LIR_Assembler* ce) { ++ //---------------slow case: call to native----------------- ++ __ bind(_entry); ++ // Figure out where the args should go ++ // This should really convert the IntrinsicID to the Method* and signature ++ // but I don't know how to do that.
++ // ++ VMRegPair args[5]; ++ BasicType signature[5] = { T_OBJECT, T_INT, T_OBJECT, T_INT, T_INT}; ++ SharedRuntime::java_calling_convention(signature, args, 5, true); ++ ++ // push parameters ++ // (src, src_pos, dest, destPos, length) ++ Register r[5]; ++ r[0] = src()->as_register(); ++ r[1] = src_pos()->as_register(); ++ r[2] = dst()->as_register(); ++ r[3] = dst_pos()->as_register(); ++ r[4] = length()->as_register(); ++ ++ // next registers will get stored on the stack ++ for (int i = 0; i < 5 ; i++ ) { ++ VMReg r_1 = args[i].first(); ++ if (r_1->is_stack()) { ++ int st_off = r_1->reg2stack() * wordSize; ++ __ stptr_d (r[i], SP, st_off); ++ } else { ++ assert(r[i] == args[i].first()->as_Register(), "Wrong register for arg "); ++ } ++ } ++ ++ ce->align_call(lir_static_call); ++ ++ ce->emit_static_call_stub(); ++ if (ce->compilation()->bailed_out()) { ++ return; // CodeCache is full ++ } ++ AddressLiteral resolve(SharedRuntime::get_resolve_static_call_stub(), ++ relocInfo::static_call_type); ++ address call = __ trampoline_call(resolve); ++ if (call == NULL) { ++ ce->bailout("trampoline stub overflow"); ++ return; ++ } ++ ce->add_call_info_here(info()); ++ ++#ifndef PRODUCT ++ if (PrintC1Statistics) { ++ __ li(SCR2, (address)&Runtime1::_arraycopy_slowcase_cnt); ++ __ increment(Address(SCR2)); ++ } ++#endif ++ ++ __ b(_continuation); ++} ++ ++#undef __ +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/c1_Defs_loongarch.hpp b/src/hotspot/cpu/loongarch/c1_Defs_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/c1_Defs_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/c1_Defs_loongarch.hpp 2024-01-30 10:00:11.834765144 +0800 +@@ -0,0 +1,79 @@ ++/* ++ * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_C1_DEFS_LOONGARCH_HPP ++#define CPU_LOONGARCH_C1_DEFS_LOONGARCH_HPP ++ ++// native word offsets from memory address (little endian) ++enum { ++ pd_lo_word_offset_in_bytes = 0, ++ pd_hi_word_offset_in_bytes = BytesPerWord ++}; ++ ++// explicit rounding operations are required to implement the strictFP mode ++enum { ++ pd_strict_fp_requires_explicit_rounding = false ++}; ++ ++// FIXME: There are no callee-saved ++ ++// registers ++enum { ++ pd_nof_cpu_regs_frame_map = RegisterImpl::number_of_registers, // number of registers used during code emission ++ pd_nof_fpu_regs_frame_map = FloatRegisterImpl::number_of_registers, // number of registers used during code emission ++ ++ pd_nof_caller_save_cpu_regs_frame_map = 15, // number of registers killed by calls ++ pd_nof_caller_save_fpu_regs_frame_map = 32, // number of registers killed by calls ++ ++ pd_first_callee_saved_reg = pd_nof_caller_save_cpu_regs_frame_map, ++ pd_last_callee_saved_reg = 21, ++ ++ pd_last_allocatable_cpu_reg = pd_nof_caller_save_cpu_regs_frame_map - 1, ++ ++ pd_nof_cpu_regs_reg_alloc = pd_nof_caller_save_cpu_regs_frame_map, // number of registers that are visible to register allocator ++ pd_nof_fpu_regs_reg_alloc = 32, // number of registers that are visible to register allocator ++ ++ pd_nof_cpu_regs_linearscan = 32, // number of registers visible to linear scan ++ pd_nof_fpu_regs_linearscan = pd_nof_fpu_regs_frame_map, // number of registers visible to linear scan ++ pd_nof_xmm_regs_linearscan = 0, // don't have vector registers ++ pd_first_cpu_reg = 0, ++ pd_last_cpu_reg = pd_nof_cpu_regs_reg_alloc - 1, ++ pd_first_byte_reg = 0, ++ pd_last_byte_reg = pd_nof_cpu_regs_reg_alloc - 1, ++ pd_first_fpu_reg = pd_nof_cpu_regs_frame_map, ++ pd_last_fpu_reg = pd_first_fpu_reg + 31, ++ ++ pd_first_callee_saved_fpu_reg = 24 + pd_first_fpu_reg, ++ pd_last_callee_saved_fpu_reg = 31 + pd_first_fpu_reg, ++}; ++ ++// Encoding of float value in debug info. This is true on x86 where ++// floats are extended to doubles when stored in the stack, false for ++// LoongArch64 where floats and doubles are stored in their native form. ++enum { ++ pd_float_saved_as_double = false ++}; ++ ++#endif // CPU_LOONGARCH_C1_DEFS_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/c1_FpuStackSim_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_FpuStackSim_loongarch_64.cpp +--- a/src/hotspot/cpu/loongarch/c1_FpuStackSim_loongarch_64.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/c1_FpuStackSim_loongarch_64.cpp 2024-01-30 10:00:11.834765144 +0800 +@@ -0,0 +1,31 @@ ++/* ++ * Copyright (c) 2005, 2017, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++//-------------------------------------------------------- ++// FpuStackSim ++//-------------------------------------------------------- ++ ++// No FPU stack on LoongArch64 ++#include "precompiled.hpp" +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/c1_FpuStackSim_loongarch.hpp b/src/hotspot/cpu/loongarch/c1_FpuStackSim_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/c1_FpuStackSim_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/c1_FpuStackSim_loongarch.hpp 2024-01-30 10:00:11.834765144 +0800 +@@ -0,0 +1,32 @@ ++/* ++ * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_C1_FPUSTACKSIM_LOONGARCH_HPP ++#define CPU_LOONGARCH_C1_FPUSTACKSIM_LOONGARCH_HPP ++ ++// No FPU stack on LoongArch ++class FpuStackSim; ++ ++#endif // CPU_LOONGARCH_C1_FPUSTACKSIM_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/c1_FrameMap_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_FrameMap_loongarch_64.cpp +--- a/src/hotspot/cpu/loongarch/c1_FrameMap_loongarch_64.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/c1_FrameMap_loongarch_64.cpp 2024-01-30 10:00:11.834765144 +0800 +@@ -0,0 +1,354 @@ ++/* ++ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "c1/c1_FrameMap.hpp" ++#include "c1/c1_LIR.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "vmreg_loongarch.inline.hpp" ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++LIR_Opr FrameMap::map_to_opr(BasicType type, VMRegPair* reg, bool) { ++ LIR_Opr opr = LIR_OprFact::illegalOpr; ++ VMReg r_1 = reg->first(); ++ VMReg r_2 = reg->second(); ++ if (r_1->is_stack()) { ++ // Convert stack slot to an SP offset ++ // The calling convention does not count the SharedRuntime::out_preserve_stack_slots() value ++ // so we must add it in here. ++ int st_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; ++ opr = LIR_OprFact::address(new LIR_Address(sp_opr, st_off, type)); ++ } else if (r_1->is_Register()) { ++ Register reg = r_1->as_Register(); ++ if (r_2->is_Register() && (type == T_LONG || type == T_DOUBLE)) { ++ Register reg2 = r_2->as_Register(); ++ assert(reg2 == reg, "must be same register"); ++ opr = as_long_opr(reg); ++ } else if (is_reference_type(type)) { ++ opr = as_oop_opr(reg); ++ } else if (type == T_METADATA) { ++ opr = as_metadata_opr(reg); ++ } else if (type == T_ADDRESS) { ++ opr = as_address_opr(reg); ++ } else { ++ opr = as_opr(reg); ++ } ++ } else if (r_1->is_FloatRegister()) { ++ assert(type == T_DOUBLE || type == T_FLOAT, "wrong type"); ++ int num = r_1->as_FloatRegister()->encoding(); ++ if (type == T_FLOAT) { ++ opr = LIR_OprFact::single_fpu(num); ++ } else { ++ opr = LIR_OprFact::double_fpu(num); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++ return opr; ++} ++ ++LIR_Opr FrameMap::r0_opr; ++LIR_Opr FrameMap::ra_opr; ++LIR_Opr FrameMap::tp_opr; ++LIR_Opr FrameMap::sp_opr; ++LIR_Opr FrameMap::a0_opr; ++LIR_Opr FrameMap::a1_opr; ++LIR_Opr FrameMap::a2_opr; ++LIR_Opr FrameMap::a3_opr; ++LIR_Opr FrameMap::a4_opr; ++LIR_Opr FrameMap::a5_opr; ++LIR_Opr FrameMap::a6_opr; ++LIR_Opr FrameMap::a7_opr; ++LIR_Opr FrameMap::t0_opr; ++LIR_Opr FrameMap::t1_opr; ++LIR_Opr FrameMap::t2_opr; ++LIR_Opr FrameMap::t3_opr; ++LIR_Opr FrameMap::t4_opr; ++LIR_Opr FrameMap::t5_opr; ++LIR_Opr FrameMap::t6_opr; ++LIR_Opr FrameMap::t7_opr; ++LIR_Opr FrameMap::t8_opr; ++LIR_Opr FrameMap::rx_opr; ++LIR_Opr FrameMap::fp_opr; ++LIR_Opr FrameMap::s0_opr; ++LIR_Opr FrameMap::s1_opr; ++LIR_Opr FrameMap::s2_opr; ++LIR_Opr FrameMap::s3_opr; ++LIR_Opr FrameMap::s4_opr; ++LIR_Opr FrameMap::s5_opr; ++LIR_Opr FrameMap::s6_opr; ++LIR_Opr FrameMap::s7_opr; ++LIR_Opr FrameMap::s8_opr; ++ ++LIR_Opr FrameMap::receiver_opr; ++ ++LIR_Opr FrameMap::ra_oop_opr; ++LIR_Opr FrameMap::a0_oop_opr; ++LIR_Opr FrameMap::a1_oop_opr; ++LIR_Opr FrameMap::a2_oop_opr; ++LIR_Opr FrameMap::a3_oop_opr; ++LIR_Opr FrameMap::a4_oop_opr; ++LIR_Opr FrameMap::a5_oop_opr; ++LIR_Opr FrameMap::a6_oop_opr; ++LIR_Opr FrameMap::a7_oop_opr; ++LIR_Opr FrameMap::t0_oop_opr; ++LIR_Opr FrameMap::t1_oop_opr; 
++LIR_Opr FrameMap::t2_oop_opr; ++LIR_Opr FrameMap::t3_oop_opr; ++LIR_Opr FrameMap::t4_oop_opr; ++LIR_Opr FrameMap::t5_oop_opr; ++LIR_Opr FrameMap::t6_oop_opr; ++LIR_Opr FrameMap::t7_oop_opr; ++LIR_Opr FrameMap::t8_oop_opr; ++LIR_Opr FrameMap::fp_oop_opr; ++LIR_Opr FrameMap::s0_oop_opr; ++LIR_Opr FrameMap::s1_oop_opr; ++LIR_Opr FrameMap::s2_oop_opr; ++LIR_Opr FrameMap::s3_oop_opr; ++LIR_Opr FrameMap::s4_oop_opr; ++LIR_Opr FrameMap::s5_oop_opr; ++LIR_Opr FrameMap::s6_oop_opr; ++LIR_Opr FrameMap::s7_oop_opr; ++LIR_Opr FrameMap::s8_oop_opr; ++ ++LIR_Opr FrameMap::scr1_opr; ++LIR_Opr FrameMap::scr2_opr; ++LIR_Opr FrameMap::scr1_long_opr; ++LIR_Opr FrameMap::scr2_long_opr; ++ ++LIR_Opr FrameMap::a0_metadata_opr; ++LIR_Opr FrameMap::a1_metadata_opr; ++LIR_Opr FrameMap::a2_metadata_opr; ++LIR_Opr FrameMap::a3_metadata_opr; ++LIR_Opr FrameMap::a4_metadata_opr; ++LIR_Opr FrameMap::a5_metadata_opr; ++ ++LIR_Opr FrameMap::long0_opr; ++LIR_Opr FrameMap::long1_opr; ++LIR_Opr FrameMap::fpu0_float_opr; ++LIR_Opr FrameMap::fpu0_double_opr; ++ ++LIR_Opr FrameMap::_caller_save_cpu_regs[] = { 0 }; ++LIR_Opr FrameMap::_caller_save_fpu_regs[] = { 0 }; ++ ++//-------------------------------------------------------- ++// FrameMap ++//-------------------------------------------------------- ++ ++void FrameMap::initialize() { ++ assert(!_init_done, "once"); ++ int i = 0; ++ ++ // caller save register ++ map_register(i, A0); a0_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, A1); a1_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, A2); a2_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, A3); a3_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, A4); a4_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, A5); a5_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, A6); a6_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, A7); a7_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, T0); t0_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, T1); t1_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, T2); t2_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, T3); t3_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, T5); t5_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, T6); t6_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, T8); t8_opr = LIR_OprFact::single_cpu(i); i++; ++ ++ // callee save register ++ map_register(i, S0); s0_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, S1); s1_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, S2); s2_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, S3); s3_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, S4); s4_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, S7); s7_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, S8); s8_opr = LIR_OprFact::single_cpu(i); i++; ++ ++ // special register ++ map_register(i, S5); s5_opr = LIR_OprFact::single_cpu(i); i++; // heapbase ++ map_register(i, S6); s6_opr = LIR_OprFact::single_cpu(i); i++; // thread ++ map_register(i, TP); tp_opr = LIR_OprFact::single_cpu(i); i++; // tp ++ map_register(i, FP); fp_opr = LIR_OprFact::single_cpu(i); i++; // fp ++ map_register(i, RA); ra_opr = LIR_OprFact::single_cpu(i); i++; // ra ++ map_register(i, SP); sp_opr = LIR_OprFact::single_cpu(i); i++; // sp ++ ++ // tmp register ++ map_register(i, T7); t7_opr = LIR_OprFact::single_cpu(i); i++; // scr1 ++ map_register(i, T4); t4_opr = LIR_OprFact::single_cpu(i); i++; // scr2 ++ ++ scr1_opr = t7_opr; ++ 
scr2_opr = t4_opr; ++ scr1_long_opr = LIR_OprFact::double_cpu(t7_opr->cpu_regnr(), t7_opr->cpu_regnr()); ++ scr2_long_opr = LIR_OprFact::double_cpu(t4_opr->cpu_regnr(), t4_opr->cpu_regnr()); ++ ++ long0_opr = LIR_OprFact::double_cpu(a0_opr->cpu_regnr(), a0_opr->cpu_regnr()); ++ long1_opr = LIR_OprFact::double_cpu(a1_opr->cpu_regnr(), a1_opr->cpu_regnr()); ++ ++ fpu0_float_opr = LIR_OprFact::single_fpu(0); ++ fpu0_double_opr = LIR_OprFact::double_fpu(0); ++ ++ // scr1, scr2 not included ++ _caller_save_cpu_regs[0] = a0_opr; ++ _caller_save_cpu_regs[1] = a1_opr; ++ _caller_save_cpu_regs[2] = a2_opr; ++ _caller_save_cpu_regs[3] = a3_opr; ++ _caller_save_cpu_regs[4] = a4_opr; ++ _caller_save_cpu_regs[5] = a5_opr; ++ _caller_save_cpu_regs[6] = a6_opr; ++ _caller_save_cpu_regs[7] = a7_opr; ++ _caller_save_cpu_regs[8] = t0_opr; ++ _caller_save_cpu_regs[9] = t1_opr; ++ _caller_save_cpu_regs[10] = t2_opr; ++ _caller_save_cpu_regs[11] = t3_opr; ++ _caller_save_cpu_regs[12] = t5_opr; ++ _caller_save_cpu_regs[13] = t6_opr; ++ _caller_save_cpu_regs[14] = t8_opr; ++ ++ for (int i = 0; i < 8; i++) { ++ _caller_save_fpu_regs[i] = LIR_OprFact::single_fpu(i); ++ } ++ ++ _init_done = true; ++ ++ ra_oop_opr = as_oop_opr(RA); ++ a0_oop_opr = as_oop_opr(A0); ++ a1_oop_opr = as_oop_opr(A1); ++ a2_oop_opr = as_oop_opr(A2); ++ a3_oop_opr = as_oop_opr(A3); ++ a4_oop_opr = as_oop_opr(A4); ++ a5_oop_opr = as_oop_opr(A5); ++ a6_oop_opr = as_oop_opr(A6); ++ a7_oop_opr = as_oop_opr(A7); ++ t0_oop_opr = as_oop_opr(T0); ++ t1_oop_opr = as_oop_opr(T1); ++ t2_oop_opr = as_oop_opr(T2); ++ t3_oop_opr = as_oop_opr(T3); ++ t4_oop_opr = as_oop_opr(T4); ++ t5_oop_opr = as_oop_opr(T5); ++ t6_oop_opr = as_oop_opr(T6); ++ t7_oop_opr = as_oop_opr(T7); ++ t8_oop_opr = as_oop_opr(T8); ++ fp_oop_opr = as_oop_opr(FP); ++ s0_oop_opr = as_oop_opr(S0); ++ s1_oop_opr = as_oop_opr(S1); ++ s2_oop_opr = as_oop_opr(S2); ++ s3_oop_opr = as_oop_opr(S3); ++ s4_oop_opr = as_oop_opr(S4); ++ s5_oop_opr = as_oop_opr(S5); ++ s6_oop_opr = as_oop_opr(S6); ++ s7_oop_opr = as_oop_opr(S7); ++ s8_oop_opr = as_oop_opr(S8); ++ ++ a0_metadata_opr = as_metadata_opr(A0); ++ a1_metadata_opr = as_metadata_opr(A1); ++ a2_metadata_opr = as_metadata_opr(A2); ++ a3_metadata_opr = as_metadata_opr(A3); ++ a4_metadata_opr = as_metadata_opr(A4); ++ a5_metadata_opr = as_metadata_opr(A5); ++ ++ sp_opr = as_pointer_opr(SP); ++ fp_opr = as_pointer_opr(FP); ++ ++ VMRegPair regs; ++ BasicType sig_bt = T_OBJECT; ++ SharedRuntime::java_calling_convention(&sig_bt, ®s, 1, true); ++ receiver_opr = as_oop_opr(regs.first()->as_Register()); ++ ++ for (int i = 0; i < nof_caller_save_fpu_regs; i++) { ++ _caller_save_fpu_regs[i] = LIR_OprFact::single_fpu(i); ++ } ++} ++ ++Address FrameMap::make_new_address(ByteSize sp_offset) const { ++ // for sp, based address use this: ++ // return Address(sp, in_bytes(sp_offset) - (framesize() - 2) * 4); ++ return Address(SP, in_bytes(sp_offset)); ++} ++ ++// ----------------mapping----------------------- ++// all mapping is based on fp addressing, except for simple leaf methods where we access ++// the locals sp based (and no frame is built) ++ ++// Frame for simple leaf methods (quick entries) ++// ++// +----------+ ++// | ret addr | <- TOS ++// +----------+ ++// | args | ++// | ...... 
| ++ ++// Frame for standard methods ++// ++// | .........| <- TOS ++// | locals | ++// +----------+ ++// | old fp, | <- RFP ++// +----------+ ++// | ret addr | ++// +----------+ ++// | args | ++// | .........| ++ ++// For OopMaps, map a local variable or spill index to an VMRegImpl name. ++// This is the offset from sp() in the frame of the slot for the index, ++// skewed by VMRegImpl::stack0 to indicate a stack location (vs.a register.) ++// ++// framesize + ++// stack0 stack0 0 <- VMReg ++// | | | ++// ...........|..............|.............| ++// 0 1 2 3 x x 4 5 6 ... | <- local indices ++// ^ ^ sp() ( x x indicate link ++// | | and return addr) ++// arguments non-argument locals ++ ++VMReg FrameMap::fpu_regname(int n) { ++ // Return the OptoReg name for the fpu stack slot "n" ++ // A spilled fpu stack slot comprises to two single-word OptoReg's. ++ return as_FloatRegister(n)->as_VMReg(); ++} ++ ++LIR_Opr FrameMap::stack_pointer() { ++ return FrameMap::sp_opr; ++} ++ ++// JSR 292 ++LIR_Opr FrameMap::method_handle_invoke_SP_save_opr() { ++ return LIR_OprFact::illegalOpr; // Not needed on LoongArch64 ++} ++ ++bool FrameMap::validate_frame() { ++ return true; ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/c1_FrameMap_loongarch.hpp b/src/hotspot/cpu/loongarch/c1_FrameMap_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/c1_FrameMap_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/c1_FrameMap_loongarch.hpp 2024-01-30 10:00:11.834765144 +0800 +@@ -0,0 +1,143 @@ ++/* ++ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_C1_FRAMEMAP_LOONGARCH_HPP ++#define CPU_LOONGARCH_C1_FRAMEMAP_LOONGARCH_HPP ++ ++// On LoongArch64 the frame looks as follows: ++// ++// +-----------------------------+---------+----------------------------------------+----------------+----------- ++// | size_arguments-nof_reg_args | 2 words | size_locals-size_arguments+numreg_args | _size_monitors | spilling . 
++// +-----------------------------+---------+----------------------------------------+----------------+----------- ++ ++ public: ++ static const int pd_c_runtime_reserved_arg_size; ++ ++ enum { ++ first_available_sp_in_frame = 0, ++ frame_pad_in_bytes = 16, ++ nof_reg_args = 8 ++ }; ++ ++ public: ++ static LIR_Opr receiver_opr; ++ ++ static LIR_Opr r0_opr; ++ static LIR_Opr ra_opr; ++ static LIR_Opr tp_opr; ++ static LIR_Opr sp_opr; ++ static LIR_Opr a0_opr; ++ static LIR_Opr a1_opr; ++ static LIR_Opr a2_opr; ++ static LIR_Opr a3_opr; ++ static LIR_Opr a4_opr; ++ static LIR_Opr a5_opr; ++ static LIR_Opr a6_opr; ++ static LIR_Opr a7_opr; ++ static LIR_Opr t0_opr; ++ static LIR_Opr t1_opr; ++ static LIR_Opr t2_opr; ++ static LIR_Opr t3_opr; ++ static LIR_Opr t4_opr; ++ static LIR_Opr t5_opr; ++ static LIR_Opr t6_opr; ++ static LIR_Opr t7_opr; ++ static LIR_Opr t8_opr; ++ static LIR_Opr rx_opr; ++ static LIR_Opr fp_opr; ++ static LIR_Opr s0_opr; ++ static LIR_Opr s1_opr; ++ static LIR_Opr s2_opr; ++ static LIR_Opr s3_opr; ++ static LIR_Opr s4_opr; ++ static LIR_Opr s5_opr; ++ static LIR_Opr s6_opr; ++ static LIR_Opr s7_opr; ++ static LIR_Opr s8_opr; ++ ++ static LIR_Opr ra_oop_opr; ++ static LIR_Opr a0_oop_opr; ++ static LIR_Opr a1_oop_opr; ++ static LIR_Opr a2_oop_opr; ++ static LIR_Opr a3_oop_opr; ++ static LIR_Opr a4_oop_opr; ++ static LIR_Opr a5_oop_opr; ++ static LIR_Opr a6_oop_opr; ++ static LIR_Opr a7_oop_opr; ++ static LIR_Opr t0_oop_opr; ++ static LIR_Opr t1_oop_opr; ++ static LIR_Opr t2_oop_opr; ++ static LIR_Opr t3_oop_opr; ++ static LIR_Opr t4_oop_opr; ++ static LIR_Opr t5_oop_opr; ++ static LIR_Opr t6_oop_opr; ++ static LIR_Opr t7_oop_opr; ++ static LIR_Opr t8_oop_opr; ++ static LIR_Opr fp_oop_opr; ++ static LIR_Opr s0_oop_opr; ++ static LIR_Opr s1_oop_opr; ++ static LIR_Opr s2_oop_opr; ++ static LIR_Opr s3_oop_opr; ++ static LIR_Opr s4_oop_opr; ++ static LIR_Opr s5_oop_opr; ++ static LIR_Opr s6_oop_opr; ++ static LIR_Opr s7_oop_opr; ++ static LIR_Opr s8_oop_opr; ++ ++ static LIR_Opr scr1_opr; ++ static LIR_Opr scr2_opr; ++ static LIR_Opr scr1_long_opr; ++ static LIR_Opr scr2_long_opr; ++ ++ static LIR_Opr a0_metadata_opr; ++ static LIR_Opr a1_metadata_opr; ++ static LIR_Opr a2_metadata_opr; ++ static LIR_Opr a3_metadata_opr; ++ static LIR_Opr a4_metadata_opr; ++ static LIR_Opr a5_metadata_opr; ++ ++ static LIR_Opr long0_opr; ++ static LIR_Opr long1_opr; ++ static LIR_Opr fpu0_float_opr; ++ static LIR_Opr fpu0_double_opr; ++ ++ static LIR_Opr as_long_opr(Register r) { ++ return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r)); ++ } ++ static LIR_Opr as_pointer_opr(Register r) { ++ return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r)); ++ } ++ ++ // VMReg name for spilled physical FPU stack slot n ++ static VMReg fpu_regname (int n); ++ ++ static bool is_caller_save_register(LIR_Opr opr) { return true; } ++ static bool is_caller_save_register(Register r) { return true; } ++ ++ static int nof_caller_save_cpu_regs() { return pd_nof_caller_save_cpu_regs_frame_map; } ++ static int last_cpu_reg() { return pd_last_cpu_reg; } ++ static int last_byte_reg() { return pd_last_byte_reg; } ++ ++#endif // CPU_LOONGARCH_C1_FRAMEMAP_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/c1_globals_loongarch.hpp b/src/hotspot/cpu/loongarch/c1_globals_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/c1_globals_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/c1_globals_loongarch.hpp 
2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,71 @@ ++/* ++ * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_C1_GLOBALS_LOONGARCH_HPP ++#define CPU_LOONGARCH_C1_GLOBALS_LOONGARCH_HPP ++ ++#include "utilities/globalDefinitions.hpp" ++#include "utilities/macros.hpp" ++ ++// Sets the default values for platform dependent flags used by the client compiler. ++// (see c1_globals.hpp) ++ ++#ifndef COMPILER2 ++define_pd_global(bool, BackgroundCompilation, true ); ++define_pd_global(bool, UseTLAB, true ); ++define_pd_global(bool, ResizeTLAB, true ); ++define_pd_global(bool, InlineIntrinsics, true ); ++define_pd_global(bool, PreferInterpreterNativeStubs, false); ++define_pd_global(bool, ProfileTraps, false); ++define_pd_global(bool, UseOnStackReplacement, true ); ++define_pd_global(bool, TieredCompilation, false); ++define_pd_global(intx, CompileThreshold, 1500 ); ++ ++define_pd_global(intx, OnStackReplacePercentage, 933 ); ++define_pd_global(intx, FreqInlineSize, 325 ); ++define_pd_global(intx, NewSizeThreadIncrease, 4*K ); ++define_pd_global(intx, InitialCodeCacheSize, 160*K); ++define_pd_global(intx, ReservedCodeCacheSize, 32*M ); ++define_pd_global(intx, NonProfiledCodeHeapSize, 13*M ); ++define_pd_global(intx, ProfiledCodeHeapSize, 14*M ); ++define_pd_global(intx, NonNMethodCodeHeapSize, 5*M ); ++define_pd_global(bool, ProfileInterpreter, false); ++define_pd_global(intx, CodeCacheExpansionSize, 32*K ); ++define_pd_global(uintx, CodeCacheMinBlockLength, 1); ++define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); ++define_pd_global(uintx, MetaspaceSize, 12*M ); ++define_pd_global(bool, NeverActAsServerClassMachine, true ); ++define_pd_global(uint64_t,MaxRAM, 1ULL*G); ++define_pd_global(bool, CICompileOSR, true ); ++#endif // !COMPILER2 ++define_pd_global(bool, UseTypeProfile, false); ++define_pd_global(bool, RoundFPResults, true ); ++ ++define_pd_global(bool, LIRFillDelaySlots, false); ++define_pd_global(bool, OptimizeSinglePrecision, true ); ++define_pd_global(bool, CSEArrayLength, false); ++define_pd_global(bool, TwoOperandLIRForm, false ); ++ ++#endif // CPU_LOONGARCH_C1_GLOBALS_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/c1_LinearScan_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_LinearScan_loongarch_64.cpp +--- a/src/hotspot/cpu/loongarch/c1_LinearScan_loongarch_64.cpp 1970-01-01 
08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/c1_LinearScan_loongarch_64.cpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,33 @@ ++/* ++ * Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "c1/c1_Instruction.hpp" ++#include "c1/c1_LinearScan.hpp" ++#include "utilities/bitMap.inline.hpp" ++ ++void LinearScan::allocate_fpu_stack() { ++ // No FPU stack on LoongArch64 ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/c1_LinearScan_loongarch.hpp b/src/hotspot/cpu/loongarch/c1_LinearScan_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/c1_LinearScan_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/c1_LinearScan_loongarch.hpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,70 @@ ++/* ++ * Copyright (c) 2005, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_C1_LINEARSCAN_LOONGARCH_HPP ++#define CPU_LOONGARCH_C1_LINEARSCAN_LOONGARCH_HPP ++ ++inline bool LinearScan::is_processed_reg_num(int reg_num) { ++ return reg_num <= FrameMap::last_cpu_reg() || reg_num >= pd_nof_cpu_regs_frame_map; ++} ++ ++inline int LinearScan::num_physical_regs(BasicType type) { ++ return 1; ++} ++ ++inline bool LinearScan::requires_adjacent_regs(BasicType type) { ++ return false; ++} ++ ++inline bool LinearScan::is_caller_save(int assigned_reg) { ++ assert(assigned_reg >= 0 && assigned_reg < nof_regs, "should call this only for registers"); ++ if (assigned_reg < pd_first_callee_saved_reg) ++ return true; ++ if (assigned_reg > pd_last_callee_saved_reg && assigned_reg < pd_first_callee_saved_fpu_reg) ++ return true; ++ if (assigned_reg > pd_last_callee_saved_fpu_reg && assigned_reg < pd_last_fpu_reg) ++ return true; ++ return false; ++} ++ ++inline void LinearScan::pd_add_temps(LIR_Op* op) {} ++ ++// Implementation of LinearScanWalker ++inline bool LinearScanWalker::pd_init_regs_for_alloc(Interval* cur) { ++ if (allocator()->gen()->is_vreg_flag_set(cur->reg_num(), LIRGenerator::callee_saved)) { ++ assert(cur->type() != T_FLOAT && cur->type() != T_DOUBLE, "cpu regs only"); ++ _first_reg = pd_first_callee_saved_reg; ++ _last_reg = pd_last_callee_saved_reg; ++ return true; ++ } else if (cur->type() == T_INT || cur->type() == T_LONG || cur->type() == T_OBJECT || ++ cur->type() == T_ADDRESS || cur->type() == T_METADATA) { ++ _first_reg = pd_first_cpu_reg; ++ _last_reg = pd_last_allocatable_cpu_reg; ++ return true; ++ } ++ return false; ++} ++ ++#endif // CPU_LOONGARCH_C1_LINEARSCAN_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/c1_LIRAssembler_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_LIRAssembler_loongarch_64.cpp +--- a/src/hotspot/cpu/loongarch/c1_LIRAssembler_loongarch_64.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/c1_LIRAssembler_loongarch_64.cpp 2024-01-30 10:00:11.834765144 +0800 +@@ -0,0 +1,3387 @@ ++/* ++ * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "asm/assembler.hpp" ++#include "c1/c1_CodeStubs.hpp" ++#include "c1/c1_Compilation.hpp" ++#include "c1/c1_LIRAssembler.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "c1/c1_Runtime1.hpp" ++#include "c1/c1_ValueStack.hpp" ++#include "ci/ciArrayKlass.hpp" ++#include "ci/ciInstance.hpp" ++#include "code/compiledIC.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "gc/shared/gc_globals.hpp" ++#include "nativeInst_loongarch.hpp" ++#include "oops/objArrayKlass.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "vmreg_loongarch.inline.hpp" ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++#ifndef PRODUCT ++#define COMMENT(x) do { __ block_comment(x); } while (0) ++#else ++#define COMMENT(x) ++#endif ++ ++NEEDS_CLEANUP // remove this definitions? ++ ++#define __ _masm-> ++ ++static void select_different_registers(Register preserve, Register extra, ++ Register &tmp1, Register &tmp2) { ++ if (tmp1 == preserve) { ++ assert_different_registers(tmp1, tmp2, extra); ++ tmp1 = extra; ++ } else if (tmp2 == preserve) { ++ assert_different_registers(tmp1, tmp2, extra); ++ tmp2 = extra; ++ } ++ assert_different_registers(preserve, tmp1, tmp2); ++} ++ ++static void select_different_registers(Register preserve, Register extra, ++ Register &tmp1, Register &tmp2, ++ Register &tmp3) { ++ if (tmp1 == preserve) { ++ assert_different_registers(tmp1, tmp2, tmp3, extra); ++ tmp1 = extra; ++ } else if (tmp2 == preserve) { ++ assert_different_registers(tmp1, tmp2, tmp3, extra); ++ tmp2 = extra; ++ } else if (tmp3 == preserve) { ++ assert_different_registers(tmp1, tmp2, tmp3, extra); ++ tmp3 = extra; ++ } ++ assert_different_registers(preserve, tmp1, tmp2, tmp3); ++} ++ ++bool LIR_Assembler::is_small_constant(LIR_Opr opr) { Unimplemented(); return false; } ++ ++LIR_Opr LIR_Assembler::receiverOpr() { ++ return FrameMap::receiver_opr; ++} ++ ++LIR_Opr LIR_Assembler::osrBufferPointer() { ++ return FrameMap::as_pointer_opr(receiverOpr()->as_register()); ++} ++ ++//--------------fpu register translations----------------------- ++ ++address LIR_Assembler::float_constant(float f) { ++ address const_addr = __ float_constant(f); ++ if (const_addr == NULL) { ++ bailout("const section overflow"); ++ return __ code()->consts()->start(); ++ } else { ++ return const_addr; ++ } ++} ++ ++address LIR_Assembler::double_constant(double d) { ++ address const_addr = __ double_constant(d); ++ if (const_addr == NULL) { ++ bailout("const section overflow"); ++ return __ code()->consts()->start(); ++ } else { ++ return const_addr; ++ } ++} ++ ++void LIR_Assembler::vtable_call(LIR_OpJavaCall* op) { ++ ShouldNotReachHere(); ++} ++ ++void LIR_Assembler::set_24bit_FPU() { Unimplemented(); } ++ ++void LIR_Assembler::reset_FPU() { Unimplemented(); } ++ ++void LIR_Assembler::fpop() { Unimplemented(); } ++ ++void LIR_Assembler::fxch(int i) { Unimplemented(); } ++ ++void LIR_Assembler::fld(int i) { Unimplemented(); } ++ ++void LIR_Assembler::ffree(int i) { Unimplemented(); } ++ ++void LIR_Assembler::breakpoint() { Unimplemented(); } ++ ++void LIR_Assembler::push(LIR_Opr opr) { Unimplemented(); } ++ ++void LIR_Assembler::pop(LIR_Opr opr) { Unimplemented(); } ++ ++bool LIR_Assembler::is_literal_address(LIR_Address* addr) { Unimplemented(); return false; } ++ ++static Register 
as_reg(LIR_Opr op) { ++ return op->is_double_cpu() ? op->as_register_lo() : op->as_register(); ++} ++ ++static jlong as_long(LIR_Opr data) { ++ jlong result; ++ switch (data->type()) { ++ case T_INT: ++ result = (data->as_jint()); ++ break; ++ case T_LONG: ++ result = (data->as_jlong()); ++ break; ++ default: ++ ShouldNotReachHere(); ++ result = 0; // unreachable ++ } ++ return result; ++} ++ ++Address LIR_Assembler::as_Address(LIR_Address* addr) { ++ Register base = addr->base()->as_pointer_register(); ++ LIR_Opr opr = addr->index(); ++ if (opr->is_cpu_register()) { ++ Register index; ++ if (opr->is_single_cpu()) ++ index = opr->as_register(); ++ else ++ index = opr->as_register_lo(); ++ assert(addr->disp() == 0, "must be"); ++ return Address(base, index, Address::ScaleFactor(addr->scale())); ++ } else { ++ assert(addr->scale() == 0, "must be"); ++ return Address(base, addr->disp()); ++ } ++ return Address(); ++} ++ ++Address LIR_Assembler::as_Address_hi(LIR_Address* addr) { ++ ShouldNotReachHere(); ++ return Address(); ++} ++ ++Address LIR_Assembler::as_Address_lo(LIR_Address* addr) { ++ return as_Address(addr); // Ouch ++ // FIXME: This needs to be much more clever. See x86. ++} ++ ++// Ensure a valid Address (base + offset) to a stack-slot. If stack access is ++// not encodable as a base + (immediate) offset, generate an explicit address ++// calculation to hold the address in a temporary register. ++Address LIR_Assembler::stack_slot_address(int index, uint size, int adjust) { ++ precond(size == 4 || size == 8); ++ Address addr = frame_map()->address_for_slot(index, adjust); ++ precond(addr.index() == noreg); ++ precond(addr.base() == SP); ++ precond(addr.disp() > 0); ++ uint mask = size - 1; ++ assert((addr.disp() & mask) == 0, "scaled offsets only"); ++ return addr; ++} ++ ++void LIR_Assembler::osr_entry() { ++ offsets()->set_value(CodeOffsets::OSR_Entry, code_offset()); ++ BlockBegin* osr_entry = compilation()->hir()->osr_entry(); ++ ValueStack* entry_state = osr_entry->state(); ++ int number_of_locks = entry_state->locks_size(); ++ ++ // we jump here if osr happens with the interpreter ++ // state set up to continue at the beginning of the ++ // loop that triggered osr - in particular, we have ++ // the following registers setup: ++ // ++ // A2: osr buffer ++ // ++ ++ // build frame ++ ciMethod* m = compilation()->method(); ++ __ build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes()); ++ ++ // OSR buffer is ++ // ++ // locals[nlocals-1..0] ++ // monitors[0..number_of_locks] ++ // ++ // locals is a direct copy of the interpreter frame so in the osr buffer ++ // so first slot in the local array is the last local from the interpreter ++ // and last slot is local[0] (receiver) from the interpreter ++ // ++ // Similarly with locks. The first lock slot in the osr buffer is the nth lock ++ // from the interpreter frame, the nth lock slot in the osr buffer is 0th lock ++ // in the interpreter frame (the method lock if a sync method) ++ ++ // Initialize monitors in the compiled activation. ++ // A2: pointer to osr buffer ++ // ++ // All other registers are dead at this point and the locals will be ++ // copied into place by code emitted in the IR. 
++ ++ Register OSR_buf = osrBufferPointer()->as_pointer_register(); ++ { ++ assert(frame::interpreter_frame_monitor_size() == BasicObjectLock::size(), "adjust code below"); ++ int monitor_offset = BytesPerWord * method()->max_locals() + (2 * BytesPerWord) * (number_of_locks - 1); ++ // SharedRuntime::OSR_migration_begin() packs BasicObjectLocks in ++ // the OSR buffer using 2 word entries: first the lock and then ++ // the oop. ++ for (int i = 0; i < number_of_locks; i++) { ++ int slot_offset = monitor_offset - ((i * 2) * BytesPerWord); ++#ifdef ASSERT ++ // verify the interpreter's monitor has a non-null object ++ { ++ Label L; ++ __ ld_ptr(SCR1, Address(OSR_buf, slot_offset + 1 * BytesPerWord)); ++ __ bnez(SCR1, L); ++ __ stop("locked object is NULL"); ++ __ bind(L); ++ } ++#endif ++ __ ld_ptr(S0, Address(OSR_buf, slot_offset + 0)); ++ __ st_ptr(S0, frame_map()->address_for_monitor_lock(i)); ++ __ ld_ptr(S0, Address(OSR_buf, slot_offset + 1*BytesPerWord)); ++ __ st_ptr(S0, frame_map()->address_for_monitor_object(i)); ++ } ++ } ++} ++ ++// inline cache check; done before the frame is built. ++int LIR_Assembler::check_icache() { ++ Register receiver = FrameMap::receiver_opr->as_register(); ++ Register ic_klass = IC_Klass; ++ int start_offset = __ offset(); ++ Label dont; ++ ++ __ verify_oop(receiver); ++ ++ // explicit NULL check not needed since load from [klass_offset] causes a trap ++ // check against inline cache ++ assert(!MacroAssembler::needs_explicit_null_check(oopDesc::klass_offset_in_bytes()), ++ "must add explicit null check"); ++ ++ __ load_klass(SCR2, receiver); ++ __ beq(SCR2, ic_klass, dont); ++ ++ // if icache check fails, then jump to runtime routine ++ // Note: RECEIVER must still contain the receiver! ++ __ jmp(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type); ++ ++ // We align the verified entry point unless the method body ++ // (including its inline cache check) will fit in a single 64-byte ++ // icache line. ++ if (!method()->is_accessor() || __ offset() - start_offset > 4 * 4) { ++ // force alignment after the cache check. ++ __ align(CodeEntryAlignment); ++ } ++ ++ __ bind(dont); ++ return start_offset; ++} ++ ++void LIR_Assembler::jobject2reg(jobject o, Register reg) { ++ if (o == NULL) { ++ __ move(reg, R0); ++ } else { ++ int oop_index = __ oop_recorder()->find_index(o); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ __ relocate(rspec); ++ __ patchable_li52(reg, (long)o); ++ } ++} ++ ++void LIR_Assembler::deoptimize_trap(CodeEmitInfo *info) { ++ address target = NULL; ++ ++ switch (patching_id(info)) { ++ case PatchingStub::access_field_id: ++ target = Runtime1::entry_for(Runtime1::access_field_patching_id); ++ break; ++ case PatchingStub::load_klass_id: ++ target = Runtime1::entry_for(Runtime1::load_klass_patching_id); ++ break; ++ case PatchingStub::load_mirror_id: ++ target = Runtime1::entry_for(Runtime1::load_mirror_patching_id); ++ break; ++ case PatchingStub::load_appendix_id: ++ target = Runtime1::entry_for(Runtime1::load_appendix_patching_id); ++ break; ++ default: ShouldNotReachHere(); ++ } ++ ++ __ call(target, relocInfo::runtime_call_type); ++ add_call_info_here(info); ++} ++ ++void LIR_Assembler::jobject2reg_with_patching(Register reg, CodeEmitInfo *info) { ++ deoptimize_trap(info); ++} ++ ++// This specifies the rsp decrement needed to build the frame ++int LIR_Assembler::initial_frame_size_in_bytes() const { ++ // if rounding, must let FrameMap know! 
++ return in_bytes(frame_map()->framesize_in_bytes()); ++} ++ ++int LIR_Assembler::emit_exception_handler() { ++ // if the last instruction is a call (typically to do a throw which ++ // is coming at the end after block reordering) the return address ++ // must still point into the code area in order to avoid assertion ++ // failures when searching for the corresponding bci => add a nop ++ // (was bug 5/14/1999 - gri) ++ __ nop(); ++ ++ // generate code for exception handler ++ address handler_base = __ start_a_stub(exception_handler_size()); ++ if (handler_base == NULL) { ++ // not enough space left for the handler ++ bailout("exception handler overflow"); ++ return -1; ++ } ++ ++ int offset = code_offset(); ++ ++ // the exception oop and pc are in A0, and A1 ++ // no other registers need to be preserved, so invalidate them ++ __ invalidate_registers(false, true, true, true, true, true); ++ ++ // check that there is really an exception ++ __ verify_not_null_oop(A0); ++ ++ // search an exception handler (A0: exception oop, A1: throwing pc) ++ __ call(Runtime1::entry_for(Runtime1::handle_exception_from_callee_id), relocInfo::runtime_call_type); ++ __ should_not_reach_here(); ++ guarantee(code_offset() - offset <= exception_handler_size(), "overflow"); ++ __ end_a_stub(); ++ ++ return offset; ++} ++ ++// Emit the code to remove the frame from the stack in the exception unwind path. ++int LIR_Assembler::emit_unwind_handler() { ++#ifndef PRODUCT ++ if (CommentedAssembly) { ++ _masm->block_comment("Unwind handler"); ++ } ++#endif ++ ++ int offset = code_offset(); ++ ++ // Fetch the exception from TLS and clear out exception related thread state ++ __ ld_ptr(A0, Address(TREG, JavaThread::exception_oop_offset())); ++ __ st_ptr(R0, Address(TREG, JavaThread::exception_oop_offset())); ++ __ st_ptr(R0, Address(TREG, JavaThread::exception_pc_offset())); ++ ++ __ bind(_unwind_handler_entry); ++ __ verify_not_null_oop(V0); ++ if (method()->is_synchronized() || compilation()->env()->dtrace_method_probes()) { ++ __ move(S0, V0); // Preserve the exception ++ } ++ ++ // Perform needed unlocking ++ MonitorExitStub* stub = NULL; ++ if (method()->is_synchronized()) { ++ monitor_address(0, FrameMap::a0_opr); ++ stub = new MonitorExitStub(FrameMap::a0_opr, true, 0); ++ __ unlock_object(A5, A4, A0, *stub->entry()); ++ __ bind(*stub->continuation()); ++ } ++ ++ if (compilation()->env()->dtrace_method_probes()) { ++ __ mov_metadata(A1, method()->constant_encoding()); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), TREG, A1); ++ } ++ ++ if (method()->is_synchronized() || compilation()->env()->dtrace_method_probes()) { ++ __ move(A0, S0); // Restore the exception ++ } ++ ++ // remove the activation and dispatch to the unwind handler ++ __ block_comment("remove_frame and dispatch to the unwind handler"); ++ __ remove_frame(initial_frame_size_in_bytes()); ++ __ jmp(Runtime1::entry_for(Runtime1::unwind_exception_id), relocInfo::runtime_call_type); ++ ++ // Emit the slow path assembly ++ if (stub != NULL) { ++ stub->emit_code(this); ++ } ++ ++ return offset; ++} ++ ++int LIR_Assembler::emit_deopt_handler() { ++ // if the last instruction is a call (typically to do a throw which ++ // is coming at the end after block reordering) the return address ++ // must still point into the code area in order to avoid assertion ++ // failures when searching for the corresponding bci => add a nop ++ // (was bug 5/14/1999 - gri) ++ __ nop(); ++ ++ // generate code for exception handler ++ address 
handler_base = __ start_a_stub(deopt_handler_size()); ++ if (handler_base == NULL) { ++ // not enough space left for the handler ++ bailout("deopt handler overflow"); ++ return -1; ++ } ++ ++ int offset = code_offset(); ++ ++ __ call(SharedRuntime::deopt_blob()->unpack(), relocInfo::runtime_call_type); ++ guarantee(code_offset() - offset <= deopt_handler_size(), "overflow"); ++ __ end_a_stub(); ++ ++ return offset; ++} ++ ++void LIR_Assembler::add_debug_info_for_branch(address adr, CodeEmitInfo* info) { ++ _masm->code_section()->relocate(adr, relocInfo::poll_type); ++ int pc_offset = code_offset(); ++ flush_debug_info(pc_offset); ++ info->record_debug_info(compilation()->debug_info_recorder(), pc_offset); ++ if (info->exception_handlers() != NULL) { ++ compilation()->add_exception_handlers_for_pco(pc_offset, info->exception_handlers()); ++ } ++} ++ ++void LIR_Assembler::return_op(LIR_Opr result) { ++ assert(result->is_illegal() || !result->is_single_cpu() || result->as_register() == V0, ++ "word returns are in V0,"); ++ ++ // Pop the stack before the safepoint code ++ __ remove_frame(initial_frame_size_in_bytes()); ++ ++ if (StackReservedPages > 0 && compilation()->has_reserved_stack_access()) { ++ __ reserved_stack_check(); ++ } ++ ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ __ ld_ptr(SCR2, Address(TREG, JavaThread::polling_page_offset())); ++ } else { ++ __ li(SCR2, os::get_polling_page()); ++ } ++ __ relocate(relocInfo::poll_return_type); ++ __ ld_w(SCR1, SCR2, 0); ++ __ jr(RA); ++} ++ ++int LIR_Assembler::safepoint_poll(LIR_Opr tmp, CodeEmitInfo* info) { ++ guarantee(info != NULL, "Shouldn't be NULL"); ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ __ ld_ptr(SCR2, Address(TREG, JavaThread::polling_page_offset())); ++ } else { ++ __ li(SCR2, os::get_polling_page()); ++ } ++ add_debug_info_for_branch(info); // This isn't just debug info: it's the oop map ++ __ relocate(relocInfo::poll_type); ++ __ ld_w(SCR1, SCR2, 0); ++ return __ offset(); ++} ++ ++void LIR_Assembler::move_regs(Register from_reg, Register to_reg) { ++ __ move(to_reg, from_reg); ++} ++ ++void LIR_Assembler::swap_reg(Register a, Register b) { Unimplemented(); } ++ ++void LIR_Assembler::const2reg(LIR_Opr src, LIR_Opr dest, LIR_PatchCode patch_code, CodeEmitInfo* info) { ++ assert(src->is_constant(), "should not call otherwise"); ++ assert(dest->is_register(), "should not call otherwise"); ++ LIR_Const* c = src->as_constant_ptr(); ++ ++ switch (c->type()) { ++ case T_INT: ++ assert(patch_code == lir_patch_none, "no patching handled here"); ++ __ li(dest->as_register(), c->as_jint()); ++ break; ++ case T_ADDRESS: ++ assert(patch_code == lir_patch_none, "no patching handled here"); ++ __ li(dest->as_register(), c->as_jint()); ++ break; ++ case T_LONG: ++ assert(patch_code == lir_patch_none, "no patching handled here"); ++ __ li(dest->as_register_lo(), (intptr_t)c->as_jlong()); ++ break; ++ case T_OBJECT: ++ if (patch_code == lir_patch_none) { ++ jobject2reg(c->as_jobject(), dest->as_register()); ++ } else { ++ jobject2reg_with_patching(dest->as_register(), info); ++ } ++ break; ++ case T_METADATA: ++ if (patch_code != lir_patch_none) { ++ klass2reg_with_patching(dest->as_register(), info); ++ } else { ++ __ mov_metadata(dest->as_register(), c->as_metadata()); ++ } ++ break; ++ case T_FLOAT: ++ __ lea(SCR1, InternalAddress(float_constant(c->as_jfloat()))); ++ __ fld_s(dest->as_float_reg(), SCR1, 0); ++ break; ++ case T_DOUBLE: ++ __ lea(SCR1, InternalAddress(double_constant(c->as_jdouble()))); ++ __ 
fld_d(dest->as_double_reg(), SCR1, 0); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::const2stack(LIR_Opr src, LIR_Opr dest) { ++ LIR_Const* c = src->as_constant_ptr(); ++ switch (c->type()) { ++ case T_OBJECT: ++ if (!c->as_jobject()) ++ __ st_ptr(R0, frame_map()->address_for_slot(dest->single_stack_ix())); ++ else { ++ const2reg(src, FrameMap::scr1_opr, lir_patch_none, NULL); ++ reg2stack(FrameMap::scr1_opr, dest, c->type(), false); ++ } ++ break; ++ case T_ADDRESS: ++ const2reg(src, FrameMap::scr1_opr, lir_patch_none, NULL); ++ reg2stack(FrameMap::scr1_opr, dest, c->type(), false); ++ case T_INT: ++ case T_FLOAT: ++ if (c->as_jint_bits() == 0) ++ __ st_w(R0, frame_map()->address_for_slot(dest->single_stack_ix())); ++ else { ++ __ li(SCR2, c->as_jint_bits()); ++ __ st_w(SCR2, frame_map()->address_for_slot(dest->single_stack_ix())); ++ } ++ break; ++ case T_LONG: ++ case T_DOUBLE: ++ if (c->as_jlong_bits() == 0) ++ __ st_ptr(R0, frame_map()->address_for_slot(dest->double_stack_ix(), ++ lo_word_offset_in_bytes)); ++ else { ++ __ li(SCR2, (intptr_t)c->as_jlong_bits()); ++ __ st_ptr(SCR2, frame_map()->address_for_slot(dest->double_stack_ix(), ++ lo_word_offset_in_bytes)); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::const2mem(LIR_Opr src, LIR_Opr dest, BasicType type, ++ CodeEmitInfo* info, bool wide) { ++ assert(src->is_constant(), "should not call otherwise"); ++ LIR_Const* c = src->as_constant_ptr(); ++ LIR_Address* to_addr = dest->as_address_ptr(); ++ ++ void (Assembler::* insn)(Register Rt, Address adr); ++ ++ switch (type) { ++ case T_ADDRESS: ++ assert(c->as_jint() == 0, "should be"); ++ insn = &Assembler::st_d; ++ break; ++ case T_LONG: ++ assert(c->as_jlong() == 0, "should be"); ++ insn = &Assembler::st_d; ++ break; ++ case T_INT: ++ assert(c->as_jint() == 0, "should be"); ++ insn = &Assembler::st_w; ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ assert(c->as_jobject() == 0, "should be"); ++ if (UseCompressedOops && !wide) { ++ insn = &Assembler::st_w; ++ } else { ++ insn = &Assembler::st_d; ++ } ++ break; ++ case T_CHAR: ++ case T_SHORT: ++ assert(c->as_jint() == 0, "should be"); ++ insn = &Assembler::st_h; ++ break; ++ case T_BOOLEAN: ++ case T_BYTE: ++ assert(c->as_jint() == 0, "should be"); ++ insn = &Assembler::st_b; ++ break; ++ default: ++ ShouldNotReachHere(); ++ insn = &Assembler::st_d; // unreachable ++ } ++ ++ if (info) add_debug_info_for_null_check_here(info); ++ (_masm->*insn)(R0, as_Address(to_addr)); ++} ++ ++void LIR_Assembler::reg2reg(LIR_Opr src, LIR_Opr dest) { ++ assert(src->is_register(), "should not call otherwise"); ++ assert(dest->is_register(), "should not call otherwise"); ++ ++ // move between cpu-registers ++ if (dest->is_single_cpu()) { ++ if (src->type() == T_LONG) { ++ // Can do LONG -> OBJECT ++ move_regs(src->as_register_lo(), dest->as_register()); ++ return; ++ } ++ assert(src->is_single_cpu(), "must match"); ++ if (src->type() == T_OBJECT) { ++ __ verify_oop(src->as_register()); ++ } ++ move_regs(src->as_register(), dest->as_register()); ++ } else if (dest->is_double_cpu()) { ++ if (is_reference_type(src->type())) { ++ // Surprising to me but we can see move of a long to t_object ++ __ verify_oop(src->as_register()); ++ move_regs(src->as_register(), dest->as_register_lo()); ++ return; ++ } ++ assert(src->is_double_cpu(), "must match"); ++ Register f_lo = src->as_register_lo(); ++ Register f_hi = src->as_register_hi(); ++ Register t_lo = dest->as_register_lo(); ++ Register 
t_hi = dest->as_register_hi(); ++ assert(f_hi == f_lo, "must be same"); ++ assert(t_hi == t_lo, "must be same"); ++ move_regs(f_lo, t_lo); ++ } else if (dest->is_single_fpu()) { ++ __ fmov_s(dest->as_float_reg(), src->as_float_reg()); ++ } else if (dest->is_double_fpu()) { ++ __ fmov_d(dest->as_double_reg(), src->as_double_reg()); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::reg2stack(LIR_Opr src, LIR_Opr dest, BasicType type, bool pop_fpu_stack) { ++ precond(src->is_register() && dest->is_stack()); ++ ++ uint const c_sz32 = sizeof(uint32_t); ++ uint const c_sz64 = sizeof(uint64_t); ++ ++ if (src->is_single_cpu()) { ++ int index = dest->single_stack_ix(); ++ if (is_reference_type(type)) { ++ __ st_ptr(src->as_register(), stack_slot_address(index, c_sz64)); ++ __ verify_oop(src->as_register()); ++ } else if (type == T_METADATA || type == T_DOUBLE || type == T_ADDRESS) { ++ __ st_ptr(src->as_register(), stack_slot_address(index, c_sz64)); ++ } else { ++ __ st_w(src->as_register(), stack_slot_address(index, c_sz32)); ++ } ++ } else if (src->is_double_cpu()) { ++ int index = dest->double_stack_ix(); ++ Address dest_addr_LO = stack_slot_address(index, c_sz64, lo_word_offset_in_bytes); ++ __ st_ptr(src->as_register_lo(), dest_addr_LO); ++ } else if (src->is_single_fpu()) { ++ int index = dest->single_stack_ix(); ++ __ fst_s(src->as_float_reg(), stack_slot_address(index, c_sz32)); ++ } else if (src->is_double_fpu()) { ++ int index = dest->double_stack_ix(); ++ __ fst_d(src->as_double_reg(), stack_slot_address(index, c_sz64)); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, ++ CodeEmitInfo* info, bool pop_fpu_stack, bool wide, bool /* unaligned */) { ++ LIR_Address* to_addr = dest->as_address_ptr(); ++ PatchingStub* patch = NULL; ++ Register compressed_src = SCR2; ++ ++ if (patch_code != lir_patch_none) { ++ deoptimize_trap(info); ++ return; ++ } ++ ++ if (is_reference_type(type)) { ++ __ verify_oop(src->as_register()); ++ ++ if (UseCompressedOops && !wide) { ++ __ encode_heap_oop(compressed_src, src->as_register()); ++ } else { ++ compressed_src = src->as_register(); ++ } ++ } ++ ++ int null_check_here = code_offset(); ++ switch (type) { ++ case T_FLOAT: ++ __ fst_s(src->as_float_reg(), as_Address(to_addr)); ++ break; ++ case T_DOUBLE: ++ __ fst_d(src->as_double_reg(), as_Address(to_addr)); ++ break; ++ case T_ARRAY: // fall through ++ case T_OBJECT: // fall through ++ if (UseCompressedOops && !wide) { ++ __ st_w(compressed_src, as_Address(to_addr)); ++ } else { ++ __ st_ptr(compressed_src, as_Address(to_addr)); ++ } ++ break; ++ case T_METADATA: ++ // We get here to store a method pointer to the stack to pass to ++ // a dtrace runtime call. This can't work on 64 bit with ++ // compressed klass ptrs: T_METADATA can be a compressed klass ++ // ptr or a 64 bit method pointer. 
++ ShouldNotReachHere(); ++ __ st_ptr(src->as_register(), as_Address(to_addr)); ++ break; ++ case T_ADDRESS: ++ __ st_ptr(src->as_register(), as_Address(to_addr)); ++ break; ++ case T_INT: ++ __ st_w(src->as_register(), as_Address(to_addr)); ++ break; ++ case T_LONG: ++ __ st_ptr(src->as_register_lo(), as_Address_lo(to_addr)); ++ break; ++ case T_BYTE: // fall through ++ case T_BOOLEAN: ++ __ st_b(src->as_register(), as_Address(to_addr)); ++ break; ++ case T_CHAR: // fall through ++ case T_SHORT: ++ __ st_h(src->as_register(), as_Address(to_addr)); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ if (info != NULL) { ++ add_debug_info_for_null_check(null_check_here, info); ++ } ++} ++ ++void LIR_Assembler::stack2reg(LIR_Opr src, LIR_Opr dest, BasicType type) { ++ precond(src->is_stack() && dest->is_register()); ++ ++ uint const c_sz32 = sizeof(uint32_t); ++ uint const c_sz64 = sizeof(uint64_t); ++ ++ if (dest->is_single_cpu()) { ++ int index = src->single_stack_ix(); ++ if (is_reference_type(type)) { ++ __ ld_ptr(dest->as_register(), stack_slot_address(index, c_sz64)); ++ __ verify_oop(dest->as_register()); ++ } else if (type == T_METADATA || type == T_ADDRESS) { ++ __ ld_ptr(dest->as_register(), stack_slot_address(index, c_sz64)); ++ } else { ++ __ ld_w(dest->as_register(), stack_slot_address(index, c_sz32)); ++ } ++ } else if (dest->is_double_cpu()) { ++ int index = src->double_stack_ix(); ++ Address src_addr_LO = stack_slot_address(index, c_sz64, lo_word_offset_in_bytes); ++ __ ld_ptr(dest->as_register_lo(), src_addr_LO); ++ } else if (dest->is_single_fpu()) { ++ int index = src->single_stack_ix(); ++ __ fld_s(dest->as_float_reg(), stack_slot_address(index, c_sz32)); ++ } else if (dest->is_double_fpu()) { ++ int index = src->double_stack_ix(); ++ __ fld_d(dest->as_double_reg(), stack_slot_address(index, c_sz64)); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::klass2reg_with_patching(Register reg, CodeEmitInfo* info) { ++ address target = NULL; ++ ++ switch (patching_id(info)) { ++ case PatchingStub::access_field_id: ++ target = Runtime1::entry_for(Runtime1::access_field_patching_id); ++ break; ++ case PatchingStub::load_klass_id: ++ target = Runtime1::entry_for(Runtime1::load_klass_patching_id); ++ break; ++ case PatchingStub::load_mirror_id: ++ target = Runtime1::entry_for(Runtime1::load_mirror_patching_id); ++ break; ++ case PatchingStub::load_appendix_id: ++ target = Runtime1::entry_for(Runtime1::load_appendix_patching_id); ++ break; ++ default: ShouldNotReachHere(); ++ } ++ ++ __ call(target, relocInfo::runtime_call_type); ++ add_call_info_here(info); ++} ++ ++void LIR_Assembler::stack2stack(LIR_Opr src, LIR_Opr dest, BasicType type) { ++ LIR_Opr temp; ++ ++ if (type == T_LONG || type == T_DOUBLE) ++ temp = FrameMap::scr1_long_opr; ++ else ++ temp = FrameMap::scr1_opr; ++ ++ stack2reg(src, temp, src->type()); ++ reg2stack(temp, dest, dest->type(), false); ++} ++ ++void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, ++ CodeEmitInfo* info, bool wide, bool /* unaligned */) { ++ LIR_Address* addr = src->as_address_ptr(); ++ LIR_Address* from_addr = src->as_address_ptr(); ++ ++ if (addr->base()->type() == T_OBJECT) { ++ __ verify_oop(addr->base()->as_pointer_register()); ++ } ++ ++ if (patch_code != lir_patch_none) { ++ deoptimize_trap(info); ++ return; ++ } ++ ++ if (info != NULL) { ++ add_debug_info_for_null_check_here(info); ++ } ++ int null_check_here = code_offset(); ++ switch (type) { ++ case T_FLOAT: ++ __ 
fld_s(dest->as_float_reg(), as_Address(from_addr)); ++ break; ++ case T_DOUBLE: ++ __ fld_d(dest->as_double_reg(), as_Address(from_addr)); ++ break; ++ case T_ARRAY: // fall through ++ case T_OBJECT: // fall through ++ if (UseCompressedOops && !wide) { ++ __ ld_wu(dest->as_register(), as_Address(from_addr)); ++ } else { ++ __ ld_ptr(dest->as_register(), as_Address(from_addr)); ++ } ++ break; ++ case T_METADATA: ++ // We get here to store a method pointer to the stack to pass to ++ // a dtrace runtime call. This can't work on 64 bit with ++ // compressed klass ptrs: T_METADATA can be a compressed klass ++ // ptr or a 64 bit method pointer. ++ ShouldNotReachHere(); ++ __ ld_ptr(dest->as_register(), as_Address(from_addr)); ++ break; ++ case T_ADDRESS: ++ // FIXME: OMG this is a horrible kludge. Any offset from an ++ // address that matches klass_offset_in_bytes() will be loaded ++ // as a word, not a long. ++ if (UseCompressedClassPointers && addr->disp() == oopDesc::klass_offset_in_bytes()) { ++ __ ld_wu(dest->as_register(), as_Address(from_addr)); ++ } else { ++ __ ld_ptr(dest->as_register(), as_Address(from_addr)); ++ } ++ break; ++ case T_INT: ++ __ ld_w(dest->as_register(), as_Address(from_addr)); ++ break; ++ case T_LONG: ++ __ ld_ptr(dest->as_register_lo(), as_Address_lo(from_addr)); ++ break; ++ case T_BYTE: ++ __ ld_b(dest->as_register(), as_Address(from_addr)); ++ break; ++ case T_BOOLEAN: ++ __ ld_bu(dest->as_register(), as_Address(from_addr)); ++ break; ++ case T_CHAR: ++ __ ld_hu(dest->as_register(), as_Address(from_addr)); ++ break; ++ case T_SHORT: ++ __ ld_h(dest->as_register(), as_Address(from_addr)); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ if (is_reference_type(type)) { ++ if (UseCompressedOops && !wide) { ++ __ decode_heap_oop(dest->as_register()); ++ } ++ ++ if (!UseZGC) { ++ // Load barrier has not yet been applied, so ZGC can't verify the oop here ++ __ verify_oop(dest->as_register()); ++ } ++ } else if (type == T_ADDRESS && addr->disp() == oopDesc::klass_offset_in_bytes()) { ++ if (UseCompressedClassPointers) { ++ __ decode_klass_not_null(dest->as_register()); ++ } ++ } ++} ++ ++int LIR_Assembler::array_element_size(BasicType type) const { ++ int elem_size = type2aelembytes(type); ++ return exact_log2(elem_size); ++} ++ ++void LIR_Assembler::emit_op3(LIR_Op3* op) { ++ switch (op->code()) { ++ case lir_idiv: ++ case lir_irem: ++ arithmetic_idiv(op->code(), op->in_opr1(), op->in_opr2(), op->in_opr3(), ++ op->result_opr(), op->info()); ++ break; ++ case lir_fmad: ++ __ fmadd_d(op->result_opr()->as_double_reg(), op->in_opr1()->as_double_reg(), ++ op->in_opr2()->as_double_reg(), op->in_opr3()->as_double_reg()); ++ break; ++ case lir_fmaf: ++ __ fmadd_s(op->result_opr()->as_float_reg(), op->in_opr1()->as_float_reg(), ++ op->in_opr2()->as_float_reg(), op->in_opr3()->as_float_reg()); ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++} ++ ++void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) { ++#ifdef ASSERT ++ assert(op->block() == NULL || op->block()->label() == op->label(), "wrong label"); ++ if (op->block() != NULL) _branch_target_blocks.append(op->block()); ++ assert(op->cond() == lir_cond_always, "must be"); ++#endif ++ ++ if (op->info() != NULL) ++ add_debug_info_for_branch(op->info()); ++ ++ __ b_far(*(op->label())); ++} ++ ++void LIR_Assembler::emit_opCmpBranch(LIR_OpCmpBranch* op) { ++#ifdef ASSERT ++ assert(op->block() == NULL || op->block()->label() == op->label(), "wrong label"); ++ if (op->block() != NULL) 
_branch_target_blocks.append(op->block()); ++ if (op->ublock() != NULL) _branch_target_blocks.append(op->ublock()); ++#endif ++ ++ if (op->info() != NULL) { ++ assert(op->in_opr1()->is_address() || op->in_opr2()->is_address(), ++ "shouldn't be codeemitinfo for non-address operands"); ++ add_debug_info_for_null_check_here(op->info()); // exception possible ++ } ++ ++ Label& L = *(op->label()); ++ Assembler::Condition acond; ++ LIR_Opr opr1 = op->in_opr1(); ++ LIR_Opr opr2 = op->in_opr2(); ++ assert(op->condition() != lir_cond_always, "must be"); ++ ++ if (op->code() == lir_cmp_float_branch) { ++ bool is_unordered = (op->ublock() == op->block()); ++ if (opr1->is_single_fpu()) { ++ FloatRegister reg1 = opr1->as_float_reg(); ++ assert(opr2->is_single_fpu(), "expect single float register"); ++ FloatRegister reg2 = opr2->as_float_reg(); ++ switch(op->condition()) { ++ case lir_cond_equal: ++ if (is_unordered) ++ __ fcmp_cueq_s(FCC0, reg1, reg2); ++ else ++ __ fcmp_ceq_s(FCC0, reg1, reg2); ++ break; ++ case lir_cond_notEqual: ++ if (is_unordered) ++ __ fcmp_cune_s(FCC0, reg1, reg2); ++ else ++ __ fcmp_cne_s(FCC0, reg1, reg2); ++ break; ++ case lir_cond_less: ++ if (is_unordered) ++ __ fcmp_cult_s(FCC0, reg1, reg2); ++ else ++ __ fcmp_clt_s(FCC0, reg1, reg2); ++ break; ++ case lir_cond_lessEqual: ++ if (is_unordered) ++ __ fcmp_cule_s(FCC0, reg1, reg2); ++ else ++ __ fcmp_cle_s(FCC0, reg1, reg2); ++ break; ++ case lir_cond_greaterEqual: ++ if (is_unordered) ++ __ fcmp_cule_s(FCC0, reg2, reg1); ++ else ++ __ fcmp_cle_s(FCC0, reg2, reg1); ++ break; ++ case lir_cond_greater: ++ if (is_unordered) ++ __ fcmp_cult_s(FCC0, reg2, reg1); ++ else ++ __ fcmp_clt_s(FCC0, reg2, reg1); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else if (opr1->is_double_fpu()) { ++ FloatRegister reg1 = opr1->as_double_reg(); ++ assert(opr2->is_double_fpu(), "expect double float register"); ++ FloatRegister reg2 = opr2->as_double_reg(); ++ switch(op->condition()) { ++ case lir_cond_equal: ++ if (is_unordered) ++ __ fcmp_cueq_d(FCC0, reg1, reg2); ++ else ++ __ fcmp_ceq_d(FCC0, reg1, reg2); ++ break; ++ case lir_cond_notEqual: ++ if (is_unordered) ++ __ fcmp_cune_d(FCC0, reg1, reg2); ++ else ++ __ fcmp_cne_d(FCC0, reg1, reg2); ++ break; ++ case lir_cond_less: ++ if (is_unordered) ++ __ fcmp_cult_d(FCC0, reg1, reg2); ++ else ++ __ fcmp_clt_d(FCC0, reg1, reg2); ++ break; ++ case lir_cond_lessEqual: ++ if (is_unordered) ++ __ fcmp_cule_d(FCC0, reg1, reg2); ++ else ++ __ fcmp_cle_d(FCC0, reg1, reg2); ++ break; ++ case lir_cond_greaterEqual: ++ if (is_unordered) ++ __ fcmp_cule_d(FCC0, reg2, reg1); ++ else ++ __ fcmp_cle_d(FCC0, reg2, reg1); ++ break; ++ case lir_cond_greater: ++ if (is_unordered) ++ __ fcmp_cult_d(FCC0, reg2, reg1); ++ else ++ __ fcmp_clt_d(FCC0, reg2, reg1); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++ __ bcnez(FCC0, L); ++ } else { ++ if (opr1->is_constant() && opr2->is_single_cpu()) { ++ // tableswitch ++ Unimplemented(); ++ } else if (opr1->is_single_cpu() || opr1->is_double_cpu()) { ++ Register reg1 = as_reg(opr1); ++ Register reg2 = noreg; ++ jlong imm2 = 0; ++ if (opr2->is_single_cpu()) { ++ // cpu register - cpu register ++ reg2 = opr2->as_register(); ++ } else if (opr2->is_double_cpu()) { ++ // cpu register - cpu register ++ reg2 = opr2->as_register_lo(); ++ } else if (opr2->is_constant()) { ++ switch(opr2->type()) { ++ case T_INT: ++ case T_ADDRESS: ++ imm2 = opr2->as_constant_ptr()->as_jint(); ++ break; ++ case T_LONG: ++ imm2 = 
opr2->as_constant_ptr()->as_jlong(); ++ break; ++ case T_METADATA: ++ imm2 = (intptr_t)opr2->as_constant_ptr()->as_metadata(); ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ if (opr2->as_constant_ptr()->as_jobject() != NULL) { ++ reg2 = SCR1; ++ jobject2reg(opr2->as_constant_ptr()->as_jobject(), reg2); ++ } else { ++ reg2 = R0; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++ if (reg2 == noreg) { ++ if (imm2 == 0) { ++ reg2 = R0; ++ } else { ++ reg2 = SCR1; ++ __ li(reg2, imm2); ++ } ++ } ++ switch (op->condition()) { ++ case lir_cond_equal: ++ __ beq_far(reg1, reg2, L); break; ++ case lir_cond_notEqual: ++ __ bne_far(reg1, reg2, L); break; ++ case lir_cond_less: ++ __ blt_far(reg1, reg2, L, true); break; ++ case lir_cond_lessEqual: ++ __ bge_far(reg2, reg1, L, true); break; ++ case lir_cond_greaterEqual: ++ __ bge_far(reg1, reg2, L, true); break; ++ case lir_cond_greater: ++ __ blt_far(reg2, reg1, L, true); break; ++ case lir_cond_belowEqual: ++ __ bge_far(reg2, reg1, L, false); break; ++ case lir_cond_aboveEqual: ++ __ bge_far(reg1, reg2, L, false); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++ } ++} ++ ++void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { ++ LIR_Opr src = op->in_opr(); ++ LIR_Opr dest = op->result_opr(); ++ LIR_Opr tmp = op->tmp(); ++ ++ switch (op->bytecode()) { ++ case Bytecodes::_i2f: ++ __ movgr2fr_w(dest->as_float_reg(), src->as_register()); ++ __ ffint_s_w(dest->as_float_reg(), dest->as_float_reg()); ++ break; ++ case Bytecodes::_i2d: ++ __ movgr2fr_w(dest->as_double_reg(), src->as_register()); ++ __ ffint_d_w(dest->as_double_reg(), dest->as_double_reg()); ++ break; ++ case Bytecodes::_l2d: ++ __ movgr2fr_d(dest->as_double_reg(), src->as_register_lo()); ++ __ ffint_d_l(dest->as_double_reg(), dest->as_double_reg()); ++ break; ++ case Bytecodes::_l2f: ++ __ movgr2fr_d(dest->as_float_reg(), src->as_register_lo()); ++ __ ffint_s_l(dest->as_float_reg(), dest->as_float_reg()); ++ break; ++ case Bytecodes::_f2d: ++ __ fcvt_d_s(dest->as_double_reg(), src->as_float_reg()); ++ break; ++ case Bytecodes::_d2f: ++ __ fcvt_s_d(dest->as_float_reg(), src->as_double_reg()); ++ break; ++ case Bytecodes::_i2c: ++ __ bstrpick_w(dest->as_register(), src->as_register(), 15, 0); ++ break; ++ case Bytecodes::_i2l: ++ _masm->block_comment("FIXME: This could be a no-op"); ++ __ slli_w(dest->as_register_lo(), src->as_register(), 0); ++ break; ++ case Bytecodes::_i2s: ++ __ ext_w_h(dest->as_register(), src->as_register()); ++ break; ++ case Bytecodes::_i2b: ++ __ ext_w_b(dest->as_register(), src->as_register()); ++ break; ++ case Bytecodes::_l2i: ++ __ slli_w(dest->as_register(), src->as_register_lo(), 0); ++ break; ++ case Bytecodes::_d2l: ++ __ ftintrz_l_d(tmp->as_double_reg(), src->as_double_reg()); ++ __ movfr2gr_d(dest->as_register_lo(), tmp->as_double_reg()); ++ break; ++ case Bytecodes::_f2i: ++ __ ftintrz_w_s(tmp->as_float_reg(), src->as_float_reg()); ++ __ movfr2gr_s(dest->as_register(), tmp->as_float_reg()); ++ break; ++ case Bytecodes::_f2l: ++ __ ftintrz_l_s(tmp->as_float_reg(), src->as_float_reg()); ++ __ movfr2gr_d(dest->as_register_lo(), tmp->as_float_reg()); ++ break; ++ case Bytecodes::_d2i: ++ __ ftintrz_w_d(tmp->as_double_reg(), src->as_double_reg()); ++ __ movfr2gr_s(dest->as_register(), tmp->as_double_reg()); ++ break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::emit_alloc_obj(LIR_OpAllocObj* op) { ++ if (op->init_check()) { ++ __ ld_bu(SCR1, 
Address(op->klass()->as_register(), InstanceKlass::init_state_offset())); ++ __ li(SCR2, InstanceKlass::fully_initialized); ++ add_debug_info_for_null_check_here(op->stub()->info()); ++ __ bne_far(SCR1, SCR2, *op->stub()->entry()); ++ } ++ __ allocate_object(op->obj()->as_register(), op->tmp1()->as_register(), ++ op->tmp2()->as_register(), op->header_size(), ++ op->object_size(), op->klass()->as_register(), ++ *op->stub()->entry()); ++ __ bind(*op->stub()->continuation()); ++} ++ ++void LIR_Assembler::emit_alloc_array(LIR_OpAllocArray* op) { ++ Register len = op->len()->as_register(); ++ if (UseSlowPath || ++ (!UseFastNewObjectArray && is_reference_type(op->type())) || ++ (!UseFastNewTypeArray && !is_reference_type(op->type()))) { ++ __ b(*op->stub()->entry()); ++ } else { ++ Register tmp1 = op->tmp1()->as_register(); ++ Register tmp2 = op->tmp2()->as_register(); ++ Register tmp3 = op->tmp3()->as_register(); ++ if (len == tmp1) { ++ tmp1 = tmp3; ++ } else if (len == tmp2) { ++ tmp2 = tmp3; ++ } else if (len == tmp3) { ++ // everything is ok ++ } else { ++ __ move(tmp3, len); ++ } ++ __ allocate_array(op->obj()->as_register(), len, tmp1, tmp2, ++ arrayOopDesc::header_size(op->type()), ++ array_element_size(op->type()), ++ op->klass()->as_register(), ++ *op->stub()->entry()); ++ } ++ __ bind(*op->stub()->continuation()); ++} ++ ++void LIR_Assembler::type_profile_helper(Register mdo, ciMethodData *md, ciProfileData *data, ++ Register recv, Label* update_done) { ++ for (uint i = 0; i < ReceiverTypeData::row_limit(); i++) { ++ Label next_test; ++ // See if the receiver is receiver[n]. ++ __ lea(SCR2, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)))); ++ __ ld_ptr(SCR1, Address(SCR2)); ++ __ bne(recv, SCR1, next_test); ++ Address data_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i))); ++ __ ld_ptr(SCR2, data_addr); ++ __ addi_d(SCR2, SCR2, DataLayout::counter_increment); ++ __ st_ptr(SCR2, data_addr); ++ __ b(*update_done); ++ __ bind(next_test); ++ } ++ ++ // Didn't find receiver; find next empty slot and fill it in ++ for (uint i = 0; i < ReceiverTypeData::row_limit(); i++) { ++ Label next_test; ++ __ lea(SCR2, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)))); ++ Address recv_addr(SCR2); ++ __ ld_ptr(SCR1, recv_addr); ++ __ bnez(SCR1, next_test); ++ __ st_ptr(recv, recv_addr); ++ __ li(SCR1, DataLayout::counter_increment); ++ __ lea(SCR2, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i)))); ++ __ st_ptr(SCR1, Address(SCR2)); ++ __ b(*update_done); ++ __ bind(next_test); ++ } ++} ++ ++void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, ++ Label* failure, Label* obj_is_null) { ++ // we always need a stub for the failure case. 
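A minimal illustrative sketch (plain C++, not part of the patch; ReceiverRow and the other names are hypothetical stand-ins for the ReceiverTypeData layout) of the two-pass row scan that type_profile_helper() above emits: bump the counter of a row already recording this receiver, otherwise claim the first empty row:

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    struct ReceiverRow {            // hypothetical view of one receiver row
      const void* receiver;         // NULL while the row is unclaimed
      uint64_t    count;
    };

    static const uint64_t kCounterIncrement = 1;  // stands in for DataLayout::counter_increment

    static void profile_receiver(ReceiverRow* rows, size_t row_limit, const void* recv) {
      for (size_t i = 0; i < row_limit; i++) {    // pass 1: receiver already recorded?
        if (rows[i].receiver == recv) {
          rows[i].count += kCounterIncrement;
          return;                                 // corresponds to the __ b(*update_done)
        }
      }
      for (size_t i = 0; i < row_limit; i++) {    // pass 2: fill the first empty slot
        if (rows[i].receiver == NULL) {
          rows[i].receiver = recv;
          rows[i].count = kCounterIncrement;
          return;
        }
      }
      // all rows taken by other receivers: the emitted code simply falls through
    }

    int main() {
      ReceiverRow rows[2] = {};
      int a, b;
      profile_receiver(rows, 2, &a);
      profile_receiver(rows, 2, &a);
      profile_receiver(rows, 2, &b);
      printf("%llu %llu\n", (unsigned long long)rows[0].count,
                            (unsigned long long)rows[1].count);  // prints "2 1"
      return 0;
    }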
++ CodeStub* stub = op->stub(); ++ Register obj = op->object()->as_register(); ++ Register k_RInfo = op->tmp1()->as_register(); ++ Register klass_RInfo = op->tmp2()->as_register(); ++ Register dst = op->result_opr()->as_register(); ++ ciKlass* k = op->klass(); ++ Register Rtmp1 = noreg; ++ ++ // check if it needs to be profiled ++ ciMethodData* md; ++ ciProfileData* data; ++ ++ const bool should_profile = op->should_profile(); ++ ++ if (should_profile) { ++ ciMethod* method = op->profiled_method(); ++ assert(method != NULL, "Should have method"); ++ int bci = op->profiled_bci(); ++ md = method->method_data_or_null(); ++ assert(md != NULL, "Sanity"); ++ data = md->bci_to_data(bci); ++ assert(data != NULL, "need data for type check"); ++ assert(data->is_ReceiverTypeData(), "need ReceiverTypeData for type check"); ++ } ++ ++ Label profile_cast_success, profile_cast_failure; ++ Label *success_target = should_profile ? &profile_cast_success : success; ++ Label *failure_target = should_profile ? &profile_cast_failure : failure; ++ ++ if (obj == k_RInfo) { ++ k_RInfo = dst; ++ } else if (obj == klass_RInfo) { ++ klass_RInfo = dst; ++ } ++ if (k->is_loaded() && !UseCompressedClassPointers) { ++ select_different_registers(obj, dst, k_RInfo, klass_RInfo); ++ } else { ++ Rtmp1 = op->tmp3()->as_register(); ++ select_different_registers(obj, dst, k_RInfo, klass_RInfo, Rtmp1); ++ } ++ ++ assert_different_registers(obj, k_RInfo, klass_RInfo); ++ ++ if (should_profile) { ++ Label not_null; ++ __ bnez(obj, not_null); ++ // Object is null; update MDO and exit ++ Register mdo = klass_RInfo; ++ __ mov_metadata(mdo, md->constant_encoding()); ++ Address data_addr = Address(mdo, md->byte_offset_of_slot(data, DataLayout::flags_offset())); ++ __ ld_bu(SCR2, data_addr); ++ __ ori(SCR2, SCR2, BitData::null_seen_byte_constant()); ++ __ st_b(SCR2, data_addr); ++ __ b(*obj_is_null); ++ __ bind(not_null); ++ } else { ++ __ beqz(obj, *obj_is_null); ++ } ++ ++ if (!k->is_loaded()) { ++ klass2reg_with_patching(k_RInfo, op->info_for_patch()); ++ } else { ++ __ mov_metadata(k_RInfo, k->constant_encoding()); ++ } ++ __ verify_oop(obj); ++ ++ if (op->fast_check()) { ++ // get object class ++ // not a safepoint as obj null check happens earlier ++ __ load_klass(SCR2, obj); ++ __ bne_far(SCR2, k_RInfo, *failure_target); ++ // successful cast, fall through to profile or jump ++ } else { ++ // get object class ++ // not a safepoint as obj null check happens earlier ++ __ load_klass(klass_RInfo, obj); ++ if (k->is_loaded()) { ++ // See if we get an immediate positive hit ++ __ ld_ptr(SCR1, Address(klass_RInfo, int64_t(k->super_check_offset()))); ++ if ((juint)in_bytes(Klass::secondary_super_cache_offset()) != k->super_check_offset()) { ++ __ bne_far(k_RInfo, SCR1, *failure_target); ++ // successful cast, fall through to profile or jump ++ } else { ++ // See if we get an immediate positive hit ++ __ beq_far(k_RInfo, SCR1, *success_target); ++ // check for self ++ __ beq_far(klass_RInfo, k_RInfo, *success_target); ++ ++ __ addi_d(SP, SP, -2 * wordSize); ++ __ st_ptr(k_RInfo, Address(SP, 0 * wordSize)); ++ __ st_ptr(klass_RInfo, Address(SP, 1 * wordSize)); ++ __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); ++ __ ld_ptr(klass_RInfo, Address(SP, 0 * wordSize)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ // result is a boolean ++ __ beqz(klass_RInfo, *failure_target); ++ // successful cast, fall through to profile or jump ++ } ++ } else { ++ // perform the fast part of the checking logic ++ __ 
check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL); ++ // call out-of-line instance of __ check_klass_subtype_slow_path(...): ++ __ addi_d(SP, SP, -2 * wordSize); ++ __ st_ptr(k_RInfo, Address(SP, 0 * wordSize)); ++ __ st_ptr(klass_RInfo, Address(SP, 1 * wordSize)); ++ __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); ++ __ ld_ptr(k_RInfo, Address(SP, 0 * wordSize)); ++ __ ld_ptr(klass_RInfo, Address(SP, 1 * wordSize)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ // result is a boolean ++ __ beqz(k_RInfo, *failure_target); ++ // successful cast, fall through to profile or jump ++ } ++ } ++ if (should_profile) { ++ Register mdo = klass_RInfo, recv = k_RInfo; ++ __ bind(profile_cast_success); ++ __ mov_metadata(mdo, md->constant_encoding()); ++ __ load_klass(recv, obj); ++ Label update_done; ++ type_profile_helper(mdo, md, data, recv, success); ++ __ b(*success); ++ ++ __ bind(profile_cast_failure); ++ __ mov_metadata(mdo, md->constant_encoding()); ++ Address counter_addr = Address(mdo, md->byte_offset_of_slot(data, CounterData::count_offset())); ++ __ ld_ptr(SCR2, counter_addr); ++ __ addi_d(SCR2, SCR2, -DataLayout::counter_increment); ++ __ st_ptr(SCR2, counter_addr); ++ __ b(*failure); ++ } ++ __ b(*success); ++} ++ ++void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) { ++ const bool should_profile = op->should_profile(); ++ ++ LIR_Code code = op->code(); ++ if (code == lir_store_check) { ++ Register value = op->object()->as_register(); ++ Register array = op->array()->as_register(); ++ Register k_RInfo = op->tmp1()->as_register(); ++ Register klass_RInfo = op->tmp2()->as_register(); ++ Register Rtmp1 = op->tmp3()->as_register(); ++ CodeStub* stub = op->stub(); ++ ++ // check if it needs to be profiled ++ ciMethodData* md; ++ ciProfileData* data; ++ ++ if (should_profile) { ++ ciMethod* method = op->profiled_method(); ++ assert(method != NULL, "Should have method"); ++ int bci = op->profiled_bci(); ++ md = method->method_data_or_null(); ++ assert(md != NULL, "Sanity"); ++ data = md->bci_to_data(bci); ++ assert(data != NULL, "need data for type check"); ++ assert(data->is_ReceiverTypeData(), "need ReceiverTypeData for type check"); ++ } ++ Label profile_cast_success, profile_cast_failure, done; ++ Label *success_target = should_profile ? &profile_cast_success : &done; ++ Label *failure_target = should_profile ? 
&profile_cast_failure : stub->entry(); ++ ++ if (should_profile) { ++ Label not_null; ++ __ bnez(value, not_null); ++ // Object is null; update MDO and exit ++ Register mdo = klass_RInfo; ++ __ mov_metadata(mdo, md->constant_encoding()); ++ Address data_addr = Address(mdo, md->byte_offset_of_slot(data, DataLayout::flags_offset())); ++ __ ld_bu(SCR2, data_addr); ++ __ ori(SCR2, SCR2, BitData::null_seen_byte_constant()); ++ __ st_b(SCR2, data_addr); ++ __ b(done); ++ __ bind(not_null); ++ } else { ++ __ beqz(value, done); ++ } ++ ++ add_debug_info_for_null_check_here(op->info_for_exception()); ++ __ load_klass(k_RInfo, array); ++ __ load_klass(klass_RInfo, value); ++ ++ // get instance klass (it's already uncompressed) ++ __ ld_ptr(k_RInfo, Address(k_RInfo, ObjArrayKlass::element_klass_offset())); ++ // perform the fast part of the checking logic ++ __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL); ++ // call out-of-line instance of __ check_klass_subtype_slow_path(...): ++ __ addi_d(SP, SP, -2 * wordSize); ++ __ st_ptr(k_RInfo, Address(SP, 0 * wordSize)); ++ __ st_ptr(klass_RInfo, Address(SP, 1 * wordSize)); ++ __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); ++ __ ld_ptr(k_RInfo, Address(SP, 0 * wordSize)); ++ __ ld_ptr(klass_RInfo, Address(SP, 1 * wordSize)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ // result is a boolean ++ __ beqz(k_RInfo, *failure_target); ++ // fall through to the success case ++ ++ if (should_profile) { ++ Register mdo = klass_RInfo, recv = k_RInfo; ++ __ bind(profile_cast_success); ++ __ mov_metadata(mdo, md->constant_encoding()); ++ __ load_klass(recv, value); ++ Label update_done; ++ type_profile_helper(mdo, md, data, recv, &done); ++ __ b(done); ++ ++ __ bind(profile_cast_failure); ++ __ mov_metadata(mdo, md->constant_encoding()); ++ Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset())); ++ __ lea(SCR2, counter_addr); ++ __ ld_ptr(SCR1, Address(SCR2)); ++ __ addi_d(SCR1, SCR1, -DataLayout::counter_increment); ++ __ st_ptr(SCR1, Address(SCR2)); ++ __ b(*stub->entry()); ++ } ++ ++ __ bind(done); ++ } else if (code == lir_checkcast) { ++ Register obj = op->object()->as_register(); ++ Register dst = op->result_opr()->as_register(); ++ Label success; ++ emit_typecheck_helper(op, &success, op->stub()->entry(), &success); ++ __ bind(success); ++ if (dst != obj) { ++ __ move(dst, obj); ++ } ++ } else if (code == lir_instanceof) { ++ Register obj = op->object()->as_register(); ++ Register dst = op->result_opr()->as_register(); ++ Label success, failure, done; ++ emit_typecheck_helper(op, &success, &failure, &failure); ++ __ bind(failure); ++ __ move(dst, R0); ++ __ b(done); ++ __ bind(success); ++ __ li(dst, 1); ++ __ bind(done); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::casw(Register addr, Register newval, Register cmpval, bool sign) { ++ __ cmpxchg32(Address(addr, 0), cmpval, newval, SCR1, sign, ++ /* retold */ false, /* barrier */ true); ++} ++ ++void LIR_Assembler::casl(Register addr, Register newval, Register cmpval) { ++ __ cmpxchg(Address(addr, 0), cmpval, newval, SCR1, ++ /* retold */ false, /* barrier */ true); ++} ++ ++void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { ++ assert(VM_Version::supports_cx8(), "wrong machine"); ++ Register addr; ++ if (op->addr()->is_register()) { ++ addr = as_reg(op->addr()); ++ } else { ++ assert(op->addr()->is_address(), "what else?"); ++ LIR_Address* addr_ptr 
= op->addr()->as_address_ptr(); ++ assert(addr_ptr->disp() == 0, "need 0 disp"); ++ assert(addr_ptr->index() == LIR_OprDesc::illegalOpr(), "need 0 index"); ++ addr = as_reg(addr_ptr->base()); ++ } ++ Register newval = as_reg(op->new_value()); ++ Register cmpval = as_reg(op->cmp_value()); ++ ++ if (op->code() == lir_cas_obj) { ++ if (UseCompressedOops) { ++ Register t1 = op->tmp1()->as_register(); ++ assert(op->tmp1()->is_valid(), "must be"); ++ __ encode_heap_oop(t1, cmpval); ++ cmpval = t1; ++ __ encode_heap_oop(SCR2, newval); ++ newval = SCR2; ++ casw(addr, newval, cmpval, false); ++ } else { ++ casl(addr, newval, cmpval); ++ } ++ } else if (op->code() == lir_cas_int) { ++ casw(addr, newval, cmpval, true); ++ } else { ++ casl(addr, newval, cmpval); ++ } ++} ++ ++void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, ++ LIR_Opr result, BasicType type) { ++ Unimplemented(); ++} ++ ++void LIR_Assembler::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, ++ LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type) { ++ assert(result->is_single_cpu() || result->is_double_cpu(), "expect single register for result"); ++ assert(left->is_single_cpu() || left->is_double_cpu(), "must be"); ++ Register regd = (result->type() == T_LONG) ? result->as_register_lo() : result->as_register(); ++ Register regl = as_reg(left); ++ Register regr = noreg; ++ Register reg1 = noreg; ++ Register reg2 = noreg; ++ jlong immr = 0; ++ ++ // comparison operands ++ if (right->is_single_cpu()) { ++ // cpu register - cpu register ++ regr = right->as_register(); ++ } else if (right->is_double_cpu()) { ++ // cpu register - cpu register ++ regr = right->as_register_lo(); ++ } else if (right->is_constant()) { ++ switch(right->type()) { ++ case T_INT: ++ case T_ADDRESS: ++ immr = right->as_constant_ptr()->as_jint(); ++ break; ++ case T_LONG: ++ immr = right->as_constant_ptr()->as_jlong(); ++ break; ++ case T_METADATA: ++ immr = (intptr_t)right->as_constant_ptr()->as_metadata(); ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ if (right->as_constant_ptr()->as_jobject() != NULL) { ++ regr = SCR1; ++ jobject2reg(right->as_constant_ptr()->as_jobject(), regr); ++ } else { ++ immr = 0; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++ ++ if (regr == noreg) { ++ switch (condition) { ++ case lir_cond_equal: ++ case lir_cond_notEqual: ++ if (!Assembler::is_simm(-immr, 12)) { ++ regr = SCR1; ++ __ li(regr, immr); ++ } ++ break; ++ default: ++ if (!Assembler::is_simm(immr, 12)) { ++ regr = SCR1; ++ __ li(regr, immr); ++ } ++ } ++ } ++ ++ // special cases ++ if (src1->is_constant() && src2->is_constant()) { ++ jlong val1 = 0, val2 = 0; ++ if (src1->type() == T_INT && src2->type() == T_INT) { ++ val1 = src1->as_jint(); ++ val2 = src2->as_jint(); ++ } else if (src1->type() == T_LONG && src2->type() == T_LONG) { ++ val1 = src1->as_jlong(); ++ val2 = src2->as_jlong(); ++ } ++ if (val1 == 0 && val2 == 1) { ++ if (regr == noreg) { ++ switch (condition) { ++ case lir_cond_equal: ++ if (immr == 0) { ++ __ sltu(regd, R0, regl); ++ } else { ++ __ addi_d(SCR1, regl, -immr); ++ __ li(regd, 1); ++ __ maskeqz(regd, regd, SCR1); ++ } ++ break; ++ case lir_cond_notEqual: ++ if (immr == 0) { ++ __ sltu(regd, R0, regl); ++ __ xori(regd, regd, 1); ++ } else { ++ __ addi_d(SCR1, regl, -immr); ++ __ li(regd, 1); ++ __ masknez(regd, regd, SCR1); ++ } ++ break; ++ case lir_cond_less: ++ __ slti(regd, regl, immr); ++ __ xori(regd, regd, 1); ++ break; ++ case 
lir_cond_lessEqual: ++ if (immr == 0) { ++ __ slt(regd, R0, regl); ++ } else { ++ __ li(SCR1, immr); ++ __ slt(regd, SCR1, regl); ++ } ++ break; ++ case lir_cond_greater: ++ if (immr == 0) { ++ __ slt(regd, R0, regl); ++ } else { ++ __ li(SCR1, immr); ++ __ slt(regd, SCR1, regl); ++ } ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_greaterEqual: ++ __ slti(regd, regl, immr); ++ break; ++ case lir_cond_belowEqual: ++ if (immr == 0) { ++ __ sltu(regd, R0, regl); ++ } else { ++ __ li(SCR1, immr); ++ __ sltu(regd, SCR1, regl); ++ } ++ break; ++ case lir_cond_aboveEqual: ++ __ sltui(regd, regl, immr); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ switch (condition) { ++ case lir_cond_equal: ++ __ sub_d(SCR1, regl, regr); ++ __ li(regd, 1); ++ __ maskeqz(regd, regd, SCR1); ++ break; ++ case lir_cond_notEqual: ++ __ sub_d(SCR1, regl, regr); ++ __ li(regd, 1); ++ __ masknez(regd, regd, SCR1); ++ break; ++ case lir_cond_less: ++ __ slt(regd, regl, regr); ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_lessEqual: ++ __ slt(regd, regr, regl); ++ break; ++ case lir_cond_greater: ++ __ slt(regd, regr, regl); ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_greaterEqual: ++ __ slt(regd, regl, regr); ++ break; ++ case lir_cond_belowEqual: ++ __ sltu(regd, regr, regl); ++ break; ++ case lir_cond_aboveEqual: ++ __ sltu(regd, regl, regr); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++ return; ++ } else if (val1 == 1 && val2 == 0) { ++ if (regr == noreg) { ++ switch (condition) { ++ case lir_cond_equal: ++ if (immr == 0) { ++ __ sltu(regd, R0, regl); ++ __ xori(regd, regd, 1); ++ } else { ++ __ addi_d(SCR1, regl, -immr); ++ __ li(regd, 1); ++ __ masknez(regd, regd, SCR1); ++ } ++ break; ++ case lir_cond_notEqual: ++ if (immr == 0) { ++ __ sltu(regd, R0, regl); ++ } else { ++ __ addi_d(SCR1, regl, -immr); ++ __ li(regd, 1); ++ __ maskeqz(regd, regd, SCR1); ++ } ++ break; ++ case lir_cond_less: ++ __ slti(regd, regl, immr); ++ break; ++ case lir_cond_lessEqual: ++ if (immr == 0) { ++ __ slt(regd, R0, regl); ++ } else { ++ __ li(SCR1, immr); ++ __ slt(regd, SCR1, regl); ++ } ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_greater: ++ if (immr == 0) { ++ __ slt(regd, R0, regl); ++ } else { ++ __ li(SCR1, immr); ++ __ slt(regd, SCR1, regl); ++ } ++ break; ++ case lir_cond_greaterEqual: ++ __ slti(regd, regl, immr); ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_belowEqual: ++ if (immr == 0) { ++ __ sltu(regd, R0, regl); ++ } else { ++ __ li(SCR1, immr); ++ __ sltu(regd, SCR1, regl); ++ } ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_aboveEqual: ++ __ sltui(regd, regl, immr); ++ __ xori(regd, regd, 1); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ switch (condition) { ++ case lir_cond_equal: ++ __ sub_d(SCR1, regl, regr); ++ __ li(regd, 1); ++ __ masknez(regd, regd, SCR1); ++ break; ++ case lir_cond_notEqual: ++ __ sub_d(SCR1, regl, regr); ++ __ li(regd, 1); ++ __ maskeqz(regd, regd, SCR1); ++ break; ++ case lir_cond_less: ++ __ slt(regd, regl, regr); ++ break; ++ case lir_cond_lessEqual: ++ __ slt(regd, regr, regl); ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_greater: ++ __ slt(regd, regr, regl); ++ break; ++ case lir_cond_greaterEqual: ++ __ slt(regd, regl, regr); ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_belowEqual: ++ __ sltu(regd, regr, regl); ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_aboveEqual: ++ __ sltu(regd, regl, regr); ++ __ xori(regd, regd, 1); ++ break; ++ default: ++ 
ShouldNotReachHere(); ++ } ++ } ++ return; ++ } ++ } ++ ++ // cmp ++ if (regr == noreg) { ++ switch (condition) { ++ case lir_cond_equal: ++ __ addi_d(SCR2, regl, -immr); ++ break; ++ case lir_cond_notEqual: ++ __ addi_d(SCR2, regl, -immr); ++ break; ++ case lir_cond_less: ++ __ slti(SCR2, regl, immr); ++ break; ++ case lir_cond_lessEqual: ++ __ li(SCR1, immr); ++ __ slt(SCR2, SCR1, regl); ++ break; ++ case lir_cond_greater: ++ __ li(SCR1, immr); ++ __ slt(SCR2, SCR1, regl); ++ break; ++ case lir_cond_greaterEqual: ++ __ slti(SCR2, regl, immr); ++ break; ++ case lir_cond_belowEqual: ++ __ li(SCR1, immr); ++ __ sltu(SCR2, SCR1, regl); ++ break; ++ case lir_cond_aboveEqual: ++ __ sltui(SCR2, regl, immr); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ switch (condition) { ++ case lir_cond_equal: ++ __ sub_d(SCR2, regl, regr); ++ break; ++ case lir_cond_notEqual: ++ __ sub_d(SCR2, regl, regr); ++ break; ++ case lir_cond_less: ++ __ slt(SCR2, regl, regr); ++ break; ++ case lir_cond_lessEqual: ++ __ slt(SCR2, regr, regl); ++ break; ++ case lir_cond_greater: ++ __ slt(SCR2, regr, regl); ++ break; ++ case lir_cond_greaterEqual: ++ __ slt(SCR2, regl, regr); ++ break; ++ case lir_cond_belowEqual: ++ __ sltu(SCR2, regr, regl); ++ break; ++ case lir_cond_aboveEqual: ++ __ sltu(SCR2, regl, regr); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++ ++ // value operands ++ if (src1->is_stack()) { ++ stack2reg(src1, result, result->type()); ++ reg1 = regd; ++ } else if (src1->is_constant()) { ++ const2reg(src1, result, lir_patch_none, NULL); ++ reg1 = regd; ++ } else { ++ reg1 = (src1->type() == T_LONG) ? src1->as_register_lo() : src1->as_register(); ++ } ++ ++ if (src2->is_stack()) { ++ stack2reg(src2, FrameMap::scr1_opr, result->type()); ++ reg2 = SCR1; ++ } else if (src2->is_constant()) { ++ LIR_Opr tmp = src2->type() == T_LONG ? FrameMap::scr1_long_opr : FrameMap::scr1_opr; ++ const2reg(src2, tmp, lir_patch_none, NULL); ++ reg2 = SCR1; ++ } else { ++ reg2 = (src2->type() == T_LONG) ? 
src2->as_register_lo() : src2->as_register(); ++ } ++ ++ // cmove ++ switch (condition) { ++ case lir_cond_equal: ++ __ masknez(regd, reg1, SCR2); ++ __ maskeqz(SCR2, reg2, SCR2); ++ break; ++ case lir_cond_notEqual: ++ __ maskeqz(regd, reg1, SCR2); ++ __ masknez(SCR2, reg2, SCR2); ++ break; ++ case lir_cond_less: ++ __ maskeqz(regd, reg1, SCR2); ++ __ masknez(SCR2, reg2, SCR2); ++ break; ++ case lir_cond_lessEqual: ++ __ masknez(regd, reg1, SCR2); ++ __ maskeqz(SCR2, reg2, SCR2); ++ break; ++ case lir_cond_greater: ++ __ maskeqz(regd, reg1, SCR2); ++ __ masknez(SCR2, reg2, SCR2); ++ break; ++ case lir_cond_greaterEqual: ++ __ masknez(regd, reg1, SCR2); ++ __ maskeqz(SCR2, reg2, SCR2); ++ break; ++ case lir_cond_belowEqual: ++ __ masknez(regd, reg1, SCR2); ++ __ maskeqz(SCR2, reg2, SCR2); ++ break; ++ case lir_cond_aboveEqual: ++ __ masknez(regd, reg1, SCR2); ++ __ maskeqz(SCR2, reg2, SCR2); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ __ OR(regd, regd, SCR2); ++} ++ ++void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, ++ CodeEmitInfo* info, bool pop_fpu_stack) { ++ assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method"); ++ ++ if (left->is_single_cpu()) { ++ Register lreg = left->as_register(); ++ Register dreg = as_reg(dest); ++ ++ if (right->is_single_cpu()) { ++ // cpu register - cpu register ++ assert(left->type() == T_INT && right->type() == T_INT && dest->type() == T_INT, "should be"); ++ Register rreg = right->as_register(); ++ switch (code) { ++ case lir_add: __ add_w (dest->as_register(), lreg, rreg); break; ++ case lir_sub: __ sub_w (dest->as_register(), lreg, rreg); break; ++ case lir_mul: __ mul_w (dest->as_register(), lreg, rreg); break; ++ default: ShouldNotReachHere(); ++ } ++ } else if (right->is_double_cpu()) { ++ Register rreg = right->as_register_lo(); ++ // single_cpu + double_cpu: can happen with obj+long ++ assert(code == lir_add || code == lir_sub, "mismatched arithmetic op"); ++ switch (code) { ++ case lir_add: __ add_d(dreg, lreg, rreg); break; ++ case lir_sub: __ sub_d(dreg, lreg, rreg); break; ++ default: ShouldNotReachHere(); ++ } ++ } else if (right->is_constant()) { ++ // cpu register - constant ++ jlong c; ++ ++ // FIXME: This is fugly: we really need to factor all this logic. 
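The constant power-of-two divisor cases of lir_div/lir_rem further below (and the 32-bit variants in arithmetic_idiv()) use the usual round-toward-zero shift-and-mask arithmetic; a minimal sketch of that arithmetic in plain C++ (not part of the patch; assumes arithmetic right shift of negative values, shown for the 64-bit case):

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    // x / (1 << shift), truncated toward zero: matches the srai/srli/add/srai sequence.
    static int64_t div_pow2(int64_t x, unsigned shift) {
      assert(shift > 0 && shift < 64);
      int64_t  sign = x >> 63;                          // srai: all ones if x < 0, else 0
      uint64_t bias = (uint64_t)sign >> (64 - shift);   // srli: (1<<shift)-1 for negatives, else 0
      return (x + (int64_t)bias) >> shift;              // add + srai
    }

    // x % (1 << shift), truncated toward zero: mask the magnitude, restore the sign.
    // The emitted code picks between the two forms without a branch.
    static int64_t rem_pow2(int64_t x, unsigned shift) {
      int64_t mask = (int64_t)(((uint64_t)1 << shift) - 1);
      return x >= 0 ? (x & mask) : -((-x) & mask);
    }

    int main() {
      printf("%lld %lld\n", (long long)div_pow2(-7, 1), (long long)rem_pow2(-7, 1));  // -3 -1
      return 0;
    }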
++ switch(right->type()) { ++ case T_LONG: ++ c = right->as_constant_ptr()->as_jlong(); ++ break; ++ case T_INT: ++ case T_ADDRESS: ++ c = right->as_constant_ptr()->as_jint(); ++ break; ++ default: ++ ShouldNotReachHere(); ++ c = 0; // unreachable ++ break; ++ } ++ ++ assert(code == lir_add || code == lir_sub, "mismatched arithmetic op"); ++ if (c == 0 && dreg == lreg) { ++ COMMENT("effective nop elided"); ++ return; ++ } ++ ++ switch(left->type()) { ++ case T_INT: ++ switch (code) { ++ case lir_add: __ addi_w(dreg, lreg, c); break; ++ case lir_sub: __ addi_w(dreg, lreg, -c); break; ++ default: ShouldNotReachHere(); ++ } ++ break; ++ case T_OBJECT: ++ case T_ADDRESS: ++ switch (code) { ++ case lir_add: __ addi_d(dreg, lreg, c); break; ++ case lir_sub: __ addi_d(dreg, lreg, -c); break; ++ default: ShouldNotReachHere(); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++ } else if (left->is_double_cpu()) { ++ Register lreg_lo = left->as_register_lo(); ++ ++ if (right->is_double_cpu()) { ++ // cpu register - cpu register ++ Register rreg_lo = right->as_register_lo(); ++ switch (code) { ++ case lir_add: __ add_d(dest->as_register_lo(), lreg_lo, rreg_lo); break; ++ case lir_sub: __ sub_d(dest->as_register_lo(), lreg_lo, rreg_lo); break; ++ case lir_mul: __ mul_d(dest->as_register_lo(), lreg_lo, rreg_lo); break; ++ case lir_div: __ div_d(dest->as_register_lo(), lreg_lo, rreg_lo); break; ++ case lir_rem: __ mod_d(dest->as_register_lo(), lreg_lo, rreg_lo); break; ++ default: ShouldNotReachHere(); ++ } ++ ++ } else if (right->is_constant()) { ++ jlong c = right->as_constant_ptr()->as_jlong(); ++ Register dreg = as_reg(dest); ++ switch (code) { ++ case lir_add: ++ case lir_sub: ++ if (c == 0 && dreg == lreg_lo) { ++ COMMENT("effective nop elided"); ++ return; ++ } ++ code == lir_add ? 
__ addi_d(dreg, lreg_lo, c) : __ addi_d(dreg, lreg_lo, -c); ++ break; ++ case lir_div: ++ assert(c > 0 && is_power_of_2_long(c), "divisor must be power-of-2 constant"); ++ if (c == 1) { ++ // move lreg_lo to dreg if divisor is 1 ++ __ move(dreg, lreg_lo); ++ } else { ++ unsigned int shift = exact_log2_long(c); ++ // use scr1 as intermediate result register ++ __ srai_d(SCR1, lreg_lo, 63); ++ __ srli_d(SCR1, SCR1, 64 - shift); ++ __ add_d(SCR1, lreg_lo, SCR1); ++ __ srai_d(dreg, SCR1, shift); ++ } ++ break; ++ case lir_rem: ++ assert(c > 0 && is_power_of_2_long(c), "divisor must be power-of-2 constant"); ++ if (c == 1) { ++ // move 0 to dreg if divisor is 1 ++ __ move(dreg, R0); ++ } else { ++ // use scr1/2 as intermediate result register ++ __ sub_d(SCR1, R0, lreg_lo); ++ __ slt(SCR2, SCR1, R0); ++ __ andi(dreg, lreg_lo, c - 1); ++ __ andi(SCR1, SCR1, c - 1); ++ __ sub_d(SCR1, R0, SCR1); ++ __ maskeqz(dreg, dreg, SCR2); ++ __ masknez(SCR1, SCR1, SCR2); ++ __ OR(dreg, dreg, SCR1); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++ } else if (left->is_single_fpu()) { ++ assert(right->is_single_fpu(), "right hand side of float arithmetics needs to be float register"); ++ switch (code) { ++ case lir_add: __ fadd_s (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; ++ case lir_sub: __ fsub_s (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; ++ case lir_mul: __ fmul_s (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; ++ case lir_div: __ fdiv_s (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; ++ default: ShouldNotReachHere(); ++ } ++ } else if (left->is_double_fpu()) { ++ if (right->is_double_fpu()) { ++ // fpu register - fpu register ++ switch (code) { ++ case lir_add: __ fadd_d (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; ++ case lir_sub: __ fsub_d (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; ++ case lir_mul_strictfp: // fall through ++ case lir_mul: __ fmul_d (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; ++ case lir_div_strictfp: // fall through ++ case lir_div: __ fdiv_d (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; ++ default: ShouldNotReachHere(); ++ } ++ } else { ++ if (right->is_constant()) { ++ ShouldNotReachHere(); ++ } ++ ShouldNotReachHere(); ++ } ++ } else if (left->is_single_stack() || left->is_address()) { ++ assert(left == dest, "left and dest must be equal"); ++ ShouldNotReachHere(); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::arith_fpu_implementation(LIR_Code code, int left_index, int right_index, ++ int dest_index, bool pop_fpu_stack) { ++ Unimplemented(); ++} ++ ++void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr unused, LIR_Opr dest, LIR_Op* op) { ++ switch(code) { ++ case lir_abs : __ fabs_d(dest->as_double_reg(), value->as_double_reg()); break; ++ case lir_sqrt: __ fsqrt_d(dest->as_double_reg(), value->as_double_reg()); break; ++ default : ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::logic_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst) { ++ assert(left->is_single_cpu() || left->is_double_cpu(), "expect single or double register"); ++ Register Rleft = left->is_single_cpu() ? 
left->as_register() : left->as_register_lo(); ++ ++ if (dst->is_single_cpu()) { ++ Register Rdst = dst->as_register(); ++ if (right->is_constant()) { ++ switch (code) { ++ case lir_logic_and: ++ if (Assembler::is_uimm(right->as_jint(), 12)) { ++ __ andi(Rdst, Rleft, right->as_jint()); ++ } else { ++ __ li(AT, right->as_jint()); ++ __ AND(Rdst, Rleft, AT); ++ } ++ break; ++ case lir_logic_or: __ ori(Rdst, Rleft, right->as_jint()); break; ++ case lir_logic_xor: __ xori(Rdst, Rleft, right->as_jint()); break; ++ default: ShouldNotReachHere(); break; ++ } ++ } else { ++ Register Rright = right->is_single_cpu() ? right->as_register() : right->as_register_lo(); ++ switch (code) { ++ case lir_logic_and: __ AND(Rdst, Rleft, Rright); break; ++ case lir_logic_or: __ OR(Rdst, Rleft, Rright); break; ++ case lir_logic_xor: __ XOR(Rdst, Rleft, Rright); break; ++ default: ShouldNotReachHere(); break; ++ } ++ } ++ } else { ++ Register Rdst = dst->as_register_lo(); ++ if (right->is_constant()) { ++ switch (code) { ++ case lir_logic_and: ++ if (Assembler::is_uimm(right->as_jlong(), 12)) { ++ __ andi(Rdst, Rleft, right->as_jlong()); ++ } else { ++ // We can guarantee that transform from HIR LogicOp is in range of ++ // uimm(12), but the common code directly generates LIR LogicAnd, ++ // and the right-operand is mask with all ones in the high bits. ++ __ li(AT, right->as_jlong()); ++ __ AND(Rdst, Rleft, AT); ++ } ++ break; ++ case lir_logic_or: __ ori(Rdst, Rleft, right->as_jlong()); break; ++ case lir_logic_xor: __ xori(Rdst, Rleft, right->as_jlong()); break; ++ default: ShouldNotReachHere(); break; ++ } ++ } else { ++ Register Rright = right->is_single_cpu() ? right->as_register() : right->as_register_lo(); ++ switch (code) { ++ case lir_logic_and: __ AND(Rdst, Rleft, Rright); break; ++ case lir_logic_or: __ OR(Rdst, Rleft, Rright); break; ++ case lir_logic_xor: __ XOR(Rdst, Rleft, Rright); break; ++ default: ShouldNotReachHere(); break; ++ } ++ } ++ } ++} ++ ++void LIR_Assembler::arithmetic_idiv(LIR_Code code, LIR_Opr left, LIR_Opr right, ++ LIR_Opr illegal, LIR_Opr result, CodeEmitInfo* info) { ++ // opcode check ++ assert((code == lir_idiv) || (code == lir_irem), "opcode must be idiv or irem"); ++ bool is_irem = (code == lir_irem); ++ ++ // operand check ++ assert(left->is_single_cpu(), "left must be register"); ++ assert(right->is_single_cpu() || right->is_constant(), "right must be register or constant"); ++ assert(result->is_single_cpu(), "result must be register"); ++ Register lreg = left->as_register(); ++ Register dreg = result->as_register(); ++ ++ // power-of-2 constant check and codegen ++ if (right->is_constant()) { ++ int c = right->as_constant_ptr()->as_jint(); ++ assert(c > 0 && is_power_of_2(c), "divisor must be power-of-2 constant"); ++ if (is_irem) { ++ if (c == 1) { ++ // move 0 to dreg if divisor is 1 ++ __ move(dreg, R0); ++ } else { ++ // use scr1/2 as intermediate result register ++ __ sub_w(SCR1, R0, lreg); ++ __ slt(SCR2, SCR1, R0); ++ __ andi(dreg, lreg, c - 1); ++ __ andi(SCR1, SCR1, c - 1); ++ __ sub_w(SCR1, R0, SCR1); ++ __ maskeqz(dreg, dreg, SCR2); ++ __ masknez(SCR1, SCR1, SCR2); ++ __ OR(dreg, dreg, SCR1); ++ } ++ } else { ++ if (c == 1) { ++ // move lreg to dreg if divisor is 1 ++ __ move(dreg, lreg); ++ } else { ++ unsigned int shift = exact_log2(c); ++ // use scr1 as intermediate result register ++ __ srai_w(SCR1, lreg, 31); ++ __ srli_w(SCR1, SCR1, 32 - shift); ++ __ add_w(SCR1, lreg, SCR1); ++ __ srai_w(dreg, SCR1, shift); ++ } ++ } ++ } else { ++ Register rreg = 
right->as_register(); ++ if (is_irem) ++ __ mod_w(dreg, lreg, rreg); ++ else ++ __ div_w(dreg, lreg, rreg); ++ } ++} ++ ++void LIR_Assembler::comp_op(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Op2* op) { ++ Unimplemented(); ++} ++ ++void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst, LIR_Op2* op){ ++ if (code == lir_cmp_fd2i || code == lir_ucmp_fd2i) { ++ bool is_unordered_less = (code == lir_ucmp_fd2i); ++ if (left->is_single_fpu()) { ++ if (is_unordered_less) { ++ __ fcmp_clt_s(FCC0, right->as_float_reg(), left->as_float_reg()); ++ __ fcmp_cult_s(FCC1, left->as_float_reg(), right->as_float_reg()); ++ } else { ++ __ fcmp_cult_s(FCC0, right->as_float_reg(), left->as_float_reg()); ++ __ fcmp_clt_s(FCC1, left->as_float_reg(), right->as_float_reg()); ++ } ++ } else if (left->is_double_fpu()) { ++ if (is_unordered_less) { ++ __ fcmp_clt_d(FCC0, right->as_double_reg(), left->as_double_reg()); ++ __ fcmp_cult_d(FCC1, left->as_double_reg(), right->as_double_reg()); ++ } else { ++ __ fcmp_cult_d(FCC0, right->as_double_reg(), left->as_double_reg()); ++ __ fcmp_clt_d(FCC1, left->as_double_reg(), right->as_double_reg()); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++ __ movcf2gr(dst->as_register(), FCC0); ++ __ movcf2gr(SCR1, FCC1); ++ __ sub_d(dst->as_register(), dst->as_register(), SCR1); ++ } else if (code == lir_cmp_l2i) { ++ __ slt(SCR1, left->as_register_lo(), right->as_register_lo()); ++ __ slt(dst->as_register(), right->as_register_lo(), left->as_register_lo()); ++ __ sub_d(dst->as_register(), dst->as_register(), SCR1); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::align_call(LIR_Code code) {} ++ ++void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) { ++ address call = __ trampoline_call(AddressLiteral(op->addr(), rtype)); ++ if (call == NULL) { ++ bailout("trampoline stub overflow"); ++ return; ++ } ++ add_call_info(code_offset(), op->info()); ++} ++ ++void LIR_Assembler::ic_call(LIR_OpJavaCall* op) { ++ address call = __ ic_call(op->addr()); ++ if (call == NULL) { ++ bailout("trampoline stub overflow"); ++ return; ++ } ++ add_call_info(code_offset(), op->info()); ++} ++ ++void LIR_Assembler::emit_static_call_stub() { ++ address call_pc = __ pc(); ++ address stub = __ start_a_stub(call_stub_size()); ++ if (stub == NULL) { ++ bailout("static call stub overflow"); ++ return; ++ } ++ ++ int start = __ offset(); ++ ++ __ relocate(static_stub_Relocation::spec(call_pc)); ++ ++ // Code stream for loading method may be changed. ++ __ ibar(0); ++ ++ // Rmethod contains Method*, it should be relocated for GC ++ // static stub relocation also tags the Method* in the code-stream. ++ __ mov_metadata(Rmethod, NULL); ++ // This is recognized as unresolved by relocs/nativeInst/ic code ++ __ patchable_jump(__ pc()); ++ ++ assert(__ offset() - start + CompiledStaticCall::to_trampoline_stub_size() <= call_stub_size(), ++ "stub too big"); ++ __ end_a_stub(); ++} ++ ++void LIR_Assembler::throw_op(LIR_Opr exceptionPC, LIR_Opr exceptionOop, CodeEmitInfo* info) { ++ assert(exceptionOop->as_register() == A0, "must match"); ++ assert(exceptionPC->as_register() == A1, "must match"); ++ ++ // exception object is not added to oop map by LinearScan ++ // (LinearScan assumes that no oops are in fixed registers) ++ info->add_register_oop(exceptionOop); ++ Runtime1::StubID unwind_id; ++ ++ // get current pc information ++ // pc is only needed if the method has an exception handler, the unwind code does not need it. 
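++  // The block below first makes sure the pc recorded for this throw is unique
++  // (see the JDK-8237483 note that follows), then binds label L and materializes
++  // its pc into exceptionPC with lipc so add_call_info() can attach the debug
++  // info used by the exception handler lookup to that exact pc.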
++ if (compilation()->debug_info_recorder()->last_pc_offset() == __ offset()) { ++ // As no instructions have been generated yet for this LIR node it's ++ // possible that an oop map already exists for the current offset. ++ // In that case insert an dummy NOP here to ensure all oop map PCs ++ // are unique. See JDK-8237483. ++ __ nop(); ++ } ++ Label L; ++ int pc_for_athrow_offset = __ offset(); ++ __ bind(L); ++ __ lipc(exceptionPC->as_register(), L); ++ add_call_info(pc_for_athrow_offset, info); // for exception handler ++ ++ __ verify_not_null_oop(A0); ++ // search an exception handler (A0: exception oop, A1: throwing pc) ++ if (compilation()->has_fpu_code()) { ++ unwind_id = Runtime1::handle_exception_id; ++ } else { ++ unwind_id = Runtime1::handle_exception_nofpu_id; ++ } ++ __ call(Runtime1::entry_for(unwind_id), relocInfo::runtime_call_type); ++ ++ // FIXME: enough room for two byte trap ???? ++ __ nop(); ++} ++ ++void LIR_Assembler::unwind_op(LIR_Opr exceptionOop) { ++ assert(exceptionOop->as_register() == A0, "must match"); ++ __ b(_unwind_handler_entry); ++} ++ ++void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, LIR_Opr count, LIR_Opr dest, LIR_Opr tmp) { ++ Register lreg = left->is_single_cpu() ? left->as_register() : left->as_register_lo(); ++ Register dreg = dest->is_single_cpu() ? dest->as_register() : dest->as_register_lo(); ++ ++ switch (left->type()) { ++ case T_INT: { ++ switch (code) { ++ case lir_shl: __ sll_w(dreg, lreg, count->as_register()); break; ++ case lir_shr: __ sra_w(dreg, lreg, count->as_register()); break; ++ case lir_ushr: __ srl_w(dreg, lreg, count->as_register()); break; ++ default: ShouldNotReachHere(); break; ++ } ++ break; ++ case T_LONG: ++ case T_ADDRESS: ++ case T_OBJECT: ++ switch (code) { ++ case lir_shl: __ sll_d(dreg, lreg, count->as_register()); break; ++ case lir_shr: __ sra_d(dreg, lreg, count->as_register()); break; ++ case lir_ushr: __ srl_d(dreg, lreg, count->as_register()); break; ++ default: ShouldNotReachHere(); break; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } ++} ++ ++void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, jint count, LIR_Opr dest) { ++ Register dreg = dest->is_single_cpu() ? dest->as_register() : dest->as_register_lo(); ++ Register lreg = left->is_single_cpu() ? 
left->as_register() : left->as_register_lo(); ++ ++ switch (left->type()) { ++ case T_INT: { ++ switch (code) { ++ case lir_shl: __ slli_w(dreg, lreg, count); break; ++ case lir_shr: __ srai_w(dreg, lreg, count); break; ++ case lir_ushr: __ srli_w(dreg, lreg, count); break; ++ default: ShouldNotReachHere(); break; ++ } ++ break; ++ case T_LONG: ++ case T_ADDRESS: ++ case T_OBJECT: ++ switch (code) { ++ case lir_shl: __ slli_d(dreg, lreg, count); break; ++ case lir_shr: __ srai_d(dreg, lreg, count); break; ++ case lir_ushr: __ srli_d(dreg, lreg, count); break; ++ default: ShouldNotReachHere(); break; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } ++} ++ ++void LIR_Assembler::store_parameter(Register r, int offset_from_sp_in_words) { ++ assert(offset_from_sp_in_words >= 0, "invalid offset from sp"); ++ int offset_from_sp_in_bytes = offset_from_sp_in_words * BytesPerWord; ++ assert(offset_from_sp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset"); ++ __ st_ptr(r, Address(SP, offset_from_sp_in_bytes)); ++} ++ ++void LIR_Assembler::store_parameter(jint c, int offset_from_sp_in_words) { ++ assert(offset_from_sp_in_words >= 0, "invalid offset from sp"); ++ int offset_from_sp_in_bytes = offset_from_sp_in_words * BytesPerWord; ++ assert(offset_from_sp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset"); ++ __ li(SCR2, c); ++ __ st_ptr(SCR2, Address(SP, offset_from_sp_in_bytes)); ++} ++ ++void LIR_Assembler::store_parameter(jobject o, int offset_from_sp_in_words) { ++ ShouldNotReachHere(); ++} ++ ++// This code replaces a call to arraycopy; no exception may ++// be thrown in this code, they must be thrown in the System.arraycopy ++// activation frame; we could save some checks if this would not be the case ++void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) { ++ Register j_rarg0 = T0; ++ Register j_rarg1 = A0; ++ Register j_rarg2 = A1; ++ Register j_rarg3 = A2; ++ Register j_rarg4 = A3; ++ ++ ciArrayKlass* default_type = op->expected_type(); ++ Register src = op->src()->as_register(); ++ Register dst = op->dst()->as_register(); ++ Register src_pos = op->src_pos()->as_register(); ++ Register dst_pos = op->dst_pos()->as_register(); ++ Register length = op->length()->as_register(); ++ Register tmp = op->tmp()->as_register(); ++ ++ CodeStub* stub = op->stub(); ++ int flags = op->flags(); ++ BasicType basic_type = default_type != NULL ? 
default_type->element_type()->basic_type() : T_ILLEGAL; ++ if (is_reference_type(basic_type)) ++ basic_type = T_OBJECT; ++ ++ // if we don't know anything, just go through the generic arraycopy ++ if (default_type == NULL) { ++ Label done; ++ assert(src == T0 && src_pos == A0, "mismatch in calling convention"); ++ ++ // Save the arguments in case the generic arraycopy fails and we ++ // have to fall back to the JNI stub ++ __ st_ptr(dst, Address(SP, 0 * BytesPerWord)); ++ __ st_ptr(dst_pos, Address(SP, 1 * BytesPerWord)); ++ __ st_ptr(length, Address(SP, 2 * BytesPerWord)); ++ __ st_ptr(src_pos, Address(SP, 3 * BytesPerWord)); ++ __ st_ptr(src, Address(SP, 4 * BytesPerWord)); ++ ++ address copyfunc_addr = StubRoutines::generic_arraycopy(); ++ assert(copyfunc_addr != NULL, "generic arraycopy stub required"); ++ ++ // The arguments are in java calling convention so we shift them ++ // to C convention ++ assert_different_registers(A4, j_rarg0, j_rarg1, j_rarg2, j_rarg3); ++ __ move(A4, j_rarg4); ++ assert_different_registers(A3, j_rarg0, j_rarg1, j_rarg2); ++ __ move(A3, j_rarg3); ++ assert_different_registers(A2, j_rarg0, j_rarg1); ++ __ move(A2, j_rarg2); ++ assert_different_registers(A1, j_rarg0); ++ __ move(A1, j_rarg1); ++ __ move(A0, j_rarg0); ++#ifndef PRODUCT ++ if (PrintC1Statistics) { ++ __ li(SCR2, (address)&Runtime1::_generic_arraycopystub_cnt); ++ __ increment(SCR2, 1); ++ } ++#endif ++ __ call(copyfunc_addr, relocInfo::runtime_call_type); ++ ++ __ beqz(A0, *stub->continuation()); ++ __ move(tmp, A0); ++ ++ // Reload values from the stack so they are where the stub ++ // expects them. ++ __ ld_ptr(dst, Address(SP, 0 * BytesPerWord)); ++ __ ld_ptr(dst_pos, Address(SP, 1 * BytesPerWord)); ++ __ ld_ptr(length, Address(SP, 2 * BytesPerWord)); ++ __ ld_ptr(src_pos, Address(SP, 3 * BytesPerWord)); ++ __ ld_ptr(src, Address(SP, 4 * BytesPerWord)); ++ ++ // tmp is -1^K where K == partial copied count ++ __ nor(SCR1, tmp, R0); ++ // adjust length down and src/end pos up by partial copied count ++ __ sub_w(length, length, SCR1); ++ __ add_w(src_pos, src_pos, SCR1); ++ __ add_w(dst_pos, dst_pos, SCR1); ++ __ b(*stub->entry()); ++ ++ __ bind(*stub->continuation()); ++ return; ++ } ++ ++ assert(default_type != NULL && default_type->is_array_klass() && default_type->is_loaded(), ++ "must be true at this point"); ++ ++ int elem_size = type2aelembytes(basic_type); ++ Address::ScaleFactor scale = Address::times(elem_size); ++ ++ Address src_length_addr = Address(src, arrayOopDesc::length_offset_in_bytes()); ++ Address dst_length_addr = Address(dst, arrayOopDesc::length_offset_in_bytes()); ++ Address src_klass_addr = Address(src, oopDesc::klass_offset_in_bytes()); ++ Address dst_klass_addr = Address(dst, oopDesc::klass_offset_in_bytes()); ++ ++ // test for NULL ++ if (flags & LIR_OpArrayCopy::src_null_check) { ++ __ beqz(src, *stub->entry()); ++ } ++ if (flags & LIR_OpArrayCopy::dst_null_check) { ++ __ beqz(dst, *stub->entry()); ++ } ++ ++ // If the compiler was not able to prove that exact type of the source or the destination ++ // of the arraycopy is an array type, check at runtime if the source or the destination is ++ // an instance type. 
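++  // Array klasses have negative layout helpers, so a layout helper that compares
++  // greater than or equal to Klass::_lh_neutral_value identifies a non-array
++  // (instance) klass and the copy is routed to the slow-path stub.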
++ if (flags & LIR_OpArrayCopy::type_check) { ++ if (!(flags & LIR_OpArrayCopy::LIR_OpArrayCopy::dst_objarray)) { ++ __ load_klass(tmp, dst); ++ __ ld_w(SCR1, Address(tmp, in_bytes(Klass::layout_helper_offset()))); ++ __ li(SCR2, (jlong) Klass::_lh_neutral_value); ++ __ bge_far(SCR1, SCR2, *stub->entry(), true); ++ } ++ ++ if (!(flags & LIR_OpArrayCopy::LIR_OpArrayCopy::src_objarray)) { ++ __ load_klass(tmp, src); ++ __ ld_w(SCR1, Address(tmp, in_bytes(Klass::layout_helper_offset()))); ++ __ li(SCR2, (jlong) Klass::_lh_neutral_value); ++ __ bge_far(SCR1, SCR2, *stub->entry(), true); ++ } ++ } ++ ++ // check if negative ++ if (flags & LIR_OpArrayCopy::src_pos_positive_check) { ++ __ blt_far(src_pos, R0, *stub->entry(), true); ++ } ++ if (flags & LIR_OpArrayCopy::dst_pos_positive_check) { ++ __ blt_far(dst_pos, R0, *stub->entry(), true); ++ } ++ ++ if (flags & LIR_OpArrayCopy::length_positive_check) { ++ __ blt_far(length, R0, *stub->entry(), true); ++ } ++ ++ if (flags & LIR_OpArrayCopy::src_range_check) { ++ __ add_w(tmp, src_pos, length); ++ __ ld_wu(SCR1, src_length_addr); ++ __ blt_far(SCR1, tmp, *stub->entry(), false); ++ } ++ if (flags & LIR_OpArrayCopy::dst_range_check) { ++ __ add_w(tmp, dst_pos, length); ++ __ ld_wu(SCR1, dst_length_addr); ++ __ blt_far(SCR1, tmp, *stub->entry(), false); ++ } ++ ++ if (flags & LIR_OpArrayCopy::type_check) { ++ // We don't know the array types are compatible ++ if (basic_type != T_OBJECT) { ++ // Simple test for basic type arrays ++ if (UseCompressedClassPointers) { ++ __ ld_wu(tmp, src_klass_addr); ++ __ ld_wu(SCR1, dst_klass_addr); ++ } else { ++ __ ld_ptr(tmp, src_klass_addr); ++ __ ld_ptr(SCR1, dst_klass_addr); ++ } ++ __ bne_far(tmp, SCR1, *stub->entry()); ++ } else { ++ // For object arrays, if src is a sub class of dst then we can ++ // safely do the copy. ++ Label cont, slow; ++ ++ __ addi_d(SP, SP, -2 * wordSize); ++ __ st_ptr(dst, Address(SP, 0 * wordSize)); ++ __ st_ptr(src, Address(SP, 1 * wordSize)); ++ ++ __ load_klass(src, src); ++ __ load_klass(dst, dst); ++ ++ __ check_klass_subtype_fast_path(src, dst, tmp, &cont, &slow, NULL); ++ ++ __ addi_d(SP, SP, -2 * wordSize); ++ __ st_ptr(dst, Address(SP, 0 * wordSize)); ++ __ st_ptr(src, Address(SP, 1 * wordSize)); ++ __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); ++ __ ld_ptr(dst, Address(SP, 0 * wordSize)); ++ __ ld_ptr(src, Address(SP, 1 * wordSize)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ ++ __ bnez(dst, cont); ++ ++ __ bind(slow); ++ __ ld_ptr(dst, Address(SP, 0 * wordSize)); ++ __ ld_ptr(src, Address(SP, 1 * wordSize)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ ++ address copyfunc_addr = StubRoutines::checkcast_arraycopy(); ++ if (copyfunc_addr != NULL) { // use stub if available ++ // src is not a sub class of dst so we have to do a ++ // per-element check. ++ ++ int mask = LIR_OpArrayCopy::src_objarray|LIR_OpArrayCopy::dst_objarray; ++ if ((flags & mask) != mask) { ++ // Check that at least both of them object arrays. 
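++        // Only the operand that is not statically known to be an object array is
++        // checked: its layout helper is compared against the objArray layout
++        // helper below, and any mismatch branches to the slow-path stub.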
++ assert(flags & mask, "one of the two should be known to be an object array"); ++ ++ if (!(flags & LIR_OpArrayCopy::src_objarray)) { ++ __ load_klass(tmp, src); ++ } else if (!(flags & LIR_OpArrayCopy::dst_objarray)) { ++ __ load_klass(tmp, dst); ++ } ++ int lh_offset = in_bytes(Klass::layout_helper_offset()); ++ Address klass_lh_addr(tmp, lh_offset); ++ jint objArray_lh = Klass::array_layout_helper(T_OBJECT); ++ __ ld_w(SCR1, klass_lh_addr); ++ __ li(SCR2, objArray_lh); ++ __ XOR(SCR1, SCR1, SCR2); ++ __ bnez(SCR1, *stub->entry()); ++ } ++ ++ // Spill because stubs can use any register they like and it's ++ // easier to restore just those that we care about. ++ __ st_ptr(dst, Address(SP, 0 * BytesPerWord)); ++ __ st_ptr(dst_pos, Address(SP, 1 * BytesPerWord)); ++ __ st_ptr(length, Address(SP, 2 * BytesPerWord)); ++ __ st_ptr(src_pos, Address(SP, 3 * BytesPerWord)); ++ __ st_ptr(src, Address(SP, 4 * BytesPerWord)); ++ ++ __ lea(A0, Address(src, src_pos, scale)); ++ __ addi_d(A0, A0, arrayOopDesc::base_offset_in_bytes(basic_type)); ++ assert_different_registers(A0, dst, dst_pos, length); ++ __ load_klass(A4, dst); ++ assert_different_registers(A4, dst, dst_pos, length); ++ __ lea(A1, Address(dst, dst_pos, scale)); ++ __ addi_d(A1, A1, arrayOopDesc::base_offset_in_bytes(basic_type)); ++ assert_different_registers(A1, length); ++ __ bstrpick_d(A2, length, 31, 0); ++ __ ld_ptr(A4, Address(A4, ObjArrayKlass::element_klass_offset())); ++ __ ld_w(A3, Address(A4, Klass::super_check_offset_offset())); ++ __ call(copyfunc_addr, relocInfo::runtime_call_type); ++ ++#ifndef PRODUCT ++ if (PrintC1Statistics) { ++ Label failed; ++ __ bnez(A0, failed); ++ __ li(SCR2, (address)&Runtime1::_arraycopy_checkcast_cnt); ++ __ increment(SCR2, 1); ++ __ bind(failed); ++ } ++#endif ++ ++ __ beqz(A0, *stub->continuation()); ++ ++#ifndef PRODUCT ++ if (PrintC1Statistics) { ++ __ li(SCR2, (address)&Runtime1::_arraycopy_checkcast_attempt_cnt); ++ __ increment(SCR2, 1); ++ } ++#endif ++ assert_different_registers(dst, dst_pos, length, src_pos, src, tmp, SCR1); ++ __ move(tmp, A0); ++ ++ // Restore previously spilled arguments ++ __ ld_ptr(dst, Address(SP, 0 * BytesPerWord)); ++ __ ld_ptr(dst_pos, Address(SP, 1 * BytesPerWord)); ++ __ ld_ptr(length, Address(SP, 2 * BytesPerWord)); ++ __ ld_ptr(src_pos, Address(SP, 3 * BytesPerWord)); ++ __ ld_ptr(src, Address(SP, 4 * BytesPerWord)); ++ ++ // return value is -1^K where K is partial copied count ++ __ nor(SCR1, tmp, R0); ++ // adjust length down and src/end pos up by partial copied count ++ __ sub_w(length, length, SCR1); ++ __ add_w(src_pos, src_pos, SCR1); ++ __ add_w(dst_pos, dst_pos, SCR1); ++ } ++ ++ __ b(*stub->entry()); ++ ++ __ bind(cont); ++ __ ld_ptr(dst, Address(SP, 0 * wordSize)); ++ __ ld_ptr(src, Address(SP, 1 * wordSize)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ } ++ } ++ ++#ifdef ASSERT ++ if (basic_type != T_OBJECT || !(flags & LIR_OpArrayCopy::type_check)) { ++ // Sanity check the known type with the incoming class. For the ++ // primitive case the types must match exactly with src.klass and ++ // dst.klass each exactly matching the default type. For the ++ // object array case, if no type check is needed then either the ++ // dst type is exactly the expected type and the src type is a ++ // subtype which we can't check or src is the same array as dst ++ // but not necessarily exactly of type default_type. 
++ Label known_ok, halt; ++ __ mov_metadata(tmp, default_type->constant_encoding()); ++ if (UseCompressedClassPointers) { ++ __ encode_klass_not_null(tmp); ++ } ++ ++ if (basic_type != T_OBJECT) { ++ ++ if (UseCompressedClassPointers) { ++ __ ld_wu(SCR1, dst_klass_addr); ++ } else { ++ __ ld_ptr(SCR1, dst_klass_addr); ++ } ++ __ bne(tmp, SCR1, halt); ++ if (UseCompressedClassPointers) { ++ __ ld_wu(SCR1, src_klass_addr); ++ } else { ++ __ ld_ptr(SCR1, src_klass_addr); ++ } ++ __ beq(tmp, SCR1, known_ok); ++ } else { ++ if (UseCompressedClassPointers) { ++ __ ld_wu(SCR1, dst_klass_addr); ++ } else { ++ __ ld_ptr(SCR1, dst_klass_addr); ++ } ++ __ beq(tmp, SCR1, known_ok); ++ __ beq(src, dst, known_ok); ++ } ++ __ bind(halt); ++ __ stop("incorrect type information in arraycopy"); ++ __ bind(known_ok); ++ } ++#endif ++ ++#ifndef PRODUCT ++ if (PrintC1Statistics) { ++ __ li(SCR2, Runtime1::arraycopy_count_address(basic_type)); ++ __ increment(SCR2, 1); ++ } ++#endif ++ ++ __ lea(A0, Address(src, src_pos, scale)); ++ __ addi_d(A0, A0, arrayOopDesc::base_offset_in_bytes(basic_type)); ++ assert_different_registers(A0, dst, dst_pos, length); ++ __ lea(A1, Address(dst, dst_pos, scale)); ++ __ addi_d(A1, A1, arrayOopDesc::base_offset_in_bytes(basic_type)); ++ assert_different_registers(A1, length); ++ __ bstrpick_d(A2, length, 31, 0); ++ ++ bool disjoint = (flags & LIR_OpArrayCopy::overlapping) == 0; ++ bool aligned = (flags & LIR_OpArrayCopy::unaligned) == 0; ++ const char *name; ++ address entry = StubRoutines::select_arraycopy_function(basic_type, aligned, disjoint, name, false); ++ ++ CodeBlob *cb = CodeCache::find_blob(entry); ++ if (cb) { ++ __ call(entry, relocInfo::runtime_call_type); ++ } else { ++ __ call_VM_leaf(entry, 3); ++ } ++ ++ __ bind(*stub->continuation()); ++} ++ ++void LIR_Assembler::emit_lock(LIR_OpLock* op) { ++ Register obj = op->obj_opr()->as_register(); // may not be an oop ++ Register hdr = op->hdr_opr()->as_register(); ++ Register lock = op->lock_opr()->as_register(); ++ if (!UseFastLocking) { ++ __ b(*op->stub()->entry()); ++ } else if (op->code() == lir_lock) { ++ Register scratch = noreg; ++ if (UseBiasedLocking) { ++ scratch = op->scratch_opr()->as_register(); ++ } ++ assert(BasicLock::displaced_header_offset_in_bytes() == 0, ++ "lock_reg must point to the displaced header"); ++ // add debug info for NullPointerException only if one is possible ++ int null_check_offset = __ lock_object(hdr, obj, lock, scratch, *op->stub()->entry()); ++ if (op->info() != NULL) { ++ add_debug_info_for_null_check(null_check_offset, op->info()); ++ } ++ // done ++ } else if (op->code() == lir_unlock) { ++ assert(BasicLock::displaced_header_offset_in_bytes() == 0, ++ "lock_reg must point to the displaced header"); ++ __ unlock_object(hdr, obj, lock, *op->stub()->entry()); ++ } else { ++ Unimplemented(); ++ } ++ __ bind(*op->stub()->continuation()); ++} ++ ++void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { ++ ciMethod* method = op->profiled_method(); ++ ciMethod* callee = op->profiled_callee(); ++ int bci = op->profiled_bci(); ++ ++ // Update counter for all call types ++ ciMethodData* md = method->method_data_or_null(); ++ assert(md != NULL, "Sanity"); ++ ciProfileData* data = md->bci_to_data(bci); ++ assert(data != NULL && data->is_CounterData(), "need CounterData for calls"); ++ assert(op->mdo()->is_single_cpu(), "mdo must be allocated"); ++ Register mdo = op->mdo()->as_register(); ++ __ mov_metadata(mdo, md->constant_encoding()); ++ Address counter_addr(mdo, 
md->byte_offset_of_slot(data, CounterData::count_offset())); ++ // Perform additional virtual call profiling for invokevirtual and ++ // invokeinterface bytecodes ++ if (op->should_profile_receiver_type()) { ++ assert(op->recv()->is_single_cpu(), "recv must be allocated"); ++ Register recv = op->recv()->as_register(); ++ assert_different_registers(mdo, recv); ++ assert(data->is_VirtualCallData(), "need VirtualCallData for virtual calls"); ++ ciKlass* known_klass = op->known_holder(); ++ if (C1OptimizeVirtualCallProfiling && known_klass != NULL) { ++ // We know the type that will be seen at this call site; we can ++ // statically update the MethodData* rather than needing to do ++ // dynamic tests on the receiver type ++ ++ // NOTE: we should probably put a lock around this search to ++ // avoid collisions by concurrent compilations ++ ciVirtualCallData* vc_data = (ciVirtualCallData*) data; ++ uint i; ++ for (i = 0; i < VirtualCallData::row_limit(); i++) { ++ ciKlass* receiver = vc_data->receiver(i); ++ if (known_klass->equals(receiver)) { ++ Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); ++ __ ld_ptr(SCR2, data_addr); ++ __ addi_d(SCR2, SCR2, DataLayout::counter_increment); ++ __ st_ptr(SCR2, data_addr); ++ return; ++ } ++ } ++ ++ // Receiver type not found in profile data; select an empty slot ++ ++ // Note that this is less efficient than it should be because it ++ // always does a write to the receiver part of the ++ // VirtualCallData rather than just the first time ++ for (i = 0; i < VirtualCallData::row_limit(); i++) { ++ ciKlass* receiver = vc_data->receiver(i); ++ if (receiver == NULL) { ++ Address recv_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_offset(i))); ++ __ mov_metadata(SCR2, known_klass->constant_encoding()); ++ __ lea(SCR1, recv_addr); ++ __ st_ptr(SCR2, SCR1, 0); ++ Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); ++ __ ld_ptr(SCR2, data_addr); ++ __ addi_d(SCR2, SCR2, DataLayout::counter_increment); ++ __ st_ptr(SCR2, data_addr); ++ return; ++ } ++ } ++ } else { ++ __ load_klass(recv, recv); ++ Label update_done; ++ type_profile_helper(mdo, md, data, recv, &update_done); ++ // Receiver did not match any saved receiver and there is no empty row for it. ++ // Increment total counter to indicate polymorphic case.
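++      // counter_addr is the generic CounterData::count_offset() slot set up
++      // above, so only the overall call count is bumped here.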
++ __ ld_ptr(SCR2, counter_addr); ++ __ addi_d(SCR2, SCR2, DataLayout::counter_increment); ++ __ st_ptr(SCR2, counter_addr); ++ ++ __ bind(update_done); ++ } ++ } else { ++ // Static call ++ __ ld_ptr(SCR2, counter_addr); ++ __ addi_d(SCR2, SCR2, DataLayout::counter_increment); ++ __ st_ptr(SCR2, counter_addr); ++ } ++} ++ ++void LIR_Assembler::emit_delay(LIR_OpDelay*) { ++ Unimplemented(); ++} ++ ++void LIR_Assembler::monitor_address(int monitor_no, LIR_Opr dst) { ++ __ lea(dst->as_register(), frame_map()->address_for_monitor_lock(monitor_no)); ++} ++ ++void LIR_Assembler::emit_updatecrc32(LIR_OpUpdateCRC32* op) { ++ assert(op->crc()->is_single_cpu(), "crc must be register"); ++ assert(op->val()->is_single_cpu(), "byte value must be register"); ++ assert(op->result_opr()->is_single_cpu(), "result must be register"); ++ Register crc = op->crc()->as_register(); ++ Register val = op->val()->as_register(); ++ Register res = op->result_opr()->as_register(); ++ ++ assert_different_registers(val, crc, res); ++ __ li(res, StubRoutines::crc_table_addr()); ++ __ nor(crc, crc, R0); // ~crc ++ __ update_byte_crc32(crc, val, res); ++ __ nor(res, crc, R0); // ~crc ++} ++ ++void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) { ++ COMMENT("emit_profile_type {"); ++ Register obj = op->obj()->as_register(); ++ Register tmp = op->tmp()->as_pointer_register(); ++ Address mdo_addr = as_Address(op->mdp()->as_address_ptr()); ++ ciKlass* exact_klass = op->exact_klass(); ++ intptr_t current_klass = op->current_klass(); ++ bool not_null = op->not_null(); ++ bool no_conflict = op->no_conflict(); ++ ++ Label update, next, none; ++ ++ bool do_null = !not_null; ++ bool exact_klass_set = exact_klass != NULL && ciTypeEntries::valid_ciklass(current_klass) == exact_klass; ++ bool do_update = !TypeEntries::is_type_unknown(current_klass) && !exact_klass_set; ++ ++ assert(do_null || do_update, "why are we here?"); ++ assert(!TypeEntries::was_null_seen(current_klass) || do_update, "why are we here?"); ++ assert(mdo_addr.base() != SCR1, "wrong register"); ++ ++ __ verify_oop(obj); ++ ++ if (tmp != obj) { ++ __ move(tmp, obj); ++ } ++ if (do_null) { ++ __ bnez(tmp, update); ++ if (!TypeEntries::was_null_seen(current_klass)) { ++ __ ld_ptr(SCR2, mdo_addr); ++ __ ori(SCR2, SCR2, TypeEntries::null_seen); ++ __ st_ptr(SCR2, mdo_addr); ++ } ++ if (do_update) { ++#ifndef ASSERT ++ __ b(next); ++ } ++#else ++ __ b(next); ++ } ++ } else { ++ __ bnez(tmp, update); ++ __ stop("unexpected null obj"); ++#endif ++ } ++ ++ __ bind(update); ++ ++ if (do_update) { ++#ifdef ASSERT ++ if (exact_klass != NULL) { ++ Label ok; ++ __ load_klass(tmp, tmp); ++ __ mov_metadata(SCR1, exact_klass->constant_encoding()); ++ __ XOR(SCR1, tmp, SCR1); ++ __ beqz(SCR1, ok); ++ __ stop("exact klass and actual klass differ"); ++ __ bind(ok); ++ } ++#endif ++ if (!no_conflict) { ++ if (exact_klass == NULL || TypeEntries::is_type_none(current_klass)) { ++ if (exact_klass != NULL) { ++ __ mov_metadata(tmp, exact_klass->constant_encoding()); ++ } else { ++ __ load_klass(tmp, tmp); ++ } ++ ++ __ ld_ptr(SCR2, mdo_addr); ++ __ XOR(tmp, tmp, SCR2); ++ assert(TypeEntries::type_klass_mask == -4, "must be"); ++ __ bstrpick_d(SCR1, tmp, 63, 2); ++ // klass seen before, nothing to do. The unknown bit may have been ++ // set already but no need to check. ++ __ beqz(SCR1, next); ++ ++ __ andi(SCR1, tmp, TypeEntries::type_unknown); ++ __ bnez(SCR1, next); // already unknown. Nothing to do anymore. 
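++        // No type was known when this code was compiled: if the profile cell is
++        // still empty (or only has null_seen set) jump to 'none' and install this
++        // klass as the first observed type; otherwise another thread may have just
++        // published a klass, so re-read the cell after an acquire fence and retest
++        // before giving up and marking the entry as type_unknown.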
++ ++ if (TypeEntries::is_type_none(current_klass)) { ++ __ beqz(SCR2, none); ++ __ li(SCR1, (u1)TypeEntries::null_seen); ++ __ beq(SCR2, SCR1, none); ++ // There is a chance that the checks above (re-reading profiling ++ // data from memory) fail if another thread has just set the ++ // profiling to this obj's klass ++ membar_acquire(); ++ __ ld_ptr(SCR2, mdo_addr); ++ __ XOR(tmp, tmp, SCR2); ++ assert(TypeEntries::type_klass_mask == -4, "must be"); ++ __ bstrpick_d(SCR1, tmp, 63, 2); ++ __ beqz(SCR1, next); ++ } ++ } else { ++ assert(ciTypeEntries::valid_ciklass(current_klass) != NULL && ++ ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "conflict only"); ++ ++ __ ld_ptr(tmp, mdo_addr); ++ __ andi(SCR2, tmp, TypeEntries::type_unknown); ++ __ bnez(SCR2, next); // already unknown. Nothing to do anymore. ++ } ++ ++ // different than before. Cannot keep accurate profile. ++ __ ld_ptr(SCR2, mdo_addr); ++ __ ori(SCR2, SCR2, TypeEntries::type_unknown); ++ __ st_ptr(SCR2, mdo_addr); ++ ++ if (TypeEntries::is_type_none(current_klass)) { ++ __ b(next); ++ ++ __ bind(none); ++ // first time here. Set profile type. ++ __ st_ptr(tmp, mdo_addr); ++ } ++ } else { ++ // There's a single possible klass at this profile point ++ assert(exact_klass != NULL, "should be"); ++ if (TypeEntries::is_type_none(current_klass)) { ++ __ mov_metadata(tmp, exact_klass->constant_encoding()); ++ __ ld_ptr(SCR2, mdo_addr); ++ __ XOR(tmp, tmp, SCR2); ++ assert(TypeEntries::type_klass_mask == -4, "must be"); ++ __ bstrpick_d(SCR1, tmp, 63, 2); ++ __ beqz(SCR1, next); ++#ifdef ASSERT ++ { ++ Label ok; ++ __ ld_ptr(SCR1, mdo_addr); ++ __ beqz(SCR1, ok); ++ __ li(SCR2, (u1)TypeEntries::null_seen); ++ __ beq(SCR1, SCR2, ok); ++ // may have been set by another thread ++ membar_acquire(); ++ __ mov_metadata(SCR1, exact_klass->constant_encoding()); ++ __ ld_ptr(SCR2, mdo_addr); ++ __ XOR(SCR2, SCR1, SCR2); ++ assert(TypeEntries::type_mask == -2, "must be"); ++ __ bstrpick_d(SCR2, SCR2, 63, 1); ++ __ beqz(SCR2, ok); ++ ++ __ stop("unexpected profiling mismatch"); ++ __ bind(ok); ++ } ++#endif ++ // first time here. Set profile type. ++ __ st_ptr(tmp, mdo_addr); ++ } else { ++ assert(ciTypeEntries::valid_ciklass(current_klass) != NULL && ++ ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "inconsistent"); ++ ++ __ ld_ptr(tmp, mdo_addr); ++ __ andi(SCR1, tmp, TypeEntries::type_unknown); ++ __ bnez(SCR1, next); // already unknown. Nothing to do anymore. ++ ++ __ ori(tmp, tmp, TypeEntries::type_unknown); ++ __ st_ptr(tmp, mdo_addr); ++ // FIXME: Write barrier needed here? 
++ } ++ } ++ ++ __ bind(next); ++ } ++ COMMENT("} emit_profile_type"); ++} ++ ++void LIR_Assembler::align_backward_branch_target() {} ++ ++void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest, LIR_Opr tmp) { ++ // tmp must be unused ++ assert(tmp->is_illegal(), "wasting a register if tmp is allocated"); ++ ++ if (left->is_single_cpu()) { ++ assert(dest->is_single_cpu(), "expect single result reg"); ++ __ sub_w(dest->as_register(), R0, left->as_register()); ++ } else if (left->is_double_cpu()) { ++ assert(dest->is_double_cpu(), "expect double result reg"); ++ __ sub_d(dest->as_register_lo(), R0, left->as_register_lo()); ++ } else if (left->is_single_fpu()) { ++ assert(dest->is_single_fpu(), "expect single float result reg"); ++ __ fneg_s(dest->as_float_reg(), left->as_float_reg()); ++ } else { ++ assert(left->is_double_fpu(), "expect double float operand reg"); ++ assert(dest->is_double_fpu(), "expect double float result reg"); ++ __ fneg_d(dest->as_double_reg(), left->as_double_reg()); ++ } ++} ++ ++void LIR_Assembler::leal(LIR_Opr addr, LIR_Opr dest, LIR_PatchCode patch_code, ++ CodeEmitInfo* info) { ++ if (patch_code != lir_patch_none) { ++ deoptimize_trap(info); ++ return; ++ } ++ ++ __ lea(dest->as_register_lo(), as_Address(addr->as_address_ptr())); ++} ++ ++void LIR_Assembler::rt_call(LIR_Opr result, address dest, const LIR_OprList* args, ++ LIR_Opr tmp, CodeEmitInfo* info) { ++ assert(!tmp->is_valid(), "don't need temporary"); ++ __ call(dest, relocInfo::runtime_call_type); ++ if (info != NULL) { ++ add_call_info_here(info); ++ } ++} ++ ++void LIR_Assembler::volatile_move_op(LIR_Opr src, LIR_Opr dest, BasicType type, ++ CodeEmitInfo* info) { ++ if (dest->is_address() || src->is_address()) { ++ move_op(src, dest, type, lir_patch_none, info, ++ /*pop_fpu_stack*/false, /*unaligned*/false, /*wide*/false); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++#ifdef ASSERT ++// emit run-time assertion ++void LIR_Assembler::emit_assert(LIR_OpAssert* op) { ++ assert(op->code() == lir_assert, "must be"); ++ Label ok; ++ ++ if (op->in_opr1()->is_valid()) { ++ assert(op->in_opr2()->is_valid(), "both operands must be valid"); ++ assert(op->in_opr1()->is_cpu_register() || op->in_opr2()->is_cpu_register(), "must be"); ++ Register reg1 = as_reg(op->in_opr1()); ++ Register reg2 = as_reg(op->in_opr2()); ++ switch (op->condition()) { ++ case lir_cond_equal: __ beq(reg1, reg2, ok); break; ++ case lir_cond_notEqual: __ bne(reg1, reg2, ok); break; ++ case lir_cond_less: __ blt(reg1, reg2, ok); break; ++ case lir_cond_lessEqual: __ bge(reg2, reg1, ok); break; ++ case lir_cond_greaterEqual: __ bge(reg1, reg2, ok); break; ++ case lir_cond_greater: __ blt(reg2, reg1, ok); break; ++ case lir_cond_belowEqual: __ bgeu(reg2, reg1, ok); break; ++ case lir_cond_aboveEqual: __ bgeu(reg1, reg2, ok); break; ++ default: ShouldNotReachHere(); ++ } ++ } else { ++ assert(op->in_opr2()->is_illegal(), "both operands must be illegal"); ++ assert(op->condition() == lir_cond_always, "no other conditions allowed"); ++ } ++ if (op->halt()) { ++ const char* str = __ code_string(op->msg()); ++ __ stop(str); ++ } else { ++ breakpoint(); ++ } ++ __ bind(ok); ++} ++#endif ++ ++#ifndef PRODUCT ++#define COMMENT(x) do { __ block_comment(x); } while (0) ++#else ++#define COMMENT(x) ++#endif ++ ++void LIR_Assembler::membar() { ++ COMMENT("membar"); ++ __ membar(Assembler::AnyAny); ++} ++ ++void LIR_Assembler::membar_acquire() { ++ __ membar(Assembler::Membar_mask_bits(Assembler::LoadLoad | Assembler::LoadStore)); ++} ++ ++void 
LIR_Assembler::membar_release() { ++ __ membar(Assembler::Membar_mask_bits(Assembler::LoadStore|Assembler::StoreStore)); ++} ++ ++void LIR_Assembler::membar_loadload() { ++ __ membar(Assembler::LoadLoad); ++} ++ ++void LIR_Assembler::membar_storestore() { ++ __ membar(MacroAssembler::StoreStore); ++} ++ ++void LIR_Assembler::membar_loadstore() { ++ __ membar(MacroAssembler::LoadStore); ++} ++ ++void LIR_Assembler::membar_storeload() { ++ __ membar(MacroAssembler::StoreLoad); ++} ++ ++void LIR_Assembler::on_spin_wait() { ++ Unimplemented(); ++} ++ ++void LIR_Assembler::get_thread(LIR_Opr result_reg) { ++ __ move(result_reg->as_register(), TREG); ++} ++ ++void LIR_Assembler::peephole(LIR_List *lir) { ++} ++ ++void LIR_Assembler::atomic_op(LIR_Code code, LIR_Opr src, LIR_Opr data, ++ LIR_Opr dest, LIR_Opr tmp_op) { ++ Address addr = as_Address(src->as_address_ptr()); ++ BasicType type = src->type(); ++ Register dst = as_reg(dest); ++ Register tmp = as_reg(tmp_op); ++ bool is_oop = is_reference_type(type); ++ ++ if (Assembler::is_simm(addr.disp(), 12)) { ++ __ addi_d(tmp, addr.base(), addr.disp()); ++ } else { ++ __ li(tmp, addr.disp()); ++ __ add_d(tmp, addr.base(), tmp); ++ } ++ if (addr.index() != noreg) { ++ if (addr.scale() > Address::times_1) ++ __ alsl_d(tmp, addr.index(), tmp, addr.scale() - 1); ++ else ++ __ add_d(tmp, tmp, addr.index()); ++ } ++ ++ switch(type) { ++ case T_INT: ++ break; ++ case T_LONG: ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ if (UseCompressedOops) { ++ // unsigned int ++ } else { ++ // long ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ if (code == lir_xadd) { ++ Register inc = noreg; ++ if (data->is_constant()) { ++ inc = SCR1; ++ __ li(inc, as_long(data)); ++ } else { ++ inc = as_reg(data); ++ } ++ switch(type) { ++ case T_INT: ++ __ amadd_db_w(dst, inc, tmp); ++ break; ++ case T_LONG: ++ __ amadd_db_d(dst, inc, tmp); ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ if (UseCompressedOops) { ++ __ amadd_db_w(dst, inc, tmp); ++ __ lu32i_d(dst, 0); ++ } else { ++ __ amadd_db_d(dst, inc, tmp); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else if (code == lir_xchg) { ++ Register obj = as_reg(data); ++ if (is_oop && UseCompressedOops) { ++ __ encode_heap_oop(SCR2, obj); ++ obj = SCR2; ++ } ++ switch(type) { ++ case T_INT: ++ __ amswap_db_w(dst, obj, tmp); ++ break; ++ case T_LONG: ++ __ amswap_db_d(dst, obj, tmp); ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ if (UseCompressedOops) { ++ __ amswap_db_w(dst, obj, tmp); ++ __ lu32i_d(dst, 0); ++ } else { ++ __ amswap_db_d(dst, obj, tmp); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ if (is_oop && UseCompressedOops) { ++ __ decode_heap_oop(dst); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++#undef __ +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/c1_LIRAssembler_loongarch.hpp b/src/hotspot/cpu/loongarch/c1_LIRAssembler_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/c1_LIRAssembler_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/c1_LIRAssembler_loongarch.hpp 2024-01-30 10:00:11.834765144 +0800 +@@ -0,0 +1,83 @@ ++/* ++ * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_C1_LIRASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_C1_LIRASSEMBLER_LOONGARCH_HPP ++ ++// ArrayCopyStub needs access to bailout ++friend class ArrayCopyStub; ++ ++ private: ++ int array_element_size(BasicType type) const; ++ ++ void arith_fpu_implementation(LIR_Code code, int left_index, int right_index, ++ int dest_index, bool pop_fpu_stack); ++ ++ // helper functions which checks for overflow and sets bailout if it ++ // occurs. Always returns a valid embeddable pointer but in the ++ // bailout case the pointer won't be to unique storage. ++ address float_constant(float f); ++ address double_constant(double d); ++ ++ address int_constant(jlong n); ++ ++ bool is_literal_address(LIR_Address* addr); ++ ++ // Ensure we have a valid Address (base+offset) to a stack-slot. ++ Address stack_slot_address(int index, uint shift, int adjust = 0); ++ ++ // Record the type of the receiver in ReceiverTypeData ++ void type_profile_helper(Register mdo, ciMethodData *md, ciProfileData *data, ++ Register recv, Label* update_done); ++ void add_debug_info_for_branch(address adr, CodeEmitInfo* info); ++ ++ void casw(Register addr, Register newval, Register cmpval, bool sign); ++ void casl(Register addr, Register newval, Register cmpval); ++ ++ void poll_for_safepoint(relocInfo::relocType rtype, CodeEmitInfo* info = NULL); ++ ++ static const int max_tableswitches = 20; ++ struct tableswitch switches[max_tableswitches]; ++ int tableswitch_count; ++ ++ void init() { tableswitch_count = 0; } ++ ++ void deoptimize_trap(CodeEmitInfo *info); ++ ++ enum { ++ // call stub: CompiledStaticCall::to_interp_stub_size() + ++ // CompiledStaticCall::to_trampoline_stub_size() ++ _call_stub_size = 13 * NativeInstruction::nop_instruction_size, ++ _call_aot_stub_size = 0, ++ _exception_handler_size = DEBUG_ONLY(1*K) NOT_DEBUG(175), ++ _deopt_handler_size = 7 * NativeInstruction::nop_instruction_size ++ }; ++ ++public: ++ void store_parameter(Register r, int offset_from_sp_in_words); ++ void store_parameter(jint c, int offset_from_sp_in_words); ++ void store_parameter(jobject c, int offset_from_sp_in_words); ++ ++#endif // CPU_LOONGARCH_C1_LIRASSEMBLER_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/c1_LIRGenerator_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_LIRGenerator_loongarch_64.cpp +--- a/src/hotspot/cpu/loongarch/c1_LIRGenerator_loongarch_64.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/c1_LIRGenerator_loongarch_64.cpp 2024-01-30 10:00:11.834765144 +0800 +@@ -0,0 
+1,1396 @@ ++/* ++ * Copyright (c) 2005, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "c1/c1_Compilation.hpp" ++#include "c1/c1_FrameMap.hpp" ++#include "c1/c1_Instruction.hpp" ++#include "c1/c1_LIRAssembler.hpp" ++#include "c1/c1_LIRGenerator.hpp" ++#include "c1/c1_Runtime1.hpp" ++#include "c1/c1_ValueStack.hpp" ++#include "ci/ciArray.hpp" ++#include "ci/ciObjArrayKlass.hpp" ++#include "ci/ciTypeArrayKlass.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "vmreg_loongarch.inline.hpp" ++ ++#ifdef ASSERT ++#define __ gen()->lir(__FILE__, __LINE__)-> ++#else ++#define __ gen()->lir()-> ++#endif ++ ++// Item will be loaded into a byte register; Intel only ++void LIRItem::load_byte_item() { ++ load_item(); ++} ++ ++void LIRItem::load_nonconstant() { ++ LIR_Opr r = value()->operand(); ++ if (r->is_constant()) { ++ _result = r; ++ } else { ++ load_item(); ++ } ++} ++ ++//-------------------------------------------------------------- ++// LIRGenerator ++//-------------------------------------------------------------- ++ ++LIR_Opr LIRGenerator::exceptionOopOpr() { return FrameMap::a0_oop_opr; } ++LIR_Opr LIRGenerator::exceptionPcOpr() { return FrameMap::a1_opr; } ++LIR_Opr LIRGenerator::divInOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } ++LIR_Opr LIRGenerator::divOutOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } ++LIR_Opr LIRGenerator::remOutOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } ++LIR_Opr LIRGenerator::shiftCountOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } ++LIR_Opr LIRGenerator::syncLockOpr() { return new_register(T_INT); } ++LIR_Opr LIRGenerator::syncTempOpr() { return FrameMap::a0_opr; } ++LIR_Opr LIRGenerator::getThreadTemp() { return LIR_OprFact::illegalOpr; } ++ ++LIR_Opr LIRGenerator::result_register_for(ValueType* type, bool callee) { ++ LIR_Opr opr; ++ switch (type->tag()) { ++ case intTag: opr = FrameMap::a0_opr; break; ++ case objectTag: opr = FrameMap::a0_oop_opr; break; ++ case longTag: opr = FrameMap::long0_opr; break; ++ case floatTag: opr = FrameMap::fpu0_float_opr; break; ++ case doubleTag: opr = FrameMap::fpu0_double_opr; break; ++ case addressTag: ++ default: ShouldNotReachHere(); return LIR_OprFact::illegalOpr; ++ } ++ ++ assert(opr->type_field() == as_OprType(as_BasicType(type)), "type mismatch"); ++ return 
opr; ++} ++ ++LIR_Opr LIRGenerator::rlock_byte(BasicType type) { ++ LIR_Opr reg = new_register(T_INT); ++ set_vreg_flag(reg, LIRGenerator::byte_reg); ++ return reg; ++} ++ ++//--------- loading items into registers -------------------------------- ++ ++bool LIRGenerator::can_store_as_constant(Value v, BasicType type) const { ++ if (v->type()->as_IntConstant() != NULL) { ++ return v->type()->as_IntConstant()->value() == 0L; ++ } else if (v->type()->as_LongConstant() != NULL) { ++ return v->type()->as_LongConstant()->value() == 0L; ++ } else if (v->type()->as_ObjectConstant() != NULL) { ++ return v->type()->as_ObjectConstant()->value()->is_null_object(); ++ } else { ++ return false; ++ } ++} ++ ++bool LIRGenerator::can_inline_as_constant(Value v) const { ++ // FIXME: Just a guess ++ if (v->type()->as_IntConstant() != NULL) { ++ return Assembler::is_simm(v->type()->as_IntConstant()->value(), 12); ++ } else if (v->type()->as_LongConstant() != NULL) { ++ return v->type()->as_LongConstant()->value() == 0L; ++ } else if (v->type()->as_ObjectConstant() != NULL) { ++ return v->type()->as_ObjectConstant()->value()->is_null_object(); ++ } else { ++ return false; ++ } ++} ++ ++bool LIRGenerator::can_inline_as_constant(LIR_Const* c) const { return false; } ++ ++LIR_Opr LIRGenerator::safepoint_poll_register() { ++ return LIR_OprFact::illegalOpr; ++} ++ ++LIR_Address* LIRGenerator::generate_address(LIR_Opr base, LIR_Opr index, ++ int shift, int disp, BasicType type) { ++ assert(base->is_register(), "must be"); ++ intx large_disp = disp; ++ ++ // accumulate fixed displacements ++ if (index->is_constant()) { ++ LIR_Const *constant = index->as_constant_ptr(); ++ if (constant->type() == T_INT) { ++ large_disp += index->as_jint() << shift; ++ } else { ++ assert(constant->type() == T_LONG, "should be"); ++ jlong c = index->as_jlong() << shift; ++ if ((jlong)((jint)c) == c) { ++ large_disp += c; ++ index = LIR_OprFact::illegalOpr; ++ } else { ++ LIR_Opr tmp = new_register(T_LONG); ++ __ move(index, tmp); ++ index = tmp; ++ // apply shift and displacement below ++ } ++ } ++ } ++ ++ if (index->is_register()) { ++ // apply the shift and accumulate the displacement ++ if (shift > 0) { ++ LIR_Opr tmp = new_pointer_register(); ++ __ shift_left(index, shift, tmp); ++ index = tmp; ++ } ++ if (large_disp != 0) { ++ LIR_Opr tmp = new_pointer_register(); ++ if (Assembler::is_simm(large_disp, 12)) { ++ __ add(index, LIR_OprFact::intptrConst(large_disp), tmp); ++ index = tmp; ++ } else { ++ __ move(LIR_OprFact::intptrConst(large_disp), tmp); ++ __ add(tmp, index, tmp); ++ index = tmp; ++ } ++ large_disp = 0; ++ } ++ } else if (large_disp != 0 && !Assembler::is_simm(large_disp, 12)) { ++ // index is illegal so replace it with the displacement loaded into a register ++ index = new_pointer_register(); ++ __ move(LIR_OprFact::intptrConst(large_disp), index); ++ large_disp = 0; ++ } ++ ++ // at this point we either have base + index or base + displacement ++ if (large_disp == 0 && index->is_register()) { ++ return new LIR_Address(base, index, type); ++ } else { ++ assert(Assembler::is_simm(large_disp, 12), "must be"); ++ return new LIR_Address(base, large_disp, type); ++ } ++} ++ ++LIR_Address* LIRGenerator::emit_array_address(LIR_Opr array_opr, LIR_Opr index_opr, BasicType type) { ++ int offset_in_bytes = arrayOopDesc::base_offset_in_bytes(type); ++ int elem_size = type2aelembytes(type); ++ int shift = exact_log2(elem_size); ++ ++ LIR_Address* addr; ++ if (index_opr->is_constant()) { ++ addr = new LIR_Address(array_opr, 
offset_in_bytes + (intx)(index_opr->as_jint()) * elem_size, type); ++ } else { ++ if (offset_in_bytes) { ++ LIR_Opr tmp = new_pointer_register(); ++ __ add(array_opr, LIR_OprFact::intConst(offset_in_bytes), tmp); ++ array_opr = tmp; ++ offset_in_bytes = 0; ++ } ++ addr = new LIR_Address(array_opr, index_opr, LIR_Address::scale(type), offset_in_bytes, type); ++ } ++ return addr; ++} ++ ++LIR_Opr LIRGenerator::load_immediate(int x, BasicType type) { ++ LIR_Opr r; ++ if (type == T_LONG) { ++ r = LIR_OprFact::longConst(x); ++ if (!Assembler::is_simm(x, 12)) { ++ LIR_Opr tmp = new_register(type); ++ __ move(r, tmp); ++ return tmp; ++ } ++ } else if (type == T_INT) { ++ r = LIR_OprFact::intConst(x); ++ if (!Assembler::is_simm(x, 12)) { ++ // This is all rather nasty. We don't know whether our constant ++ // is required for a logical or an arithmetic operation, wo we ++ // don't know what the range of valid values is!! ++ LIR_Opr tmp = new_register(type); ++ __ move(r, tmp); ++ return tmp; ++ } ++ } else { ++ ShouldNotReachHere(); ++ r = NULL; // unreachable ++ } ++ return r; ++} ++ ++void LIRGenerator::increment_counter(address counter, BasicType type, int step) { ++ LIR_Opr pointer = new_pointer_register(); ++ __ move(LIR_OprFact::intptrConst(counter), pointer); ++ LIR_Address* addr = new LIR_Address(pointer, type); ++ increment_counter(addr, step); ++} ++ ++void LIRGenerator::increment_counter(LIR_Address* addr, int step) { ++ LIR_Opr imm = NULL; ++ switch(addr->type()) { ++ case T_INT: ++ imm = LIR_OprFact::intConst(step); ++ break; ++ case T_LONG: ++ imm = LIR_OprFact::longConst(step); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ LIR_Opr reg = new_register(addr->type()); ++ __ load(addr, reg); ++ __ add(reg, imm, reg); ++ __ store(reg, addr); ++} ++ ++template ++void LIRGenerator::cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, ++ int disp, int c, T tgt, CodeEmitInfo* info) { ++ LIR_Opr reg = new_register(T_INT); ++ __ load(generate_address(base, disp, T_INT), reg, info); ++ __ cmp_branch(condition, reg, LIR_OprFact::intConst(c), T_INT, tgt); ++} ++ ++// Explicit instantiation for all supported types. ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, CodeStub*, CodeEmitInfo*); ++ ++template ++void LIRGenerator::cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, ++ int disp, BasicType type, T tgt, CodeEmitInfo* info) { ++ LIR_Opr reg1 = new_register(T_INT); ++ __ load(generate_address(base, disp, type), reg1, info); ++ __ cmp_branch(condition, reg, reg1, type, tgt); ++} ++ ++// Explicit instantiation for all supported types. 
++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, CodeStub*, CodeEmitInfo*); ++ ++bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, jint c, LIR_Opr result, LIR_Opr tmp) { ++ if (is_power_of_2(c - 1)) { ++ __ shift_left(left, exact_log2(c - 1), tmp); ++ __ add(tmp, left, result); ++ return true; ++ } else if (is_power_of_2(c + 1)) { ++ __ shift_left(left, exact_log2(c + 1), tmp); ++ __ sub(tmp, left, result); ++ return true; ++ } else { ++ return false; ++ } ++} ++ ++void LIRGenerator::store_stack_parameter (LIR_Opr item, ByteSize offset_from_sp) { ++ BasicType type = item->type(); ++ __ store(item, new LIR_Address(FrameMap::sp_opr, in_bytes(offset_from_sp), type)); ++} ++ ++void LIRGenerator::array_store_check(LIR_Opr value, LIR_Opr array, CodeEmitInfo* store_check_info, ++ ciMethod* profiled_method, int profiled_bci) { ++ LIR_Opr tmp1 = new_register(objectType); ++ LIR_Opr tmp2 = new_register(objectType); ++ LIR_Opr tmp3 = new_register(objectType); ++ __ store_check(value, array, tmp1, tmp2, tmp3, store_check_info, profiled_method, profiled_bci); ++} ++ ++//---------------------------------------------------------------------- ++// visitor functions ++//---------------------------------------------------------------------- ++ ++void LIRGenerator::do_MonitorEnter(MonitorEnter* x) { ++ assert(x->is_pinned(),""); ++ LIRItem obj(x->obj(), this); ++ obj.load_item(); ++ ++ set_no_result(x); ++ ++ // "lock" stores the address of the monitor stack slot, so this is not an oop ++ LIR_Opr lock = new_register(T_INT); ++ // Need a scratch register for biased locking ++ LIR_Opr scratch = LIR_OprFact::illegalOpr; ++ if (UseBiasedLocking) { ++ scratch = new_register(T_INT); ++ } ++ ++ CodeEmitInfo* info_for_exception = NULL; ++ if (x->needs_null_check()) { ++ info_for_exception = state_for(x); ++ } ++ // this CodeEmitInfo must not have the xhandlers because here the ++ // object is already locked (xhandlers expect object to be unlocked) ++ CodeEmitInfo* info = state_for(x, x->state(), true); ++ monitor_enter(obj.result(), lock, syncTempOpr(), scratch, ++ x->monitor_no(), info_for_exception, info); ++} ++ ++void LIRGenerator::do_MonitorExit(MonitorExit* x) { ++ assert(x->is_pinned(),""); ++ ++ LIRItem obj(x->obj(), this); ++ obj.dont_load_item(); ++ ++ LIR_Opr lock = new_register(T_INT); ++ LIR_Opr obj_temp = new_register(T_INT); ++ set_no_result(x); ++ monitor_exit(obj_temp, lock, syncTempOpr(), LIR_OprFact::illegalOpr, x->monitor_no()); ++} ++ ++void LIRGenerator::do_NegateOp(NegateOp* x) { ++ LIRItem from(x->x(), this); ++ from.load_item(); ++ LIR_Opr result = rlock_result(x); ++ __ negate (from.result(), result); ++} ++ ++// for _fadd, _fmul, _fsub, _fdiv, _frem ++// _dadd, _dmul, _dsub, _ddiv, _drem ++void LIRGenerator::do_ArithmeticOp_FPU(ArithmeticOp* x) { ++ if (x->op() == Bytecodes::_frem || x->op() == Bytecodes::_drem) { ++ // float remainder is implemented as a direct call into the runtime ++ LIRItem right(x->x(), this); ++ LIRItem left(x->y(), this); ++ ++ BasicTypeList signature(2); ++ if (x->op() == Bytecodes::_frem) { ++ signature.append(T_FLOAT); ++ signature.append(T_FLOAT); ++ } else { ++ signature.append(T_DOUBLE); ++ signature.append(T_DOUBLE); ++ } ++ CallingConvention* cc = 
frame_map()->c_calling_convention(&signature); ++ ++ const LIR_Opr result_reg = result_register_for(x->type()); ++ left.load_item_force(cc->at(1)); ++ right.load_item(); ++ ++ __ move(right.result(), cc->at(0)); ++ ++ address entry; ++ if (x->op() == Bytecodes::_frem) { ++ entry = CAST_FROM_FN_PTR(address, SharedRuntime::frem); ++ } else { ++ entry = CAST_FROM_FN_PTR(address, SharedRuntime::drem); ++ } ++ ++ LIR_Opr result = rlock_result(x); ++ __ call_runtime_leaf(entry, getThreadTemp(), result_reg, cc->args()); ++ __ move(result_reg, result); ++ return; ++ } ++ ++ LIRItem left(x->x(), this); ++ LIRItem right(x->y(), this); ++ LIRItem* left_arg = &left; ++ LIRItem* right_arg = &right; ++ ++ // Always load right hand side. ++ right.load_item(); ++ ++ if (!left.is_register()) ++ left.load_item(); ++ ++ LIR_Opr reg = rlock(x); ++ ++ arithmetic_op_fpu(x->op(), reg, left.result(), right.result(), x->is_strictfp()); ++ ++ set_result(x, round_item(reg)); ++} ++ ++// for _ladd, _lmul, _lsub, _ldiv, _lrem ++void LIRGenerator::do_ArithmeticOp_Long(ArithmeticOp* x) { ++ // missing test if instr is commutative and if we should swap ++ LIRItem left(x->x(), this); ++ LIRItem right(x->y(), this); ++ ++ if (x->op() == Bytecodes::_ldiv || x->op() == Bytecodes::_lrem) { ++ left.load_item(); ++ bool need_zero_check = true; ++ if (right.is_constant()) { ++ jlong c = right.get_jlong_constant(); ++ // no need to do div-by-zero check if the divisor is a non-zero constant ++ if (c != 0) need_zero_check = false; ++ // do not load right if the divisor is a power-of-2 constant ++ if (c > 0 && is_power_of_2(c) && Assembler::is_uimm(c - 1, 12)) { ++ right.dont_load_item(); ++ } else { ++ right.load_item(); ++ } ++ } else { ++ right.load_item(); ++ } ++ if (need_zero_check) { ++ CodeEmitInfo* info = state_for(x); ++ CodeStub* stub = new DivByZeroStub(info); ++ __ cmp_branch(lir_cond_equal, right.result(), LIR_OprFact::longConst(0), T_LONG, stub); ++ } ++ ++ rlock_result(x); ++ switch (x->op()) { ++ case Bytecodes::_lrem: ++ __ rem (left.result(), right.result(), x->operand()); ++ break; ++ case Bytecodes::_ldiv: ++ __ div (left.result(), right.result(), x->operand()); ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } else { ++ assert(x->op() == Bytecodes::_lmul || x->op() == Bytecodes::_ladd || x->op() == Bytecodes::_lsub, ++ "expect lmul, ladd or lsub"); ++ // add, sub, mul ++ left.load_item(); ++ if (!right.is_register()) { ++ if (x->op() == Bytecodes::_lmul || !right.is_constant() || ++ (x->op() == Bytecodes::_ladd && !Assembler::is_simm(right.get_jlong_constant(), 12)) || ++ (x->op() == Bytecodes::_lsub && !Assembler::is_simm(-right.get_jlong_constant(), 12))) { ++ right.load_item(); ++ } else { // add, sub ++ assert(x->op() == Bytecodes::_ladd || x->op() == Bytecodes::_lsub, "expect ladd or lsub"); ++ // don't load constants to save register ++ right.load_nonconstant(); ++ } ++ } ++ rlock_result(x); ++ arithmetic_op_long(x->op(), x->operand(), left.result(), right.result(), NULL); ++ } ++} ++ ++// for: _iadd, _imul, _isub, _idiv, _irem ++void LIRGenerator::do_ArithmeticOp_Int(ArithmeticOp* x) { ++ // Test if instr is commutative and if we should swap ++ LIRItem left(x->x(), this); ++ LIRItem right(x->y(), this); ++ LIRItem* left_arg = &left; ++ LIRItem* right_arg = &right; ++ if (x->is_commutative() && left.is_stack() && right.is_register()) { ++ // swap them if left is real stack (or cached) and right is real register(not cached) ++ left_arg = &right; ++ right_arg = &left; ++ } ++ ++ 
left_arg->load_item(); ++ ++ // do not need to load right, as we can handle stack and constants ++ if (x->op() == Bytecodes::_idiv || x->op() == Bytecodes::_irem) { ++ rlock_result(x); ++ bool need_zero_check = true; ++ if (right.is_constant()) { ++ jint c = right.get_jint_constant(); ++ // no need to do div-by-zero check if the divisor is a non-zero constant ++ if (c != 0) need_zero_check = false; ++ // do not load right if the divisor is a power-of-2 constant ++ if (c > 0 && is_power_of_2(c) && Assembler::is_uimm(c - 1, 12)) { ++ right_arg->dont_load_item(); ++ } else { ++ right_arg->load_item(); ++ } ++ } else { ++ right_arg->load_item(); ++ } ++ if (need_zero_check) { ++ CodeEmitInfo* info = state_for(x); ++ CodeStub* stub = new DivByZeroStub(info); ++ __ cmp_branch(lir_cond_equal, right_arg->result(), LIR_OprFact::longConst(0), T_INT, stub); ++ } ++ ++ LIR_Opr ill = LIR_OprFact::illegalOpr; ++ if (x->op() == Bytecodes::_irem) { ++ __ irem(left_arg->result(), right_arg->result(), x->operand(), ill, NULL); ++ } else if (x->op() == Bytecodes::_idiv) { ++ __ idiv(left_arg->result(), right_arg->result(), x->operand(), ill, NULL); ++ } ++ } else if (x->op() == Bytecodes::_iadd || x->op() == Bytecodes::_isub) { ++ if (right.is_constant() && ++ ((x->op() == Bytecodes::_iadd && Assembler::is_simm(right.get_jint_constant(), 12)) || ++ (x->op() == Bytecodes::_isub && Assembler::is_simm(-right.get_jint_constant(), 12)))) { ++ right.load_nonconstant(); ++ } else { ++ right.load_item(); ++ } ++ rlock_result(x); ++ arithmetic_op_int(x->op(), x->operand(), left_arg->result(), right_arg->result(), LIR_OprFact::illegalOpr); ++ } else { ++ assert (x->op() == Bytecodes::_imul, "expect imul"); ++ if (right.is_constant()) { ++ jint c = right.get_jint_constant(); ++ if (c > 0 && c < max_jint && (is_power_of_2(c) || is_power_of_2(c - 1) || is_power_of_2(c + 1))) { ++ right_arg->dont_load_item(); ++ } else { ++ // Cannot use constant op. 
++ right_arg->load_item(); ++ } ++ } else { ++ right.load_item(); ++ } ++ rlock_result(x); ++ arithmetic_op_int(x->op(), x->operand(), left_arg->result(), right_arg->result(), new_register(T_INT)); ++ } ++} ++ ++void LIRGenerator::do_ArithmeticOp(ArithmeticOp* x) { ++ // when an operand with use count 1 is the left operand, then it is ++ // likely that no move for 2-operand-LIR-form is necessary ++ if (x->is_commutative() && x->y()->as_Constant() == NULL && x->x()->use_count() > x->y()->use_count()) { ++ x->swap_operands(); ++ } ++ ++ ValueTag tag = x->type()->tag(); ++ assert(x->x()->type()->tag() == tag && x->y()->type()->tag() == tag, "wrong parameters"); ++ switch (tag) { ++ case floatTag: ++ case doubleTag: do_ArithmeticOp_FPU(x); return; ++ case longTag: do_ArithmeticOp_Long(x); return; ++ case intTag: do_ArithmeticOp_Int(x); return; ++ default: ShouldNotReachHere(); return; ++ } ++} ++ ++// _ishl, _lshl, _ishr, _lshr, _iushr, _lushr ++void LIRGenerator::do_ShiftOp(ShiftOp* x) { ++ LIRItem left(x->x(), this); ++ LIRItem right(x->y(), this); ++ ++ left.load_item(); ++ ++ rlock_result(x); ++ if (right.is_constant()) { ++ right.dont_load_item(); ++ int c; ++ switch (x->op()) { ++ case Bytecodes::_ishl: ++ c = right.get_jint_constant() & 0x1f; ++ __ shift_left(left.result(), c, x->operand()); ++ break; ++ case Bytecodes::_ishr: ++ c = right.get_jint_constant() & 0x1f; ++ __ shift_right(left.result(), c, x->operand()); ++ break; ++ case Bytecodes::_iushr: ++ c = right.get_jint_constant() & 0x1f; ++ __ unsigned_shift_right(left.result(), c, x->operand()); ++ break; ++ case Bytecodes::_lshl: ++ c = right.get_jint_constant() & 0x3f; ++ __ shift_left(left.result(), c, x->operand()); ++ break; ++ case Bytecodes::_lshr: ++ c = right.get_jint_constant() & 0x3f; ++ __ shift_right(left.result(), c, x->operand()); ++ break; ++ case Bytecodes::_lushr: ++ c = right.get_jint_constant() & 0x3f; ++ __ unsigned_shift_right(left.result(), c, x->operand()); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ right.load_item(); ++ LIR_Opr tmp = new_register(T_INT); ++ switch (x->op()) { ++ case Bytecodes::_ishl: ++ __ logical_and(right.result(), LIR_OprFact::intConst(0x1f), tmp); ++ __ shift_left(left.result(), tmp, x->operand(), tmp); ++ break; ++ case Bytecodes::_ishr: ++ __ logical_and(right.result(), LIR_OprFact::intConst(0x1f), tmp); ++ __ shift_right(left.result(), tmp, x->operand(), tmp); ++ break; ++ case Bytecodes::_iushr: ++ __ logical_and(right.result(), LIR_OprFact::intConst(0x1f), tmp); ++ __ unsigned_shift_right(left.result(), tmp, x->operand(), tmp); ++ break; ++ case Bytecodes::_lshl: ++ __ logical_and(right.result(), LIR_OprFact::intConst(0x3f), tmp); ++ __ shift_left(left.result(), tmp, x->operand(), tmp); ++ break; ++ case Bytecodes::_lshr: ++ __ logical_and(right.result(), LIR_OprFact::intConst(0x3f), tmp); ++ __ shift_right(left.result(), tmp, x->operand(), tmp); ++ break; ++ case Bytecodes::_lushr: ++ __ logical_and(right.result(), LIR_OprFact::intConst(0x3f), tmp); ++ __ unsigned_shift_right(left.result(), tmp, x->operand(), tmp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++} ++ ++// _iand, _land, _ior, _lor, _ixor, _lxor ++void LIRGenerator::do_LogicOp(LogicOp* x) { ++ LIRItem left(x->x(), this); ++ LIRItem right(x->y(), this); ++ ++ left.load_item(); ++ ++ rlock_result(x); ++ if (right.is_constant() ++ && ((right.type()->tag() == intTag ++ && Assembler::is_uimm(right.get_jint_constant(), 12)) ++ || (right.type()->tag() == longTag ++ && 
Assembler::is_uimm(right.get_jlong_constant(), 12)))) { ++ right.dont_load_item(); ++ } else { ++ right.load_item(); ++ } ++ switch (x->op()) { ++ case Bytecodes::_iand: ++ case Bytecodes::_land: ++ __ logical_and(left.result(), right.result(), x->operand()); break; ++ case Bytecodes::_ior: ++ case Bytecodes::_lor: ++ __ logical_or (left.result(), right.result(), x->operand()); break; ++ case Bytecodes::_ixor: ++ case Bytecodes::_lxor: ++ __ logical_xor(left.result(), right.result(), x->operand()); break; ++ default: Unimplemented(); ++ } ++} ++ ++// _lcmp, _fcmpl, _fcmpg, _dcmpl, _dcmpg ++void LIRGenerator::do_CompareOp(CompareOp* x) { ++ LIRItem left(x->x(), this); ++ LIRItem right(x->y(), this); ++ ValueTag tag = x->x()->type()->tag(); ++ if (tag == longTag) { ++ left.set_destroys_register(); ++ } ++ left.load_item(); ++ right.load_item(); ++ LIR_Opr reg = rlock_result(x); ++ ++ if (x->x()->type()->is_float_kind()) { ++ Bytecodes::Code code = x->op(); ++ __ fcmp2int(left.result(), right.result(), reg, ++ (code == Bytecodes::_fcmpl || code == Bytecodes::_dcmpl)); ++ } else if (x->x()->type()->tag() == longTag) { ++ __ lcmp2int(left.result(), right.result(), reg); ++ } else { ++ Unimplemented(); ++ } ++} ++ ++LIR_Opr LIRGenerator::atomic_cmpxchg(BasicType type, LIR_Opr addr, ++ LIRItem& cmp_value, LIRItem& new_value) { ++ LIR_Opr ill = LIR_OprFact::illegalOpr; // for convenience ++ new_value.load_item(); ++ cmp_value.load_item(); ++ LIR_Opr result = new_register(T_INT); ++ if (is_reference_type(type)) { ++ __ cas_obj(addr, cmp_value.result(), new_value.result(), ++ new_register(T_INT), new_register(T_INT), result); ++ } else if (type == T_INT) { ++ __ cas_int(addr->as_address_ptr()->base(), cmp_value.result(), ++ new_value.result(), ill, ill); ++ } else if (type == T_LONG) { ++ __ cas_long(addr->as_address_ptr()->base(), cmp_value.result(), ++ new_value.result(), ill, ill); ++ } else { ++ ShouldNotReachHere(); ++ Unimplemented(); ++ } ++ __ move(FrameMap::scr1_opr, result); ++ return result; ++} ++ ++LIR_Opr LIRGenerator::atomic_xchg(BasicType type, LIR_Opr addr, LIRItem& value) { ++ bool is_oop = is_reference_type(type); ++ LIR_Opr result = new_register(type); ++ value.load_item(); ++ assert(type == T_INT || is_oop || type == T_LONG , "unexpected type"); ++ LIR_Opr tmp = new_register(T_INT); ++ __ xchg(addr, value.result(), result, tmp); ++ return result; ++} ++ ++LIR_Opr LIRGenerator::atomic_add(BasicType type, LIR_Opr addr, LIRItem& value) { ++ LIR_Opr result = new_register(type); ++ value.load_item(); ++ assert(type == T_INT || type == T_LONG , "unexpected type"); ++ LIR_Opr tmp = new_register(T_INT); ++ __ xadd(addr, value.result(), result, tmp); ++ return result; ++} ++ ++void LIRGenerator::do_MathIntrinsic(Intrinsic* x) { ++ assert(x->number_of_arguments() == 1 || (x->number_of_arguments() == 2 && x->id() == vmIntrinsics::_dpow), ++ "wrong type"); ++ if (x->id() == vmIntrinsics::_dexp || x->id() == vmIntrinsics::_dlog || ++ x->id() == vmIntrinsics::_dpow || x->id() == vmIntrinsics::_dcos || ++ x->id() == vmIntrinsics::_dsin || x->id() == vmIntrinsics::_dtan || ++ x->id() == vmIntrinsics::_dlog10) { ++ do_LibmIntrinsic(x); ++ return; ++ } ++ switch (x->id()) { ++ case vmIntrinsics::_dabs: ++ case vmIntrinsics::_dsqrt: { ++ assert(x->number_of_arguments() == 1, "wrong type"); ++ LIRItem value(x->argument_at(0), this); ++ value.load_item(); ++ LIR_Opr dst = rlock_result(x); ++ ++ switch (x->id()) { ++ case vmIntrinsics::_dsqrt: ++ __ sqrt(value.result(), dst, LIR_OprFact::illegalOpr); 
++ break; ++ case vmIntrinsics::_dabs: ++ __ abs(value.result(), dst, LIR_OprFact::illegalOpr); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ break; ++ } ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) { ++ LIRItem value(x->argument_at(0), this); ++ value.set_destroys_register(); ++ ++ LIR_Opr calc_result = rlock_result(x); ++ LIR_Opr result_reg = result_register_for(x->type()); ++ ++ CallingConvention* cc = NULL; ++ ++ if (x->id() == vmIntrinsics::_dpow) { ++ LIRItem value1(x->argument_at(1), this); ++ ++ value1.set_destroys_register(); ++ ++ BasicTypeList signature(2); ++ signature.append(T_DOUBLE); ++ signature.append(T_DOUBLE); ++ cc = frame_map()->c_calling_convention(&signature); ++ value.load_item_force(cc->at(0)); ++ value1.load_item_force(cc->at(1)); ++ } else { ++ BasicTypeList signature(1); ++ signature.append(T_DOUBLE); ++ cc = frame_map()->c_calling_convention(&signature); ++ value.load_item_force(cc->at(0)); ++ } ++ ++ switch (x->id()) { ++ case vmIntrinsics::_dexp: ++ if (StubRoutines::dexp() != NULL) { ++ __ call_runtime_leaf(StubRoutines::dexp(), getThreadTemp(), result_reg, cc->args()); ++ } else { ++ __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dexp), getThreadTemp(), result_reg, cc->args()); ++ } ++ break; ++ case vmIntrinsics::_dlog: ++ if (StubRoutines::dlog() != NULL) { ++ __ call_runtime_leaf(StubRoutines::dlog(), getThreadTemp(), result_reg, cc->args()); ++ } else { ++ __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog), getThreadTemp(), result_reg, cc->args()); ++ } ++ break; ++ case vmIntrinsics::_dlog10: ++ if (StubRoutines::dlog10() != NULL) { ++ __ call_runtime_leaf(StubRoutines::dlog10(), getThreadTemp(), result_reg, cc->args()); ++ } else { ++ __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10), getThreadTemp(), result_reg, cc->args()); ++ } ++ break; ++ case vmIntrinsics::_dpow: ++ if (StubRoutines::dpow() != NULL) { ++ __ call_runtime_leaf(StubRoutines::dpow(), getThreadTemp(), result_reg, cc->args()); ++ } else { ++ __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dpow), getThreadTemp(), result_reg, cc->args()); ++ } ++ break; ++ case vmIntrinsics::_dsin: ++ if (StubRoutines::dsin() != NULL) { ++ __ call_runtime_leaf(StubRoutines::dsin(), getThreadTemp(), result_reg, cc->args()); ++ } else { ++ __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), getThreadTemp(), result_reg, cc->args()); ++ } ++ break; ++ case vmIntrinsics::_dcos: ++ if (StubRoutines::dcos() != NULL) { ++ __ call_runtime_leaf(StubRoutines::dcos(), getThreadTemp(), result_reg, cc->args()); ++ } else { ++ __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), getThreadTemp(), result_reg, cc->args()); ++ } ++ break; ++ case vmIntrinsics::_dtan: ++ if (StubRoutines::dtan() != NULL) { ++ __ call_runtime_leaf(StubRoutines::dtan(), getThreadTemp(), result_reg, cc->args()); ++ } else { ++ __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), getThreadTemp(), result_reg, cc->args()); ++ } ++ break; ++ default: ShouldNotReachHere(); ++ } ++ __ move(result_reg, calc_result); ++} ++ ++void LIRGenerator::do_ArrayCopy(Intrinsic* x) { ++ Register j_rarg0 = RT0; ++ Register j_rarg1 = A0; ++ Register j_rarg2 = A1; ++ Register j_rarg3 = A2; ++ Register j_rarg4 = A3; ++ Register j_rarg5 = A4; ++ ++ assert(x->number_of_arguments() == 5, "wrong type"); ++ ++ // Make all state_for calls early since they can emit code ++ CodeEmitInfo* info = 
state_for(x, x->state()); ++ ++ LIRItem src(x->argument_at(0), this); ++ LIRItem src_pos(x->argument_at(1), this); ++ LIRItem dst(x->argument_at(2), this); ++ LIRItem dst_pos(x->argument_at(3), this); ++ LIRItem length(x->argument_at(4), this); ++ ++ // operands for arraycopy must use fixed registers, otherwise ++ // LinearScan will fail allocation (because arraycopy always needs a ++ // call) ++ ++ // The java calling convention will give us enough registers ++ // so that on the stub side the args will be perfect already. ++ // On the other slow/special case side we call C and the arg ++ // positions are not similar enough to pick one as the best. ++ // Also because the java calling convention is a "shifted" version ++ // of the C convention we can process the java args trivially into C ++ // args without worry of overwriting during the xfer ++ ++ src.load_item_force (FrameMap::as_oop_opr(j_rarg0)); ++ src_pos.load_item_force (FrameMap::as_opr(j_rarg1)); ++ dst.load_item_force (FrameMap::as_oop_opr(j_rarg2)); ++ dst_pos.load_item_force (FrameMap::as_opr(j_rarg3)); ++ length.load_item_force (FrameMap::as_opr(j_rarg4)); ++ ++ LIR_Opr tmp = FrameMap::as_opr(j_rarg5); ++ ++ set_no_result(x); ++ ++ int flags; ++ ciArrayKlass* expected_type; ++ arraycopy_helper(x, &flags, &expected_type); ++ ++ __ arraycopy(src.result(), src_pos.result(), dst.result(), dst_pos.result(), ++ length.result(), tmp, expected_type, flags, info); // does add_safepoint ++} ++ ++void LIRGenerator::do_update_CRC32(Intrinsic* x) { ++ assert(UseCRC32Intrinsics, "why are we here?"); ++ // Make all state_for calls early since they can emit code ++ LIR_Opr result = rlock_result(x); ++ int flags = 0; ++ switch (x->id()) { ++ case vmIntrinsics::_updateCRC32: { ++ LIRItem crc(x->argument_at(0), this); ++ LIRItem val(x->argument_at(1), this); ++ // val is destroyed by update_crc32 ++ val.set_destroys_register(); ++ crc.load_item(); ++ val.load_item(); ++ __ update_crc32(crc.result(), val.result(), result); ++ break; ++ } ++ case vmIntrinsics::_updateBytesCRC32: ++ case vmIntrinsics::_updateByteBufferCRC32: { ++ bool is_updateBytes = (x->id() == vmIntrinsics::_updateBytesCRC32); ++ ++ LIRItem crc(x->argument_at(0), this); ++ LIRItem buf(x->argument_at(1), this); ++ LIRItem off(x->argument_at(2), this); ++ LIRItem len(x->argument_at(3), this); ++ buf.load_item(); ++ off.load_nonconstant(); ++ ++ LIR_Opr index = off.result(); ++ int offset = is_updateBytes ? 
arrayOopDesc::base_offset_in_bytes(T_BYTE) : 0; ++ if(off.result()->is_constant()) { ++ index = LIR_OprFact::illegalOpr; ++ offset += off.result()->as_jint(); ++ } ++ LIR_Opr base_op = buf.result(); ++ ++ if (index->is_valid()) { ++ LIR_Opr tmp = new_register(T_LONG); ++ __ convert(Bytecodes::_i2l, index, tmp); ++ index = tmp; ++ } ++ ++ if (offset) { ++ LIR_Opr tmp = new_pointer_register(); ++ __ add(base_op, LIR_OprFact::intConst(offset), tmp); ++ base_op = tmp; ++ offset = 0; ++ } ++ ++ LIR_Address* a = new LIR_Address(base_op, index, offset, T_BYTE); ++ BasicTypeList signature(3); ++ signature.append(T_INT); ++ signature.append(T_ADDRESS); ++ signature.append(T_INT); ++ CallingConvention* cc = frame_map()->c_calling_convention(&signature); ++ const LIR_Opr result_reg = result_register_for(x->type()); ++ ++ LIR_Opr addr = new_pointer_register(); ++ __ leal(LIR_OprFact::address(a), addr); ++ ++ crc.load_item_force(cc->at(0)); ++ __ move(addr, cc->at(1)); ++ len.load_item_force(cc->at(2)); ++ ++ __ call_runtime_leaf(StubRoutines::updateBytesCRC32(), getThreadTemp(), result_reg, cc->args()); ++ __ move(result_reg, result); ++ ++ break; ++ } ++ default: { ++ ShouldNotReachHere(); ++ } ++ } ++} ++ ++void LIRGenerator::do_update_CRC32C(Intrinsic* x) { ++ assert(UseCRC32CIntrinsics, "why are we here?"); ++ // Make all state_for calls early since they can emit code ++ LIR_Opr result = rlock_result(x); ++ int flags = 0; ++ switch (x->id()) { ++ case vmIntrinsics::_updateBytesCRC32C: ++ case vmIntrinsics::_updateDirectByteBufferCRC32C: { ++ bool is_updateBytes = (x->id() == vmIntrinsics::_updateBytesCRC32C); ++ int offset = is_updateBytes ? arrayOopDesc::base_offset_in_bytes(T_BYTE) : 0; ++ ++ LIRItem crc(x->argument_at(0), this); ++ LIRItem buf(x->argument_at(1), this); ++ LIRItem off(x->argument_at(2), this); ++ LIRItem end(x->argument_at(3), this); ++ ++ buf.load_item(); ++ off.load_nonconstant(); ++ end.load_nonconstant(); ++ ++ // len = end - off ++ LIR_Opr len = end.result(); ++ LIR_Opr tmpA = new_register(T_INT); ++ LIR_Opr tmpB = new_register(T_INT); ++ __ move(end.result(), tmpA); ++ __ move(off.result(), tmpB); ++ __ sub(tmpA, tmpB, tmpA); ++ len = tmpA; ++ ++ LIR_Opr index = off.result(); ++ if(off.result()->is_constant()) { ++ index = LIR_OprFact::illegalOpr; ++ offset += off.result()->as_jint(); ++ } ++ LIR_Opr base_op = buf.result(); ++ ++ if (index->is_valid()) { ++ LIR_Opr tmp = new_register(T_LONG); ++ __ convert(Bytecodes::_i2l, index, tmp); ++ index = tmp; ++ } ++ ++ if (offset) { ++ LIR_Opr tmp = new_pointer_register(); ++ __ add(base_op, LIR_OprFact::intConst(offset), tmp); ++ base_op = tmp; ++ offset = 0; ++ } ++ ++ LIR_Address* a = new LIR_Address(base_op, index, offset, T_BYTE); ++ BasicTypeList signature(3); ++ signature.append(T_INT); ++ signature.append(T_ADDRESS); ++ signature.append(T_INT); ++ CallingConvention* cc = frame_map()->c_calling_convention(&signature); ++ const LIR_Opr result_reg = result_register_for(x->type()); ++ ++ LIR_Opr addr = new_pointer_register(); ++ __ leal(LIR_OprFact::address(a), addr); ++ ++ crc.load_item_force(cc->at(0)); ++ __ move(addr, cc->at(1)); ++ __ move(len, cc->at(2)); ++ ++ __ call_runtime_leaf(StubRoutines::updateBytesCRC32C(), getThreadTemp(), result_reg, cc->args()); ++ __ move(result_reg, result); ++ ++ break; ++ } ++ default: { ++ ShouldNotReachHere(); ++ } ++ } ++} ++ ++void LIRGenerator::do_FmaIntrinsic(Intrinsic* x) { ++ assert(x->number_of_arguments() == 3, "wrong type"); ++ assert(UseFMA, "Needs FMA instructions 
support."); ++ LIRItem value(x->argument_at(0), this); ++ LIRItem value1(x->argument_at(1), this); ++ LIRItem value2(x->argument_at(2), this); ++ ++ value.load_item(); ++ value1.load_item(); ++ value2.load_item(); ++ ++ LIR_Opr calc_input = value.result(); ++ LIR_Opr calc_input1 = value1.result(); ++ LIR_Opr calc_input2 = value2.result(); ++ LIR_Opr calc_result = rlock_result(x); ++ ++ switch (x->id()) { ++ case vmIntrinsics::_fmaD: ++ __ fmad(calc_input, calc_input1, calc_input2, calc_result); ++ break; ++ case vmIntrinsics::_fmaF: ++ __ fmaf(calc_input, calc_input1, calc_input2, calc_result); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIRGenerator::do_vectorizedMismatch(Intrinsic* x) { ++ fatal("vectorizedMismatch intrinsic is not implemented on this platform"); ++} ++ ++// _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l, _f2d, _d2i, _d2l, _d2f ++// _i2b, _i2c, _i2s ++void LIRGenerator::do_Convert(Convert* x) { ++ LIRItem value(x->value(), this); ++ value.load_item(); ++ LIR_Opr input = value.result(); ++ LIR_Opr result = rlock(x); ++ ++ // arguments of lir_convert ++ LIR_Opr conv_input = input; ++ LIR_Opr conv_result = result; ++ ++ switch (x->op()) { ++ case Bytecodes::_f2i: ++ case Bytecodes::_f2l: ++ __ convert(x->op(), conv_input, conv_result, NULL, new_register(T_FLOAT)); ++ break; ++ case Bytecodes::_d2i: ++ case Bytecodes::_d2l: ++ __ convert(x->op(), conv_input, conv_result, NULL, new_register(T_DOUBLE)); ++ break; ++ default: ++ __ convert(x->op(), conv_input, conv_result); ++ break; ++ } ++ ++ assert(result->is_virtual(), "result must be virtual register"); ++ set_result(x, result); ++} ++ ++void LIRGenerator::do_NewInstance(NewInstance* x) { ++#ifndef PRODUCT ++ if (PrintNotLoaded && !x->klass()->is_loaded()) { ++ tty->print_cr(" ###class not loaded at new bci %d", x->printable_bci()); ++ } ++#endif ++ CodeEmitInfo* info = state_for(x, x->state()); ++ LIR_Opr reg = result_register_for(x->type()); ++ new_instance(reg, x->klass(), x->is_unresolved(), ++ FrameMap::t0_oop_opr, ++ FrameMap::t1_oop_opr, ++ FrameMap::a4_oop_opr, ++ LIR_OprFact::illegalOpr, ++ FrameMap::a3_metadata_opr, info); ++ LIR_Opr result = rlock_result(x); ++ __ move(reg, result); ++} ++ ++void LIRGenerator::do_NewTypeArray(NewTypeArray* x) { ++ CodeEmitInfo* info = state_for(x, x->state()); ++ ++ LIRItem length(x->length(), this); ++ length.load_item_force(FrameMap::s0_opr); ++ ++ LIR_Opr reg = result_register_for(x->type()); ++ LIR_Opr tmp1 = FrameMap::t0_oop_opr; ++ LIR_Opr tmp2 = FrameMap::t1_oop_opr; ++ LIR_Opr tmp3 = FrameMap::a5_oop_opr; ++ LIR_Opr tmp4 = reg; ++ LIR_Opr klass_reg = FrameMap::a3_metadata_opr; ++ LIR_Opr len = length.result(); ++ BasicType elem_type = x->elt_type(); ++ ++ __ metadata2reg(ciTypeArrayKlass::make(elem_type)->constant_encoding(), klass_reg); ++ ++ CodeStub* slow_path = new NewTypeArrayStub(klass_reg, len, reg, info); ++ __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, elem_type, klass_reg, slow_path); ++ ++ LIR_Opr result = rlock_result(x); ++ __ move(reg, result); ++} ++ ++void LIRGenerator::do_NewObjectArray(NewObjectArray* x) { ++ LIRItem length(x->length(), this); ++ // in case of patching (i.e., object class is not yet loaded), we need to reexecute the instruction ++ // and therefore provide the state before the parameters have been consumed ++ CodeEmitInfo* patching_info = NULL; ++ if (!x->klass()->is_loaded() || PatchALot) { ++ patching_info = state_for(x, x->state_before()); ++ } ++ ++ CodeEmitInfo* info = state_for(x, x->state()); ++ ++ 
LIR_Opr reg = result_register_for(x->type()); ++ LIR_Opr tmp1 = FrameMap::t0_oop_opr; ++ LIR_Opr tmp2 = FrameMap::t1_oop_opr; ++ LIR_Opr tmp3 = FrameMap::a5_oop_opr; ++ LIR_Opr tmp4 = reg; ++ LIR_Opr klass_reg = FrameMap::a3_metadata_opr; ++ ++ length.load_item_force(FrameMap::s0_opr); ++ LIR_Opr len = length.result(); ++ ++ CodeStub* slow_path = new NewObjectArrayStub(klass_reg, len, reg, info); ++ ciKlass* obj = (ciKlass*) ciObjArrayKlass::make(x->klass()); ++ if (obj == ciEnv::unloaded_ciobjarrayklass()) { ++ BAILOUT("encountered unloaded_ciobjarrayklass due to out of memory error"); ++ } ++ klass2reg_with_patching(klass_reg, obj, patching_info); ++ __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, T_OBJECT, klass_reg, slow_path); ++ ++ LIR_Opr result = rlock_result(x); ++ __ move(reg, result); ++} ++ ++void LIRGenerator::do_NewMultiArray(NewMultiArray* x) { ++ Values* dims = x->dims(); ++ int i = dims->length(); ++ LIRItemList* items = new LIRItemList(i, i, NULL); ++ while (i-- > 0) { ++ LIRItem* size = new LIRItem(dims->at(i), this); ++ items->at_put(i, size); ++ } ++ ++ // Evaluate state_for early since it may emit code. ++ CodeEmitInfo* patching_info = NULL; ++ if (!x->klass()->is_loaded() || PatchALot) { ++ patching_info = state_for(x, x->state_before()); ++ ++ // Cannot re-use same xhandlers for multiple CodeEmitInfos, so ++ // clone all handlers (NOTE: Usually this is handled transparently ++ // by the CodeEmitInfo cloning logic in CodeStub constructors but ++ // is done explicitly here because a stub isn't being used). ++ x->set_exception_handlers(new XHandlers(x->exception_handlers())); ++ } ++ CodeEmitInfo* info = state_for(x, x->state()); ++ ++ i = dims->length(); ++ while (i-- > 0) { ++ LIRItem* size = items->at(i); ++ size->load_item(); ++ ++ store_stack_parameter(size->result(), in_ByteSize(i*4)); ++ } ++ ++ LIR_Opr klass_reg = FrameMap::a0_metadata_opr; ++ klass2reg_with_patching(klass_reg, x->klass(), patching_info); ++ ++ LIR_Opr rank = FrameMap::s0_opr; ++ __ move(LIR_OprFact::intConst(x->rank()), rank); ++ LIR_Opr varargs = FrameMap::a2_opr; ++ __ move(FrameMap::sp_opr, varargs); ++ LIR_OprList* args = new LIR_OprList(3); ++ args->append(klass_reg); ++ args->append(rank); ++ args->append(varargs); ++ LIR_Opr reg = result_register_for(x->type()); ++ __ call_runtime(Runtime1::entry_for(Runtime1::new_multi_array_id), ++ LIR_OprFact::illegalOpr, ++ reg, args, info); ++ ++ LIR_Opr result = rlock_result(x); ++ __ move(reg, result); ++} ++ ++void LIRGenerator::do_BlockBegin(BlockBegin* x) { ++ // nothing to do for now ++} ++ ++void LIRGenerator::do_CheckCast(CheckCast* x) { ++ LIRItem obj(x->obj(), this); ++ ++ CodeEmitInfo* patching_info = NULL; ++ if (!x->klass()->is_loaded() || ++ (PatchALot && !x->is_incompatible_class_change_check() && ++ !x->is_invokespecial_receiver_check())) { ++ // must do this before locking the destination register as an oop register, ++ // and before the obj is loaded (the latter is for deoptimization) ++ patching_info = state_for(x, x->state_before()); ++ } ++ obj.load_item(); ++ ++ // info for exceptions ++ CodeEmitInfo* info_for_exception = ++ (x->needs_exception_state() ? 
state_for(x) : ++ state_for(x, x->state_before(), true /*ignore_xhandler*/)); ++ ++ CodeStub* stub; ++ if (x->is_incompatible_class_change_check()) { ++ assert(patching_info == NULL, "can't patch this"); ++ stub = new SimpleExceptionStub(Runtime1::throw_incompatible_class_change_error_id, ++ LIR_OprFact::illegalOpr, info_for_exception); ++ } else if (x->is_invokespecial_receiver_check()) { ++ assert(patching_info == NULL, "can't patch this"); ++ stub = new DeoptimizeStub(info_for_exception, ++ Deoptimization::Reason_class_check, ++ Deoptimization::Action_none); ++ } else { ++ stub = new SimpleExceptionStub(Runtime1::throw_class_cast_exception_id, ++ obj.result(), info_for_exception); ++ } ++ LIR_Opr reg = rlock_result(x); ++ LIR_Opr tmp3 = LIR_OprFact::illegalOpr; ++ if (!x->klass()->is_loaded() || UseCompressedClassPointers) { ++ tmp3 = new_register(objectType); ++ } ++ __ checkcast(reg, obj.result(), x->klass(), ++ new_register(objectType), new_register(objectType), tmp3, ++ x->direct_compare(), info_for_exception, patching_info, stub, ++ x->profiled_method(), x->profiled_bci()); ++} ++ ++void LIRGenerator::do_InstanceOf(InstanceOf* x) { ++ LIRItem obj(x->obj(), this); ++ ++ // result and test object may not be in same register ++ LIR_Opr reg = rlock_result(x); ++ CodeEmitInfo* patching_info = NULL; ++ if ((!x->klass()->is_loaded() || PatchALot)) { ++ // must do this before locking the destination register as an oop register ++ patching_info = state_for(x, x->state_before()); ++ } ++ obj.load_item(); ++ LIR_Opr tmp3 = LIR_OprFact::illegalOpr; ++ if (!x->klass()->is_loaded() || UseCompressedClassPointers) { ++ tmp3 = new_register(objectType); ++ } ++ __ instanceof(reg, obj.result(), x->klass(), ++ new_register(objectType), new_register(objectType), tmp3, ++ x->direct_compare(), patching_info, x->profiled_method(), x->profiled_bci()); ++} ++ ++void LIRGenerator::do_If(If* x) { ++ assert(x->number_of_sux() == 2, "inconsistency"); ++ ValueTag tag = x->x()->type()->tag(); ++ bool is_safepoint = x->is_safepoint(); ++ ++ If::Condition cond = x->cond(); ++ ++ LIRItem xitem(x->x(), this); ++ LIRItem yitem(x->y(), this); ++ LIRItem* xin = &xitem; ++ LIRItem* yin = &yitem; ++ ++ if (tag == longTag) { ++ // for longs, only conditions "eql", "neq", "lss", "geq" are valid; ++ // mirror for other conditions ++ if (cond == If::gtr || cond == If::leq) { ++ cond = Instruction::mirror(cond); ++ xin = &yitem; ++ yin = &xitem; ++ } ++ xin->set_destroys_register(); ++ } ++ xin->load_item(); ++ ++ if (tag == longTag) { ++ if (yin->is_constant() && yin->get_jlong_constant() == 0) { ++ yin->dont_load_item(); ++ } else { ++ yin->load_item(); ++ } ++ } else if (tag == intTag) { ++ if (yin->is_constant() && yin->get_jint_constant() == 0) { ++ yin->dont_load_item(); ++ } else { ++ yin->load_item(); ++ } ++ } else { ++ yin->load_item(); ++ } ++ ++ set_no_result(x); ++ ++ LIR_Opr left = xin->result(); ++ LIR_Opr right = yin->result(); ++ ++ // add safepoint before generating condition code so it can be recomputed ++ if (x->is_safepoint()) { ++ // increment backedge counter if needed ++ increment_backedge_counter_conditionally(lir_cond(cond), left, right, state_for(x, x->state_before()), ++ x->tsux()->bci(), x->fsux()->bci(), x->profiled_bci()); ++ __ safepoint(LIR_OprFact::illegalOpr, state_for(x, x->state_before())); ++ } ++ ++ // Generate branch profiling. Profiling code doesn't kill flags. 
++ profile_branch(x, cond, left, right); ++ move_to_phi(x->state()); ++ if (x->x()->type()->is_float_kind()) { ++ __ cmp_branch(lir_cond(cond), left, right, right->type(), x->tsux(), x->usux()); ++ } else { ++ __ cmp_branch(lir_cond(cond), left, right, right->type(), x->tsux()); ++ } ++ assert(x->default_sux() == x->fsux(), "wrong destination above"); ++ __ jump(x->default_sux()); ++} ++ ++LIR_Opr LIRGenerator::getThreadPointer() { ++ return FrameMap::as_pointer_opr(TREG); ++} ++ ++void LIRGenerator::trace_block_entry(BlockBegin* block) { Unimplemented(); } ++ ++void LIRGenerator::volatile_field_store(LIR_Opr value, LIR_Address* address, ++ CodeEmitInfo* info) { ++ __ volatile_store_mem_reg(value, address, info); ++} ++ ++void LIRGenerator::volatile_field_load(LIR_Address* address, LIR_Opr result, ++ CodeEmitInfo* info) { ++ // 8179954: We need to make sure that the code generated for ++ // volatile accesses forms a sequentially-consistent set of ++ // operations when combined with STLR and LDAR. Without a leading ++ // membar it's possible for a simple Dekker test to fail if loads ++ // use LD;DMB but stores use STLR. This can happen if C2 compiles ++ // the stores in one method and C1 compiles the loads in another. ++ if (!UseBarriersForVolatile) { ++ __ membar(); ++ } ++ __ volatile_load_mem_reg(address, result, info); ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/c1_LIR_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_LIR_loongarch_64.cpp +--- a/src/hotspot/cpu/loongarch/c1_LIR_loongarch_64.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/c1_LIR_loongarch_64.cpp 2024-01-30 10:00:11.834765144 +0800 +@@ -0,0 +1,75 @@ ++/* ++ * Copyright (c) 2016, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/register.hpp" ++#include "c1/c1_LIR.hpp" ++ ++FloatRegister LIR_OprDesc::as_float_reg() const { ++ return as_FloatRegister(fpu_regnr()); ++} ++ ++FloatRegister LIR_OprDesc::as_double_reg() const { ++ return as_FloatRegister(fpu_regnrLo()); ++} ++ ++// Reg2 unused. 
++LIR_Opr LIR_OprFact::double_fpu(int reg1, int reg2) { ++ assert(as_FloatRegister(reg2) == fnoreg, "Not used on this platform"); ++ return (LIR_Opr)(intptr_t)((reg1 << LIR_OprDesc::reg1_shift) | ++ (reg1 << LIR_OprDesc::reg2_shift) | ++ LIR_OprDesc::double_type | ++ LIR_OprDesc::fpu_register | ++ LIR_OprDesc::double_size); ++} ++ ++#ifndef PRODUCT ++void LIR_Address::verify() const { ++ assert(base()->is_cpu_register(), "wrong base operand"); ++ assert(index()->is_illegal() || index()->is_double_cpu() || ++ index()->is_single_cpu(), "wrong index operand"); ++ assert(base()->type() == T_ADDRESS || base()->type() == T_OBJECT || ++ base()->type() == T_LONG || base()->type() == T_METADATA, ++ "wrong type for addresses"); ++} ++#endif // PRODUCT ++ ++template ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, T tgt, CodeEmitInfo* info) { ++ append(new LIR_OpCmpBranch(condition, left, right, tgt, info)); ++} ++ ++// Explicit instantiation for all supported types. ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, Label*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, BlockBegin*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, CodeStub*, CodeEmitInfo*); ++ ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* unordered) { ++ append(new LIR_OpCmpBranch(condition, left, right, block, unordered)); ++} ++ ++void LIR_List::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { ++ append(new LIR_Op4(lir_cmp_cmove, condition, left, right, src1, src2, dst, type)); ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/c1_MacroAssembler_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_MacroAssembler_loongarch_64.cpp +--- a/src/hotspot/cpu/loongarch/c1_MacroAssembler_loongarch_64.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/c1_MacroAssembler_loongarch_64.cpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,344 @@ ++/* ++ * Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "c1/c1_Runtime1.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "interpreter/interpreter.hpp" ++#include "oops/arrayOop.hpp" ++#include "oops/markOop.hpp" ++#include "runtime/basicLock.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/os.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++ ++#define T0 RT0 ++#define T1 RT1 ++ ++int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Register scratch, Label& slow_case) { ++ const int aligned_mask = BytesPerWord -1; ++ const int hdr_offset = oopDesc::mark_offset_in_bytes(); ++ assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); ++ int null_check_offset = -1; ++ Label done; ++ ++ verify_oop(obj); ++ ++ // save object being locked into the BasicObjectLock ++ st_ptr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); ++ ++ if (UseBiasedLocking) { ++ assert(scratch != noreg, "should have scratch register at this point"); ++ null_check_offset = biased_locking_enter(disp_hdr, obj, hdr, scratch, false, done, &slow_case); ++ } else { ++ null_check_offset = offset(); ++ } ++ ++ // Load object header ++ ld_ptr(hdr, Address(obj, hdr_offset)); ++ // and mark it as unlocked ++ ori(hdr, hdr, markOopDesc::unlocked_value); ++ // save unlocked object header into the displaced header location on the stack ++ st_ptr(hdr, Address(disp_hdr, 0)); ++ // test if object header is still the same (i.e. unlocked), and if so, store the ++ // displaced header address in the object header - if it is not the same, get the ++ // object header instead ++ lea(SCR2, Address(obj, hdr_offset)); ++ cmpxchg(Address(SCR2, 0), hdr, disp_hdr, SCR1, true, false, done); ++ // if the object header was the same, we're done ++ // if the object header was not the same, it is now in the hdr register ++ // => test if it is a stack pointer into the same stack (recursive locking), i.e.: ++ // ++ // 1) (hdr & aligned_mask) == 0 ++ // 2) sp <= hdr ++ // 3) hdr <= sp + page_size ++ // ++ // these 3 tests can be done by evaluating the following expression: ++ // ++ // (hdr - sp) & (aligned_mask - page_size) ++ // ++ // assuming both the stack pointer and page_size have their least ++ // significant 2 bits cleared and page_size is a power of 2 ++ sub_d(hdr, hdr, SP); ++ li(SCR1, aligned_mask - os::vm_page_size()); ++ andr(hdr, hdr, SCR1); ++ // for recursive locking, the result is zero => save it in the displaced header ++ // location (NULL in the displaced hdr location indicates recursive locking) ++ st_ptr(hdr, Address(disp_hdr, 0)); ++ // otherwise we don't care about the result and handle locking via runtime call ++ bnez(hdr, slow_case); ++ // done ++ bind(done); ++ if (PrintBiasedLockingStatistics) { ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, SCR1, SCR2); ++ } ++ return null_check_offset; ++} ++ ++void C1_MacroAssembler::unlock_object(Register hdr, Register obj, Register disp_hdr, Label& slow_case) { ++ const int aligned_mask = BytesPerWord -1; ++ const int hdr_offset = oopDesc::mark_offset_in_bytes(); ++ assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); ++ Label done; ++ ++ if (UseBiasedLocking) { ++ // load object ++ ld_ptr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); ++ biased_locking_exit(obj, hdr, done); ++ } ++ ++ // load displaced 
header ++ ld_ptr(hdr, Address(disp_hdr, 0)); ++ // if the loaded hdr is NULL we had recursive locking ++ // if we had recursive locking, we are done ++ beqz(hdr, done); ++ if (!UseBiasedLocking) { ++ // load object ++ ld_ptr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); ++ } ++ verify_oop(obj); ++ // test if object header is pointing to the displaced header, and if so, restore ++ // the displaced header in the object - if the object header is not pointing to ++ // the displaced header, get the object header instead ++ // if the object header was not pointing to the displaced header, ++ // we do unlocking via runtime call ++ if (hdr_offset) { ++ lea(SCR1, Address(obj, hdr_offset)); ++ cmpxchg(Address(SCR1, 0), disp_hdr, hdr, SCR2, false, false, done, &slow_case); ++ } else { ++ cmpxchg(Address(obj, 0), disp_hdr, hdr, SCR2, false, false, done, &slow_case); ++ } ++ // done ++ bind(done); ++} ++ ++// Defines obj, preserves var_size_in_bytes ++void C1_MacroAssembler::try_allocate(Register obj, Register var_size_in_bytes, ++ int con_size_in_bytes, Register t1, Register t2, ++ Label& slow_case) { ++ if (UseTLAB) { ++ tlab_allocate(obj, var_size_in_bytes, con_size_in_bytes, t1, t2, slow_case); ++ } else { ++ eden_allocate(obj, var_size_in_bytes, con_size_in_bytes, t1, slow_case); ++ } ++} ++ ++void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, ++ Register t1, Register t2) { ++ assert_different_registers(obj, klass, len); ++ if (UseBiasedLocking && !len->is_valid()) { ++ assert_different_registers(obj, klass, len, t1, t2); ++ ld_ptr(t1, Address(klass, Klass::prototype_header_offset())); ++ } else { ++ // This assumes that all prototype bits fit in an int32_t ++ li(t1, (int32_t)(intptr_t)markOopDesc::prototype()); ++ } ++ st_ptr(t1, Address(obj, oopDesc::mark_offset_in_bytes())); ++ ++ if (UseCompressedClassPointers) { // Take care not to kill klass ++ encode_klass_not_null(t1, klass); ++ st_w(t1, Address(obj, oopDesc::klass_offset_in_bytes())); ++ } else { ++ st_ptr(klass, Address(obj, oopDesc::klass_offset_in_bytes())); ++ } ++ ++ if (len->is_valid()) { ++ st_w(len, Address(obj, arrayOopDesc::length_offset_in_bytes())); ++ } else if (UseCompressedClassPointers) { ++ store_klass_gap(obj, R0); ++ } ++} ++ ++// preserves obj, destroys len_in_bytes ++// ++// Scratch registers: t1 = T0, t2 = T1 ++// ++void C1_MacroAssembler::initialize_body(Register obj, Register len_in_bytes, ++ int hdr_size_in_bytes, Register t1, Register t2) { ++ assert(hdr_size_in_bytes >= 0, "header size must be positive or 0"); ++ assert(t1 == T0 && t2 == T1, "must be"); ++ Label done; ++ ++ // len_in_bytes is positive and ptr sized ++ addi_d(len_in_bytes, len_in_bytes, -hdr_size_in_bytes); ++ beqz(len_in_bytes, done); ++ ++ // zero_words() takes ptr in t1 and count in bytes in t2 ++ lea(t1, Address(obj, hdr_size_in_bytes)); ++ addi_d(t2, len_in_bytes, -BytesPerWord); ++ ++ Label loop; ++ bind(loop); ++ stx_d(R0, t1, t2); ++ addi_d(t2, t2, -BytesPerWord); ++ bge(t2, R0, loop); ++ ++ bind(done); ++} ++ ++void C1_MacroAssembler::allocate_object(Register obj, Register t1, Register t2, int header_size, ++ int object_size, Register klass, Label& slow_case) { ++ assert_different_registers(obj, t1, t2); ++ assert(header_size >= 0 && object_size >= header_size, "illegal sizes"); ++ ++ try_allocate(obj, noreg, object_size * BytesPerWord, t1, t2, slow_case); ++ ++ initialize_object(obj, klass, noreg, object_size * HeapWordSize, t1, t2, UseTLAB); ++} ++ ++// Scratch registers: t1 = T0, 
t2 = T1 ++void C1_MacroAssembler::initialize_object(Register obj, Register klass, Register var_size_in_bytes, ++ int con_size_in_bytes, Register t1, Register t2, ++ bool is_tlab_allocated) { ++ assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, ++ "con_size_in_bytes is not multiple of alignment"); ++ const int hdr_size_in_bytes = instanceOopDesc::header_size() * HeapWordSize; ++ ++ initialize_header(obj, klass, noreg, t1, t2); ++ ++ if (!(UseTLAB && ZeroTLAB && is_tlab_allocated)) { ++ // clear rest of allocated space ++ const Register index = t2; ++ if (var_size_in_bytes != noreg) { ++ move(index, var_size_in_bytes); ++ initialize_body(obj, index, hdr_size_in_bytes, t1, t2); ++ } else if (con_size_in_bytes > hdr_size_in_bytes) { ++ con_size_in_bytes -= hdr_size_in_bytes; ++ lea(t1, Address(obj, hdr_size_in_bytes)); ++ Label loop; ++ li(SCR1, con_size_in_bytes - BytesPerWord); ++ bind(loop); ++ stx_d(R0, t1, SCR1); ++ addi_d(SCR1, SCR1, -BytesPerWord); ++ bge(SCR1, R0, loop); ++ } ++ } ++ ++ membar(StoreStore); ++ ++ if (CURRENT_ENV->dtrace_alloc_probes()) { ++ assert(obj == A0, "must be"); ++ call(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id), relocInfo::runtime_call_type); ++ } ++ ++ verify_oop(obj); ++} ++ ++void C1_MacroAssembler::allocate_array(Register obj, Register len, Register t1, Register t2, ++ int header_size, int f, Register klass, Label& slow_case) { ++ assert_different_registers(obj, len, t1, t2, klass); ++ ++ // determine alignment mask ++ assert(!(BytesPerWord & 1), "must be a multiple of 2 for masking code to work"); ++ ++ // check for negative or excessive length ++ li(SCR1, (int32_t)max_array_allocation_length); ++ bge_far(len, SCR1, slow_case, false); ++ ++ const Register arr_size = t2; // okay to be the same ++ // align object end ++ li(arr_size, (int32_t)header_size * BytesPerWord + MinObjAlignmentInBytesMask); ++ slli_w(SCR1, len, f); ++ add_d(arr_size, arr_size, SCR1); ++ bstrins_d(arr_size, R0, exact_log2(MinObjAlignmentInBytesMask + 1) - 1, 0); ++ ++ try_allocate(obj, arr_size, 0, t1, t2, slow_case); ++ ++ initialize_header(obj, klass, len, t1, t2); ++ ++ // clear rest of allocated space ++ initialize_body(obj, arr_size, header_size * BytesPerWord, t1, t2); ++ ++ membar(StoreStore); ++ ++ if (CURRENT_ENV->dtrace_alloc_probes()) { ++ assert(obj == A0, "must be"); ++ call(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id), relocInfo::runtime_call_type); ++ } ++ ++ verify_oop(obj); ++} ++ ++void C1_MacroAssembler::build_frame(int framesize, int bang_size_in_bytes) { ++ assert(bang_size_in_bytes >= framesize, "stack bang size incorrect"); ++ // Make sure there is enough stack space for this method's activation. ++ // Note that we do this before creating a frame. ++ generate_stack_overflow_check(bang_size_in_bytes); ++ MacroAssembler::build_frame(framesize); ++} ++ ++void C1_MacroAssembler::remove_frame(int framesize) { ++ MacroAssembler::remove_frame(framesize); ++} ++ ++void C1_MacroAssembler::verified_entry() { ++ // If we have to make this method not-entrant we'll overwrite its ++ // first instruction with a jump. For this action to be legal we ++ // must ensure that this first instruction is a b, bl, nop, break. ++ // Make it a NOP. ++ nop(); ++} ++ ++void C1_MacroAssembler::load_parameter(int offset_in_words, Register reg) { ++ // rbp, + 0: link ++ // + 1: return address ++ // + 2: argument with offset 0 ++ // + 3: argument with offset 1 ++ // + 4: ... 
++ ++ ld_ptr(reg, Address(FP, (offset_in_words + 2) * BytesPerWord)); ++} ++ ++#ifndef PRODUCT ++void C1_MacroAssembler::verify_stack_oop(int stack_offset) { ++ if (!VerifyOops) return; ++ verify_oop_addr(Address(SP, stack_offset), "oop"); ++} ++ ++void C1_MacroAssembler::verify_not_null_oop(Register r) { ++ if (!VerifyOops) return; ++ Label not_null; ++ bnez(r, not_null); ++ stop("non-null oop required"); ++ bind(not_null); ++ verify_oop(r); ++} ++ ++void C1_MacroAssembler::invalidate_registers(bool inv_a0, bool inv_s0, bool inv_a2, ++ bool inv_a3, bool inv_a4, bool inv_a5) { ++#ifdef ASSERT ++ static int nn; ++ if (inv_a0) li(A0, 0xDEAD); ++ if (inv_s0) li(S0, 0xDEAD); ++ if (inv_a2) li(A2, nn++); ++ if (inv_a3) li(A3, 0xDEAD); ++ if (inv_a4) li(A4, 0xDEAD); ++ if (inv_a5) li(A5, 0xDEAD); ++#endif ++} ++#endif // ifndef PRODUCT +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/c1_MacroAssembler_loongarch.hpp b/src/hotspot/cpu/loongarch/c1_MacroAssembler_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/c1_MacroAssembler_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/c1_MacroAssembler_loongarch.hpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,112 @@ ++/* ++ * Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_C1_MACROASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_C1_MACROASSEMBLER_LOONGARCH_HPP ++ ++using MacroAssembler::build_frame; ++using MacroAssembler::null_check; ++ ++// C1_MacroAssembler contains high-level macros for C1 ++ ++ private: ++ int _rsp_offset; // track rsp changes ++ // initialization ++ void pd_init() { _rsp_offset = 0; } ++ ++ public: ++ void try_allocate( ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Register t2, // temp register ++ Label& slow_case // continuation point if fast allocation fails ++ ); ++ ++ void initialize_header(Register obj, Register klass, Register len, Register t1, Register t2); ++ void initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register t1, Register t2); ++ ++ // locking ++ // hdr : must be A0, contents destroyed ++ // obj : must point to the object to lock, contents preserved ++ // disp_hdr: must point to the displaced header location, contents preserved ++ // scratch : scratch register, contents destroyed ++ // returns code offset at which to add null check debug information ++ int lock_object (Register swap, Register obj, Register disp_hdr, Register scratch, Label& slow_case); ++ ++ // unlocking ++ // hdr : contents destroyed ++ // obj : must point to the object to lock, contents preserved ++ // disp_hdr: must be A0 & must point to the displaced header location, contents destroyed ++ void unlock_object(Register swap, Register obj, Register lock, Label& slow_case); ++ ++ void initialize_object( ++ Register obj, // result: pointer to object after successful allocation ++ Register klass, // object klass ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Register t2, // temp register ++ bool is_tlab_allocated // the object was allocated in a TLAB; relevant for the implementation of ZeroTLAB ++ ); ++ ++ // allocation of fixed-size objects ++ // (can also be used to allocate fixed-size arrays, by setting ++ // hdr_size correctly and storing the array length afterwards) ++ // obj : will contain pointer to allocated object ++ // t1, t2 : scratch registers - contents destroyed ++ // header_size: size of object header in words ++ // object_size: total size of object in words ++ // slow_case : exit to slow case implementation if fast allocation fails ++ void allocate_object(Register obj, Register t1, Register t2, int header_size, ++ int object_size, Register klass, Label& slow_case); ++ ++ enum { ++ max_array_allocation_length = 0x00FFFFFF ++ }; ++ ++ // allocation of arrays ++ // obj : will contain pointer to allocated object ++ // len : array length in number of elements ++ // t : scratch register - contents destroyed ++ // header_size: size of object header in words ++ // f : element scale factor ++ // slow_case : exit to slow case implementation if fast allocation fails ++ void allocate_array(Register obj, Register len, Register t, Register t2, int header_size, ++ int f, Register klass, Label& slow_case); ++ ++ int rsp_offset() const { return _rsp_offset; } ++ void set_rsp_offset(int n) { _rsp_offset = n; } ++ ++ void invalidate_registers(bool inv_a0, bool inv_s0, bool inv_a2, bool inv_a3, 
++ bool inv_a4, bool inv_a5) PRODUCT_RETURN; ++ ++ // This platform only uses signal-based null checks. The Label is not needed. ++ void null_check(Register r, Label *Lnull = NULL) { MacroAssembler::null_check(r); } ++ ++ void load_parameter(int offset_in_words, Register reg); ++ ++#endif // CPU_LOONGARCH_C1_MACROASSEMBLER_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/c1_Runtime1_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_Runtime1_loongarch_64.cpp +--- a/src/hotspot/cpu/loongarch/c1_Runtime1_loongarch_64.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/c1_Runtime1_loongarch_64.cpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,1138 @@ ++/* ++ * Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "c1/c1_CodeStubs.hpp" ++#include "c1/c1_Defs.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "c1/c1_Runtime1.hpp" ++#include "compiler/disassembler.hpp" ++#include "compiler/oopMap.hpp" ++#include "gc/shared/cardTable.hpp" ++#include "gc/shared/cardTableBarrierSet.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/universe.hpp" ++#include "nativeInst_loongarch.hpp" ++#include "oops/compiledICHolder.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "register_loongarch.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/signature.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/vframe.hpp" ++#include "runtime/vframeArray.hpp" ++#include "vmreg_loongarch.inline.hpp" ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T5 RT5 ++#define T6 RT6 ++#define T8 RT8 ++ ++// Implementation of StubAssembler ++ ++int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, int args_size) { ++ // setup registers ++ assert(!(oop_result1->is_valid() || metadata_result->is_valid()) || oop_result1 != metadata_result, ++ "registers must be different"); ++ assert(oop_result1 != TREG && metadata_result != TREG, "registers must be different"); ++ assert(args_size >= 0, "illegal args_size"); ++ bool align_stack = false; ++ ++ move(A0, TREG); ++ set_num_rt_args(0); // Nothing on stack ++ ++ Label retaddr; ++ set_last_Java_frame(SP, FP, retaddr); ++ ++ // do the call ++ call(entry, relocInfo::runtime_call_type); ++ bind(retaddr); ++ int call_offset = offset(); ++ // verify callee-saved register ++#ifdef ASSERT ++ { Label L; ++ get_thread(SCR1); ++ beq(TREG, SCR1, L); ++ stop("StubAssembler::call_RT: TREG not callee saved?"); ++ bind(L); ++ } ++#endif ++ reset_last_Java_frame(true); ++ ++ // check for pending exceptions ++ { Label L; ++ // check for pending exceptions (java_thread is set upon return) ++ ld_ptr(SCR1, Address(TREG, in_bytes(Thread::pending_exception_offset()))); ++ beqz(SCR1, L); ++ // exception pending => remove activation and forward to exception handler ++ // make sure that the vm_results are cleared ++ if (oop_result1->is_valid()) { ++ st_ptr(R0, Address(TREG, JavaThread::vm_result_offset())); ++ } ++ if (metadata_result->is_valid()) { ++ st_ptr(R0, Address(TREG, JavaThread::vm_result_2_offset())); ++ } ++ if (frame_size() == no_frame_size) { ++ leave(); ++ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ } else if (_stub_id == Runtime1::forward_exception_id) { ++ should_not_reach_here(); ++ } else { ++ jmp(Runtime1::entry_for(Runtime1::forward_exception_id), relocInfo::runtime_call_type); ++ } ++ bind(L); ++ } ++ // get oop results if there are any and reset the values in the thread ++ if (oop_result1->is_valid()) { ++ get_vm_result(oop_result1, TREG); ++ } ++ if (metadata_result->is_valid()) { ++ get_vm_result_2(metadata_result, TREG); ++ } ++ return call_offset; ++} ++ ++int StubAssembler::call_RT(Register oop_result1, Register metadata_result, ++ address entry, Register arg1) { ++ move(A1, arg1); ++ return call_RT(oop_result1, metadata_result, entry, 1); ++} ++ ++int StubAssembler::call_RT(Register oop_result1, Register metadata_result, ++ address entry, Register arg1, Register arg2) { ++ if (A1 == arg2) { ++ if (A2 == arg1) { ++ move(SCR1, arg1); ++ move(arg1, arg2); ++ move(arg2, SCR1); ++ } else { ++ 
move(A2, arg2); ++ move(A1, arg1); ++ } ++ } else { ++ move(A1, arg1); ++ move(A2, arg2); ++ } ++ return call_RT(oop_result1, metadata_result, entry, 2); ++} ++ ++int StubAssembler::call_RT(Register oop_result1, Register metadata_result, ++ address entry, Register arg1, Register arg2, Register arg3) { ++ // if there is any conflict use the stack ++ if (arg1 == A2 || arg1 == A3 || ++ arg2 == A1 || arg2 == A3 || ++ arg3 == A1 || arg3 == A2) { ++ addi_d(SP, SP, -4 * wordSize); ++ st_ptr(arg1, Address(SP, 0 * wordSize)); ++ st_ptr(arg2, Address(SP, 1 * wordSize)); ++ st_ptr(arg3, Address(SP, 2 * wordSize)); ++ ld_ptr(arg1, Address(SP, 0 * wordSize)); ++ ld_ptr(arg2, Address(SP, 1 * wordSize)); ++ ld_ptr(arg3, Address(SP, 2 * wordSize)); ++ addi_d(SP, SP, 4 * wordSize); ++ } else { ++ move(A1, arg1); ++ move(A2, arg2); ++ move(A3, arg3); ++ } ++ return call_RT(oop_result1, metadata_result, entry, 3); ++} ++ ++enum return_state_t { ++ does_not_return, requires_return ++}; ++ ++// Implementation of StubFrame ++ ++class StubFrame: public StackObj { ++ private: ++ StubAssembler* _sasm; ++ bool _return_state; ++ ++ public: ++ StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments, ++ return_state_t return_state=requires_return); ++ void load_argument(int offset_in_words, Register reg); ++ ++ ~StubFrame(); ++}; ++ ++void StubAssembler::prologue(const char* name, bool must_gc_arguments) { ++ set_info(name, must_gc_arguments); ++ enter(); ++} ++ ++void StubAssembler::epilogue() { ++ leave(); ++ jr(RA); ++} ++ ++#define __ _sasm-> ++ ++StubFrame::StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments, ++ return_state_t return_state) { ++ _sasm = sasm; ++ _return_state = return_state; ++ __ prologue(name, must_gc_arguments); ++} ++ ++// load parameters that were stored with LIR_Assembler::store_parameter ++// Note: offsets for store_parameter and load_argument must match ++void StubFrame::load_argument(int offset_in_words, Register reg) { ++ __ load_parameter(offset_in_words, reg); ++} ++ ++StubFrame::~StubFrame() { ++ if (_return_state == requires_return) { ++ __ epilogue(); ++ } else { ++ __ should_not_reach_here(); ++ } ++} ++ ++#undef __ ++ ++// Implementation of Runtime1 ++ ++#define __ sasm-> ++ ++const int float_regs_as_doubles_size_in_slots = pd_nof_fpu_regs_frame_map * 2; ++ ++// Stack layout for saving/restoring all the registers needed during a runtime ++// call (this includes deoptimization) ++// Note that users of this frame may well have arguments to some runtime ++// while these values are on the stack. These positions neglect those arguments ++// but the code in save_live_registers will take the argument count into ++// account. ++// ++ ++enum reg_save_layout { ++ reg_save_frame_size = 32 /* float */ + 30 /* integer, except zr, tp */ ++}; ++ ++// Save off registers which might be killed by calls into the runtime. ++// Tries to be smart about FP registers. In particular we separate ++// saving and describing the FPU registers for deoptimization since we ++// have to save the FPU registers twice if we describe them. The ++// deopt blob is the only thing which needs to describe FPU registers. ++// In all other cases it should be sufficient to simply save their ++// current value.
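++// (Descriptive sketch, derived from save_live_registers() below: the save area
++// uses word offsets from the decremented SP, with words [0..31] holding
++// f0..f31 (only when save_fpu_registers) and words [32..59] holding r4..r31;
++// the zero register, ra, tp and sp are not saved.)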
++ ++static int cpu_reg_save_offsets[FrameMap::nof_cpu_regs]; ++static int fpu_reg_save_offsets[FrameMap::nof_fpu_regs]; ++static int reg_save_size_in_words; ++static int frame_size_in_bytes = -1; ++ ++static OopMap* generate_oop_map(StubAssembler* sasm, bool save_fpu_registers) { ++ int frame_size_in_bytes = reg_save_frame_size * BytesPerWord; ++ sasm->set_frame_size(frame_size_in_bytes / BytesPerWord); ++ int frame_size_in_slots = frame_size_in_bytes / VMRegImpl::stack_slot_size; ++ OopMap* oop_map = new OopMap(frame_size_in_slots, 0); ++ ++ for (int i = A0->encoding(); i <= T8->encoding(); i++) { ++ Register r = as_Register(i); ++ if (i != SCR1->encoding() && i != SCR2->encoding()) { ++ int sp_offset = cpu_reg_save_offsets[i]; ++ oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset), r->as_VMReg()); ++ } ++ } ++ ++ if (save_fpu_registers) { ++ for (int i = 0; i < FrameMap::nof_fpu_regs; i++) { ++ FloatRegister r = as_FloatRegister(i); ++ int sp_offset = fpu_reg_save_offsets[i]; ++ oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset), r->as_VMReg()); ++ } ++ } ++ ++ return oop_map; ++} ++ ++static OopMap* save_live_registers(StubAssembler* sasm, ++ bool save_fpu_registers = true) { ++ __ block_comment("save_live_registers"); ++ ++ // integer registers except zr & ra & tp & sp ++ __ addi_d(SP, SP, -(32 - 4 + 32) * wordSize); ++ ++ for (int i = 4; i < 32; i++) ++ __ st_ptr(as_Register(i), Address(SP, (32 + i - 4) * wordSize)); ++ ++ if (save_fpu_registers) { ++ for (int i = 0; i < 32; i++) ++ __ fst_d(as_FloatRegister(i), Address(SP, i * wordSize)); ++ } ++ ++ return generate_oop_map(sasm, save_fpu_registers); ++} ++ ++static void restore_live_registers(StubAssembler* sasm, bool restore_fpu_registers = true) { ++ if (restore_fpu_registers) { ++ for (int i = 0; i < 32; i ++) ++ __ fld_d(as_FloatRegister(i), Address(SP, i * wordSize)); ++ } ++ ++ for (int i = 4; i < 32; i++) ++ __ ld_ptr(as_Register(i), Address(SP, (32 + i - 4) * wordSize)); ++ ++ __ addi_d(SP, SP, (32 - 4 + 32) * wordSize); ++} ++ ++static void restore_live_registers_except_a0(StubAssembler* sasm, bool restore_fpu_registers = true) { ++ if (restore_fpu_registers) { ++ for (int i = 0; i < 32; i ++) ++ __ fld_d(as_FloatRegister(i), Address(SP, i * wordSize)); ++ } ++ ++ for (int i = 5; i < 32; i++) ++ __ ld_ptr(as_Register(i), Address(SP, (32 + i - 4) * wordSize)); ++ ++ __ addi_d(SP, SP, (32 - 4 + 32) * wordSize); ++} ++ ++void Runtime1::initialize_pd() { ++ int sp_offset = 0; ++ int i; ++ ++ // all float registers are saved explicitly ++ assert(FrameMap::nof_fpu_regs == 32, "double registers not handled here"); ++ for (i = 0; i < FrameMap::nof_fpu_regs; i++) { ++ fpu_reg_save_offsets[i] = sp_offset; ++ sp_offset += 2; // SP offsets are in halfwords ++ } ++ ++ for (i = 4; i < FrameMap::nof_cpu_regs; i++) { ++ Register r = as_Register(i); ++ cpu_reg_save_offsets[i] = sp_offset; ++ sp_offset += 2; // SP offsets are in halfwords ++ } ++} ++ ++// target: the entry point of the method that creates and posts the exception oop ++// has_argument: true if the exception needs arguments (passed in SCR1 and SCR2) ++ ++OopMapSet* Runtime1::generate_exception_throw(StubAssembler* sasm, address target, ++ bool has_argument) { ++ // make a frame and preserve the caller's caller-save registers ++ OopMap* oop_map = save_live_registers(sasm); ++ int call_offset; ++ if (!has_argument) { ++ call_offset = __ call_RT(noreg, noreg, target); ++ } else { ++ __ move(A1, SCR1); ++ __ move(A2, SCR2); ++ call_offset = __ call_RT(noreg, noreg, 
target); ++ } ++ OopMapSet* oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, oop_map); ++ return oop_maps; ++} ++ ++OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) { ++ __ block_comment("generate_handle_exception"); ++ ++ // incoming parameters ++ const Register exception_oop = A0; ++ const Register exception_pc = A1; ++ // other registers used in this stub ++ ++ // Save registers, if required. ++ OopMapSet* oop_maps = new OopMapSet(); ++ OopMap* oop_map = NULL; ++ switch (id) { ++ case forward_exception_id: ++ // We're handling an exception in the context of a compiled frame. ++ // The registers have been saved in the standard places. Perform ++ // an exception lookup in the caller and dispatch to the handler ++ // if found. Otherwise unwind and dispatch to the callers ++ // exception handler. ++ oop_map = generate_oop_map(sasm, 1 /*thread*/); ++ ++ // load and clear pending exception oop into A0 ++ __ ld_ptr(exception_oop, Address(TREG, Thread::pending_exception_offset())); ++ __ st_ptr(R0, Address(TREG, Thread::pending_exception_offset())); ++ ++ // load issuing PC (the return address for this stub) into A1 ++ __ ld_ptr(exception_pc, Address(FP, 1 * BytesPerWord)); ++ ++ // make sure that the vm_results are cleared (may be unnecessary) ++ __ st_ptr(R0, Address(TREG, JavaThread::vm_result_offset())); ++ __ st_ptr(R0, Address(TREG, JavaThread::vm_result_2_offset())); ++ break; ++ case handle_exception_nofpu_id: ++ case handle_exception_id: ++ // At this point all registers MAY be live. ++ oop_map = save_live_registers(sasm, id != handle_exception_nofpu_id); ++ break; ++ case handle_exception_from_callee_id: { ++ // At this point all registers except exception oop (A0) and ++ // exception pc (RA) are dead. ++ const int frame_size = 2 /*fp, return address*/; ++ oop_map = new OopMap(frame_size * VMRegImpl::slots_per_word, 0); ++ sasm->set_frame_size(frame_size); ++ break; ++ } ++ default: ShouldNotReachHere(); ++ } ++ ++ // verify that only A0 and A1 are valid at this time ++ __ invalidate_registers(false, true, true, true, true, true); ++ // verify that A0 contains a valid exception ++ __ verify_not_null_oop(exception_oop); ++ ++#ifdef ASSERT ++ // check that fields in JavaThread for exception oop and issuing pc are ++ // empty before writing to them ++ Label oop_empty; ++ __ ld_ptr(SCR1, Address(TREG, JavaThread::exception_oop_offset())); ++ __ beqz(SCR1, oop_empty); ++ __ stop("exception oop already set"); ++ __ bind(oop_empty); ++ ++ Label pc_empty; ++ __ ld_ptr(SCR1, Address(TREG, JavaThread::exception_pc_offset())); ++ __ beqz(SCR1, pc_empty); ++ __ stop("exception pc already set"); ++ __ bind(pc_empty); ++#endif ++ ++ // save exception oop and issuing pc into JavaThread ++ // (exception handler will load it from here) ++ __ st_ptr(exception_oop, Address(TREG, JavaThread::exception_oop_offset())); ++ __ st_ptr(exception_pc, Address(TREG, JavaThread::exception_pc_offset())); ++ ++ // patch throwing pc into return address (has bci & oop map) ++ __ st_ptr(exception_pc, Address(FP, 1 * BytesPerWord)); ++ ++ // compute the exception handler. ++ // the exception oop and the throwing pc are read from the fields in JavaThread ++ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, exception_handler_for_pc)); ++ oop_maps->add_gc_map(call_offset, oop_map); ++ ++ // A0: handler address ++ // will be the deopt blob if nmethod was deoptimized while we looked up ++ // handler regardless of whether handler existed in the nmethod. 
++ ++ // only A0 is valid at this time, all other registers have been destroyed by the runtime call ++ __ invalidate_registers(false, true, true, true, true, true); ++ ++ // patch the return address, this stub will directly return to the exception handler ++ __ st_ptr(A0, Address(FP, 1 * BytesPerWord)); ++ ++ switch (id) { ++ case forward_exception_id: ++ case handle_exception_nofpu_id: ++ case handle_exception_id: ++ // Restore the registers that were saved at the beginning. ++ restore_live_registers(sasm, id != handle_exception_nofpu_id); ++ break; ++ case handle_exception_from_callee_id: ++ break; ++ default: ShouldNotReachHere(); ++ } ++ ++ return oop_maps; ++} ++ ++void Runtime1::generate_unwind_exception(StubAssembler *sasm) { ++ // incoming parameters ++ const Register exception_oop = A0; ++ // callee-saved copy of exception_oop during runtime call ++ const Register exception_oop_callee_saved = S0; ++ // other registers used in this stub ++ const Register exception_pc = A1; ++ const Register handler_addr = A3; ++ ++ // verify that only A0, is valid at this time ++ __ invalidate_registers(false, true, true, true, true, true); ++ ++#ifdef ASSERT ++ // check that fields in JavaThread for exception oop and issuing pc are empty ++ Label oop_empty; ++ __ ld_ptr(SCR1, Address(TREG, JavaThread::exception_oop_offset())); ++ __ beqz(SCR1, oop_empty); ++ __ stop("exception oop must be empty"); ++ __ bind(oop_empty); ++ ++ Label pc_empty; ++ __ ld_ptr(SCR1, Address(TREG, JavaThread::exception_pc_offset())); ++ __ beqz(SCR1, pc_empty); ++ __ stop("exception pc must be empty"); ++ __ bind(pc_empty); ++#endif ++ ++ // Save our return address because ++ // exception_handler_for_return_address will destroy it. We also ++ // save exception_oop ++ __ addi_d(SP, SP, -2 * wordSize); ++ __ st_ptr(RA, Address(SP, 0 * wordSize)); ++ __ st_ptr(exception_oop, Address(SP, 1 * wordSize)); ++ ++ // search the exception handler address of the caller (using the return address) ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), TREG, RA); ++ // V0: exception handler address of the caller ++ ++ // Only V0 is valid at this time; all other registers have been ++ // destroyed by the call. ++ __ invalidate_registers(false, true, true, true, false, true); ++ ++ // move result of call into correct register ++ __ move(handler_addr, A0); ++ ++ // get throwing pc (= return address). ++ // RA has been destroyed by the call ++ __ ld_ptr(RA, Address(SP, 0 * wordSize)); ++ __ ld_ptr(exception_oop, Address(SP, 1 * wordSize)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ __ move(A1, RA); ++ ++ __ verify_not_null_oop(exception_oop); ++ ++ // continue at exception handler (return address removed) ++ // note: do *not* remove arguments when unwinding the ++ // activation since the caller assumes having ++ // all arguments on the stack when entering the ++ // runtime to determine the exception handler ++ // (GC happens at call site with arguments!) ++ // A0: exception oop ++ // A1: throwing pc ++ // A3: exception handler ++ __ jr(handler_addr); ++} ++ ++OopMapSet* Runtime1::generate_patching(StubAssembler* sasm, address target) { ++ // use the maximum number of runtime-arguments here because it is difficult to ++ // distinguish each RT-Call. ++ // Note: This number affects also the RT-Call in generate_handle_exception because ++ // the oop-map is shared for all calls. 
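++  // (Descriptive note: the code below saves all live registers, calls 'target'
++  // in the VM and, when the runtime reports that the nmethod was deoptimized
++  // (the expected case), restores registers and tail-calls the deopt blob so
++  // the patched site is re-executed; otherwise it stops with "deopt not performed".)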
++ DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); ++ assert(deopt_blob != NULL, "deoptimization blob must have been created"); ++ ++ OopMap* oop_map = save_live_registers(sasm); ++ ++ __ move(A0, TREG); ++ Label retaddr; ++ __ set_last_Java_frame(SP, FP, retaddr); ++ // do the call ++ __ call(target, relocInfo::runtime_call_type); ++ __ bind(retaddr); ++ OopMapSet* oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(__ offset(), oop_map); ++ // verify callee-saved register ++#ifdef ASSERT ++ { Label L; ++ __ get_thread(SCR1); ++ __ beq(TREG, SCR1, L); ++ __ stop("StubAssembler::call_RT: rthread not callee saved?"); ++ __ bind(L); ++ } ++#endif ++ ++ __ reset_last_Java_frame(true); ++ ++#ifdef ASSERT ++ // check that fields in JavaThread for exception oop and issuing pc are empty ++ Label oop_empty; ++ __ ld_ptr(SCR1, Address(TREG, Thread::pending_exception_offset())); ++ __ beqz(SCR1, oop_empty); ++ __ stop("exception oop must be empty"); ++ __ bind(oop_empty); ++ ++ Label pc_empty; ++ __ ld_ptr(SCR1, Address(TREG, JavaThread::exception_pc_offset())); ++ __ beqz(SCR1, pc_empty); ++ __ stop("exception pc must be empty"); ++ __ bind(pc_empty); ++#endif ++ ++ // Runtime will return true if the nmethod has been deoptimized, this is the ++ // expected scenario and anything else is an error. Note that we maintain a ++ // check on the result purely as a defensive measure. ++ Label no_deopt; ++ __ beqz(A0, no_deopt); // Have we deoptimized? ++ ++ // Perform a re-execute. The proper return address is already on the stack, ++ // we just need to restore registers, pop all of our frame but the return ++ // address and jump to the deopt blob. ++ restore_live_registers(sasm); ++ __ leave(); ++ __ jmp(deopt_blob->unpack_with_reexecution(), relocInfo::runtime_call_type); ++ ++ __ bind(no_deopt); ++ __ stop("deopt not performed"); ++ ++ return oop_maps; ++} ++ ++OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { ++ // for better readability ++ const bool must_gc_arguments = true; ++ const bool dont_gc_arguments = false; ++ ++ // default value; overwritten for some optimized stubs that are called ++ // from methods that do not use the fpu ++ bool save_fpu_registers = true; ++ ++ // stub code & info for the different stubs ++ OopMapSet* oop_maps = NULL; ++ OopMap* oop_map = NULL; ++ switch (id) { ++ { ++ case forward_exception_id: ++ { ++ oop_maps = generate_handle_exception(id, sasm); ++ __ leave(); ++ __ jr(RA); ++ } ++ break; ++ ++ case throw_div0_exception_id: ++ { ++ StubFrame f(sasm, "throw_div0_exception", dont_gc_arguments, does_not_return); ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_div0_exception), false); ++ } ++ break; ++ ++ case throw_null_pointer_exception_id: ++ { ++ StubFrame f(sasm, "throw_null_pointer_exception", dont_gc_arguments, does_not_return); ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_null_pointer_exception), false); ++ } ++ break; ++ ++ case new_instance_id: ++ case fast_new_instance_id: ++ case fast_new_instance_init_check_id: ++ { ++ Register klass = A3; // Incoming ++ Register obj = A0; // Result ++ ++ if (id == new_instance_id) { ++ __ set_info("new_instance", dont_gc_arguments); ++ } else if (id == fast_new_instance_id) { ++ __ set_info("fast new_instance", dont_gc_arguments); ++ } else { ++ assert(id == fast_new_instance_init_check_id, "bad StubID"); ++ __ set_info("fast new_instance init check", dont_gc_arguments); ++ } ++ ++ // If TLAB is disabled, see if there is support 
for inlining contiguous ++ // allocations. ++ // Otherwise, just go to the slow path. ++ if ((id == fast_new_instance_id || id == fast_new_instance_init_check_id) && ++ !UseTLAB && Universe::heap()->supports_inline_contig_alloc()) { ++ Label slow_path; ++ Register obj_size = S0; ++ Register t1 = T0; ++ Register t2 = T1; ++ assert_different_registers(klass, obj, obj_size, t1, t2); ++ ++ __ addi_d(SP, SP, -2 * wordSize); ++ __ st_ptr(S0, Address(SP, 0)); ++ ++ if (id == fast_new_instance_init_check_id) { ++ // make sure the klass is initialized ++ __ ld_bu(SCR1, Address(klass, InstanceKlass::init_state_offset())); ++ __ li(SCR2, InstanceKlass::fully_initialized); ++ __ bne_far(SCR1, SCR2, slow_path); ++ } ++ ++#ifdef ASSERT ++ // assert object can be fast path allocated ++ { ++ Label ok, not_ok; ++ __ ld_w(obj_size, Address(klass, Klass::layout_helper_offset())); ++ __ bge(R0, obj_size, not_ok); // make sure it's an instance (LH > 0) ++ __ andi(SCR1, obj_size, Klass::_lh_instance_slow_path_bit); ++ __ beqz(SCR1, ok); ++ __ bind(not_ok); ++ __ stop("assert(can be fast path allocated)"); ++ __ should_not_reach_here(); ++ __ bind(ok); ++ } ++#endif // ASSERT ++ ++ // get the instance size (size is postive so movl is fine for 64bit) ++ __ ld_w(obj_size, Address(klass, Klass::layout_helper_offset())); ++ ++ __ eden_allocate(obj, obj_size, 0, t1, slow_path); ++ ++ __ initialize_object(obj, klass, obj_size, 0, t1, t2, /* is_tlab_allocated */ false); ++ __ verify_oop(obj); ++ __ ld_ptr(S0, Address(SP, 0)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ __ jr(RA); ++ ++ __ bind(slow_path); ++ __ ld_ptr(S0, Address(SP, 0)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ } ++ ++ __ enter(); ++ OopMap* map = save_live_registers(sasm); ++ int call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_instance), klass); ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers_except_a0(sasm); ++ __ verify_oop(obj); ++ __ leave(); ++ __ jr(RA); ++ ++ // A0,: new instance ++ } ++ ++ break; ++ ++ case counter_overflow_id: ++ { ++ Register bci = A0, method = A1; ++ __ enter(); ++ OopMap* map = save_live_registers(sasm); ++ // Retrieve bci ++ __ ld_w(bci, Address(FP, 2 * BytesPerWord)); ++ // And a pointer to the Method* ++ __ ld_d(method, Address(FP, 3 * BytesPerWord)); ++ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, counter_overflow), bci, method); ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers(sasm); ++ __ leave(); ++ __ jr(RA); ++ } ++ break; ++ ++ case new_type_array_id: ++ case new_object_array_id: ++ { ++ Register length = S0; // Incoming ++ Register klass = A3; // Incoming ++ Register obj = A0; // Result ++ ++ if (id == new_type_array_id) { ++ __ set_info("new_type_array", dont_gc_arguments); ++ } else { ++ __ set_info("new_object_array", dont_gc_arguments); ++ } ++ ++#ifdef ASSERT ++ // assert object type is really an array of the proper kind ++ { ++ Label ok; ++ Register t0 = obj; ++ __ ld_w(t0, Address(klass, Klass::layout_helper_offset())); ++ __ srai_w(t0, t0, Klass::_lh_array_tag_shift); ++ int tag = ((id == new_type_array_id) ++ ? Klass::_lh_array_tag_type_value ++ : Klass::_lh_array_tag_obj_value); ++ __ li(SCR1, tag); ++ __ beq(t0, SCR1, ok); ++ __ stop("assert(is an array klass)"); ++ __ should_not_reach_here(); ++ __ bind(ok); ++ } ++#endif // ASSERT ++ ++ // If TLAB is disabled, see if there is support for inlining contiguous ++ // allocations. 
++ // Otherwise, just go to the slow path. ++ if (!UseTLAB && Universe::heap()->supports_inline_contig_alloc()) { ++ Register arr_size = A5; ++ Register t1 = T0; ++ Register t2 = T1; ++ Label slow_path; ++ assert_different_registers(length, klass, obj, arr_size, t1, t2); ++ ++ // check that array length is small enough for fast path. ++ __ li(SCR1, C1_MacroAssembler::max_array_allocation_length); ++ __ blt_far(SCR1, length, slow_path, false); ++ ++ // get the allocation size: round_up(hdr + length << (layout_helper & 0x1F)) ++ // since size is positive ldrw does right thing on 64bit ++ __ ld_w(t1, Address(klass, Klass::layout_helper_offset())); ++ // since size is positive movw does right thing on 64bit ++ __ move(arr_size, length); ++ __ sll_w(arr_size, length, t1); ++ __ bstrpick_d(t1, t1, Klass::_lh_header_size_shift + ++ exact_log2(Klass::_lh_header_size_mask + 1) - 1, ++ Klass::_lh_header_size_shift); ++ __ add_d(arr_size, arr_size, t1); ++ __ addi_d(arr_size, arr_size, MinObjAlignmentInBytesMask); // align up ++ __ bstrins_d(arr_size, R0, exact_log2(MinObjAlignmentInBytesMask + 1) - 1, 0); ++ ++ __ eden_allocate(obj, arr_size, 0, t1, slow_path); // preserves arr_size ++ ++ __ initialize_header(obj, klass, length, t1, t2); ++ __ ld_bu(t1, Address(klass, in_bytes(Klass::layout_helper_offset()) + (Klass::_lh_header_size_shift / BitsPerByte))); ++ assert(Klass::_lh_header_size_shift % BitsPerByte == 0, "bytewise"); ++ assert(Klass::_lh_header_size_mask <= 0xFF, "bytewise"); ++ __ andi(t1, t1, Klass::_lh_header_size_mask); ++ __ sub_d(arr_size, arr_size, t1); // body length ++ __ add_d(t1, t1, obj); // body start ++ __ initialize_body(t1, arr_size, 0, t1, t2); ++ __ membar(Assembler::StoreStore); ++ __ verify_oop(obj); ++ ++ __ jr(RA); ++ ++ __ bind(slow_path); ++ } ++ ++ __ enter(); ++ OopMap* map = save_live_registers(sasm); ++ int call_offset; ++ if (id == new_type_array_id) { ++ call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_type_array), klass, length); ++ } else { ++ call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_object_array), klass, length); ++ } ++ ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers_except_a0(sasm); ++ ++ __ verify_oop(obj); ++ __ leave(); ++ __ jr(RA); ++ ++ // A0: new array ++ } ++ break; ++ ++ case new_multi_array_id: ++ { ++ StubFrame f(sasm, "new_multi_array", dont_gc_arguments); ++ // A0,: klass ++ // S0,: rank ++ // A2: address of 1st dimension ++ OopMap* map = save_live_registers(sasm); ++ __ move(A1, A0); ++ __ move(A3, A2); ++ __ move(A2, S0); ++ int call_offset = __ call_RT(A0, noreg, CAST_FROM_FN_PTR(address, new_multi_array), A1, A2, A3); ++ ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers_except_a0(sasm); ++ ++ // A0,: new multi array ++ __ verify_oop(A0); ++ } ++ break; ++ ++ case register_finalizer_id: ++ { ++ __ set_info("register_finalizer", dont_gc_arguments); ++ ++ // This is called via call_runtime so the arguments ++ // will be place in C abi locations ++ ++ __ verify_oop(A0); ++ ++ // load the klass and check the has finalizer flag ++ Label register_finalizer; ++ Register t = A5; ++ __ load_klass(t, A0); ++ __ ld_w(t, Address(t, Klass::access_flags_offset())); ++ __ li(SCR1, JVM_ACC_HAS_FINALIZER); ++ __ andr(SCR1, t, SCR1); ++ __ bnez(SCR1, register_finalizer); ++ __ jr(RA); ++ ++ __ bind(register_finalizer); ++ __ enter(); ++ OopMap* oop_map = save_live_registers(sasm); ++ int call_offset = __ 
call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, SharedRuntime::register_finalizer), A0); ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, oop_map); ++ ++ // Now restore all the live registers ++ restore_live_registers(sasm); ++ ++ __ leave(); ++ __ jr(RA); ++ } ++ break; ++ ++ case throw_class_cast_exception_id: ++ { ++ StubFrame f(sasm, "throw_class_cast_exception", dont_gc_arguments, does_not_return); ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_class_cast_exception), true); ++ } ++ break; ++ ++ case throw_incompatible_class_change_error_id: ++ { ++ StubFrame f(sasm, "throw_incompatible_class_cast_exception", dont_gc_arguments, does_not_return); ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_incompatible_class_change_error), false); ++ } ++ break; ++ ++ case slow_subtype_check_id: ++ { ++ // Typical calling sequence: ++ // __ push(klass_RInfo); // object klass or other subclass ++ // __ push(sup_k_RInfo); // array element klass or other superclass ++ // __ bl(slow_subtype_check); ++ // Note that the subclass is pushed first, and is therefore deepest. ++ enum layout { ++ a0_off, a0_off_hi, ++ a2_off, a2_off_hi, ++ a4_off, a4_off_hi, ++ a5_off, a5_off_hi, ++ sup_k_off, sup_k_off_hi, ++ klass_off, klass_off_hi, ++ framesize, ++ result_off = sup_k_off ++ }; ++ ++ __ set_info("slow_subtype_check", dont_gc_arguments); ++ __ addi_d(SP, SP, -4 * wordSize); ++ __ st_ptr(A0, Address(SP, a0_off * VMRegImpl::stack_slot_size)); ++ __ st_ptr(A2, Address(SP, a2_off * VMRegImpl::stack_slot_size)); ++ __ st_ptr(A4, Address(SP, a4_off * VMRegImpl::stack_slot_size)); ++ __ st_ptr(A5, Address(SP, a5_off * VMRegImpl::stack_slot_size)); ++ ++ // This is called by pushing args and not with C abi ++ __ ld_ptr(A4, Address(SP, klass_off * VMRegImpl::stack_slot_size)); // subclass ++ __ ld_ptr(A0, Address(SP, sup_k_off * VMRegImpl::stack_slot_size)); // superclass ++ ++ Label miss; ++ __ check_klass_subtype_slow_path(A4, A0, A2, A5, NULL, &miss); ++ ++ // fallthrough on success: ++ __ li(SCR1, 1); ++ __ st_ptr(SCR1, Address(SP, result_off * VMRegImpl::stack_slot_size)); // result ++ __ ld_ptr(A0, Address(SP, a0_off * VMRegImpl::stack_slot_size)); ++ __ ld_ptr(A2, Address(SP, a2_off * VMRegImpl::stack_slot_size)); ++ __ ld_ptr(A4, Address(SP, a4_off * VMRegImpl::stack_slot_size)); ++ __ ld_ptr(A5, Address(SP, a5_off * VMRegImpl::stack_slot_size)); ++ __ addi_d(SP, SP, 4 * wordSize); ++ __ jr(RA); ++ ++ __ bind(miss); ++ __ st_ptr(R0, Address(SP, result_off * VMRegImpl::stack_slot_size)); // result ++ __ ld_ptr(A0, Address(SP, a0_off * VMRegImpl::stack_slot_size)); ++ __ ld_ptr(A2, Address(SP, a2_off * VMRegImpl::stack_slot_size)); ++ __ ld_ptr(A4, Address(SP, a4_off * VMRegImpl::stack_slot_size)); ++ __ ld_ptr(A5, Address(SP, a5_off * VMRegImpl::stack_slot_size)); ++ __ addi_d(SP, SP, 4 * wordSize); ++ __ jr(RA); ++ } ++ break; ++ ++ case monitorenter_nofpu_id: ++ save_fpu_registers = false; ++ // fall through ++ case monitorenter_id: ++ { ++ StubFrame f(sasm, "monitorenter", dont_gc_arguments); ++ OopMap* map = save_live_registers(sasm, save_fpu_registers); ++ ++ // Called with store_parameter and not C abi ++ ++ f.load_argument(1, A0); // A0,: object ++ f.load_argument(0, A1); // A1,: lock address ++ ++ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorenter), A0, A1); ++ ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers(sasm, save_fpu_registers); ++ } ++ 
break; ++ ++ case monitorexit_nofpu_id: ++ save_fpu_registers = false; ++ // fall through ++ case monitorexit_id: ++ { ++ StubFrame f(sasm, "monitorexit", dont_gc_arguments); ++ OopMap* map = save_live_registers(sasm, save_fpu_registers); ++ ++ // Called with store_parameter and not C abi ++ ++ f.load_argument(0, A0); // A0,: lock address ++ ++ // note: really a leaf routine but must setup last java sp ++ // => use call_RT for now (speed can be improved by ++ // doing last java sp setup manually) ++ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorexit), A0); ++ ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers(sasm, save_fpu_registers); ++ } ++ break; ++ ++ case deoptimize_id: ++ { ++ StubFrame f(sasm, "deoptimize", dont_gc_arguments, does_not_return); ++ OopMap* oop_map = save_live_registers(sasm); ++ f.load_argument(0, A1); ++ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, deoptimize), A1); ++ ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, oop_map); ++ restore_live_registers(sasm); ++ DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); ++ assert(deopt_blob != NULL, "deoptimization blob must have been created"); ++ __ leave(); ++ __ jmp(deopt_blob->unpack_with_reexecution(), relocInfo::runtime_call_type); ++ } ++ break; ++ ++ case throw_range_check_failed_id: ++ { ++ StubFrame f(sasm, "range_check_failed", dont_gc_arguments, does_not_return); ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_range_check_exception), true); ++ } ++ break; ++ ++ case unwind_exception_id: ++ { ++ __ set_info("unwind_exception", dont_gc_arguments); ++ // note: no stubframe since we are about to leave the current ++ // activation and we are calling a leaf VM function only. 
++ generate_unwind_exception(sasm); ++ } ++ break; ++ ++ case access_field_patching_id: ++ { ++ StubFrame f(sasm, "access_field_patching", dont_gc_arguments, does_not_return); ++ // we should set up register map ++ oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, access_field_patching)); ++ } ++ break; ++ ++ case load_klass_patching_id: ++ { ++ StubFrame f(sasm, "load_klass_patching", dont_gc_arguments, does_not_return); ++ // we should set up register map ++ oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_klass_patching)); ++ } ++ break; ++ ++ case load_mirror_patching_id: ++ { ++ StubFrame f(sasm, "load_mirror_patching", dont_gc_arguments, does_not_return); ++ // we should set up register map ++ oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_mirror_patching)); ++ } ++ break; ++ ++ case load_appendix_patching_id: ++ { ++ StubFrame f(sasm, "load_appendix_patching", dont_gc_arguments, does_not_return); ++ // we should set up register map ++ oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_appendix_patching)); ++ } ++ break; ++ ++ case handle_exception_nofpu_id: ++ case handle_exception_id: ++ { ++ StubFrame f(sasm, "handle_exception", dont_gc_arguments); ++ oop_maps = generate_handle_exception(id, sasm); ++ } ++ break; ++ ++ case handle_exception_from_callee_id: ++ { ++ StubFrame f(sasm, "handle_exception_from_callee", dont_gc_arguments); ++ oop_maps = generate_handle_exception(id, sasm); ++ } ++ break; ++ ++ case throw_index_exception_id: ++ { ++ StubFrame f(sasm, "index_range_check_failed", dont_gc_arguments, does_not_return); ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_index_exception), true); ++ } ++ break; ++ ++ case throw_array_store_exception_id: ++ { ++ StubFrame f(sasm, "throw_array_store_exception", dont_gc_arguments, does_not_return); ++ // tos + 0: link ++ // + 1: return address ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_array_store_exception), true); ++ } ++ break; ++ ++ case predicate_failed_trap_id: ++ { ++ StubFrame f(sasm, "predicate_failed_trap", dont_gc_arguments, does_not_return); ++ ++ OopMap* map = save_live_registers(sasm); ++ ++ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, predicate_failed_trap)); ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers(sasm); ++ __ leave(); ++ DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); ++ assert(deopt_blob != NULL, "deoptimization blob must have been created"); ++ ++ __ jmp(deopt_blob->unpack_with_reexecution(), relocInfo::runtime_call_type); ++ } ++ break; ++ ++ case dtrace_object_alloc_id: ++ { ++ // A0: object ++ StubFrame f(sasm, "dtrace_object_alloc", dont_gc_arguments); ++ save_live_registers(sasm); ++ ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), A0); ++ ++ restore_live_registers(sasm); ++ } ++ break; ++ ++ default: ++ { ++ StubFrame f(sasm, "unimplemented entry", dont_gc_arguments, does_not_return); ++ __ li(A0, (int)id); ++ __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), A0); ++ } ++ break; ++ } ++ } ++ return oop_maps; ++} ++ ++#undef __ ++ ++const char *Runtime1::pd_name_for_address(address entry) { ++ Unimplemented(); ++ return 0; ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/c2_globals_loongarch.hpp b/src/hotspot/cpu/loongarch/c2_globals_loongarch.hpp +--- 
a/src/hotspot/cpu/loongarch/c2_globals_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/c2_globals_loongarch.hpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,94 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_C2_GLOBALS_LOONGARCH_HPP ++#define CPU_LOONGARCH_C2_GLOBALS_LOONGARCH_HPP ++ ++#include "utilities/globalDefinitions.hpp" ++#include "utilities/macros.hpp" ++ ++// Sets the default values for platform dependent flags used by the server compiler. ++// (see c2_globals.hpp). Alpha-sorted. ++define_pd_global(bool, BackgroundCompilation, true); ++define_pd_global(bool, UseTLAB, true); ++define_pd_global(bool, ResizeTLAB, true); ++define_pd_global(bool, CICompileOSR, true); ++define_pd_global(bool, InlineIntrinsics, true); ++define_pd_global(bool, PreferInterpreterNativeStubs, false); ++define_pd_global(bool, ProfileTraps, true); ++define_pd_global(bool, UseOnStackReplacement, true); ++#ifdef CC_INTERP ++define_pd_global(bool, ProfileInterpreter, false); ++#else ++define_pd_global(bool, ProfileInterpreter, true); ++#endif // CC_INTERP ++define_pd_global(bool, TieredCompilation, true); ++define_pd_global(intx, CompileThreshold, 10000); ++define_pd_global(intx, BackEdgeThreshold, 100000); ++ ++define_pd_global(intx, OnStackReplacePercentage, 140); ++define_pd_global(intx, ConditionalMoveLimit, 3); ++define_pd_global(intx, FLOATPRESSURE, 6); ++define_pd_global(intx, FreqInlineSize, 325); ++define_pd_global(intx, MinJumpTableSize, 10); ++define_pd_global(intx, INTPRESSURE, 13); ++define_pd_global(intx, InteriorEntryAlignment, 16); ++define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K)); ++define_pd_global(intx, LoopUnrollLimit, 60); ++define_pd_global(intx, LoopPercentProfileLimit, 10); ++// InitialCodeCacheSize derived from specjbb2000 run. ++define_pd_global(intx, InitialCodeCacheSize, 2496*K); // Integral multiple of CodeCacheExpansionSize ++define_pd_global(intx, CodeCacheExpansionSize, 64*K); ++ ++// Ergonomics related flags ++define_pd_global(uint64_t,MaxRAM, 128ULL*G); ++define_pd_global(intx, RegisterCostAreaRatio, 16000); ++ ++// Peephole and CISC spilling both break the graph, and so makes the ++// scheduler sick. 
++define_pd_global(bool, OptoPeephole, false); ++define_pd_global(bool, UseCISCSpill, false); ++define_pd_global(bool, OptoScheduling, false); ++define_pd_global(bool, OptoBundling, false); ++define_pd_global(bool, OptoRegScheduling, false); ++define_pd_global(bool, SuperWordLoopUnrollAnalysis, true); ++define_pd_global(bool, IdealizeClearArrayNode, true); ++ ++define_pd_global(intx, ReservedCodeCacheSize, 48*M); ++define_pd_global(intx, NonProfiledCodeHeapSize, 21*M); ++define_pd_global(intx, ProfiledCodeHeapSize, 22*M); ++define_pd_global(intx, NonNMethodCodeHeapSize, 5*M ); ++define_pd_global(uintx, CodeCacheMinBlockLength, 4); ++define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); ++ ++define_pd_global(bool, TrapBasedRangeChecks, false); ++ ++// Heap related flags ++define_pd_global(uintx,MetaspaceSize, ScaleForWordSize(16*M)); ++ ++// Ergonomics related flags ++define_pd_global(bool, NeverActAsServerClassMachine, false); ++ ++#endif // CPU_LOONGARCH_C2_GLOBALS_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/c2_init_loongarch.cpp b/src/hotspot/cpu/loongarch/c2_init_loongarch.cpp +--- a/src/hotspot/cpu/loongarch/c2_init_loongarch.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/c2_init_loongarch.cpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,37 @@ ++/* ++ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "opto/compile.hpp" ++#include "opto/node.hpp" ++ ++// processor dependent initialization for LoongArch ++ ++extern void reg_mask_init(); ++ ++void Compile::pd_compiler2_init() { ++ guarantee(CodeEntryAlignment >= InteriorEntryAlignment, "" ); ++ reg_mask_init(); ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/codeBuffer_loongarch.hpp b/src/hotspot/cpu/loongarch/codeBuffer_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/codeBuffer_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/codeBuffer_loongarch.hpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,35 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_CODEBUFFER_LOONGARCH_HPP ++#define CPU_LOONGARCH_CODEBUFFER_LOONGARCH_HPP ++ ++private: ++ void pd_initialize() {} ++ ++public: ++ void flush_bundle(bool start_new_bundle) {} ++ ++#endif // CPU_LOONGARCH_CODEBUFFER_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/compiledIC_loongarch.cpp b/src/hotspot/cpu/loongarch/compiledIC_loongarch.cpp +--- a/src/hotspot/cpu/loongarch/compiledIC_loongarch.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/compiledIC_loongarch.cpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,148 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/compiledIC.hpp" ++#include "code/icBuffer.hpp" ++#include "code/nmethod.hpp" ++#include "memory/resourceArea.hpp" ++#include "runtime/mutexLocker.hpp" ++#include "runtime/safepoint.hpp" ++ ++// ---------------------------------------------------------------------------- ++ ++#define __ _masm. ++address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark) { ++ precond(cbuf.stubs()->start() != badAddress); ++ precond(cbuf.stubs()->end() != badAddress); ++ ++ if (mark == NULL) { ++ mark = cbuf.insts_mark(); // get mark within main instrs section ++ } ++ ++ // Note that the code buffer's insts_mark is always relative to insts. 
++ // That's why we must use the macroassembler to generate a stub. ++ MacroAssembler _masm(&cbuf); ++ ++ address base = __ start_a_stub(CompiledStaticCall::to_interp_stub_size()); ++ if (base == NULL) return NULL; // CodeBuffer::expand failed ++ // static stub relocation stores the instruction address of the call ++ ++ __ relocate(static_stub_Relocation::spec(mark), 0); ++ ++ // Code stream for loading method may be changed. ++ __ ibar(0); ++ ++ // Rmethod contains methodOop, it should be relocated for GC ++ // static stub relocation also tags the methodOop in the code-stream. ++ __ mov_metadata(Rmethod, NULL); ++ // This is recognized as unresolved by relocs/nativeInst/ic code ++ ++ cbuf.set_insts_mark(); ++ __ patchable_jump(__ pc()); ++ // Update current stubs pointer and restore code_end. ++ __ end_a_stub(); ++ return base; ++} ++#undef __ ++ ++int CompiledStaticCall::to_interp_stub_size() { ++ return NativeInstruction::nop_instruction_size + NativeMovConstReg::instruction_size + NativeGeneralJump::instruction_size; ++} ++ ++int CompiledStaticCall::to_trampoline_stub_size() { ++ return NativeInstruction::nop_instruction_size + NativeCallTrampolineStub::instruction_size; ++} ++ ++// Relocation entries for call stub, compiled java to interpreter. ++int CompiledStaticCall::reloc_to_interp_stub() { ++ return 16; ++} ++ ++void CompiledDirectStaticCall::set_to_interpreted(const methodHandle& callee, address entry) { ++ address stub = find_stub(false /* is_aot */); ++ guarantee(stub != NULL, "stub not found"); ++ ++ if (TraceICs) { ++ ResourceMark rm; ++ tty->print_cr("CompiledDirectStaticCall@" INTPTR_FORMAT ": set_to_interpreted %s", ++ p2i(instruction_address()), ++ callee->name_and_sig_as_C_string()); ++ } ++ ++ // Creation also verifies the object. ++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); ++ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); ++ ++ assert(method_holder->data() == 0 || method_holder->data() == (intptr_t)callee(), ++ "a) MT-unsafe modification of inline cache"); ++ assert(jump->jump_destination() == (address)-1 || jump->jump_destination() == entry, ++ "b) MT-unsafe modification of inline cache"); ++ ++ // Update stub. ++ method_holder->set_data((intptr_t)callee()); ++ jump->set_jump_destination(entry); ++ ++ // Update jump to call. ++ set_destination_mt_safe(stub); ++} ++ ++void CompiledDirectStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub) { ++ assert (CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "mt unsafe call"); ++ // Reset stub. ++ address stub = static_stub->addr(); ++ assert(stub != NULL, "stub not found"); ++ // Creation also verifies the object. ++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); ++ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); ++ method_holder->set_data(0); ++ jump->set_jump_destination(jump->instruction_address()); ++} ++ ++//----------------------------------------------------------------------------- ++// Non-product mode code ++#ifndef PRODUCT ++ ++void CompiledDirectStaticCall::verify() { ++ // Verify call. ++ _call->verify(); ++ if (os::is_MP()) { ++ _call->verify_alignment(); ++ } ++ ++ // Verify stub. ++ address stub = find_stub(false /* is_aot */); ++ assert(stub != NULL, "no stub found for static call"); ++ // Creation also verifies the object. 
++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); ++ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); ++ ++ ++ // Verify state. ++ assert(is_clean() || is_call_to_compiled() || is_call_to_interpreted(), "sanity check"); ++} ++ ++#endif // !PRODUCT +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/copy_loongarch.hpp b/src/hotspot/cpu/loongarch/copy_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/copy_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/copy_loongarch.hpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,77 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_COPY_LOONGARCH_HPP ++#define CPU_LOONGARCH_COPY_LOONGARCH_HPP ++ ++// Inline functions for memory copy and fill. ++ ++// Contains inline asm implementations ++#include OS_CPU_HEADER_INLINE(copy) ++ ++// Template for atomic, element-wise copy. 
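++// (It copies forward when 'from' lies above 'to' and backward otherwise, so
++// overlapping source and destination ranges are handled; each element is moved
++// with a single T-sized assignment.)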
++template <class T> ++static void copy_conjoint_atomic(const T* from, T* to, size_t count) { ++ if (from > to) { ++ while (count-- > 0) { ++ // Copy forwards ++ *to++ = *from++; ++ } ++ } else { ++ from += count - 1; ++ to += count - 1; ++ while (count-- > 0) { ++ // Copy backwards ++ *to-- = *from--; ++ } ++ } ++} ++ ++ ++static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) { ++ julong* to = (julong*) tohw; ++ julong v = ((julong) value << 32) | value; ++ while (count-- > 0) { ++ *to++ = v; ++ } ++} ++ ++static void pd_fill_to_aligned_words(HeapWord* tohw, size_t count, juint value) { ++ pd_fill_to_words(tohw, count, value); ++} ++ ++static void pd_fill_to_bytes(void* to, size_t count, jubyte value) { ++ (void)memset(to, value, count); ++} ++ ++static void pd_zero_to_words(HeapWord* tohw, size_t count) { ++ pd_fill_to_words(tohw, count, 0); ++} ++ ++static void pd_zero_to_bytes(void* to, size_t count) { ++ (void)memset(to, 0, count); ++} ++ ++#endif //CPU_LOONGARCH_COPY_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/depChecker_loongarch.cpp b/src/hotspot/cpu/loongarch/depChecker_loongarch.cpp +--- a/src/hotspot/cpu/loongarch/depChecker_loongarch.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/depChecker_loongarch.cpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,30 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "compiler/disassembler.hpp" ++#include "depChecker_loongarch.hpp" ++ ++// Nothing to do on LoongArch +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/depChecker_loongarch.hpp b/src/hotspot/cpu/loongarch/depChecker_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/depChecker_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/depChecker_loongarch.hpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,31 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation.
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_DEPCHECKER_LOONGARCH_HPP ++#define CPU_LOONGARCH_DEPCHECKER_LOONGARCH_HPP ++ ++// Nothing to do on LoongArch ++ ++#endif // CPU_LOONGARCH_DEPCHECKER_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/disassembler_loongarch.hpp b/src/hotspot/cpu/loongarch/disassembler_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/disassembler_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/disassembler_loongarch.hpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,37 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_DISASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_DISASSEMBLER_LOONGARCH_HPP ++ ++ static int pd_instruction_alignment() { ++ return sizeof(int); ++ } ++ ++ static const char* pd_cpu_opts() { ++ return "gpr-names=64"; ++ } ++ ++#endif // CPU_LOONGARCH_DISASSEMBLER_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/frame_loongarch.cpp b/src/hotspot/cpu/loongarch/frame_loongarch.cpp +--- a/src/hotspot/cpu/loongarch/frame_loongarch.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/frame_loongarch.cpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,690 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/resourceArea.hpp" ++#include "oops/markOop.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/javaCalls.hpp" ++#include "runtime/monitorChunk.hpp" ++#include "runtime/signature.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "vmreg_loongarch.inline.hpp" ++ ++#ifdef ASSERT ++void RegisterMap::check_location_valid() { ++} ++#endif ++ ++ ++// Profiling/safepoint support ++// for Profiling - acting on another frame. walks sender frames ++// if valid. ++// frame profile_find_Java_sender_frame(JavaThread *thread); ++ ++bool frame::safe_for_sender(JavaThread *thread) { ++ address sp = (address)_sp; ++ address fp = (address)_fp; ++ address unextended_sp = (address)_unextended_sp; ++ ++ // consider stack guards when trying to determine "safe" stack pointers ++ static size_t stack_guard_size = os::uses_stack_guard_pages() ? ++ JavaThread::stack_red_zone_size() + JavaThread::stack_yellow_zone_size() : 0; ++ size_t usable_stack_size = thread->stack_size() - stack_guard_size; ++ ++ // sp must be within the usable part of the stack (not in guards) ++ bool sp_safe = (sp < thread->stack_base()) && ++ (sp >= thread->stack_base() - usable_stack_size); ++ ++ ++ if (!sp_safe) { ++ return false; ++ } ++ ++ // unextended sp must be within the stack and above or equal sp ++ bool unextended_sp_safe = (unextended_sp < thread->stack_base()) && ++ (unextended_sp >= sp); ++ ++ if (!unextended_sp_safe) { ++ return false; ++ } ++ ++ // an fp must be within the stack and above (but not equal) sp ++ // second evaluation on fp+ is added to handle situation where fp is -1 ++ bool fp_safe = (fp < thread->stack_base() && (fp > sp) && (((fp + (java_frame_return_addr_offset * sizeof(void*))) < thread->stack_base()))); ++ ++ // We know sp/unextended_sp are safe only fp is questionable here ++ ++ // If the current frame is known to the code cache then we can attempt to ++ // construct the sender and do some validation of it. This goes a long way ++ // toward eliminating issues when we get in frame construction code ++ ++ if (_cb != NULL ) { ++ ++ // First check if frame is complete and tester is reliable ++ // Unfortunately we can only check frame complete for runtime stubs and nmethod ++ // other generic buffer blobs are more problematic so we just assume they are ++ // ok. 
adapter blobs never have a frame complete and are never ok. ++ ++ if (!_cb->is_frame_complete_at(_pc)) { ++ if (_cb->is_compiled() || _cb->is_adapter_blob() || _cb->is_runtime_stub()) { ++ return false; ++ } ++ } ++ ++ // Could just be some random pointer within the codeBlob ++ if (!_cb->code_contains(_pc)) { ++ return false; ++ } ++ ++ // Entry frame checks ++ if (is_entry_frame()) { ++ // an entry frame must have a valid fp. ++ return fp_safe && is_entry_frame_valid(thread); ++ } ++ ++ intptr_t* sender_sp = NULL; ++ intptr_t* sender_unextended_sp = NULL; ++ address sender_pc = NULL; ++ intptr_t* saved_fp = NULL; ++ ++ if (is_interpreted_frame()) { ++ // fp must be safe ++ if (!fp_safe) { ++ return false; ++ } ++ ++ sender_pc = (address) this->fp()[java_frame_return_addr_offset]; ++ // for interpreted frames, the value below is the sender "raw" sp, ++ // which can be different from the sender unextended sp (the sp seen ++ // by the sender) because of current frame local variables ++ sender_sp = (intptr_t*) addr_at(java_frame_sender_sp_offset); ++ sender_unextended_sp = (intptr_t*) this->fp()[interpreter_frame_sender_sp_offset]; ++ saved_fp = (intptr_t*) this->fp()[java_frame_link_offset]; ++ ++ } else { ++ // must be some sort of compiled/runtime frame ++ // fp does not have to be safe (although it could be check for c1?) ++ ++ // check for a valid frame_size, otherwise we are unlikely to get a valid sender_pc ++ if (_cb->frame_size() <= 0) { ++ return false; ++ } ++ ++ sender_sp = _unextended_sp + _cb->frame_size(); ++ // Is sender_sp safe? ++ if ((address)sender_sp >= thread->stack_base()) { ++ return false; ++ } ++ sender_unextended_sp = sender_sp; ++ // On LA the return_address is always the word on the stack ++ sender_pc = (address) *(sender_sp-1); ++ // Note: frame::java_frame_sender_sp_offset is only valid for compiled frame ++ saved_fp = (intptr_t*) *(sender_sp - frame::java_frame_sender_sp_offset); ++ } ++ ++ ++ // If the potential sender is the interpreter then we can do some more checking ++ if (Interpreter::contains(sender_pc)) { ++ ++ // FP is always saved in a recognizable place in any code we generate. However ++ // only if the sender is interpreted/call_stub (c1 too?) are we certain that the saved FP ++ // is really a frame pointer. 
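(Illustrative aside, not part of the patch.) The compiled/runtime branch a few lines up rebuilds the sender frame from three loads that do not rely on fp at all: the sender's sp is the callee's unextended sp plus the code blob's frame size, the return pc is the word just below that sp, and the saved fp sits two words below it (java_frame_sender_sp_offset is 2 in frame_loongarch.hpp later in this patch). A minimal standalone model, with invented names such as RawFrame and sender_of_compiled:

#include <cstdint>

// Illustrative model only, not the HotSpot API.
struct RawFrame {
  intptr_t* sp;   // sender's stack pointer
  intptr_t* fp;   // sender's saved frame pointer
  intptr_t  pc;   // sender's return address
};

// frame_size_in_words is the callee's frame size as recorded in its code blob.
inline RawFrame sender_of_compiled(intptr_t* callee_unextended_sp,
                                   int frame_size_in_words) {
  intptr_t* sender_sp = callee_unextended_sp + frame_size_in_words;
  RawFrame sender;
  sender.sp = sender_sp;
  sender.pc = *(sender_sp - 1);              // return address: one word below sender_sp
  sender.fp = (intptr_t*)*(sender_sp - 2);   // saved fp: java_frame_sender_sp_offset == 2
  return sender;
}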
++ ++ bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); ++ ++ if (!saved_fp_safe) { ++ return false; ++ } ++ ++ // construct the potential sender ++ ++ frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); ++ ++ return sender.is_interpreted_frame_valid(thread); ++ ++ } ++ ++ // We must always be able to find a recognizable pc ++ CodeBlob* sender_blob = CodeCache::find_blob_unsafe(sender_pc); ++ if (sender_pc == NULL || sender_blob == NULL) { ++ return false; ++ } ++ ++ // Could be a zombie method ++ if (sender_blob->is_zombie() || sender_blob->is_unloaded()) { ++ return false; ++ } ++ ++ // Could just be some random pointer within the codeBlob ++ if (!sender_blob->code_contains(sender_pc)) { ++ return false; ++ } ++ ++ // We should never be able to see an adapter if the current frame is something from code cache ++ if (sender_blob->is_adapter_blob()) { ++ return false; ++ } ++ ++ // Could be the call_stub ++ if (StubRoutines::returns_to_call_stub(sender_pc)) { ++ bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); ++ ++ if (!saved_fp_safe) { ++ return false; ++ } ++ ++ // construct the potential sender ++ ++ frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); ++ ++ // Validate the JavaCallWrapper an entry frame must have ++ address jcw = (address)sender.entry_frame_call_wrapper(); ++ ++ bool jcw_safe = (jcw < thread->stack_base()) && (jcw > (address)sender.fp()); ++ ++ return jcw_safe; ++ } ++ ++ CompiledMethod* nm = sender_blob->as_compiled_method_or_null(); ++ if (nm != NULL) { ++ if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc) || ++ nm->method()->is_method_handle_intrinsic()) { ++ return false; ++ } ++ } ++ ++ // If the frame size is 0 something (or less) is bad because every nmethod has a non-zero frame size ++ // because the return address counts against the callee's frame. ++ ++ if (sender_blob->frame_size() <= 0) { ++ assert(!sender_blob->is_compiled(), "should count return address at least"); ++ return false; ++ } ++ ++ // We should never be able to see anything here except an nmethod. If something in the ++ // code cache (current frame) is called by an entity within the code cache that entity ++ // should not be anything but the call stub (already covered), the interpreter (already covered) ++ // or an nmethod. ++ ++ if (!sender_blob->is_compiled()) { ++ return false; ++ } ++ ++ // Could put some more validation for the potential non-interpreted sender ++ // frame we'd create by calling sender if I could think of any. Wait for next crash in forte... ++ ++ // One idea is seeing if the sender_pc we have is one that we'd expect to call to current cb ++ ++ // We've validated the potential sender that would be created ++ return true; ++ } ++ ++ // Must be native-compiled frame. Since sender will try and use fp to find ++ // linkages it must be safe ++ ++ if (!fp_safe) { ++ return false; ++ } ++ ++ // Will the pc we fetch be non-zero (which we'll find at the oldest frame) ++ ++ if ( (address) this->fp()[java_frame_return_addr_offset] == NULL) return false; ++ ++ ++ // could try and do some more potential verification of native frame if we could think of some... 
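(Illustrative aside, not part of the patch.) Every *_safe flag used throughout safe_for_sender reduces to an interval test against a downward-growing stack whose usable range excludes the guard pages. A simplified standalone sketch, assuming an invented StackModel type and collapsing the red/yellow zones into a single usable_size:

#include <cstddef>
#include <cstdint>

struct StackModel {
  uintptr_t stack_base;   // highest address of the thread's stack (exclusive)
  size_t    usable_size;  // stack size minus the guard zones
};

// sp must lie inside the usable part of the stack.
inline bool sp_safe(const StackModel& s, uintptr_t sp) {
  return sp < s.stack_base && sp >= s.stack_base - s.usable_size;
}

// unextended sp must be inside the stack and at or above sp.
inline bool unextended_sp_safe(const StackModel& s, uintptr_t unextended_sp, uintptr_t sp) {
  return unextended_sp < s.stack_base && unextended_sp >= sp;
}

// fp must be inside the stack, strictly above sp, and leave room for the
// return-address slot that sits java_frame_return_addr_offset words above it.
inline bool fp_safe(const StackModel& s, uintptr_t fp, uintptr_t sp, size_t ret_slot_bytes) {
  return fp < s.stack_base && fp > sp && fp + ret_slot_bytes < s.stack_base;
}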
++ ++ return true; ++ ++} ++ ++void frame::patch_pc(Thread* thread, address pc) { ++ address* pc_addr = &(((address*) sp())[-1]); ++ if (TracePcPatching) { ++ tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "]", ++ p2i(pc_addr), p2i(*pc_addr), p2i(pc)); ++ } ++ // Either the return address is the original one or we are going to ++ // patch in the same address that's already there. ++ assert(_pc == *pc_addr || pc == *pc_addr, "must be"); ++ *pc_addr = pc; ++ _cb = CodeCache::find_blob(pc); ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ assert(original_pc == _pc, "expected original PC to be stored before patching"); ++ _deopt_state = is_deoptimized; ++ // leave _pc as is ++ } else { ++ _deopt_state = not_deoptimized; ++ _pc = pc; ++ } ++} ++ ++bool frame::is_interpreted_frame() const { ++ return Interpreter::contains(pc()); ++} ++ ++int frame::frame_size(RegisterMap* map) const { ++ frame sender = this->sender(map); ++ return sender.sp() - sp(); ++} ++ ++intptr_t* frame::entry_frame_argument_at(int offset) const { ++ // convert offset to index to deal with tsi ++ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); ++ // Entry frame's arguments are always in relation to unextended_sp() ++ return &unextended_sp()[index]; ++} ++ ++// sender_sp ++#ifdef CC_INTERP ++intptr_t* frame::interpreter_frame_sender_sp() const { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ // QQQ why does this specialize method exist if frame::sender_sp() does same thing? ++ // seems odd and if we always know interpreted vs. non then sender_sp() is really ++ // doing too much work. ++ return get_interpreterState()->sender_sp(); ++} ++ ++// monitor elements ++ ++BasicObjectLock* frame::interpreter_frame_monitor_begin() const { ++ return get_interpreterState()->monitor_base(); ++} ++ ++BasicObjectLock* frame::interpreter_frame_monitor_end() const { ++ return (BasicObjectLock*) get_interpreterState()->stack_base(); ++} ++ ++#else // CC_INTERP ++ ++intptr_t* frame::interpreter_frame_sender_sp() const { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ return (intptr_t*) at(interpreter_frame_sender_sp_offset); ++} ++ ++void frame::set_interpreter_frame_sender_sp(intptr_t* sender_sp) { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ int_at_put(interpreter_frame_sender_sp_offset, (intptr_t) sender_sp); ++} ++ ++ ++// monitor elements ++ ++BasicObjectLock* frame::interpreter_frame_monitor_begin() const { ++ return (BasicObjectLock*) addr_at(interpreter_frame_monitor_block_bottom_offset); ++} ++ ++BasicObjectLock* frame::interpreter_frame_monitor_end() const { ++ BasicObjectLock* result = (BasicObjectLock*) *addr_at(interpreter_frame_monitor_block_top_offset); ++ // make sure the pointer points inside the frame ++ assert((intptr_t) fp() > (intptr_t) result, "result must < than frame pointer"); ++ assert((intptr_t) sp() <= (intptr_t) result, "result must >= than stack pointer"); ++ return result; ++} ++ ++void frame::interpreter_frame_set_monitor_end(BasicObjectLock* value) { ++ *((BasicObjectLock**)addr_at(interpreter_frame_monitor_block_top_offset)) = value; ++} ++ ++// Used by template based interpreter deoptimization ++void frame::interpreter_frame_set_last_sp(intptr_t* sp) { ++ *((intptr_t**)addr_at(interpreter_frame_last_sp_offset)) = sp; ++} ++#endif // CC_INTERP ++ ++frame frame::sender_for_entry_frame(RegisterMap* map) const { ++ assert(map != NULL, 
"map must be set"); ++ // Java frame called from C; skip all C frames and return top C ++ // frame of that chunk as the sender ++ JavaFrameAnchor* jfa = entry_frame_call_wrapper()->anchor(); ++ assert(!entry_frame_is_first(), "next Java fp must be non zero"); ++ assert(jfa->last_Java_sp() > sp(), "must be above this frame on stack"); ++ map->clear(); ++ assert(map->include_argument_oops(), "should be set by clear"); ++ if (jfa->last_Java_pc() != NULL ) { ++ frame fr(jfa->last_Java_sp(), jfa->last_Java_fp(), jfa->last_Java_pc()); ++ return fr; ++ } ++ frame fr(jfa->last_Java_sp(), jfa->last_Java_fp()); ++ return fr; ++} ++ ++frame frame::sender_for_interpreter_frame(RegisterMap* map) const { ++ // sp is the raw sp from the sender after adapter or interpreter extension ++ intptr_t* sender_sp = this->sender_sp(); ++ ++ // This is the sp before any possible extension (adapter/locals). ++ intptr_t* unextended_sp = interpreter_frame_sender_sp(); ++ ++ // The interpreter and compiler(s) always save FP in a known ++ // location on entry. We must record where that location is ++ // so this if FP was live on callout from c2 we can find ++ // the saved copy no matter what it called. ++ ++ // Since the interpreter always saves FP if we record where it is then ++ // we don't have to always save FP on entry and exit to c2 compiled ++ // code, on entry will be enough. ++#ifdef COMPILER2_OR_JVMCI ++ if (map->update_map()) { ++ update_map_with_saved_link(map, (intptr_t**) addr_at(java_frame_link_offset)); ++ } ++#endif // COMPILER2_OR_JVMCI ++ return frame(sender_sp, unextended_sp, link(), sender_pc()); ++} ++ ++ ++//------------------------------------------------------------------------------ ++// frame::verify_deopt_original_pc ++// ++// Verifies the calculated original PC of a deoptimization PC for the ++// given unextended SP. The unextended SP might also be the saved SP ++// for MethodHandle call sites. ++#ifdef ASSERT ++void frame::verify_deopt_original_pc(CompiledMethod* nm, intptr_t* unextended_sp) { ++ frame fr; ++ ++ // This is ugly but it's better than to change {get,set}_original_pc ++ // to take an SP value as argument. And it's only a debugging ++ // method anyway. ++ fr._unextended_sp = unextended_sp; ++ ++ address original_pc = nm->get_original_pc(&fr); ++ assert(nm->insts_contains(original_pc), ++ "original PC must be in the main code section of the the compiled method (or must be immediately following it)"); ++} ++#endif ++ ++ ++//------------------------------------------------------------------------------ ++// frame::adjust_unextended_sp ++void frame::adjust_unextended_sp() { ++ // On LoongArch, sites calling method handle intrinsics and lambda forms are treated ++ // as any other call site. Therefore, no special action is needed when we are ++ // returning to any of these call sites. ++ ++ if (_cb != NULL) { ++ CompiledMethod* sender_cm = _cb->as_compiled_method_or_null(); ++ if (sender_cm != NULL) { ++ // If the sender PC is a deoptimization point, get the original PC. ++ if (sender_cm->is_deopt_entry(_pc) || ++ sender_cm->is_deopt_mh_entry(_pc)) { ++ DEBUG_ONLY(verify_deopt_original_pc(sender_cm, _unextended_sp)); ++ } ++ } ++ } ++} ++ ++//------------------------------------------------------------------------------ ++// frame::update_map_with_saved_link ++void frame::update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr) { ++ // The interpreter and compiler(s) always save fp in a known ++ // location on entry. 
We must record where that location is ++ // so that if fp was live on callout from c2 we can find ++ // the saved copy no matter what it called. ++ ++ // Since the interpreter always saves fp if we record where it is then ++ // we don't have to always save fp on entry and exit to c2 compiled ++ // code, on entry will be enough. ++ map->set_location(FP->as_VMReg(), (address) link_addr); ++ // this is weird "H" ought to be at a higher address however the ++ // oopMaps seems to have the "H" regs at the same address and the ++ // vanilla register. ++ // XXXX make this go away ++ if (true) { ++ map->set_location(FP->as_VMReg()->next(), (address) link_addr); ++ } ++} ++ ++//------------------------------sender_for_compiled_frame----------------------- ++frame frame::sender_for_compiled_frame(RegisterMap* map) const { ++ assert(map != NULL, "map must be set"); ++ ++ // frame owned by optimizing compiler ++ assert(_cb->frame_size() >= 0, "must have non-zero frame size"); ++ ++ intptr_t* sender_sp = unextended_sp() + _cb->frame_size(); ++ intptr_t* unextended_sp = sender_sp; ++ ++ // On Loongson the return_address is always the word on the stack ++ // the fp in compiler points to sender fp, but in interpreter, fp points to return address, ++ // so getting sender for compiled frame is not same as interpreter frame. ++ // we hard code here temporarily ++ // spark ++ address sender_pc = (address) *(sender_sp-1); ++ ++ intptr_t** saved_fp_addr = (intptr_t**) (sender_sp - frame::java_frame_sender_sp_offset); ++ ++ if (map->update_map()) { ++ // Tell GC to use argument oopmaps for some runtime stubs that need it. ++ // For C1, the runtime stub might not have oop maps, so set this flag ++ // outside of update_register_map. ++ map->set_include_argument_oops(_cb->caller_must_gc_arguments(map->thread())); ++ if (_cb->oop_maps() != NULL) { ++ OopMapSet::update_register_map(this, map); ++ } ++ ++ // Since the prolog does the save and restore of epb there is no oopmap ++ // for it so we must fill in its location as if there was an oopmap entry ++ // since if our caller was compiled code there could be live jvm state in it. ++ update_map_with_saved_link(map, saved_fp_addr); ++ } ++ assert(sender_sp != sp(), "must have changed"); ++ return frame(sender_sp, unextended_sp, *saved_fp_addr, sender_pc); ++} ++ ++frame frame::sender(RegisterMap* map) const { ++ // Default is we done have to follow them. The sender_for_xxx will ++ // update it accordingly ++ map->set_include_argument_oops(false); ++ ++ if (is_entry_frame()) return sender_for_entry_frame(map); ++ if (is_interpreted_frame()) return sender_for_interpreter_frame(map); ++ assert(_cb == CodeCache::find_blob(pc()),"Must be the same"); ++ ++ if (_cb != NULL) { ++ return sender_for_compiled_frame(map); ++ } ++ // Must be native-compiled frame, i.e. the marshaling code for native ++ // methods that exists in the core system. ++ return frame(sender_sp(), link(), sender_pc()); ++} ++ ++bool frame::is_interpreted_frame_valid(JavaThread* thread) const { ++// QQQ ++#ifdef CC_INTERP ++#else ++ assert(is_interpreted_frame(), "Not an interpreted frame"); ++ // These are reasonable sanity checks ++ if (fp() == 0 || (intptr_t(fp()) & (wordSize-1)) != 0) { ++ return false; ++ } ++ if (sp() == 0 || (intptr_t(sp()) & (wordSize-1)) != 0) { ++ return false; ++ } ++ if (fp() + interpreter_frame_initial_sp_offset < sp()) { ++ return false; ++ } ++ // These are hacks to keep us out of trouble. 
++ // The problem with these is that they mask other problems ++ if (fp() <= sp()) { // this attempts to deal with unsigned comparison above ++ return false; ++ } ++ ++ // do some validation of frame elements ++ ++ // first the method ++ ++ Method* m = safe_interpreter_frame_method(); ++ ++ // validate the method we'd find in this potential sender ++ if (!Method::is_valid_method(m)) return false; ++ ++ // stack frames shouldn't be much larger than max_stack elements ++ ++ //if (fp() - sp() > 1024 + m->max_stack()*Interpreter::stackElementSize()) { ++ if (fp() - sp() > 4096) { // stack frames shouldn't be large. ++ return false; ++ } ++ ++ // validate bci/bcp ++ ++ address bcp = interpreter_frame_bcp(); ++ if (m->validate_bci_from_bcp(bcp) < 0) { ++ return false; ++ } ++ ++ // validate ConstantPoolCache* ++ ++ ConstantPoolCache* cp = *interpreter_frame_cache_addr(); ++ ++ if (MetaspaceObj::is_valid(cp) == false) return false; ++ ++ // validate locals ++ ++ address locals = (address) *interpreter_frame_locals_addr(); ++ ++ if (locals > thread->stack_base() || locals < (address) fp()) return false; ++ ++ // We'd have to be pretty unlucky to be mislead at this point ++ ++#endif // CC_INTERP ++ return true; ++} ++ ++BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) { ++#ifdef CC_INTERP ++ // Needed for JVMTI. The result should always be in the interpreterState object ++ assert(false, "NYI"); ++ interpreterState istate = get_interpreterState(); ++#endif // CC_INTERP ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ Method* method = interpreter_frame_method(); ++ BasicType type = method->result_type(); ++ ++ intptr_t* tos_addr; ++ if (method->is_native()) { ++ // Prior to calling into the runtime to report the method_exit the possible ++ // return value is pushed to the native stack. If the result is a jfloat/jdouble ++ // then ST0 is saved. See the note in generate_native_result ++ tos_addr = (intptr_t*)sp(); ++ if (type == T_FLOAT || type == T_DOUBLE) { ++ tos_addr += 2; ++ } ++ } else { ++ tos_addr = (intptr_t*)interpreter_frame_tos_address(); ++ } ++ ++ switch (type) { ++ case T_OBJECT : ++ case T_ARRAY : { ++ oop obj; ++ if (method->is_native()) { ++#ifdef CC_INTERP ++ obj = istate->_oop_temp; ++#else ++ obj = cast_to_oop(at(interpreter_frame_oop_temp_offset)); ++#endif // CC_INTERP ++ } else { ++ oop* obj_p = (oop*)tos_addr; ++ obj = (obj_p == NULL) ? 
(oop)NULL : *obj_p; ++ } ++ assert(obj == NULL || Universe::heap()->is_in(obj), "sanity check"); ++ *oop_result = obj; ++ break; ++ } ++ case T_BOOLEAN : value_result->z = *(jboolean*)tos_addr; break; ++ case T_BYTE : value_result->b = *(jbyte*)tos_addr; break; ++ case T_CHAR : value_result->c = *(jchar*)tos_addr; break; ++ case T_SHORT : value_result->s = *(jshort*)tos_addr; break; ++ case T_INT : value_result->i = *(jint*)tos_addr; break; ++ case T_LONG : value_result->j = *(jlong*)tos_addr; break; ++ case T_FLOAT : value_result->f = *(jfloat*)tos_addr; break; ++ case T_DOUBLE : value_result->d = *(jdouble*)tos_addr; break; ++ case T_VOID : /* Nothing to do */ break; ++ default : ShouldNotReachHere(); ++ } ++ ++ return type; ++} ++ ++ ++intptr_t* frame::interpreter_frame_tos_at(jint offset) const { ++ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); ++ return &interpreter_frame_tos_address()[index]; ++} ++ ++#ifndef PRODUCT ++ ++#define DESCRIBE_FP_OFFSET(name) \ ++ values.describe(frame_no, fp() + frame::name##_offset, #name) ++ ++void frame::describe_pd(FrameValues& values, int frame_no) { ++ if (is_interpreted_frame()) { ++ DESCRIBE_FP_OFFSET(interpreter_frame_sender_sp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_last_sp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_method); ++ DESCRIBE_FP_OFFSET(interpreter_frame_mirror); ++ DESCRIBE_FP_OFFSET(interpreter_frame_mdp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_cache); ++ DESCRIBE_FP_OFFSET(interpreter_frame_locals); ++ DESCRIBE_FP_OFFSET(interpreter_frame_bcp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_initial_sp); ++ } ++} ++#endif ++ ++intptr_t *frame::initial_deoptimization_info() { ++ // used to reset the saved FP ++ return fp(); ++} ++ ++intptr_t* frame::real_fp() const { ++ if (_cb != NULL) { ++ // use the frame size if valid ++ int size = _cb->frame_size(); ++ if (size > 0) { ++ return unextended_sp() + size; ++ } ++ } ++ // else rely on fp() ++ assert(! is_compiled_frame(), "unknown compiled frame size"); ++ return fp(); ++} ++ ++#ifndef PRODUCT ++// This is a generic constructor which is only used by pns() in debug.cpp. ++frame::frame(void* sp, void* fp, void* pc) { ++ init((intptr_t*)sp, (intptr_t*)fp, (address)pc); ++} ++ ++void frame::pd_ps() {} ++#endif +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/frame_loongarch.hpp b/src/hotspot/cpu/loongarch/frame_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/frame_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/frame_loongarch.hpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,171 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_FRAME_LOONGARCH_HPP ++#define CPU_LOONGARCH_FRAME_LOONGARCH_HPP ++ ++#include "runtime/synchronizer.hpp" ++ ++// A frame represents a physical stack frame (an activation). Frames can be ++// C or Java frames, and the Java frames can be interpreted or compiled. ++// In contrast, vframes represent source-level activations, so that one physical frame ++// can correspond to multiple source level frames because of inlining. ++// A frame is comprised of {pc, fp, sp} ++// ------------------------------ Asm interpreter ---------------------------------------- ++// Layout of asm interpreter frame: ++// Low ++// [expression stack ] * <- sp ++// [monitors ] \ ++// ... | monitor block size ++// [monitors ] / ++// [monitor block size ] ++// [byte code index/pointr] = bcx() bcx_offset ++// [pointer to locals ] = locals() locals_offset ++// [constant pool cache ] = cache() cache_offset ++// [methodData ] = mdp() mdx_offset ++// [methodOop ] = method() method_offset ++// [last sp ] = last_sp() last_sp_offset ++// [old stack pointer ] (sender_sp) sender_sp_offset ++// [old frame pointer ] <- fp = link() ++// [return pc ] ++// [oop temp ] (only for native calls) ++// [locals and parameters ] ++// High <- sender sp ++// ------------------------------ Asm interpreter ---------------------------------------- ++// ++// ------------------------------ Native (C frame) --------------------------------------- ++// Layout of C frame: ++// High ++// | ++// - <----- fp <- sender sp ++// fp -8 | [ra] = sender_pc() ++// fp-16 | [fp (sender)] = link() ++// | [...] 
++// | ++// - <----- sp ++// | ++// v ++// Low ++// ------------------------------ Native (C frame) --------------------------------------- ++ ++ public: ++ enum { ++ pc_return_offset = 0, ++ ++ // Java frames ++ java_frame_link_offset = 0, ++ java_frame_return_addr_offset = 1, ++ java_frame_sender_sp_offset = 2, ++ ++ // Native frames ++ native_frame_link_offset = -2, ++ native_frame_return_addr_offset = -1, ++ native_frame_sender_sp_offset = 0, ++ ++ // Interpreter frames ++ interpreter_frame_result_handler_offset = 3, // for native calls only ++ interpreter_frame_oop_temp_offset = 2, // for native calls only ++ ++ interpreter_frame_sender_fp_offset = 0, ++ interpreter_frame_sender_sp_offset = -1, ++ // outgoing sp before a call to an invoked method ++ interpreter_frame_last_sp_offset = interpreter_frame_sender_sp_offset - 1, ++ interpreter_frame_locals_offset = interpreter_frame_last_sp_offset - 1, ++ interpreter_frame_method_offset = interpreter_frame_locals_offset - 1, ++ interpreter_frame_mirror_offset = interpreter_frame_method_offset - 1, ++ interpreter_frame_mdp_offset = interpreter_frame_mirror_offset - 1, ++ interpreter_frame_cache_offset = interpreter_frame_mdp_offset - 1, ++ interpreter_frame_bcp_offset = interpreter_frame_cache_offset - 1, ++ interpreter_frame_initial_sp_offset = interpreter_frame_bcp_offset - 1, ++ ++ interpreter_frame_monitor_block_top_offset = interpreter_frame_initial_sp_offset, ++ interpreter_frame_monitor_block_bottom_offset = interpreter_frame_initial_sp_offset, ++ ++ // Entry frames ++ entry_frame_call_wrapper_offset = -9, ++ ++ // Native frames ++ ++ native_frame_initial_param_offset = 2 ++ ++ }; ++ ++ intptr_t ptr_at(int offset) const { ++ return *ptr_at_addr(offset); ++ } ++ ++ void ptr_at_put(int offset, intptr_t value) { ++ *ptr_at_addr(offset) = value; ++ } ++ ++ private: ++ // an additional field beyond _sp and _pc: ++ intptr_t* _fp; // frame pointer ++ // The interpreter and adapters will extend the frame of the caller. ++ // Since oopMaps are based on the sp of the caller before extension ++ // we need to know that value. However in order to compute the address ++ // of the return address we need the real "raw" sp. Since sparc already ++ // uses sp() to mean "raw" sp and unextended_sp() to mean the caller's ++ // original sp we use that convention. 
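(Illustrative aside, not part of the patch.) The enum above expresses every Java- and interpreter-frame slot as a signed word offset from fp, and the shared addr_at() helper simply indexes fp by that offset. A tiny standalone model of the address arithmetic, using placeholder constants copied from the enum:

#include <cstdint>
#include <cstdio>

// Word offsets from fp, mirroring the enum above (Java frames).
enum FrameOffsets {
  kLinkOffset       =  0,   // saved fp of the sender
  kReturnAddrOffset =  1,   // return pc
  kSenderSpOffset   = -1,   // interpreter_frame_sender_sp_offset
  kLastSpOffset     = -2,   // interpreter_frame_last_sp_offset
  kLocalsOffset     = -3    // interpreter_frame_locals_offset
};

// addr_at(offset) is fp-relative word indexing, as in the shared frame code.
inline intptr_t* slot_addr(intptr_t* fp, int word_offset) {
  return fp + word_offset;
}

int main() {
  intptr_t fake_stack[16] = {};
  intptr_t* fp = fake_stack + 8;   // pretend frame pointer into a fake frame
  std::printf("return pc slot : %p\n", (void*)slot_addr(fp, kReturnAddrOffset));
  std::printf("sender sp slot : %p\n", (void*)slot_addr(fp, kSenderSpOffset));
  return 0;
}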
++ ++ intptr_t* _unextended_sp; ++ void adjust_unextended_sp(); ++ ++ intptr_t* ptr_at_addr(int offset) const { ++ return (intptr_t*) addr_at(offset); ++ } ++#ifdef ASSERT ++ // Used in frame::sender_for_{interpreter,compiled}_frame ++ static void verify_deopt_original_pc(CompiledMethod* nm, intptr_t* unextended_sp); ++#endif ++ ++ public: ++ // Constructors ++ ++ frame(intptr_t* sp, intptr_t* fp, address pc); ++ ++ frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc); ++ ++ frame(intptr_t* sp, intptr_t* fp); ++ ++ void init(intptr_t* sp, intptr_t* fp, address pc); ++ ++ // accessors for the instance variables ++ intptr_t* fp() const { return _fp; } ++ ++ inline address* sender_pc_addr() const; ++ ++ // expression stack tos if we are nested in a java call ++ intptr_t* interpreter_frame_last_sp() const; ++ ++ // helper to update a map with callee-saved FP ++ static void update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr); ++ ++ // deoptimization support ++ void interpreter_frame_set_last_sp(intptr_t* sp); ++ ++ static jint interpreter_frame_expression_stack_direction() { return -1; } ++ ++#endif // CPU_LOONGARCH_FRAME_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/frame_loongarch.inline.hpp b/src/hotspot/cpu/loongarch/frame_loongarch.inline.hpp +--- a/src/hotspot/cpu/loongarch/frame_loongarch.inline.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/frame_loongarch.inline.hpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,252 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_FRAME_LOONGARCH_INLINE_HPP ++#define CPU_LOONGARCH_FRAME_LOONGARCH_INLINE_HPP ++ ++#include "code/codeCache.hpp" ++#include "code/vmreg.inline.hpp" ++ ++// Inline functions for Loongson frames: ++ ++// Constructors: ++ ++inline frame::frame() { ++ _pc = NULL; ++ _sp = NULL; ++ _unextended_sp = NULL; ++ _fp = NULL; ++ _cb = NULL; ++ _deopt_state = unknown; ++} ++ ++inline void frame::init(intptr_t* sp, intptr_t* fp, address pc) { ++ _sp = sp; ++ _unextended_sp = sp; ++ _fp = fp; ++ _pc = pc; ++ assert(pc != NULL, "no pc?"); ++ _cb = CodeCache::find_blob(pc); ++ adjust_unextended_sp(); ++ ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} ++ ++inline frame::frame(intptr_t* sp, intptr_t* fp, address pc) { ++ init(sp, fp, pc); ++} ++ ++inline frame::frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc) { ++ _sp = sp; ++ _unextended_sp = unextended_sp; ++ _fp = fp; ++ _pc = pc; ++ assert(pc != NULL, "no pc?"); ++ _cb = CodeCache::find_blob(pc); ++ adjust_unextended_sp(); ++ ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} ++ ++inline frame::frame(intptr_t* sp, intptr_t* fp) { ++ _sp = sp; ++ _unextended_sp = sp; ++ _fp = fp; ++ _pc = (address)(sp[-1]); ++ ++ // Here's a sticky one. This constructor can be called via AsyncGetCallTrace ++ // when last_Java_sp is non-null but the pc fetched is junk. If we are truly ++ // unlucky the junk value could be to a zombied method and we'll die on the ++ // find_blob call. This is also why we can have no asserts on the validity ++ // of the pc we find here. AsyncGetCallTrace -> pd_get_top_frame_for_signal_handler ++ // -> pd_last_frame should use a specialized version of pd_last_frame which could ++ // call a specilaized frame constructor instead of this one. ++ // Then we could use the assert below. However this assert is of somewhat dubious ++ // value. ++ // assert(_pc != NULL, "no pc?"); ++ ++ _cb = CodeCache::find_blob(_pc); ++ adjust_unextended_sp(); ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} ++ ++// Accessors ++ ++inline bool frame::equal(frame other) const { ++ bool ret = sp() == other.sp() ++ && unextended_sp() == other.unextended_sp() ++ && fp() == other.fp() ++ && pc() == other.pc(); ++ assert(!ret || ret && cb() == other.cb() && _deopt_state == other._deopt_state, "inconsistent construction"); ++ return ret; ++} ++ ++// Return unique id for this frame. The id must have a value where we can distinguish ++// identity and younger/older relationship. NULL represents an invalid (incomparable) ++// frame. 
++inline intptr_t* frame::id(void) const { return unextended_sp(); } ++ ++// Relationals on frames based ++// Return true if the frame is younger (more recent activation) than the frame represented by id ++inline bool frame::is_younger(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); ++ return this->id() < id ; } ++ ++// Return true if the frame is older (less recent activation) than the frame represented by id ++inline bool frame::is_older(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); ++ return this->id() > id ; } ++ ++ ++ ++inline intptr_t* frame::link() const { ++ if (is_java_frame()) ++ return (intptr_t*) *(intptr_t **)addr_at(java_frame_link_offset); ++ return (intptr_t*) *(intptr_t **)addr_at(native_frame_link_offset); ++} ++ ++inline intptr_t* frame::link_or_null() const { ++ intptr_t** ptr = is_java_frame() ? (intptr_t **)addr_at(java_frame_link_offset) ++ : (intptr_t **)addr_at(native_frame_link_offset); ++ return os::is_readable_pointer(ptr) ? *ptr : NULL; ++} ++ ++inline intptr_t* frame::unextended_sp() const { return _unextended_sp; } ++ ++// Return address: ++ ++inline address* frame::sender_pc_addr() const { ++ if (is_java_frame()) ++ return (address*) addr_at(java_frame_return_addr_offset); ++ return (address*) addr_at(native_frame_return_addr_offset); ++} ++ ++inline address frame::sender_pc() const { return *sender_pc_addr(); } ++ ++inline intptr_t* frame::sender_sp() const { ++ if (is_java_frame()) ++ return addr_at(java_frame_sender_sp_offset); ++ return addr_at(native_frame_sender_sp_offset); ++} ++ ++inline intptr_t** frame::interpreter_frame_locals_addr() const { ++ return (intptr_t**)addr_at(interpreter_frame_locals_offset); ++} ++ ++inline intptr_t* frame::interpreter_frame_last_sp() const { ++ return *(intptr_t**)addr_at(interpreter_frame_last_sp_offset); ++} ++ ++inline intptr_t* frame::interpreter_frame_bcp_addr() const { ++ return (intptr_t*)addr_at(interpreter_frame_bcp_offset); ++} ++ ++ ++inline intptr_t* frame::interpreter_frame_mdp_addr() const { ++ return (intptr_t*)addr_at(interpreter_frame_mdp_offset); ++} ++ ++ ++ ++// Constant pool cache ++ ++inline ConstantPoolCache** frame::interpreter_frame_cache_addr() const { ++ return (ConstantPoolCache**)addr_at(interpreter_frame_cache_offset); ++} ++ ++// Method ++ ++inline Method** frame::interpreter_frame_method_addr() const { ++ return (Method**)addr_at(interpreter_frame_method_offset); ++} ++ ++// Mirror ++ ++inline oop* frame::interpreter_frame_mirror_addr() const { ++ return (oop*)addr_at(interpreter_frame_mirror_offset); ++} ++ ++// top of expression stack ++inline intptr_t* frame::interpreter_frame_tos_address() const { ++ intptr_t* last_sp = interpreter_frame_last_sp(); ++ if (last_sp == NULL ) { ++ return sp(); ++ } else { ++ // sp() may have been extended by an adapter ++ assert(last_sp <= (intptr_t*)interpreter_frame_monitor_end(), "bad tos"); ++ return last_sp; ++ } ++} ++ ++inline oop* frame::interpreter_frame_temp_oop_addr() const { ++ return (oop *)(fp() + interpreter_frame_oop_temp_offset); ++} ++ ++inline int frame::interpreter_frame_monitor_size() { ++ return BasicObjectLock::size(); ++} ++ ++ ++// expression stack ++// (the max_stack arguments are used by the GC; see class FrameClosure) ++ ++inline intptr_t* frame::interpreter_frame_expression_stack() const { ++ intptr_t* monitor_end = (intptr_t*) interpreter_frame_monitor_end(); ++ return monitor_end-1; ++} ++ ++// Entry frames ++ ++inline JavaCallWrapper** 
frame::entry_frame_call_wrapper_addr() const { ++ return (JavaCallWrapper**)addr_at(entry_frame_call_wrapper_offset); ++} ++ ++// Compiled frames ++ ++inline oop frame::saved_oop_result(RegisterMap* map) const { ++ return *((oop*) map->location(V0->as_VMReg())); ++} ++ ++inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) { ++ *((oop*) map->location(V0->as_VMReg())) = obj; ++} ++ ++#endif // CPU_LOONGARCH_FRAME_LOONGARCH_INLINE_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/gc/g1/g1BarrierSetAssembler_loongarch.cpp b/src/hotspot/cpu/loongarch/gc/g1/g1BarrierSetAssembler_loongarch.cpp +--- a/src/hotspot/cpu/loongarch/gc/g1/g1BarrierSetAssembler_loongarch.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/gc/g1/g1BarrierSetAssembler_loongarch.cpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,523 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/g1/g1BarrierSet.hpp" ++#include "gc/g1/g1BarrierSetAssembler.hpp" ++#include "gc/g1/g1BarrierSetRuntime.hpp" ++#include "gc/g1/g1CardTable.hpp" ++#include "gc/g1/g1ThreadLocalData.hpp" ++#include "gc/g1/heapRegion.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "utilities/macros.hpp" ++#ifdef COMPILER1 ++#include "c1/c1_LIRAssembler.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "gc/g1/c1/g1BarrierSetC1.hpp" ++#endif ++ ++#define __ masm-> ++ ++void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, RegSet saved_regs) { ++ bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; ++ ++ if (!dest_uninitialized) { ++#ifndef OPT_THREAD ++ Register thread = T9; ++ __ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ ++ Label filtered; ++ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); ++ // Is marking active? 
++ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { ++ __ ld_w(AT, in_progress); ++ } else { ++ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); ++ __ ld_b(AT, in_progress); ++ } ++ ++ __ beqz(AT, filtered); ++ ++ __ push(saved_regs); ++ if (count == A0) { ++ if (addr == A1) { ++ __ move(AT, A0); ++ __ move(A0, A1); ++ __ move(A1, AT); ++ } else { ++ __ move(A1, count); ++ __ move(A0, addr); ++ } ++ } else { ++ __ move(A0, addr); ++ __ move(A1, count); ++ } ++ if (UseCompressedOops) { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_narrow_oop_entry), 2); ++ } else { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_oop_entry), 2); ++ } ++ __ pop(saved_regs); ++ ++ __ bind(filtered); ++ } ++} ++ ++void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, Register tmp, RegSet saved_regs) { ++ __ push(saved_regs); ++ if (count == A0) { ++ assert_different_registers(A1, addr); ++ __ move(A1, count); ++ __ move(A0, addr); ++ } else { ++ assert_different_registers(A0, count); ++ __ move(A0, addr); ++ __ move(A1, count); ++ } ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_post_entry), 2); ++ __ pop(saved_regs); ++} ++ ++void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread) { ++ bool on_oop = type == T_OBJECT || type == T_ARRAY; ++ bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0; ++ bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0; ++ bool on_reference = on_weak || on_phantom; ++ ModRefBarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); ++ if (on_oop && on_reference) { ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // RA is live. It must be saved around calls. ++ __ enter(); // barrier may call runtime ++ // Generate the G1 pre-barrier code to log the value of ++ // the referent field in an SATB buffer. ++ g1_write_barrier_pre(masm /* masm */, ++ noreg /* obj */, ++ dst /* pre_val */, ++ thread /* thread */, ++ tmp1 /* tmp */, ++ true /* tosca_live */, ++ true /* expand_call */); ++ __ leave(); ++ } ++} ++ ++void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call) { ++ // If expand_call is true then we expand the call_VM_leaf macro ++ // directly to skip generating the check by ++ // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. ++ ++ assert(thread == TREG, "must be"); ++ ++ Label done; ++ Label runtime; ++ ++ assert(pre_val != noreg, "check this code"); ++ ++ if (obj != noreg) { ++ assert_different_registers(obj, pre_val, tmp); ++ assert(pre_val != V0, "check this code"); ++ } ++ ++ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); ++ Address index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); ++ ++ // Is marking active? 
++ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { ++ __ ld_w(AT, in_progress); ++ } else { ++ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); ++ __ ld_b(AT, in_progress); ++ } ++ __ beqz(AT, done); ++ ++ // Do we need to load the previous value? ++ if (obj != noreg) { ++ __ load_heap_oop(pre_val, Address(obj, 0)); ++ } ++ ++ // Is the previous value null? ++ __ beqz(pre_val, done); ++ ++ // Can we store original value in the thread's buffer? ++ // Is index == 0? ++ // (The index field is typed as size_t.) ++ ++ __ ld_d(tmp, index); ++ __ beqz(tmp, runtime); ++ ++ __ addi_d(tmp, tmp, -1 * wordSize); ++ __ st_d(tmp, index); ++ __ ld_d(AT, buffer); ++ ++ // Record the previous value ++ __ stx_d(pre_val, tmp, AT); ++ __ b(done); ++ ++ __ bind(runtime); ++ // save the live input values ++ if (tosca_live) __ push(V0); ++ ++ if (obj != noreg && obj != V0) __ push(obj); ++ ++ if (pre_val != V0) __ push(pre_val); ++ ++ // Calling the runtime using the regular call_VM_leaf mechanism generates ++ // code (generated by InterpreterMacroAssember::call_VM_leaf_base) ++ // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL. ++ // ++ // If we care generating the pre-barrier without a frame (e.g. in the ++ // intrinsified Reference.get() routine) then ebp might be pointing to ++ // the caller frame and so this check will most likely fail at runtime. ++ // ++ // Expanding the call directly bypasses the generation of the check. ++ // So when we do not have have a full interpreter frame on the stack ++ // expand_call should be passed true. ++ ++ if (expand_call) { ++ assert(pre_val != A1, "smashed arg"); ++ if (thread != A1) __ move(A1, thread); ++ if (pre_val != A0) __ move(A0, pre_val); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); ++ } else { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); ++ } ++ ++ // save the live input values ++ if (pre_val != V0) ++ __ pop(pre_val); ++ ++ if (obj != noreg && obj != V0) ++ __ pop(obj); ++ ++ if (tosca_live) __ pop(V0); ++ ++ __ bind(done); ++} ++ ++void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, ++ Register store_addr, ++ Register new_val, ++ Register thread, ++ Register tmp, ++ Register tmp2) { ++ assert_different_registers(tmp, tmp2, AT); ++ assert(thread == TREG, "must be"); ++ ++ Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); ++ ++ CardTableBarrierSet* ct = barrier_set_cast(BarrierSet::barrier_set()); ++ assert(sizeof(*ct->card_table()->byte_map_base()) == sizeof(jbyte), "adjust this code"); ++ ++ Label done; ++ Label runtime; ++ ++ // Does store cross heap regions? ++ __ xorr(AT, store_addr, new_val); ++ __ srli_d(AT, AT, HeapRegion::LogOfHRGrainBytes); ++ __ beqz(AT, done); ++ ++ // crosses regions, storing NULL? ++ __ beqz(new_val, done); ++ ++ // storing region crossing non-NULL, is card already dirty? ++ const Register card_addr = tmp; ++ const Register cardtable = tmp2; ++ ++ __ move(card_addr, store_addr); ++ __ srli_d(card_addr, card_addr, CardTable::card_shift); ++ // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT ++ // a valid address and therefore is not properly handled by the relocation code. 
++ __ li(cardtable, (intptr_t)ct->card_table()->byte_map_base()); ++ __ add_d(card_addr, card_addr, cardtable); ++ ++ __ ld_bu(AT, card_addr, 0); ++ __ addi_d(AT, AT, -1 * (int)G1CardTable::g1_young_card_val()); ++ __ beqz(AT, done); ++ ++ assert((int)CardTable::dirty_card_val() == 0, "must be 0"); ++ ++ __ membar(__ StoreLoad); ++ __ ld_bu(AT, card_addr, 0); ++ __ beqz(AT, done); ++ ++ // storing a region crossing, non-NULL oop, card is clean. ++ // dirty card and log. ++ __ st_b(R0, card_addr, 0); ++ ++ __ ld_d(AT, queue_index); ++ __ beqz(AT, runtime); ++ __ addi_d(AT, AT, -1 * wordSize); ++ __ st_d(AT, queue_index); ++ __ ld_d(tmp2, buffer); ++ __ ld_d(AT, queue_index); ++ __ stx_d(card_addr, tmp2, AT); ++ __ b(done); ++ ++ __ bind(runtime); ++ // save the live input values ++ __ push(store_addr); ++ __ push(new_val); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, TREG); ++ __ pop(new_val); ++ __ pop(store_addr); ++ ++ __ bind(done); ++} ++ ++void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ bool as_normal = (decorators & AS_NORMAL) != 0; ++ assert((decorators & IS_DEST_UNINITIALIZED) == 0, "unsupported"); ++ ++ bool needs_pre_barrier = as_normal; ++ bool needs_post_barrier = val != noreg && in_heap; ++ ++ Register tmp3 = RT3; ++ Register rthread = TREG; ++ // flatten object address if needed ++ // We do it regardless of precise because we need the registers ++ if (dst.index() == noreg && dst.disp() == 0) { ++ if (dst.base() != tmp3) { ++ __ move(tmp3, dst.base()); ++ } ++ } else { ++ __ lea(tmp3, dst); ++ } ++ ++ if (needs_pre_barrier) { ++ g1_write_barrier_pre(masm /*masm*/, ++ tmp3 /* obj */, ++ tmp2 /* pre_val */, ++ rthread /* thread */, ++ tmp1 /* tmp */, ++ val != noreg /* tosca_live */, ++ false /* expand_call */); ++ } ++ if (val == noreg) { ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), val, noreg, noreg); ++ } else { ++ Register new_val = val; ++ if (needs_post_barrier) { ++ // G1 barrier needs uncompressed oop for region cross check. ++ if (UseCompressedOops) { ++ new_val = tmp2; ++ __ move(new_val, val); ++ } ++ } ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), val, noreg, noreg); ++ if (needs_post_barrier) { ++ g1_write_barrier_post(masm /*masm*/, ++ tmp3 /* store_adr */, ++ new_val /* new_val */, ++ rthread /* thread */, ++ tmp1 /* tmp */, ++ tmp2 /* tmp2 */); ++ } ++ } ++} ++ ++#ifdef COMPILER1 ++ ++#undef __ ++#define __ ce->masm()-> ++ ++void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub) { ++ G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); ++ // At this point we know that marking is in progress. ++ // If do_load() is true then we have to emit the ++ // load of the previous value; otherwise it has already ++ // been loaded into _pre_val. 
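(Illustrative aside, not part of the patch.) The two assembler barriers above, g1_write_barrier_pre and g1_write_barrier_post, encode the usual G1 SATB-enqueue and dirty-card decisions; the C1 stubs that follow reuse the same logic. The standalone sketch below models those decisions in plain C++. SatbQueue, the 512-byte card size and the young-card value are assumptions chosen for illustration, and queue handling is reduced to a boolean meaning "take the runtime slow path":

#include <cstddef>
#include <cstdint>

// --- SATB pre-barrier: record the previous value while marking is active. ---
struct SatbQueue {
  bool      active;   // is concurrent marking in progress?
  size_t    index;    // byte index of the next free slot, counted down; 0 means full
  intptr_t* buffer;   // thread-local SATB buffer
};

// Returns true when the runtime slow path must be taken for pre_val.
inline bool satb_pre_barrier(SatbQueue& q, intptr_t pre_val) {
  if (!q.active)    return false;  // marking inactive: nothing to record
  if (pre_val == 0) return false;  // previous value was NULL: nothing to record
  if (q.index == 0) return true;   // buffer full: hand off to the runtime
  q.index -= sizeof(intptr_t);     // step the index down one word and
  q.buffer[q.index / sizeof(intptr_t)] = pre_val;  // store the previous value
  return false;
}

// --- Post-barrier: dirty the card for a region-crossing, non-NULL store. ---
const int     kCardShift = 9;      // assumption: 512-byte cards
const uint8_t kDirtyCard = 0;      // the code above asserts dirty_card_val() == 0
const uint8_t kYoungCard = 2;      // placeholder for g1_young_card_val()

// Returns true when the card must be enqueued on the dirty-card queue.
inline bool g1_post_barrier(uint8_t* byte_map_base, int log_region_size,
                            uintptr_t store_addr, uintptr_t new_val) {
  if (((store_addr ^ new_val) >> log_region_size) == 0) return false;  // same region
  if (new_val == 0) return false;                                      // stored NULL
  uint8_t* card = byte_map_base + (store_addr >> kCardShift);
  if (*card == kYoungCard) return false;   // young regions need no refinement
  // (the real code re-reads the card after a StoreLoad fence here)
  if (*card == kDirtyCard) return false;   // already dirty: nothing to do
  *card = kDirtyCard;                      // dirty the card, then enqueue it
  return true;
}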
++ ++ __ bind(*stub->entry()); ++ ++ assert(stub->pre_val()->is_register(), "Precondition."); ++ ++ Register pre_val_reg = stub->pre_val()->as_register(); ++ ++ if (stub->do_load()) { ++ ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/, false /*unaligned*/); ++ } ++ __ beqz(pre_val_reg, *stub->continuation()); ++ ce->store_parameter(stub->pre_val()->as_register(), 0); ++ __ call(bs->pre_barrier_c1_runtime_code_blob()->code_begin(), relocInfo::runtime_call_type); ++ __ b(*stub->continuation()); ++} ++ ++void G1BarrierSetAssembler::gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub) { ++ G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); ++ __ bind(*stub->entry()); ++ assert(stub->addr()->is_register(), "Precondition."); ++ assert(stub->new_val()->is_register(), "Precondition."); ++ Register new_val_reg = stub->new_val()->as_register(); ++ __ beqz(new_val_reg, *stub->continuation()); ++ ce->store_parameter(stub->addr()->as_pointer_register(), 0); ++ __ call(bs->post_barrier_c1_runtime_code_blob()->code_begin(), relocInfo::runtime_call_type); ++ __ b(*stub->continuation()); ++} ++ ++#undef __ ++ ++#define __ sasm-> ++ ++void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) { ++ __ prologue("g1_pre_barrier", false); ++ ++ // arg0 : previous value of memory ++ ++ BarrierSet* bs = BarrierSet::barrier_set(); ++ ++ const Register pre_val = A0; ++ const Register thread = TREG; ++ const Register tmp = SCR2; ++ ++ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); ++ Address queue_index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); ++ ++ Label done; ++ Label runtime; ++ ++ // Is marking still active? ++ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { ++ __ ld_w(tmp, in_progress); ++ } else { ++ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); ++ __ ld_b(tmp, in_progress); ++ } ++ __ beqz(tmp, done); ++ ++ // Can we store original value in the thread's buffer? ++ __ ld_ptr(tmp, queue_index); ++ __ beqz(tmp, runtime); ++ ++ __ addi_d(tmp, tmp, -wordSize); ++ __ st_ptr(tmp, queue_index); ++ __ ld_ptr(SCR1, buffer); ++ __ add_d(tmp, tmp, SCR1); ++ __ load_parameter(0, SCR1); ++ __ st_ptr(SCR1, Address(tmp, 0)); ++ __ b(done); ++ ++ __ bind(runtime); ++ __ pushad(); ++ __ load_parameter(0, pre_val); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); ++ __ popad(); ++ __ bind(done); ++ ++ __ epilogue(); ++} ++ ++void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* sasm) { ++ __ prologue("g1_post_barrier", false); ++ ++ // arg0: store_address ++ Address store_addr(FP, 2 * BytesPerWord); ++ ++ BarrierSet* bs = BarrierSet::barrier_set(); ++ CardTableBarrierSet* ctbs = barrier_set_cast(bs); ++ CardTable* ct = ctbs->card_table(); ++ ++ Label done; ++ Label runtime; ++ ++ // At this point we know new_value is non-NULL and the new_value crosses regions. 
++ // Must check to see if card is already dirty ++ ++ const Register thread = TREG; ++ ++ Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); ++ ++ const Register card_offset = SCR2; ++ // RA is free here, so we can use it to hold the byte_map_base. ++ const Register byte_map_base = RA; ++ ++ assert_different_registers(card_offset, byte_map_base, SCR1); ++ ++ __ load_parameter(0, card_offset); ++ __ srli_d(card_offset, card_offset, CardTable::card_shift); ++ __ load_byte_map_base(byte_map_base); ++ __ ldx_bu(SCR1, byte_map_base, card_offset); ++ __ addi_d(SCR1, SCR1, -(int)G1CardTable::g1_young_card_val()); ++ __ beqz(SCR1, done); ++ ++ assert((int)CardTable::dirty_card_val() == 0, "must be 0"); ++ ++ __ membar(__ StoreLoad); ++ __ ldx_bu(SCR1, byte_map_base, card_offset); ++ __ beqz(SCR1, done); ++ ++ // storing region crossing non-NULL, card is clean. ++ // dirty card and log. ++ __ stx_b(R0, byte_map_base, card_offset); ++ ++ // Convert card offset into an address in card_addr ++ Register card_addr = card_offset; ++ __ add_d(card_addr, byte_map_base, card_addr); ++ ++ __ ld_ptr(SCR1, queue_index); ++ __ beqz(SCR1, runtime); ++ __ addi_d(SCR1, SCR1, -wordSize); ++ __ st_ptr(SCR1, queue_index); ++ ++ // Reuse RA to hold buffer_addr ++ const Register buffer_addr = RA; ++ ++ __ ld_ptr(buffer_addr, buffer); ++ __ stx_d(card_addr, buffer_addr, SCR1); ++ __ b(done); ++ ++ __ bind(runtime); ++ __ pushad(); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread); ++ __ popad(); ++ __ bind(done); ++ __ epilogue(); ++} ++ ++#undef __ ++ ++#endif // COMPILER1 +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/gc/g1/g1BarrierSetAssembler_loongarch.hpp b/src/hotspot/cpu/loongarch/gc/g1/g1BarrierSetAssembler_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/gc/g1/g1BarrierSetAssembler_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/gc/g1/g1BarrierSetAssembler_loongarch.hpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,71 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_GC_G1_G1BARRIERSETASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_GC_G1_G1BARRIERSETASSEMBLER_LOONGARCH_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/modRefBarrierSetAssembler.hpp" ++ ++class LIR_Assembler; ++class StubAssembler; ++class G1PreBarrierStub; ++class G1PostBarrierStub; ++ ++class G1BarrierSetAssembler: public ModRefBarrierSetAssembler { ++ protected: ++ virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count, RegSet saved_regs); ++ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count, Register tmp, RegSet saved_regs); ++ ++ void g1_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call); ++ ++ void g1_write_barrier_post(MacroAssembler* masm, ++ Register store_addr, ++ Register new_val, ++ Register thread, ++ Register tmp, ++ Register tmp2); ++ ++ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++ ++ public: ++ void gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub); ++ void gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub); ++ ++ void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); ++ void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm); ++ ++ virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread); ++}; ++ ++#endif // CPU_LOONGARCH_GC_G1_G1BARRIERSETASSEMBLER_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/gc/shared/barrierSetAssembler_loongarch.cpp b/src/hotspot/cpu/loongarch/gc/shared/barrierSetAssembler_loongarch.cpp +--- a/src/hotspot/cpu/loongarch/gc/shared/barrierSetAssembler_loongarch.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/gc/shared/barrierSetAssembler_loongarch.cpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,255 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "runtime/jniHandles.hpp" ++#include "runtime/thread.hpp" ++ ++#define __ masm-> ++ ++void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread) { ++ // RA is live. It must be saved around calls. ++ ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ bool in_native = (decorators & IN_NATIVE) != 0; ++ bool is_not_null = (decorators & IS_NOT_NULL) != 0; ++ ++ switch (type) { ++ case T_OBJECT: ++ case T_ARRAY: { ++ if (in_heap) { ++ if (UseCompressedOops) { ++ __ ld_wu(dst, src); ++ if (is_not_null) { ++ __ decode_heap_oop_not_null(dst); ++ } else { ++ __ decode_heap_oop(dst); ++ } ++ } else ++ { ++ __ ld_ptr(dst, src); ++ } ++ } else { ++ assert(in_native, "why else?"); ++ __ ld_ptr(dst, src); ++ } ++ break; ++ } ++ case T_BOOLEAN: __ ld_bu (dst, src); break; ++ case T_BYTE: __ ld_b (dst, src); break; ++ case T_CHAR: __ ld_hu (dst, src); break; ++ case T_SHORT: __ ld_h (dst, src); break; ++ case T_INT: __ ld_w (dst, src); break; ++ case T_LONG: __ ld_d (dst, src); break; ++ case T_ADDRESS: __ ld_ptr(dst, src); break; ++ case T_FLOAT: ++ assert(dst == noreg, "only to ftos"); ++ __ fld_s(FSF, src); ++ break; ++ case T_DOUBLE: ++ assert(dst == noreg, "only to dtos"); ++ __ fld_d(FSF, src); ++ break; ++ default: Unimplemented(); ++ } ++} ++ ++void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ bool in_native = (decorators & IN_NATIVE) != 0; ++ bool is_not_null = (decorators & IS_NOT_NULL) != 0; ++ ++ switch (type) { ++ case T_OBJECT: ++ case T_ARRAY: { ++ if (in_heap) { ++ if (val == noreg) { ++ assert(!is_not_null, "inconsistent access"); ++ if (UseCompressedOops) { ++ __ st_w(R0, dst); ++ } else { ++ __ st_d(R0, dst); ++ } ++ } else { ++ if (UseCompressedOops) { ++ assert(!dst.uses(val), "not enough registers"); ++ if (is_not_null) { ++ __ encode_heap_oop_not_null(val); ++ } else { ++ __ encode_heap_oop(val); ++ } ++ __ st_w(val, dst); ++ } else ++ { ++ __ st_ptr(val, dst); ++ } ++ } ++ } else { ++ assert(in_native, "why else?"); ++ assert(val != noreg, "not supported"); ++ __ st_ptr(val, dst); ++ } ++ break; ++ } ++ case T_BOOLEAN: ++ __ andi(val, val, 0x1); // boolean is true if LSB is 1 ++ __ st_b(val, dst); ++ break; ++ case T_BYTE: ++ __ st_b(val, dst); ++ break; ++ case T_SHORT: ++ __ st_h(val, dst); ++ break; ++ case T_CHAR: ++ __ st_h(val, dst); ++ break; ++ case T_INT: ++ __ st_w(val, dst); ++ break; ++ case T_LONG: ++ __ st_d(val, dst); ++ break; ++ case T_FLOAT: ++ assert(val == noreg, "only tos"); ++ __ fst_s(FSF, dst); ++ break; ++ case T_DOUBLE: ++ assert(val == noreg, "only tos"); ++ __ fst_d(FSF, dst); ++ break; ++ case T_ADDRESS: ++ __ st_ptr(val, dst); ++ break; ++ default: Unimplemented(); ++ } ++} ++ ++void BarrierSetAssembler::obj_equals(MacroAssembler* masm, ++ Register obj1, Address obj2) { ++ Unimplemented(); ++} ++ ++void BarrierSetAssembler::obj_equals(MacroAssembler* masm, ++ Register obj1, Register obj2) { ++ Unimplemented(); ++} ++ ++void BarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, ++ Register obj, Register tmp, Label& slowpath) { ++ __ clear_jweak_tag(obj); ++ __ ld_ptr(obj, 
Address(obj, 0));
++}
++
++// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
++void BarrierSetAssembler::tlab_allocate(MacroAssembler* masm, Register obj,
++                                        Register var_size_in_bytes,
++                                        int con_size_in_bytes,
++                                        Register t1,
++                                        Register t2,
++                                        Label& slow_case) {
++  assert_different_registers(obj, t2);
++  assert_different_registers(obj, var_size_in_bytes);
++  Register end = t2;
++
++  // verify_tlab();
++
++  __ ld_ptr(obj, Address(TREG, JavaThread::tlab_top_offset()));
++  if (var_size_in_bytes == noreg) {
++    __ lea(end, Address(obj, con_size_in_bytes));
++  } else {
++    __ lea(end, Address(obj, var_size_in_bytes, Address::times_1, 0));
++  }
++  __ ld_ptr(SCR1, Address(TREG, JavaThread::tlab_end_offset()));
++  __ blt_far(SCR1, end, slow_case, false);
++
++  // update the tlab top pointer
++  __ st_ptr(end, Address(TREG, JavaThread::tlab_top_offset()));
++
++  // recover var_size_in_bytes if necessary
++  if (var_size_in_bytes == end) {
++    __ sub_d(var_size_in_bytes, var_size_in_bytes, obj);
++  }
++  // verify_tlab();
++}
++
++// Defines obj, preserves var_size_in_bytes
++void BarrierSetAssembler::eden_allocate(MacroAssembler* masm, Register obj,
++                                        Register var_size_in_bytes,
++                                        int con_size_in_bytes,
++                                        Register t1,
++                                        Label& slow_case) {
++  assert_different_registers(obj, var_size_in_bytes, t1);
++  if (!Universe::heap()->supports_inline_contig_alloc()) {
++    __ b_far(slow_case);
++  } else {
++    Register end = t1;
++    Register heap_end = SCR2;
++    Label retry;
++    __ bind(retry);
++
++    __ li(SCR1, (address)Universe::heap()->end_addr());
++    __ ld_d(heap_end, SCR1, 0);
++
++    // Get the current top of the heap
++    __ li(SCR1, (address) Universe::heap()->top_addr());
++    __ ll_d(obj, SCR1, 0);
++
++    // Adjust it by the size of our new object
++    if (var_size_in_bytes == noreg)
++      __ addi_d(end, obj, con_size_in_bytes);
++    else
++      __ add_d(end, obj, var_size_in_bytes);
++
++    // if end < obj then we wrapped around high memory
++    __ blt_far(end, obj, slow_case, false);
++    __ blt_far(heap_end, end, slow_case, false);
++
++    // If heap top hasn't been changed by some other thread, update it.
++    __ sc_d(end, SCR1, 0);
++    __ beqz(end, retry);
++
++    incr_allocated_bytes(masm, var_size_in_bytes, con_size_in_bytes, t1);
++  }
++}
++
++void BarrierSetAssembler::incr_allocated_bytes(MacroAssembler* masm,
++                                               Register var_size_in_bytes,
++                                               int con_size_in_bytes,
++                                               Register t1) {
++  assert(t1->is_valid(), "need temp reg");
++
++  __ ld_ptr(t1, Address(TREG, in_bytes(JavaThread::allocated_bytes_offset())));
++  if (var_size_in_bytes->is_valid())
++    __ add_d(t1, t1, var_size_in_bytes);
++  else
++    __ addi_d(t1, t1, con_size_in_bytes);
++  __ st_ptr(t1, Address(TREG, in_bytes(JavaThread::allocated_bytes_offset())));
++}
+diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/gc/shared/barrierSetAssembler_loongarch.hpp b/src/hotspot/cpu/loongarch/gc/shared/barrierSetAssembler_loongarch.hpp
+--- a/src/hotspot/cpu/loongarch/gc/shared/barrierSetAssembler_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800
++++ b/src/hotspot/cpu/loongarch/gc/shared/barrierSetAssembler_loongarch.hpp 2024-01-30 10:00:11.838098438 +0800
+@@ -0,0 +1,88 @@
++/*
++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef CPU_LOONGARCH_GC_SHARED_BARRIERSETASSEMBLER_LOONGARCH_HPP
++#define CPU_LOONGARCH_GC_SHARED_BARRIERSETASSEMBLER_LOONGARCH_HPP
++
++#include "asm/macroAssembler.hpp"
++#include "gc/shared/barrierSet.hpp"
++#include "memory/allocation.hpp"
++#include "oops/access.hpp"
++
++class InterpreterMacroAssembler;
++
++class BarrierSetAssembler: public CHeapObj<mtGC> {
++private:
++  void incr_allocated_bytes(MacroAssembler* masm,
++                            Register var_size_in_bytes,
++                            int con_size_in_bytes,
++                            Register t1);
++
++public:
++  virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
++                                  Register dst, Register count, RegSet saved_regs) {}
++  virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
++                                  Register dst, Register count, Register scratch, RegSet saved_regs) {}
++
++  virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
++                       Register dst, Address src, Register tmp1, Register tmp_thread);
++  virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
++                        Address dst, Register val, Register tmp1, Register tmp2);
++
++
++  virtual void obj_equals(MacroAssembler* masm,
++                          Register obj1, Register obj2);
++  virtual void obj_equals(MacroAssembler* masm,
++                          Register obj1, Address obj2);
++
++  virtual void resolve(MacroAssembler* masm, DecoratorSet decorators, Register obj) {
++    // Default implementation does not need to do anything.
++ } ++ ++ // Support for jniFastGetField to try resolving a jobject/jweak in native ++ virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, ++ Register obj, Register tmp, Label& slowpath); ++ ++ virtual void tlab_allocate(MacroAssembler* masm, ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Register t2, // temp register ++ Label& slow_case // continuation point if fast allocation fails ++ ); ++ ++ void eden_allocate(MacroAssembler* masm, ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Label& slow_case // continuation point if fast allocation fails ++ ); ++ ++ virtual void barrier_stubs_init() {} ++}; ++ ++#endif // CPU_LOONGARCH_GC_SHARED_BARRIERSETASSEMBLER_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/gc/shared/cardTableBarrierSetAssembler_loongarch.cpp b/src/hotspot/cpu/loongarch/gc/shared/cardTableBarrierSetAssembler_loongarch.cpp +--- a/src/hotspot/cpu/loongarch/gc/shared/cardTableBarrierSetAssembler_loongarch.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/gc/shared/cardTableBarrierSetAssembler_loongarch.cpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,140 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/cardTable.hpp" ++#include "gc/shared/cardTableBarrierSet.hpp" ++#include "gc/shared/cardTableBarrierSetAssembler.hpp" ++ ++#define __ masm-> ++ ++#define T4 RT4 ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#else ++#define BLOCK_COMMENT(str) __ block_comment(str) ++#endif ++ ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++ ++#define TIMES_OOP (UseCompressedOops ? 
Address::times_4 : Address::times_8)
++
++void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
++                                                                    Register addr, Register count, Register tmp,
++                                                                    RegSet saved_regs) {
++  BarrierSet *bs = BarrierSet::barrier_set();
++  CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(bs);
++  CardTable* ct = ctbs->card_table();
++  assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code");
++  intptr_t disp = (intptr_t) ct->byte_map_base();
++
++  Label L_loop, L_done;
++  const Register end = count;
++  assert_different_registers(addr, end);
++
++  __ beq(count, R0, L_done); // zero count - nothing to do
++
++  if (ct->scanned_concurrently()) __ membar(__ StoreStore);
++
++  __ li(tmp, disp);
++
++  __ lea(end, Address(addr, count, TIMES_OOP, 0)); // end == addr+count*oop_size
++  __ addi_d(end, end, -BytesPerHeapOop); // end - 1 to make inclusive
++  __ shr(addr, CardTable::card_shift);
++  __ shr(end, CardTable::card_shift);
++  __ sub_d(end, end, addr); // end --> cards count
++
++  __ add_d(addr, addr, tmp);
++
++  __ BIND(L_loop);
++  __ stx_b(R0, addr, count);
++  __ addi_d(count, count, -1);
++  __ bge(count, R0, L_loop);
++
++  __ BIND(L_done);
++}
++
++void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register obj, Address dst) {
++  // Does a store check for the oop in register obj. The content of
++  // register obj is destroyed afterwards.
++  BarrierSet* bs = BarrierSet::barrier_set();
++
++  CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(bs);
++  CardTable* ct = ctbs->card_table();
++  assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code");
++
++  __ shr(obj, CardTable::card_shift);
++
++  Address card_addr;
++
++  intptr_t byte_map_base = (intptr_t)ct->byte_map_base();
++  Register tmp = T4;
++  assert_different_registers(tmp, obj);
++  __ li(tmp, byte_map_base);
++  __ add_d(tmp, tmp, obj);
++
++  assert(CardTable::dirty_card_val() == 0, "must be");
++
++  jbyte dirty = CardTable::dirty_card_val();
++  if (UseCondCardMark) {
++    Label L_already_dirty;
++    __ membar(__ StoreLoad);
++    __ ld_b(AT, tmp, 0);
++    __ addi_d(AT, AT, -1 * dirty);
++    __ beq(AT, R0, L_already_dirty);
++    __ st_b(R0, tmp, 0);
++    __ bind(L_already_dirty);
++  } else {
++    if (ct->scanned_concurrently()) {
++      __ membar(Assembler::StoreStore);
++    }
++    __ st_b(R0, tmp, 0);
++  }
++}
++
++void CardTableBarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
++                                                Address dst, Register val, Register tmp1, Register tmp2) {
++  bool in_heap = (decorators & IN_HEAP) != 0;
++
++  bool is_array = (decorators & IS_ARRAY) != 0;
++  bool on_anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0;
++  bool precise = is_array || on_anonymous;
++
++  bool needs_post_barrier = val != noreg && in_heap;
++
++  BarrierSetAssembler::store_at(masm, decorators, type, dst, val, noreg, noreg);
++  if (needs_post_barrier) {
++    // flatten object address if needed
++    if (!precise || (dst.index() == noreg && dst.disp() == 0)) {
++      store_check(masm, dst.base(), dst);
++    } else {
++      __ lea(tmp1, dst);
++      store_check(masm, tmp1, dst);
++    }
++  }
++}
+diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/gc/shared/cardTableBarrierSetAssembler_loongarch.hpp b/src/hotspot/cpu/loongarch/gc/shared/cardTableBarrierSetAssembler_loongarch.hpp
+--- a/src/hotspot/cpu/loongarch/gc/shared/cardTableBarrierSetAssembler_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800
++++ 
b/src/hotspot/cpu/loongarch/gc/shared/cardTableBarrierSetAssembler_loongarch.hpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,44 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_LOONGARCH_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/modRefBarrierSetAssembler.hpp" ++ ++class CardTableBarrierSetAssembler: public ModRefBarrierSetAssembler { ++protected: ++ void store_check(MacroAssembler* masm, Register obj, Address dst); ++ ++ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, Register tmp, ++ RegSet saved_regs); ++ ++ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++}; ++ ++#endif // CPU_LOONGARCH_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/gc/shared/modRefBarrierSetAssembler_loongarch.cpp b/src/hotspot/cpu/loongarch/gc/shared/modRefBarrierSetAssembler_loongarch.cpp +--- a/src/hotspot/cpu/loongarch/gc/shared/modRefBarrierSetAssembler_loongarch.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/gc/shared/modRefBarrierSetAssembler_loongarch.cpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,53 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/shared/modRefBarrierSetAssembler.hpp" ++ ++#define __ masm-> ++ ++void ModRefBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, RegSet saved_regs) { ++ if (is_oop) { ++ gen_write_ref_array_pre_barrier(masm, decorators, dst, count, saved_regs); ++ } ++} ++ ++void ModRefBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, Register scratch, RegSet saved_regs) { ++ if (is_oop) { ++ gen_write_ref_array_post_barrier(masm, decorators, dst, count, scratch, saved_regs); ++ } ++} ++ ++void ModRefBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ if (type == T_OBJECT || type == T_ARRAY) { ++ oop_store_at(masm, decorators, type, dst, val, tmp1, tmp2); ++ } else { ++ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/gc/shared/modRefBarrierSetAssembler_loongarch.hpp b/src/hotspot/cpu/loongarch/gc/shared/modRefBarrierSetAssembler_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/gc/shared/modRefBarrierSetAssembler_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/gc/shared/modRefBarrierSetAssembler_loongarch.hpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,54 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_GC_SHARED_MODREFBARRIERSETASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_GC_SHARED_MODREFBARRIERSETASSEMBLER_LOONGARCH_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++ ++// The ModRefBarrierSetAssembler filters away accesses on BasicTypes other ++// than T_OBJECT/T_ARRAY (oops). 
The oop accesses call one of the protected ++// accesses, which are overridden in the concrete BarrierSetAssembler. ++ ++class ModRefBarrierSetAssembler: public BarrierSetAssembler { ++protected: ++ virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, RegSet saved_regs) {} ++ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, Register tmp, RegSet saved_regs) {} ++ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) = 0; ++public: ++ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, RegSet saved_regs); ++ virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, Register scratch, RegSet saved_regs); ++ ++ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++}; ++ ++#endif // CPU_LOONGARCH_GC_SHARED_MODREFBARRIERSETASSEMBLER_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/globalDefinitions_loongarch.hpp b/src/hotspot/cpu/loongarch/globalDefinitions_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/globalDefinitions_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/globalDefinitions_loongarch.hpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,53 @@ ++/* ++ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_GLOBALDEFINITIONS_LOONGARCH_HPP ++#define CPU_LOONGARCH_GLOBALDEFINITIONS_LOONGARCH_HPP ++// Size of LoongArch Instructions ++const int BytesPerInstWord = 4; ++ ++const int StackAlignmentInBytes = (2*wordSize); ++ ++// Indicates whether the C calling conventions require that ++// 32-bit integer argument values are properly extended to 64 bits. ++// If set, SharedRuntime::c_calling_convention() must adapt ++// signatures accordingly. ++const bool CCallingConventionRequiresIntsAsLongs = false; ++ ++#define SUPPORTS_NATIVE_CX8 ++ ++// FIXME: LA ++// This makes the games we play when patching difficult, so when we ++// come across an access that needs patching we deoptimize. 
There are ++// ways we can avoid this, but these would slow down C1-compiled code ++// in the default case. We could revisit this decision if we get any ++// evidence that it's worth doing. ++#define DEOPTIMIZE_WHEN_PATCHING ++ ++#define SUPPORT_RESERVED_STACK_AREA ++ ++#define THREAD_LOCAL_POLL ++ ++#endif // CPU_LOONGARCH_GLOBALDEFINITIONS_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/globals_loongarch.hpp b/src/hotspot/cpu/loongarch/globals_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/globals_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/globals_loongarch.hpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,109 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_GLOBALS_LOONGARCH_HPP ++#define CPU_LOONGARCH_GLOBALS_LOONGARCH_HPP ++ ++#include "utilities/globalDefinitions.hpp" ++#include "utilities/macros.hpp" ++ ++// Sets the default values for platform dependent flags used by the runtime system. ++// (see globals.hpp) ++ ++define_pd_global(bool, ShareVtableStubs, true); ++define_pd_global(bool, NeedsDeoptSuspend, false); // only register window machines need this ++ ++define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks ++define_pd_global(bool, TrapBasedNullChecks, false); // Not needed on x86. ++define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs passed to check cast ++ ++define_pd_global(uintx, CodeCacheSegmentSize, 64 TIERED_ONLY(+64)); // Tiered compilation has large code-entry alignment. 
++define_pd_global(intx, CodeEntryAlignment, 16); ++define_pd_global(intx, OptoLoopAlignment, 16); ++define_pd_global(intx, InlineFrequencyCount, 100); ++define_pd_global(intx, InlineSmallCode, 2000); ++ ++#define DEFAULT_STACK_YELLOW_PAGES (2) ++#define DEFAULT_STACK_RED_PAGES (1) ++#define DEFAULT_STACK_SHADOW_PAGES (20 DEBUG_ONLY(+4)) ++#define DEFAULT_STACK_RESERVED_PAGES (1) ++ ++#define MIN_STACK_YELLOW_PAGES DEFAULT_STACK_YELLOW_PAGES ++#define MIN_STACK_RED_PAGES DEFAULT_STACK_RED_PAGES ++#define MIN_STACK_SHADOW_PAGES DEFAULT_STACK_SHADOW_PAGES ++#define MIN_STACK_RESERVED_PAGES (0) ++define_pd_global(intx, StackReservedPages, DEFAULT_STACK_RESERVED_PAGES); ++ ++define_pd_global(intx, StackYellowPages, 2); ++define_pd_global(intx, StackRedPages, 1); ++define_pd_global(intx, StackShadowPages, DEFAULT_STACK_SHADOW_PAGES); ++ ++define_pd_global(bool, RewriteBytecodes, true); ++define_pd_global(bool, RewriteFrequentPairs, true); ++define_pd_global(bool, UseMembar, true); ++// GC Ergo Flags ++define_pd_global(intx, CMSYoungGenPerWorker, 64*M); // default max size of CMS young gen, per GC worker thread ++ ++define_pd_global(uintx, TypeProfileLevel, 111); ++ ++define_pd_global(bool, CompactStrings, true); ++ ++define_pd_global(bool, PreserveFramePointer, false); ++ ++define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong); ++ ++define_pd_global(bool, ThreadLocalHandshakes, true); ++// Only c2 cares about this at the moment ++define_pd_global(intx, AllocatePrefetchStyle, 2); ++define_pd_global(intx, AllocatePrefetchDistance, -1); ++ ++#define ARCH_FLAGS(develop, \ ++ product, \ ++ diagnostic, \ ++ experimental, \ ++ notproduct, \ ++ range, \ ++ constraint, \ ++ writeable) \ ++ \ ++ product(bool, UseCodeCacheAllocOpt, true, \ ++ "Allocate code cache within 32-bit memory address space") \ ++ \ ++ product(bool, UseLSX, false, \ ++ "Use LSX 128-bit vector instructions") \ ++ \ ++ product(bool, UseLASX, false, \ ++ "Use LASX 256-bit vector instructions") \ ++ \ ++ product(bool, UseBarriersForVolatile, false, \ ++ "Use memory barriers to implement volatile accesses") \ ++ \ ++ product(bool, UseCRC32, false, \ ++ "Use CRC32 instructions for CRC32 computation") \ ++ \ ++ product(bool, UseActiveCoresMP, false, \ ++ "Eliminate barriers for single active cpu") ++ ++#endif // CPU_LOONGARCH_GLOBALS_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/icache_loongarch.cpp b/src/hotspot/cpu/loongarch/icache_loongarch.cpp +--- a/src/hotspot/cpu/loongarch/icache_loongarch.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/icache_loongarch.cpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,42 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "runtime/icache.hpp" ++ ++void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) ++{ ++#define __ _masm-> ++ StubCodeMark mark(this, "ICache", "flush_icache_stub"); ++ address start = __ pc(); ++ ++ __ ibar(0); ++ __ ori(V0, A2, 0); ++ __ jr(RA); ++ ++ *flush_icache_stub = (ICache::flush_icache_stub_t)start; ++#undef __ ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/icache_loongarch.hpp b/src/hotspot/cpu/loongarch/icache_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/icache_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/icache_loongarch.hpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,41 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_ICACHE_LOONGARCH_HPP ++#define CPU_LOONGARCH_ICACHE_LOONGARCH_HPP ++ ++// Interface for updating the instruction cache. Whenever the VM modifies ++// code, part of the processor instruction cache potentially has to be flushed. ++ ++class ICache : public AbstractICache { ++ public: ++ enum { ++ stub_size = 3 * BytesPerInstWord, // Size of the icache flush stub in bytes ++ line_size = 32, // flush instruction affects a dword ++ log2_line_size = 5 // log2(line_size) ++ }; ++}; ++ ++#endif // CPU_LOONGARCH_ICACHE_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/icBuffer_loongarch.cpp b/src/hotspot/cpu/loongarch/icBuffer_loongarch.cpp +--- a/src/hotspot/cpu/loongarch/icBuffer_loongarch.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/icBuffer_loongarch.cpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,92 @@ ++/* ++ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. 
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/icBuffer.hpp" ++#include "gc/shared/collectedHeap.inline.hpp" ++#include "interpreter/bytecodes.hpp" ++#include "memory/resourceArea.hpp" ++#include "nativeInst_loongarch.hpp" ++#include "oops/oop.inline.hpp" ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++int InlineCacheBuffer::ic_stub_code_size() { ++ return NativeMovConstReg::instruction_size + ++ NativeGeneralJump::instruction_size + ++ 1; ++ // so that code_end can be set in CodeBuffer ++ // 64bit 15 = 6 + 8 bytes + 1 byte ++ // 32bit 7 = 2 + 4 bytes + 1 byte ++} ++ ++ ++// we use T1 as cached oop(klass) now. 
this is the target of virtual call, ++// when reach here, the receiver in T0 ++// refer to shareRuntime_loongarch.cpp,gen_i2c2i_adapters ++void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached_value, ++ address entry_point) { ++ ResourceMark rm; ++ CodeBuffer code(code_begin, ic_stub_code_size()); ++ MacroAssembler* masm = new MacroAssembler(&code); ++ // note: even though the code contains an embedded oop, we do not need reloc info ++ // because ++ // (1) the oop is old (i.e., doesn't matter for scavenges) ++ // (2) these ICStubs are removed *before* a GC happens, so the roots disappear ++ // assert(cached_oop == NULL || cached_oop->is_perm(), "must be perm oop"); ++#define __ masm-> ++ __ patchable_li52(T1, (long)cached_value); ++ // TODO: confirm reloc ++ __ jmp(entry_point, relocInfo::runtime_call_type); ++ __ flush(); ++#undef __ ++} ++ ++ ++address InlineCacheBuffer::ic_buffer_entry_point(address code_begin) { ++ NativeMovConstReg* move = nativeMovConstReg_at(code_begin); // creation also verifies the object ++ NativeGeneralJump* jump = nativeGeneralJump_at(move->next_instruction_address()); ++ return jump->jump_destination(); ++} ++ ++ ++void* InlineCacheBuffer::ic_buffer_cached_value(address code_begin) { ++ // creation also verifies the object ++ NativeMovConstReg* move = nativeMovConstReg_at(code_begin); ++ // Verifies the jump ++ NativeGeneralJump* jump = nativeGeneralJump_at(move->next_instruction_address()); ++ void* o= (void*)move->data(); ++ return o; ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/interp_masm_loongarch_64.cpp b/src/hotspot/cpu/loongarch/interp_masm_loongarch_64.cpp +--- a/src/hotspot/cpu/loongarch/interp_masm_loongarch_64.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/interp_masm_loongarch_64.cpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,2043 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "interp_masm_loongarch.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "oops/arrayOop.hpp" ++#include "oops/markOop.hpp" ++#include "oops/methodData.hpp" ++#include "oops/method.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/jvmtiThreadState.hpp" ++#include "runtime/basicLock.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/safepointMechanism.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/thread.inline.hpp" ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++// Implementation of InterpreterMacroAssembler ++ ++#ifdef CC_INTERP ++void InterpreterMacroAssembler::get_method(Register reg) { ++} ++#endif // CC_INTERP ++ ++void InterpreterMacroAssembler::get_2_byte_integer_at_bcp(Register reg, Register tmp, int offset) { ++ if (UseUnalignedAccesses) { ++ ld_hu(reg, BCP, offset); ++ } else { ++ ld_bu(reg, BCP, offset); ++ ld_bu(tmp, BCP, offset + 1); ++ bstrins_d(reg, tmp, 15, 8); ++ } ++} ++ ++void InterpreterMacroAssembler::get_4_byte_integer_at_bcp(Register reg, int offset) { ++ if (UseUnalignedAccesses) { ++ ld_wu(reg, BCP, offset); ++ } else { ++ ldr_w(reg, BCP, offset); ++ ldl_w(reg, BCP, offset + 3); ++ lu32i_d(reg, 0); ++ } ++} ++ ++void InterpreterMacroAssembler::jump_to_entry(address entry) { ++ assert(entry, "Entry must have been generated by now"); ++ jmp(entry); ++} ++ ++#ifndef CC_INTERP ++ ++void InterpreterMacroAssembler::call_VM_leaf_base(address entry_point, ++ int number_of_arguments) { ++ // interpreter specific ++ // ++ // Note: No need to save/restore bcp & locals pointer ++ // since these are callee saved registers and no blocking/ ++ // GC can happen in leaf calls. ++ // Further Note: DO NOT save/restore bcp/locals. If a caller has ++ // already saved them so that it can use BCP/LVP as temporaries ++ // then a save/restore here will DESTROY the copy the caller ++ // saved! There used to be a save_bcp() that only happened in ++ // the ASSERT path (no restore_bcp). Which caused bizarre failures ++ // when jvm built with ASSERTs. ++#ifdef ASSERT ++ save_bcp(); ++ { ++ Label L; ++ ld_d(AT,FP,frame::interpreter_frame_last_sp_offset * wordSize); ++ beq(AT,R0,L); ++ stop("InterpreterMacroAssembler::call_VM_leaf_base: last_sp != NULL"); ++ bind(L); ++ } ++#endif ++ // super call ++ MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments); ++ // interpreter specific ++ // Used to ASSERT that BCP/LVP were equal to frame's bcp/locals ++ // but since they may not have been saved (and we don't want to ++ // save them here (see note above) the assert is invalid. ++} ++ ++void InterpreterMacroAssembler::call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ // interpreter specific ++ // ++ // Note: Could avoid restoring locals ptr (callee saved) - however doesn't ++ // really make a difference for these runtime calls, since they are ++ // slow anyway. Btw., bcp must be saved/restored since it may change ++ // due to GC. 
++ assert(java_thread == noreg , "not expecting a precomputed java thread"); ++ save_bcp(); ++#ifdef ASSERT ++ { ++ Label L; ++ ld_d(AT, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ beq(AT, R0, L); ++ stop("InterpreterMacroAssembler::call_VM_base: last_sp != NULL"); ++ bind(L); ++ } ++#endif /* ASSERT */ ++ // super call ++ MacroAssembler::call_VM_base(oop_result, java_thread, last_java_sp, ++ entry_point, number_of_arguments, ++ check_exceptions); ++ // interpreter specific ++ restore_bcp(); ++ restore_locals(); ++} ++ ++ ++void InterpreterMacroAssembler::check_and_handle_popframe(Register java_thread) { ++ if (JvmtiExport::can_pop_frame()) { ++ Label L; ++ // Initiate popframe handling only if it is not already being ++ // processed. If the flag has the popframe_processing bit set, it ++ // means that this code is called *during* popframe handling - we ++ // don't want to reenter. ++ // This method is only called just after the call into the vm in ++ // call_VM_base, so the arg registers are available. ++ // Not clear if any other register is available, so load AT twice ++ assert(AT != java_thread, "check"); ++ ld_w(AT, java_thread, in_bytes(JavaThread::popframe_condition_offset())); ++ andi(AT, AT, JavaThread::popframe_pending_bit); ++ beq(AT, R0, L); ++ ++ ld_w(AT, java_thread, in_bytes(JavaThread::popframe_condition_offset())); ++ andi(AT, AT, JavaThread::popframe_processing_bit); ++ bne(AT, R0, L); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_preserving_args_entry)); ++ jr(V0); ++ bind(L); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::load_earlyret_value(TosState state) { ++#ifndef OPT_THREAD ++ Register thread = T8; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ ld_ptr(T8, thread, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ const Address tos_addr (T8, in_bytes(JvmtiThreadState::earlyret_tos_offset())); ++ const Address oop_addr (T8, in_bytes(JvmtiThreadState::earlyret_oop_offset())); ++ const Address val_addr (T8, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++ //V0, oop_addr,V1,val_addr ++ switch (state) { ++ case atos: ++ ld_ptr(V0, oop_addr); ++ st_ptr(R0, oop_addr); ++ verify_oop(V0, state); ++ break; ++ case ltos: ++ ld_ptr(V0, val_addr); // fall through ++ break; ++ case btos: // fall through ++ case ztos: // fall through ++ case ctos: // fall through ++ case stos: // fall through ++ case itos: ++ ld_w(V0, val_addr); ++ break; ++ case ftos: ++ fld_s(F0, T8, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++ break; ++ case dtos: ++ fld_d(F0, T8, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++ break; ++ case vtos: /* nothing to do */ break; ++ default : ShouldNotReachHere(); ++ } ++ // Clean up tos value in the thread object ++ li(AT, (int)ilgl); ++ st_w(AT, tos_addr); ++ st_w(R0, T8, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++} ++ ++ ++void InterpreterMacroAssembler::check_and_handle_earlyret(Register java_thread) { ++ if (JvmtiExport::can_force_early_return()) { ++ Label L; ++ Register tmp = T4; ++ ++ assert(java_thread != AT, "check"); ++ assert(java_thread != tmp, "check"); ++ ld_ptr(AT, java_thread, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ beq(AT, R0, L); ++ ++ // Initiate earlyret handling only if it is not already being processed. ++ // If the flag has the earlyret_processing bit set, it means that this code ++ // is called *during* earlyret handling - we don't want to reenter. 
++ ld_w(AT, AT, in_bytes(JvmtiThreadState::earlyret_state_offset())); ++ li(tmp, JvmtiThreadState::earlyret_pending); ++ bne(tmp, AT, L); ++ ++ // Call Interpreter::remove_activation_early_entry() to get the address of the ++ // same-named entrypoint in the generated interpreter code. ++ ld_ptr(tmp, java_thread, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ ld_w(AT, tmp, in_bytes(JvmtiThreadState::earlyret_tos_offset())); ++ move(A0, AT); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_early_entry), A0); ++ jr(V0); ++ bind(L); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::get_unsigned_2_byte_index_at_bcp(Register reg, ++ int bcp_offset) { ++ assert(bcp_offset >= 0, "bcp is still pointing to start of bytecode"); ++ ld_bu(AT, BCP, bcp_offset); ++ ld_bu(reg, BCP, bcp_offset + 1); ++ bstrins_w(reg, AT, 15, 8); ++} ++ ++ ++void InterpreterMacroAssembler::get_cache_index_at_bcp(Register index, ++ int bcp_offset, ++ size_t index_size) { ++ assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); ++ if (index_size == sizeof(u2)) { ++ get_2_byte_integer_at_bcp(index, AT, bcp_offset); ++ } else if (index_size == sizeof(u4)) { ++ get_4_byte_integer_at_bcp(index, bcp_offset); ++ // Check if the secondary index definition is still ~x, otherwise ++ // we have to change the following assembler code to calculate the ++ // plain index. ++ assert(ConstantPool::decode_invokedynamic_index(~123) == 123, "else change next line"); ++ nor(index, index, R0); ++ slli_w(index, index, 0); ++ } else if (index_size == sizeof(u1)) { ++ ld_bu(index, BCP, bcp_offset); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache, ++ Register index, ++ int bcp_offset, ++ size_t index_size) { ++ assert_different_registers(cache, index); ++ get_cache_index_at_bcp(index, bcp_offset, index_size); ++ ld_d(cache, FP, frame::interpreter_frame_cache_offset * wordSize); ++ assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); ++ assert(exact_log2(in_words(ConstantPoolCacheEntry::size())) == 2, "else change next line"); ++ shl(index, 2); ++} ++ ++ ++void InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register cache, ++ Register index, ++ Register bytecode, ++ int byte_no, ++ int bcp_offset, ++ size_t index_size) { ++ get_cache_and_index_at_bcp(cache, index, bcp_offset, index_size); ++ // We use a 32-bit load here since the layout of 64-bit words on ++ // little-endian machines allow us that. 
++  alsl_d(AT, index, cache, Address::times_ptr - 1);
++  ld_w(bytecode, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset()));
++  if(os::is_MP()) {
++    membar(Assembler::Membar_mask_bits(LoadLoad|LoadStore));
++  }
++
++  const int shift_count = (1 + byte_no) * BitsPerByte;
++  assert((byte_no == TemplateTable::f1_byte && shift_count == ConstantPoolCacheEntry::bytecode_1_shift) ||
++         (byte_no == TemplateTable::f2_byte && shift_count == ConstantPoolCacheEntry::bytecode_2_shift),
++         "correct shift count");
++  srli_d(bytecode, bytecode, shift_count);
++  assert(ConstantPoolCacheEntry::bytecode_1_mask == ConstantPoolCacheEntry::bytecode_2_mask, "common mask");
++  li(AT, ConstantPoolCacheEntry::bytecode_1_mask);
++  andr(bytecode, bytecode, AT);
++}
++
++void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache,
++                                                               Register tmp,
++                                                               int bcp_offset,
++                                                               size_t index_size) {
++  assert(bcp_offset > 0, "bcp is still pointing to start of bytecode");
++  assert(cache != tmp, "must use different register");
++  get_cache_index_at_bcp(tmp, bcp_offset, index_size);
++  assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below");
++  // convert from field index to ConstantPoolCacheEntry index
++  // and from word offset to byte offset
++  assert(exact_log2(in_bytes(ConstantPoolCacheEntry::size_in_bytes())) == 2 + LogBytesPerWord, "else change next line");
++  shl(tmp, 2 + LogBytesPerWord);
++  ld_d(cache, FP, frame::interpreter_frame_cache_offset * wordSize);
++  // skip past the header
++  addi_d(cache, cache, in_bytes(ConstantPoolCache::base_offset()));
++  add_d(cache, cache, tmp);
++}
++
++void InterpreterMacroAssembler::get_method_counters(Register method,
++                                                    Register mcs, Label& skip) {
++  Label has_counters;
++  ld_d(mcs, method, in_bytes(Method::method_counters_offset()));
++  bne(mcs, R0, has_counters);
++  call_VM(noreg, CAST_FROM_FN_PTR(address,
++          InterpreterRuntime::build_method_counters), method);
++  ld_d(mcs, method, in_bytes(Method::method_counters_offset()));
++  beq(mcs, R0, skip); // No MethodCounters allocated, OutOfMemory
++  bind(has_counters);
++}
++
++// Load object from cpool->resolved_references(index)
++void InterpreterMacroAssembler::load_resolved_reference_at_index(
++                                 Register result, Register index, Register tmp) {
++  assert_different_registers(result, index);
++  // convert from field index to resolved_references() index and from
++  // word index to byte offset. Since this is a java object, it can be compressed
++  shl(index, LogBytesPerHeapOop);
++
++  get_constant_pool(result);
++  // load pointer for resolved_references[] objArray
++  ld_d(result, result, ConstantPool::cache_offset_in_bytes());
++  ld_d(result, result, ConstantPoolCache::resolved_references_offset_in_bytes());
++  resolve_oop_handle(result, tmp);
++  // Add in the index
++  add_d(result, result, index);
++  load_heap_oop(result, Address(result, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), tmp);
++}
++
++// load cpool->resolved_klass_at(index)
++void InterpreterMacroAssembler::load_resolved_klass_at_index(Register cpool,
++                                                             Register index, Register klass) {
++  alsl_d(AT, index, cpool, Address::times_ptr - 1);
++  ld_h(index, AT, sizeof(ConstantPool));
++  Register resolved_klasses = cpool;
++  ld_ptr(resolved_klasses, Address(cpool, ConstantPool::resolved_klasses_offset_in_bytes()));
++  alsl_d(AT, index, resolved_klasses, Address::times_ptr - 1);
++  ld_d(klass, AT, Array<Klass*>::base_offset_in_bytes());
++}
++
++// Resets LVP to locals.
Register sub_klass cannot be any of the above. ++void InterpreterMacroAssembler::gen_subtype_check( Register Rsup_klass, Register Rsub_klass, Label &ok_is_subtype ) { ++ ++ assert( Rsub_klass != Rsup_klass, "Rsup_klass holds superklass" ); ++ assert( Rsub_klass != T1, "T1 holds 2ndary super array length" ); ++ assert( Rsub_klass != T0, "T0 holds 2ndary super array scan ptr" ); ++ // Profile the not-null value's klass. ++ // Here T4 and T1 are used as temporary registers. ++ profile_typecheck(T4, Rsub_klass, T1); // blows T4, reloads T1 ++ ++ // Do the check. ++ check_klass_subtype(Rsub_klass, Rsup_klass, T1, ok_is_subtype); // blows T1 ++ ++ // Profile the failure of the check. ++ profile_typecheck_failed(T4); // blows T4 ++ ++} ++ ++ ++ ++// Java Expression Stack ++ ++void InterpreterMacroAssembler::pop_ptr(Register r) { ++ ld_d(r, SP, 0); ++ addi_d(SP, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_i(Register r) { ++ ld_w(r, SP, 0); ++ addi_d(SP, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_l(Register r) { ++ ld_d(r, SP, 0); ++ addi_d(SP, SP, 2 * Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_f(FloatRegister r) { ++ fld_s(r, SP, 0); ++ addi_d(SP, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_d(FloatRegister r) { ++ fld_d(r, SP, 0); ++ addi_d(SP, SP, 2 * Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::push_ptr(Register r) { ++ addi_d(SP, SP, - Interpreter::stackElementSize); ++ st_d(r, SP, 0); ++} ++ ++void InterpreterMacroAssembler::push_i(Register r) { ++ // For compatibility reason, don't change to sw. ++ addi_d(SP, SP, - Interpreter::stackElementSize); ++ st_d(r, SP, 0); ++} ++ ++void InterpreterMacroAssembler::push_l(Register r) { ++ addi_d(SP, SP, -2 * Interpreter::stackElementSize); ++ st_d(r, SP, 0); ++ st_d(R0, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::push_f(FloatRegister r) { ++ addi_d(SP, SP, - Interpreter::stackElementSize); ++ fst_s(r, SP, 0); ++} ++ ++void InterpreterMacroAssembler::push_d(FloatRegister r) { ++ addi_d(SP, SP, -2 * Interpreter::stackElementSize); ++ fst_d(r, SP, 0); ++ st_d(R0, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop(TosState state) { ++ switch (state) { ++ case atos: pop_ptr(); break; ++ case btos: ++ case ztos: ++ case ctos: ++ case stos: ++ case itos: pop_i(); break; ++ case ltos: pop_l(); break; ++ case ftos: pop_f(); break; ++ case dtos: pop_d(); break; ++ case vtos: /* nothing to do */ break; ++ default: ShouldNotReachHere(); ++ } ++ verify_oop(FSR, state); ++} ++ ++//FSR=V0,SSR=V1 ++void InterpreterMacroAssembler::push(TosState state) { ++ verify_oop(FSR, state); ++ switch (state) { ++ case atos: push_ptr(); break; ++ case btos: ++ case ztos: ++ case ctos: ++ case stos: ++ case itos: push_i(); break; ++ case ltos: push_l(); break; ++ case ftos: push_f(); break; ++ case dtos: push_d(); break; ++ case vtos: /* nothing to do */ break; ++ default : ShouldNotReachHere(); ++ } ++} ++ ++void InterpreterMacroAssembler::load_ptr(int n, Register val) { ++ ld_d(val, SP, Interpreter::expr_offset_in_bytes(n)); ++} ++ ++void InterpreterMacroAssembler::store_ptr(int n, Register val) { ++ st_d(val, SP, Interpreter::expr_offset_in_bytes(n)); ++} ++ ++// Jump to from_interpreted entry of a call unless single stepping is possible ++// in this thread in which case we must call the i2i entry ++void 
InterpreterMacroAssembler::jump_from_interpreted(Register method, Register temp) { ++ // record last_sp ++ move(Rsender, SP); ++ st_d(SP, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ if (JvmtiExport::can_post_interpreter_events()) { ++ Label run_compiled_code; ++ // JVMTI events, such as single-stepping, are implemented partly by avoiding running ++ // compiled code in threads for which the event is enabled. Check here for ++ // interp_only_mode if these events CAN be enabled. ++#ifndef OPT_THREAD ++ Register thread = temp; ++ get_thread(temp); ++#else ++ Register thread = TREG; ++#endif ++ // interp_only is an int, on little endian it is sufficient to test the byte only ++ // Is a cmpl faster? ++ ld_w(AT, thread, in_bytes(JavaThread::interp_only_mode_offset())); ++ beq(AT, R0, run_compiled_code); ++ ld_d(AT, method, in_bytes(Method::interpreter_entry_offset())); ++ jr(AT); ++ bind(run_compiled_code); ++ } ++ ++ ld_d(AT, method, in_bytes(Method::from_interpreted_offset())); ++ jr(AT); ++} ++ ++ ++// The following two routines provide a hook so that an implementation ++// can schedule the dispatch in two parts. LoongArch64 does not do this. ++void InterpreterMacroAssembler::dispatch_prolog(TosState state, int step) { ++ // Nothing LoongArch64 specific to be done here ++} ++ ++void InterpreterMacroAssembler::dispatch_epilog(TosState state, int step) { ++ dispatch_next(state, step); ++} ++ ++// assume the next bytecode in T8. ++void InterpreterMacroAssembler::dispatch_base(TosState state, ++ address* table, ++ bool verifyoop, ++ bool generate_poll) { ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ get_thread(thread); ++#endif ++ ++ if (VerifyActivationFrameSize) { ++ Label L; ++ ++ sub_d(T2, FP, SP); ++ int min_frame_size = (frame::java_frame_link_offset - ++ frame::interpreter_frame_initial_sp_offset) * wordSize; ++ addi_d(T2, T2, -min_frame_size); ++ bge(T2, R0, L); ++ stop("broken stack frame"); ++ bind(L); ++ } ++ // FIXME: I do not know which register should pass to verify_oop ++ if (verifyoop) verify_oop(FSR, state); ++ ++ Label safepoint; ++ address* const safepoint_table = Interpreter::safept_table(state); ++ bool needs_thread_local_poll = generate_poll && ++ SafepointMechanism::uses_thread_local_poll() && table != safepoint_table; ++ ++ if (needs_thread_local_poll) { ++ NOT_PRODUCT(block_comment("Thread-local Safepoint poll")); ++ ld_d(T3, thread, in_bytes(Thread::polling_page_offset())); ++ andi(T3, T3, SafepointMechanism::poll_bit()); ++ bne(T3, R0, safepoint); ++ } ++ ++ if((long)table >= (long)Interpreter::dispatch_table(btos) && ++ (long)table <= (long)Interpreter::dispatch_table(vtos)) { ++ int table_size = (long)Interpreter::dispatch_table(itos) - ++ (long)Interpreter::dispatch_table(stos); ++ int table_offset = ((int)state - (int)itos) * table_size; ++ ++ // S8 points to the starting address of Interpreter::dispatch_table(itos). ++ // See StubGenerator::generate_call_stub(address& return_address) for the initialization of S8. 
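++    // For example, with state == ftos the target is loaded from
++    // S8 + (ftos - itos) * table_size + (Rnext << LogBytesPerWord);
++    // table_offset may not fit in a simm12, hence the two paths below.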
++ if (table_offset != 0) { ++ if (is_simm(table_offset, 12)) { ++ alsl_d(T3, Rnext, S8, LogBytesPerWord - 1); ++ ld_d(T3, T3, table_offset); ++ } else { ++ li(T2, table_offset); ++ alsl_d(T3, Rnext, S8, LogBytesPerWord - 1); ++ ldx_d(T3, T2, T3); ++ } ++ } else { ++ slli_d(T2, Rnext, LogBytesPerWord); ++ ldx_d(T3, S8, T2); ++ } ++ } else { ++ li(T3, (long)table); ++ slli_d(T2, Rnext, LogBytesPerWord); ++ ldx_d(T3, T2, T3); ++ } ++ jr(T3); ++ ++ if (needs_thread_local_poll) { ++ bind(safepoint); ++ li(T3, (long)safepoint_table); ++ slli_d(T2, Rnext, LogBytesPerWord); ++ ldx_d(T3, T3, T2); ++ jr(T3); ++ } ++} ++ ++void InterpreterMacroAssembler::dispatch_only(TosState state, bool generate_poll) { ++ dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll); ++} ++ ++void InterpreterMacroAssembler::dispatch_only_normal(TosState state) { ++ dispatch_base(state, Interpreter::normal_table(state)); ++} ++ ++void InterpreterMacroAssembler::dispatch_only_noverify(TosState state) { ++ dispatch_base(state, Interpreter::normal_table(state), false); ++} ++ ++ ++void InterpreterMacroAssembler::dispatch_next(TosState state, int step, bool generate_poll) { ++ // load next bytecode ++ ld_bu(Rnext, BCP, step); ++ increment(BCP, step); ++ dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll); ++} ++ ++void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) { ++ // load current bytecode ++ ld_bu(Rnext, BCP, 0); ++ dispatch_base(state, table); ++} ++ ++// remove activation ++// ++// Unlock the receiver if this is a synchronized method. ++// Unlock any Java monitors from syncronized blocks. ++// Remove the activation from the stack. ++// ++// If there are locked Java monitors ++// If throw_monitor_exception ++// throws IllegalMonitorStateException ++// Else if install_monitor_exception ++// installs IllegalMonitorStateException ++// Else ++// no error processing ++// used registers : T1, T2, T3, T8 ++// T1 : thread, method access flags ++// T2 : monitor entry pointer ++// T3 : method, monitor top ++// T8 : unlock flag ++void InterpreterMacroAssembler::remove_activation( ++ TosState state, ++ Register ret_addr, ++ bool throw_monitor_exception, ++ bool install_monitor_exception, ++ bool notify_jvmdi) { ++ // Note: Registers V0, V1 and F0, F1 may be in use for the result ++ // check if synchronized method ++ Label unlocked, unlock, no_unlock; ++ ++ // get the value of _do_not_unlock_if_synchronized into T8 ++#ifndef OPT_THREAD ++ Register thread = T1; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ ld_b(T8, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ // reset the flag ++ st_b(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ // get method access flags ++ ld_d(T3, FP, frame::interpreter_frame_method_offset * wordSize); ++ ld_w(T1, T3, in_bytes(Method::access_flags_offset())); ++ andi(T1, T1, JVM_ACC_SYNCHRONIZED); ++ beq(T1, R0, unlocked); ++ ++ // Don't unlock anything if the _do_not_unlock_if_synchronized flag is set. ++ bne(T8, R0, no_unlock); ++ // unlock monitor ++ push(state); // save result ++ ++ // BasicObjectLock will be first in list, since this is a ++ // synchronized method. However, need to check that the object has ++ // not been unlocked by an explicit monitorexit bytecode. 
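++  // The monitor area grows down from interpreter_frame_initial_sp_offset, so the
++  // method's BasicObjectLock sits one sizeof(BasicObjectLock) below that boundary.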
++ addi_d(c_rarg0, FP, frame::interpreter_frame_initial_sp_offset * wordSize ++ - (int)sizeof(BasicObjectLock)); ++ // address of first monitor ++ ld_d(T1, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ bne(T1, R0, unlock); ++ pop(state); ++ if (throw_monitor_exception) { ++ // Entry already unlocked, need to throw exception ++ // I think LA do not need empty_FPU_stack ++ // remove possible return value from FPU-stack, otherwise stack could overflow ++ empty_FPU_stack(); ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ should_not_reach_here(); ++ } else { ++ // Monitor already unlocked during a stack unroll. If requested, ++ // install an illegal_monitor_state_exception. Continue with ++ // stack unrolling. ++ if (install_monitor_exception) { ++ // remove possible return value from FPU-stack, ++ // otherwise stack could overflow ++ empty_FPU_stack(); ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::new_illegal_monitor_state_exception)); ++ ++ } ++ ++ b(unlocked); ++ } ++ ++ bind(unlock); ++ unlock_object(c_rarg0); ++ pop(state); ++ ++ // Check that for block-structured locking (i.e., that all locked ++ // objects has been unlocked) ++ bind(unlocked); ++ ++ // V0, V1: Might contain return value ++ ++ // Check that all monitors are unlocked ++ { ++ Label loop, exception, entry, restart; ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ const Address monitor_block_top(FP, ++ frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ ++ bind(restart); ++ // points to current entry, starting with top-most entry ++ ld_d(c_rarg0, monitor_block_top); ++ // points to word before bottom of monitor block ++ addi_d(T3, FP, frame::interpreter_frame_initial_sp_offset * wordSize); ++ b(entry); ++ ++ // Entry already locked, need to throw exception ++ bind(exception); ++ ++ if (throw_monitor_exception) { ++ // Throw exception ++ // remove possible return value from FPU-stack, ++ // otherwise stack could overflow ++ empty_FPU_stack(); ++ MacroAssembler::call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ should_not_reach_here(); ++ } else { ++ // Stack unrolling. 
Unlock object and install illegal_monitor_exception ++ // Unlock does not block, so don't have to worry about the frame ++ // We don't have to preserve c_rarg0, since we are going to ++ // throw an exception ++ ++ push(state); ++ unlock_object(c_rarg0); ++ pop(state); ++ ++ if (install_monitor_exception) { ++ empty_FPU_stack(); ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::new_illegal_monitor_state_exception)); ++ } ++ ++ b(restart); ++ } ++ ++ bind(loop); ++ ld_d(T1, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ bne(T1, R0, exception);// check if current entry is used ++ ++ addi_d(c_rarg0, c_rarg0, entry_size);// otherwise advance to next entry ++ bind(entry); ++ bne(c_rarg0, T3, loop); // check if bottom reached ++ } ++ ++ bind(no_unlock); ++ ++ // jvmpi support (jvmdi does not generate MethodExit on exception / popFrame) ++ if (notify_jvmdi) { ++ notify_method_exit(state, NotifyJVMTI); // preserve TOSCA ++ } else { ++ notify_method_exit(state, SkipNotifyJVMTI); // preserve TOSCA ++ } ++ ++ // remove activation ++ ld_d(TSR, FP, frame::interpreter_frame_sender_sp_offset * wordSize); ++ if (StackReservedPages > 0) { ++ // testing if reserved zone needs to be re-enabled ++ Label no_reserved_zone_enabling; ++ ++ ld_d(AT, Address(thread, JavaThread::reserved_stack_activation_offset())); ++ sub_d(AT, TSR, AT); ++ bge(R0, AT, no_reserved_zone_enabling); ++ ++ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), thread); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_delayed_StackOverflowError)); ++ should_not_reach_here(); ++ ++ bind(no_reserved_zone_enabling); ++ } ++ ld_d(ret_addr, FP, frame::java_frame_return_addr_offset * wordSize); ++ ld_d(FP, FP, frame::interpreter_frame_sender_fp_offset * wordSize); ++ move(SP, TSR); // set sp to sender sp ++} ++ ++#endif // CC_INTERP ++ ++// Lock object ++// ++// Args: ++// c_rarg0: BasicObjectLock to be used for locking ++// ++// Kills: ++// T1 ++// T2 ++void InterpreterMacroAssembler::lock_object(Register lock_reg) { ++ assert(lock_reg == c_rarg0, "The argument is only for looks. 
It must be c_rarg0"); ++ ++ if (UseHeavyMonitors) { ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), lock_reg); ++ } else { ++ Label done, slow_case; ++ const Register tmp_reg = T2; ++ const Register scr_reg = T1; ++ const int obj_offset = BasicObjectLock::obj_offset_in_bytes(); ++ const int lock_offset = BasicObjectLock::lock_offset_in_bytes (); ++ const int mark_offset = lock_offset + BasicLock::displaced_header_offset_in_bytes(); ++ ++ // Load object pointer into scr_reg ++ ld_d(scr_reg, lock_reg, obj_offset); ++ ++ if (UseBiasedLocking) { ++ // Note: we use noreg for the temporary register since it's hard ++ // to come up with a free register on all incoming code paths ++ biased_locking_enter(lock_reg, scr_reg, tmp_reg, noreg, false, done, &slow_case); ++ } ++ ++ // Load (object->mark() | 1) into tmp_reg ++ ld_d(AT, scr_reg, 0); ++ ori(tmp_reg, AT, 1); ++ ++ // Save (object->mark() | 1) into BasicLock's displaced header ++ st_d(tmp_reg, lock_reg, mark_offset); ++ ++ assert(lock_offset == 0, "displaced header must be first word in BasicObjectLock"); ++ ++ if (PrintBiasedLockingStatistics) { ++ Label succ, fail; ++ cmpxchg(Address(scr_reg, 0), tmp_reg, lock_reg, AT, true, false, succ, &fail); ++ bind(succ); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scr_reg); ++ b(done); ++ bind(fail); ++ } else { ++ cmpxchg(Address(scr_reg, 0), tmp_reg, lock_reg, AT, true, false, done); ++ } ++ ++ // Test if the oopMark is an obvious stack pointer, i.e., ++ // 1) (mark & 7) == 0, and ++ // 2) SP <= mark < SP + os::pagesize() ++ // ++ // These 3 tests can be done by evaluating the following ++ // expression: ((mark - sp) & (7 - os::vm_page_size())), ++ // assuming both stack pointer and pagesize have their ++ // least significant 3 bits clear. ++ // NOTE: the oopMark is in tmp_reg as the result of cmpxchg ++ sub_d(tmp_reg, tmp_reg, SP); ++ li(AT, 7 - os::vm_page_size()); ++ andr(tmp_reg, tmp_reg, AT); ++ // Save the test result, for recursive case, the result is zero ++ st_d(tmp_reg, lock_reg, mark_offset); ++ if (PrintBiasedLockingStatistics) { ++ bnez(tmp_reg, slow_case); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scr_reg); ++ } ++ beqz(tmp_reg, done); ++ ++ bind(slow_case); ++ // Call the runtime routine for slow case ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), lock_reg); ++ ++ bind(done); ++ } ++} ++ ++// Unlocks an object. Used in monitorexit bytecode and ++// remove_activation. Throws an IllegalMonitorException if object is ++// not locked by current thread. ++// ++// Args: ++// c_rarg0: BasicObjectLock for lock ++// ++// Kills: ++// T1 ++// T2 ++// T3 ++// Throw an IllegalMonitorException if object is not locked by current thread ++void InterpreterMacroAssembler::unlock_object(Register lock_reg) { ++ assert(lock_reg == c_rarg0, "The argument is only for looks.
It must be c_rarg0"); ++ ++ if (UseHeavyMonitors) { ++ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); ++ } else { ++ Label done; ++ const Register tmp_reg = T1; ++ const Register scr_reg = T2; ++ const Register hdr_reg = T3; ++ ++ save_bcp(); // Save in case of exception ++ ++ // Convert from BasicObjectLock structure to object and BasicLock structure ++ // Store the BasicLock address into tmp_reg ++ addi_d(tmp_reg, lock_reg, BasicObjectLock::lock_offset_in_bytes()); ++ ++ // Load oop into scr_reg ++ ld_d(scr_reg, lock_reg, BasicObjectLock::obj_offset_in_bytes()); ++ // free entry ++ st_d(R0, lock_reg, BasicObjectLock::obj_offset_in_bytes()); ++ if (UseBiasedLocking) { ++ biased_locking_exit(scr_reg, hdr_reg, done); ++ } ++ ++ // Load the old header from BasicLock structure ++ ld_d(hdr_reg, tmp_reg, BasicLock::displaced_header_offset_in_bytes()); ++ // zero for recursive case ++ beqz(hdr_reg, done); ++ ++ // Atomic swap back the old header ++ cmpxchg(Address(scr_reg, 0), tmp_reg, hdr_reg, AT, false, false, done); ++ ++ // Call the runtime routine for slow case. ++ st_d(scr_reg, lock_reg, BasicObjectLock::obj_offset_in_bytes()); // restore obj ++ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), ++ lock_reg); ++ ++ bind(done); ++ ++ restore_bcp(); ++ } ++} ++ ++#ifndef CC_INTERP ++ ++void InterpreterMacroAssembler::test_method_data_pointer(Register mdp, ++ Label& zero_continue) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ ld_d(mdp, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); ++ beq(mdp, R0, zero_continue); ++} ++ ++ ++// Set the method data pointer for the current bcp. ++void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ Label set_mdp; ++ ++ // V0 and T0 will be used as two temporary registers. ++ push2(V0, T0); ++ ++ get_method(T0); ++ // Test MDO to avoid the call if it is NULL. ++ ld_d(V0, T0, in_bytes(Method::method_data_offset())); ++ beq(V0, R0, set_mdp); ++ ++ // method: T0 ++ // bcp: BCP --> S0 ++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::bcp_to_di), T0, BCP); ++ // mdi: V0 ++ // mdo is guaranteed to be non-zero here, we checked for it before the call. ++ get_method(T0); ++ ld_d(T0, T0, in_bytes(Method::method_data_offset())); ++ addi_d(T0, T0, in_bytes(MethodData::data_offset())); ++ add_d(V0, T0, V0); ++ bind(set_mdp); ++ st_d(V0, FP, frame::interpreter_frame_mdp_offset * wordSize); ++ pop2(V0, T0); ++} ++ ++void InterpreterMacroAssembler::verify_method_data_pointer() { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++#ifdef ASSERT ++ Label verify_continue; ++ Register method = T5; ++ Register mdp = T6; ++ Register tmp = A0; ++ push(method); ++ push(mdp); ++ push(tmp); ++ test_method_data_pointer(mdp, verify_continue); // If mdp is zero, continue ++ get_method(method); ++ ++ // If the mdp is valid, it will point to a DataLayout header which is ++ // consistent with the bcp. The converse is highly probable also. 
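++  // Recompute the expected bcp: constMethod + ConstMethod::codes_offset() + the bci
++  // stored in the DataLayout header; on a mismatch fall through to the
++  // InterpreterRuntime::verify_mdp call.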
++ ld_hu(tmp, mdp, in_bytes(DataLayout::bci_offset())); ++ ld_d(AT, method, in_bytes(Method::const_offset())); ++ add_d(tmp, tmp, AT); ++ addi_d(tmp, tmp, in_bytes(ConstMethod::codes_offset())); ++ beq(tmp, BCP, verify_continue); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::verify_mdp), method, BCP, mdp); ++ bind(verify_continue); ++ pop(tmp); ++ pop(mdp); ++ pop(method); ++#endif // ASSERT ++} ++ ++ ++void InterpreterMacroAssembler::set_mdp_data_at(Register mdp_in, ++ int constant, ++ Register value) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ Address data(mdp_in, constant); ++ st_d(value, data); ++} ++ ++ ++void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, ++ int constant, ++ bool decrement) { ++ // Counter address ++ Address data(mdp_in, constant); ++ ++ increment_mdp_data_at(data, decrement); ++} ++ ++void InterpreterMacroAssembler::increment_mdp_data_at(Address data, ++ bool decrement) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ // %%% this does 64bit counters at best it is wasting space ++ // at worst it is a rare bug when counters overflow ++ Register tmp = S0; ++ push(tmp); ++ if (decrement) { ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ // Decrement the register. ++ ld_d(AT, data); ++ sltu(tmp, R0, AT); ++ sub_d(AT, AT, tmp); ++ st_d(AT, data); ++ } else { ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ // Increment the register. ++ ld_d(AT, data); ++ addi_d(tmp, AT, DataLayout::counter_increment); ++ sltu(tmp, R0, tmp); ++ add_d(AT, AT, tmp); ++ st_d(AT, data); ++ } ++ pop(tmp); ++} ++ ++ ++void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, ++ Register reg, ++ int constant, ++ bool decrement) { ++ Register tmp = S0; ++ push(tmp); ++ if (decrement) { ++ assert(Assembler::is_simm(constant, 12), "constant is not a simm12 !"); ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ // Decrement the register. ++ add_d(tmp, mdp_in, reg); ++ ld_d(AT, tmp, constant); ++ sltu(tmp, R0, AT); ++ sub_d(AT, AT, tmp); ++ add_d(tmp, mdp_in, reg); ++ st_d(AT, tmp, constant); ++ } else { ++ assert(Assembler::is_simm(constant, 12), "constant is not a simm12 !"); ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ // Increment the register. ++ add_d(tmp, mdp_in, reg); ++ ld_d(AT, tmp, constant); ++ addi_d(tmp, AT, DataLayout::counter_increment); ++ sltu(tmp, R0, tmp); ++ add_d(AT, AT, tmp); ++ add_d(tmp, mdp_in, reg); ++ st_d(AT, tmp, constant); ++ } ++ pop(tmp); ++} ++ ++void InterpreterMacroAssembler::set_mdp_flag_at(Register mdp_in, ++ int flag_byte_constant) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ int header_offset = in_bytes(DataLayout::header_offset()); ++ int header_bits = DataLayout::flag_mask_to_header_mask(flag_byte_constant); ++ // Set the flag ++ ld_w(AT, Address(mdp_in, header_offset)); ++ if(Assembler::is_simm(header_bits, 12)) { ++ ori(AT, AT, header_bits); ++ } else { ++ push(T8); ++ // T8 is used as a temporary register. 
++ li(T8, header_bits); ++ orr(AT, AT, T8); ++ pop(T8); ++ } ++ st_w(AT, Address(mdp_in, header_offset)); ++} ++ ++ ++void InterpreterMacroAssembler::test_mdp_data_at(Register mdp_in, ++ int offset, ++ Register value, ++ Register test_value_out, ++ Label& not_equal_continue) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ if (test_value_out == noreg) { ++ ld_d(AT, Address(mdp_in, offset)); ++ bne(AT, value, not_equal_continue); ++ } else { ++ // Put the test value into a register, so caller can use it: ++ ld_d(test_value_out, Address(mdp_in, offset)); ++ bne(value, test_value_out, not_equal_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, ++ int offset_of_disp) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ assert(Assembler::is_simm(offset_of_disp, 12), "offset is not an simm12"); ++ ld_d(AT, mdp_in, offset_of_disp); ++ add_d(mdp_in, mdp_in, AT); ++ st_d(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, ++ Register reg, ++ int offset_of_disp) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ add_d(AT, reg, mdp_in); ++ assert(Assembler::is_simm(offset_of_disp, 12), "offset is not an simm12"); ++ ld_d(AT, AT, offset_of_disp); ++ add_d(mdp_in, mdp_in, AT); ++ st_d(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_by_constant(Register mdp_in, ++ int constant) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ if(Assembler::is_simm(constant, 12)) { ++ addi_d(mdp_in, mdp_in, constant); ++ } else { ++ li(AT, constant); ++ add_d(mdp_in, mdp_in, AT); ++ } ++ st_d(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_for_ret(Register return_bci) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ push(return_bci); // save/restore across call_VM ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::update_mdp_for_ret), ++ return_bci); ++ pop(return_bci); ++} ++ ++ ++void InterpreterMacroAssembler::profile_taken_branch(Register mdp, ++ Register bumped_count) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ // Otherwise, assign to mdp ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are taking a branch. Increment the taken count. ++ // We inline increment_mdp_data_at to return bumped_count in a register ++ //increment_mdp_data_at(mdp, in_bytes(JumpData::taken_offset())); ++ ld_d(bumped_count, mdp, in_bytes(JumpData::taken_offset())); ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ addi_d(AT, bumped_count, DataLayout::counter_increment); ++ sltu(AT, R0, AT); ++ add_d(bumped_count, bumped_count, AT); ++ st_d(bumped_count, mdp, in_bytes(JumpData::taken_offset())); // Store back out ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_offset(mdp, in_bytes(JumpData::displacement_offset())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are taking a branch. Increment the not taken count. 
++ increment_mdp_data_at(mdp, in_bytes(BranchData::not_taken_offset())); ++ ++ // The method data pointer needs to be updated to correspond to ++ // the next bytecode ++ update_mdp_by_constant(mdp, in_bytes(BranchData::branch_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_call(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are making a call. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_constant(mdp, in_bytes(CounterData::counter_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_final_call(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are making a call. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_constant(mdp, ++ in_bytes(VirtualCallData:: ++ virtual_call_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_virtual_call(Register receiver, ++ Register mdp, ++ Register reg2, ++ bool receiver_can_be_null) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ Label skip_receiver_profile; ++ if (receiver_can_be_null) { ++ Label not_null; ++ bnez(receiver, not_null); ++ // We are making a call. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ b(skip_receiver_profile); ++ bind(not_null); ++ } ++ ++ // Record the receiver type. ++ record_klass_in_profile(receiver, mdp, reg2, true); ++ bind(skip_receiver_profile); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_constant(mdp, ++ in_bytes(VirtualCallData:: ++ virtual_call_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++#if INCLUDE_JVMCI ++void InterpreterMacroAssembler::profile_called_method(Register method, Register mdp, Register reg2) { ++ assert_different_registers(method, mdp, reg2); ++ if (ProfileInterpreter && MethodProfileWidth > 0) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ Label done; ++ record_item_in_profile_helper(method, mdp, reg2, 0, done, MethodProfileWidth, ++ &VirtualCallData::method_offset, &VirtualCallData::method_count_offset, in_bytes(VirtualCallData::nonprofiled_receiver_count_offset())); ++ bind(done); ++ ++ update_mdp_by_constant(mdp, in_bytes(VirtualCallData::virtual_call_data_size())); ++ bind(profile_continue); ++ } ++} ++#endif // INCLUDE_JVMCI ++ ++// This routine creates a state machine for updating the multi-row ++// type profile at a virtual call site (or other type-sensitive bytecode). ++// The machine visits each row (of receiver/count) until the receiver type ++// is found, or until it runs out of rows. At the same time, it remembers ++// the location of the first empty row. (An empty row records null for its ++// receiver, and can be allocated for a newly-observed receiver type.) 
++// Because there are two degrees of freedom in the state, a simple linear ++// search will not work; it must be a decision tree. Hence this helper ++// function is recursive, to generate the required tree structured code. ++// It's the interpreter, so we are trading off code space for speed. ++// See below for example code. ++void InterpreterMacroAssembler::record_klass_in_profile_helper( ++ Register receiver, Register mdp, ++ Register reg2, int start_row, ++ Label& done, bool is_virtual_call) { ++ if (TypeProfileWidth == 0) { ++ if (is_virtual_call) { ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ } ++#if INCLUDE_JVMCI ++ else if (EnableJVMCI) { ++ increment_mdp_data_at(mdp, in_bytes(ReceiverTypeData::nonprofiled_receiver_count_offset())); ++ } ++#endif // INCLUDE_JVMCI ++ } else { ++ int non_profiled_offset = -1; ++ if (is_virtual_call) { ++ non_profiled_offset = in_bytes(CounterData::count_offset()); ++ } ++#if INCLUDE_JVMCI ++ else if (EnableJVMCI) { ++ non_profiled_offset = in_bytes(ReceiverTypeData::nonprofiled_receiver_count_offset()); ++ } ++#endif // INCLUDE_JVMCI ++ ++ record_item_in_profile_helper(receiver, mdp, reg2, 0, done, TypeProfileWidth, ++ &VirtualCallData::receiver_offset, &VirtualCallData::receiver_count_offset, non_profiled_offset); ++ } ++} ++ ++void InterpreterMacroAssembler::record_item_in_profile_helper(Register item, Register mdp, ++ Register reg2, int start_row, Label& done, int total_rows, ++ OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn, ++ int non_profiled_offset) { ++ int last_row = total_rows - 1; ++ assert(start_row <= last_row, "must be work left to do"); ++ // Test this row for both the item and for null. ++ // Take any of three different outcomes: ++ // 1. found item => increment count and goto done ++ // 2. found null => keep looking for case 1, maybe allocate this cell ++ // 3. found something else => keep looking for cases 1 and 2 ++ // Case 3 is handled by a recursive call. ++ for (int row = start_row; row <= last_row; row++) { ++ Label next_test; ++ bool test_for_null_also = (row == start_row); ++ ++ // See if the receiver is item[n]. ++ int item_offset = in_bytes(item_offset_fn(row)); ++ test_mdp_data_at(mdp, item_offset, item, ++ (test_for_null_also ? reg2 : noreg), ++ next_test); ++ // (Reg2 now contains the item from the CallData.) ++ ++ // The receiver is item[n]. Increment count[n]. ++ int count_offset = in_bytes(item_count_offset_fn(row)); ++ increment_mdp_data_at(mdp, count_offset); ++ b(done); ++ bind(next_test); ++ ++ if (test_for_null_also) { ++ Label found_null; ++ // Failed the equality check on item[n]... Test for null. ++ if (start_row == last_row) { ++ // The only thing left to do is handle the null case. ++ if (non_profiled_offset >= 0) { ++ beqz(reg2, found_null); ++ // Item did not match any saved item and there is no empty row for it. ++ // Increment total counter to indicate polymorphic case. ++ increment_mdp_data_at(mdp, non_profiled_offset); ++ b(done); ++ bind(found_null); ++ } else { ++ bnez(reg2, done); ++ } ++ break; ++ } ++ // Since null is rare, make it be the branch-taken case. ++ beqz(reg2, found_null); ++ ++ // Put all the "Case 3" tests here. ++ record_item_in_profile_helper(item, mdp, reg2, start_row + 1, done, total_rows, ++ item_offset_fn, item_count_offset_fn, non_profiled_offset); ++ ++ // Found a null. Keep searching for a matching item, ++ // but remember that this is an empty (unused) slot. 
++ bind(found_null); ++ } ++ } ++ ++ // In the fall-through case, we found no matching item, but we ++ // observed the item[start_row] is NULL. ++ ++ // Fill in the item field and increment the count. ++ int item_offset = in_bytes(item_offset_fn(start_row)); ++ set_mdp_data_at(mdp, item_offset, item); ++ int count_offset = in_bytes(item_count_offset_fn(start_row)); ++ li(reg2, DataLayout::counter_increment); ++ set_mdp_data_at(mdp, count_offset, reg2); ++ if (start_row > 0) { ++ b(done); ++ } ++} ++ ++// Example state machine code for three profile rows: ++// // main copy of decision tree, rooted at row[1] ++// if (row[0].rec == rec) { row[0].incr(); goto done; } ++// if (row[0].rec != NULL) { ++// // inner copy of decision tree, rooted at row[1] ++// if (row[1].rec == rec) { row[1].incr(); goto done; } ++// if (row[1].rec != NULL) { ++// // degenerate decision tree, rooted at row[2] ++// if (row[2].rec == rec) { row[2].incr(); goto done; } ++// if (row[2].rec != NULL) { goto done; } // overflow ++// row[2].init(rec); goto done; ++// } else { ++// // remember row[1] is empty ++// if (row[2].rec == rec) { row[2].incr(); goto done; } ++// row[1].init(rec); goto done; ++// } ++// } else { ++// // remember row[0] is empty ++// if (row[1].rec == rec) { row[1].incr(); goto done; } ++// if (row[2].rec == rec) { row[2].incr(); goto done; } ++// row[0].init(rec); goto done; ++// } ++// done: ++ ++void InterpreterMacroAssembler::record_klass_in_profile(Register receiver, ++ Register mdp, Register reg2, ++ bool is_virtual_call) { ++ assert(ProfileInterpreter, "must be profiling"); ++ Label done; ++ ++ record_klass_in_profile_helper(receiver, mdp, reg2, 0, done, is_virtual_call); ++ ++ bind (done); ++} ++ ++void InterpreterMacroAssembler::profile_ret(Register return_bci, ++ Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ uint row; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Update the total ret count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ ++ for (row = 0; row < RetData::row_limit(); row++) { ++ Label next_test; ++ ++ // See if return_bci is equal to bci[n]: ++ test_mdp_data_at(mdp, ++ in_bytes(RetData::bci_offset(row)), ++ return_bci, noreg, ++ next_test); ++ ++ // return_bci is equal to bci[n]. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(RetData::bci_count_offset(row))); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_offset(mdp, ++ in_bytes(RetData::bci_displacement_offset(row))); ++ b(profile_continue); ++ bind(next_test); ++ } ++ ++ update_mdp_for_ret(return_bci); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_null_seen(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ set_mdp_flag_at(mdp, BitData::null_seen_byte_constant()); ++ ++ // The method data pointer needs to be updated. ++ int mdp_delta = in_bytes(BitData::bit_data_size()); ++ if (TypeProfileCasts) { ++ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); ++ } ++ update_mdp_by_constant(mdp, mdp_delta); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_typecheck_failed(Register mdp) { ++ if (ProfileInterpreter && TypeProfileCasts) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. 
++ test_method_data_pointer(mdp, profile_continue); ++ ++ int count_offset = in_bytes(CounterData::count_offset()); ++ // Back up the address, since we have already bumped the mdp. ++ count_offset -= in_bytes(VirtualCallData::virtual_call_data_size()); ++ ++ // *Decrement* the counter. We expect to see zero or small negatives. ++ increment_mdp_data_at(mdp, count_offset, true); ++ ++ bind (profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass, Register reg2) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // The method data pointer needs to be updated. ++ int mdp_delta = in_bytes(BitData::bit_data_size()); ++ if (TypeProfileCasts) { ++ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); ++ ++ // Record the object type. ++ record_klass_in_profile(klass, mdp, reg2, false); ++ } ++ update_mdp_by_constant(mdp, mdp_delta); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_switch_default(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Update the default case count ++ increment_mdp_data_at(mdp, ++ in_bytes(MultiBranchData::default_count_offset())); ++ ++ // The method data pointer needs to be updated. ++ update_mdp_by_offset(mdp, ++ in_bytes(MultiBranchData:: ++ default_displacement_offset())); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_switch_case(Register index, ++ Register mdp, ++ Register reg2) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Build the base (index * per_case_size_in_bytes()) + ++ // case_array_offset_in_bytes() ++ li(reg2, in_bytes(MultiBranchData::per_case_size())); ++ mul_d(index, index, reg2); ++ addi_d(index, index, in_bytes(MultiBranchData::case_array_offset())); ++ ++ // Update the case count ++ increment_mdp_data_at(mdp, ++ index, ++ in_bytes(MultiBranchData::relative_count_offset())); ++ ++ // The method data pointer needs to be updated. ++ update_mdp_by_offset(mdp, ++ index, ++ in_bytes(MultiBranchData:: ++ relative_displacement_offset())); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::narrow(Register result) { ++ // Get method->_constMethod->_result_type ++ ld_d(T4, FP, frame::interpreter_frame_method_offset * wordSize); ++ ld_d(T4, T4, in_bytes(Method::const_offset())); ++ ld_bu(T4, T4, in_bytes(ConstMethod::result_type_offset())); ++ ++ Label done, notBool, notByte, notChar; ++ ++ // common case first ++ addi_d(AT, T4, -T_INT); ++ beq(AT, R0, done); ++ ++ // mask integer result to narrower return type. 
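++  // T_BOOLEAN keeps only bit 0, T_BYTE and T_SHORT are sign-extended,
++  // T_CHAR is zero-extended to 16 bits and T_INT is left as is.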
++ addi_d(AT, T4, -T_BOOLEAN); ++ bne(AT, R0, notBool); ++ andi(result, result, 0x1); ++ beq(R0, R0, done); ++ ++ bind(notBool); ++ addi_d(AT, T4, -T_BYTE); ++ bne(AT, R0, notByte); ++ ext_w_b(result, result); ++ beq(R0, R0, done); ++ ++ bind(notByte); ++ addi_d(AT, T4, -T_CHAR); ++ bne(AT, R0, notChar); ++ bstrpick_d(result, result, 15, 0); ++ beq(R0, R0, done); ++ ++ bind(notChar); ++ ext_w_h(result, result); ++ ++ // Nothing to do for T_INT ++ bind(done); ++} ++ ++ ++void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& mdo_addr) { ++ Label update, next, none; ++ ++ verify_oop(obj); ++ ++ if (mdo_addr.index() != noreg) { ++ guarantee(T0 != mdo_addr.base(), "The base register will be corrupted !"); ++ guarantee(T0 != mdo_addr.index(), "The index register will be corrupted !"); ++ push(T0); ++ alsl_d(T0, mdo_addr.index(), mdo_addr.base(), mdo_addr.scale() - 1); ++ } ++ ++ bnez(obj, update); ++ ++ if (mdo_addr.index() == noreg) { ++ ld_d(AT, mdo_addr); ++ } else { ++ ld_d(AT, T0, mdo_addr.disp()); ++ } ++ ori(AT, AT, TypeEntries::null_seen); ++ if (mdo_addr.index() == noreg) { ++ st_d(AT, mdo_addr); ++ } else { ++ st_d(AT, T0, mdo_addr.disp()); ++ } ++ ++ b(next); ++ ++ bind(update); ++ load_klass(obj, obj); ++ ++ if (mdo_addr.index() == noreg) { ++ ld_d(AT, mdo_addr); ++ } else { ++ ld_d(AT, T0, mdo_addr.disp()); ++ } ++ xorr(obj, obj, AT); ++ ++ assert(TypeEntries::type_klass_mask == -4, "must be"); ++ bstrpick_d(AT, obj, 63, 2); ++ beqz(AT, next); ++ ++ andi(AT, obj, TypeEntries::type_unknown); ++ bnez(AT, next); ++ ++ if (mdo_addr.index() == noreg) { ++ ld_d(AT, mdo_addr); ++ } else { ++ ld_d(AT, T0, mdo_addr.disp()); ++ } ++ beqz(AT, none); ++ ++ addi_d(AT, AT, -(TypeEntries::null_seen)); ++ beqz(AT, none); ++ ++ // There is a chance that the checks above (re-reading profiling ++ // data from memory) fail if another thread has just set the ++ // profiling to this obj's klass ++ if (mdo_addr.index() == noreg) { ++ ld_d(AT, mdo_addr); ++ } else { ++ ld_d(AT, T0, mdo_addr.disp()); ++ } ++ xorr(obj, obj, AT); ++ assert(TypeEntries::type_klass_mask == -4, "must be"); ++ bstrpick_d(AT, obj, 63, 2); ++ beqz(AT, next); ++ ++ // different than before. Cannot keep accurate profile. ++ if (mdo_addr.index() == noreg) { ++ ld_d(AT, mdo_addr); ++ } else { ++ ld_d(AT, T0, mdo_addr.disp()); ++ } ++ ori(AT, AT, TypeEntries::type_unknown); ++ if (mdo_addr.index() == noreg) { ++ st_d(AT, mdo_addr); ++ } else { ++ st_d(AT, T0, mdo_addr.disp()); ++ } ++ b(next); ++ ++ bind(none); ++ // first time here. Set profile type. ++ if (mdo_addr.index() == noreg) { ++ st_d(obj, mdo_addr); ++ } else { ++ st_d(obj, T0, mdo_addr.disp()); ++ } ++ ++ bind(next); ++ if (mdo_addr.index() != noreg) { ++ pop(T0); ++ } ++} ++ ++void InterpreterMacroAssembler::profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual) { ++ if (!ProfileInterpreter) { ++ return; ++ } ++ ++ if (MethodData::profile_arguments() || MethodData::profile_return()) { ++ Label profile_continue; ++ ++ test_method_data_pointer(mdp, profile_continue); ++ ++ int off_to_start = is_virtual ? in_bytes(VirtualCallData::virtual_call_data_size()) : in_bytes(CounterData::counter_data_size()); ++ ++ ld_b(AT, mdp, in_bytes(DataLayout::tag_offset()) - off_to_start); ++ li(tmp, is_virtual ? 
DataLayout::virtual_call_type_data_tag : DataLayout::call_type_data_tag); ++ bne(tmp, AT, profile_continue); ++ ++ ++ if (MethodData::profile_arguments()) { ++ Label done; ++ int off_to_args = in_bytes(TypeEntriesAtCall::args_data_offset()); ++ if (Assembler::is_simm(off_to_args, 12)) { ++ addi_d(mdp, mdp, off_to_args); ++ } else { ++ li(AT, off_to_args); ++ add_d(mdp, mdp, AT); ++ } ++ ++ ++ for (int i = 0; i < TypeProfileArgsLimit; i++) { ++ if (i > 0 || MethodData::profile_return()) { ++ // If return value type is profiled we may have no argument to profile ++ ld_d(tmp, mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args); ++ ++ if (Assembler::is_simm(-1 * i * TypeStackSlotEntries::per_arg_count(), 12)) { ++ addi_w(tmp, tmp, -1 * i * TypeStackSlotEntries::per_arg_count()); ++ } else { ++ li(AT, i*TypeStackSlotEntries::per_arg_count()); ++ sub_w(tmp, tmp, AT); ++ } ++ ++ li(AT, TypeStackSlotEntries::per_arg_count()); ++ blt(tmp, AT, done); ++ } ++ ld_d(tmp, callee, in_bytes(Method::const_offset())); ++ ++ ld_hu(tmp, tmp, in_bytes(ConstMethod::size_of_parameters_offset())); ++ ++ // stack offset o (zero based) from the start of the argument ++ // list, for n arguments translates into offset n - o - 1 from ++ // the end of the argument list ++ ld_d(AT, mdp, in_bytes(TypeEntriesAtCall::stack_slot_offset(i))-off_to_args); ++ sub_d(tmp, tmp, AT); ++ ++ addi_w(tmp, tmp, -1); ++ ++ Address arg_addr = argument_address(tmp); ++ ld_d(tmp, arg_addr); ++ ++ Address mdo_arg_addr(mdp, in_bytes(TypeEntriesAtCall::argument_type_offset(i))-off_to_args); ++ profile_obj_type(tmp, mdo_arg_addr); ++ ++ int to_add = in_bytes(TypeStackSlotEntries::per_arg_size()); ++ if (Assembler::is_simm(to_add, 12)) { ++ addi_d(mdp, mdp, to_add); ++ } else { ++ li(AT, to_add); ++ add_d(mdp, mdp, AT); ++ } ++ ++ off_to_args += to_add; ++ } ++ ++ if (MethodData::profile_return()) { ++ ld_d(tmp, mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args); ++ ++ int tmp_arg_counts = TypeProfileArgsLimit*TypeStackSlotEntries::per_arg_count(); ++ if (Assembler::is_simm(-1 * tmp_arg_counts, 12)) { ++ addi_w(tmp, tmp, -1 * tmp_arg_counts); ++ } else { ++ li(AT, tmp_arg_counts); ++ sub_w(mdp, mdp, AT); ++ } ++ } ++ ++ bind(done); ++ ++ if (MethodData::profile_return()) { ++ // We're right after the type profile for the last ++ // argument. tmp is the number of cells left in the ++ // CallTypeData/VirtualCallTypeData to reach its end. Non null ++ // if there's a return to profile. 
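++        // tmp holds a cell count; convert it to a byte offset (cells are
++        // DataLayout::cell_size bytes) before advancing mdp below.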
++ assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type"); ++ slli_w(tmp, tmp, exact_log2(DataLayout::cell_size)); ++ add_d(mdp, mdp, tmp); ++ } ++ st_d(mdp, FP, frame::interpreter_frame_mdp_offset * wordSize); ++ } else { ++ assert(MethodData::profile_return(), "either profile call args or call ret"); ++ update_mdp_by_constant(mdp, in_bytes(TypeEntriesAtCall::return_only_size())); ++ } ++ ++ // mdp points right after the end of the ++ // CallTypeData/VirtualCallTypeData, right after the cells for the ++ // return value type if there's one ++ ++ bind(profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::profile_return_type(Register mdp, Register ret, Register tmp) { ++ assert_different_registers(mdp, ret, tmp, _bcp_register); ++ if (ProfileInterpreter && MethodData::profile_return()) { ++ Label profile_continue, done; ++ ++ test_method_data_pointer(mdp, profile_continue); ++ ++ if (MethodData::profile_return_jsr292_only()) { ++ assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2"); ++ ++ // If we don't profile all invoke bytecodes we must make sure ++ // it's a bytecode we indeed profile. We can't go back to the ++ // beginning of the ProfileData we intend to update to check its ++ // type because we're right after it and we don't know its ++ // length ++ Label do_profile; ++ ld_b(tmp, _bcp_register, 0); ++ addi_d(AT, tmp, -1 * Bytecodes::_invokedynamic); ++ beqz(AT, do_profile); ++ addi_d(AT, tmp, -1 * Bytecodes::_invokehandle); ++ beqz(AT, do_profile); ++ ++ get_method(tmp); ++ ld_hu(tmp, tmp, Method::intrinsic_id_offset_in_bytes()); ++ li(AT, vmIntrinsics::_compiledLambdaForm); ++ bne(tmp, AT, profile_continue); ++ ++ bind(do_profile); ++ } ++ ++ Address mdo_ret_addr(mdp, -in_bytes(ReturnTypeEntry::size())); ++ add_d(tmp, ret, R0); ++ profile_obj_type(tmp, mdo_ret_addr); ++ ++ bind(profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::profile_parameters_type(Register mdp, Register tmp1, Register tmp2) { ++ guarantee(T4 == tmp1, "You are required to use T4 as the index register for LoongArch!"); ++ ++ if (ProfileInterpreter && MethodData::profile_parameters()) { ++ Label profile_continue, done; ++ ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Load the offset of the area within the MDO used for ++ // parameters. If it's negative we're not profiling any parameters ++ ld_w(tmp1, mdp, in_bytes(MethodData::parameters_type_data_di_offset()) - in_bytes(MethodData::data_offset())); ++ blt(tmp1, R0, profile_continue); ++ ++ // Compute a pointer to the area for parameters from the offset ++ // and move the pointer to the slot for the last ++ // parameters. Collect profiling from last parameter down.
++ // mdo start + parameters offset + array length - 1 ++ add_d(mdp, mdp, tmp1); ++ ld_d(tmp1, mdp, in_bytes(ArrayData::array_len_offset())); ++ decrement(tmp1, TypeStackSlotEntries::per_arg_count()); ++ ++ ++ Label loop; ++ bind(loop); ++ ++ int off_base = in_bytes(ParametersTypeData::stack_slot_offset(0)); ++ int type_base = in_bytes(ParametersTypeData::type_offset(0)); ++ Address::ScaleFactor per_arg_scale = Address::times(DataLayout::cell_size); ++ Address arg_type(mdp, tmp1, per_arg_scale, type_base); ++ ++ // load offset on the stack from the slot for this parameter ++ alsl_d(AT, tmp1, mdp, per_arg_scale - 1); ++ ld_d(tmp2, AT, off_base); ++ ++ sub_d(tmp2, R0, tmp2); ++ ++ // read the parameter from the local area ++ slli_d(AT, tmp2, Interpreter::logStackElementSize); ++ ldx_d(tmp2, AT, _locals_register); ++ ++ // profile the parameter ++ profile_obj_type(tmp2, arg_type); ++ ++ // go to next parameter ++ decrement(tmp1, TypeStackSlotEntries::per_arg_count()); ++ blt(R0, tmp1, loop); ++ ++ bind(profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::verify_oop(Register reg, TosState state) { ++ if (state == atos) { ++ MacroAssembler::verify_oop(reg); ++ } ++} ++ ++void InterpreterMacroAssembler::verify_FPU(int stack_depth, TosState state) { ++} ++#endif // !CC_INTERP ++ ++ ++void InterpreterMacroAssembler::notify_method_entry() { ++ // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to ++ // track stack depth. If it is possible to enter interp_only_mode we add ++ // the code to check if the event should be sent. ++ Register tempreg = T0; ++#ifndef OPT_THREAD ++ Register thread = T8; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ if (JvmtiExport::can_post_interpreter_events()) { ++ Label L; ++ ld_w(tempreg, thread, in_bytes(JavaThread::interp_only_mode_offset())); ++ beq(tempreg, R0, L); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_method_entry)); ++ bind(L); ++ } ++ ++ { ++ SkipIfEqual skip_if(this, &DTraceMethodProbes, 0); ++ get_method(S3); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), ++ //Rthread, ++ thread, ++ //Rmethod); ++ S3); ++ } ++} ++ ++void InterpreterMacroAssembler::notify_method_exit( ++ TosState state, NotifyMethodExitMode mode) { ++ Register tempreg = T0; ++#ifndef OPT_THREAD ++ Register thread = T8; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to ++ // track stack depth. If it is possible to enter interp_only_mode we add ++ // the code to check if the event should be sent. ++ if (mode == NotifyJVMTI && JvmtiExport::can_post_interpreter_events()) { ++ Label skip; ++ // Note: frame::interpreter_frame_result has a dependency on how the ++ // method result is saved across the call to post_method_exit. If this ++ // is changed then the interpreter_frame_result implementation will ++ // need to be updated too. ++ ++ // template interpreter will leave it on the top of the stack. 
++ push(state); ++ ld_w(tempreg, thread, in_bytes(JavaThread::interp_only_mode_offset())); ++ beq(tempreg, R0, skip); ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit)); ++ bind(skip); ++ pop(state); ++ } ++ ++ { ++ // Dtrace notification ++ SkipIfEqual skip_if(this, &DTraceMethodProbes, 0); ++ push(state); ++ get_method(S3); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), ++ //Rthread, Rmethod); ++ thread, S3); ++ pop(state); ++ } ++} ++ ++// Jump if ((*counter_addr += increment) & mask) satisfies the condition. ++void InterpreterMacroAssembler::increment_mask_and_jump(Address counter_addr, ++ int increment, int mask, ++ Register scratch, bool preloaded, ++ Condition cond, Label* where) { ++ assert_different_registers(scratch, AT); ++ ++ if (!preloaded) { ++ ld_w(scratch, counter_addr); ++ } ++ addi_w(scratch, scratch, increment); ++ st_w(scratch, counter_addr); ++ ++ li(AT, mask); ++ andr(scratch, scratch, AT); ++ ++ if (cond == Assembler::zero) { ++ beq(scratch, R0, *where); ++ } else { ++ unimplemented(); ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/interp_masm_loongarch.hpp b/src/hotspot/cpu/loongarch/interp_masm_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/interp_masm_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/interp_masm_loongarch.hpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,281 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_INTERP_MASM_LOONGARCH_64_HPP ++#define CPU_LOONGARCH_INTERP_MASM_LOONGARCH_64_HPP ++ ++#include "asm/assembler.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "interpreter/invocationCounter.hpp" ++#include "runtime/frame.hpp" ++ ++// This file specializes the assember with interpreter-specific macros ++ ++typedef ByteSize (*OffsetFunction)(uint); ++ ++class InterpreterMacroAssembler: public MacroAssembler { ++#ifndef CC_INTERP ++ private: ++ ++ Register _locals_register; // register that contains the pointer to the locals ++ Register _bcp_register; // register that contains the bcp ++ ++ protected: ++ // Interpreter specific version of call_VM_base ++ virtual void call_VM_leaf_base(address entry_point, ++ int number_of_arguments); ++ ++ virtual void call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions); ++ ++ // base routine for all dispatches ++ void dispatch_base(TosState state, address* table, bool verifyoop = true, bool generate_poll = false); ++#endif // CC_INTERP ++ ++ public: ++ void jump_to_entry(address entry); ++ // narrow int return value ++ void narrow(Register result); ++ ++ InterpreterMacroAssembler(CodeBuffer* code) : MacroAssembler(code), _locals_register(LVP), _bcp_register(BCP) {} ++ ++ void get_2_byte_integer_at_bcp(Register reg, Register tmp, int offset); ++ void get_4_byte_integer_at_bcp(Register reg, int offset); ++ ++ virtual void check_and_handle_popframe(Register java_thread); ++ virtual void check_and_handle_earlyret(Register java_thread); ++ ++ void load_earlyret_value(TosState state); ++ ++#ifdef CC_INTERP ++ void save_bcp() { /* not needed in c++ interpreter and harmless */ } ++ void restore_bcp() { /* not needed in c++ interpreter and harmless */ } ++ ++ // Helpers for runtime call arguments/results ++ void get_method(Register reg); ++ ++#else ++ ++ // Interpreter-specific registers ++ void save_bcp() { ++ st_d(BCP, FP, frame::interpreter_frame_bcp_offset * wordSize); ++ } ++ ++ void restore_bcp() { ++ ld_d(BCP, FP, frame::interpreter_frame_bcp_offset * wordSize); ++ } ++ ++ void restore_locals() { ++ ld_d(LVP, FP, frame::interpreter_frame_locals_offset * wordSize); ++ } ++ ++ // Helpers for runtime call arguments/results ++ void get_method(Register reg) { ++ ld_d(reg, FP, frame::interpreter_frame_method_offset * wordSize); ++ } ++ ++ void get_const(Register reg){ ++ get_method(reg); ++ ld_d(reg, reg, in_bytes(Method::const_offset())); ++ } ++ ++ void get_constant_pool(Register reg) { ++ get_const(reg); ++ ld_d(reg, reg, in_bytes(ConstMethod::constants_offset())); ++ } ++ ++ void get_constant_pool_cache(Register reg) { ++ get_constant_pool(reg); ++ ld_d(reg, reg, ConstantPool::cache_offset_in_bytes()); ++ } ++ ++ void get_cpool_and_tags(Register cpool, Register tags) { ++ get_constant_pool(cpool); ++ ld_d(tags, cpool, ConstantPool::tags_offset_in_bytes()); ++ } ++ ++ void get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset); ++ void get_cache_and_index_at_bcp(Register cache, Register index, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_and_index_and_bytecode_at_bcp(Register cache, Register index, Register bytecode, int byte_no, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_entry_pointer_at_bcp(Register cache, Register tmp, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_index_at_bcp(Register index, 
int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_method_counters(Register method, Register mcs, Label& skip); ++ ++ // load cpool->resolved_references(index); ++ void load_resolved_reference_at_index(Register result, Register index, Register tmp); ++ ++ // load cpool->resolved_klass_at(index) ++ void load_resolved_klass_at_index(Register cpool, // the constant pool (corrupted on return) ++ Register index, // the constant pool index (corrupted on return) ++ Register klass); // contains the Klass on return ++ ++ void pop_ptr( Register r = FSR); ++ void pop_i( Register r = FSR); ++ void pop_l( Register r = FSR); ++ void pop_f(FloatRegister r = FSF); ++ void pop_d(FloatRegister r = FSF); ++ ++ void push_ptr( Register r = FSR); ++ void push_i( Register r = FSR); ++ void push_l( Register r = FSR); ++ void push_f(FloatRegister r = FSF); ++ void push_d(FloatRegister r = FSF); ++ ++ void pop(Register r ) { ((MacroAssembler*)this)->pop(r); } ++ ++ void push(Register r ) { ((MacroAssembler*)this)->push(r); } ++ ++ void pop(TosState state); // transition vtos -> state ++ void push(TosState state); // transition state -> vtos ++ ++ void empty_expression_stack() { ++ ld_d(SP, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ // NULL last_sp until next java call ++ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ } ++ ++ // Super call_VM calls - correspond to MacroAssembler::call_VM(_leaf) calls ++ void load_ptr(int n, Register val); ++ void store_ptr(int n, Register val); ++ ++ // Generate a subtype check: branch to ok_is_subtype if sub_klass is ++ // a subtype of super_klass. ++ //void gen_subtype_check( Register sub_klass, Label &ok_is_subtype ); ++ void gen_subtype_check( Register Rsup_klass, Register sub_klass, Label &ok_is_subtype ); ++ ++ // Dispatching ++ void dispatch_prolog(TosState state, int step = 0); ++ void dispatch_epilog(TosState state, int step = 0); ++ void dispatch_only(TosState state, bool generate_poll = false); ++ void dispatch_only_normal(TosState state); ++ void dispatch_only_noverify(TosState state); ++ void dispatch_next(TosState state, int step = 0, bool generate_poll = false); ++ void dispatch_via (TosState state, address* table); ++ ++ // jump to an invoked target ++ void prepare_to_jump_from_interpreted(); ++ void jump_from_interpreted(Register method, Register temp); ++ ++ ++ // Returning from interpreted functions ++ // ++ // Removes the current activation (incl. unlocking of monitors) ++ // and sets up the return address. This code is also used for ++ // exception unwindwing. In that case, we do not want to throw ++ // IllegalMonitorStateExceptions, since that might get us into an ++ // infinite rethrow exception loop. ++ // Additionally this code is used for popFrame and earlyReturn. ++ // In popFrame case we want to skip throwing an exception, ++ // installing an exception, and notifying jvmdi. ++ // In earlyReturn case we only want to skip throwing an exception ++ // and installing an exception. 
++ void remove_activation(TosState state, Register ret_addr, ++ bool throw_monitor_exception = true, ++ bool install_monitor_exception = true, ++ bool notify_jvmdi = true); ++#endif // CC_INTERP ++ ++ // Object locking ++ void lock_object (Register lock_reg); ++ void unlock_object(Register lock_reg); ++ ++#ifndef CC_INTERP ++ ++ // Interpreter profiling operations ++ void set_method_data_pointer_for_bcp(); ++ void test_method_data_pointer(Register mdp, Label& zero_continue); ++ void verify_method_data_pointer(); ++ ++ void set_mdp_data_at(Register mdp_in, int constant, Register value); ++ void increment_mdp_data_at(Address data, bool decrement = false); ++ void increment_mdp_data_at(Register mdp_in, int constant, ++ bool decrement = false); ++ void increment_mdp_data_at(Register mdp_in, Register reg, int constant, ++ bool decrement = false); ++ void increment_mask_and_jump(Address counter_addr, ++ int increment, int mask, ++ Register scratch, bool preloaded, ++ Condition cond, Label* where); ++ void set_mdp_flag_at(Register mdp_in, int flag_constant); ++ void test_mdp_data_at(Register mdp_in, int offset, Register value, ++ Register test_value_out, ++ Label& not_equal_continue); ++ ++ void record_klass_in_profile(Register receiver, Register mdp, ++ Register reg2, bool is_virtual_call); ++ void record_klass_in_profile_helper(Register receiver, Register mdp, ++ Register reg2, int start_row, ++ Label& done, bool is_virtual_call); ++ ++ void record_item_in_profile_helper(Register item, Register mdp, ++ Register reg2, int start_row, Label& done, int total_rows, ++ OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn, ++ int non_profiled_offset); ++ void update_mdp_by_offset(Register mdp_in, int offset_of_offset); ++ void update_mdp_by_offset(Register mdp_in, Register reg, int offset_of_disp); ++ void update_mdp_by_constant(Register mdp_in, int constant); ++ void update_mdp_for_ret(Register return_bci); ++ ++ void profile_taken_branch(Register mdp, Register bumped_count); ++ void profile_not_taken_branch(Register mdp); ++ void profile_call(Register mdp); ++ void profile_final_call(Register mdp); ++ void profile_virtual_call(Register receiver, Register mdp, ++ Register scratch2, ++ bool receiver_can_be_null = false); ++ void profile_called_method(Register method, Register mdp, Register reg2) NOT_JVMCI_RETURN; ++ void profile_ret(Register return_bci, Register mdp); ++ void profile_null_seen(Register mdp); ++ void profile_typecheck(Register mdp, Register klass, Register scratch); ++ void profile_typecheck_failed(Register mdp); ++ void profile_switch_default(Register mdp); ++ void profile_switch_case(Register index_in_scratch, Register mdp, ++ Register scratch2); ++ ++ // Debugging ++ // only if +VerifyOops && state == atos ++ void verify_oop(Register reg, TosState state = atos); ++ // only if +VerifyFPU && (state == ftos || state == dtos) ++ void verify_FPU(int stack_depth, TosState state = ftos); ++ ++ void profile_obj_type(Register obj, const Address& mdo_addr); ++ void profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual); ++ void profile_return_type(Register mdp, Register ret, Register tmp); ++ void profile_parameters_type(Register mdp, Register tmp1, Register tmp2); ++#endif // !CC_INTERP ++ ++ typedef enum { NotifyJVMTI, SkipNotifyJVMTI } NotifyMethodExitMode; ++ ++ // support for jvmti/dtrace ++ void notify_method_entry(); ++ void notify_method_exit(TosState state, NotifyMethodExitMode mode); ++}; ++ ++#endif // 
CPU_LOONGARCH_INTERP_MASM_LOONGARCH_64_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/interpreterRT_loongarch_64.cpp b/src/hotspot/cpu/loongarch/interpreterRT_loongarch_64.cpp +--- a/src/hotspot/cpu/loongarch/interpreterRT_loongarch_64.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/interpreterRT_loongarch_64.cpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,273 @@ ++/* ++ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "memory/allocation.inline.hpp" ++#include "memory/universe.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/icache.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/signature.hpp" ++ ++#define __ _masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++// Implementation of SignatureHandlerGenerator ++InterpreterRuntime::SignatureHandlerGenerator::SignatureHandlerGenerator( ++ const methodHandle& method, CodeBuffer* buffer) : NativeSignatureIterator(method) { ++ _masm = new MacroAssembler(buffer); ++ _num_int_args = (method->is_static() ? 
1 : 0); ++ _num_fp_args = 0; ++ _stack_offset = 0; ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::move(int from_offset, int to_offset) { ++ __ ld_d(temp(), from(), Interpreter::local_offset_in_bytes(from_offset)); ++ __ st_d(temp(), to(), to_offset * longSize); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::box(int from_offset, int to_offset) { ++ __ addi_d(temp(), from(),Interpreter::local_offset_in_bytes(from_offset) ); ++ __ ld_w(AT, from(), Interpreter::local_offset_in_bytes(from_offset) ); ++ ++ __ maskeqz(temp(), temp(), AT); ++ __ st_w(temp(), to(), to_offset * wordSize); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::generate(uint64_t fingerprint) { ++ // generate code to handle arguments ++ iterate(fingerprint); ++ // return result handler ++ __ li(V0, AbstractInterpreter::result_handler(method()->result_type())); ++ // return ++ __ jr(RA); ++ ++ __ flush(); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_int() { ++ if (_num_int_args < Argument::n_register_parameters - 1) { ++ __ ld_w(as_Register(++_num_int_args + A0->encoding()), from(), Interpreter::local_offset_in_bytes(offset())); ++ } else { ++ __ ld_w(AT, from(), Interpreter::local_offset_in_bytes(offset())); ++ __ st_w(AT, to(), _stack_offset); ++ _stack_offset += wordSize; ++ } ++} ++ ++// the jvm specifies that long type takes 2 stack spaces, so in do_long(), _offset += 2. ++void InterpreterRuntime::SignatureHandlerGenerator::pass_long() { ++ if (_num_int_args < Argument::n_register_parameters - 1) { ++ __ ld_d(as_Register(++_num_int_args + A0->encoding()), from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ } else { ++ __ ld_d(AT, from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ __ st_d(AT, to(), _stack_offset); ++ _stack_offset += wordSize; ++ } ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_object() { ++ if (_num_int_args < Argument::n_register_parameters - 1) { ++ Register reg = as_Register(++_num_int_args + A0->encoding()); ++ if (_num_int_args == 1) { ++ assert(offset() == 0, "argument register 1 can only be (non-null) receiver"); ++ __ addi_d(reg, from(), Interpreter::local_offset_in_bytes(offset())); ++ } else { ++ __ ld_d(reg, from(), Interpreter::local_offset_in_bytes(offset())); ++ __ addi_d(AT, from(), Interpreter::local_offset_in_bytes(offset())); ++ __ maskeqz(reg, AT, reg); ++ } ++ } else { ++ __ ld_d(temp(), from(), Interpreter::local_offset_in_bytes(offset())); ++ __ addi_d(AT, from(), Interpreter::local_offset_in_bytes(offset())); ++ __ maskeqz(temp(), AT, temp()); ++ __ st_d(temp(), to(), _stack_offset); ++ _stack_offset += wordSize; ++ } ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_float() { ++ if (_num_fp_args < Argument::n_float_register_parameters) { ++ __ fld_s(as_FloatRegister(_num_fp_args++), from(), Interpreter::local_offset_in_bytes(offset())); ++ } else if (_num_int_args < Argument::n_register_parameters - 1) { ++ __ ld_w(as_Register(++_num_int_args + A0->encoding()), from(), Interpreter::local_offset_in_bytes(offset())); ++ } else { ++ __ ld_w(AT, from(), Interpreter::local_offset_in_bytes(offset())); ++ __ st_w(AT, to(), _stack_offset); ++ _stack_offset += wordSize; ++ } ++} ++ ++// the jvm specifies that double type takes 2 stack spaces, so in do_double(), _offset += 2. 
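The pass_* routines above (and pass_double, which follows) all apply the same placement policy: integer-like arguments go into the argument registers after A0, which stays reserved for the JNIEnv pointer; floating-point arguments prefer the FP argument registers and then fall back to the remaining integer registers; anything left spills to the native stack in wordSize steps. A minimal stand-alone sketch of that policy is below; it models only the placement decision, not the emitted LoongArch code, and the register counts (8 integer and 8 FP argument registers) are assumptions standing in for Argument::n_register_parameters and Argument::n_float_register_parameters.

  // Conceptual model of the argument-placement decisions made by the
  // SignatureHandlerGenerator above; register counts are assumed values.
  #include <cstdio>

  enum class Slot { IntReg, FpReg, Stack };

  struct ArgState {
    int num_int_args = 0;   // A0 is reserved for JNIEnv, so integer args start at A1
    int num_fp_args  = 0;
    int stack_offset = 0;   // grows by wordSize (8 bytes) per spilled argument
  };

  // Where does the next integer-like argument (int/long/object) go?
  Slot place_int(ArgState& s) {
    const int n_int_regs = 8;               // assumed Argument::n_register_parameters
    if (s.num_int_args < n_int_regs - 1) {  // "- 1" keeps A0 free for the JNIEnv pointer
      s.num_int_args++;
      return Slot::IntReg;
    }
    s.stack_offset += 8;
    return Slot::Stack;
  }

  // Floats/doubles prefer FP registers, then the leftover integer registers,
  // then the stack -- the same order as pass_float()/pass_double() above.
  Slot place_fp(ArgState& s) {
    const int n_fp_regs = 8;                // assumed Argument::n_float_register_parameters
    if (s.num_fp_args < n_fp_regs) {
      s.num_fp_args++;
      return Slot::FpReg;
    }
    return place_int(s);
  }

  int main() {
    ArgState s;
    for (int i = 0; i < 10; i++) {
      std::printf("fp arg %d -> slot kind %d\n", i, (int)place_fp(s));
    }
  }
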
++void InterpreterRuntime::SignatureHandlerGenerator::pass_double() { ++ if (_num_fp_args < Argument::n_float_register_parameters) { ++ __ fld_d(as_FloatRegister(_num_fp_args++), from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ } else if (_num_int_args < Argument::n_register_parameters - 1) { ++ __ ld_d(as_Register(++_num_int_args + A0->encoding()), from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ } else { ++ __ ld_d(AT, from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ __ st_d(AT, to(), _stack_offset); ++ _stack_offset += wordSize; ++ } ++} ++ ++ ++Register InterpreterRuntime::SignatureHandlerGenerator::from() { return LVP; } ++Register InterpreterRuntime::SignatureHandlerGenerator::to() { return SP; } ++Register InterpreterRuntime::SignatureHandlerGenerator::temp() { return T8; } ++ ++// Implementation of SignatureHandlerLibrary ++ ++void SignatureHandlerLibrary::pd_set_handler(address handler) {} ++ ++ ++class SlowSignatureHandler ++ : public NativeSignatureIterator { ++ private: ++ address _from; ++ intptr_t* _to; ++ intptr_t* _int_args; ++ intptr_t* _fp_args; ++ intptr_t* _fp_identifiers; ++ unsigned int _num_int_args; ++ unsigned int _num_fp_args; ++ ++ virtual void pass_int() ++ { ++ jint from_obj = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); ++ _from -= Interpreter::stackElementSize; ++ ++ if (_num_int_args < Argument::n_register_parameters - 1) { ++ *_int_args++ = from_obj; ++ _num_int_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ virtual void pass_long() ++ { ++ intptr_t from_obj = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); ++ _from -= 2 * Interpreter::stackElementSize; ++ ++ if (_num_int_args < Argument::n_register_parameters - 1) { ++ *_int_args++ = from_obj; ++ _num_int_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ virtual void pass_object() ++ { ++ intptr_t *from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(0)); ++ _from -= Interpreter::stackElementSize; ++ ++ if (_num_int_args < Argument::n_register_parameters - 1) { ++ *_int_args++ = (*from_addr == 0) ? NULL : (intptr_t) from_addr; ++ _num_int_args++; ++ } else { ++ *_to++ = (*from_addr == 0) ? NULL : (intptr_t) from_addr; ++ } ++ } ++ ++ virtual void pass_float() ++ { ++ jint from_obj = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); ++ _from -= Interpreter::stackElementSize; ++ ++ if (_num_fp_args < Argument::n_float_register_parameters) { ++ *_fp_args++ = from_obj; ++ _num_fp_args++; ++ } else if (_num_int_args < Argument::n_register_parameters - 1) { ++ *_int_args++ = from_obj; ++ _num_int_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ virtual void pass_double() ++ { ++ intptr_t from_obj = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); ++ _from -= 2*Interpreter::stackElementSize; ++ ++ if (_num_fp_args < Argument::n_float_register_parameters) { ++ *_fp_args++ = from_obj; ++ *_fp_identifiers |= (1 << _num_fp_args); // mark as double ++ _num_fp_args++; ++ } else if (_num_int_args < Argument::n_register_parameters - 1) { ++ *_int_args++ = from_obj; ++ _num_int_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ public: ++ SlowSignatureHandler(methodHandle method, address from, intptr_t* to) ++ : NativeSignatureIterator(method) ++ { ++ _from = from; ++ _to = to; ++ ++ // see TemplateInterpreterGenerator::generate_slow_signature_handler() ++ _int_args = to - (method->is_static() ? 
15 : 16); ++ _fp_args = to - 8; ++ _fp_identifiers = to - 9; ++ *(int*) _fp_identifiers = 0; ++ _num_int_args = (method->is_static() ? 1 : 0); ++ _num_fp_args = 0; ++ } ++}; ++ ++ ++IRT_ENTRY(address, ++ InterpreterRuntime::slow_signature_handler(JavaThread* thread, ++ Method* method, ++ intptr_t* from, ++ intptr_t* to)) ++ methodHandle m(thread, (Method*)method); ++ assert(m->is_native(), "sanity check"); ++ ++ // handle arguments ++ SlowSignatureHandler(m, (address)from, to).iterate(UCONST64(-1)); ++ ++ // return result handler ++ return Interpreter::result_handler(m->result_type()); ++IRT_END +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/interpreterRT_loongarch.hpp b/src/hotspot/cpu/loongarch/interpreterRT_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/interpreterRT_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/interpreterRT_loongarch.hpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,62 @@ ++/* ++ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_INTERPRETERRT_LOONGARCH_HPP ++#define CPU_LOONGARCH_INTERPRETERRT_LOONGARCH_HPP ++ ++// This is included in the middle of class Interpreter. ++// Do not include files here. 
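The note above ("included in the middle of class Interpreter... do not include files here") describes the splice-in pattern used for per-CPU interpreter pieces: a shared header opens the class and textually includes the platform fragment inside the class body, so the fragment may contain only bare member declarations. A simplified illustration of the pattern follows; the file and class names in it are invented, only the shape matches.

  // Simplified illustration of the include-inside-a-class pattern (invented names).
  //
  // circle_members.inc -- bare member declarations only, with no #include
  // directives of its own:
  //
  //   double radius;
  //   double area() const { return 3.141592653589793 * radius * radius; }
  //
  // shape.hpp -- the shared header splices the fragment into the class body:
  //
  //   class Shape {
  //    public:
  //   #include "circle_members.inc"   // variant-specific members land here
  //   };
  //
  // Any #include inside circle_members.inc would be expanded in the middle of
  // class Shape and fail to compile, which is why the comment above forbids
  // includes in this header.
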
++ ++// native method calls ++ ++class SignatureHandlerGenerator: public NativeSignatureIterator { ++ private: ++ MacroAssembler* _masm; ++ unsigned int _num_fp_args; ++ unsigned int _num_int_args; ++ int _stack_offset; ++ ++ void move(int from_offset, int to_offset); ++ void box(int from_offset, int to_offset); ++ void pass_int(); ++ void pass_long(); ++ void pass_object(); ++ void pass_float(); ++ void pass_double(); ++ ++ public: ++ // Creation ++ SignatureHandlerGenerator(const methodHandle& method, CodeBuffer* buffer); ++ ++ // Code generation ++ void generate(uint64_t fingerprint); ++ ++ // Code generation support ++ static Register from(); ++ static Register to(); ++ static Register temp(); ++}; ++ ++#endif // CPU_LOONGARCH_INTERPRETERRT_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/javaFrameAnchor_loongarch.hpp b/src/hotspot/cpu/loongarch/javaFrameAnchor_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/javaFrameAnchor_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/javaFrameAnchor_loongarch.hpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,87 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_JAVAFRAMEANCHOR_LOONGARCH_HPP ++#define CPU_LOONGARCH_JAVAFRAMEANCHOR_LOONGARCH_HPP ++ ++private: ++ ++ // FP value associated with _last_Java_sp: ++ intptr_t* volatile _last_Java_fp; // pointer is volatile not what it points to ++ ++public: ++ // Each arch must define reset, save, restore ++ // These are used by objects that only care about: ++ // 1 - initializing a new state (thread creation, javaCalls) ++ // 2 - saving a current state (javaCalls) ++ // 3 - restoring an old state (javaCalls) ++ ++ void clear(void) { ++ // clearing _last_Java_sp must be first ++ _last_Java_sp = NULL; ++ // fence? 
++ _last_Java_fp = NULL; ++ _last_Java_pc = NULL; ++ } ++ ++ void copy(JavaFrameAnchor* src) { ++ // In order to make sure the transition state is valid for "this" ++ // We must clear _last_Java_sp before copying the rest of the new data ++ // ++ // Hack Alert: Temporary bugfix for 4717480/4721647 ++ // To act like previous version (pd_cache_state) don't NULL _last_Java_sp ++ // unless the value is changing ++ // ++ if (_last_Java_sp != src->_last_Java_sp) ++ _last_Java_sp = NULL; ++ ++ _last_Java_fp = src->_last_Java_fp; ++ _last_Java_pc = src->_last_Java_pc; ++ // Must be last so profiler will always see valid frame if has_last_frame() is true ++ _last_Java_sp = src->_last_Java_sp; ++ } ++ ++ // Always walkable ++ bool walkable(void) { return true; } ++ // Never any thing to do since we are always walkable and can find address of return addresses ++ void make_walkable(JavaThread* thread) { } ++ ++ intptr_t* last_Java_sp(void) const { return _last_Java_sp; } ++ ++ address last_Java_pc(void) { return _last_Java_pc; } ++ ++private: ++ ++ static ByteSize last_Java_fp_offset() { return byte_offset_of(JavaFrameAnchor, _last_Java_fp); } ++ ++public: ++ ++ void set_last_Java_sp(intptr_t* sp) { _last_Java_sp = sp; } ++ ++ intptr_t* last_Java_fp(void) { return _last_Java_fp; } ++ // Assert (last_Java_sp == NULL || fp == NULL) ++ void set_last_Java_fp(intptr_t* fp) { _last_Java_fp = fp; } ++ ++#endif // CPU_LOONGARCH_JAVAFRAMEANCHOR_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/jniFastGetField_loongarch_64.cpp b/src/hotspot/cpu/loongarch/jniFastGetField_loongarch_64.cpp +--- a/src/hotspot/cpu/loongarch/jniFastGetField_loongarch_64.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/jniFastGetField_loongarch_64.cpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,166 @@ ++/* ++ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/codeBlob.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "memory/resourceArea.hpp" ++#include "prims/jniFastGetField.hpp" ++#include "prims/jvm_misc.hpp" ++#include "runtime/safepoint.hpp" ++ ++#define __ masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++#define BUFFER_SIZE 30*wordSize ++ ++// Instead of issuing lfence for LoadLoad barrier, we create data dependency ++// between loads, which is more efficient than lfence. ++ ++address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { ++ const char *name = NULL; ++ switch (type) { ++ case T_BOOLEAN: name = "jni_fast_GetBooleanField"; break; ++ case T_BYTE: name = "jni_fast_GetByteField"; break; ++ case T_CHAR: name = "jni_fast_GetCharField"; break; ++ case T_SHORT: name = "jni_fast_GetShortField"; break; ++ case T_INT: name = "jni_fast_GetIntField"; break; ++ case T_LONG: name = "jni_fast_GetLongField"; break; ++ case T_FLOAT: name = "jni_fast_GetFloatField"; break; ++ case T_DOUBLE: name = "jni_fast_GetDoubleField"; break; ++ default: ShouldNotReachHere(); ++ } ++ ResourceMark rm; ++ BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE); ++ CodeBuffer cbuf(blob); ++ MacroAssembler* masm = new MacroAssembler(&cbuf); ++ address fast_entry = __ pc(); ++ ++ Label slow; ++ ++ // return pc RA ++ // jni env A0 ++ // obj A1 ++ // jfieldID A2 ++ ++ address counter_addr = SafepointSynchronize::safepoint_counter_addr(); ++ __ li(AT, (long)counter_addr); ++ __ ld_w(T1, AT, 0); ++ ++ // Parameters(A0~A3) should not be modified, since they will be used in slow path ++ __ andi(AT, T1, 1); ++ __ bne(AT, R0, slow); ++ ++ __ move(T0, A1); ++ // Both T0 and T4 are clobbered by try_resolve_jobject_in_native. 
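The generator continues below with the barrier call that the comment above refers to. For orientation, the fast path it emits follows a seqlock-style protocol: read the safepoint counter, bail out to the slow JNI accessor if the counter is odd, speculatively load the field, then re-read the counter and fall back to the slow path if it changed (the comment near the top of this file notes that the two counter loads are ordered by a data dependency rather than a fence). A stand-alone C++ sketch of that protocol is below, with invented names and std::atomic standing in for the hand-built ordering; it is an illustration, not the emitted stub.

  // Illustration only: the fast/slow shape of the generated accessor, using
  // std::atomic instead of the data-dependency trick in the real stub.
  #include <atomic>
  #include <cstdio>

  static std::atomic<int> safepoint_counter{0};       // stands in for the safepoint counter

  static int slow_get_int_field(const int* field) {   // stands in for the slow JNI accessor
    return *field;
  }

  static int fast_get_int_field(const int* field) {
    int c1 = safepoint_counter.load(std::memory_order_acquire);
    if (c1 & 1) return slow_get_int_field(field);     // safepoint in progress: take the slow path
    int v = *field;                                   // speculative load of the field
    int c2 = safepoint_counter.load(std::memory_order_acquire);
    if (c1 != c2) return slow_get_int_field(field);   // counter changed: retry via the slow path
    return v;                                         // counter unchanged: the load is valid
  }

  int main() {
    int field = 42;
    std::printf("%d\n", fast_get_int_field(&field));
    return 0;
  }
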
++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->try_resolve_jobject_in_native(masm, /* jni_env */ A0, T0, T4, slow); ++ ++ __ srli_d(T2, A2, 2); // offset ++ __ add_d(T0, T0, T2); ++ ++ __ li(AT, (long)counter_addr); ++ __ ld_w(AT, AT, 0); ++ __ bne(T1, AT, slow); ++ ++ assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); ++ speculative_load_pclist[count] = __ pc(); ++ switch (type) { ++ case T_BOOLEAN: __ ld_bu (V0, T0, 0); break; ++ case T_BYTE: __ ld_b (V0, T0, 0); break; ++ case T_CHAR: __ ld_hu (V0, T0, 0); break; ++ case T_SHORT: __ ld_h (V0, T0, 0); break; ++ case T_INT: __ ld_w (V0, T0, 0); break; ++ case T_LONG: __ ld_d (V0, T0, 0); break; ++ case T_FLOAT: __ fld_s (F0, T0, 0); break; ++ case T_DOUBLE: __ fld_d (F0, T0, 0); break; ++ default: ShouldNotReachHere(); ++ } ++ ++ __ jr(RA); ++ ++ slowcase_entry_pclist[count++] = __ pc(); ++ __ bind (slow); ++ address slow_case_addr = NULL; ++ switch (type) { ++ case T_BOOLEAN: slow_case_addr = jni_GetBooleanField_addr(); break; ++ case T_BYTE: slow_case_addr = jni_GetByteField_addr(); break; ++ case T_CHAR: slow_case_addr = jni_GetCharField_addr(); break; ++ case T_SHORT: slow_case_addr = jni_GetShortField_addr(); break; ++ case T_INT: slow_case_addr = jni_GetIntField_addr(); break; ++ case T_LONG: slow_case_addr = jni_GetLongField_addr(); break; ++ case T_FLOAT: slow_case_addr = jni_GetFloatField_addr(); break; ++ case T_DOUBLE: slow_case_addr = jni_GetDoubleField_addr(); break; ++ default: ShouldNotReachHere(); ++ } ++ __ jmp(slow_case_addr); ++ ++ __ flush (); ++ ++ return fast_entry; ++} ++ ++address JNI_FastGetField::generate_fast_get_boolean_field() { ++ return generate_fast_get_int_field0(T_BOOLEAN); ++} ++ ++address JNI_FastGetField::generate_fast_get_byte_field() { ++ return generate_fast_get_int_field0(T_BYTE); ++} ++ ++address JNI_FastGetField::generate_fast_get_char_field() { ++ return generate_fast_get_int_field0(T_CHAR); ++} ++ ++address JNI_FastGetField::generate_fast_get_short_field() { ++ return generate_fast_get_int_field0(T_SHORT); ++} ++ ++address JNI_FastGetField::generate_fast_get_int_field() { ++ return generate_fast_get_int_field0(T_INT); ++} ++ ++address JNI_FastGetField::generate_fast_get_long_field() { ++ return generate_fast_get_int_field0(T_LONG); ++} ++ ++address JNI_FastGetField::generate_fast_get_float_field() { ++ return generate_fast_get_int_field0(T_FLOAT); ++} ++ ++address JNI_FastGetField::generate_fast_get_double_field() { ++ return generate_fast_get_int_field0(T_DOUBLE); ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/jniTypes_loongarch.hpp b/src/hotspot/cpu/loongarch/jniTypes_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/jniTypes_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/jniTypes_loongarch.hpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,144 @@ ++/* ++ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_JNITYPES_LOONGARCH_HPP ++#define CPU_LOONGARCH_JNITYPES_LOONGARCH_HPP ++ ++#include "jni.h" ++#include "memory/allocation.hpp" ++#include "oops/oop.hpp" ++ ++// This file holds platform-dependent routines used to write primitive jni ++// types to the array of arguments passed into JavaCalls::call ++ ++class JNITypes : AllStatic { ++ // These functions write a java primitive type (in native format) ++ // to a java stack slot array to be passed as an argument to JavaCalls:calls. ++ // I.e., they are functionally 'push' operations if they have a 'pos' ++ // formal parameter. Note that jlong's and jdouble's are written ++ // _in reverse_ of the order in which they appear in the interpreter ++ // stack. This is because call stubs (see stubGenerator_sparc.cpp) ++ // reverse the argument list constructed by JavaCallArguments (see ++ // javaCalls.hpp). ++ ++private: ++ ++ // 32bit Helper routines. ++ static inline void put_int2r(jint *from, intptr_t *to) { *(jint *)(to++) = from[1]; ++ *(jint *)(to ) = from[0]; } ++ static inline void put_int2r(jint *from, intptr_t *to, int& pos) { put_int2r(from, to + pos); pos += 2; } ++ ++public: ++ // In LOOGNARCH64, the sizeof intptr_t is 8 bytes, and each unit in JavaCallArguments::_value_buffer[] ++ // is 8 bytes. ++ // If we only write the low 4 bytes with (jint *), the high 4-bits will be left with uncertain values. ++ // Then, in JavaCallArguments::parameters(), the whole 8 bytes of a T_INT parameter is loaded. ++ // This error occurs in ReflectInvoke.java ++ // The parameter of DD(int) should be 4 instead of 0x550000004. ++ // ++ // See: [runtime/javaCalls.hpp] ++ ++ static inline void put_int(jint from, intptr_t *to) { *(intptr_t *)(to + 0 ) = from; } ++ static inline void put_int(jint from, intptr_t *to, int& pos) { *(intptr_t *)(to + pos++) = from; } ++ static inline void put_int(jint *from, intptr_t *to, int& pos) { *(intptr_t *)(to + pos++) = *from; } ++ ++ // Longs are stored in native format in one JavaCallArgument slot at ++ // *(to). ++ // In theory, *(to + 1) is an empty slot. But, for several Java2D testing programs (TestBorderLayout, SwingTest), ++ // *(to + 1) must contains a copy of the long value. Otherwise it will corrupts. ++ static inline void put_long(jlong from, intptr_t *to) { ++ *(jlong*) (to + 1) = from; ++ *(jlong*) (to) = from; ++ } ++ ++ // A long parameter occupies two slot. ++ // It must fit the layout rule in methodHandle. 
++ // ++ // See: [runtime/reflection.cpp] Reflection::invoke() ++ // assert(java_args.size_of_parameters() == method->size_of_parameters(), "just checking"); ++ ++ static inline void put_long(jlong from, intptr_t *to, int& pos) { ++ *(jlong*) (to + 1 + pos) = from; ++ *(jlong*) (to + pos) = from; ++ pos += 2; ++ } ++ ++ static inline void put_long(jlong *from, intptr_t *to, int& pos) { ++ *(jlong*) (to + 1 + pos) = *from; ++ *(jlong*) (to + pos) = *from; ++ pos += 2; ++ } ++ ++ // Oops are stored in native format in one JavaCallArgument slot at *to. ++ static inline void put_obj(oop from, intptr_t *to) { *(oop *)(to + 0 ) = from; } ++ static inline void put_obj(oop from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = from; } ++ static inline void put_obj(oop *from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = *from; } ++ ++ // Floats are stored in native format in one JavaCallArgument slot at *to. ++ static inline void put_float(jfloat from, intptr_t *to) { *(jfloat *)(to + 0 ) = from; } ++ static inline void put_float(jfloat from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = from; } ++ static inline void put_float(jfloat *from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = *from; } ++ ++#undef _JNI_SLOT_OFFSET ++#define _JNI_SLOT_OFFSET 0 ++ ++ // Longs are stored in native format in one JavaCallArgument slot at ++ // *(to). ++ // In theory, *(to + 1) is an empty slot. But, for several Java2D testing programs (TestBorderLayout, SwingTest), ++ // *(to + 1) must contains a copy of the long value. Otherwise it will corrupts. ++ static inline void put_double(jdouble from, intptr_t *to) { ++ *(jdouble*) (to + 1) = from; ++ *(jdouble*) (to) = from; ++ } ++ ++ // A long parameter occupies two slot. ++ // It must fit the layout rule in methodHandle. ++ // ++ // See: [runtime/reflection.cpp] Reflection::invoke() ++ // assert(java_args.size_of_parameters() == method->size_of_parameters(), "just checking"); ++ ++ static inline void put_double(jdouble from, intptr_t *to, int& pos) { ++ *(jdouble*) (to + 1 + pos) = from; ++ *(jdouble*) (to + pos) = from; ++ pos += 2; ++ } ++ ++ static inline void put_double(jdouble *from, intptr_t *to, int& pos) { ++ *(jdouble*) (to + 1 + pos) = *from; ++ *(jdouble*) (to + pos) = *from; ++ pos += 2; ++ } ++ ++ // The get_xxx routines, on the other hand, actually _do_ fetch ++ // java primitive types from the interpreter stack. ++ static inline jint get_int (intptr_t *from) { return *(jint *) from; } ++ static inline jlong get_long (intptr_t *from) { return *(jlong *) (from + _JNI_SLOT_OFFSET); } ++ static inline oop get_obj (intptr_t *from) { return *(oop *) from; } ++ static inline jfloat get_float (intptr_t *from) { return *(jfloat *) from; } ++ static inline jdouble get_double(intptr_t *from) { return *(jdouble *)(from + _JNI_SLOT_OFFSET); } ++#undef _JNI_SLOT_OFFSET ++}; ++ ++#endif // CPU_LOONGARCH_JNITYPES_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/jvmciCodeInstaller_loongarch.cpp b/src/hotspot/cpu/loongarch/jvmciCodeInstaller_loongarch.cpp +--- a/src/hotspot/cpu/loongarch/jvmciCodeInstaller_loongarch.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/jvmciCodeInstaller_loongarch.cpp 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,199 @@ ++/* ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. 
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "jvmci/jvmciCodeInstaller.hpp" ++#include "jvmci/jvmciRuntime.hpp" ++#include "jvmci/jvmciCompilerToVM.hpp" ++#include "jvmci/jvmciJavaClasses.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "vmreg_loongarch.inline.hpp" ++ ++jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, Handle method, TRAPS) { ++ if (inst->is_int_branch() || inst->is_float_branch()) { ++ return pc_offset + NativeInstruction::nop_instruction_size; ++ } else if (inst->is_call()) { ++ return pc_offset + NativeCall::instruction_size; ++ } else if (inst->is_far_call()) { ++ return pc_offset + NativeFarCall::instruction_size; ++ } else if (inst->is_jump()) { ++ return pc_offset + NativeGeneralJump::instruction_size; ++ } else if (inst->is_lu12iw_lu32id()) { ++ // match LoongArch64TestAssembler.java emitCall ++ // lu12i_w; lu32i_d; jirl ++ return pc_offset + 3 * NativeInstruction::nop_instruction_size; ++ } else { ++ JVMCI_ERROR_0("unsupported type of instruction for call site"); ++ } ++ return 0; ++} ++ ++void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle constant, TRAPS) { ++ address pc = _instructions->start() + pc_offset; ++ Handle obj(THREAD, HotSpotObjectConstantImpl::object(constant)); ++ jobject value = JNIHandles::make_local(obj()); ++ if (HotSpotObjectConstantImpl::compressed(constant)) { ++ NativeMovConstReg* move = nativeMovConstReg_at(pc); ++ move->set_data((intptr_t)(CompressedOops::encode(cast_to_oop(cast_from_oop
(obj()))))); ++ int oop_index = _oop_recorder->find_index(value); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ _instructions->relocate(pc, rspec, Assembler::narrow_oop_operand); ++ } else { ++ NativeMovConstReg* move = nativeMovConstReg_at(pc); ++ move->set_data((intptr_t)(cast_from_oop
(obj()))); ++ int oop_index = _oop_recorder->find_index(value); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ _instructions->relocate(pc, rspec); ++ } ++} ++ ++void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, Handle constant, TRAPS) { ++ address pc = _instructions->start() + pc_offset; ++ if (HotSpotMetaspaceConstantImpl::compressed(constant)) { ++ NativeMovConstReg* move = nativeMovConstReg_at(pc); ++ narrowKlass narrowOop = record_narrow_metadata_reference(_instructions, pc, constant, CHECK); ++ move->set_data((intptr_t) narrowOop); ++ TRACE_jvmci_3("relocating (narrow metaspace constant) at " PTR_FORMAT "/0x%x", p2i(pc), narrowOop); ++ } else { ++ NativeMovConstReg* move = nativeMovConstReg_at(pc); ++ void* reference = record_metadata_reference(_instructions, pc, constant, CHECK); ++ move->set_data((intptr_t) reference); ++ TRACE_jvmci_3("relocating (metaspace constant) at " PTR_FORMAT "/" PTR_FORMAT, p2i(pc), p2i(reference)); ++ } ++} ++ ++void CodeInstaller::pd_patch_DataSectionReference(int pc_offset, int data_offset, TRAPS) { ++ address pc = _instructions->start() + pc_offset; ++ NativeInstruction* inst = nativeInstruction_at(pc); ++ if (inst->is_pcaddu12i_add()) { ++ address dest = _constants->start() + data_offset; ++ _instructions->relocate(pc, section_word_Relocation::spec((address) dest, CodeBuffer::SECT_CONSTS)); ++ TRACE_jvmci_3("relocating at " PTR_FORMAT " (+%d) with destination at %d", p2i(pc), pc_offset, data_offset); ++ } else { ++ JVMCI_ERROR("unknown load or move instruction at " PTR_FORMAT, p2i(pc)); ++ } ++} ++ ++void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination, TRAPS) { ++ address pc = (address) inst; ++ if (inst->is_call()) { ++ NativeCall* call = nativeCall_at(pc); ++ call->set_destination((address) foreign_call_destination); ++ _instructions->relocate(call->instruction_address(), runtime_call_Relocation::spec()); ++ } else if (inst->is_far_call()) { ++ NativeFarCall* call = nativeFarCall_at(pc); ++ call->set_destination((address) foreign_call_destination); ++ _instructions->relocate(call->instruction_address(), runtime_call_Relocation::spec()); ++ } else if (inst->is_jump()) { ++ NativeGeneralJump* jump = nativeGeneralJump_at(pc); ++ jump->set_jump_destination((address) foreign_call_destination); ++ _instructions->relocate(jump->instruction_address(), runtime_call_Relocation::spec()); ++ } else if (inst->is_lu12iw_lu32id()) { ++ // match emitCall of LoongArch64TestAssembler.java ++ // lu12i_w; lu32i_d; jirl ++ MacroAssembler::pd_patch_instruction((address)inst, (address)foreign_call_destination); ++ } else { ++ JVMCI_ERROR("unknown call or jump instruction at " PTR_FORMAT, p2i(pc)); ++ } ++ TRACE_jvmci_3("relocating (foreign call) at " PTR_FORMAT, p2i(inst)); ++} ++ ++void CodeInstaller::pd_relocate_JavaMethod(CodeBuffer &cbuf, Handle hotspot_method, jint pc_offset, TRAPS) { ++#ifdef ASSERT ++ Method* method = NULL; ++ // we need to check, this might also be an unresolved method ++ if (hotspot_method->is_a(HotSpotResolvedJavaMethodImpl::klass())) { ++ method = getMethodFromHotSpotMethod(hotspot_method()); ++ } ++#endif ++ switch (_next_call_type) { ++ case INLINE_INVOKE: ++ break; ++ case INVOKEVIRTUAL: ++ case INVOKEINTERFACE: { ++ assert(!method->is_static(), "cannot call static method with invokeinterface"); ++ NativeCall* call = nativeCall_at(_instructions->start() + pc_offset); ++ _instructions->relocate(call->instruction_address(), 
virtual_call_Relocation::spec(_invoke_mark_pc)); ++ call->trampoline_jump(cbuf, SharedRuntime::get_resolve_virtual_call_stub()); ++ break; ++ } ++ case INVOKESTATIC: { ++ assert(method->is_static(), "cannot call non-static method with invokestatic"); ++ NativeCall* call = nativeCall_at(_instructions->start() + pc_offset); ++ _instructions->relocate(call->instruction_address(), relocInfo::static_call_type); ++ call->trampoline_jump(cbuf, SharedRuntime::get_resolve_static_call_stub()); ++ break; ++ } ++ case INVOKESPECIAL: { ++ assert(!method->is_static(), "cannot call static method with invokespecial"); ++ NativeCall* call = nativeCall_at(_instructions->start() + pc_offset); ++ _instructions->relocate(call->instruction_address(), relocInfo::opt_virtual_call_type); ++ call->trampoline_jump(cbuf, SharedRuntime::get_resolve_opt_virtual_call_stub()); ++ break; ++ } ++ default: ++ JVMCI_ERROR("invalid _next_call_type value"); ++ break; ++ } ++} ++ ++void CodeInstaller::pd_relocate_poll(address pc, jint mark, TRAPS) { ++ switch (mark) { ++ case POLL_NEAR: ++ JVMCI_ERROR("unimplemented"); ++ break; ++ case POLL_FAR: ++ _instructions->relocate(pc, relocInfo::poll_type); ++ break; ++ case POLL_RETURN_NEAR: ++ JVMCI_ERROR("unimplemented"); ++ break; ++ case POLL_RETURN_FAR: ++ _instructions->relocate(pc, relocInfo::poll_return_type); ++ break; ++ default: ++ JVMCI_ERROR("invalid mark value"); ++ break; ++ } ++} ++ ++// convert JVMCI register indices (as used in oop maps) to HotSpot registers ++VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg, TRAPS) { ++ if (jvmci_reg < RegisterImpl::number_of_registers) { ++ return as_Register(jvmci_reg)->as_VMReg(); ++ } else { ++ jint floatRegisterNumber = jvmci_reg - RegisterImpl::number_of_registers; ++ if (floatRegisterNumber >= 0 && floatRegisterNumber < FloatRegisterImpl::number_of_registers) { ++ return as_FloatRegister(floatRegisterNumber)->as_VMReg(); ++ } ++ JVMCI_ERROR_NULL("invalid register number: %d", jvmci_reg); ++ } ++} ++ ++bool CodeInstaller::is_general_purpose_reg(VMReg hotspotRegister) { ++ return !hotspotRegister->is_FloatRegister(); ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/loongarch_64.ad b/src/hotspot/cpu/loongarch/loongarch_64.ad +--- a/src/hotspot/cpu/loongarch/loongarch_64.ad 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/loongarch_64.ad 2024-01-30 10:00:11.841431732 +0800 +@@ -0,0 +1,13917 @@ ++// ++// Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. ++// ++// ++ ++// GodSon3 Architecture Description File ++ ++//----------REGISTER DEFINITION BLOCK------------------------------------------ ++// This information is used by the matcher and the register allocator to ++// describe individual registers and classes of registers within the target ++// archtecture. ++ ++// format: ++// reg_def name (call convention, c-call convention, ideal type, encoding); ++// call convention : ++// NS = No-Save ++// SOC = Save-On-Call ++// SOE = Save-On-Entry ++// AS = Always-Save ++// ideal type : ++// see opto/opcodes.hpp for more info ++// reg_class name (reg, ...); ++// alloc_class name (reg, ...); ++register %{ ++ ++// General Registers ++// Integer Registers ++ reg_def R0 ( NS, NS, Op_RegI, 0, R0->as_VMReg()); ++ reg_def R0_H ( NS, NS, Op_RegI, 0, R0->as_VMReg()->next()); ++ reg_def RA ( NS, NS, Op_RegI, 1, RA->as_VMReg()); ++ reg_def RA_H ( NS, NS, Op_RegI, 1, RA->as_VMReg()->next()); ++ reg_def TP ( NS, NS, Op_RegI, 2, TP->as_VMReg()); ++ reg_def TP_H ( NS, NS, Op_RegI, 2, TP->as_VMReg()->next()); ++ reg_def SP ( NS, NS, Op_RegI, 3, SP->as_VMReg()); ++ reg_def SP_H ( NS, NS, Op_RegI, 3, SP->as_VMReg()->next()); ++ reg_def A0 (SOC, SOC, Op_RegI, 4, A0->as_VMReg()); ++ reg_def A0_H (SOC, SOC, Op_RegI, 4, A0->as_VMReg()->next()); ++ reg_def A1 (SOC, SOC, Op_RegI, 5, A1->as_VMReg()); ++ reg_def A1_H (SOC, SOC, Op_RegI, 5, A1->as_VMReg()->next()); ++ reg_def A2 (SOC, SOC, Op_RegI, 6, A2->as_VMReg()); ++ reg_def A2_H (SOC, SOC, Op_RegI, 6, A2->as_VMReg()->next()); ++ reg_def A3 (SOC, SOC, Op_RegI, 7, A3->as_VMReg()); ++ reg_def A3_H (SOC, SOC, Op_RegI, 7, A3->as_VMReg()->next()); ++ reg_def A4 (SOC, SOC, Op_RegI, 8, A4->as_VMReg()); ++ reg_def A4_H (SOC, SOC, Op_RegI, 8, A4->as_VMReg()->next()); ++ reg_def A5 (SOC, SOC, Op_RegI, 9, A5->as_VMReg()); ++ reg_def A5_H (SOC, SOC, Op_RegI, 9, A5->as_VMReg()->next()); ++ reg_def A6 (SOC, SOC, Op_RegI, 10, A6->as_VMReg()); ++ reg_def A6_H (SOC, SOC, Op_RegI, 10, A6->as_VMReg()->next()); ++ reg_def A7 (SOC, SOC, Op_RegI, 11, A7->as_VMReg()); ++ reg_def A7_H (SOC, SOC, Op_RegI, 11, A7->as_VMReg()->next()); ++ reg_def T0 (SOC, SOC, Op_RegI, 12, T0->as_VMReg()); ++ reg_def T0_H (SOC, SOC, Op_RegI, 12, T0->as_VMReg()->next()); ++ reg_def T1 (SOC, SOC, Op_RegI, 13, T1->as_VMReg()); ++ reg_def T1_H (SOC, SOC, Op_RegI, 13, T1->as_VMReg()->next()); ++ reg_def T2 (SOC, SOC, Op_RegI, 14, T2->as_VMReg()); ++ reg_def T2_H (SOC, SOC, Op_RegI, 14, T2->as_VMReg()->next()); ++ reg_def T3 (SOC, SOC, Op_RegI, 15, T3->as_VMReg()); ++ reg_def T3_H (SOC, SOC, Op_RegI, 15, T3->as_VMReg()->next()); ++ reg_def T4 (SOC, SOC, Op_RegI, 16, T4->as_VMReg()); ++ reg_def T4_H (SOC, SOC, Op_RegI, 16, T4->as_VMReg()->next()); ++ reg_def T5 (SOC, SOC, Op_RegI, 17, T5->as_VMReg()); ++ reg_def T5_H (SOC, SOC, Op_RegI, 17, T5->as_VMReg()->next()); ++ reg_def T6 (SOC, SOC, Op_RegI, 18, T6->as_VMReg()); ++ reg_def T6_H (SOC, SOC, Op_RegI, 18, T6->as_VMReg()->next()); ++ reg_def T7 (SOC, SOC, Op_RegI, 19, T7->as_VMReg()); ++ reg_def T7_H (SOC, SOC, Op_RegI, 19, T7->as_VMReg()->next()); ++ reg_def T8 (SOC, SOC, Op_RegI, 20, T8->as_VMReg()); ++ reg_def T8_H (SOC, SOC, Op_RegI, 20, T8->as_VMReg()->next()); ++ reg_def RX ( NS, NS, Op_RegI, 21, RX->as_VMReg()); ++ reg_def RX_H ( NS, NS, Op_RegI, 21, RX->as_VMReg()->next()); ++ reg_def FP ( NS, NS, Op_RegI, 22, FP->as_VMReg()); ++ reg_def 
FP_H ( NS, NS, Op_RegI, 22, FP->as_VMReg()->next()); ++ reg_def S0 (SOC, SOE, Op_RegI, 23, S0->as_VMReg()); ++ reg_def S0_H (SOC, SOE, Op_RegI, 23, S0->as_VMReg()->next()); ++ reg_def S1 (SOC, SOE, Op_RegI, 24, S1->as_VMReg()); ++ reg_def S1_H (SOC, SOE, Op_RegI, 24, S1->as_VMReg()->next()); ++ reg_def S2 (SOC, SOE, Op_RegI, 25, S2->as_VMReg()); ++ reg_def S2_H (SOC, SOE, Op_RegI, 25, S2->as_VMReg()->next()); ++ reg_def S3 (SOC, SOE, Op_RegI, 26, S3->as_VMReg()); ++ reg_def S3_H (SOC, SOE, Op_RegI, 26, S3->as_VMReg()->next()); ++ reg_def S4 (SOC, SOE, Op_RegI, 27, S4->as_VMReg()); ++ reg_def S4_H (SOC, SOE, Op_RegI, 27, S4->as_VMReg()->next()); ++ reg_def S5 (SOC, SOE, Op_RegI, 28, S5->as_VMReg()); ++ reg_def S5_H (SOC, SOE, Op_RegI, 28, S5->as_VMReg()->next()); ++ reg_def S6 (SOC, SOE, Op_RegI, 29, S6->as_VMReg()); ++ reg_def S6_H (SOC, SOE, Op_RegI, 29, S6->as_VMReg()->next()); ++ reg_def S7 (SOC, SOE, Op_RegI, 30, S7->as_VMReg()); ++ reg_def S7_H (SOC, SOE, Op_RegI, 30, S7->as_VMReg()->next()); ++ reg_def S8 (SOC, SOE, Op_RegI, 31, S8->as_VMReg()); ++ reg_def S8_H (SOC, SOE, Op_RegI, 31, S8->as_VMReg()->next()); ++ ++ ++// Floating/Vector registers. ++ reg_def F0 ( SOC, SOC, Op_RegF, 0, F0->as_VMReg() ); ++ reg_def F0_H ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next() ); ++ reg_def F0_J ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(2) ); ++ reg_def F0_K ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(3) ); ++ reg_def F0_L ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(4) ); ++ reg_def F0_M ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(5) ); ++ reg_def F0_N ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(6) ); ++ reg_def F0_O ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(7) ); ++ ++ reg_def F1 ( SOC, SOC, Op_RegF, 1, F1->as_VMReg() ); ++ reg_def F1_H ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next() ); ++ reg_def F1_J ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(2) ); ++ reg_def F1_K ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(3) ); ++ reg_def F1_L ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(4) ); ++ reg_def F1_M ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(5) ); ++ reg_def F1_N ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(6) ); ++ reg_def F1_O ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(7) ); ++ ++ reg_def F2 ( SOC, SOC, Op_RegF, 2, F2->as_VMReg() ); ++ reg_def F2_H ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next() ); ++ reg_def F2_J ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(2) ); ++ reg_def F2_K ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(3) ); ++ reg_def F2_L ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(4) ); ++ reg_def F2_M ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(5) ); ++ reg_def F2_N ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(6) ); ++ reg_def F2_O ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(7) ); ++ ++ reg_def F3 ( SOC, SOC, Op_RegF, 3, F3->as_VMReg() ); ++ reg_def F3_H ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next() ); ++ reg_def F3_J ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(2) ); ++ reg_def F3_K ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(3) ); ++ reg_def F3_L ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(4) ); ++ reg_def F3_M ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(5) ); ++ reg_def F3_N ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(6) ); ++ reg_def F3_O ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(7) ); ++ ++ reg_def F4 ( SOC, SOC, Op_RegF, 4, F4->as_VMReg() ); ++ reg_def F4_H ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next() ); ++ reg_def F4_J ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(2) ); ++ reg_def F4_K ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(3) ); ++ reg_def F4_L ( 
SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(4) ); ++ reg_def F4_M ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(5) ); ++ reg_def F4_N ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(6) ); ++ reg_def F4_O ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(7) ); ++ ++ reg_def F5 ( SOC, SOC, Op_RegF, 5, F5->as_VMReg() ); ++ reg_def F5_H ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next() ); ++ reg_def F5_J ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(2) ); ++ reg_def F5_K ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(3) ); ++ reg_def F5_L ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(4) ); ++ reg_def F5_M ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(5) ); ++ reg_def F5_N ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(6) ); ++ reg_def F5_O ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(7) ); ++ ++ reg_def F6 ( SOC, SOC, Op_RegF, 6, F6->as_VMReg() ); ++ reg_def F6_H ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next() ); ++ reg_def F6_J ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(2) ); ++ reg_def F6_K ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(3) ); ++ reg_def F6_L ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(4) ); ++ reg_def F6_M ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(5) ); ++ reg_def F6_N ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(6) ); ++ reg_def F6_O ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(7) ); ++ ++ reg_def F7 ( SOC, SOC, Op_RegF, 7, F7->as_VMReg() ); ++ reg_def F7_H ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next() ); ++ reg_def F7_J ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(2) ); ++ reg_def F7_K ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(3) ); ++ reg_def F7_L ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(4) ); ++ reg_def F7_M ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(5) ); ++ reg_def F7_N ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(6) ); ++ reg_def F7_O ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(7) ); ++ ++ reg_def F8 ( SOC, SOC, Op_RegF, 8, F8->as_VMReg() ); ++ reg_def F8_H ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next() ); ++ reg_def F8_J ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(2) ); ++ reg_def F8_K ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(3) ); ++ reg_def F8_L ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(4) ); ++ reg_def F8_M ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(5) ); ++ reg_def F8_N ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(6) ); ++ reg_def F8_O ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(7) ); ++ ++ reg_def F9 ( SOC, SOC, Op_RegF, 9, F9->as_VMReg() ); ++ reg_def F9_H ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next() ); ++ reg_def F9_J ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(2) ); ++ reg_def F9_K ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(3) ); ++ reg_def F9_L ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(4) ); ++ reg_def F9_M ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(5) ); ++ reg_def F9_N ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(6) ); ++ reg_def F9_O ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(7) ); ++ ++ reg_def F10 ( SOC, SOC, Op_RegF, 10, F10->as_VMReg() ); ++ reg_def F10_H ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next() ); ++ reg_def F10_J ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(2) ); ++ reg_def F10_K ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(3) ); ++ reg_def F10_L ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(4) ); ++ reg_def F10_M ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(5) ); ++ reg_def F10_N ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(6) ); ++ reg_def F10_O ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(7) ); ++ ++ reg_def F11 ( SOC, SOC, Op_RegF, 11, F11->as_VMReg() ); ++ reg_def F11_H ( SOC, SOC, Op_RegF, 11, 
F11->as_VMReg()->next() ); ++ reg_def F11_J ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(2) ); ++ reg_def F11_K ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(3) ); ++ reg_def F11_L ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(4) ); ++ reg_def F11_M ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(5) ); ++ reg_def F11_N ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(6) ); ++ reg_def F11_O ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(7) ); ++ ++ reg_def F12 ( SOC, SOC, Op_RegF, 12, F12->as_VMReg() ); ++ reg_def F12_H ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next() ); ++ reg_def F12_J ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(2) ); ++ reg_def F12_K ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(3) ); ++ reg_def F12_L ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(4) ); ++ reg_def F12_M ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(5) ); ++ reg_def F12_N ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(6) ); ++ reg_def F12_O ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(7) ); ++ ++ reg_def F13 ( SOC, SOC, Op_RegF, 13, F13->as_VMReg() ); ++ reg_def F13_H ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next() ); ++ reg_def F13_J ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(2) ); ++ reg_def F13_K ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(3) ); ++ reg_def F13_L ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(4) ); ++ reg_def F13_M ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(5) ); ++ reg_def F13_N ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(6) ); ++ reg_def F13_O ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(7) ); ++ ++ reg_def F14 ( SOC, SOC, Op_RegF, 14, F14->as_VMReg() ); ++ reg_def F14_H ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next() ); ++ reg_def F14_J ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(2) ); ++ reg_def F14_K ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(3) ); ++ reg_def F14_L ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(4) ); ++ reg_def F14_M ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(5) ); ++ reg_def F14_N ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(6) ); ++ reg_def F14_O ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(7) ); ++ ++ reg_def F15 ( SOC, SOC, Op_RegF, 15, F15->as_VMReg() ); ++ reg_def F15_H ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next() ); ++ reg_def F15_J ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(2) ); ++ reg_def F15_K ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(3) ); ++ reg_def F15_L ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(4) ); ++ reg_def F15_M ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(5) ); ++ reg_def F15_N ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(6) ); ++ reg_def F15_O ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(7) ); ++ ++ reg_def F16 ( SOC, SOC, Op_RegF, 16, F16->as_VMReg() ); ++ reg_def F16_H ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next() ); ++ reg_def F16_J ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(2) ); ++ reg_def F16_K ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(3) ); ++ reg_def F16_L ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(4) ); ++ reg_def F16_M ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(5) ); ++ reg_def F16_N ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(6) ); ++ reg_def F16_O ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(7) ); ++ ++ reg_def F17 ( SOC, SOC, Op_RegF, 17, F17->as_VMReg() ); ++ reg_def F17_H ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next() ); ++ reg_def F17_J ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(2) ); ++ reg_def F17_K ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(3) ); ++ reg_def F17_L ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(4) ); ++ reg_def 
F17_M ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(5) ); ++ reg_def F17_N ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(6) ); ++ reg_def F17_O ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(7) ); ++ ++ reg_def F18 ( SOC, SOC, Op_RegF, 18, F18->as_VMReg() ); ++ reg_def F18_H ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next() ); ++ reg_def F18_J ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(2) ); ++ reg_def F18_K ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(3) ); ++ reg_def F18_L ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(4) ); ++ reg_def F18_M ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(5) ); ++ reg_def F18_N ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(6) ); ++ reg_def F18_O ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(7) ); ++ ++ reg_def F19 ( SOC, SOC, Op_RegF, 19, F19->as_VMReg() ); ++ reg_def F19_H ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next() ); ++ reg_def F19_J ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(2) ); ++ reg_def F19_K ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(3) ); ++ reg_def F19_L ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(4) ); ++ reg_def F19_M ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(5) ); ++ reg_def F19_N ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(6) ); ++ reg_def F19_O ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(7) ); ++ ++ reg_def F20 ( SOC, SOC, Op_RegF, 20, F20->as_VMReg() ); ++ reg_def F20_H ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next() ); ++ reg_def F20_J ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(2) ); ++ reg_def F20_K ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(3) ); ++ reg_def F20_L ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(4) ); ++ reg_def F20_M ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(5) ); ++ reg_def F20_N ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(6) ); ++ reg_def F20_O ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(7) ); ++ ++ reg_def F21 ( SOC, SOC, Op_RegF, 21, F21->as_VMReg() ); ++ reg_def F21_H ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next() ); ++ reg_def F21_J ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(2) ); ++ reg_def F21_K ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(3) ); ++ reg_def F21_L ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(4) ); ++ reg_def F21_M ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(5) ); ++ reg_def F21_N ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(6) ); ++ reg_def F21_O ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(7) ); ++ ++ reg_def F22 ( SOC, SOC, Op_RegF, 22, F22->as_VMReg() ); ++ reg_def F22_H ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next() ); ++ reg_def F22_J ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(2) ); ++ reg_def F22_K ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(3) ); ++ reg_def F22_L ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(4) ); ++ reg_def F22_M ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(5) ); ++ reg_def F22_N ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(6) ); ++ reg_def F22_O ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(7) ); ++ ++ reg_def F23 ( SOC, SOC, Op_RegF, 23, F23->as_VMReg() ); ++ reg_def F23_H ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next() ); ++ reg_def F23_J ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(2) ); ++ reg_def F23_K ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(3) ); ++ reg_def F23_L ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(4) ); ++ reg_def F23_M ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(5) ); ++ reg_def F23_N ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(6) ); ++ reg_def F23_O ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(7) ); ++ ++ reg_def F24 ( SOC, SOE, Op_RegF, 24, 
F24->as_VMReg() ); ++ reg_def F24_H ( SOC, SOE, Op_RegF, 24, F24->as_VMReg()->next() ); ++ reg_def F24_J ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(2) ); ++ reg_def F24_K ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(3) ); ++ reg_def F24_L ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(4) ); ++ reg_def F24_M ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(5) ); ++ reg_def F24_N ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(6) ); ++ reg_def F24_O ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(7) ); ++ ++ reg_def F25 ( SOC, SOE, Op_RegF, 25, F25->as_VMReg() ); ++ reg_def F25_H ( SOC, SOE, Op_RegF, 25, F25->as_VMReg()->next() ); ++ reg_def F25_J ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(2) ); ++ reg_def F25_K ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(3) ); ++ reg_def F25_L ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(4) ); ++ reg_def F25_M ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(5) ); ++ reg_def F25_N ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(6) ); ++ reg_def F25_O ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(7) ); ++ ++ reg_def F26 ( SOC, SOE, Op_RegF, 26, F26->as_VMReg() ); ++ reg_def F26_H ( SOC, SOE, Op_RegF, 26, F26->as_VMReg()->next() ); ++ reg_def F26_J ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(2) ); ++ reg_def F26_K ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(3) ); ++ reg_def F26_L ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(4) ); ++ reg_def F26_M ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(5) ); ++ reg_def F26_N ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(6) ); ++ reg_def F26_O ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(7) ); ++ ++ reg_def F27 ( SOC, SOE, Op_RegF, 27, F27->as_VMReg() ); ++ reg_def F27_H ( SOC, SOE, Op_RegF, 27, F27->as_VMReg()->next() ); ++ reg_def F27_J ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(2) ); ++ reg_def F27_K ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(3) ); ++ reg_def F27_L ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(4) ); ++ reg_def F27_M ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(5) ); ++ reg_def F27_N ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(6) ); ++ reg_def F27_O ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(7) ); ++ ++ reg_def F28 ( SOC, SOE, Op_RegF, 28, F28->as_VMReg() ); ++ reg_def F28_H ( SOC, SOE, Op_RegF, 28, F28->as_VMReg()->next() ); ++ reg_def F28_J ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(2) ); ++ reg_def F28_K ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(3) ); ++ reg_def F28_L ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(4) ); ++ reg_def F28_M ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(5) ); ++ reg_def F28_N ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(6) ); ++ reg_def F28_O ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(7) ); ++ ++ reg_def F29 ( SOC, SOE, Op_RegF, 29, F29->as_VMReg() ); ++ reg_def F29_H ( SOC, SOE, Op_RegF, 29, F29->as_VMReg()->next() ); ++ reg_def F29_J ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(2) ); ++ reg_def F29_K ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(3) ); ++ reg_def F29_L ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(4) ); ++ reg_def F29_M ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(5) ); ++ reg_def F29_N ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(6) ); ++ reg_def F29_O ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(7) ); ++ ++ reg_def F30 ( SOC, SOE, Op_RegF, 30, F30->as_VMReg() ); ++ reg_def F30_H ( SOC, SOE, Op_RegF, 30, F30->as_VMReg()->next() ); ++ reg_def F30_J ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(2) ); ++ reg_def F30_K ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(3) ); ++ reg_def F30_L ( 
SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(4) ); ++ reg_def F30_M ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(5) ); ++ reg_def F30_N ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(6) ); ++ reg_def F30_O ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(7) ); ++ ++ reg_def F31 ( SOC, SOE, Op_RegF, 31, F31->as_VMReg() ); ++ reg_def F31_H ( SOC, SOE, Op_RegF, 31, F31->as_VMReg()->next() ); ++ reg_def F31_J ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(2) ); ++ reg_def F31_K ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(3) ); ++ reg_def F31_L ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(4) ); ++ reg_def F31_M ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(5) ); ++ reg_def F31_N ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(6) ); ++ reg_def F31_O ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(7) ); ++ ++ ++// ---------------------------- ++// Special Registers ++//S6 is used for get_thread(S6) ++//S5 is uesd for heapbase of compressed oop ++alloc_class chunk0( ++ S7, S7_H, ++ S0, S0_H, ++ S1, S1_H, ++ S2, S2_H, ++ S4, S4_H, ++ S5, S5_H, ++ S6, S6_H, ++ S3, S3_H, ++ T2, T2_H, ++ T3, T3_H, ++ T8, T8_H, ++ T4, T4_H, ++ T1, T1_H, // inline_cache_reg ++ T6, T6_H, ++ A7, A7_H, ++ A6, A6_H, ++ A5, A5_H, ++ A4, A4_H, ++ T5, T5_H, ++ A3, A3_H, ++ A2, A2_H, ++ A1, A1_H, ++ A0, A0_H, ++ T0, T0_H, ++ S8, S8_H ++ RA, RA_H, ++ SP, SP_H, // stack_pointer ++ FP, FP_H, // frame_pointer ++ ++ // non-allocatable registers ++ T7, T7_H, ++ TP, TP_H, ++ RX, RX_H, ++ R0, R0_H, ++ ); ++ ++// F23 is scratch reg ++alloc_class chunk1( F0, F0_H, F0_J, F0_K, F0_L, F0_M, F0_N, F0_O, ++ F1, F1_H, F1_J, F1_K, F1_L, F1_M, F1_N, F1_O, ++ F2, F2_H, F2_J, F2_K, F2_L, F2_M, F2_N, F2_O, ++ F3, F3_H, F3_J, F3_K, F3_L, F3_M, F3_N, F3_O, ++ F4, F4_H, F4_J, F4_K, F4_L, F4_M, F4_N, F4_O, ++ F5, F5_H, F5_J, F5_K, F5_L, F5_M, F5_N, F5_O, ++ F6, F6_H, F6_J, F6_K, F6_L, F6_M, F6_N, F6_O, ++ F7, F7_H, F7_J, F7_K, F7_L, F7_M, F7_N, F7_O, ++ F8, F8_H, F8_J, F8_K, F8_L, F8_M, F8_N, F8_O, ++ F9, F9_H, F9_J, F9_K, F9_L, F9_M, F9_N, F9_O, ++ F10, F10_H, F10_J, F10_K, F10_L, F10_M, F10_N, F10_O, ++ F11, F11_H, F11_J, F11_K, F11_L, F11_M, F11_N, F11_O, ++ F12, F12_H, F12_J, F12_K, F12_L, F12_M, F12_N, F12_O, ++ F13, F13_H, F13_J, F13_K, F13_L, F13_M, F13_N, F13_O, ++ F14, F14_H, F14_J, F14_K, F14_L, F14_M, F14_N, F14_O, ++ F15, F15_H, F15_J, F15_K, F15_L, F15_M, F15_N, F15_O, ++ F16, F16_H, F16_J, F16_K, F16_L, F16_M, F16_N, F16_O, ++ F17, F17_H, F17_J, F17_K, F17_L, F17_M, F17_N, F17_O, ++ F18, F18_H, F18_J, F18_K, F18_L, F18_M, F18_N, F18_O, ++ F19, F19_H, F19_J, F19_K, F19_L, F19_M, F19_N, F19_O, ++ F20, F20_H, F20_J, F20_K, F20_L, F20_M, F20_N, F20_O, ++ F21, F21_H, F21_J, F21_K, F21_L, F21_M, F21_N, F21_O, ++ F22, F22_H, F22_J, F22_K, F22_L, F22_M, F22_N, F22_O, ++ F24, F24_H, F24_J, F24_K, F24_L, F24_M, F24_N, F24_O, ++ F25, F25_H, F25_J, F25_K, F25_L, F25_M, F25_N, F25_O, ++ F26, F26_H, F26_J, F26_K, F26_L, F26_M, F26_N, F26_O, ++ F27, F27_H, F27_J, F27_K, F27_L, F27_M, F27_N, F27_O, ++ F28, F28_H, F28_J, F28_K, F28_L, F28_M, F28_N, F28_O, ++ F29, F29_H, F29_J, F29_K, F29_L, F29_M, F29_N, F29_O, ++ F30, F30_H, F30_J, F30_K, F30_L, F30_M, F30_N, F30_O, ++ F31, F31_H, F31_J, F31_K, F31_L, F31_M, F31_N, F31_O, ++ ++ // non-allocatable registers ++ F23, F23_H, F23_J, F23_K, F23_L, F23_M, F23_N, F23_O, ++ ); ++ ++reg_class s_reg( S0, S1, S2, S3, S4, S5, S6, S7 ); ++reg_class s0_reg( S0 ); ++reg_class s1_reg( S1 ); ++reg_class s2_reg( S2 ); ++reg_class s3_reg( S3 ); ++reg_class s4_reg( S4 ); ++reg_class s5_reg( S5 ); ++reg_class s6_reg( S6 
); ++reg_class s7_reg( S7 ); ++ ++reg_class t_reg( T0, T1, T2, T3, T8, T4 ); ++reg_class t0_reg( T0 ); ++reg_class t1_reg( T1 ); ++reg_class t2_reg( T2 ); ++reg_class t3_reg( T3 ); ++reg_class t8_reg( T8 ); ++reg_class t4_reg( T4 ); ++ ++reg_class a_reg( A0, A1, A2, A3, A4, A5, A6, A7 ); ++reg_class a0_reg( A0 ); ++reg_class a1_reg( A1 ); ++reg_class a2_reg( A2 ); ++reg_class a3_reg( A3 ); ++reg_class a4_reg( A4 ); ++reg_class a5_reg( A5 ); ++reg_class a6_reg( A6 ); ++reg_class a7_reg( A7 ); ++ ++// TODO: LA ++//reg_class v0_reg( A0 ); ++//reg_class v1_reg( A1 ); ++ ++reg_class sp_reg( SP, SP_H ); ++reg_class fp_reg( FP, FP_H ); ++ ++reg_class v0_long_reg( A0, A0_H ); ++reg_class v1_long_reg( A1, A1_H ); ++reg_class a0_long_reg( A0, A0_H ); ++reg_class a1_long_reg( A1, A1_H ); ++reg_class a2_long_reg( A2, A2_H ); ++reg_class a3_long_reg( A3, A3_H ); ++reg_class a4_long_reg( A4, A4_H ); ++reg_class a5_long_reg( A5, A5_H ); ++reg_class a6_long_reg( A6, A6_H ); ++reg_class a7_long_reg( A7, A7_H ); ++reg_class t0_long_reg( T0, T0_H ); ++reg_class t1_long_reg( T1, T1_H ); ++reg_class t2_long_reg( T2, T2_H ); ++reg_class t3_long_reg( T3, T3_H ); ++reg_class t8_long_reg( T8, T8_H ); ++reg_class t4_long_reg( T4, T4_H ); ++reg_class s0_long_reg( S0, S0_H ); ++reg_class s1_long_reg( S1, S1_H ); ++reg_class s2_long_reg( S2, S2_H ); ++reg_class s3_long_reg( S3, S3_H ); ++reg_class s4_long_reg( S4, S4_H ); ++reg_class s5_long_reg( S5, S5_H ); ++reg_class s6_long_reg( S6, S6_H ); ++reg_class s7_long_reg( S7, S7_H ); ++ ++//reg_class int_reg( S7, S0, S1, S2, S4, S3, T8, T2, T3, T1, T6, A7, A6, A5, A4, T5, A3, A2, A1, A0, T0 ); ++ ++reg_class all_reg32( ++ S8, ++ S7, ++ S5, /* S5_heapbase */ ++ /* S6, S6 TREG */ ++ S4, ++ S3, ++ S2, ++ S1, ++ S0, ++ T8, ++ /* T7, AT */ ++ T6, ++ T5, ++ /* T4, jarl T4 */ ++ T3, ++ T2, ++ T1, ++ T0, ++ A7, ++ A6, ++ A5, ++ A4, ++ A3, ++ A2, ++ A1, ++ A0 ); ++ ++reg_class int_reg %{ ++ return _ANY_REG32_mask; ++%} ++ ++reg_class no_Ax_int_reg( S7, S0, S1, S2, S4, S3, T8, T2, T3, T1, T6, T5, T0 ); ++ ++reg_class p_reg %{ ++ return _PTR_REG_mask; ++%} ++ ++reg_class no_T8_p_reg( ++ S7, S7_H, ++ S0, S0_H, ++ S1, S1_H, ++ S2, S2_H, ++ S4, S4_H, ++ S3, S3_H, ++ T2, T2_H, ++ T3, T3_H, ++ T1, T1_H, ++ A7, A7_H, ++ A6, A6_H, ++ A5, A5_H, ++ A4, A4_H, ++ A3, A3_H, ++ A2, A2_H, ++ A1, A1_H, ++ A0, A0_H, ++ T0, T0_H ++ ); ++ ++reg_class no_Ax_p_reg( ++ S7, S7_H, ++ S0, S0_H, ++ S1, S1_H, ++ S2, S2_H, ++ S4, S4_H, ++ S3, S3_H, ++ T2, T2_H, ++ T3, T3_H, ++ T1, T1_H, ++ T0, T0_H ++ ); ++ ++reg_class all_reg( ++ S8, S8_H, ++ S7, S7_H, ++ /* S6, S6_H, S6 TREG */ ++ S5, S5_H, /* S5_heapbase */ ++ S4, S4_H, ++ S3, S3_H, ++ S2, S2_H, ++ S1, S1_H, ++ S0, S0_H, ++ T8, T8_H, ++ /* T7, T7_H, AT */ ++ T6, T6_H, ++ T5, T5_H, ++ /* T4, T4_H, jalr T4 */ ++ T3, T3_H, ++ T2, T2_H, ++ T1, T1_H, ++ T0, T0_H, ++ A7, A7_H, ++ A6, A6_H, ++ A5, A5_H, ++ A4, A4_H, ++ A3, A3_H, ++ A2, A2_H, ++ A1, A1_H, ++ A0, A0_H ++ ); ++ ++ ++reg_class long_reg %{ ++ return _ANY_REG_mask; ++%} ++ ++// Floating point registers. 
++// F31 are not used as temporary registers in D2I ++reg_class flt_reg( F0, F1, F2, F3, F4, F5, F6, F7, F8, F9, F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, F20, F21, F22, F24, F25, F26, F27, F28, F29, F30, F31); ++ ++reg_class dbl_reg( F0, F0_H, ++ F1, F1_H, ++ F2, F2_H, ++ F3, F3_H, ++ F4, F4_H, ++ F5, F5_H, ++ F6, F6_H, ++ F7, F7_H, ++ F8, F8_H, ++ F9, F9_H, ++ F10, F10_H, ++ F11, F11_H, ++ F12, F12_H, ++ F13, F13_H, ++ F14, F14_H, ++ F15, F15_H, ++ F16, F16_H, ++ F17, F17_H, ++ F18, F18_H, ++ F19, F19_H, ++ F20, F20_H, ++ F21, F21_H, ++ F22, F22_H, ++ F24, F24_H, ++ F25, F25_H, ++ F26, F26_H, ++ F27, F27_H, ++ F28, F28_H, ++ F29, F29_H, ++ F30, F30_H, ++ F31, F31_H); ++ ++// Class for all 128bit vector registers ++reg_class vectorx_reg( F0, F0_H, F0_J, F0_K, ++ F1, F1_H, F1_J, F1_K, ++ F2, F2_H, F2_J, F2_K, ++ F3, F3_H, F3_J, F3_K, ++ F4, F4_H, F4_J, F4_K, ++ F5, F5_H, F5_J, F5_K, ++ F6, F6_H, F6_J, F6_K, ++ F7, F7_H, F7_J, F7_K, ++ F8, F8_H, F8_J, F8_K, ++ F9, F9_H, F9_J, F9_K, ++ F10, F10_H, F10_J, F10_K, ++ F11, F11_H, F11_J, F11_K, ++ F12, F12_H, F12_J, F12_K, ++ F13, F13_H, F13_J, F13_K, ++ F14, F14_H, F14_J, F14_K, ++ F15, F15_H, F15_J, F15_K, ++ F16, F16_H, F16_J, F16_K, ++ F17, F17_H, F17_J, F17_K, ++ F18, F18_H, F18_J, F18_K, ++ F19, F19_H, F19_J, F19_K, ++ F20, F20_H, F20_J, F20_K, ++ F21, F21_H, F21_J, F21_K, ++ F22, F22_H, F22_J, F22_K, ++ F24, F24_H, F24_J, F24_K, ++ F25, F25_H, F25_J, F25_K, ++ F26, F26_H, F26_J, F26_K, ++ F27, F27_H, F27_J, F27_K, ++ F28, F28_H, F28_J, F28_K, ++ F29, F29_H, F29_J, F29_K, ++ F30, F30_H, F30_J, F30_K, ++ F31, F31_H, F31_J, F31_K); ++ ++// Class for all 256bit vector registers ++reg_class vectory_reg( F0, F0_H, F0_J, F0_K, F0_L, F0_M, F0_N, F0_O, ++ F1, F1_H, F1_J, F1_K, F1_L, F1_M, F1_N, F1_O, ++ F2, F2_H, F2_J, F2_K, F2_L, F2_M, F2_N, F2_O, ++ F3, F3_H, F3_J, F3_K, F3_L, F3_M, F3_N, F3_O, ++ F4, F4_H, F4_J, F4_K, F4_L, F4_M, F4_N, F4_O, ++ F5, F5_H, F5_J, F5_K, F5_L, F5_M, F5_N, F5_O, ++ F6, F6_H, F6_J, F6_K, F6_L, F6_M, F6_N, F6_O, ++ F7, F7_H, F7_J, F7_K, F7_L, F7_M, F7_N, F7_O, ++ F8, F8_H, F8_J, F8_K, F8_L, F8_M, F8_N, F8_O, ++ F9, F9_H, F9_J, F9_K, F9_L, F9_M, F9_N, F9_O, ++ F10, F10_H, F10_J, F10_K, F10_L, F10_M, F10_N, F10_O, ++ F11, F11_H, F11_J, F11_K, F11_L, F11_M, F11_N, F11_O, ++ F12, F12_H, F12_J, F12_K, F12_L, F12_M, F12_N, F12_O, ++ F13, F13_H, F13_J, F13_K, F13_L, F13_M, F13_N, F13_O, ++ F14, F14_H, F14_J, F14_K, F14_L, F14_M, F14_N, F14_O, ++ F15, F15_H, F15_J, F15_K, F15_L, F15_M, F15_N, F15_O, ++ F16, F16_H, F16_J, F16_K, F16_L, F16_M, F16_N, F16_O, ++ F17, F17_H, F17_J, F17_K, F17_L, F17_M, F17_N, F17_O, ++ F18, F18_H, F18_J, F18_K, F18_L, F18_M, F18_N, F18_O, ++ F19, F19_H, F19_J, F19_K, F19_L, F19_M, F19_N, F19_O, ++ F20, F20_H, F20_J, F20_K, F20_L, F20_M, F20_N, F20_O, ++ F21, F21_H, F21_J, F21_K, F21_L, F21_M, F21_N, F21_O, ++ F22, F22_H, F22_J, F22_K, F22_L, F22_M, F22_N, F22_O, ++ F24, F24_H, F24_J, F24_K, F24_L, F24_M, F24_N, F24_O, ++ F25, F25_H, F25_J, F25_K, F25_L, F25_M, F25_N, F25_O, ++ F26, F26_H, F26_J, F26_K, F26_L, F26_M, F26_N, F26_O, ++ F27, F27_H, F27_J, F27_K, F27_L, F27_M, F27_N, F27_O, ++ F28, F28_H, F28_J, F28_K, F28_L, F28_M, F28_N, F28_O, ++ F29, F29_H, F29_J, F29_K, F29_L, F29_M, F29_N, F29_O, ++ F30, F30_H, F30_J, F30_K, F30_L, F30_M, F30_N, F30_O, ++ F31, F31_H, F31_J, F31_K, F31_L, F31_M, F31_N, F31_O); ++ ++// TODO: LA ++//reg_class flt_arg0( F0 ); ++//reg_class dbl_arg0( F0, F0_H ); ++//reg_class dbl_arg1( F1, F1_H ); ++ ++%} ++ ++//----------DEFINITION 
BLOCK---------------------------------------------------
++// Define name --> value mappings to inform the ADLC of an integer valued name
++// Current support includes integer values in the range [0, 0x7FFFFFFF]
++// Format:
++// int_def <name> ( <int_value>, <expression> );
++// Generated Code in ad_<arch>.hpp
++// #define <name> (<expression>)
++// // value == <int_value>
++// Generated code in ad_<arch>.cpp adlc_verification()
++// assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
++//
++definitions %{
++ int_def DEFAULT_COST ( 100, 100);
++ int_def HUGE_COST (1000000, 1000000);
++
++ // Memory refs are twice as expensive as run-of-the-mill.
++ int_def MEMORY_REF_COST ( 200, DEFAULT_COST * 2);
++
++ // Branches are even more expensive.
++ int_def BRANCH_COST ( 300, DEFAULT_COST * 3);
++ // we use jr instruction to construct call, so more expensive
++ int_def CALL_COST ( 500, DEFAULT_COST * 5);
++/*
++ int_def EQUAL ( 1, 1 );
++ int_def NOT_EQUAL ( 2, 2 );
++ int_def GREATER ( 3, 3 );
++ int_def GREATER_EQUAL ( 4, 4 );
++ int_def LESS ( 5, 5 );
++ int_def LESS_EQUAL ( 6, 6 );
++*/
++%}
++
++
++
++//----------SOURCE BLOCK-------------------------------------------------------
++// This is a block of C++ code which provides values, functions, and
++// definitions necessary in the rest of the architecture description
++
++source_hpp %{
++// Header information of the source block.
++// Method declarations/definitions which are used outside
++// the ad-scope can conveniently be defined here.
++//
++// To keep related declarations/definitions/uses close together,
++// we switch between source %{ }% and source_hpp %{ }% freely as needed.
++
++extern RegMask _ANY_REG32_mask;
++extern RegMask _ANY_REG_mask;
++extern RegMask _PTR_REG_mask;
++
++class CallStubImpl {
++
++ //--------------------------------------------------------------
++ //---< Used for optimization in Compile::shorten_branches >---
++ //--------------------------------------------------------------
++
++ public:
++ // Size of call trampoline stub.
++ static uint size_call_trampoline() {
++ return 0; // no call trampolines on this platform
++ }
++
++ // number of relocations needed by a call trampoline stub
++ static uint reloc_call_trampoline() {
++ return 0; // no call trampolines on this platform
++ }
++};
++
++class HandlerImpl {
++
++ public:
++
++ static int emit_exception_handler(CodeBuffer &cbuf);
++ static int emit_deopt_handler(CodeBuffer& cbuf);
++
++ static uint size_exception_handler() {
++ // NativeCall instruction size is the same as NativeJump.
++ // exception handler starts out as jump and can be patched to
++ // a call by deoptimization. (4932387)
++ // Note that this value is also credited (in output.cpp) to
++ // the size of the code section.
++ int size = NativeFarCall::instruction_size;
++ const uintx m = 16 - 1;
++ return mask_bits(size + m, ~m);
++ //return round_to(size, 16);
++ }
++
++ static uint size_deopt_handler() {
++ int size = NativeFarCall::instruction_size;
++ const uintx m = 16 - 1;
++ return mask_bits(size + m, ~m);
++ //return round_to(size, 16);
++ }
++};
++
++bool is_CAS(int opcode);
++bool use_AMO(int opcode);
++
++bool unnecessary_acquire(const Node *barrier);
++bool unnecessary_release(const Node *barrier);
++bool unnecessary_volatile(const Node *barrier);
++bool needs_releasing_store(const Node *store);
++
++%} // end source_hpp
++
++source %{
++
++#define NO_INDEX 0
++#define RELOC_IMM64 Assembler::imm_operand
++#define RELOC_DISP32 Assembler::disp32_operand
++
++#define V0_num A0_num
++#define V0_H_num A0_H_num
++
++#define __ _masm.
++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++RegMask _ANY_REG32_mask; ++RegMask _ANY_REG_mask; ++RegMask _PTR_REG_mask; ++ ++void reg_mask_init() { ++ _ANY_REG32_mask = _ALL_REG32_mask; ++ _ANY_REG_mask = _ALL_REG_mask; ++ _PTR_REG_mask = _ALL_REG_mask; ++ ++ if (UseCompressedOops && (Universe::narrow_ptrs_base() != NULL)) { ++ _ANY_REG32_mask.Remove(OptoReg::as_OptoReg(r28->as_VMReg())); ++ _ANY_REG_mask.SUBTRACT(_S5_LONG_REG_mask); ++ _PTR_REG_mask.SUBTRACT(_S5_LONG_REG_mask); ++ } ++} ++ ++// Emit exception handler code. ++// Stuff framesize into a register and call a VM stub routine. ++int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) { ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a handler. ++ MacroAssembler _masm(&cbuf); ++ address base = __ start_a_stub(size_exception_handler()); ++ if (base == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return 0; // CodeBuffer::expand failed ++ } ++ ++ int offset = __ offset(); ++ ++ __ block_comment("; emit_exception_handler"); ++ ++ cbuf.set_insts_mark(); ++ __ relocate(relocInfo::runtime_call_type); ++ __ patchable_jump((address)OptoRuntime::exception_blob()->entry_point()); ++ assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); ++ __ end_a_stub(); ++ return offset; ++} ++ ++// Emit deopt handler code. ++int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) { ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a handler. ++ MacroAssembler _masm(&cbuf); ++ address base = __ start_a_stub(size_deopt_handler()); ++ if (base == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return 0; // CodeBuffer::expand failed ++ } ++ ++ int offset = __ offset(); ++ ++ __ block_comment("; emit_deopt_handler"); ++ ++ cbuf.set_insts_mark(); ++ __ relocate(relocInfo::runtime_call_type); ++ __ patchable_call(SharedRuntime::deopt_blob()->unpack()); ++ assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow"); ++ __ end_a_stub(); ++ return offset; ++} ++ ++ ++const bool Matcher::match_rule_supported(int opcode) { ++ if (!has_match_rule(opcode)) ++ return false; ++ ++ return true; // Per default match rules are supported. ++} ++ ++const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { ++ // identify extra cases that we might want to provide match rules for ++ // e.g. Op_ vector nodes and other intrinsics while guarding with vlen ++ bool ret_value = match_rule_supported(opcode); ++ ++ return ret_value; // Per default match rules are supported. ++} ++ ++const bool Matcher::has_predicated_vectors(void) { ++ return false; ++} ++ ++const int Matcher::float_pressure(int default_pressure_threshold) { ++ Unimplemented(); ++ return default_pressure_threshold; ++} ++ ++bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { ++ const int safety_zone = 3 * BytesPerInstWord; ++ int offs = offset - br_size + 4; ++ // To be conservative on LoongArch ++ // branch node should be end with: ++ // branch inst ++ offs = (offs < 0 ? 
offs - safety_zone : offs + safety_zone) >> 2; ++ switch (rule) { ++ case jmpDir_long_rule: ++ case jmpDir_short_rule: ++ return Assembler::is_simm(offs, 26); ++ case jmpCon_flags_long_rule: ++ case jmpCon_flags_short_rule: ++ case branchConP_0_long_rule: ++ case branchConP_0_short_rule: ++ case branchConN2P_0_long_rule: ++ case branchConN2P_0_short_rule: ++ case cmpN_null_branch_long_rule: ++ case cmpN_null_branch_short_rule: ++ case branchConF_reg_reg_long_rule: ++ case branchConF_reg_reg_short_rule: ++ case branchConD_reg_reg_long_rule: ++ case branchConD_reg_reg_short_rule: ++ return Assembler::is_simm(offs, 21); ++ default: ++ return Assembler::is_simm(offs, 16); ++ } ++ return false; ++} ++ ++ ++// No additional cost for CMOVL. ++const int Matcher::long_cmove_cost() { return 0; } ++ ++// No CMOVF/CMOVD with SSE2 ++const int Matcher::float_cmove_cost() { return ConditionalMoveLimit; } ++ ++// Does the CPU require late expand (see block.cpp for description of late expand)? ++const bool Matcher::require_postalloc_expand = false; ++ ++// Do we need to mask the count passed to shift instructions or does ++// the cpu only look at the lower 5/6 bits anyway? ++const bool Matcher::need_masked_shift_count = false; ++ ++bool Matcher::narrow_oop_use_complex_address() { ++ assert(UseCompressedOops, "only for compressed oops code"); ++ return false; ++} ++ ++bool Matcher::narrow_klass_use_complex_address() { ++ assert(UseCompressedClassPointers, "only for compressed klass code"); ++ return false; ++} ++ ++bool Matcher::const_oop_prefer_decode() { ++ // Prefer ConN+DecodeN over ConP. ++ return true; ++} ++ ++bool Matcher::const_klass_prefer_decode() { ++ // TODO: Either support matching DecodeNKlass (heap-based) in operand ++ // or condisider the following: ++ // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode. ++ //return Universe::narrow_klass_base() == NULL; ++ return true; ++} ++ ++// This is UltraSparc specific, true just means we have fast l2f conversion ++const bool Matcher::convL2FSupported(void) { ++ return true; ++} ++ ++// Vector ideal reg ++const uint Matcher::vector_ideal_reg(int size) { ++ assert(MaxVectorSize == 16 || MaxVectorSize == 32, ""); ++ switch(size) { ++ case 16: return Op_VecX; ++ case 32: return Op_VecY; ++ } ++ ShouldNotReachHere(); ++ return 0; ++} ++ ++// Only lowest bits of xmm reg are used for vector shift count. ++const uint Matcher::vector_shift_count_ideal_reg(int size) { ++ assert(MaxVectorSize == 16 || MaxVectorSize == 32, ""); ++ switch(size) { ++ case 16: return Op_VecX; ++ case 32: return Op_VecY; ++ } ++ ShouldNotReachHere(); ++ return 0; ++} ++ ++ ++const bool Matcher::convi2l_type_required = true; ++ ++// Should the Matcher clone shifts on addressing modes, expecting them ++// to be subsumed into complex addressing expressions or compute them ++// into registers? ++bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { ++ return clone_base_plus_offset_address(m, mstack, address_visited); ++} ++ ++void Compile::reshape_address(AddPNode* addp) { ++} ++ ++// Max vector size in bytes. 0 if not supported. ++const int Matcher::vector_width_in_bytes(BasicType bt) { ++ return (int)MaxVectorSize; ++} ++ ++// Limits on vector size (number of elements) loaded into vector. 
++const int Matcher::max_vector_size(const BasicType bt) { ++ assert(is_java_primitive(bt), "only primitive type vectors"); ++ return vector_width_in_bytes(bt)/type2aelembytes(bt); ++} ++ ++const int Matcher::min_vector_size(const BasicType bt) { ++ int max_size = max_vector_size(bt); ++ int size = 0; ++ ++ if (UseLSX) size = 16; ++ size = size / type2aelembytes(bt); ++ return MIN2(size,max_size); ++} ++ ++// LoongArch supports misaligned vectors store/load? FIXME ++const bool Matcher::misaligned_vectors_ok() { ++ return false; ++ //return !AlignVector; // can be changed by flag ++} ++ ++// Register for DIVI projection of divmodI ++RegMask Matcher::divI_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++// Register for MODI projection of divmodI ++RegMask Matcher::modI_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++// Register for DIVL projection of divmodL ++RegMask Matcher::divL_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++int Matcher::regnum_to_fpu_offset(int regnum) { ++ return regnum - 32; // The FP registers are in the second chunk ++} ++ ++ ++const bool Matcher::isSimpleConstant64(jlong value) { ++ // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. ++ return true; ++} ++ ++ ++// Return whether or not this register is ever used as an argument. This ++// function is used on startup to build the trampoline stubs in generateOptoStub. ++// Registers not mentioned will be killed by the VM call in the trampoline, and ++// arguments in those registers not be available to the callee. ++bool Matcher::can_be_java_arg( int reg ) { ++ // Refer to: [sharedRuntime_loongarch_64.cpp] SharedRuntime::java_calling_convention() ++ if ( reg == T0_num || reg == T0_H_num ++ || reg == A0_num || reg == A0_H_num ++ || reg == A1_num || reg == A1_H_num ++ || reg == A2_num || reg == A2_H_num ++ || reg == A3_num || reg == A3_H_num ++ || reg == A4_num || reg == A4_H_num ++ || reg == A5_num || reg == A5_H_num ++ || reg == A6_num || reg == A6_H_num ++ || reg == A7_num || reg == A7_H_num ) ++ return true; ++ ++ if ( reg == F0_num || reg == F0_H_num ++ || reg == F1_num || reg == F1_H_num ++ || reg == F2_num || reg == F2_H_num ++ || reg == F3_num || reg == F3_H_num ++ || reg == F4_num || reg == F4_H_num ++ || reg == F5_num || reg == F5_H_num ++ || reg == F6_num || reg == F6_H_num ++ || reg == F7_num || reg == F7_H_num ) ++ return true; ++ ++ return false; ++} ++ ++bool Matcher::is_spillable_arg( int reg ) { ++ return can_be_java_arg(reg); ++} ++ ++bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) { ++ return false; ++} ++ ++// Register for MODL projection of divmodL ++RegMask Matcher::modL_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++const RegMask Matcher::method_handle_invoke_SP_save_mask() { ++ return FP_REG_mask(); ++} ++ ++// LoongArch doesn't support AES intrinsics ++const bool Matcher::pass_original_key_for_aes() { ++ return false; ++} ++ ++int CallStaticJavaDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++int CallDynamicJavaDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++int CallLeafNoFPDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++int 
CallLeafDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++int CallRuntimeDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++// If CPU can load and store mis-aligned doubles directly then no fixup is ++// needed. Else we split the double into 2 integer pieces and move it ++// piece-by-piece. Only happens when passing doubles into C code as the ++// Java calling convention forces doubles to be aligned. ++const bool Matcher::misaligned_doubles_ok = false; ++// Do floats take an entire double register or just half? ++//const bool Matcher::float_in_double = true; ++bool Matcher::float_in_double() { return false; } ++// Do ints take an entire long register or just half? ++const bool Matcher::int_in_long = true; ++// Is it better to copy float constants, or load them directly from memory? ++// Intel can load a float constant from a direct address, requiring no ++// extra registers. Most RISCs will have to materialize an address into a ++// register first, so they would do better to copy the constant from stack. ++const bool Matcher::rematerialize_float_constants = false; ++// Advertise here if the CPU requires explicit rounding operations ++// to implement the UseStrictFP mode. ++const bool Matcher::strict_fp_requires_explicit_rounding = false; ++// false => size gets scaled to BytesPerLong, ok. ++const bool Matcher::init_array_count_is_in_bytes = false; ++ ++// Indicate if the safepoint node needs the polling page as an input. ++// it does if the polling page is more than disp32 away. ++bool SafePointNode::needs_polling_address_input() { ++ return SafepointMechanism::uses_thread_local_poll(); ++} ++ ++#ifndef PRODUCT ++void MachBreakpointNode::format( PhaseRegAlloc *, outputStream* st ) const { ++ st->print("BRK"); ++} ++#endif ++ ++void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { ++ MacroAssembler _masm(&cbuf); ++ __ brk(5); ++} ++ ++uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { ++ return MachNode::size(ra_); ++} ++ ++ ++ ++// !!!!! Special hack to get all type of calls to specify the byte offset ++// from the start of the call to the point where the return address ++// will point. ++int MachCallStaticJavaNode::ret_addr_offset() { ++ // bl ++ return NativeCall::instruction_size; ++} ++ ++int MachCallDynamicJavaNode::ret_addr_offset() { ++ // lu12i_w IC_Klass, ++ // ori IC_Klass, ++ // lu32i_d IC_Klass ++ // lu52i_d IC_Klass ++ ++ // bl ++ return NativeMovConstReg::instruction_size + NativeCall::instruction_size; ++} ++ ++//============================================================================= ++ ++// Figure out which register class each belongs in: rc_int, rc_float, rc_stack ++enum RC { rc_bad, rc_int, rc_float, rc_stack }; ++static enum RC rc_class( OptoReg::Name reg ) { ++ if( !OptoReg::is_valid(reg) ) return rc_bad; ++ if (OptoReg::is_stack(reg)) return rc_stack; ++ VMReg r = OptoReg::as_VMReg(reg); ++ if (r->is_Register()) return rc_int; ++ assert(r->is_FloatRegister(), "must be"); ++ return rc_float; ++} ++ ++// Helper methods for MachSpillCopyNode::implementation(). 
++static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, ++ int src_hi, int dst_hi, uint ireg, outputStream* st) { ++ int size = 0; ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ int offset = __ offset(); ++ switch (ireg) { ++ case Op_VecX: ++ __ vori_b(as_FloatRegister(Matcher::_regEncode[dst_lo]), as_FloatRegister(Matcher::_regEncode[src_lo]), 0); ++ break; ++ case Op_VecY: ++ __ xvori_b(as_FloatRegister(Matcher::_regEncode[dst_lo]), as_FloatRegister(Matcher::_regEncode[src_lo]), 0); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++#ifndef PRODUCT ++ } else if (!do_size) { ++ switch (ireg) { ++ case Op_VecX: ++ st->print("vori.b %s, %s, 0\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]); ++ break; ++ case Op_VecY: ++ st->print("xvori.b %s, %s, 0\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++#endif ++ } ++ size += 4; ++ return size; ++} ++ ++static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, ++ int stack_offset, int reg, uint ireg, outputStream* st) { ++ int size = 0; ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ int offset = __ offset(); ++ if (is_load) { ++ switch (ireg) { ++ case Op_VecX: ++ __ vld(as_FloatRegister(Matcher::_regEncode[reg]), SP, stack_offset); ++ break; ++ case Op_VecY: ++ __ xvld(as_FloatRegister(Matcher::_regEncode[reg]), SP, stack_offset); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { // store ++ switch (ireg) { ++ case Op_VecX: ++ __ vst(as_FloatRegister(Matcher::_regEncode[reg]), SP, stack_offset); ++ break; ++ case Op_VecY: ++ __ xvst(as_FloatRegister(Matcher::_regEncode[reg]), SP, stack_offset); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++#ifndef PRODUCT ++ } else if (!do_size) { ++ if (is_load) { ++ switch (ireg) { ++ case Op_VecX: ++ st->print("vld %s, [SP + %d]\t# spill", Matcher::regName[reg], stack_offset); ++ break; ++ case Op_VecY: ++ st->print("xvld %s, [SP + %d]\t# spill", Matcher::regName[reg], stack_offset); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { // store ++ switch (ireg) { ++ case Op_VecX: ++ st->print("vst %s, [SP + %d]\t# spill", Matcher::regName[reg], stack_offset); ++ break; ++ case Op_VecY: ++ st->print("xvst %s, [SP + %d]\t# spill", Matcher::regName[reg], stack_offset); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++#endif ++ } ++ size += 4; ++ return size; ++} ++ ++static int vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset, ++ int dst_offset, uint ireg, outputStream* st) { ++ int size = 0; ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ switch (ireg) { ++ case Op_VecX: ++ __ vld(F23, SP, src_offset); ++ __ vst(F23, SP, dst_offset); ++ break; ++ case Op_VecY: ++ __ xvld(F23, SP, src_offset); ++ __ xvst(F23, SP, dst_offset); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++#ifndef PRODUCT ++ } else { ++ switch (ireg) { ++ case Op_VecX: ++ st->print("vld f23, %d(sp)\n\t" ++ "vst f23, %d(sp)\t# 128-bit mem-mem spill", ++ src_offset, dst_offset); ++ break; ++ case Op_VecY: ++ st->print("xvld f23, %d(sp)\n\t" ++ "xvst f23, %d(sp)\t# 256-bit mem-mem spill", ++ src_offset, dst_offset); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++#endif ++ } ++ size += 8; ++ return size; ++} ++ ++uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const { ++ // Get registers to move ++ OptoReg::Name src_second = ra_->get_reg_second(in(1)); ++ OptoReg::Name src_first = 
ra_->get_reg_first(in(1)); ++ OptoReg::Name dst_second = ra_->get_reg_second(this ); ++ OptoReg::Name dst_first = ra_->get_reg_first(this ); ++ ++ enum RC src_second_rc = rc_class(src_second); ++ enum RC src_first_rc = rc_class(src_first); ++ enum RC dst_second_rc = rc_class(dst_second); ++ enum RC dst_first_rc = rc_class(dst_first); ++ ++ assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" ); ++ ++ // Generate spill code! ++ ++ if( src_first == dst_first && src_second == dst_second ) ++ return 0; // Self copy, no move ++ ++ if (bottom_type()->isa_vect() != NULL) { ++ uint ireg = ideal_reg(); ++ assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity"); ++ if (src_first_rc == rc_stack && dst_first_rc == rc_stack) { ++ // mem -> mem ++ int src_offset = ra_->reg2offset(src_first); ++ int dst_offset = ra_->reg2offset(dst_first); ++ vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st); ++ } else if (src_first_rc == rc_float && dst_first_rc == rc_float) { ++ vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st); ++ } else if (src_first_rc == rc_float && dst_first_rc == rc_stack) { ++ int stack_offset = ra_->reg2offset(dst_first); ++ vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st); ++ } else if (src_first_rc == rc_stack && dst_first_rc == rc_float) { ++ int stack_offset = ra_->reg2offset(src_first); ++ vec_spill_helper(cbuf, do_size, true, stack_offset, dst_first, ireg, st); ++ } else { ++ ShouldNotReachHere(); ++ } ++ return 0; ++ } ++ ++ if (src_first_rc == rc_stack) { ++ // mem -> ++ if (dst_first_rc == rc_stack) { ++ // mem -> mem ++ assert(src_second != dst_first, "overlap"); ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int src_offset = ra_->reg2offset(src_first); ++ int dst_offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ ld_d(AT, Address(SP, src_offset)); ++ __ st_d(AT, Address(SP, dst_offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\tld_d AT, [SP + #%d]\t# 64-bit mem-mem spill 1\n\t" ++ "st_d AT, [SP + #%d]", ++ src_offset, dst_offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ // No pushl/popl, so: ++ int src_offset = ra_->reg2offset(src_first); ++ int dst_offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ ld_w(AT, Address(SP, src_offset)); ++ __ st_w(AT, Address(SP, dst_offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\tld_w AT, [SP + #%d] spill 2\n\t" ++ "st_w AT, [SP + #%d]\n\t", ++ src_offset, dst_offset); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_int) { ++ // mem -> gpr ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ ld_d(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\tld_d %s, [SP + #%d]\t# spill 3", ++ Matcher::regName[dst_first], ++ offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ 
int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ if (this->ideal_reg() == Op_RegI) ++ __ ld_w(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++ else { ++ if (Assembler::is_simm(offset, 12)) { ++ __ ld_wu(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++ } else { ++ __ li(AT, offset); ++ __ ldx_wu(as_Register(Matcher::_regEncode[dst_first]), SP, AT); ++ } ++ } ++#ifndef PRODUCT ++ } else { ++ if (this->ideal_reg() == Op_RegI) ++ st->print("\tld_w %s, [SP + #%d]\t# spill 4", ++ Matcher::regName[dst_first], ++ offset); ++ else ++ st->print("\tld_wu %s, [SP + #%d]\t# spill 5", ++ Matcher::regName[dst_first], ++ offset); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_float) { ++ // mem-> xmm ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ fld_d( as_FloatRegister(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\tfld_d %s, [SP + #%d]\t# spill 6", ++ Matcher::regName[dst_first], ++ offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ fld_s( as_FloatRegister(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\tfld_s %s, [SP + #%d]\t# spill 7", ++ Matcher::regName[dst_first], ++ offset); ++#endif ++ } ++ } ++ } ++ return 0; ++ } else if (src_first_rc == rc_int) { ++ // gpr -> ++ if (dst_first_rc == rc_stack) { ++ // gpr -> mem ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ st_d(as_Register(Matcher::_regEncode[src_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\tst_d %s, [SP + #%d] # spill 8", ++ Matcher::regName[src_first], ++ offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ st_w(as_Register(Matcher::_regEncode[src_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\tst_w %s, [SP + #%d]\t# spill 9", ++ Matcher::regName[src_first], offset); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_int) { ++ // gpr -> gpr ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ move(as_Register(Matcher::_regEncode[dst_first]), ++ as_Register(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\tmove(64bit) %s <-- %s\t# spill 10", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ return 0; ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ 
MacroAssembler _masm(cbuf); ++ if (this->ideal_reg() == Op_RegI) ++ __ move_u32(as_Register(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first])); ++ else ++ __ add_d(as_Register(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]), R0); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("move(32-bit) %s <-- %s\t# spill 11", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ return 0; ++ } ++ } else if (dst_first_rc == rc_float) { ++ // gpr -> xmm ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ movgr2fr_d(as_FloatRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("movgr2fr_d %s, %s\t# spill 12", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ movgr2fr_w(as_FloatRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("movgr2fr_w %s, %s\t# spill 13", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } ++ return 0; ++ } ++ } else if (src_first_rc == rc_float) { ++ // xmm -> ++ if (dst_first_rc == rc_stack) { ++ // xmm -> mem ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ fst_d( as_FloatRegister(Matcher::_regEncode[src_first]), Address(SP, offset) ); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("fst_d %s, [SP + #%d]\t# spill 14", ++ Matcher::regName[src_first], ++ offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ fst_s(as_FloatRegister(Matcher::_regEncode[src_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("fst_s %s, [SP + #%d]\t# spill 15", ++ Matcher::regName[src_first], ++ offset); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_int) { ++ // xmm -> gpr ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ movfr2gr_d( as_Register(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("movfr2gr_d %s, %s\t# spill 16", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ movfr2gr_s( as_Register(Matcher::_regEncode[dst_first]), 
as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("movfr2gr_s %s, %s\t# spill 17", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_float) { ++ // xmm -> xmm ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ fmov_d( as_FloatRegister(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("fmov_d %s <-- %s\t# spill 18", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ fmov_s( as_FloatRegister(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("fmov_s %s <-- %s\t# spill 19", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } ++ return 0; ++ } ++ } ++ ++ assert(0," foo "); ++ Unimplemented(); ++ return 0; ++} ++ ++#ifndef PRODUCT ++void MachSpillCopyNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ implementation( NULL, ra_, false, st ); ++} ++#endif ++ ++void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ implementation( &cbuf, ra_, false, NULL ); ++} ++ ++uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); ++} ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ Compile *C = ra_->C; ++ int framesize = C->frame_size_in_bytes(); ++ ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ ++ st->print_cr("addi_d SP, SP, %d # Rlease stack @ MachEpilogNode", framesize); ++ st->print("\t"); ++ st->print_cr("ld_d RA, SP, %d # Restore RA @ MachEpilogNode", -wordSize); ++ st->print("\t"); ++ st->print_cr("ld_d FP, SP, %d # Restore FP @ MachEpilogNode", -wordSize*2); ++ ++ if( do_polling() && C->is_method_compilation() ) { ++ st->print("\t"); ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ st->print_cr("ld_d AT, poll_offset[thread] #polling_page_address\n\t" ++ "ld_w AT, [AT]\t" ++ "# Safepoint: poll for GC"); ++ } else { ++ st->print_cr("Poll Safepoint # MachEpilogNode"); ++ } ++ } ++} ++#endif ++ ++void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ Compile *C = ra_->C; ++ MacroAssembler _masm(&cbuf); ++ int framesize = C->frame_size_in_bytes(); ++ ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ ++ __ ld_d(RA, Address(SP, framesize - wordSize)); ++ __ ld_d(FP, Address(SP, framesize - wordSize * 2)); ++ if (Assembler::is_simm(framesize, 12)) { ++ __ addi_d(SP, SP, framesize); ++ } else { ++ __ li(AT, framesize); ++ __ add_d(SP, SP, AT); ++ } ++ ++ if (StackReservedPages > 0 && C->has_reserved_stack_access()) { ++ __ reserved_stack_check(); ++ } ++ ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ if( do_polling() && C->is_method_compilation() ) { ++ if 
(SafepointMechanism::uses_thread_local_poll()) { ++ __ ld_d(AT, thread, in_bytes(Thread::polling_page_offset())); ++ __ relocate(relocInfo::poll_return_type); ++ __ ld_w(AT, AT, 0); ++ } else { ++ __ li(AT, (long)os::get_polling_page()); ++ __ relocate(relocInfo::poll_return_type); ++ __ ld_w(AT, AT, 0); ++ } ++ } ++} ++ ++uint MachEpilogNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); // too many variables; just compute it the hard way ++} ++ ++int MachEpilogNode::reloc() const { ++ return 0; // a large enough number ++} ++ ++const Pipeline * MachEpilogNode::pipeline() const { ++ return MachNode::pipeline_class(); ++} ++ ++int MachEpilogNode::safepoint_offset() const { return 0; } ++ ++//============================================================================= ++ ++#ifndef PRODUCT ++void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); ++ int reg = ra_->get_reg_first(this); ++ st->print("ADDI_D %s, SP, %d @BoxLockNode",Matcher::regName[reg],offset); ++} ++#endif ++ ++ ++uint BoxLockNode::size(PhaseRegAlloc *ra_) const { ++ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); ++ ++ if (Assembler::is_simm(offset, 12)) ++ return 4; ++ else ++ return 3 * 4; ++} ++ ++void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ MacroAssembler _masm(&cbuf); ++ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); ++ int reg = ra_->get_encode(this); ++ ++ if (Assembler::is_simm(offset, 12)) { ++ __ addi_d(as_Register(reg), SP, offset); ++ } else { ++ __ lu12i_w(AT, Assembler::split_low20(offset >> 12)); ++ __ ori(AT, AT, Assembler::split_low12(offset)); ++ __ add_d(as_Register(reg), SP, AT); ++ } ++} ++ ++int MachCallRuntimeNode::ret_addr_offset() { ++ // pcaddu18i ++ // jirl ++ return NativeFarCall::instruction_size; ++} ++ ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachNopNode::format( PhaseRegAlloc *, outputStream* st ) const { ++ st->print("NOP \t# %d bytes pad for loops and calls", 4 * _count); ++} ++#endif ++ ++void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ) const { ++ MacroAssembler _masm(&cbuf); ++ int i = 0; ++ for(i = 0; i < _count; i++) ++ __ nop(); ++} ++ ++uint MachNopNode::size(PhaseRegAlloc *) const { ++ return 4 * _count; ++} ++const Pipeline* MachNopNode::pipeline() const { ++ return MachNode::pipeline_class(); ++} ++ ++//============================================================================= ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ st->print_cr("load_klass(T4, T0)"); ++ st->print_cr("\tbeq(T4, iCache, L)"); ++ st->print_cr("\tjmp(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type)"); ++ st->print_cr(" L:"); ++} ++#endif ++ ++ ++void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ MacroAssembler _masm(&cbuf); ++ int ic_reg = Matcher::inline_cache_reg_encode(); ++ Label L; ++ Register receiver = T0; ++ Register iCache = as_Register(ic_reg); ++ ++ __ load_klass(T4, receiver); ++ __ beq(T4, iCache, L); ++ __ jmp((address)SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type); ++ __ bind(L); ++} ++ ++uint MachUEPNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); ++} ++ ++ ++ ++//============================================================================= ++ ++const 
RegMask& MachConstantBaseNode::_out_RegMask = P_REG_mask();
++
++int Compile::ConstantTable::calculate_table_base_offset() const {
++ return 0; // absolute addressing, no offset
++}
++
++bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
++void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
++ ShouldNotReachHere();
++}
++
++void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
++ Compile* C = ra_->C;
++ Compile::ConstantTable& constant_table = C->constant_table();
++ MacroAssembler _masm(&cbuf);
++
++ Register Rtoc = as_Register(ra_->get_encode(this));
++ CodeSection* consts_section = cbuf.consts();
++ int consts_size = consts_section->align_at_start(consts_section->size());
++ assert(constant_table.size() == consts_size, "must be equal");
++
++ if (consts_section->size()) {
++ assert((CodeBuffer::SECT_CONSTS + 1) == CodeBuffer::SECT_INSTS,
++ "insts must immediately follow consts");
++ // Materialize the constant table base.
++ address baseaddr = cbuf.insts()->start() - consts_size + -(constant_table.table_base_offset());
++ jint offs = (baseaddr - __ pc()) >> 2;
++ guarantee(Assembler::is_simm(offs, 20), "Not signed 20-bit offset");
++ __ pcaddi(Rtoc, offs);
++ }
++}
++
++uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
++ // pcaddi
++ return 1 * BytesPerInstWord;
++}
++
++#ifndef PRODUCT
++void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
++ Register r = as_Register(ra_->get_encode(this));
++ st->print("pcaddi %s, &constanttable (constant table base) @ MachConstantBaseNode", r->name());
++}
++#endif
++
++
++//=============================================================================
++#ifndef PRODUCT
++void MachPrologNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
++ Compile* C = ra_->C;
++
++ int framesize = C->frame_size_in_bytes();
++ int bangsize = C->bang_size_in_bytes();
++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
++
++ // Calls to C2R adapters often do not accept exceptional returns.
++ // We require that their callers must bang for them. But be careful, because
++ // some VM calls (such as call site linkage) can use several kilobytes of
++ // stack. But the stack safety zone should account for that.
++ // See bugs 4446381, 4468289, 4497237.
++ if (C->need_stack_bang(bangsize)) { ++ st->print_cr("# stack bang"); st->print("\t"); ++ } ++ st->print("st_d RA, %d(SP) @ MachPrologNode\n\t", -wordSize); ++ st->print("st_d FP, %d(SP) @ MachPrologNode\n\t", -wordSize*2); ++ st->print("addi_d FP, SP, -%d \n\t", wordSize*2); ++ st->print("addi_d SP, SP, -%d \t",framesize); ++} ++#endif ++ ++ ++void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ Compile* C = ra_->C; ++ MacroAssembler _masm(&cbuf); ++ ++ int framesize = C->frame_size_in_bytes(); ++ int bangsize = C->bang_size_in_bytes(); ++ ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ ++#ifdef ASSERT ++ address start = __ pc(); ++#endif ++ ++ if (C->need_stack_bang(bangsize)) { ++ __ generate_stack_overflow_check(bangsize); ++ } ++ ++ if (Assembler::is_simm(-framesize, 12)) { ++ __ addi_d(SP, SP, -framesize); ++ } else { ++ __ li(AT, -framesize); ++ __ add_d(SP, SP, AT); ++ } ++ __ st_d(RA, Address(SP, framesize - wordSize)); ++ __ st_d(FP, Address(SP, framesize - wordSize * 2)); ++ if (Assembler::is_simm(framesize - wordSize * 2, 12)) { ++ __ addi_d(FP, SP, framesize - wordSize * 2); ++ } else { ++ __ li(AT, framesize - wordSize * 2); ++ __ add_d(FP, SP, AT); ++ } ++ ++ assert((__ pc() - start) >= 1 * BytesPerInstWord, "No enough room for patch_verified_entry"); ++ ++ C->set_frame_complete(cbuf.insts_size()); ++ if (C->has_mach_constant_base_node()) { ++ // NOTE: We set the table base offset here because users might be ++ // emitted before MachConstantBaseNode. ++ Compile::ConstantTable& constant_table = C->constant_table(); ++ constant_table.set_table_base_offset(constant_table.calculate_table_base_offset()); ++ } ++} ++ ++ ++uint MachPrologNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); // too many variables; just compute it the hard way ++} ++ ++int MachPrologNode::reloc() const { ++ return 0; // a large enough number ++} ++ ++bool is_CAS(int opcode) ++{ ++ switch(opcode) { ++ // We handle these ++ case Op_CompareAndSwapI: ++ case Op_CompareAndSwapL: ++ case Op_CompareAndSwapP: ++ case Op_CompareAndSwapN: ++ case Op_GetAndSetI: ++ case Op_GetAndSetL: ++ case Op_GetAndSetP: ++ case Op_GetAndSetN: ++ case Op_GetAndAddI: ++ case Op_GetAndAddL: ++ return true; ++ default: ++ return false; ++ } ++} ++ ++bool use_AMO(int opcode) ++{ ++ switch(opcode) { ++ // We handle these ++ case Op_StoreI: ++ case Op_StoreL: ++ case Op_StoreP: ++ case Op_StoreN: ++ case Op_StoreNKlass: ++ return true; ++ default: ++ return false; ++ } ++} ++ ++bool unnecessary_acquire(const Node *barrier) ++{ ++ assert(barrier->is_MemBar(), "expecting a membar"); ++ ++ if (UseBarriersForVolatile) { ++ // we need to plant a dbar ++ return false; ++ } ++ ++ MemBarNode* mb = barrier->as_MemBar(); ++ ++ if (mb->trailing_load_store()) { ++ Node* load_store = mb->in(MemBarNode::Precedent); ++ assert(load_store->is_LoadStore(), "unexpected graph shape"); ++ return is_CAS(load_store->Opcode()); ++ } ++ ++ return false; ++} ++ ++bool unnecessary_release(const Node *n) ++{ ++ assert((n->is_MemBar() && n->Opcode() == Op_MemBarRelease), "expecting a release membar"); ++ ++ if (UseBarriersForVolatile) { ++ // we need to plant a dbar ++ return false; ++ } ++ ++ MemBarNode *barrier = n->as_MemBar(); ++ ++ if (!barrier->leading()) { ++ return false; ++ } else { ++ Node* trailing = barrier->trailing_membar(); ++ MemBarNode* trailing_mb = trailing->as_MemBar(); ++ assert(trailing_mb->trailing(), "Not a trailing membar?"); ++ 
assert(trailing_mb->leading_membar() == n, "inconsistent leading/trailing membars"); ++ ++ Node* mem = trailing_mb->in(MemBarNode::Precedent); ++ if (mem->is_Store()) { ++ assert(mem->as_Store()->is_release(), ""); ++ assert(trailing_mb->Opcode() == Op_MemBarVolatile, ""); ++ return use_AMO(mem->Opcode()); ++ } else { ++ assert(mem->is_LoadStore(), ""); ++ assert(trailing_mb->Opcode() == Op_MemBarAcquire, ""); ++ return is_CAS(mem->Opcode()); ++ } ++ } ++ ++ return false; ++} ++ ++bool unnecessary_volatile(const Node *n) ++{ ++ // assert n->is_MemBar(); ++ if (UseBarriersForVolatile) { ++ // we need to plant a dbar ++ return false; ++ } ++ ++ MemBarNode *mbvol = n->as_MemBar(); ++ ++ bool release = false; ++ if (mbvol->trailing_store()) { ++ Node* mem = mbvol->in(MemBarNode::Precedent); ++ release = use_AMO(mem->Opcode()); ++ } ++ ++ assert(!release || (mbvol->in(MemBarNode::Precedent)->is_Store() && mbvol->in(MemBarNode::Precedent)->as_Store()->is_release()), ""); ++#ifdef ASSERT ++ if (release) { ++ Node* leading = mbvol->leading_membar(); ++ assert(leading->Opcode() == Op_MemBarRelease, ""); ++ assert(leading->as_MemBar()->leading_store(), ""); ++ assert(leading->as_MemBar()->trailing_membar() == mbvol, ""); ++ } ++#endif ++ ++ return release; ++} ++ ++bool needs_releasing_store(const Node *n) ++{ ++ // assert n->is_Store(); ++ if (UseBarriersForVolatile) { ++ // we use a normal store and dbar combination ++ return false; ++ } ++ ++ StoreNode *st = n->as_Store(); ++ ++ return st->trailing_membar() != NULL; ++} ++ ++%} ++ ++//----------ENCODING BLOCK----------------------------------------------------- ++// This block specifies the encoding classes used by the compiler to output ++// byte streams. Encoding classes generate functions which are called by ++// Machine Instruction Nodes in order to generate the bit encoding of the ++// instruction. Operands specify their base encoding interface with the ++// interface keyword. There are currently supported four interfaces, ++// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an ++// operand to generate a function which returns its register number when ++// queried. CONST_INTER causes an operand to generate a function which ++// returns the value of the constant when queried. MEMORY_INTER causes an ++// operand to generate four functions which return the Base Register, the ++// Index Register, the Scale Value, and the Offset Value of the operand when ++// queried. COND_INTER causes an operand to generate six functions which ++// return the encoding code (ie - encoding bits for the instruction) ++// associated with each basic boolean condition for a conditional instruction. ++// Instructions specify two basic values for encoding. They use the ++// ins_encode keyword to specify their encoding class (which must be one of ++// the class names specified in the encoding block), and they use the ++// opcode keyword to specify, in order, their primary, secondary, and ++// tertiary opcode. Only the opcode sections which a particular instruction ++// needs for encoding need to be specified. ++encode %{ ++ ++ enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf ++ MacroAssembler _masm(&cbuf); ++ // This is the instruction starting address for relocation info. 
++ __ block_comment("Java_To_Runtime"); ++ cbuf.set_insts_mark(); ++ __ relocate(relocInfo::runtime_call_type); ++ __ patchable_call((address)$meth$$method); ++ %} ++ ++ enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL ++ // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine ++ // who we intended to call. ++ MacroAssembler _masm(&cbuf); ++ address addr = (address)$meth$$method; ++ address call; ++ __ block_comment("Java_Static_Call"); ++ ++ if ( !_method ) { ++ // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap. ++ call = __ trampoline_call(AddressLiteral(addr, relocInfo::runtime_call_type), &cbuf); ++ if (call == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ } else { ++ int method_index = resolved_method_index(cbuf); ++ RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index) ++ : static_call_Relocation::spec(method_index); ++ call = __ trampoline_call(AddressLiteral(addr, rspec), &cbuf); ++ if (call == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ // Emit stub for static call ++ address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); ++ if (stub == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ } ++ %} ++ ++ ++ // ++ // [Ref: LIR_Assembler::ic_call() ] ++ // ++ enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL ++ MacroAssembler _masm(&cbuf); ++ __ block_comment("Java_Dynamic_Call"); ++ address call = __ ic_call((address)$meth$$method, resolved_method_index(cbuf)); ++ if (call == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ %} ++ ++ ++ enc_class enc_PartialSubtypeCheck(mRegP result, mRegP sub, mRegP super, mRegI tmp) %{ ++ Register result = $result$$Register; ++ Register sub = $sub$$Register; ++ Register super = $super$$Register; ++ Register length = $tmp$$Register; ++ Register tmp = T4; ++ Label miss; ++ ++ // result may be the same as sub ++ // 47c B40: # B21 B41 <- B20 Freq: 0.155379 ++ // 47c partialSubtypeCheck result=S1, sub=S1, super=S3, length=S0 ++ // 4bc mov S2, NULL #@loadConP ++ // 4c0 beq S1, S2, B21 #@branchConP P=0.999999 C=-1.000000 ++ // ++ MacroAssembler _masm(&cbuf); ++ Label done; ++ __ check_klass_subtype_slow_path(sub, super, length, tmp, ++ NULL, &miss, ++ /*set_cond_codes:*/ true); ++ // Refer to X86_64's RDI ++ __ move(result, 0); ++ __ b(done); ++ ++ __ bind(miss); ++ __ li(result, 1); ++ __ bind(done); ++ %} ++ ++%} ++ ++ ++//---------LOONGARCH FRAME-------------------------------------------------------------- ++// Definition of frame structure and management information. 
++// ++// S T A C K L A Y O U T Allocators stack-slot number ++// | (to get allocators register number ++// G Owned by | | v add SharedInfo::stack0) ++// r CALLER | | ++// o | +--------+ pad to even-align allocators stack-slot ++// w V | pad0 | numbers; owned by CALLER ++// t -----------+--------+----> Matcher::_in_arg_limit, unaligned ++// h ^ | in | 5 ++// | | args | 4 Holes in incoming args owned by SELF ++// | | old | | 3 ++// | | SP-+--------+----> Matcher::_old_SP, even aligned ++// v | | ret | 3 return address ++// Owned by +--------+ ++// Self | pad2 | 2 pad to align old SP ++// | +--------+ 1 ++// | | locks | 0 ++// | +--------+----> SharedInfo::stack0, even aligned ++// | | pad1 | 11 pad to align new SP ++// | +--------+ ++// | | | 10 ++// | | spills | 9 spills ++// V | | 8 (pad0 slot for callee) ++// -----------+--------+----> Matcher::_out_arg_limit, unaligned ++// ^ | out | 7 ++// | | args | 6 Holes in outgoing args owned by CALLEE ++// Owned by new | | ++// Callee SP-+--------+----> Matcher::_new_SP, even aligned ++// | | ++// ++// Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is ++// known from SELF's arguments and the Java calling convention. ++// Region 6-7 is determined per call site. ++// Note 2: If the calling convention leaves holes in the incoming argument ++// area, those holes are owned by SELF. Holes in the outgoing area ++// are owned by the CALLEE. Holes should not be nessecary in the ++// incoming area, as the Java calling convention is completely under ++// the control of the AD file. Doubles can be sorted and packed to ++// avoid holes. Holes in the outgoing arguments may be nessecary for ++// varargs C calling conventions. ++// Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is ++// even aligned with pad0 as needed. ++// Region 6 is even aligned. Region 6-7 is NOT even aligned; ++// region 6-11 is even aligned; it may be padded out more so that ++// the region from SP to FP meets the minimum stack alignment. ++// Note 4: For I2C adapters, the incoming FP may not meet the minimum stack ++// alignment. Region 11, pad1, may be dynamically extended so that ++// SP meets the minimum alignment. ++ ++ ++frame %{ ++ ++ stack_direction(TOWARDS_LOW); ++ ++ // These two registers define part of the calling convention ++ // between compiled code and the interpreter. ++ // SEE StartI2CNode::calling_convention & StartC2INode::calling_convention & StartOSRNode::calling_convention ++ // for more information. ++ ++ inline_cache_reg(T1); // Inline Cache Register ++ interpreter_method_oop_reg(S3); // Method Oop Register when calling interpreter ++ ++ // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset] ++ cisc_spilling_operand_name(indOffset32); ++ ++ // Number of stack slots consumed by locking an object ++ // generate Compile::sync_stack_slots ++ sync_stack_slots(2); ++ ++ frame_pointer(SP); ++ ++ // Interpreter stores its frame pointer in a register which is ++ // stored to the stack by I2CAdaptors. ++ // I2CAdaptors convert from interpreted java to compiled java. ++ ++ interpreter_frame_pointer(FP); ++ ++ // generate Matcher::stack_alignment ++ stack_alignment(StackAlignmentInBytes); //wordSize = sizeof(char*); ++ ++ // Number of stack slots between incoming argument block and the start of ++ // a new frame. The PROLOG must add this many slots to the stack. The ++ // EPILOG must remove this many slots. ++ in_preserve_stack_slots(4); //Now VerifyStackAtCalls is defined as false ! 
Leave two stack slots for ra and fp
++
++  // Number of outgoing stack slots killed above the out_preserve_stack_slots
++  // for calls to C. Supports the var-args backing area for register parms.
++  varargs_C_out_slots_killed(0);
++
++  // The after-PROLOG location of the return address. Location of
++  // return address specifies a type (REG or STACK) and a number
++  // representing the register number (i.e. - use a register name) or
++  // stack slot.
++  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
++  // Otherwise, it is above the locks and verification slot and alignment word
++  //return_addr(STACK -1+ round_to(1+VerifyStackAtCalls+Compile::current()->sync()*Compile::current()->sync_stack_slots(),WordsPerLong));
++  return_addr(REG RA);
++
++  // Body of function which returns an integer array locating
++  // arguments either in registers or in stack slots. Passed an array
++  // of ideal registers called "sig" and a "length" count. Stack-slot
++  // offsets are based on outgoing arguments, i.e. a CALLER setting up
++  // arguments for a CALLEE. Incoming stack arguments are
++  // automatically biased by the preserve_stack_slots field above.
++
++
++  // will be generated as Matcher::calling_convention(OptoRegPair *sig, uint length, bool is_outgoing)
++  // StartNode::calling_convention calls this.
++  calling_convention %{
++    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
++  %}
++
++
++
++
++  // Body of function which returns an integer array locating
++  // arguments either in registers or in stack slots. Passed an array
++  // of ideal registers called "sig" and a "length" count. Stack-slot
++  // offsets are based on outgoing arguments, i.e. a CALLER setting up
++  // arguments for a CALLEE. Incoming stack arguments are
++  // automatically biased by the preserve_stack_slots field above.
++
++
++  // SEE CallRuntimeNode::calling_convention for more information.
++  c_calling_convention %{
++    (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
++  %}
++
++
++  // Location of C & interpreter return values
++  // register(s) contain(s) return value for Op_StartI2C and Op_StartOSR.
++  // SEE Matcher::match.
++  c_return_value %{
++    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
++    /* -- , -- , Op_RegN, Op_RegI, Op_RegP, Op_RegF, Op_RegD, Op_RegL */
++    static int lo[Op_RegL+1] = { 0, 0, V0_num, V0_num, V0_num, F0_num, F0_num, V0_num };
++    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, V0_H_num, OptoReg::Bad, F0_H_num, V0_H_num };
++    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
++  %}
++
++  // Location of return values
++  // register(s) contain(s) return value for Op_StartC2I and Op_Start.
++  // SEE Matcher::match.
++ ++ return_value %{ ++ assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); ++ /* -- , -- , Op_RegN, Op_RegI, Op_RegP, Op_RegF, Op_RegD, Op_RegL */ ++ static int lo[Op_RegL+1] = { 0, 0, V0_num, V0_num, V0_num, F0_num, F0_num, V0_num }; ++ static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, V0_H_num, OptoReg::Bad, F0_H_num, V0_H_num}; ++ return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); ++ %} ++ ++%} ++ ++//----------ATTRIBUTES--------------------------------------------------------- ++//----------Operand Attributes------------------------------------------------- ++op_attrib op_cost(0); // Required cost attribute ++ ++//----------Instruction Attributes--------------------------------------------- ++ins_attrib ins_cost(100); // Required cost attribute ++ins_attrib ins_size(32); // Required size attribute (in bits) ++ins_attrib ins_pc_relative(0); // Required PC Relative flag ++ins_attrib ins_short_branch(0); // Required flag: is this instruction a ++ // non-matching short branch variant of some ++ // long branch? ++ins_attrib ins_alignment(4); // Required alignment attribute (must be a power of 2) ++ // specifies the alignment that some part of the instruction (not ++ // necessarily the start) requires. If > 1, a compute_padding() ++ // function must be provided for the instruction ++ ++//----------OPERANDS----------------------------------------------------------- ++// Operand definitions must precede instruction definitions for correct parsing ++// in the ADLC because operands constitute user defined types which are used in ++// instruction definitions. ++ ++// Vectors ++ ++operand vecX() %{ ++ constraint(ALLOC_IN_RC(vectorx_reg)); ++ match(VecX); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand vecY() %{ ++ constraint(ALLOC_IN_RC(vectory_reg)); ++ match(VecY); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Flags register, used as output of compare instructions ++operand FlagsReg() %{ ++ constraint(ALLOC_IN_RC(t0_reg)); ++ match(RegFlags); ++ ++ format %{ "T0" %} ++ interface(REG_INTER); ++%} ++ ++//----------Simple Operands---------------------------------------------------- ++// TODO: Should we need to define some more special immediate number ? 
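
The immediate operands defined next all classify constants by how many signed or unsigned bits they need, which decides whether a value can be encoded directly in an instruction. A small standalone sketch of those range checks (illustrative only; the helper names here are not from the patch):

  #include <cassert>
  #include <cstdint>

  static bool fits_simm(std::int64_t v, unsigned bits) {  // signed immediate
    return v >= -(std::int64_t(1) << (bits - 1)) && v <= (std::int64_t(1) << (bits - 1)) - 1;
  }
  static bool fits_uimm(std::int64_t v, unsigned bits) {  // unsigned immediate
    return v >= 0 && v <= (std::int64_t(1) << bits) - 1;
  }

  int main() {
    assert(fits_simm(2047, 12) && !fits_simm(2048, 12));  // immI12 / immL12 accept [-2048, 2047]
    assert(fits_uimm(4095, 12) && !fits_uimm(4096, 12));  // immI_0_4095 accepts [0, 4095]
    assert(fits_uimm(31, 5)    && !fits_uimm(32, 5));     // immIU5 accepts [0, 31]
    return 0;
  }
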
++// Immediate Operands ++// Integer Immediate ++operand immI() %{ ++ match(ConI); ++ ++ op_cost(20); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immIU1() %{ ++ predicate((0 <= n->get_int()) && (n->get_int() <= 1)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immIU2() %{ ++ predicate((0 <= n->get_int()) && (n->get_int() <= 3)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immIU3() %{ ++ predicate((0 <= n->get_int()) && (n->get_int() <= 7)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immIU4() %{ ++ predicate((0 <= n->get_int()) && (n->get_int() <= 15)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immIU5() %{ ++ predicate((0 <= n->get_int()) && (n->get_int() <= 31)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immIU6() %{ ++ predicate((0 <= n->get_int()) && (n->get_int() <= 63)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immIU8() %{ ++ predicate((0 <= n->get_int()) && (n->get_int() <= 255)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI10() %{ ++ predicate((-512 <= n->get_int()) && (n->get_int() <= 511)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI12() %{ ++ predicate((-2048 <= n->get_int()) && (n->get_int() <= 2047)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_M65536() %{ ++ predicate(n->get_int() == -65536); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for decrement ++operand immI_M1() %{ ++ predicate(n->get_int() == -1); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for zero ++operand immI_0() %{ ++ predicate(n->get_int() == 0); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_1() %{ ++ predicate(n->get_int() == 1); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_2() %{ ++ predicate(n->get_int() == 2); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_16() %{ ++ predicate(n->get_int() == 16); ++ match(ConI); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_24() %{ ++ predicate(n->get_int() == 24); ++ match(ConI); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for long shifts ++operand immI_32() %{ ++ predicate(n->get_int() == 32); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for byte-wide masking ++operand immI_255() %{ ++ predicate(n->get_int() == 255); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_65535() %{ ++ predicate(n->get_int() == 65535); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_MaxI() %{ ++ predicate(n->get_int() == 2147483647); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_M2047_2048() %{ ++ predicate((-2047 <= n->get_int()) && (n->get_int() <= 2048)); ++ match(ConI); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Valid scale values for addressing modes 
++operand immI_0_3() %{ ++ predicate(0 <= n->get_int() && (n->get_int() <= 3)); ++ match(ConI); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_0_31() %{ ++ predicate(n->get_int() >= 0 && n->get_int() <= 31); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_0_4095() %{ ++ predicate(n->get_int() >= 0 && n->get_int() <= 4095); ++ match(ConI); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_1_4() %{ ++ predicate(1 <= n->get_int() && (n->get_int() <= 4)); ++ match(ConI); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_32_63() %{ ++ predicate(n->get_int() >= 32 && n->get_int() <= 63); ++ match(ConI); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_M128_255() %{ ++ predicate((-128 <= n->get_int()) && (n->get_int() <= 255)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Operand for non-negtive integer mask ++operand immI_nonneg_mask() %{ ++ predicate((n->get_int() >= 0) && (Assembler::is_int_mask(n->get_int()) != -1)); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate ++operand immL() %{ ++ match(ConL); ++ ++ op_cost(20); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immLU5() %{ ++ predicate((0 <= n->get_long()) && (n->get_long() <= 31)); ++ match(ConL); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL10() %{ ++ predicate((-512 <= n->get_long()) && (n->get_long() <= 511)); ++ match(ConL); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL12() %{ ++ predicate((-2048 <= n->get_long()) && (n->get_long() <= 2047)); ++ match(ConL); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate 32-bit signed ++operand immL32() ++%{ ++ predicate(n->get_long() == (int)n->get_long()); ++ match(ConL); ++ ++ op_cost(15); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 3..6 zero ++operand immL_M121() %{ ++ predicate(n->get_long() == -121L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 0..2 zero ++operand immL_M8() %{ ++ predicate(n->get_long() == -8L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 1..2 zero ++operand immL_M7() %{ ++ predicate(n->get_long() == -7L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 2 zero ++operand immL_M5() %{ ++ predicate(n->get_long() == -5L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 0..1 zero ++operand immL_M4() %{ ++ predicate(n->get_long() == -4L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate zero ++operand immL_0() %{ ++ predicate(n->get_long() == 0L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_7() %{ ++ predicate(n->get_long() == 7L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_MaxUI() %{ ++ predicate(n->get_long() == 0xFFFFFFFFL); ++ match(ConL); ++ op_cost(20); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_M2047_2048() %{ ++ predicate((-2047 <= n->get_long()) && (n->get_long() <= 2048)); ++ match(ConL); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_0_4095() %{ ++ 
predicate(n->get_long() >= 0 && n->get_long() <= 4095); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Operand for non-negtive long mask ++operand immL_nonneg_mask() %{ ++ predicate((n->get_long() >= 0) && (Assembler::is_jlong_mask(n->get_long()) != -1)); ++ match(ConL); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer Immediate ++operand immP() %{ ++ match(ConP); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// NULL Pointer Immediate ++operand immP_0() %{ ++ predicate(n->get_ptr() == 0); ++ match(ConP); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer Immediate ++operand immP_no_oop_cheap() %{ ++ predicate(!n->bottom_type()->isa_oop_ptr()); ++ match(ConP); ++ ++ op_cost(5); ++ // formats are generated automatically for constants and base registers ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer for polling page ++operand immP_poll() %{ ++ predicate(n->get_ptr() != 0 && n->get_ptr() == (intptr_t)os::get_polling_page()); ++ match(ConP); ++ op_cost(5); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer Immediate ++operand immN() %{ ++ match(ConN); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// NULL Pointer Immediate ++operand immN_0() %{ ++ predicate(n->get_narrowcon() == 0); ++ match(ConN); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immNKlass() %{ ++ match(ConNKlass); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Single-precision floating-point immediate ++operand immF() %{ ++ match(ConF); ++ ++ op_cost(20); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Single-precision floating-point zero ++operand immF_0() %{ ++ predicate(jint_cast(n->getf()) == 0); ++ match(ConF); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Double-precision floating-point immediate ++operand immD() %{ ++ match(ConD); ++ ++ op_cost(20); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Double-precision floating-point zero ++operand immD_0() %{ ++ predicate(jlong_cast(n->getd()) == 0); ++ match(ConD); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Register Operands ++// Integer Register ++operand mRegI() %{ ++ constraint(ALLOC_IN_RC(int_reg)); ++ match(RegI); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand no_Ax_mRegI() %{ ++ constraint(ALLOC_IN_RC(no_Ax_int_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand mS0RegI() %{ ++ constraint(ALLOC_IN_RC(s0_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S0" %} ++ interface(REG_INTER); ++%} ++ ++operand mS1RegI() %{ ++ constraint(ALLOC_IN_RC(s1_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S1" %} ++ interface(REG_INTER); ++%} ++ ++operand mS3RegI() %{ ++ constraint(ALLOC_IN_RC(s3_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S3" %} ++ interface(REG_INTER); ++%} ++ ++operand mS4RegI() %{ ++ constraint(ALLOC_IN_RC(s4_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S4" %} ++ interface(REG_INTER); ++%} ++ ++operand mS5RegI() %{ ++ constraint(ALLOC_IN_RC(s5_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S5" %} ++ interface(REG_INTER); ++%} ++ ++operand mS6RegI() %{ ++ constraint(ALLOC_IN_RC(s6_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S6" %} ++ interface(REG_INTER); ++%} ++ ++operand mS7RegI() %{ ++ constraint(ALLOC_IN_RC(s7_reg)); 
++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S7" %} ++ interface(REG_INTER); ++%} ++ ++ ++operand mT0RegI() %{ ++ constraint(ALLOC_IN_RC(t0_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T0" %} ++ interface(REG_INTER); ++%} ++ ++operand mT1RegI() %{ ++ constraint(ALLOC_IN_RC(t1_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T1" %} ++ interface(REG_INTER); ++%} ++ ++operand mT2RegI() %{ ++ constraint(ALLOC_IN_RC(t2_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T2" %} ++ interface(REG_INTER); ++%} ++ ++operand mT3RegI() %{ ++ constraint(ALLOC_IN_RC(t3_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T3" %} ++ interface(REG_INTER); ++%} ++ ++operand mT8RegI() %{ ++ constraint(ALLOC_IN_RC(t8_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T8" %} ++ interface(REG_INTER); ++%} ++ ++operand mT4RegI() %{ ++ constraint(ALLOC_IN_RC(t4_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T4" %} ++ interface(REG_INTER); ++%} ++ ++operand mA0RegI() %{ ++ constraint(ALLOC_IN_RC(a0_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A0" %} ++ interface(REG_INTER); ++%} ++ ++operand mA1RegI() %{ ++ constraint(ALLOC_IN_RC(a1_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A1" %} ++ interface(REG_INTER); ++%} ++ ++operand mA2RegI() %{ ++ constraint(ALLOC_IN_RC(a2_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A2" %} ++ interface(REG_INTER); ++%} ++ ++operand mA3RegI() %{ ++ constraint(ALLOC_IN_RC(a3_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A3" %} ++ interface(REG_INTER); ++%} ++ ++operand mA4RegI() %{ ++ constraint(ALLOC_IN_RC(a4_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A4" %} ++ interface(REG_INTER); ++%} ++ ++operand mA5RegI() %{ ++ constraint(ALLOC_IN_RC(a5_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A5" %} ++ interface(REG_INTER); ++%} ++ ++operand mA6RegI() %{ ++ constraint(ALLOC_IN_RC(a6_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A6" %} ++ interface(REG_INTER); ++%} ++ ++operand mA7RegI() %{ ++ constraint(ALLOC_IN_RC(a7_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A7" %} ++ interface(REG_INTER); ++%} ++ ++operand mRegN() %{ ++ constraint(ALLOC_IN_RC(int_reg)); ++ match(RegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t0_RegN() %{ ++ constraint(ALLOC_IN_RC(t0_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t1_RegN() %{ ++ constraint(ALLOC_IN_RC(t1_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t3_RegN() %{ ++ constraint(ALLOC_IN_RC(t3_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t8_RegN() %{ ++ constraint(ALLOC_IN_RC(t8_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a0_RegN() %{ ++ constraint(ALLOC_IN_RC(a0_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a1_RegN() %{ ++ constraint(ALLOC_IN_RC(a1_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a2_RegN() %{ ++ constraint(ALLOC_IN_RC(a2_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a3_RegN() %{ ++ constraint(ALLOC_IN_RC(a3_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a4_RegN() %{ ++ constraint(ALLOC_IN_RC(a4_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ 
format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a5_RegN() %{ ++ constraint(ALLOC_IN_RC(a5_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a6_RegN() %{ ++ constraint(ALLOC_IN_RC(a6_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a7_RegN() %{ ++ constraint(ALLOC_IN_RC(a7_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s0_RegN() %{ ++ constraint(ALLOC_IN_RC(s0_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s1_RegN() %{ ++ constraint(ALLOC_IN_RC(s1_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s2_RegN() %{ ++ constraint(ALLOC_IN_RC(s2_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s3_RegN() %{ ++ constraint(ALLOC_IN_RC(s3_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s4_RegN() %{ ++ constraint(ALLOC_IN_RC(s4_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s5_RegN() %{ ++ constraint(ALLOC_IN_RC(s5_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s6_RegN() %{ ++ constraint(ALLOC_IN_RC(s6_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s7_RegN() %{ ++ constraint(ALLOC_IN_RC(s7_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Pointer Register ++operand mRegP() %{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(RegP); ++ match(a0_RegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand no_T8_mRegP() %{ ++ constraint(ALLOC_IN_RC(no_T8_p_reg)); ++ match(RegP); ++ match(mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand no_Ax_mRegP() %{ ++ constraint(ALLOC_IN_RC(no_Ax_p_reg)); ++ match(RegP); ++ match(mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s3_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s3_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s4_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s4_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s5_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s5_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s6_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s6_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s7_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s7_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t0_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t0_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t2_RegP() ++%{ ++ 
constraint(ALLOC_IN_RC(t2_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t3_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t3_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t8_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t8_long_reg)); ++ match(RegP); ++ match(mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a0_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a0_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a2_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a2_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a3_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a3_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a4_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a4_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++ ++operand a5_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a5_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a6_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a6_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a7_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a7_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v0_RegP() ++%{ ++ constraint(ALLOC_IN_RC(v0_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(v1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand mRegL() %{ ++ constraint(ALLOC_IN_RC(long_reg)); ++ match(RegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand mRegI2L(mRegI reg) %{ ++ match(ConvI2L reg); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand mRegL2I(mRegL reg) %{ ++ match(ConvL2I reg); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v0RegL() %{ ++ constraint(ALLOC_IN_RC(v0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v1RegL() %{ ++ constraint(ALLOC_IN_RC(v1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a0RegL() %{ ++ constraint(ALLOC_IN_RC(a0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ "A0" %} ++ interface(REG_INTER); ++%} ++ ++operand a1RegL() %{ ++ constraint(ALLOC_IN_RC(a1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a2RegL() %{ ++ constraint(ALLOC_IN_RC(a2_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a3RegL() %{ ++ constraint(ALLOC_IN_RC(a3_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t0RegL() %{ ++ constraint(ALLOC_IN_RC(t0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ 
format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t1RegL() %{ ++ constraint(ALLOC_IN_RC(t1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t3RegL() %{ ++ constraint(ALLOC_IN_RC(t3_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t8RegL() %{ ++ constraint(ALLOC_IN_RC(t8_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a4RegL() %{ ++ constraint(ALLOC_IN_RC(a4_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a5RegL() %{ ++ constraint(ALLOC_IN_RC(a5_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a6RegL() %{ ++ constraint(ALLOC_IN_RC(a6_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a7RegL() %{ ++ constraint(ALLOC_IN_RC(a7_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s0RegL() %{ ++ constraint(ALLOC_IN_RC(s0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s1RegL() %{ ++ constraint(ALLOC_IN_RC(s1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s3RegL() %{ ++ constraint(ALLOC_IN_RC(s3_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s4RegL() %{ ++ constraint(ALLOC_IN_RC(s4_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s7RegL() %{ ++ constraint(ALLOC_IN_RC(s7_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Floating register operands ++operand regF() %{ ++ constraint(ALLOC_IN_RC(flt_reg)); ++ match(RegF); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++//Double Precision Floating register operands ++operand regD() %{ ++ constraint(ALLOC_IN_RC(dbl_reg)); ++ match(RegD); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++//----------Memory Operands---------------------------------------------------- ++// Indirect Memory Operand ++operand indirect(mRegP reg) %{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(reg); ++ ++ format %{ "[$reg] @ indirect" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); /* NO_INDEX */ ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++// Indirect Memory Plus Short Offset Operand ++operand indOffset12(mRegP reg, immL12 off) ++%{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP reg off); ++ ++ op_cost(10); ++ format %{ "[$reg + $off (12-bit)] @ indOffset12" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); /* NO_INDEX */ ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++operand indOffset12I2L(mRegP reg, immI12 off) ++%{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP reg (ConvI2L off)); ++ ++ op_cost(10); ++ format %{ "[$reg + $off (12-bit)] @ indOffset12I2L" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); /* NO_INDEX */ ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++// Indirect Memory Plus Index Register ++operand indIndex(mRegP addr, mRegL index) %{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP addr index); ++ ++ op_cost(20); ++ format %{"[$addr + $index] @ indIndex" %} ++ interface(MEMORY_INTER) %{ ++ base($addr); ++ index($index); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++operand indIndexI2L(mRegP reg, mRegI ireg) ++%{ ++ 
constraint(ALLOC_IN_RC(ptr_reg)); ++ match(AddP reg (ConvI2L ireg)); ++ op_cost(10); ++ format %{ "[$reg + $ireg] @ indIndexI2L" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index($ireg); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++// Indirect Memory Operand ++operand indirectNarrow(mRegN reg) ++%{ ++ predicate(Universe::narrow_oop_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(DecodeN reg); ++ ++ format %{ "[$reg] @ indirectNarrow" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++// Indirect Memory Plus Short Offset Operand ++operand indOffset12Narrow(mRegN reg, immL12 off) ++%{ ++ predicate(Universe::narrow_oop_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(AddP (DecodeN reg) off); ++ ++ format %{ "[$reg + $off (12-bit)] @ indOffset12Narrow" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++//----------Conditional Branch Operands---------------------------------------- ++// Comparison Op - This is the operation of the comparison, and is limited to ++// the following set of codes: ++// L (<), LE (<=), G (>), GE (>=), E (==), NE (!=) ++// ++// Other attributes of the comparison, such as unsignedness, are specified ++// by the comparison instruction that sets a condition code flags register. ++// That result is represented by a flags operand whose subtype is appropriate ++// to the unsignedness (etc.) of the comparison. ++// ++// Later, the instruction which matches both the Comparison Op (a Bool) and ++// the flags (produced by the Cmp) specifies the coding of the comparison op ++// by matching a specific subtype of Bool operand below, such as cmpOp. ++ ++// Comparision Code ++operand cmpOp() %{ ++ match(Bool); ++ ++ format %{ "" %} ++ interface(COND_INTER) %{ ++ equal(0x01); ++ not_equal(0x02); ++ greater(0x03); ++ greater_equal(0x04); ++ less(0x05); ++ less_equal(0x06); ++ overflow(0x7); ++ no_overflow(0x8); ++ %} ++%} ++ ++operand cmpOpEqNe() %{ ++ match(Bool); ++ predicate(n->as_Bool()->_test._test == BoolTest::ne ++ || n->as_Bool()->_test._test == BoolTest::eq); ++ ++ format %{ "" %} ++ interface(COND_INTER) %{ ++ equal(0x01); ++ not_equal(0x02); ++ greater(0x03); ++ greater_equal(0x04); ++ less(0x05); ++ less_equal(0x06); ++ overflow(0x7); ++ no_overflow(0x8); ++ %} ++%} ++ ++//----------Special Memory Operands-------------------------------------------- ++// Stack Slot Operand - This operand is used for loading and storing temporary ++// values on the stack where a match requires a value to ++// flow through memory. 
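
For reference, every memory-style operand in this file ultimately reduces to the MEMORY_INTER quadruple (base, index, scale, disp); the stack-slot operands below simply fix the base to SP (encoding 0x1d) and put the allocator's stack offset in disp. A standalone sketch of that decomposition (example registers and offsets are illustrative):

  #include <cstdio>

  // One entry per addressing form; values are examples, not generated code.
  struct MemInterface {
    const char* base;   // base register
    const char* index;  // index register ("none" when absent)
    int         scale;  // shift applied to the index
    long        disp;   // byte displacement (a stack offset for stackSlot operands)
  };

  int main() {
    const MemInterface forms[] = {
      { "S1", "none", 0, 0  },   // indirect:    [$reg]
      { "S1", "none", 0, 64 },   // indOffset12: [$reg + $off]
      { "S1", "T2",   0, 0  },   // indIndex:    [$addr + $index]
      { "SP", "none", 0, 16 },   // stackSlotI:  base(0x1d) is SP, disp is the stack offset
    };
    for (const MemInterface& m : forms)
      std::printf("[%s + %s<<%d + %ld]\n", m.base, m.index, m.scale, m.disp);
    return 0;
  }
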
++operand stackSlotP(sRegP reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotI(sRegI reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotF(sRegF reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotD(sRegD reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotL(sRegL reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++ ++//------------------------OPERAND CLASSES-------------------------------------- ++opclass memory( indirect, indOffset12, indOffset12I2L, indIndex, indIndexI2L, ++ indirectNarrow, indOffset12Narrow); ++opclass memory_loadRange(indOffset12, indirect); ++ ++opclass mRegLorI2L(mRegI2L, mRegL); ++opclass mRegIorL2I( mRegI, mRegL2I); ++ ++//----------PIPELINE----------------------------------------------------------- ++// Rules which define the behavior of the target architectures pipeline. ++ ++pipeline %{ ++ ++ //----------ATTRIBUTES--------------------------------------------------------- ++ attributes %{ ++ fixed_size_instructions; // Fixed size instructions ++ max_instructions_per_bundle = 1; // 1 instruction per bundle ++ max_bundles_per_cycle = 4; // Up to 4 bundles per cycle ++ bundle_unit_size=4; ++ instruction_unit_size = 4; // An instruction is 4 bytes long ++ instruction_fetch_unit_size = 16; // The processor fetches one line ++ instruction_fetch_units = 1; // of 16 bytes ++ ++ // List of nop instructions ++ nops( MachNop ); ++ %} ++ ++ //----------RESOURCES---------------------------------------------------------- ++ // Resources are the functional units available to the machine ++ ++ resources(D1, D2, D3, D4, DECODE = D1 | D2 | D3| D4, ALU1, ALU2, ALU = ALU1 | ALU2, FPU1, FPU2, FPU = FPU1 | FPU2, MEM, BR); ++ ++ //----------PIPELINE DESCRIPTION----------------------------------------------- ++ // Pipeline Description specifies the stages in the machine's pipeline ++ ++ // IF: fetch ++ // ID: decode ++ // RD: read ++ // CA: caculate ++ // WB: write back ++ // CM: commit ++ ++ pipe_desc(IF, ID, RD, CA, WB, CM); ++ ++ ++ //----------PIPELINE CLASSES--------------------------------------------------- ++ // Pipeline Classes describe the stages in which input and output are ++ // referenced by the hardware pipeline. 
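
A toy reading of the latency notation used by the pipe_class entries that follow: "dst : WB(write)+N" means the destination is written N cycles after the WB stage of pipe_desc(IF, ID, RD, CA, WB, CM). The sketch below is only an interpretation aid, not ADLC output, and it assumes cycle numbering starts at IF = 0:

  #include <cstdio>

  enum Stage { IF, ID, RD, CA, WB, CM };  // same order as pipe_desc above

  // "op : WB(write)+extra" read as: result written 'extra' cycles after the WB stage.
  static int result_ready_cycle(Stage writes_in, int extra) {
    return static_cast<int>(writes_in) + extra;
  }

  int main() {
    std::printf("ialu_regI_regI (WB+1):  cycle %d\n", result_ready_cycle(WB, 1));
    std::printf("ialu_loadI     (WB+3):  cycle %d\n", result_ready_cycle(WB, 3));
    std::printf("ialu_div       (WB+10): cycle %d\n", result_ready_cycle(WB, 10));
    return 0;
  }
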
++ ++ //No.1 Integer ALU reg-reg operation : dst <-- reg1 op reg2 ++ pipe_class ialu_regI_regI(mRegI dst, mRegI src1, mRegI src2) %{ ++ single_instruction; ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+1; ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.19 Integer mult operation : dst <-- reg1 mult reg2 ++ pipe_class ialu_mult(mRegI dst, mRegI src1, mRegI src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+5; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ pipe_class mulL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+10; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ //No.19 Integer div operation : dst <-- reg1 div reg2 ++ pipe_class ialu_div(mRegI dst, mRegI src1, mRegI src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+10; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ //No.19 Integer mod operation : dst <-- reg1 mod reg2 ++ pipe_class ialu_mod(mRegI dst, mRegI src1, mRegI src2) %{ ++ instruction_count(2); ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+10; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ //No.15 Long ALU reg-reg operation : dst <-- reg1 op reg2 ++ pipe_class ialu_regL_regL(mRegL dst, mRegL src1, mRegL src2) %{ ++ instruction_count(2); ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.18 Long ALU reg-imm16 operation : dst <-- reg1 op imm16 ++ pipe_class ialu_regL_imm16(mRegL dst, mRegL src) %{ ++ instruction_count(2); ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //no.16 load Long from memory : ++ pipe_class ialu_loadL(mRegL dst, memory mem) %{ ++ instruction_count(2); ++ mem : RD(read); ++ dst : WB(write)+5; ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.17 Store Long to Memory : ++ pipe_class ialu_storeL(mRegL src, memory mem) %{ ++ instruction_count(2); ++ mem : RD(read); ++ src : RD(read); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.2 Integer ALU reg-imm16 operation : dst <-- reg1 op imm16 ++ pipe_class ialu_regI_imm16(mRegI dst, mRegI src) %{ ++ single_instruction; ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.3 Integer move operation : dst <-- reg ++ pipe_class ialu_regI_mov(mRegI dst, mRegI src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.4 No instructions : do nothing ++ pipe_class empty( ) %{ ++ instruction_count(0); ++ %} ++ ++ //No.5 UnConditional branch : ++ pipe_class pipe_jump( label labl ) %{ ++ multiple_bundles; ++ DECODE : ID; ++ BR : RD; ++ %} ++ ++ //No.6 ALU Conditional branch : ++ pipe_class pipe_alu_branch(mRegI src1, mRegI src2, label labl ) %{ ++ multiple_bundles; ++ src1 : RD(read); ++ src2 : RD(read); ++ DECODE : ID; ++ BR : RD; ++ %} ++ ++ //no.7 load integer from memory : ++ pipe_class ialu_loadI(mRegI dst, memory mem) %{ ++ mem : RD(read); ++ dst : WB(write)+3; ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.8 Store Integer to Memory : ++ pipe_class ialu_storeI(mRegI src, memory mem) %{ ++ mem : RD(read); ++ src : RD(read); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ ++ //No.10 Floating FPU reg-reg operation : dst <-- reg1 op reg2 ++ pipe_class fpu_regF_regF(regF dst, regF src1, regF src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU : CA; ++ %} ++ ++ //No.22 Floating div operation : dst <-- reg1 div reg2 ++ pipe_class fpu_div(regF dst, regF src1, regF src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU2 : 
CA; ++ %} ++ ++ pipe_class fcvt_I2D(regD dst, mRegI src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU1 : CA; ++ %} ++ ++ pipe_class fcvt_D2I(mRegI dst, regD src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU1 : CA; ++ %} ++ ++ pipe_class pipe_mfc1(mRegI dst, regD src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ pipe_class pipe_mtc1(regD dst, mRegI src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ MEM : RD(5); ++ %} ++ ++ //No.23 Floating sqrt operation : dst <-- reg1 sqrt reg2 ++ pipe_class fpu_sqrt(regF dst, regF src1, regF src2) %{ ++ multiple_bundles; ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU2 : CA; ++ %} ++ ++ //No.11 Load Floating from Memory : ++ pipe_class fpu_loadF(regF dst, memory mem) %{ ++ instruction_count(1); ++ mem : RD(read); ++ dst : WB(write)+3; ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.12 Store Floating to Memory : ++ pipe_class fpu_storeF(regF src, memory mem) %{ ++ instruction_count(1); ++ mem : RD(read); ++ src : RD(read); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.13 FPU Conditional branch : ++ pipe_class pipe_fpu_branch(regF src1, regF src2, label labl ) %{ ++ multiple_bundles; ++ src1 : RD(read); ++ src2 : RD(read); ++ DECODE : ID; ++ BR : RD; ++ %} ++ ++//No.14 Floating FPU reg operation : dst <-- op reg ++ pipe_class fpu1_regF(regF dst, regF src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU : CA; ++ %} ++ ++ pipe_class long_memory_op() %{ ++ instruction_count(10); multiple_bundles; force_serialization; ++ fixed_latency(30); ++ %} ++ ++ pipe_class simple_call() %{ ++ instruction_count(10); multiple_bundles; force_serialization; ++ fixed_latency(200); ++ BR : RD; ++ %} ++ ++ pipe_class call() %{ ++ instruction_count(10); multiple_bundles; force_serialization; ++ fixed_latency(200); ++ %} ++ ++ //FIXME: ++ //No.9 Piple slow : for multi-instructions ++ pipe_class pipe_slow( ) %{ ++ instruction_count(20); ++ force_serialization; ++ multiple_bundles; ++ fixed_latency(50); ++ %} ++ ++%} ++ ++ ++ ++//----------INSTRUCTIONS------------------------------------------------------- ++// ++// match -- States which machine-independent subtree may be replaced ++// by this instruction. ++// ins_cost -- The estimated cost of this instruction is used by instruction ++// selection to identify a minimum cost tree of machine ++// instructions that matches a tree of machine-independent ++// instructions. ++// format -- A string providing the disassembly for this instruction. ++// The value of an instruction's operand may be inserted ++// by referring to it with a '$' prefix. ++// opcode -- Three instruction opcodes may be provided. These are referred ++// to within an encode class as $primary, $secondary, and $tertiary ++// respectively. The primary opcode is commonly used to ++// indicate the type of machine instruction, while secondary ++// and tertiary are often used for prefix options or addressing ++// modes. ++// ins_encode -- A list of encode classes with parameters. The encode class ++// name must have been defined in an 'enc_class' specification ++// in the encode section of the architecture description. 
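
The load and store instructs that follow all defer to a MacroAssembler::loadstore_enc() helper defined elsewhere in this patch. The standalone sketch below is not that helper; it only illustrates, under the assumption of a 12-bit signed displacement field, the kind of addressing-mode dispatch such an encoding has to make (the real helper also handles the register-indexed and narrow-oop forms in the memory opclass):

  #include <cstdio>

  static bool fits_simm12(long long v) { return v >= -2048 && v <= 2047; }

  // Pretend emitter: prints the instruction(s) a base+displacement load would need.
  static void sketch_load_int(const char* dst, const char* base, long long disp) {
    if (fits_simm12(disp)) {
      std::printf("ld_w   %s, %s, %lld\n", dst, base, disp);  // displacement folds into the load
    } else {
      std::printf("li     AT, %lld\n", disp);                 // materialize the offset in AT
      std::printf("add_d  AT, %s, AT\n", base);
      std::printf("ld_w   %s, AT, 0\n", dst);
    }
  }

  int main() {
    sketch_load_int("T0", "S1", 64);      // small offset: single ld_w
    sketch_load_int("T0", "S1", 65536);   // large offset: li + add_d + ld_w
    return 0;
  }
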
++ ++ ++// Load Integer ++instruct loadI(mRegI dst, memory mem) %{ ++ match(Set dst (LoadI mem)); ++ ++ ins_cost(125); ++ format %{ "ld_w $dst, $mem #@loadI" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_INT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadI_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadI mem))); ++ ++ ins_cost(125); ++ format %{ "ld_w $dst, $mem #@loadI_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_INT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Integer (32 bit signed) to Byte (8 bit signed) ++instruct loadI2B(mRegI dst, memory mem, immI_24 twentyfour) %{ ++ match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour)); ++ ++ ins_cost(125); ++ format %{ "ld_b $dst, $mem\t# int -> byte #@loadI2B" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned) ++instruct loadI2UB(mRegI dst, memory mem, immI_255 mask) %{ ++ match(Set dst (AndI (LoadI mem) mask)); ++ ++ ins_cost(125); ++ format %{ "ld_bu $dst, $mem\t# int -> ubyte #@loadI2UB" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Integer (32 bit signed) to Short (16 bit signed) ++instruct loadI2S(mRegI dst, memory mem, immI_16 sixteen) %{ ++ match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen)); ++ ++ ins_cost(125); ++ format %{ "ld_h $dst, $mem\t# int -> short #@loadI2S" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_SHORT); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned) ++instruct loadI2US(mRegI dst, memory mem, immI_65535 mask) %{ ++ match(Set dst (AndI (LoadI mem) mask)); ++ ++ ins_cost(125); ++ format %{ "ld_hu $dst, $mem\t# int -> ushort/char #@loadI2US" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_SHORT); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Long. 
++instruct loadL(mRegL dst, memory mem) %{ ++// predicate(!((LoadLNode*)n)->require_atomic_access()); ++ match(Set dst (LoadL mem)); ++ ++ ins_cost(250); ++ format %{ "ld_d $dst, $mem #@loadL" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LONG); ++ %} ++ ins_pipe( ialu_loadL ); ++%} ++ ++// Load Long - UNaligned ++instruct loadL_unaligned(mRegL dst, memory mem) %{ ++ match(Set dst (LoadL_unaligned mem)); ++ ++ // FIXME: Need more effective ldl/ldr ++ ins_cost(450); ++ format %{ "ld_d $dst, $mem #@loadL_unaligned\n\t" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LONG); ++ %} ++ ins_pipe( ialu_loadL ); ++%} ++ ++// Store Long ++instruct storeL_reg(memory mem, mRegL src) %{ ++ match(Set mem (StoreL mem src)); ++ predicate(!needs_releasing_store(n)); ++ ++ ins_cost(200); ++ format %{ "st_d $mem, $src #@storeL_reg\n" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeL ); ++%} ++ ++instruct storeL_reg_volatile(indirect mem, mRegL src) %{ ++ match(Set mem (StoreL mem src)); ++ ++ ins_cost(205); ++ format %{ "amswap_db_d R0, $src, $mem #@storeL_reg\n" %} ++ ins_encode %{ ++ __ amswap_db_d(R0, $src$$Register, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeL ); ++%} ++ ++instruct storeL_immL_0(memory mem, immL_0 zero) %{ ++ match(Set mem (StoreL mem zero)); ++ predicate(!needs_releasing_store(n)); ++ ++ ins_cost(180); ++ format %{ "st_d zero, $mem #@storeL_immL_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeL ); ++%} ++ ++instruct storeL_immL_0_volatile(indirect mem, immL_0 zero) %{ ++ match(Set mem (StoreL mem zero)); ++ ++ ins_cost(185); ++ format %{ "amswap_db_d AT, R0, $mem #@storeL_immL_0" %} ++ ins_encode %{ ++ __ amswap_db_d(AT, R0, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeL ); ++%} ++ ++// Load Compressed Pointer ++instruct loadN(mRegN dst, memory mem) ++%{ ++ match(Set dst (LoadN mem)); ++ ++ ins_cost(125); // XXX ++ format %{ "ld_wu $dst, $mem\t# compressed ptr @ loadN" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++instruct loadN2P(mRegP dst, memory mem) ++%{ ++ match(Set dst (DecodeN (LoadN mem))); ++ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); ++ ++ ins_cost(125); // XXX ++ format %{ "ld_wu $dst, $mem\t# @ loadN2P" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++// Load Pointer ++instruct loadP(mRegP dst, memory mem) %{ ++ match(Set dst (LoadP mem)); ++ ++ ins_cost(125); ++ format %{ "ld_d $dst, $mem #@loadP" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, 
MacroAssembler::LOAD_LONG); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Klass Pointer ++instruct loadKlass(mRegP dst, memory mem) %{ ++ match(Set dst (LoadKlass mem)); ++ ++ ins_cost(125); ++ format %{ "MOV $dst,$mem @ loadKlass" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LONG); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load narrow Klass Pointer ++instruct loadNKlass(mRegN dst, memory mem) ++%{ ++ match(Set dst (LoadNKlass mem)); ++ ++ ins_cost(125); // XXX ++ format %{ "ld_wu $dst, $mem\t# compressed klass ptr @ loadNKlass" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++instruct loadN2PKlass(mRegP dst, memory mem) ++%{ ++ match(Set dst (DecodeNKlass (LoadNKlass mem))); ++ predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0); ++ ++ ins_cost(125); // XXX ++ format %{ "ld_wu $dst, $mem\t# compressed klass ptr @ loadN2PKlass" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++// Load Constant ++instruct loadConI(mRegI dst, immI src) %{ ++ match(Set dst src); ++ ++ ins_cost(120); ++ format %{ "mov $dst, $src #@loadConI" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ int value = $src$$constant; ++ __ li(dst, value); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++instruct loadConL(mRegL dst, immL src) %{ ++ match(Set dst src); ++ ins_cost(120); ++ format %{ "li $dst, $src @ loadConL" %} ++ ins_encode %{ ++ __ li($dst$$Register, $src$$constant); ++ %} ++ ins_pipe(ialu_regL_regL); ++%} ++ ++// Load Range ++instruct loadRange(mRegI dst, memory_loadRange mem) %{ ++ match(Set dst (LoadRange mem)); ++ ++ ins_cost(125); ++ format %{ "MOV $dst,$mem @ loadRange" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_INT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct storeP(memory mem, mRegP src ) %{ ++ match(Set mem (StoreP mem src)); ++ predicate(!needs_releasing_store(n)); ++ ++ ins_cost(125); ++ format %{ "st_d $src, $mem #@storeP" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeP_volatile(indirect mem, mRegP src ) %{ ++ match(Set mem (StoreP mem src)); ++ ++ ins_cost(130); ++ format %{ "amswap_db_d R0, $src, $mem #@storeP" %} ++ ins_encode %{ ++ __ amswap_db_d(R0, $src$$Register, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store NULL Pointer, mark word, or other simple pointer constant. 
++instruct storeImmP_immP_0(memory mem, immP_0 zero) %{ ++ match(Set mem (StoreP mem zero)); ++ predicate(!needs_releasing_store(n)); ++ ++ ins_cost(125); ++ format %{ "mov $mem, $zero #@storeImmP_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeImmP_immP_0_volatile(indirect mem, immP_0 zero) %{ ++ match(Set mem (StoreP mem zero)); ++ ++ ins_cost(130); ++ format %{ "amswap_db_d AT, R0, $mem #@storeImmP_0" %} ++ ins_encode %{ ++ __ amswap_db_d(AT, R0, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store Compressed Pointer ++instruct storeN(memory mem, mRegN src) ++%{ ++ match(Set mem (StoreN mem src)); ++ predicate(!needs_releasing_store(n)); ++ ++ ins_cost(125); // XXX ++ format %{ "st_w $mem, $src\t# compressed ptr @ storeN" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeN_volatile(indirect mem, mRegN src) ++%{ ++ match(Set mem (StoreN mem src)); ++ ++ ins_cost(130); // XXX ++ format %{ "amswap_db_w R0, $src, $mem # compressed ptr @ storeN" %} ++ ins_encode %{ ++ __ amswap_db_w(R0, $src$$Register, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeP2N(memory mem, mRegP src) ++%{ ++ match(Set mem (StoreN mem (EncodeP src))); ++ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0 && !needs_releasing_store(n)); ++ ++ ins_cost(125); // XXX ++ format %{ "st_w $mem, $src\t# @ storeP2N" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeP2N_volatile(indirect mem, mRegP src) ++%{ ++ match(Set mem (StoreN mem (EncodeP src))); ++ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); ++ ++ ins_cost(130); // XXX ++ format %{ "amswap_db_w R0, $src, $mem # @ storeP2N" %} ++ ins_encode %{ ++ __ amswap_db_w(R0, $src$$Register, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeNKlass(memory mem, mRegN src) ++%{ ++ match(Set mem (StoreNKlass mem src)); ++ predicate(!needs_releasing_store(n)); ++ ++ ins_cost(125); // XXX ++ format %{ "st_w $mem, $src\t# compressed klass ptr @ storeNKlass" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeNKlass_volatile(indirect mem, mRegN src) ++%{ ++ match(Set mem (StoreNKlass mem src)); ++ ++ ins_cost(130); ++ format %{ "amswap_db_w R0, $src, $mem # compressed klass ptr @ storeNKlass" %} ++ ins_encode %{ ++ __ amswap_db_w(R0, $src$$Register, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeP2NKlass(memory mem, mRegP src) ++%{ ++ match(Set mem (StoreNKlass mem (EncodePKlass src))); ++ predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0 && !needs_releasing_store(n)); ++ ++ ins_cost(125); // XXX ++ format %{ "st_w $mem, $src\t# @ storeP2NKlass" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%}
++ ++instruct storeP2NKlass_volatile(indirect mem, mRegP src) ++%{ ++ match(Set mem (StoreNKlass mem (EncodePKlass src))); ++ predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0); ++ ++ ins_cost(130); ++ format %{ "amswap_db_w R0, $src, $mem # @ storeP2NKlass" %} ++ ins_encode %{ ++ __ amswap_db_w(R0, $src$$Register, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeImmN_immN_0(memory mem, immN_0 zero) ++%{ ++ match(Set mem (StoreN mem zero)); ++ predicate(!needs_releasing_store(n)); ++ ++ ins_cost(125); // XXX ++ format %{ "storeN0 zero, $mem\t# compressed ptr" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeImmN_immN_0_volatile(indirect mem, immN_0 zero) ++%{ ++ match(Set mem (StoreN mem zero)); ++ ++ ins_cost(130); // XXX ++ format %{ "amswap_db_w AT, R0, $mem # compressed ptr" %} ++ ins_encode %{ ++ __ amswap_db_w(AT, R0, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store Byte ++instruct storeB_immB_0(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreB mem zero)); ++ ++ format %{ "mov $mem, zero #@storeB_immB_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeB(memory mem, mRegIorL2I src) %{ ++ match(Set mem (StoreB mem src)); ++ ++ ins_cost(125); ++ format %{ "st_b $src, $mem #@storeB" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Load Byte (8bit signed) ++instruct loadB(mRegI dst, memory mem) %{ ++ match(Set dst (LoadB mem)); ++ ++ ins_cost(125); ++ format %{ "ld_b $dst, $mem #@loadB" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadB_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadB mem))); ++ ++ ins_cost(125); ++ format %{ "ld_b $dst, $mem #@loadB_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Byte (8bit UNsigned) ++instruct loadUB(mRegI dst, memory mem) %{ ++ match(Set dst (LoadUB mem)); ++ ++ ins_cost(125); ++ format %{ "ld_bu $dst, $mem #@loadUB" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadUB_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadUB mem))); ++ ++ ins_cost(125); ++ format %{ "ld_bu $dst, $mem #@loadUB_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Short (16bit signed) ++instruct loadS(mRegI dst, memory mem) %{ ++ match(Set dst (LoadS mem)); ++ ++ ins_cost(125); ++ format %{ "ld_h $dst, $mem #@loadS" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Short (16 bit signed) to Byte (8 bit signed) ++instruct loadS2B(mRegI dst, memory mem, 
immI_24 twentyfour) %{ ++ match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour)); ++ ++ ins_cost(125); ++ format %{ "ld_b $dst, $mem\t# short -> byte #@loadS2B" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct loadS_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadS mem))); ++ ++ ins_cost(125); ++ format %{ "ld_h $dst, $mem #@loadS_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Store Integer Immediate ++instruct storeI_immI_0(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreI mem zero)); ++ predicate(!needs_releasing_store(n)); ++ ++ ins_cost(120); ++ format %{ "mov $mem, zero #@storeI_immI_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeI_immI_0_volatile(indirect mem, immI_0 zero) %{ ++ match(Set mem (StoreI mem zero)); ++ ++ ins_cost(125); ++ format %{ "amswap_db_w AT, R0, $mem #@storeI_immI_0" %} ++ ins_encode %{ ++ __ amswap_db_w(AT, R0, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store Integer ++instruct storeI(memory mem, mRegIorL2I src) %{ ++ match(Set mem (StoreI mem src)); ++ predicate(!needs_releasing_store(n)); ++ ++ ins_cost(125); ++ format %{ "st_w $mem, $src #@storeI" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeI_volatile(indirect mem, mRegIorL2I src) %{ ++ match(Set mem (StoreI mem src)); ++ ++ ins_cost(130); ++ format %{ "amswap_db_w R0, $src, $mem #@storeI" %} ++ ins_encode %{ ++ __ amswap_db_w(R0, $src$$Register, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Load Float ++instruct loadF(regF dst, memory mem) %{ ++ match(Set dst (LoadF mem)); ++ ++ ins_cost(150); ++ format %{ "loadF $dst, $mem #@loadF" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_FLOAT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadConP_general(mRegP dst, immP src) %{ ++ match(Set dst src); ++ ++ ins_cost(120); ++ format %{ "li $dst, $src #@loadConP_general" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ long* value = (long*)$src$$constant; ++ ++ if($src->constant_reloc() == relocInfo::metadata_type){ ++ int klass_index = __ oop_recorder()->find_index((Klass*)value); ++ RelocationHolder rspec = metadata_Relocation::spec(klass_index); ++ ++ __ relocate(rspec); ++ __ patchable_li52(dst, (long)value); ++ } else if($src->constant_reloc() == relocInfo::oop_type){ ++ int oop_index = __ oop_recorder()->find_index((jobject)value); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ ++ __ relocate(rspec); ++ __ patchable_li52(dst, (long)value); ++ } else if ($src->constant_reloc() == relocInfo::none) { ++ __ li(dst, (long)value); ++ } ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConP_no_oop_cheap(mRegP dst, immP_no_oop_cheap src) %{ ++ match(Set dst src); ++ ++ ins_cost(80); ++ format %{ "li $dst, $src @ loadConP_no_oop_cheap" %} ++ ++ ins_encode %{ ++ if ($src->constant_reloc() == relocInfo::metadata_type) { ++ __ 
mov_metadata($dst$$Register, (Metadata*)$src$$constant); ++ } else { ++ __ li($dst$$Register, $src$$constant); ++ } ++ %} ++ ++ ins_pipe(ialu_regI_regI); ++%} ++ ++ ++instruct loadConP_poll(mRegP dst, immP_poll src) %{ ++ match(Set dst src); ++ ++ ins_cost(50); ++ format %{ "li $dst, $src #@loadConP_poll" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ intptr_t value = (intptr_t)$src$$constant; ++ ++ __ li(dst, (jlong)value); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConP_immP_0(mRegP dst, immP_0 src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(50); ++ format %{ "mov $dst, R0\t# ptr" %} ++ ins_encode %{ ++ Register dst_reg = $dst$$Register; ++ __ add_d(dst_reg, R0, R0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConN_immN_0(mRegN dst, immN_0 src) %{ ++ match(Set dst src); ++ format %{ "move $dst, R0\t# compressed NULL ptr" %} ++ ins_encode %{ ++ __ move($dst$$Register, R0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConN(mRegN dst, immN src) %{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "li $dst, $src\t# compressed ptr @ loadConN" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ __ set_narrow_oop(dst, (jobject)$src$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); // XXX ++%} ++ ++instruct loadConNKlass(mRegN dst, immNKlass src) %{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "li $dst, $src\t# compressed klass ptr @ loadConNKlass" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ __ set_narrow_klass(dst, (Klass*)$src$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); // XXX ++%} ++ ++//FIXME ++// Tail Call; Jump from runtime stub to Java code. ++// Also known as an 'interprocedural jump'. ++// Target of jump will eventually return to caller. ++// TailJump below removes the return address. ++instruct TailCalljmpInd(mRegP jump_target, mRegP method_oop) %{ ++ match(TailCall jump_target method_oop ); ++ ins_cost(300); ++ format %{ "JMP $jump_target \t# @TailCalljmpInd" %} ++ ++ ins_encode %{ ++ Register target = $jump_target$$Register; ++ Register oop = $method_oop$$Register; ++ ++ // RA will be used in generate_forward_exception() ++ __ push(RA); ++ ++ __ move(S3, oop); ++ __ jr(target); ++ %} ++ ++ ins_pipe( pipe_jump ); ++%} ++ ++// Create exception oop: created by stack-crawling runtime code. ++// Created exception is now available to this handler, and is setup ++// just prior to jumping to this handler. No code emitted. ++instruct CreateException( a0_RegP ex_oop ) ++%{ ++ match(Set ex_oop (CreateEx)); ++ ++ // use the following format syntax ++ format %{ "# exception oop is in A0; no code emitted @CreateException" %} ++ ins_encode %{ ++ // X86 leaves this function empty ++ __ block_comment("CreateException is empty in LA"); ++ %} ++ ins_pipe( empty ); ++// ins_pipe( pipe_jump ); ++%} ++ ++ ++/* The mechanism of exception handling is clear now. ++ ++- Common try/catch: ++ [stubGenerator_loongarch.cpp] generate_forward_exception() ++ |- V0, V1 are created ++ |- T4 <= SharedRuntime::exception_handler_for_return_address ++ `- jr T4 ++ `- the caller's exception_handler ++ `- jr OptoRuntime::exception_blob ++ `- here ++- Rethrow(e.g. 
'unwind'): ++ * The callee: ++ |- an exception is triggered during execution ++ `- exits the callee method through RethrowException node ++ |- The callee pushes exception_oop(T0) and exception_pc(RA) ++ `- The callee jumps to OptoRuntime::rethrow_stub() ++ * In OptoRuntime::rethrow_stub: ++ |- The VM calls _rethrow_Java to determine the return address in the caller method ++ `- exits the stub with tailjmpInd ++ |- pops exception_oop(V0) and exception_pc(V1) ++ `- jumps to the return address(usually an exception_handler) ++ * The caller: ++ `- continues processing the exception_blob with V0/V1 ++*/ ++ ++// Rethrow exception: ++// The exception oop will come in the first argument position. ++// Then JUMP (not call) to the rethrow stub code. ++instruct RethrowException() ++%{ ++ match(Rethrow); ++ ++ // use the following format syntax ++ format %{ "JMP rethrow_stub #@RethrowException" %} ++ ins_encode %{ ++ __ block_comment("@ RethrowException"); ++ ++ cbuf.set_insts_mark(); ++ cbuf.relocate(cbuf.insts_mark(), runtime_call_Relocation::spec()); ++ ++ // call OptoRuntime::rethrow_stub to get the exception handler in parent method ++ __ patchable_jump((address)OptoRuntime::rethrow_stub()); ++ %} ++ ins_pipe( pipe_jump ); ++%} ++ ++// ============================================================================ ++// Branch Instructions --- long offset versions ++ ++// Jump Direct ++instruct jmpDir_long(label labl) %{ ++ match(Goto); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "JMP $labl #@jmpDir_long" %} ++ ++ ins_encode %{ ++ Label* L = $labl$$label; ++ __ jmp_far(*L); ++ %} ++ ++ ins_pipe( pipe_jump ); ++ //ins_pc_relative(1); ++%} ++ ++// Jump Direct Conditional - Label defines a relative address from Jcc+1 ++instruct jmpLoopEnd_long(cmpOp cop, mRegI src1, mRegI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_long" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_branch_long(flag, op1, op2, L, true /* signed */); ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++%} ++ ++instruct jmpLoopEnd_reg_immI_long(cmpOp cop, mRegI src1, immI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_reg_immI_long" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Label* L = $labl$$label; ++ int flag = $cop$$cmpcode; ++ int val = $src2$$constant; ++ ++ if (val == 0) { ++ __ cmp_branch_long(flag, op1, R0, L, true /* signed */); ++ } else { ++ __ li(AT, val); ++ __ cmp_branch_long(flag, op1, AT, L, true /* signed */); ++ } ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++%} ++ ++ ++// This match pattern is created for StoreIConditional since I cannot match IfNode without a RegFlags! 
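// The try/catch and rethrow hand-off sketched in the comment block above (the callee publishes
// exception_oop/exception_pc, OptoRuntime::rethrow_stub asks the runtime for the handler that
// belongs to the return pc in the caller, then tail-jumps to it with the exception restored)
// can be pictured with a small stand-alone C++ model. This is a toy sketch with invented names
// (ToyException, lookup_handler, and a free function called rethrow_stub); it is not HotSpot
// code and only mirrors the shape of the control transfer.
#include <cstdio>

struct ToyException { const char* msg; };            // plays the role of exception_oop (T0/V0)
using Handler = void (*)(ToyException*);

// Plays the role of the runtime lookup: map the return pc in the caller to its handler.
static Handler lookup_handler(void* return_pc) {
  (void)return_pc;                                   // a real VM would walk the method's handler table
  return [](ToyException* e) { std::printf("handled: %s\n", e->msg); };
}

// Plays the role of the rethrow stub: find the handler, then transfer control to it.
static void rethrow_stub(ToyException* oop, void* pc) {
  Handler h = lookup_handler(pc);                    // "determine the return address in the caller"
  h(oop);                                            // the tail-jump, modeled here as a direct call
}

int main() {
  ToyException e{"unwind"};
  rethrow_stub(&e, nullptr);                         // the callee exits through its rethrow path
  return 0;
}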
++instruct jmpCon_flags_long(cmpOpEqNe cop, FlagsReg cr, label labl) %{ ++ match(If cop cr); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $labl #LoongArch uses T0 as equivalent to eflag @jmpCon_flags_long" %} ++ ++ ins_encode %{ ++ Label* L = $labl$$label; ++ switch($cop$$cmpcode) { ++ case 0x01: //equal ++ __ bne_long($cr$$Register, R0, *L); ++ break; ++ case 0x02: //not equal ++ __ beq_long($cr$$Register, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++%} ++ ++// Conditional jumps ++instruct branchConP_0_long(cmpOpEqNe cmp, mRegP op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP op1 zero)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConP_0_long" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_long(flag, op1, R0, L, true /* signed */); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConN2P_0_long(cmpOpEqNe cmp, mRegN op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP (DecodeN op1) zero)); ++ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConN2P_0_long" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_long(flag, op1, R0, L, true /* signed */); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++ ++instruct branchConP_long(cmpOp cmp, mRegP op1, mRegP op2, label labl) %{ ++ match(If cmp (CmpP op1 op2)); ++// predicate(can_branch_register(_kids[0]->_leaf, _kids[1]->_leaf)); ++ effect(USE labl); ++ ++ ins_cost(200); ++ format %{ "b$cmp $op1, $op2, $labl #@branchConP_long" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = $op2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_long(flag, op1, op2, L, false /* unsigned */); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct cmpN_null_branch_long(cmpOpEqNe cmp, mRegN op1, immN_0 null, label labl) %{ ++ match(If cmp (CmpN op1 null)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,0\t! compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_null_branch_long" %} ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_long(flag, op1, R0, L, true /* signed */); ++ %} ++//TODO: pipe_branchP or create pipe_branchN LEE ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct cmpN_reg_branch_long(cmpOp cmp, mRegN op1, mRegN op2, label labl) %{ ++ match(If cmp (CmpN op1 op2)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,$op2\t! 
compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_reg_branch_long" %} ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = $op2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_long(flag, op1, op2, L, false /* unsigned */); ++ %} ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConIU_reg_reg_long(cmpOp cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_reg_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_long(flag, op1, op2, L, false /* unsigned */); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++ ++instruct branchConIU_reg_imm_long(cmpOp cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_imm_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ if (val == 0) { ++ __ cmp_branch_long(flag, op1, R0, L, false /* unsigned */); ++ } else { ++ __ li(AT, val); ++ __ cmp_branch_long(flag, op1, AT, L, false /* unsigned */); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConI_reg_reg_long(cmpOp cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_reg_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_long(flag, op1, op2, L, true /* signed */); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConI_reg_imm_long(cmpOp cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ ins_cost(200); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_imm_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ if (val == 0) { ++ __ cmp_branch_long(flag, op1, R0, L, true /* signed */); ++ } else { ++ __ li(AT, val); ++ __ cmp_branch_long(flag, op1, AT, L, true /* signed */); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConL_regL_regL_long(cmpOp cmp, mRegLorI2L src1, mRegLorI2L src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_regL_long" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_long(flag, op1, op2, target, true /* signed */); ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConUL_regL_regL_long(cmpOp cmp, mRegLorI2L src1, mRegLorI2L src2, label labl) %{ ++ match(If cmp (CmpUL src1 src2)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_regL_long" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ Label* target = $labl$$label; ++ int flag = 
$cmp$$cmpcode; ++ ++ __ cmp_branch_long(flag, op1, op2, target, false /* signed */); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++%} ++ ++instruct branchConL_regL_immL_long(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_immL_long" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register op1 = as_Register($src1$$reg); ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ long val = $src2$$constant; ++ ++ if (val == 0) { ++ __ cmp_branch_long(flag, op1, R0, target, true /* signed */); ++ } else { ++ __ li(AT, val); ++ __ cmp_branch_long(flag, op1, AT, target, true /* signed */); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConUL_regL_immL_long(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match(If cmp (CmpUL src1 src2)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_immL_long" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register op1 = as_Register($src1$$reg); ++ long val = $src2$$constant; ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ if (val == 0) { ++ __ cmp_branch_long(flag, op1, R0, target, false /* signed */); ++ } else { ++ __ li(AT, val); ++ __ cmp_branch_long(flag, op1, AT, target, false /* signed */); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++%} ++ ++//FIXME ++instruct branchConF_reg_reg_long(cmpOp cmp, regF src1, regF src2, label labl) %{ ++ match( If cmp (CmpF src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConF_reg_reg_long" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ fcmp_ceq_s(FCC0, reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x02: //not_equal ++ __ fcmp_ceq_s(FCC0, reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x03: //greater ++ __ fcmp_cule_s(FCC0, reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x04: //greater_equal ++ __ fcmp_cult_s(FCC0, reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x05: //less ++ __ fcmp_cult_s(FCC0, reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x06: //less_equal ++ __ fcmp_cule_s(FCC0, reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_slow); ++%} ++ ++instruct branchConD_reg_reg_long(cmpOp cmp, regD src1, regD src2, label labl) %{ ++ match( If cmp (CmpD src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConD_reg_reg_long" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ fcmp_ceq_d(FCC0, reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x02: //not_equal ++ // c_ueq_d cannot distinguish NaN from equal. Double.isNaN(Double) is implemented by 'f != f', so the use of c_ueq_d causes bugs. 
++ __ fcmp_ceq_d(FCC0, reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x03: //greater ++ __ fcmp_cule_d(FCC0, reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x04: //greater_equal ++ __ fcmp_cult_d(FCC0, reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x05: //less ++ __ fcmp_cult_d(FCC0, reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x06: //less_equal ++ __ fcmp_cule_d(FCC0, reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_slow); ++%} ++ ++ ++// ============================================================================ ++// Branch Instructions -- short offset versions ++ ++// Jump Direct ++instruct jmpDir_short(label labl) %{ ++ match(Goto); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "JMP $labl #@jmpDir_short" %} ++ ++ ins_encode %{ ++ Label &L = *($labl$$label); ++ if(&L) ++ __ b(L); ++ else ++ __ b(int(0)); ++ %} ++ ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++// Jump Direct Conditional - Label defines a relative address from Jcc+1 ++instruct jmpLoopEnd_short(cmpOp cop, mRegI src1, mRegI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_short" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_branch_short(flag, op1, op2, L, true /* signed */); ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++instruct jmpLoopEnd_reg_immI_short(cmpOp cop, mRegI src1, immI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_reg_immI_short" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cop$$cmpcode; ++ int val = $src2$$constant; ++ ++ if (val == 0) { ++ __ cmp_branch_short(flag, op1, R0, L, true /* signed */); ++ } else { ++ __ li(AT, val); ++ __ cmp_branch_short(flag, op1, AT, L, true /* signed */); ++ } ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++ ++// This match pattern is created for StoreIConditional since I cannot match IfNode without a RegFlags! 
++instruct jmpCon_flags_short(cmpOpEqNe cop, FlagsReg cr, label labl) %{ ++ match(If cop cr); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $labl #LoongArch uses T0 as equivalent to eflag @jmpCon_flags_short" %} ++ ++ ins_encode %{ ++ Label &L = *($labl$$label); ++ switch($cop$$cmpcode) { ++ case 0x01: //equal ++ if (&L) ++ __ bnez($cr$$Register, L); ++ else ++ __ bnez($cr$$Register, (int)0); ++ break; ++ case 0x02: //not equal ++ if (&L) ++ __ beqz($cr$$Register, L); ++ else ++ __ beqz($cr$$Register, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++// Conditional jumps ++instruct branchConP_0_short(cmpOpEqNe cmp, mRegP op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP op1 zero)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConP_0_short" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branchEqNe_off21(flag, op1, L); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConN2P_0_short(cmpOpEqNe cmp, mRegN op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP (DecodeN op1) zero)); ++ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConN2P_0_short" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branchEqNe_off21(flag, op1, L); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConP_short(cmpOp cmp, mRegP op1, mRegP op2, label labl) %{ ++ match(If cmp (CmpP op1 op2)); ++// predicate(can_branch_register(_kids[0]->_leaf, _kids[1]->_leaf)); ++ effect(USE labl); ++ ++ ins_cost(200); ++ format %{ "b$cmp $op1, $op2, $labl #@branchConP_short" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = $op2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_short(flag, op1, op2, L, false /* unsigned */); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct cmpN_null_branch_short(cmpOp cmp, mRegN op1, immN_0 null, label labl) %{ ++ match(If cmp (CmpN op1 null)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,0\t! compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_null_branch_short" %} ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branchEqNe_off21(flag, op1, L); ++ %} ++//TODO: pipe_branchP or create pipe_branchN LEE ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct cmpN_reg_branch_short(cmpOp cmp, mRegN op1, mRegN op2, label labl) %{ ++ match(If cmp (CmpN op1 op2)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,$op2\t! 
compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_reg_branch_short" %} ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = $op2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_short(flag, op1, op2, L, false /* unsigned */); ++ %} ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConIU_reg_reg_short(cmpOp cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_reg_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_short(flag, op1, op2, L, false /* unsigned */); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConIU_reg_imm_short(cmpOp cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_imm_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ if (val == 0) { ++ __ cmp_branch_short(flag, op1, R0, L, false /* unsigned */); ++ } else { ++ __ li(AT, val); ++ __ cmp_branch_short(flag, op1, AT, L, false /* unsigned */); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConI_reg_reg_short(cmpOp cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_reg_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_short(flag, op1, op2, L, true /* signed */); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConI_reg_imm_short(cmpOp cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ ins_cost(200); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_imm_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ if (val == 0) { ++ __ cmp_branch_short(flag, op1, R0, L, true /* signed */); ++ } else { ++ __ li(AT, val); ++ __ cmp_branch_short(flag, op1, AT, L, true /* signed */); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConL_regL_regL_short(cmpOp cmp, mRegLorI2L src1, mRegLorI2L src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_regL_short" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_short(flag, op1, op2, target, true /* signed */); ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConUL_regL_regL_short(cmpOp cmp, mRegLorI2L src1, mRegLorI2L src2, label labl) %{ ++ match(If cmp (CmpUL src1 src2)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_regL_short" %} 
++ ins_cost(250); ++ ++ ins_encode %{ ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ Label& target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_short(flag, op1, op2, target, false /* signed */); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++ ins_short_branch(1); ++%} ++ ++instruct branchConL_regL_immL_short(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_immL_short" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register op1 = as_Register($src1$$reg); ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ long val = $src2$$constant; ++ ++ if (val == 0) { ++ __ cmp_branch_short(flag, op1, R0, target, true /* signed */); ++ } else { ++ __ li(AT, val); ++ __ cmp_branch_short(flag, op1, AT, target, true /* signed */); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConUL_regL_immL_short(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match(If cmp (CmpUL src1 src2)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_immL_short" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register op1 = as_Register($src1$$reg); ++ long val = $src2$$constant; ++ Label& target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ if (val == 0) { ++ __ cmp_branch_short(flag, op1, R0, target, false /* signed */); ++ } else { ++ __ li(AT, val); ++ __ cmp_branch_short(flag, op1, AT, target, false /* signed */); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++ ins_short_branch(1); ++%} ++ ++//FIXME ++instruct branchConF_reg_reg_short(cmpOp cmp, regF src1, regF src2, label labl) %{ ++ match( If cmp (CmpF src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConF_reg_reg_short" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ fcmp_ceq_s(FCC0, reg_op1, reg_op2); ++ if (&L) ++ __ bcnez(FCC0, L); ++ else ++ __ bcnez(FCC0, (int)0); ++ break; ++ case 0x02: //not_equal ++ __ fcmp_ceq_s(FCC0, reg_op1, reg_op2); ++ if (&L) ++ __ bceqz(FCC0, L); ++ else ++ __ bceqz(FCC0, (int)0); ++ break; ++ case 0x03: //greater ++ __ fcmp_cule_s(FCC0, reg_op1, reg_op2); ++ if(&L) ++ __ bceqz(FCC0, L); ++ else ++ __ bceqz(FCC0, (int)0); ++ break; ++ case 0x04: //greater_equal ++ __ fcmp_cult_s(FCC0, reg_op1, reg_op2); ++ if(&L) ++ __ bceqz(FCC0, L); ++ else ++ __ bceqz(FCC0, (int)0); ++ break; ++ case 0x05: //less ++ __ fcmp_cult_s(FCC0, reg_op1, reg_op2); ++ if(&L) ++ __ bcnez(FCC0, L); ++ else ++ __ bcnez(FCC0, (int)0); ++ break; ++ case 0x06: //less_equal ++ __ fcmp_cule_s(FCC0, reg_op1, reg_op2); ++ if(&L) ++ __ bcnez(FCC0, L); ++ else ++ __ bcnez(FCC0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_fpu_branch); ++ ins_short_branch(1); ++%} ++ ++instruct branchConD_reg_reg_short(cmpOp cmp, regD src1, regD src2, label labl) %{ ++ match( If cmp (CmpD src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConD_reg_reg_short" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) 
{ ++ case 0x01: //equal ++ __ fcmp_ceq_d(FCC0, reg_op1, reg_op2); ++ if (&L) ++ __ bcnez(FCC0, L); ++ else ++ __ bcnez(FCC0, (int)0); ++ break; ++ case 0x02: //not_equal ++ // c_ueq_d cannot distinguish NaN from equal. Double.isNaN(Double) is implemented by 'f != f', so the use of c_ueq_d causes bugs. ++ __ fcmp_ceq_d(FCC0, reg_op1, reg_op2); ++ if (&L) ++ __ bceqz(FCC0, L); ++ else ++ __ bceqz(FCC0, (int)0); ++ break; ++ case 0x03: //greater ++ __ fcmp_cule_d(FCC0, reg_op1, reg_op2); ++ if(&L) ++ __ bceqz(FCC0, L); ++ else ++ __ bceqz(FCC0, (int)0); ++ break; ++ case 0x04: //greater_equal ++ __ fcmp_cult_d(FCC0, reg_op1, reg_op2); ++ if(&L) ++ __ bceqz(FCC0, L); ++ else ++ __ bceqz(FCC0, (int)0); ++ break; ++ case 0x05: //less ++ __ fcmp_cult_d(FCC0, reg_op1, reg_op2); ++ if(&L) ++ __ bcnez(FCC0, L); ++ else ++ __ bcnez(FCC0, (int)0); ++ break; ++ case 0x06: //less_equal ++ __ fcmp_cule_d(FCC0, reg_op1, reg_op2); ++ if(&L) ++ __ bcnez(FCC0, L); ++ else ++ __ bcnez(FCC0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_fpu_branch); ++ ins_short_branch(1); ++%} ++ ++// =================== End of branch instructions ========================== ++ ++// Call Runtime Instruction ++instruct CallRuntimeDirect(method meth) %{ ++ match(CallRuntime ); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL,runtime #@CallRuntimeDirect" %} ++ ins_encode( Java_To_Runtime( meth ) ); ++ ins_pipe( pipe_slow ); ++ ins_alignment(4); ++%} ++ ++ ++ ++//------------------------MemBar Instructions------------------------------- ++//Memory barrier flavors ++ ++instruct unnecessary_membar_acquire() %{ ++ predicate(unnecessary_acquire(n)); ++ match(MemBarAcquire); ++ ins_cost(0); ++ ++ format %{ "membar_acquire (elided)" %} ++ ++ ins_encode %{ ++ __ block_comment("membar_acquire (elided)"); ++ %} ++ ++ ins_pipe(empty); ++%} ++ ++instruct membar_acquire() %{ ++ match(MemBarAcquire); ++ ins_cost(400); ++ ++ format %{ "MEMBAR-acquire @ membar_acquire" %} ++ ins_encode %{ ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad|__ LoadStore)); ++ %} ++ ins_pipe(empty); ++%} ++ ++instruct load_fence() %{ ++ match(LoadFence); ++ ins_cost(400); ++ ++ format %{ "MEMBAR @ load_fence" %} ++ ins_encode %{ ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad|__ LoadStore)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct membar_acquire_lock() ++%{ ++ match(MemBarAcquireLock); ++ ins_cost(0); ++ ++ size(0); ++ format %{ "MEMBAR-acquire (acquire as part of CAS in prior FastLock so empty encoding) @ membar_acquire_lock" %} ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++instruct unnecessary_membar_release() %{ ++ predicate(unnecessary_release(n)); ++ match(MemBarRelease); ++ ins_cost(0); ++ ++ format %{ "membar_release (elided)" %} ++ ++ ins_encode %{ ++ __ block_comment("membar_release (elided)"); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct membar_release() %{ ++ match(MemBarRelease); ++ ins_cost(400); ++ ++ format %{ "MEMBAR-release @ membar_release" %} ++ ++ ins_encode %{ ++ // Attention: DO NOT DELETE THIS GUY! 
++ __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct store_fence() %{ ++ match(StoreFence); ++ ins_cost(400); ++ ++ format %{ "MEMBAR @ store_fence" %} ++ ++ ins_encode %{ ++ __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct membar_release_lock() ++%{ ++ match(MemBarReleaseLock); ++ ins_cost(0); ++ ++ size(0); ++ format %{ "MEMBAR-release-lock (release in FastUnlock so empty) @ membar_release_lock" %} ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++instruct unnecessary_membar_volatile() %{ ++ predicate(unnecessary_volatile(n)); ++ match(MemBarVolatile); ++ ins_cost(0); ++ ++ format %{ "membar_volatile (elided)" %} ++ ++ ins_encode %{ ++ __ block_comment("membar_volatile (elided)"); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct membar_volatile() %{ ++ match(MemBarVolatile); ++ ins_cost(400); ++ ++ format %{ "MEMBAR-volatile" %} ++ ins_encode %{ ++ if( !os::is_MP() ) return; // Not needed on single CPU ++ __ membar(__ StoreLoad); ++ ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct membar_storestore() %{ ++ match(MemBarStoreStore); ++ ++ ins_cost(400); ++ format %{ "MEMBAR-storestore @ membar_storestore" %} ++ ins_encode %{ ++ __ membar(__ StoreStore); ++ %} ++ ins_pipe(empty); ++%} ++ ++//----------Move Instructions-------------------------------------------------- ++instruct castX2P(mRegP dst, mRegL src) %{ ++ match(Set dst (CastX2P src)); ++ format %{ "castX2P $dst, $src @ castX2P" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ if(src != dst) ++ __ move(dst, src); ++ %} ++ ins_cost(10); ++ ins_pipe( ialu_regI_mov ); ++%} ++ ++instruct castP2X(mRegL dst, mRegP src ) %{ ++ match(Set dst (CastP2X src)); ++ ++ format %{ "mov $dst, $src\t #@castP2X" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ if(src != dst) ++ __ move(dst, src); ++ %} ++ ins_pipe( ialu_regI_mov ); ++%} ++ ++instruct MoveF2I_reg_reg(mRegI dst, regF src) %{ ++ match(Set dst (MoveF2I src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveF2I $dst, $src @ MoveF2I_reg_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ ++ __ movfr2gr_s(dst, src); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct MoveI2F_reg_reg(regF dst, mRegI src) %{ ++ match(Set dst (MoveI2F src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveI2F $dst, $src @ MoveI2F_reg_reg" %} ++ ins_encode %{ ++ Register src = as_Register($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ movgr2fr_w(dst, src); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct MoveD2L_reg_reg(mRegL dst, regD src) %{ ++ match(Set dst (MoveD2L src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveD2L $dst, $src @ MoveD2L_reg_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ ++ __ movfr2gr_d(dst, src); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct MoveL2D_reg_reg(regD dst, mRegL src) %{ ++ match(Set dst (MoveL2D src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveL2D $dst, $src @ MoveL2D_reg_reg" %} ++ ins_encode %{ ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ __ movgr2fr_d(dst, src); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++//----------Conditional 
Move--------------------------------------------------- ++// Conditional move ++instruct cmovI_cmpI_reg_reg(mRegI dst, mRegI src1, mRegI src2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpI src1 src2)) (Binary src1 src2))); ++ ins_cost(50); ++ format %{ ++ "CMP$cop $src1, $src2\t @cmovI_cmpI_reg_reg\n" ++ "\tCMOV $dst,$src1, $src2 \t @cmovI_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, op1, op2, (MacroAssembler::CMCompare) flag, true); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpI_reg_reg2(mRegI dst, mRegI src1, mRegI src2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpI src1 src2)) (Binary src2 src1))); ++ ins_cost(50); ++ format %{ ++ "CMP$cop $src1, $src2\t @cmovI_cmpI_reg_reg2\n" ++ "\tCMOV $dst,$src2, $src1 \t @cmovI_cmpI_reg_reg2" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, op2, op1, (MacroAssembler::CMCompare) flag, true); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpI_dst_reg(mRegI dst, mRegI src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpI_dst_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpI_dst_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpP_reg_reg(mRegI dst, mRegI src, mRegP tmp1, mRegP tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovI_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmovI_cmpN_reg_reg(mRegI dst, mRegI src, mRegN tmp1, mRegN tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovI_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpU_reg_reg(mRegP dst, mRegP src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int 
flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpF_reg_reg(mRegP dst, mRegP src, regF tmp1, regF tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ ++ match(Set dst (CMoveP (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpF_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ FloatRegister tmp1 = $tmp3$$FloatRegister; ++ FloatRegister tmp2 = $tmp4$$FloatRegister; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpN_reg_reg(mRegP dst, mRegP src, mRegN tmp1, mRegN tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpP_reg_reg(mRegN dst, mRegN src, mRegP tmp1, mRegP tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpD_reg_reg(mRegP dst, mRegP src, regD tmp1, regD tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ ++ match(Set dst (CMoveP (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ FloatRegister tmp1 = $tmp3$$FloatRegister; ++ FloatRegister tmp2 = $tmp4$$FloatRegister; ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpN_reg_reg(mRegN dst, mRegN src, mRegN tmp1, mRegN tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, 
op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpU_reg_reg(mRegI dst, mRegI src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovI_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpL_reg_reg(mRegI dst, mRegIorL2I src, mRegLorI2L tmp1, mRegLorI2L tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpUL_reg_reg(mRegI dst, mRegIorL2I src, mRegLorI2L tmp1, mRegLorI2L tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveI (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpUL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpUL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovP_cmpL_reg_reg(mRegP dst, mRegP src, mRegLorI2L tmp1, mRegLorI2L tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpUL_reg_reg(mRegP dst, mRegP src, mRegLorI2L tmp1, mRegLorI2L tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveP (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpUL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpUL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovI_cmpD_reg_reg(mRegI dst, mRegI src, regD tmp1, regD tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ ++ match(Set dst (CMoveI (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, 
TEMP tmp4); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ FloatRegister tmp1 = $tmp3$$FloatRegister; ++ FloatRegister tmp2 = $tmp4$$FloatRegister; ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpP_reg_reg(mRegP dst, mRegP src, mRegP tmp1, mRegP tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpI_reg_reg(mRegP dst, mRegP src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1,$tmp2\t @cmovP_cmpI_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpI_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpP_reg_reg(mRegL dst, mRegL src, mRegP tmp1, mRegP tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovL_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ Label L; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpU_reg_reg(mRegN dst, mRegN src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpL_reg_reg(mRegN dst, mRegN src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveN (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovN_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovN_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = 
$dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpUL_reg_reg(mRegN dst, mRegN src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveN (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovN_cmpUL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovN_cmpUL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovN_cmpI_reg_reg(mRegN dst, mRegN src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1,$tmp2\t @cmovN_cmpI_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpI_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpU_reg_reg(mRegL dst, mRegL src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovL_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpF_reg_reg(mRegL dst, mRegL src, regF tmp1, regF tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ ++ match(Set dst (CMoveL (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpF_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ FloatRegister tmp1 = $tmp3$$FloatRegister; ++ FloatRegister tmp2 = $tmp4$$FloatRegister; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpI_reg_reg(mRegL dst, mRegL src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpI_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct 
cmovL_cmpL_reg_reg(mRegL dst, mRegL src1, mRegL src2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpL src1 src2)) (Binary src1 src2))); ++ ins_cost(50); ++ format %{ ++ "CMP$cop $src1, $src2\t @cmovL_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src1, $src2 \t @cmovL_cmpL_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, op1, op2, (MacroAssembler::CMCompare) flag, true); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpUL_reg_reg(mRegL dst, mRegL src1, mRegL src2, cmpOp cop) %{ ++ match(Set dst (CMoveL (Binary cop (CmpUL src1 src2)) (Binary src1 src2))); ++ ins_cost(50); ++ format %{ ++ "CMP$cop $src1, $src2\t @cmovL_cmpUL_reg_reg\n" ++ "\tCMOV $dst,$src1, $src2 \t @cmovL_cmpUL_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, op1, op2, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpL_reg_reg2(mRegL dst, mRegL src1, mRegL src2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpL src1 src2)) (Binary src2 src1))); ++ ins_cost(50); ++ format %{ ++ "CMP$cop $src1, $src2\t @cmovL_cmpL_reg_reg2\n" ++ "\tCMOV $dst,$src2, $src1 \t @cmovL_cmpL_reg_reg2" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, op2, op1, (MacroAssembler::CMCompare) flag, true); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpUL_reg_reg2(mRegL dst, mRegL src1, mRegL src2, cmpOp cop) %{ ++ match(Set dst (CMoveL (Binary cop (CmpUL src1 src2)) (Binary src2 src1))); ++ ins_cost(50); ++ format %{ ++ "CMP$cop $src1, $src2\t @cmovL_cmpUL_reg_reg2\n" ++ "\tCMOV $dst,$src2, $src1 \t @cmovL_cmpUL_reg_reg2" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, op2, op1, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovL_cmpL_dst_reg(mRegL dst, mRegL src, mRegL tmp1, mRegL tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpL_dst_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpL_dst_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpUL_dst_reg(mRegL dst, mRegL src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveL (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpUL_dst_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpUL_dst_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, 
(MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovL_cmpN_reg_reg(mRegL dst, mRegL src, mRegN tmp1, mRegN tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovL_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmovL_cmpD_reg_reg(mRegL dst, mRegL src, regD tmp1, regD tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ ++ match(Set dst (CMoveL (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ FloatRegister tmp1 = $tmp3$$FloatRegister; ++ FloatRegister tmp2 = $tmp4$$FloatRegister; ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovD_cmpD_reg_reg(regD dst, regD src, regD tmp1, regD tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveD (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovD_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovD_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovF_cmpI_reg_reg(regF dst, regF src, mRegI tmp1, mRegI tmp2, cmpOp cop, regF tmp3, regF tmp4) %{ ++ match(Set dst (CMoveF (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovF_cmpI_reg_reg\n" ++ "\tCMOV $dst, $src \t @cmovF_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister tmp1 = as_FloatRegister($tmp3$$reg); ++ FloatRegister tmp2 = as_FloatRegister($tmp4$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovD_cmpI_reg_reg(regD dst, regD src, mRegI tmp1, mRegI tmp2, cmpOp cop, regF tmp3, regF tmp4) %{ ++ match(Set dst (CMoveD (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovD_cmpI_reg_reg\n" ++ "\tCMOV $dst, $src \t @cmovD_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ FloatRegister dst = 
as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister tmp1 = as_FloatRegister($tmp3$$reg); ++ FloatRegister tmp2 = as_FloatRegister($tmp4$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovD_cmpP_reg_reg(regD dst, regD src, mRegP tmp1, mRegP tmp2, cmpOp cop, regF tmp3, regF tmp4) %{ ++ match(Set dst (CMoveD (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovD_cmpP_reg_reg\n" ++ "\tCMOV $dst, $src \t @cmovD_cmpP_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister tmp1 = as_FloatRegister($tmp3$$reg); ++ FloatRegister tmp2 = as_FloatRegister($tmp4$$reg); ++ int flag = $cop$$cmpcode; ++ ++ // Use signed comparison here, because the most significant bit of the ++ // user-space virtual address must be 0. ++ __ cmp_cmov(op1, op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++//FIXME ++instruct cmovI_cmpF_reg_reg(mRegI dst, mRegI src, regF tmp1, regF tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ ++ match(Set dst (CMoveI (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpF_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ FloatRegister tmp1 = $tmp3$$FloatRegister; ++ FloatRegister tmp2 = $tmp4$$FloatRegister; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovF_cmpF_reg_reg(regF dst, regF src, regF tmp1, regF tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveF (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovF_cmpF_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovF_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ FloatRegister src = $src$$FloatRegister; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// Manifest a CmpL result in an integer register. Very painful. ++// This is the test to avoid. 
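++// A rough reading of the cmpL3/cmpF3/cmpD3 lowerings below: two
++// set-on-less-than results are produced and subtracted, so dst ends up as
++// (src1 > src2) - (src1 < src2), i.e. -1, 0 or +1. In the float/double
++// forms the unordered (NaN) case is folded into the "less" side via
++// fcmp_cult, which is why nan_result is listed as -1.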
++instruct cmpL3_reg_reg(mRegI dst, mRegL src1, mRegL src2) %{
++  match(Set dst (CmpL3 src1 src2));
++  ins_cost(1000);
++  format %{ "cmpL3 $dst, $src1, $src2 @ cmpL3_reg_reg" %}
++  ins_encode %{
++    Register opr1 = as_Register($src1$$reg);
++    Register opr2 = as_Register($src2$$reg);
++    Register dst = as_Register($dst$$reg);
++
++    __ slt(AT, opr1, opr2);
++    __ slt(dst, opr2, opr1);
++    __ sub_d(dst, dst, AT);
++  %}
++  ins_pipe( pipe_slow );
++%}
++
++//
++// less_result = -1
++// greater_result = 1
++// equal_result = 0
++// nan_result = -1
++//
++instruct cmpF3_reg_reg(mRegI dst, regF src1, regF src2) %{
++  match(Set dst (CmpF3 src1 src2));
++  ins_cost(1000);
++  format %{ "cmpF3 $dst, $src1, $src2 @ cmpF3_reg_reg" %}
++  ins_encode %{
++    FloatRegister src1 = as_FloatRegister($src1$$reg);
++    FloatRegister src2 = as_FloatRegister($src2$$reg);
++    Register dst = as_Register($dst$$reg);
++
++    __ fcmp_clt_s(FCC0, src2, src1);
++    __ fcmp_cult_s(FCC1, src1, src2);
++    __ movcf2gr(dst, FCC0);
++    __ movcf2gr(AT, FCC1);
++    __ sub_d(dst, dst, AT);
++
++  %}
++  ins_pipe( pipe_slow );
++%}
++
++instruct cmpD3_reg_reg(mRegI dst, regD src1, regD src2) %{
++  match(Set dst (CmpD3 src1 src2));
++  ins_cost(1000);
++  format %{ "cmpD3 $dst, $src1, $src2 @ cmpD3_reg_reg" %}
++  ins_encode %{
++    FloatRegister src1 = as_FloatRegister($src1$$reg);
++    FloatRegister src2 = as_FloatRegister($src2$$reg);
++    Register dst = as_Register($dst$$reg);
++
++    __ fcmp_clt_d(FCC0, src2, src1);
++    __ fcmp_cult_d(FCC1, src1, src2);
++    __ movcf2gr(dst, FCC0);
++    __ movcf2gr(AT, FCC1);
++    __ sub_d(dst, dst, AT);
++  %}
++  ins_pipe( pipe_slow );
++%}
++
++instruct clear_array(t8RegL cnt, t3_RegP base, Universe dummy) %{
++  match(Set dummy (ClearArray cnt base));
++  effect(USE_KILL cnt, USE_KILL base);
++  format %{ "CLEAR_ARRAY base = $base, cnt = $cnt # Clear doublewords" %}
++  ins_encode %{
++    //Assume cnt is the number of bytes in an array to be cleared,
++    //and base points to the starting address of the array.
++    Register base = $base$$Register;
++    Register cnt = $cnt$$Register;
++    Label Loop, done;
++
++    __ beq(cnt, R0, done);
++
++    __ bind(Loop);
++    __ st_d(R0, base, 0);
++    __ addi_d(cnt, cnt, -1);
++    __ addi_d(base, base, wordSize);
++    __ bne(cnt, R0, Loop);
++
++    __ bind(done);
++  %}
++  ins_pipe( pipe_slow );
++%}
++
++instruct clear_array_imm(immL cnt, t3_RegP base, Universe dummy) %{
++  match(Set dummy (ClearArray cnt base));
++  effect(USE_KILL base);
++  format %{ "CLEAR_ARRAY base = $base, cnt = $cnt # Clear doublewords" %}
++  ins_encode %{
++    //Assume cnt is the number of bytes in an array to be cleared,
++    //and base points to the starting address of the array.
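++    // Sketch of the constant-length strategy below: the cnt % 8 leftover
++    // doublewords are cleared with straight-line st_d stores; if anything
++    // remains, AT is set to the end address (base + cnt * 8) and an 8-way
++    // unrolled loop stores 64 zero bytes per iteration until base reaches AT.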
++ Register base = $base$$Register; ++ long cnt = $cnt$$constant; ++ Label Loop, done; ++ ++ int tmp = cnt % 8; ++ int i = 0; ++ for (; i < tmp; i++) { ++ __ st_d(R0, base, i * 8); ++ } ++ if (cnt - tmp) { ++ __ li(AT, cnt); ++ __ alsl_d(AT, AT, base, 2); ++ __ addi_d(base, base, i * 8); ++ __ bind(Loop); ++ __ st_d(R0, base, 0); ++ __ st_d(R0, base, 8); ++ __ st_d(R0, base, 16); ++ __ st_d(R0, base, 24); ++ __ st_d(R0, base, 32); ++ __ st_d(R0, base, 40); ++ __ st_d(R0, base, 48); ++ __ st_d(R0, base, 56); ++ __ addi_d(base, base, 64); ++ __ blt(base, AT, Loop); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct has_negatives(a4_RegP ary1, mA5RegI len, no_Ax_mRegI result) %{ ++ match(Set result (HasNegatives ary1 len)); ++ effect(USE_KILL ary1, USE_KILL len); ++ format %{ "has negatives byte[] ary1:$ary1, len:$len -> $result @ has_negatives" %} ++ ++ ins_encode %{ ++ __ has_negatives($ary1$$Register, $len$$Register, $result$$Register); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct string_indexofU_char(a4_RegP str1, mA5RegI cnt1, mA6RegI ch, no_Ax_mRegI result, mRegL tmp1, mRegL tmp2, mRegL tmp3) %{ ++ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); ++ effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ++ format %{ "String IndexOf char[] $str1, len:$cnt1, char:$ch, res:$result, tmp1:$tmp1, tmp2:$tmp2, tmp3:$tmp3 -> $result @ string_indexof_char" %} ++ ++ ins_encode %{ ++ __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, ++ $result$$Register, $tmp1$$Register, $tmp2$$Register, ++ $tmp3$$Register); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct string_compareL(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result) %{ ++ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL); ++ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); ++ ++ format %{ "String Compare byte[] $str1[len: $cnt1], $str2[len: $cnt2] -> $result @ string_compareL" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ StrIntrinsicNode::LL); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct string_compareU(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result) %{ ++ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU); ++ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); ++ ++ format %{ "String Compare char[] $str1[len: $cnt1], $str2[len: $cnt2] -> $result @ string_compareU" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ StrIntrinsicNode::UU); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct string_compareLU(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result) %{ ++ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU); ++ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); ++ ++ format %{ "String Compare byte[] $str1[len: $cnt1], $str2[len: $cnt2] -> $result @ string_compareLU" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ StrIntrinsicNode::LU); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct 
string_compareUL(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result) %{ ++ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL); ++ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); ++ ++ format %{ "String Compare byte[] $str1[len: $cnt1], $str2[len: $cnt2] -> $result @ string_compareUL" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ StrIntrinsicNode::UL); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++// fast char[] to byte[] compression ++instruct string_compress(a4_RegP src, a5_RegP dst, mA6RegI len, no_Ax_mRegI result, ++ mRegL tmp1, mRegL tmp2, mRegL tmp3) ++%{ ++ match(Set result (StrCompressedCopy src (Binary dst len))); ++ effect(USE_KILL src, USE_KILL dst, USE_KILL len, TEMP_DEF result, ++ TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ++ format %{ "String Compress $src,$dst -> $result @ string_compress " %} ++ ins_encode %{ ++ __ char_array_compress($src$$Register, $dst$$Register, $len$$Register, ++ $result$$Register, $tmp1$$Register, ++ $tmp2$$Register, $tmp3$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// byte[] to char[] inflation ++instruct string_inflate(Universe dummy, a4_RegP src, a5_RegP dst, mA6RegI len, ++ mRegL tmp1, mRegL tmp2) ++%{ ++ match(Set dummy (StrInflatedCopy src (Binary dst len))); ++ effect(USE_KILL src, USE_KILL dst, USE_KILL len, TEMP tmp1, TEMP tmp2); ++ ++ format %{ "String Inflate $src,$dst @ string_inflate " %} ++ ins_encode %{ ++ __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register, ++ $tmp1$$Register, $tmp2$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// intrinsic optimization ++instruct string_equals(a4_RegP str1, a5_RegP str2, mA6RegI cnt, no_Ax_mRegI result, t8RegL tmp1, t3RegL tmp2) %{ ++ match(Set result (StrEquals (Binary str1 str2) cnt)); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp1, KILL tmp2); ++ ++ format %{ "String Equal $str1, $str2, len:$cnt, tmp1:$tmp1, tmp2:$tmp2 -> $result @ string_equals" %} ++ ins_encode %{ ++ __ arrays_equals($str1$$Register, $str2$$Register, ++ $cnt$$Register, $tmp1$$Register, $tmp2$$Register, $result$$Register, ++ false/* byte */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++//----------Arithmetic Instructions------------------------------------------- ++//----------Addition Instructions--------------------------------------------- ++instruct addI_Reg_Reg(mRegI dst, mRegIorL2I src1, mRegIorL2I src2) %{ ++ match(Set dst (AddI src1 src2)); ++ ++ format %{ "add $dst, $src1, $src2 #@addI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ add_w(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addI_Reg_imm(mRegI dst, mRegIorL2I src1, immI12 src2) %{ ++ match(Set dst (AddI src1 src2)); ++ ++ format %{ "add $dst, $src1, $src2 #@addI_Reg_imm12" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ int imm = $src2$$constant; ++ ++ __ addi_w(dst, src1, imm); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addI_salI_Reg_Reg_immI_1_4(mRegI dst, mRegI src1, mRegI src2, immI_1_4 shift) %{ ++ match(Set dst (AddI src1 (LShiftI src2 shift))); ++ ++ format %{ "alsl $dst, $src1, $src2, $shift #@addI_salI_Reg_Reg_immI_1_4" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register 
src2 = $src2$$Register; ++ int sh = $shift$$constant; ++ __ alsl_w(dst, src2, src1, sh - 1); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++instruct addP_reg_reg(mRegP dst, mRegP src1, mRegLorI2L src2) %{ ++ match(Set dst (AddP src1 src2)); ++ ++ format %{ "ADD $dst, $src1, $src2 #@addP_reg_reg" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ add_d(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addP_reg_reg_M8(mRegP dst, mRegP src1, mRegLorI2L src2, immL_M8 M8) %{ ++ match(Set dst (AddP src1 (AndL src2 M8))); ++ format %{ "dadd $dst, $src1, $src2 #@addP_reg_reg_M8" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ bstrins_d(src2, R0, 2, 0); ++ __ add_d(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addP_reg_imm12(mRegP dst, mRegP src1, immL12 src2) %{ ++ match(Set dst (AddP src1 src2)); ++ ++ format %{ "ADD $dst, $src1, $src2 #@addP_reg_imm12" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ long src2 = $src2$$constant; ++ Register dst = $dst$$Register; ++ ++ __ addi_d(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++instruct addP_salL_Reg_RegI2L_immI_1_4(mRegP dst, mRegP src1, mRegI src2, immI_1_4 shift) %{ ++ match(Set dst (AddP src1 (LShiftL (ConvI2L src2) shift))); ++ ++ format %{ "alsl $dst, $src1, $src2, $shift #@addP_salL_Reg_RegI2L_immI_1_4" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ int sh = $shift$$constant; ++ __ alsl_d(dst, src2, src1, sh - 1); ++ %} ++ ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Add Long Register with Register ++instruct addL_Reg_Reg(mRegL dst, mRegLorI2L src1, mRegLorI2L src2) %{ ++ match(Set dst (AddL src1 src2)); ++ ins_cost(200); ++ format %{ "ADD $dst, $src1, $src2 #@addL_Reg_Reg\t" %} ++ ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ add_d(dst_reg, src1_reg, src2_reg); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct addL_Reg_imm(mRegL dst, mRegLorI2L src1, immL12 src2) ++%{ ++ match(Set dst (AddL src1 src2)); ++ ++ format %{ "ADD $dst, $src1, $src2 #@addL_Reg_imm " %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ int src2_imm = $src2$$constant; ++ ++ __ addi_d(dst_reg, src1_reg, src2_imm); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++//----------Abs Instructions------------------------------------------- ++ ++// Integer Absolute Instructions ++instruct absI_rReg(mRegI dst, mRegI src) ++%{ ++ match(Set dst (AbsI src)); ++ effect(TEMP dst); ++ format %{ "AbsI $dst, $src" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ srai_w(AT, src, 31); ++ __ xorr(dst, src, AT); ++ __ sub_w(dst, dst, AT); ++ %} ++ ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Long Absolute Instructions ++instruct absL_rReg(mRegL dst, mRegLorI2L src) ++%{ ++ match(Set dst (AbsL src)); ++ effect(TEMP dst); ++ format %{ "AbsL $dst, $src" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ srai_d(AT, src, 63); ++ __ xorr(dst, src, AT); ++ __ sub_d(dst, dst, AT); ++ %} ++ ++ ins_pipe(ialu_regL_regL); ++%} ++ ++//----------Subtraction 
Instructions------------------------------------------- ++// Integer Subtraction Instructions ++instruct subI_Reg_Reg(mRegI dst, mRegIorL2I src1, mRegIorL2I src2) %{ ++ match(Set dst (SubI src1 src2)); ++ ins_cost(100); ++ ++ format %{ "sub $dst, $src1, $src2 #@subI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ sub_w(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct subI_Reg_immI_M2047_2048(mRegI dst, mRegIorL2I src1, immI_M2047_2048 src2) %{ ++ match(Set dst (SubI src1 src2)); ++ ins_cost(80); ++ ++ format %{ "sub $dst, $src1, $src2 #@subI_Reg_immI_M2047_2048" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ __ addi_w(dst, src1, -1 * $src2$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct negI_Reg(mRegI dst, immI_0 zero, mRegIorL2I src) %{ ++ match(Set dst (SubI zero src)); ++ ins_cost(80); ++ ++ format %{ "neg $dst, $src #@negI_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ __ sub_w(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct negL_Reg(mRegL dst, immL_0 zero, mRegLorI2L src) %{ ++ match(Set dst (SubL zero src)); ++ ins_cost(80); ++ ++ format %{ "neg $dst, $src #@negL_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ __ sub_d(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct subL_Reg_immL_M2047_2048(mRegL dst, mRegL src1, immL_M2047_2048 src2) %{ ++ match(Set dst (SubL src1 src2)); ++ ins_cost(80); ++ ++ format %{ "sub $dst, $src1, $src2 #@subL_Reg_immL_M2047_2048" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ __ addi_d(dst, src1, -1 * $src2$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Subtract Long Register with Register. 
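++// Note on the immediate forms above: subI_Reg_immI_M2047_2048 and
++// subL_Reg_immL_M2047_2048 are encoded as addi_w/addi_d with the negated
++// constant; the -2047..2048 operand range presumably keeps the negated value
++// inside addi's signed 12-bit immediate (-2048..2047). The register-register
++// forms use sub_w/sub_d directly.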
++instruct subL_Reg_Reg(mRegL dst, mRegLorI2L src1, mRegLorI2L src2) %{ ++ match(Set dst (SubL src1 src2)); ++ ins_cost(100); ++ format %{ "SubL $dst, $src1, $src2 @ subL_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src1 = as_Register($src1$$reg); ++ Register src2 = as_Register($src2$$reg); ++ ++ __ sub_d(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Integer MOD with Register ++instruct modI_Reg_Reg(mRegI dst, mRegIorL2I src1, mRegIorL2I src2) %{ ++ match(Set dst (ModI src1 src2)); ++ ins_cost(300); ++ format %{ "modi $dst, $src1, $src2 @ modI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ mod_w(dst, src1, src2); ++ %} ++ ++ //ins_pipe( ialu_mod ); ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct modL_reg_reg(mRegL dst, mRegLorI2L src1, mRegLorI2L src2) %{ ++ match(Set dst (ModL src1 src2)); ++ format %{ "modL $dst, $src1, $src2 @modL_reg_reg" %} ++ ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ __ mod_d(dst, op1, op2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mulI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (MulI src1 src2)); ++ ++ ins_cost(300); ++ format %{ "mul $dst, $src1, $src2 @ mulI_Reg_Reg" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ ++ __ mul_w(dst, src1, src2); ++ %} ++ ins_pipe( ialu_mult ); ++%} ++ ++instruct divI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (DivI src1 src2)); ++ ++ ins_cost(300); ++ format %{ "div $dst, $src1, $src2 @ divI_Reg_Reg" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ ++ __ div_w(dst, src1, src2); ++ ++ %} ++ ins_pipe( ialu_mod ); ++%} ++ ++instruct divF_Reg_Reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (DivF src1 src2)); ++ ++ ins_cost(300); ++ format %{ "divF $dst, $src1, $src2 @ divF_Reg_Reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ fdiv_s(dst, src1, src2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct divD_Reg_Reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (DivD src1 src2)); ++ ++ ins_cost(300); ++ format %{ "divD $dst, $src1, $src2 @ divD_Reg_Reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ fdiv_d(dst, src1, src2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mulL_reg_reg(mRegL dst, mRegLorI2L src1, mRegLorI2L src2) %{ ++ match(Set dst (MulL src1 src2)); ++ format %{ "mulL $dst, $src1, $src2 @mulL_reg_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ __ mul_d(dst, op1, op2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mulHiL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (MulHiL src1 src2)); ++ format %{ "mulHiL $dst, $src1, $src2 @mulL_reg_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ __ mulh_d(dst, op1, op2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct 
divL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (DivL src1 src2)); ++ format %{ "divL $dst, $src1, $src2 @divL_reg_reg" %} ++ ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ __ div_d(dst, op1, op2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (AddF src1 src2)); ++ format %{ "AddF $dst, $src1, $src2 @addF_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fadd_s(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (SubF src1 src2)); ++ format %{ "SubF $dst, $src1, $src2 @subF_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fsub_s(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (AddD src1 src2)); ++ format %{ "AddD $dst, $src1, $src2 @addD_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fadd_d(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (SubD src1 src2)); ++ format %{ "SubD $dst, $src1, $src2 @subD_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fsub_d(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct negF_reg(regF dst, regF src) %{ ++ match(Set dst (NegF src)); ++ format %{ "negF $dst, $src @negF_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fneg_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct negD_reg(regD dst, regD src) %{ ++ match(Set dst (NegD src)); ++ format %{ "negD $dst, $src @negD_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fneg_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++ ++instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (MulF src1 src2)); ++ format %{ "MULF $dst, $src1, $src2 @mulF_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ fmul_s(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++// Mul two double precision floating piont number ++instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (MulD src1 src2)); ++ format %{ "MULD $dst, $src1, $src2 @mulD_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ fmul_d(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct absF_reg(regF dst, regF src) %{ ++ match(Set dst (AbsF src)); ++ ins_cost(100); ++ format %{ "absF $dst, $src @absF_reg" %} 
++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fabs_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++ ++// intrinsics for math_native. ++// AbsD SqrtD CosD SinD TanD LogD Log10D ++ ++instruct absD_reg(regD dst, regD src) %{ ++ match(Set dst (AbsD src)); ++ ins_cost(100); ++ format %{ "absD $dst, $src @absD_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fabs_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct sqrtD_reg(regD dst, regD src) %{ ++ match(Set dst (SqrtD src)); ++ ins_cost(100); ++ format %{ "SqrtD $dst, $src @sqrtD_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fsqrt_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct sqrtF_reg(regF dst, regF src) %{ ++ match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); ++ ins_cost(100); ++ format %{ "SqrtF $dst, $src @sqrtF_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fsqrt_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++// src1 * src2 + src3 ++instruct maddF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF src3 (Binary src1 src2))); ++ ++ format %{ "fmadd_s $dst, $src1, $src2, $src3" %} ++ ++ ins_encode %{ ++ __ fmadd_s(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// src1 * src2 + src3 ++instruct maddD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD src3 (Binary src1 src2))); ++ ++ format %{ "fmadd_d $dst, $src1, $src2, $src3" %} ++ ++ ins_encode %{ ++ __ fmadd_d(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// src1 * src2 - src3 ++instruct msubF_reg_reg(regF dst, regF src1, regF src2, regF src3, immF_0 zero) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF (NegF src3) (Binary src1 src2))); ++ ++ format %{ "fmsub_s $dst, $src1, $src2, $src3" %} ++ ++ ins_encode %{ ++ __ fmsub_s(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// src1 * src2 - src3 ++instruct msubD_reg_reg(regD dst, regD src1, regD src2, regD src3, immD_0 zero) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD (NegD src3) (Binary src1 src2))); ++ ++ format %{ "fmsub_d $dst, $src1, $src2, $src3" %} ++ ++ ins_encode %{ ++ __ fmsub_d(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// -src1 * src2 - src3 ++instruct mnaddF_reg_reg(regF dst, regF src1, regF src2, regF src3, immF_0 zero) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF (NegF src3) (Binary (NegF src1) src2))); ++ match(Set dst (FmaF (NegF src3) (Binary src1 (NegF src2)))); ++ ++ format %{ "fnmadds $dst, $src1, $src2, $src3" %} ++ ++ ins_encode %{ ++ __ fnmadd_s(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// -src1 * 
src2 - src3 ++instruct mnaddD_reg_reg(regD dst, regD src1, regD src2, regD src3, immD_0 zero) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD (NegD src3) (Binary (NegD src1) src2))); ++ match(Set dst (FmaD (NegD src3) (Binary src1 (NegD src2)))); ++ ++ format %{ "fnmaddd $dst, $src1, $src2, $src3" %} ++ ++ ins_encode %{ ++ __ fnmadd_d(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// -src1 * src2 + src3 ++instruct mnsubF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF src3 (Binary (NegF src1) src2))); ++ match(Set dst (FmaF src3 (Binary src1 (NegF src2)))); ++ ++ format %{ "fnmsubs $dst, $src1, $src2, $src3" %} ++ ++ ins_encode %{ ++ __ fnmsub_s(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// -src1 * src2 + src3 ++instruct mnsubD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD src3 (Binary (NegD src1) src2))); ++ match(Set dst (FmaD src3 (Binary src1 (NegD src2)))); ++ ++ format %{ "fnmsubd $dst, $src1, $src2, $src3" %} ++ ++ ins_encode %{ ++ __ fnmsub_d(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++instruct copySignF_reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (CopySignF src1 src2)); ++ effect(TEMP_DEF dst, USE src1, USE src2); ++ ++ format %{ "fcopysign_s $dst $src1 $src2 @ copySignF_reg" %} ++ ++ ins_encode %{ ++ __ fcopysign_s($dst$$FloatRegister, ++ $src1$$FloatRegister, ++ $src2$$FloatRegister); ++ %} ++ ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct copySignD_reg(regD dst, regD src1, regD src2, immD_0 zero) %{ ++ match(Set dst (CopySignD src1 (Binary src2 zero))); ++ effect(TEMP_DEF dst, USE src1, USE src2); ++ ++ format %{ "fcopysign_d $dst $src1 $src2 @ copySignD_reg" %} ++ ++ ins_encode %{ ++ __ fcopysign_d($dst$$FloatRegister, ++ $src1$$FloatRegister, ++ $src2$$FloatRegister); ++ %} ++ ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++//----------------------------------Logical Instructions---------------------- ++//__________________________________Integer Logical Instructions------------- ++ ++//And Instuctions ++// And Register with Immediate ++instruct andI_Reg_imm_0_4095(mRegI dst, mRegI src1, immI_0_4095 src2) %{ ++ match(Set dst (AndI src1 src2)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $src2 #@andI_Reg_imm_0_4095" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ ++ __ andi(dst, src, val); ++ ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andI_Reg_immI_nonneg_mask(mRegI dst, mRegI src1, immI_nonneg_mask mask) %{ ++ match(Set dst (AndI src1 mask)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $mask #@andI_Reg_immI_nonneg_mask" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int size = Assembler::is_int_mask($mask$$constant); ++ ++ __ bstrpick_w(dst, src, size-1, 0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_nonneg_mask(mRegL dst, mRegL src1, immL_nonneg_mask mask) %{ ++ match(Set dst (AndL src1 mask)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $mask #@andL_Reg_immL_nonneg_mask" %} ++ ins_encode %{ ++ Register dst = 
$dst$$Register; ++ Register src = $src1$$Register; ++ int size = Assembler::is_jlong_mask($mask$$constant); ++ ++ __ bstrpick_d(dst, src, size-1, 0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorI_Reg_imm_0_4095(mRegI dst, mRegI src1, immI_0_4095 src2) %{ ++ match(Set dst (XorI src1 src2)); ++ ins_cost(60); ++ ++ format %{ "xori $dst, $src1, $src2 #@xorI_Reg_imm_0_4095" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ ++ __ xori(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorI_Reg_immI_M1(mRegI dst, mRegIorL2I src1, immI_M1 M1) %{ ++ match(Set dst (XorI src1 M1)); ++ ins_cost(60); ++ ++ format %{ "xor $dst, $src1, $M1 #@xorI_Reg_immI_M1" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ ++ __ orn(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorL_Reg_imm_0_4095(mRegL dst, mRegL src1, immL_0_4095 src2) %{ ++ match(Set dst (XorL src1 src2)); ++ ins_cost(60); ++ ++ format %{ "xori $dst, $src1, $src2 #@xorL_Reg_imm_0_4095" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ ++ __ xori(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++instruct lbu_and_lmask(mRegI dst, memory mem, immI_255 mask) %{ ++ match(Set dst (AndI mask (LoadB mem))); ++ ins_cost(60); ++ ++ format %{ "lhu $dst, $mem #@lbu_and_lmask" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct lbu_and_rmask(mRegI dst, memory mem, immI_255 mask) %{ ++ match(Set dst (AndI (LoadB mem) mask)); ++ ins_cost(60); ++ ++ format %{ "lhu $dst, $mem #@lbu_and_rmask" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct andI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (AndI src1 src2)); ++ ++ format %{ "and $dst, $src1, $src2 #@andI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ andr(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andnI_Reg_nReg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (AndI src1 (XorI src2 M1))); ++ ++ format %{ "andn $dst, $src1, $src2 #@andnI_Reg_nReg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ andn(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct ornI_Reg_nReg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (OrI src1 (XorI src2 M1))); ++ ++ format %{ "orn $dst, $src1, $src2 #@ornI_Reg_nReg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ orn(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andnI_nReg_Reg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (AndI (XorI src1 M1) src2)); ++ ++ format %{ "andn $dst, $src2, $src1 #@andnI_nReg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ andn(dst, src2, src1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct ornI_nReg_Reg(mRegI 
dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (OrI (XorI src1 M1) src2)); ++ ++ format %{ "orn $dst, $src2, $src1 #@ornI_nReg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ orn(dst, src2, src1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// And Long Register with Register ++instruct andL_Reg_Reg(mRegL dst, mRegL src1, mRegLorI2L src2) %{ ++ match(Set dst (AndL src1 src2)); ++ format %{ "AND $dst, $src1, $src2 @ andL_Reg_Reg\n\t" %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ andr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct andL_Reg_imm_0_4095(mRegL dst, mRegL src1, immL_0_4095 src2) %{ ++ match(Set dst (AndL src1 src2)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $src2 #@andL_Reg_imm_0_4095" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ long val = $src2$$constant; ++ ++ __ andi(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL2I_Reg_imm_0_4095(mRegI dst, mRegL src1, immL_0_4095 src2) %{ ++ match(Set dst (ConvL2I (AndL src1 src2))); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $src2 #@andL2I_Reg_imm_0_4095" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ long val = $src2$$constant; ++ ++ __ andi(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++instruct andL_Reg_immL_M8(mRegL dst, immL_M8 M8) %{ ++ match(Set dst (AndL dst M8)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M8 #@andL_Reg_immL_M8" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ bstrins_d(dst, R0, 2, 0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M5(mRegL dst, immL_M5 M5) %{ ++ match(Set dst (AndL dst M5)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M5 #@andL_Reg_immL_M5" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ bstrins_d(dst, R0, 2, 2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M7(mRegL dst, immL_M7 M7) %{ ++ match(Set dst (AndL dst M7)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M7 #@andL_Reg_immL_M7" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ bstrins_d(dst, R0, 2, 1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M4(mRegL dst, immL_M4 M4) %{ ++ match(Set dst (AndL dst M4)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M4 #@andL_Reg_immL_M4" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ bstrins_d(dst, R0, 1, 0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M121(mRegL dst, immL_M121 M121) %{ ++ match(Set dst (AndL dst M121)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M121 #@andL_Reg_immL_M121" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ bstrins_d(dst, R0, 6, 3); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Or Long Register with Register ++instruct orL_Reg_Reg(mRegL dst, mRegLorI2L src1, mRegLorI2L src2) %{ ++ match(Set dst (OrL src1 src2)); ++ format %{ "OR $dst, $src1, $src2 @ orL_Reg_Reg\t" %} ++ ins_encode %{ ++ Register dst_reg = $dst$$Register; ++ Register src1_reg = $src1$$Register; ++ Register src2_reg = $src2$$Register; ++ ++ __ orr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct orL_Reg_P2XReg(mRegL dst, mRegP src1, mRegLorI2L 
src2) %{ ++ match(Set dst (OrL (CastP2X src1) src2)); ++ format %{ "OR $dst, $src1, $src2 @ orL_Reg_P2XReg\t" %} ++ ins_encode %{ ++ Register dst_reg = $dst$$Register; ++ Register src1_reg = $src1$$Register; ++ Register src2_reg = $src2$$Register; ++ ++ __ orr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Xor Long Register with Register ++instruct xorL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (XorL src1 src2)); ++ format %{ "XOR $dst, $src1, $src2 @ xorL_Reg_Reg\t" %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ xorr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Left by 5-bit immediate ++instruct salI_Reg_imm(mRegI dst, mRegIorL2I src, immIU5 shift) %{ ++ match(Set dst (LShiftI src shift)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_Reg_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shamt = $shift$$constant; ++ ++ __ slli_w(dst, src, shamt); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct salI_Reg_imm_and_M65536(mRegI dst, mRegI src, immI_16 shift, immI_M65536 mask) %{ ++ match(Set dst (AndI (LShiftI src shift) mask)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_Reg_imm_and_M65536" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ slli_w(dst, src, 16); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct land7_2_s(mRegI dst, mRegL src, immL_7 seven, immI_16 sixteen) ++%{ ++ match(Set dst (RShiftI (LShiftI (ConvL2I (AndL src seven)) sixteen) sixteen)); ++ ++ format %{ "andi $dst, $src, 7\t# @land7_2_s" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ andi(dst, src, 7); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16. ++// This idiom is used by the compiler the i2s bytecode. ++instruct i2s(mRegI dst, mRegI src, immI_16 sixteen) ++%{ ++ match(Set dst (RShiftI (LShiftI src sixteen) sixteen)); ++ ++ format %{ "i2s $dst, $src\t# @i2s" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ ext_w_h(dst, src); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24. ++// This idiom is used by the compiler for the i2b bytecode. 
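++// Illustrative Java-level sketch of the idioms these two rules collapse:
++//   int s = (x << 16) >> 16;   // i2s, matched above and emitted as a single ext_w_h
++//   int b = (x << 24) >> 24;   // i2b, matched below and emitted as a single ext_w_b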
++instruct i2b(mRegI dst, mRegI src, immI_24 twentyfour) ++%{ ++ match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour)); ++ ++ format %{ "i2b $dst, $src\t# @i2b" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ ext_w_b(dst, src); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++ ++instruct salI_RegL2I_imm(mRegI dst, mRegL src, immIU5 shift) %{ ++ match(Set dst (LShiftI (ConvL2I src) shift)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_RegL2I_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shamt = $shift$$constant; ++ ++ __ slli_w(dst, src, shamt); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Shift Left by 8-bit immediate ++instruct salI_Reg_Reg(mRegI dst, mRegIorL2I src, mRegI shift) %{ ++ match(Set dst (LShiftI src shift)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_Reg_Reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ Register shamt = $shift$$Register; ++ __ sll_w(dst, src, shamt); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++// Shift Left Long 6-bit immI ++instruct salL_Reg_imm(mRegL dst, mRegLorI2L src, immIU6 shift) %{ ++ match(Set dst (LShiftL src shift)); ++ ins_cost(100); ++ format %{ "salL $dst, $src, $shift @ salL_Reg_imm" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ slli_d(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Left Long ++instruct salL_Reg_Reg(mRegL dst, mRegLorI2L src, mRegI shift) %{ ++ match(Set dst (LShiftL src shift)); ++ ins_cost(100); ++ format %{ "salL $dst, $src, $shift @ salL_Reg_Reg" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ ++ __ sll_d(dst_reg, src_reg, $shift$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Right Long 6-bit ++instruct sarL_Reg_imm(mRegL dst, mRegLorI2L src, immIU6 shift) %{ ++ match(Set dst (RShiftL src shift)); ++ ins_cost(100); ++ format %{ "sarL $dst, $src, $shift @ sarL_Reg_imm" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ srai_d(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct sarL2I_Reg_immI_32_63(mRegI dst, mRegLorI2L src, immI_32_63 shift) %{ ++ match(Set dst (ConvL2I (RShiftL src shift))); ++ ins_cost(100); ++ format %{ "sarL $dst, $src, $shift @ sarL2I_Reg_immI_32_63" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ srai_d(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Right Long arithmetically ++instruct sarL_Reg_Reg(mRegL dst, mRegLorI2L src, mRegI shift) %{ ++ match(Set dst (RShiftL src shift)); ++ ins_cost(100); ++ format %{ "sarL $dst, $src, $shift @ sarL_Reg_Reg" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ ++ __ sra_d(dst_reg, src_reg, $shift$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Right Long logically ++instruct slrL_Reg_Reg(mRegL dst, mRegL src, mRegI shift) %{ ++ match(Set dst (URShiftL src shift)); ++ ins_cost(100); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_Reg" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg 
= as_Register($dst$$reg); ++ ++ __ srl_d(dst_reg, src_reg, $shift$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_0_31(mRegL dst, mRegLorI2L src, immI_0_31 shift) %{ ++ match(Set dst (URShiftL src shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ srli_d(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_0_31_and_max_int(mRegI dst, mRegLorI2L src, immI_0_31 shift, immI_MaxI max_int) %{ ++ match(Set dst (AndI (ConvL2I (URShiftL src shift)) max_int)); ++ ins_cost(80); ++ format %{ "bstrpick_d $dst, $src, $shift+30, shift @ slrL_Reg_immI_0_31_and_max_int" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ bstrpick_d(dst_reg, src_reg, shamt+30, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_P2XReg_immI_0_31(mRegL dst, mRegP src, immI_0_31 shift) %{ ++ match(Set dst (URShiftL (CastP2X src) shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_P2XReg_immI_0_31" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ srli_d(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_32_63(mRegL dst, mRegLorI2L src, immI_32_63 shift) %{ ++ match(Set dst (URShiftL src shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_32_63" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ srli_d(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_convL2I(mRegI dst, mRegLorI2L src, immI_32_63 shift) %{ ++ match(Set dst (ConvL2I (URShiftL src shift))); ++ predicate(n->in(1)->in(2)->get_int() > 32); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_convL2I" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ srli_d(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_P2XReg_immI_32_63(mRegL dst, mRegP src, immI_32_63 shift) %{ ++ match(Set dst (URShiftL (CastP2X src) shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_P2XReg_immI_32_63" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ srli_d(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Xor Instructions ++// Xor Register with Register ++instruct xorI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (XorI src1 src2)); ++ ++ format %{ "XOR $dst, $src1, $src2 #@xorI_Reg_Reg" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ xorr(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Or Instructions ++instruct orI_Reg_imm(mRegI dst, mRegI src1, immI_0_4095 src2) %{ ++ match(Set dst (OrI src1 src2)); ++ ++ format %{ "OR $dst, $src1, $src2 #@orI_Reg_imm" %} ++ ins_encode %{ ++ __ ori($dst$$Register, $src1$$Register, $src2$$constant); ++ %} ++ ++ 
ins_pipe( ialu_regI_regI ); ++%} ++ ++// Or Register with Register ++instruct orI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (OrI src1 src2)); ++ ++ format %{ "OR $dst, $src1, $src2 #@orI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ orr(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rotI_shr_logical_Reg(mRegI dst, mRegI src, immI_0_31 rshift, immI_0_31 lshift, immI_1 one) %{ ++ match(Set dst (OrI (URShiftI src rshift) (LShiftI (AndI src one) lshift))); ++ predicate(32 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()))); ++ ++ format %{ "rotri_w $dst, $src, 1 ...\n\t" ++ "srli_w $dst, $dst, ($rshift-1) @ rotI_shr_logical_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int rshift = $rshift$$constant; ++ ++ __ rotri_w(dst, src, 1); ++ if (rshift - 1) { ++ __ srli_w(dst, dst, rshift - 1); ++ } ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct orI_Reg_castP2X(mRegL dst, mRegL src1, mRegP src2) %{ ++ match(Set dst (OrI src1 (CastP2X src2))); ++ ++ format %{ "OR $dst, $src1, $src2 #@orI_Reg_castP2X" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ orr(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Logical Shift Right by 5-bit immediate ++instruct shr_logical_Reg_imm(mRegI dst, mRegI src, immIU5 shift) %{ ++ match(Set dst (URShiftI src shift)); ++ //effect(KILL cr); ++ ++ format %{ "SRLI_W $dst, $src, $shift #@shr_logical_Reg_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shift = $shift$$constant; ++ ++ __ srli_w(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct shr_logical_Reg_imm_nonneg_mask(mRegI dst, mRegI src, immI_0_31 shift, immI_nonneg_mask mask) %{ ++ match(Set dst (AndI (URShiftI src shift) mask)); ++ ++ format %{ "bstrpick_w $dst, $src, $shift+one-bits($mask)-1, shift #@shr_logical_Reg_imm_nonneg_mask" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int pos = $shift$$constant; ++ int size = Assembler::is_int_mask($mask$$constant); ++ ++ __ bstrpick_w(dst, src, pos+size-1, pos); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rolI_Reg_immI_0_31(mRegI dst, mRegI src, immI_0_31 lshift, immI_0_31 rshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); ++ match(Set dst (OrI (LShiftI src lshift) (URShiftI src rshift))); ++ ++ ins_cost(100); ++ format %{ "rotri_w $dst, $src, $rshift #@rolI_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotri_w(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rolL_Reg_immI_0_31(mRegL dst, mRegLorI2L src, immI_32_63 lshift, immI_0_31 rshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (LShiftL src lshift) (URShiftL src rshift))); ++ ++ ins_cost(100); ++ format %{ "rotri_d $dst, $src, $rshift #@rolL_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotri_d(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rolL_Reg_immI_32_63(mRegL dst, mRegLorI2L src, immI_0_31 lshift, immI_32_63 
rshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (LShiftL src lshift) (URShiftL src rshift))); ++ ++ ins_cost(100); ++ format %{ "rotri_d $dst, $src, $rshift #@rolL_Reg_immI_32_63" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotri_d(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rorI_Reg_immI_0_31(mRegI dst, mRegI src, immI_0_31 rshift, immI_0_31 lshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); ++ match(Set dst (OrI (URShiftI src rshift) (LShiftI src lshift))); ++ ++ ins_cost(100); ++ format %{ "rotri_w $dst, $src, $rshift #@rorI_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotri_w(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rorL_Reg_immI_0_31(mRegL dst, mRegLorI2L src, immI_0_31 rshift, immI_32_63 lshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (URShiftL src rshift) (LShiftL src lshift))); ++ ++ ins_cost(100); ++ format %{ "rotri_d $dst, $src, $rshift #@rorL_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotri_d(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rorL_Reg_immI_32_63(mRegL dst, mRegLorI2L src, immI_32_63 rshift, immI_0_31 lshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (URShiftL src rshift) (LShiftL src lshift))); ++ ++ ins_cost(100); ++ format %{ "rotri_d $dst, $src, $rshift #@rorL_Reg_immI_32_63" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotri_d(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Logical Shift Right ++instruct shr_logical_Reg_Reg(mRegI dst, mRegI src, mRegI shift) %{ ++ match(Set dst (URShiftI src shift)); ++ ++ format %{ "SRL_W $dst, $src, $shift #@shr_logical_Reg_Reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ Register shift = $shift$$Register; ++ __ srl_w(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++instruct shr_arith_Reg_imm(mRegI dst, mRegI src, immIU5 shift) %{ ++ match(Set dst (RShiftI src shift)); ++ // effect(KILL cr); ++ ++ format %{ "SRAI_W $dst, $src, $shift #@shr_arith_Reg_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shift = $shift$$constant; ++ __ srai_w(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct shr_arith_Reg_Reg(mRegI dst, mRegI src, mRegI shift) %{ ++ match(Set dst (RShiftI src shift)); ++ // effect(KILL cr); ++ ++ format %{ "SRA_W $dst, $src, $shift #@shr_arith_Reg_Reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ Register shift = $shift$$Register; ++ __ sra_w(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++//----------Convert Int to Boolean--------------------------------------------- ++ ++instruct convI2B(mRegI dst, mRegI src) %{ ++ match(Set dst (Conv2B src)); ++ ++ ins_cost(100); ++ format %{ "convI2B $dst, $src @ convI2B" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = 
as_Register($src$$reg); ++ ++ if (dst != src) { ++ __ addi_d(dst, R0, 1); ++ __ maskeqz(dst, dst, src); ++ } else { ++ __ move(AT, src); ++ __ addi_d(dst, R0, 1); ++ __ maskeqz(dst, dst, AT); ++ } ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct convI2L_reg( mRegL dst, mRegI src) %{ ++ match(Set dst (ConvI2L src)); ++ ++ ins_cost(100); ++ format %{ "SLLI_W $dst, $src @ convI2L_reg\t" %} ++ ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ if(dst != src) __ slli_w(dst, src, 0); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct convL2I_reg( mRegI dst, mRegLorI2L src ) %{ ++ match(Set dst (ConvL2I src)); ++ ++ format %{ "MOV $dst, $src @ convL2I_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ __ slli_w(dst, src, 0); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct convL2D_reg( regD dst, mRegL src ) %{ ++ match(Set dst (ConvL2D src)); ++ format %{ "convL2D $dst, $src @ convL2D_reg" %} ++ ins_encode %{ ++ Register src = as_Register($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ movgr2fr_d(dst, src); ++ __ ffint_d_l(dst, dst); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++// Convert double to int. ++// If the double is NaN, stuff a zero in instead. ++instruct convD2I_reg_reg(mRegI dst, regD src, regD tmp) %{ ++ match(Set dst (ConvD2I src)); ++ effect(USE src, TEMP tmp); ++ ++ format %{ "convd2i $dst, $src, using $tmp as TEMP @ convD2I_reg_reg" %} ++ ++ ins_encode %{ ++ __ ftintrz_w_d($tmp$$FloatRegister, $src$$FloatRegister); ++ __ movfr2gr_s($dst$$Register, $tmp$$FloatRegister); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct convD2L_reg_reg(mRegL dst, regD src, regD tmp) %{ ++ match(Set dst (ConvD2L src)); ++ effect(USE src, TEMP tmp); ++ ++ format %{ "convd2l $dst, $src, using $tmp as TEMP @ convD2L_reg_reg" %} ++ ++ ins_encode %{ ++ __ ftintrz_l_d($tmp$$FloatRegister, $src$$FloatRegister); ++ __ movfr2gr_d($dst$$Register, $tmp$$FloatRegister); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++// Convert float to int. ++// If the float is NaN, stuff a zero in instead. 
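++// Illustrative Java-level sketch of what the float-to-integer conversions below cover:
++//   int  i = (int)  f;   // ConvF2I: ftintrz_w_s truncates toward zero into $tmp, movfr2gr_s copies it to $dst
++//   long l = (long) f;   // ConvF2L: same shape with ftintrz_l_s / movfr2gr_d
++// The FP temp is required because ftintrz writes a float register, not a GPR.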
++instruct convF2I_reg_reg(mRegI dst, regF src, regF tmp) %{ ++ match(Set dst (ConvF2I src)); ++ effect(USE src, TEMP tmp); ++ ++ format %{ "convf2i $dst, $src, using $tmp as TEMP @ convF2I_reg_reg" %} ++ ++ ins_encode %{ ++ __ ftintrz_w_s($tmp$$FloatRegister, $src$$FloatRegister); ++ __ movfr2gr_s($dst$$Register, $tmp$$FloatRegister); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct convF2L_reg_reg(mRegL dst, regF src, regF tmp) %{ ++ match(Set dst (ConvF2L src)); ++ effect(USE src, TEMP tmp); ++ ++ format %{ "convf2l $dst, $src, using $tmp as TEMP @ convF2L_reg_reg" %} ++ ++ ins_encode %{ ++ __ ftintrz_l_s($tmp$$FloatRegister, $src$$FloatRegister); ++ __ movfr2gr_d($dst$$Register, $tmp$$FloatRegister); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convL2F_reg( regF dst, mRegL src ) %{ ++ match(Set dst (ConvL2F src)); ++ format %{ "convl2f $dst, $src @ convL2F_reg" %} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ Register src = as_Register($src$$reg); ++ Label L; ++ ++ __ movgr2fr_d(dst, src); ++ __ ffint_s_l(dst, dst); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct convI2F_reg( regF dst, mRegI src ) %{ ++ match(Set dst (ConvI2F src)); ++ format %{ "convi2f $dst, $src @ convI2F_reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ movgr2fr_w(dst, src); ++ __ ffint_s_w(dst, dst); ++ %} ++ ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct cmpLTMask_immI_0( mRegI dst, mRegI p, immI_0 zero ) %{ ++ match(Set dst (CmpLTMask p zero)); ++ ins_cost(100); ++ ++ format %{ "srai_w $dst, $p, 31 @ cmpLTMask_immI_0" %} ++ ins_encode %{ ++ Register src = $p$$Register; ++ Register dst = $dst$$Register; ++ ++ __ srai_w(dst, src, 31); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmpLTMask( mRegI dst, mRegI p, mRegI q ) %{ ++ match(Set dst (CmpLTMask p q)); ++ ins_cost(400); ++ ++ format %{ "cmpLTMask $dst, $p, $q @ cmpLTMask" %} ++ ins_encode %{ ++ Register p = $p$$Register; ++ Register q = $q$$Register; ++ Register dst = $dst$$Register; ++ ++ __ slt(dst, p, q); ++ __ sub_d(dst, R0, dst); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct convP2B(mRegI dst, mRegP src) %{ ++ match(Set dst (Conv2B src)); ++ ++ ins_cost(100); ++ format %{ "convP2B $dst, $src @ convP2B" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ if (dst != src) { ++ __ addi_d(dst, R0, 1); ++ __ maskeqz(dst, dst, src); ++ } else { ++ __ move(AT, src); ++ __ addi_d(dst, R0, 1); ++ __ maskeqz(dst, dst, AT); ++ } ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++ ++instruct convI2D_reg_reg(regD dst, mRegI src) %{ ++ match(Set dst (ConvI2D src)); ++ format %{ "conI2D $dst, $src @convI2D_reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ FloatRegister dst = $dst$$FloatRegister; ++ __ movgr2fr_w(dst ,src); ++ __ ffint_d_w(dst, dst); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct convF2D_reg_reg(regD dst, regF src) %{ ++ match(Set dst (ConvF2D src)); ++ format %{ "convF2D $dst, $src\t# @convF2D_reg_reg" %} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ FloatRegister src = $src$$FloatRegister; ++ ++ __ fcvt_d_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct convD2F_reg_reg(regF dst, regD src) %{ ++ match(Set dst (ConvD2F src)); ++ format %{ "convD2F $dst, $src\t# @convD2F_reg_reg" %} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ FloatRegister src = $src$$FloatRegister; ++ ++ __ fcvt_s_d(dst, src); ++ %} ++ 
ins_pipe( fpu_regF_regF ); ++%} ++ ++ ++// Convert oop pointer into compressed form ++instruct encodeHeapOop(mRegN dst, mRegP src) %{ ++ predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull); ++ match(Set dst (EncodeP src)); ++ format %{ "encode_heap_oop $dst,$src" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ encode_heap_oop(dst, src); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct encodeHeapOop_not_null(mRegN dst, mRegP src) %{ ++ predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull); ++ match(Set dst (EncodeP src)); ++ format %{ "encode_heap_oop_not_null $dst,$src @ encodeHeapOop_not_null" %} ++ ins_encode %{ ++ __ encode_heap_oop_not_null($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct decodeHeapOop(mRegP dst, mRegN src) %{ ++ predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull && ++ n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant); ++ match(Set dst (DecodeN src)); ++ format %{ "decode_heap_oop $dst,$src @ decodeHeapOop" %} ++ ins_encode %{ ++ Register s = $src$$Register; ++ Register d = $dst$$Register; ++ ++ __ decode_heap_oop(d, s); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct decodeHeapOop_not_null(mRegP dst, mRegN src) %{ ++ predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull || ++ n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant); ++ match(Set dst (DecodeN src)); ++ format %{ "decode_heap_oop_not_null $dst,$src @ decodeHeapOop_not_null" %} ++ ins_encode %{ ++ Register s = $src$$Register; ++ Register d = $dst$$Register; ++ if (s != d) { ++ __ decode_heap_oop_not_null(d, s); ++ } else { ++ __ decode_heap_oop_not_null(d); ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct encodeKlass_not_null(mRegN dst, mRegP src) %{ ++ match(Set dst (EncodePKlass src)); ++ format %{ "encode_heap_oop_not_null $dst,$src @ encodeKlass_not_null" %} ++ ins_encode %{ ++ __ encode_klass_not_null($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct decodeKlass_not_null(mRegP dst, mRegN src) %{ ++ match(Set dst (DecodeNKlass src)); ++ format %{ "decode_heap_klass_not_null $dst,$src" %} ++ ins_encode %{ ++ Register s = $src$$Register; ++ Register d = $dst$$Register; ++ if (s != d) { ++ __ decode_klass_not_null(d, s); ++ } else { ++ __ decode_klass_not_null(d); ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++//FIXME ++instruct tlsLoadP(mRegP dst) %{ ++ match(Set dst (ThreadLocal)); ++ ++ ins_cost(0); ++ format %{ " get_thread in $dst #@tlsLoadP" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++#ifdef OPT_THREAD ++ __ move(dst, TREG); ++#else ++ __ get_thread(dst); ++#endif ++ %} ++ ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct checkCastPP( mRegP dst ) %{ ++ match(Set dst (CheckCastPP dst)); ++ ++ format %{ "#checkcastPP of $dst (empty encoding) #@chekCastPP" %} ++ ins_encode( /*empty encoding*/ ); ++ ins_pipe( empty ); ++%} ++ ++instruct castPP(mRegP dst) ++%{ ++ match(Set dst (CastPP dst)); ++ ++ size(0); ++ format %{ "# castPP of $dst" %} ++ ins_encode(/* empty encoding */); ++ ins_pipe(empty); ++%} ++ ++instruct castII( mRegI dst ) %{ ++ match(Set dst (CastII dst)); ++ format %{ "#castII of $dst empty encoding" %} ++ ins_encode( /*empty encoding*/ ); ++ ins_cost(0); ++ ins_pipe( empty ); ++%} ++ ++// Return Instruction ++// Remove the return address & jump to it. 
++instruct Ret() %{ ++ match(Return); ++ format %{ "RET #@Ret" %} ++ ++ ins_encode %{ ++ __ jr(RA); ++ %} ++ ++ ins_pipe( pipe_jump ); ++%} ++ ++ ++ ++// Tail Jump; remove the return address; jump to target. ++// TailCall above leaves the return address around. ++// TailJump is used in only one place, the rethrow_Java stub (fancy_jump=2). ++// ex_oop (Exception Oop) is needed in %o0 at the jump. As there would be a ++// "restore" before this instruction (in Epilogue), we need to materialize it ++// in %i0. ++//FIXME ++instruct tailjmpInd(no_Ax_mRegP jump_target, mRegP ex_oop) %{ ++ match( TailJump jump_target ex_oop ); ++ ins_cost(200); ++ format %{ "Jmp $jump_target ; ex_oop = $ex_oop #@tailjmpInd" %} ++ ins_encode %{ ++ Register target = $jump_target$$Register; ++ ++ // V0, V1 are indicated in: ++ // [stubGenerator_loongarch.cpp] generate_forward_exception() ++ // [runtime_loongarch.cpp] OptoRuntime::generate_exception_blob() ++ // ++ Register oop = $ex_oop$$Register; ++ Register exception_oop = V0; ++ Register exception_pc = V1; ++ ++ __ move(exception_pc, RA); ++ __ move(exception_oop, oop); ++ ++ __ jr(target); ++ %} ++ ins_pipe( pipe_jump ); ++%} ++ ++// ============================================================================ ++// Procedure Call/Return Instructions ++// Call Java Static Instruction ++// Note: If this code changes, the corresponding ret_addr_offset() and ++// compute_padding() functions will have to be adjusted. ++instruct CallStaticJavaDirect(method meth) %{ ++ match(CallStaticJava); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL,static #@CallStaticJavaDirect " %} ++ ins_encode( Java_Static_Call( meth ) ); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(4); ++%} ++ ++// Call Java Dynamic Instruction ++// Note: If this code changes, the corresponding ret_addr_offset() and ++// compute_padding() functions will have to be adjusted. ++instruct CallDynamicJavaDirect(method meth) %{ ++ match(CallDynamicJava); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{"MOV IC_Klass, #Universe::non_oop_word()\n\t" ++ "CallDynamic @ CallDynamicJavaDirect" %} ++ ins_encode( Java_Dynamic_Call( meth ) ); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(4); ++%} ++ ++instruct CallLeafNoFPDirect(method meth) %{ ++ match(CallLeafNoFP); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL_LEAF_NOFP,runtime " %} ++ ins_encode(Java_To_Runtime(meth)); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(4); ++%} ++ ++// Prefetch instructions for allocation. 
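++// The encoding below folds the full memory operand (base + index << scale + disp) into AT when
++// needed: alsl_d handles a scaled index, and a displacement outside the simm12 range is
++// materialized with li/add before preld is issued with hint 8 for the allocation prefetch.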
++ ++instruct prefetchAlloc(memory mem) %{ ++ match(PrefetchAllocation mem); ++ ins_cost(125); ++ format %{ "preld $mem\t# Prefetch allocation @ prefetchAlloc" %} ++ ins_encode %{ ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ if (index != 0) { ++ if (scale == 0) { ++ __ add_d(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ alsl_d(AT, as_Register(index), as_Register(base), scale - 1); ++ } ++ ++ if (Assembler::is_simm(disp, 12)) { ++ __ preld(8, AT, disp); ++ } else { ++ __ li(T4, disp); ++ __ add_d(AT, AT, T4); ++ __ preld(8, AT, 0); ++ } ++ } else { ++ if (Assembler::is_simm(disp, 12)) { ++ __ preld(8, as_Register(base), disp); ++ } else { ++ __ li(T4, disp); ++ __ add_d(AT, as_Register(base), T4); ++ __ preld(8, AT, 0); ++ } ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// Call runtime without safepoint ++instruct CallLeafDirect(method meth) %{ ++ match(CallLeaf); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL_LEAF,runtime #@CallLeafDirect " %} ++ ins_encode(Java_To_Runtime(meth)); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(4); ++%} ++ ++// Load Char (16bit unsigned) ++instruct loadUS(mRegI dst, memory mem) %{ ++ match(Set dst (LoadUS mem)); ++ ++ ins_cost(125); ++ format %{ "loadUS $dst,$mem @ loadC" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadUS_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadUS mem))); ++ ++ ins_cost(125); ++ format %{ "loadUS $dst,$mem @ loadUS_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Store Char (16bit unsigned) ++instruct storeC(memory mem, mRegIorL2I src) %{ ++ match(Set mem (StoreC mem src)); ++ ++ ins_cost(125); ++ format %{ "storeC $src, $mem @ storeC" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_CHAR); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct storeC_0(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreC mem zero)); ++ ++ ins_cost(125); ++ format %{ "storeC $zero, $mem @ storeC_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct loadConF_immF_0(regF dst, immF_0 zero) %{ ++ match(Set dst zero); ++ ins_cost(100); ++ ++ format %{ "mov $dst, zero @ loadConF_immF_0\n"%} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ movgr2fr_w(dst, R0); ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++ ++instruct loadConF(regF dst, immF src) %{ ++ match(Set dst src); ++ ins_cost(125); ++ ++ format %{ "fld_s $dst, $constantoffset[$constanttablebase] # load FLOAT $src from table @ loadConF" %} ++ ins_encode %{ ++ int con_offset = $constantoffset($src); ++ ++ if (Assembler::is_simm(con_offset, 12)) { ++ __ fld_s($dst$$FloatRegister, $constanttablebase, con_offset); ++ } else { ++ __ li(AT, con_offset); ++ __ fldx_s($dst$$FloatRegister, $constanttablebase, AT); ++ } ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++ ++instruct loadConD_immD_0(regD dst, immD_0 zero) %{ ++ match(Set dst zero); ++ ins_cost(100); ++ ++ format %{ "mov $dst, zero @ loadConD_immD_0"%} ++ ins_encode %{ ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ 
++ __ movgr2fr_d(dst, R0); ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++instruct loadConD(regD dst, immD src) %{ ++ match(Set dst src); ++ ins_cost(125); ++ ++ format %{ "fld_d $dst, $constantoffset[$constanttablebase] # load DOUBLE $src from table @ loadConD" %} ++ ins_encode %{ ++ int con_offset = $constantoffset($src); ++ ++ if (Assembler::is_simm(con_offset, 12)) { ++ __ fld_d($dst$$FloatRegister, $constanttablebase, con_offset); ++ } else { ++ __ li(AT, con_offset); ++ __ fldx_d($dst$$FloatRegister, $constanttablebase, AT); ++ } ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++// Store register Float value (it is faster than store from FPU register) ++instruct storeF_reg( memory mem, regF src) %{ ++ match(Set mem (StoreF mem src)); ++ ++ ins_cost(50); ++ format %{ "store $mem, $src\t# store float @ storeF_reg" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_FLOAT); ++ %} ++ ins_pipe( fpu_storeF ); ++%} ++ ++instruct storeF_immF_0( memory mem, immF_0 zero) %{ ++ match(Set mem (StoreF mem zero)); ++ ++ ins_cost(40); ++ format %{ "store $mem, zero\t# store float @ storeF_immF_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Load Double ++instruct loadD(regD dst, memory mem) %{ ++ match(Set dst (LoadD mem)); ++ ++ ins_cost(150); ++ format %{ "loadD $dst, $mem #@loadD" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_DOUBLE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Double - UNaligned ++instruct loadD_unaligned(regD dst, memory mem ) %{ ++ match(Set dst (LoadD_unaligned mem)); ++ ins_cost(250); ++ // FIXME: Need more effective ldl/ldr ++ format %{ "loadD_unaligned $dst, $mem #@loadD_unaligned" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_DOUBLE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct storeD_reg( memory mem, regD src) %{ ++ match(Set mem (StoreD mem src)); ++ ++ ins_cost(50); ++ format %{ "store $mem, $src\t# store float @ storeD_reg" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_DOUBLE); ++ %} ++ ins_pipe( fpu_storeF ); ++%} ++ ++instruct storeD_immD_0( memory mem, immD_0 zero) %{ ++ match(Set mem (StoreD mem zero)); ++ ++ ins_cost(40); ++ format %{ "store $mem, zero\t# store float @ storeD_immD_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct loadSSI(mRegI dst, stackSlotI src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "ld_w $dst, $src\t# int stk @ loadSSI" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($src$$disp, 12), "disp too long (loadSSI) !"); ++ __ ld_w($dst$$Register, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSI(stackSlotI dst, mRegI src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "st_w $dst, $src\t# int stk @ storeSSI" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($dst$$disp, 12), "disp too long (storeSSI) !"); ++ __ st_w($src$$Register, SP, $dst$$disp); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++instruct loadSSL(mRegL dst, stackSlotL src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ 
"ld_d $dst, $src\t# long stk @ loadSSL" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($src$$disp, 12), "disp too long (loadSSL) !"); ++ __ ld_d($dst$$Register, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSL(stackSlotL dst, mRegL src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "st_d $dst, $src\t# long stk @ storeSSL" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($dst$$disp, 12), "disp too long (storeSSL) !"); ++ __ st_d($src$$Register, SP, $dst$$disp); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++instruct loadSSP(mRegP dst, stackSlotP src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "ld_d $dst, $src\t# ptr stk @ loadSSP" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($src$$disp, 12), "disp too long (loadSSP) !"); ++ __ ld_d($dst$$Register, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSP(stackSlotP dst, mRegP src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "sd $dst, $src\t# ptr stk @ storeSSP" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($dst$$disp, 12), "disp too long (storeSSP) !"); ++ __ st_d($src$$Register, SP, $dst$$disp); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++instruct loadSSF(regF dst, stackSlotF src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "fld_s $dst, $src\t# float stk @ loadSSF" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($src$$disp, 12), "disp too long (loadSSF) !"); ++ __ fld_s($dst$$FloatRegister, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSF(stackSlotF dst, regF src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "fst_s $dst, $src\t# float stk @ storeSSF" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($dst$$disp, 12), "disp too long (storeSSF) !"); ++ __ fst_s($src$$FloatRegister, SP, $dst$$disp); ++ %} ++ ins_pipe(fpu_storeF); ++%} ++ ++// Use the same format since predicate() can not be used here. 
++instruct loadSSD(regD dst, stackSlotD src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "fld_d $dst, $src\t# double stk @ loadSSD" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($src$$disp, 12), "disp too long (loadSSD) !"); ++ __ fld_d($dst$$FloatRegister, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSD(stackSlotD dst, regD src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "fst_d $dst, $src\t# double stk @ storeSSD" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($dst$$disp, 12), "disp too long (storeSSD) !"); ++ __ fst_d($src$$FloatRegister, SP, $dst$$disp); ++ %} ++ ins_pipe(fpu_storeF); ++%} ++ ++instruct cmpFastLock(FlagsReg cr, mRegP object, mRegP box, mRegI tmp, mRegI scr) %{ ++ match(Set cr (FastLock object box)); ++ effect(TEMP tmp, TEMP scr); ++ ins_cost(300); ++ format %{ "FASTLOCK $cr <-- $object, $box, $tmp, $scr #@ cmpFastLock" %} ++ ins_encode %{ ++ __ fast_lock($object$$Register, $box$$Register, $cr$$Register, $tmp$$Register, $scr$$Register); ++ %} ++ ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++%} ++ ++instruct cmpFastUnlock(FlagsReg cr, mRegP object, mRegP box, mRegI tmp, mRegI scr) %{ ++ match(Set cr (FastUnlock object box)); ++ effect(TEMP tmp, TEMP scr); ++ ins_cost(300); ++ format %{ "FASTUNLOCK $cr <-- $object, $box, $tmp #@cmpFastUnlock" %} ++ ins_encode %{ ++ __ fast_unlock($object$$Register, $box$$Register, $cr$$Register, $tmp$$Register, $scr$$Register); ++ %} ++ ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++%} ++ ++// Store CMS card-mark Immediate 0 ++instruct storeImmCM_order(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreCM mem zero)); ++ predicate(UseConcMarkSweepGC && !UseCondCardMark); ++ ins_cost(100); ++ format %{ "StoreCM MEMBAR storestore\n\t" ++ "st_b $mem, zero\t! card-mark imm0" %} ++ ins_encode %{ ++ __ membar(__ StoreStore); ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeImmCM(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreCM mem zero)); ++ ++ ins_cost(150); ++ format %{ "st_b $mem, zero\t! card-mark imm0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Die now ++instruct ShouldNotReachHere( ) ++%{ ++ match(Halt); ++ ins_cost(300); ++ ++ // Use the following format syntax ++ format %{ "ILLTRAP ;#@ShouldNotReachHere" %} ++ ins_encode %{ ++ if (is_reachable()) { ++ // Here we should emit illtrap! 
++ __ stop("ShouldNotReachHere"); ++ } ++ %} ++ ins_pipe( pipe_jump ); ++%} ++ ++instruct leaP12Narrow(mRegP dst, indOffset12Narrow mem) ++%{ ++ predicate(Universe::narrow_oop_shift() == 0); ++ match(Set dst mem); ++ ++ ins_cost(110); ++ format %{ "leaq $dst, $mem\t# ptr off12narrow @ leaP12Narrow" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register base = as_Register($mem$$base); ++ int disp = $mem$$disp; ++ ++ __ addi_d(dst, base, disp); ++ %} ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++instruct leaPIdxScale(mRegP dst, mRegP reg, mRegLorI2L lreg, immI_0_3 scale) ++%{ ++ match(Set dst (AddP reg (LShiftL lreg scale))); ++ ++ ins_cost(110); ++ format %{ "leaq $dst, [$reg + $lreg << $scale]\t# @ leaPIdxScale" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register base = $reg$$Register; ++ Register index = $lreg$$Register; ++ int scale = $scale$$constant; ++ ++ if (scale == 0) { ++ __ add_d($dst$$Register, $reg$$Register, index); ++ } else { ++ __ alsl_d(dst, index, base, scale - 1); ++ } ++ %} ++ ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++ ++// ============================================================================ ++// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass ++// array for an instance of the superklass. Set a hidden internal cache on a ++// hit (cache is checked with exposed code in gen_subtype_check()). Return ++// NZ for a miss or zero for a hit. The encoding ALSO sets flags. ++instruct partialSubtypeCheck( mRegP result, no_T8_mRegP sub, no_T8_mRegP super, mT8RegI tmp ) %{ ++ match(Set result (PartialSubtypeCheck sub super)); ++ effect(KILL tmp); ++ ins_cost(1100); // slightly larger than the next version ++ format %{ "partialSubtypeCheck result=$result, sub=$sub, super=$super, tmp=$tmp " %} ++ ++ ins_encode( enc_PartialSubtypeCheck(result, sub, super, tmp) ); ++ ins_pipe( pipe_slow ); ++%} ++ ++// Conditional-store of the updated heap-top. ++// Used during allocation of the shared heap. ++ ++instruct storePConditional(memory heap_top_ptr, mRegP oldval, mRegP newval, FlagsReg cr) %{ ++ match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); ++ ++ format %{ "move AT, $newval\n\t" ++ "sc_d $heap_top_ptr, AT\t# (ptr) @storePConditional \n\t" ++ "move $cr, AT\n" %} ++ ins_encode%{ ++ Register oldval = $oldval$$Register; ++ Register newval = $newval$$Register; ++ Address addr(as_Register($heap_top_ptr$$base), $heap_top_ptr$$disp); ++ ++ int index = $heap_top_ptr$$index; ++ int scale = $heap_top_ptr$$scale; ++ int disp = $heap_top_ptr$$disp; ++ ++ guarantee(Assembler::is_simm(disp, 12), ""); ++ ++ if (index != 0) { ++ __ stop("in storePConditional: index != 0"); ++ } else { ++ __ move(AT, newval); ++ __ sc_d(AT, addr); ++ __ move($cr$$Register, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++// Conditional-store of an int value. ++// AT flag is set on success, reset otherwise. 
++instruct storeIConditional(memory mem, mRegI oldval, mRegI newval, FlagsReg cr) %{ ++ match(Set cr (StoreIConditional mem (Binary oldval newval))); ++ format %{ "CMPXCHG $newval, $mem, $oldval \t# @storeIConditional" %} ++ ++ ins_encode %{ ++ Register oldval = $oldval$$Register; ++ Register newval = $newval$$Register; ++ Register cr = $cr$$Register; ++ Address addr(as_Register($mem$$base), $mem$$disp); ++ ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ guarantee(Assembler::is_simm(disp, 12), ""); ++ ++ if (index != 0) { ++ __ stop("in storeIConditional: index != 0"); ++ } else { ++ if (cr != addr.base() && cr != oldval && cr != newval) { ++ __ cmpxchg32(addr, oldval, newval, cr, true, false, true); ++ } else { ++ __ cmpxchg32(addr, oldval, newval, AT, true, false, true); ++ __ move(cr, AT); ++ } ++ } ++ %} ++ ++ ins_pipe(long_memory_op); ++%} ++ ++// Conditional-store of a long value. ++// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG. ++instruct storeLConditional(memory mem, mRegL oldval, mRegL newval, FlagsReg cr) ++%{ ++ match(Set cr (StoreLConditional mem (Binary oldval newval))); ++ ++ format %{ "cmpxchg $mem, $newval\t# If $oldval == $mem then store $newval into $mem" %} ++ ins_encode%{ ++ Register oldval = $oldval$$Register; ++ Register newval = $newval$$Register; ++ Register cr = $cr$$Register; ++ Address addr(as_Register($mem$$base), $mem$$disp); ++ ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ guarantee(Assembler::is_simm(disp, 12), ""); ++ ++ if (index != 0) { ++ __ stop("in storeIConditional: index != 0"); ++ } else { ++ if (cr != addr.base() && cr != oldval && cr != newval) { ++ __ cmpxchg(addr, oldval, newval, cr, false, true); ++ } else { ++ __ cmpxchg(addr, oldval, newval, AT, false, true); ++ __ move(cr, AT); ++ } ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++// Implement LoadPLocked. Must be ordered against changes of the memory location ++// by storePConditional. 
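++// Rough sketch of how the LL/SC pair is used (loadPLocked supplies the ll_d, the matching sc_d
++// comes from storePConditional above when C2 updates the shared heap top):
++//   old_top = ll_d(heap_top)            // loadPLocked
++//   sc_d(heap_top, new_top) -> flag     // storePConditional, flag copied from AT into $cr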
++instruct loadPLocked(mRegP dst, memory mem) %{ ++ match(Set dst (LoadPLocked mem)); ++ ins_cost(MEMORY_REF_COST); ++ ++ format %{ "ll_d $dst, $mem #@loadPLocked\n\t" %} ++ size(12); ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LINKED_LONG); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct compareAndSwapI(mRegI res, mRegP mem_ptr, mRegI oldval, mRegI newval) %{ ++ match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval))); ++ ins_cost(3 * MEMORY_REF_COST); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapI" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg32(addr, oldval, newval, res, true, false, true); ++ } else { ++ __ cmpxchg32(addr, oldval, newval, AT, true, false, true); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++instruct compareAndSwapL(mRegI res, mRegP mem_ptr, mRegL oldval, mRegL newval) %{ ++ predicate(VM_Version::supports_cx8()); ++ match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval))); ++ ins_cost(3 * MEMORY_REF_COST); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapL" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg(addr, oldval, newval, res, false, true); ++ } else { ++ __ cmpxchg(addr, oldval, newval, AT, false, true); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++instruct compareAndSwapP(mRegI res, mRegP mem_ptr, mRegP oldval, mRegP newval) %{ ++ match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval))); ++ ins_cost(3 * MEMORY_REF_COST); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapP" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg(addr, oldval, newval, res, false, true); ++ } else { ++ __ cmpxchg(addr, oldval, newval, AT, false, true); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++instruct compareAndSwapN(mRegI res, mRegP mem_ptr, mRegN oldval, mRegN newval) %{ ++ match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval))); ++ ins_cost(3 * MEMORY_REF_COST); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapN" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg32(addr, oldval, newval, res, false, false, true); ++ } else { ++ __ cmpxchg32(addr, oldval, newval, AT, false, false, true); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++instruct get_and_setI(indirect mem, mRegI newv, mRegI prev) %{ ++ match(Set prev (GetAndSetI mem newv)); ++ ins_cost(2 * MEMORY_REF_COST); ++ format %{ "amswap_db_w $prev, $newv, [$mem]" %} ++ ins_encode %{ ++ Register prev = $prev$$Register; ++ 
Register newv = $newv$$Register; ++ Register addr = as_Register($mem$$base); ++ if (prev == newv || prev == addr) { ++ __ amswap_db_w(AT, newv, addr); ++ __ move(prev, AT); ++ } else { ++ __ amswap_db_w(prev, newv, addr); ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct get_and_setL(indirect mem, mRegL newv, mRegL prev) %{ ++ match(Set prev (GetAndSetL mem newv)); ++ ins_cost(2 * MEMORY_REF_COST); ++ format %{ "amswap_db_d $prev, $newv, [$mem]" %} ++ ins_encode %{ ++ Register prev = $prev$$Register; ++ Register newv = $newv$$Register; ++ Register addr = as_Register($mem$$base); ++ if (prev == newv || prev == addr) { ++ __ amswap_db_d(AT, newv, addr); ++ __ move(prev, AT); ++ } else { ++ __ amswap_db_d(prev, newv, addr); ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct get_and_setN(indirect mem, mRegN newv, mRegN prev) %{ ++ match(Set prev (GetAndSetN mem newv)); ++ ins_cost(2 * MEMORY_REF_COST); ++ format %{ "amswap_db_w $prev, $newv, [$mem]" %} ++ ins_encode %{ ++ Register prev = $prev$$Register; ++ Register newv = $newv$$Register; ++ Register addr = as_Register($mem$$base); ++ __ amswap_db_w(AT, newv, addr); ++ __ bstrpick_d(prev, AT, 31, 0); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct get_and_setP(indirect mem, mRegP newv, mRegP prev) %{ ++ match(Set prev (GetAndSetP mem newv)); ++ ins_cost(2 * MEMORY_REF_COST); ++ format %{ "amswap_db_d $prev, $newv, [$mem]" %} ++ ins_encode %{ ++ Register prev = $prev$$Register; ++ Register newv = $newv$$Register; ++ Register addr = as_Register($mem$$base); ++ if (prev == newv || prev == addr) { ++ __ amswap_db_d(AT, newv, addr); ++ __ move(prev, AT); ++ } else { ++ __ amswap_db_d(prev, newv, addr); ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct get_and_addL(indirect mem, mRegL newval, mRegL incr) %{ ++ match(Set newval (GetAndAddL mem incr)); ++ ins_cost(2 * MEMORY_REF_COST + 1); ++ format %{ "amadd_db_d $newval, [$mem], $incr" %} ++ ins_encode %{ ++ Register newv = $newval$$Register; ++ Register incr = $incr$$Register; ++ Register addr = as_Register($mem$$base); ++ if (newv == incr || newv == addr) { ++ __ amadd_db_d(AT, incr, addr); ++ __ move(newv, AT); ++ } else { ++ __ amadd_db_d(newv, incr, addr); ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct get_and_addL_no_res(indirect mem, Universe dummy, mRegL incr) %{ ++ predicate(n->as_LoadStore()->result_not_used()); ++ match(Set dummy (GetAndAddL mem incr)); ++ ins_cost(2 * MEMORY_REF_COST); ++ format %{ "amadd_db_d [$mem], $incr" %} ++ ins_encode %{ ++ __ amadd_db_d(R0, $incr$$Register, as_Register($mem$$base)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct get_and_addI(indirect mem, mRegI newval, mRegIorL2I incr) %{ ++ match(Set newval (GetAndAddI mem incr)); ++ ins_cost(2 * MEMORY_REF_COST + 1); ++ format %{ "amadd_db_w $newval, [$mem], $incr" %} ++ ins_encode %{ ++ Register newv = $newval$$Register; ++ Register incr = $incr$$Register; ++ Register addr = as_Register($mem$$base); ++ if (newv == incr || newv == addr) { ++ __ amadd_db_w(AT, incr, addr); ++ __ move(newv, AT); ++ } else { ++ __ amadd_db_w(newv, incr, addr); ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct get_and_addI_no_res(indirect mem, Universe dummy, mRegIorL2I incr) %{ ++ predicate(n->as_LoadStore()->result_not_used()); ++ match(Set dummy (GetAndAddI mem incr)); ++ ins_cost(2 * MEMORY_REF_COST); ++ format %{ "amadd_db_w [$mem], $incr" %} ++ ins_encode %{ ++ __ amadd_db_w(R0, $incr$$Register, as_Register($mem$$base)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++//----------Max and 
Min-------------------------------------------------------- ++ ++// Min Register with Register (generic version) ++instruct minI_Reg_Reg(mRegI dst, mRegI src) %{ ++ match(Set dst (MinI dst src)); ++ //effect(KILL flags); ++ ins_cost(80); ++ ++ format %{ "MIN $dst, $src @minI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ slt(AT, src, dst); ++ __ masknez(dst, dst, AT); ++ __ maskeqz(AT, src, AT); ++ __ OR(dst, dst, AT); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++// Max Register with Register (generic version) ++instruct maxI_Reg_Reg(mRegI dst, mRegI src) %{ ++ match(Set dst (MaxI dst src)); ++ ins_cost(80); ++ ++ format %{ "MAX $dst, $src @maxI_Reg_Reg" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ slt(AT, dst, src); ++ __ masknez(dst, dst, AT); ++ __ maskeqz(AT, src, AT); ++ __ OR(dst, dst, AT); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct maxI_Reg_zero(mRegI dst, immI_0 zero) %{ ++ match(Set dst (MaxI dst zero)); ++ ins_cost(50); ++ ++ format %{ "MAX $dst, 0 @maxI_Reg_zero" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ slt(AT, dst, R0); ++ __ masknez(dst, dst, AT); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct zerox_long_reg_reg(mRegL dst, mRegL src, immL_MaxUI mask) ++%{ ++ match(Set dst (AndL src mask)); ++ ++ format %{ "movl $dst, $src\t# zero-extend long @ zerox_long_reg_reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ bstrpick_d(dst, src, 31, 0); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++instruct combine_i2l(mRegL dst, mRegI src1, immL_MaxUI mask, mRegI src2, immI_32 shift32) ++%{ ++ match(Set dst (OrL (AndL (ConvI2L src1) mask) (LShiftL (ConvI2L src2) shift32))); ++ ++ format %{ "combine_i2l $dst, $src2(H), $src1(L) @ combine_i2l" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ if (src1 == dst) { ++ __ bstrins_d(dst, src2, 63, 32); ++ } else if (src2 == dst) { ++ __ slli_d(dst, dst, 32); ++ __ bstrins_d(dst, src1, 31, 0); ++ } else { ++ __ bstrpick_d(dst, src1, 31, 0); ++ __ bstrins_d(dst, src2, 63, 32); ++ } ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Zero-extend convert int to long ++instruct convI2L_reg_reg_zex(mRegL dst, mRegI src, immL_MaxUI mask) ++%{ ++ match(Set dst (AndL (ConvI2L src) mask)); ++ ++ format %{ "movl $dst, $src\t# i2l zero-extend @ convI2L_reg_reg_zex" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ bstrpick_d(dst, src, 31, 0); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++instruct convL2I2L_reg_reg_zex(mRegL dst, mRegL src, immL_MaxUI mask) ++%{ ++ match(Set dst (AndL (ConvI2L (ConvL2I src)) mask)); ++ ++ format %{ "movl $dst, $src\t# i2l zero-extend @ convL2I2L_reg_reg_zex" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ bstrpick_d(dst, src, 31, 0); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Match loading integer and casting it to unsigned int in long register. ++// LoadI + ConvI2L + AndL 0xffffffff. 
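++// Illustrative Java-level source for this match (covered below in either AndL operand order):
++//   long u = intField & 0xFFFFFFFFL;   // intField is an int loaded from memory; LoadI + ConvI2L + AndL fold into one ld_wu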
++instruct loadUI2L_rmask(mRegL dst, memory mem, immL_MaxUI mask) %{ ++ match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); ++ ++ format %{ "ld_wu $dst, $mem \t// zero-extend to long @ loadUI2L_rmask" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct loadUI2L_lmask(mRegL dst, memory mem, immL_MaxUI mask) %{ ++ match(Set dst (AndL mask (ConvI2L (LoadI mem)))); ++ ++ format %{ "ld_wu $dst, $mem \t// zero-extend to long @ loadUI2L_lmask" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++ ++// ============================================================================ ++// Safepoint Instruction ++ ++instruct safePoint_poll() %{ ++ predicate(SafepointMechanism::uses_global_page_poll()); ++ match(SafePoint); ++ ++ ins_cost(105); ++ format %{ "poll for GC @ safePoint_poll" %} ++ ++ ins_encode %{ ++ __ block_comment("Safepoint:"); ++ __ li(T4, (long)os::get_polling_page()); ++ __ relocate(relocInfo::poll_type); ++ __ ld_w(AT, T4, 0); ++ %} ++ ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct safePoint_poll_tls(mRegP poll) %{ ++ match(SafePoint poll); ++ predicate(SafepointMechanism::uses_thread_local_poll()); ++ effect(USE poll); ++ ++ ins_cost(125); ++ format %{ "ld_w AT, [$poll]\t" ++ "Safepoint @ [$poll] : poll for GC" %} ++ size(4); ++ ins_encode %{ ++ Register poll_reg = $poll$$Register; ++ ++ __ block_comment("Safepoint:"); ++ __ relocate(relocInfo::poll_type); ++ address pre_pc = __ pc(); ++ __ ld_w(AT, poll_reg, 0); ++ assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit lw AT, [$poll]"); ++ %} ++ ++ ins_pipe( ialu_storeI ); ++%} ++ ++//----------Arithmetic Conversion Instructions--------------------------------- ++ ++instruct roundFloat_nop(regF dst) ++%{ ++ match(Set dst (RoundFloat dst)); ++ ++ ins_cost(0); ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++instruct roundDouble_nop(regD dst) ++%{ ++ match(Set dst (RoundDouble dst)); ++ ++ ins_cost(0); ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++//----------BSWAP Instructions------------------------------------------------- ++instruct bytes_reverse_int(mRegI dst, mRegIorL2I src) %{ ++ match(Set dst (ReverseBytesI src)); ++ ++ format %{ "RevB_I $dst, $src" %} ++ ins_encode %{ ++ __ revb_2w($dst$$Register, $src$$Register); ++ __ slli_w($dst$$Register, $dst$$Register, 0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct bytes_reverse_long(mRegL dst, mRegL src) %{ ++ match(Set dst (ReverseBytesL src)); ++ ++ format %{ "RevB_L $dst, $src" %} ++ ins_encode %{ ++ __ revb_d($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct bytes_reverse_unsigned_short(mRegI dst, mRegIorL2I src) %{ ++ match(Set dst (ReverseBytesUS src)); ++ ++ format %{ "RevB_US $dst, $src" %} ++ ins_encode %{ ++ __ revb_2h($dst$$Register, $src$$Register); ++ __ bstrpick_d($dst$$Register, $dst$$Register, 15, 0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct bytes_reverse_short(mRegI dst, mRegIorL2I src) %{ ++ match(Set dst (ReverseBytesS src)); ++ ++ format %{ "RevB_S $dst, $src" %} ++ ins_encode %{ ++ __ 
revb_2h($dst$$Register, $src$$Register); ++ __ ext_w_h($dst$$Register, $dst$$Register); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++//---------- Zeros Count Instructions ------------------------------------------ ++// CountLeadingZerosINode CountTrailingZerosINode ++instruct countLeadingZerosI(mRegI dst, mRegIorL2I src) %{ ++ match(Set dst (CountLeadingZerosI src)); ++ ++ format %{ "clz_w $dst, $src\t# count leading zeros (int)" %} ++ ins_encode %{ ++ __ clz_w($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct countLeadingZerosL(mRegI dst, mRegL src) %{ ++ match(Set dst (CountLeadingZerosL src)); ++ ++ format %{ "clz_d $dst, $src\t# count leading zeros (long)" %} ++ ins_encode %{ ++ __ clz_d($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct countTrailingZerosI(mRegI dst, mRegIorL2I src) %{ ++ match(Set dst (CountTrailingZerosI src)); ++ ++ format %{ "ctz_w $dst, $src\t# count trailing zeros (int)" %} ++ ins_encode %{ ++ __ ctz_w($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct countTrailingZerosL(mRegI dst, mRegL src) %{ ++ match(Set dst (CountTrailingZerosL src)); ++ ++ format %{ "ctz_d $dst, $src\t# count trailing zeros (long)" %} ++ ins_encode %{ ++ __ ctz_d($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// ====================VECTOR INSTRUCTIONS===================================== ++ ++// --------------------------------- Load ------------------------------------- ++ ++instruct loadV16(vecX dst, memory mem) %{ ++ predicate(n->as_LoadVector()->memory_size() == 16); ++ match(Set dst (LoadVector mem)); ++ format %{ "vload $dst, $mem\t# @loadV16" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_VECTORX); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct loadV32(vecY dst, memory mem) %{ ++ predicate(n->as_LoadVector()->memory_size() == 32); ++ match(Set dst (LoadVector mem)); ++ format %{ "xvload $dst, $mem\t# @loadV32" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_VECTORY); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- Store ------------------------------------ ++ ++instruct storeV16(memory mem, vecX src) %{ ++ predicate(n->as_StoreVector()->memory_size() == 16); ++ match(Set mem (StoreVector mem src)); ++ format %{ "vstore $src, $mem\t# @storeV16" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_VECTORX); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct storeV32(memory mem, vecY src) %{ ++ predicate(n->as_StoreVector()->memory_size() == 32); ++ match(Set mem (StoreVector mem src)); ++ format %{ "xvstore $src, $mem\t# @storeV32" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_VECTORY); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ------------------------------- Replicate ---------------------------------- ++ ++instruct repl16B(vecX dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (ReplicateB src)); ++ format %{ "vreplgr2vr.b $dst, $src\t# @repl16B" %} ++ ins_encode %{ ++ __ vreplgr2vr_b($dst$$FloatRegister, $src$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl16B_imm(vecX dst, immI_M128_255 imm) %{ ++ 
predicate(n->as_Vector()->length() == 16); ++ match(Set dst (ReplicateB imm)); ++ format %{ "vldi $dst, $imm\t# @repl16B_imm" %} ++ ins_encode %{ ++ __ vldi($dst$$FloatRegister, ($imm$$constant & 0xff)); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl8S(vecX dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateS src)); ++ format %{ "vreplgr2vr.h $dst, $src\t# @repl8S" %} ++ ins_encode %{ ++ __ vreplgr2vr_h($dst$$FloatRegister, $src$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl8S_imm(vecX dst, immI10 imm) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateS imm)); ++ format %{ "vldi $dst, $imm\t# @repl8S_imm" %} ++ ins_encode %{ ++ __ vldi($dst$$FloatRegister, (0b001 << 10 ) | ($imm$$constant & 0x3ff)); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl4I(vecX dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateI src)); ++ format %{ "vreplgr2vr.w $dst, $src\t# @repl4I" %} ++ ins_encode %{ ++ __ vreplgr2vr_w($dst$$FloatRegister, $src$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl4I_imm(vecX dst, immI10 imm) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateI imm)); ++ format %{ "vldi $dst, $imm\t# @repl4I_imm" %} ++ ins_encode %{ ++ __ vldi($dst$$FloatRegister, (0b010 << 10 ) | ($imm$$constant & 0x3ff)); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl2L(vecX dst, mRegL src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateL src)); ++ format %{ "vreplgr2vr.d $dst, $src\t# @repl2L" %} ++ ins_encode %{ ++ __ vreplgr2vr_d($dst$$FloatRegister, $src$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl2L_imm(vecX dst, immL10 imm) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateL imm)); ++ format %{ "vldi $dst, $imm\t# @repl2L_imm" %} ++ ins_encode %{ ++ __ vldi($dst$$FloatRegister, (0b011 << 10 ) | ($imm$$constant & 0x3ff)); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl4F(vecX dst, regF src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateF src)); ++ format %{ "vreplvei.w $dst, $src, 0\t# @repl4F" %} ++ ins_encode %{ ++ __ vreplvei_w($dst$$FloatRegister, $src$$FloatRegister, 0); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl2D(vecX dst, regD src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateD src)); ++ format %{ "vreplvei.d $dst, $src, 0\t# @repl2D" %} ++ ins_encode %{ ++ __ vreplvei_d($dst$$FloatRegister, $src$$FloatRegister, 0); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl32B(vecY dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (ReplicateB src)); ++ format %{ "xvreplgr2vr.b $dst, $src\t# @repl32B" %} ++ ins_encode %{ ++ __ xvreplgr2vr_b($dst$$FloatRegister, $src$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl32B_imm(vecY dst, immI_M128_255 imm) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (ReplicateB imm)); ++ format %{ "xvldi $dst, $imm\t# @repl32B_imm" %} ++ ins_encode %{ ++ __ xvldi($dst$$FloatRegister, ($imm$$constant & 0xff)); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl16S(vecY dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (ReplicateS src)); ++ format %{ "xvreplgr2vr.h $dst, $src\t# @repl16S" %} ++ ins_encode %{ ++ __ xvreplgr2vr_h($dst$$FloatRegister, $src$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct 
repl16S_imm(vecY dst, immI10 imm) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (ReplicateS imm)); ++ format %{ "xvldi $dst, $imm\t# @repl16S_imm" %} ++ ins_encode %{ ++ __ xvldi($dst$$FloatRegister, (0b001 << 10 ) | ($imm$$constant & 0x3ff)); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl8I(vecY dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateI src)); ++ format %{ "xvreplgr2vr.w $dst, $src\t# @repl8I" %} ++ ins_encode %{ ++ __ xvreplgr2vr_w($dst$$FloatRegister, $src$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl8I_imm(vecY dst, immI10 imm) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateI imm)); ++ format %{ "xvldi $dst, $imm\t# @repl8I_imm" %} ++ ins_encode %{ ++ __ xvldi($dst$$FloatRegister, (0b010 << 10 ) | ($imm$$constant & 0x3ff)); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl4L(vecY dst, mRegL src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateL src)); ++ format %{ "xvreplgr2vr.d $dst, $src\t# @repl4L" %} ++ ins_encode %{ ++ __ xvreplgr2vr_d($dst$$FloatRegister, $src$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl4L_imm(vecY dst, immL10 imm) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateL imm)); ++ format %{ "xvldi $dst, $imm\t# @repl4L_imm" %} ++ ins_encode %{ ++ __ xvldi($dst$$FloatRegister, (0b011 << 10 ) | ($imm$$constant & 0x3ff)); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl8F(vecY dst, regF src) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateF src)); ++ format %{ "xvreplve0.w $dst, $src\t# @repl8F" %} ++ ins_encode %{ ++ __ xvreplve0_w($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl4D(vecY dst, regD src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateD src)); ++ format %{ "xvreplve0.d $dst, $src\t# @repl4D" %} ++ ins_encode %{ ++ __ xvreplve0_d($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- ADD -------------------------------------- ++ ++instruct add16B(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (AddVB src1 src2)); ++ format %{ "vadd.b $dst, $src1, $src2\t# @add16B" %} ++ ins_encode %{ ++ __ vadd_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add16B_imm(vecX dst, vecX src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (AddVB src (ReplicateB imm))); ++ format %{ "vaddi.bu $dst, $src, $imm\t# @add16B_imm" %} ++ ins_encode %{ ++ __ vaddi_bu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add8S(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (AddVS src1 src2)); ++ format %{ "vadd.h $dst, $src1, $src2\t# @add8S" %} ++ ins_encode %{ ++ __ vadd_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add8S_imm(vecX dst, vecX src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (AddVS src (ReplicateS imm))); ++ format %{ "vaddi.hu $dst, $src, $imm\t# @add8S_imm" %} ++ ins_encode %{ ++ __ vaddi_hu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add4I(vecX dst, vecX src1, vecX 
src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AddVI src1 src2)); ++ format %{ "vadd.w $dst, $src1, $src2\t# @add4I" %} ++ ins_encode %{ ++ __ vadd_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add4I_imm(vecX dst, vecX src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AddVI src (ReplicateI imm))); ++ format %{ "vaddi.wu $dst, $src, $imm\t# @add4I_imm" %} ++ ins_encode %{ ++ __ vaddi_wu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add2L(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (AddVL src1 src2)); ++ format %{ "vadd.d $dst, $src1, $src2\t# @add2L" %} ++ ins_encode %{ ++ __ vadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add2L_imm(vecX dst, vecX src, immLU5 imm) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (AddVL src (ReplicateL imm))); ++ format %{ "vaddi.du $dst, $src, $imm\t# @add2L_imm" %} ++ ins_encode %{ ++ __ vaddi_du($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add4F(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AddVF src1 src2)); ++ format %{ "vfadd.s $dst, $src1, $src2\t# @add4F" %} ++ ins_encode %{ ++ __ vfadd_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add2D(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (AddVD src1 src2)); ++ format %{ "vfadd.d $dst, $src1, $src2\t# @add2D" %} ++ ins_encode %{ ++ __ vfadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add32B(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (AddVB src1 src2)); ++ format %{ "xvadd.b $dst, $src1, $src2\t# @add32B" %} ++ ins_encode %{ ++ __ xvadd_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add32B_imm(vecY dst, vecY src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (AddVB src (ReplicateB imm))); ++ format %{ "xvaddi.bu $dst, $src, $imm\t# @add32B_imm" %} ++ ins_encode %{ ++ __ xvaddi_bu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add16S(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (AddVS src1 src2)); ++ format %{ "xvadd.h $dst, $src1, $src2\t# @add16S" %} ++ ins_encode %{ ++ __ xvadd_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add16S_imm(vecY dst, vecY src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (AddVS src (ReplicateS imm))); ++ format %{ "xvaddi.hu $dst, $src, $imm\t# @add16S_imm" %} ++ ins_encode %{ ++ __ xvaddi_hu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add8I(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (AddVI src1 src2)); ++ format %{ "xvadd.w $dst, $src1, $src2\t# @add8I" %} ++ ins_encode %{ ++ __ xvadd_w($dst$$FloatRegister, $src1$$FloatRegister, 
$src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add8I_imm(vecY dst, vecY src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (AddVI src (ReplicateI imm))); ++ format %{ "xvaddi.wu $dst, $src, $imm\t# @add8I_imm" %} ++ ins_encode %{ ++ __ xvaddi_wu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add4L(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AddVL src1 src2)); ++ format %{ "xvadd.d $dst, $src1, $src2\t# @add4L" %} ++ ins_encode %{ ++ __ xvadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add4L_imm(vecY dst, vecY src, immLU5 imm) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AddVL src (ReplicateL imm))); ++ format %{ "xvaddi.du $dst, $src, $imm\t# @add4L_imm" %} ++ ins_encode %{ ++ __ xvaddi_du($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add8F(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (AddVF src1 src2)); ++ format %{ "xvfadd.s $dst, $src1, $src2\t# @add8F" %} ++ ins_encode %{ ++ __ xvfadd_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add4D(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AddVD src1 src2)); ++ format %{ "xvfadd.d $dst, $src1, $src2\t# @add4D" %} ++ ins_encode %{ ++ __ xvfadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- SUB -------------------------------------- ++ ++instruct sub16B(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (SubVB src1 src2)); ++ format %{ "vsub.b $dst, $src1, $src2\t# @sub16B" %} ++ ins_encode %{ ++ __ vsub_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub16B_imm(vecX dst, vecX src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (SubVB src (ReplicateB imm))); ++ format %{ "vsubi.bu $dst, $src, $imm\t# @sub16B_imm" %} ++ ins_encode %{ ++ __ vsubi_bu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub8S(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (SubVS src1 src2)); ++ format %{ "vsub.h $dst, $src1, $src2\t# @sub8S" %} ++ ins_encode %{ ++ __ vsub_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub8S_imm(vecX dst, vecX src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (SubVS src (ReplicateS imm))); ++ format %{ "vsubi.hu $dst, $src, $imm\t# @sub8S_imm" %} ++ ins_encode %{ ++ __ vsubi_hu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub4I(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (SubVI src1 src2)); ++ format %{ "vsub.w $dst, $src1, $src2\t# @sub4I" %} ++ ins_encode %{ ++ __ vsub_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub4I_imm(vecX dst, vecX src, immIU5 imm) %{ ++ 
predicate(n->as_Vector()->length() == 4); ++ match(Set dst (SubVI src (ReplicateI imm))); ++ format %{ "vsubi.wu $dst, $src, $imm\t# @sub4I_imm" %} ++ ins_encode %{ ++ __ vsubi_wu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub2L(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (SubVL src1 src2)); ++ format %{ "vsub.d $dst, $src1, $src2\t# @sub2L" %} ++ ins_encode %{ ++ __ vsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub2L_imm(vecX dst, vecX src, immLU5 imm) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (SubVL src (ReplicateL imm))); ++ format %{ "vsubi.du $dst, $src, $imm\t# @sub2L_imm" %} ++ ins_encode %{ ++ __ vsubi_du($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub4F(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (SubVF src1 src2)); ++ format %{ "vfsub.s $dst, $src1, $src2\t# @sub4F" %} ++ ins_encode %{ ++ __ vfsub_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub2D(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (SubVD src1 src2)); ++ format %{ "vfsub.d $dst, $src1, $src2\t# @sub2D" %} ++ ins_encode %{ ++ __ vfsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub32B(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (SubVB src1 src2)); ++ format %{ "xvsub.b $dst, $src1, $src2\t# @sub32B" %} ++ ins_encode %{ ++ __ xvsub_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub32B_imm(vecY dst, vecY src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (SubVB src (ReplicateB imm))); ++ format %{ "xvsubi.bu $dst, $src, $imm\t# @sub32B_imm" %} ++ ins_encode %{ ++ __ xvsubi_bu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub16S(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (SubVS src1 src2)); ++ format %{ "xvsub.h $dst, $src1, $src2\t# @sub16S" %} ++ ins_encode %{ ++ __ xvsub_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub16S_imm(vecY dst, vecY src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (SubVS src (ReplicateS imm))); ++ format %{ "xvsubi.hu $dst, $src, $imm\t# @sub16S_imm" %} ++ ins_encode %{ ++ __ xvsubi_hu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub8I(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (SubVI src1 src2)); ++ format %{ "xvsub.w $dst, $src1, $src2\t# @sub8I" %} ++ ins_encode %{ ++ __ xvsub_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub8I_imm(vecY dst, vecY src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (SubVI src (ReplicateI imm))); ++ format %{ "xvsubi.wu $dst, $src, $imm\t# @sub8I_imm" %} ++ ins_encode %{ ++ __ xvsubi_wu($dst$$FloatRegister, $src$$FloatRegister, 
$imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub4L(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (SubVL src1 src2)); ++ format %{ "xvsub.d $dst, $src1, $src2\t# @sub4L" %} ++ ins_encode %{ ++ __ xvsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub4L_imm(vecY dst, vecY src, immLU5 imm) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (SubVL src (ReplicateL imm))); ++ format %{ "xvsubi.du $dst, $src, $imm\t# @sub4L_imm" %} ++ ins_encode %{ ++ __ xvsubi_du($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub8F(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (SubVF src1 src2)); ++ format %{ "xvfsub.s $dst, $src1, $src2\t# @sub8F" %} ++ ins_encode %{ ++ __ xvfsub_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub4D(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (SubVD src1 src2)); ++ format %{ "xvfsub.d $dst,$src1,$src2\t# @sub4D" %} ++ ins_encode %{ ++ __ xvfsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- MUL -------------------------------------- ++instruct mul16B(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (MulVB src1 src2)); ++ format %{ "vmul.b $dst, $src1, $src2\t# @mul16B" %} ++ ins_encode %{ ++ __ vmul_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul8S(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (MulVS src1 src2)); ++ format %{ "vmul.h $dst, $src1, $src2\t# @mul8S" %} ++ ins_encode %{ ++ __ vmul_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul4I(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (MulVI src1 src2)); ++ format %{ "vmul.w $dst, $src1, $src2\t# @mul4I" %} ++ ins_encode %{ ++ __ vmul_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul2L(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (MulVL src1 src2)); ++ format %{ "vmul.d $dst, $src1, $src2\t# @mul2L" %} ++ ins_encode %{ ++ __ vmul_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul4F(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (MulVF src1 src2)); ++ format %{ "vfmul.s $dst, $src1, $src2\t# @mul4F" %} ++ ins_encode %{ ++ __ vfmul_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul2D(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (MulVD src1 src2)); ++ format %{ "vfmul.d $dst, $src1, $src2\t# @mul2D" %} ++ ins_encode %{ ++ __ vfmul_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul32B(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (MulVB src1 src2)); ++ 
format %{ "xvmul.b $dst, $src1, $src2\t# @mul32B" %} ++ ins_encode %{ ++ __ xvmul_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul16S(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (MulVS src1 src2)); ++ format %{ "xvmul.h $dst, $src1, $src2\t# @mul16S" %} ++ ins_encode %{ ++ __ xvmul_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul8I(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (MulVI src1 src2)); ++ format %{ "xvmul.w $dst, $src1, $src2\t# @mul8I" %} ++ ins_encode %{ ++ __ xvmul_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul4L(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (MulVL src1 src2)); ++ format %{ "xvmul.d $dst, $src1, $src2\t# @mul4L" %} ++ ins_encode %{ ++ __ xvmul_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul8F(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (MulVF src1 src2)); ++ format %{ "xvfmul.s $dst, $src1, $src2\t# @mul8F" %} ++ ins_encode %{ ++ __ xvfmul_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul4D(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (MulVD src1 src2)); ++ format %{ "xvfmul.d $dst, $src1, $src2\t# @mul4D" %} ++ ins_encode %{ ++ __ xvfmul_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- DIV -------------------------------------- ++instruct div4F(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (DivVF src1 src2)); ++ format %{ "vfdiv.s $dst, $src1, $src2\t# @div4F" %} ++ ins_encode %{ ++ __ vfdiv_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct div2D(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (DivVD src1 src2)); ++ format %{ "vfdiv.d $dst, $src1, $src2\t# @div2D" %} ++ ins_encode %{ ++ __ vfdiv_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct div8F(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (DivVF src1 src2)); ++ format %{ "xvfdiv.s $dst, $src1, $src2\t# @div8F" %} ++ ins_encode %{ ++ __ xvfdiv_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct div4D(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (DivVD src1 src2)); ++ format %{ "xvfdiv.d $dst, $src1, $src2\t# @div4D" %} ++ ins_encode %{ ++ __ xvfdiv_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- ABS -------------------------------------- ++ ++instruct abs16B(vecX dst, vecX src) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (AbsVB src)); ++ effect(TEMP_DEF dst); ++ format %{ "vabs $dst, $src\t# @abs16B" %} ++ ins_encode %{ ++ __ vxor_v($dst$$FloatRegister, 
$dst$$FloatRegister, $dst$$FloatRegister); ++ __ vabsd_b($dst$$FloatRegister, $src$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct abs8S(vecX dst, vecX src) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (AbsVS src)); ++ effect(TEMP_DEF dst); ++ format %{ "vabs $dst, $src\t# @abs8S" %} ++ ins_encode %{ ++ __ vxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ __ vabsd_h($dst$$FloatRegister, $src$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct abs4I(vecX dst, vecX src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AbsVI src)); ++ effect(TEMP_DEF dst); ++ format %{ "vabs $dst, $src\t# @abs4I" %} ++ ins_encode %{ ++ __ vxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ __ vabsd_w($dst$$FloatRegister, $src$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct abs2L(vecX dst, vecX src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (AbsVL src)); ++ effect(TEMP_DEF dst); ++ format %{ "vabs $dst, $src\t# @abs2L" %} ++ ins_encode %{ ++ __ vxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ __ vabsd_d($dst$$FloatRegister, $src$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct abs4F(vecX dst, vecX src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AbsVF src)); ++ format %{ "vbitclri.w $dst, $src\t# @abs4F" %} ++ ins_encode %{ ++ __ vbitclri_w($dst$$FloatRegister, $src$$FloatRegister, 0x1f); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct abs2D(vecX dst, vecX src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (AbsVD src)); ++ format %{ "vbitclri.d $dst, $src\t# @abs2D" %} ++ ins_encode %{ ++ __ vbitclri_d($dst$$FloatRegister, $src$$FloatRegister, 0x3f); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct abs32B(vecY dst, vecY src) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (AbsVB src)); ++ effect(TEMP_DEF dst); ++ format %{ "xvabs $dst, $src\t# @abs32B" %} ++ ins_encode %{ ++ __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ __ xvabsd_b($dst$$FloatRegister, $src$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct abs16S(vecY dst, vecY src) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (AbsVS src)); ++ effect(TEMP_DEF dst); ++ format %{ "xvabs $dst, $src\t# @abs16S" %} ++ ins_encode %{ ++ __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ __ xvabsd_h($dst$$FloatRegister, $src$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct abs8I(vecY dst, vecY src) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (AbsVI src)); ++ effect(TEMP_DEF dst); ++ format %{ "xvabs $dst, $src\t# @abs8I" %} ++ ins_encode %{ ++ __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ __ xvabsd_w($dst$$FloatRegister, $src$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct abs4L(vecY dst, vecY src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AbsVL src)); ++ effect(TEMP_DEF dst); ++ format %{ "xvabs $dst, $src\t# @abs4L" %} ++ ins_encode %{ ++ __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ __ xvabsd_d($dst$$FloatRegister, $src$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct 
abs8F(vecY dst, vecY src) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (AbsVF src)); ++ format %{ "xvbitclri.w $dst, $src\t# @abs8F" %} ++ ins_encode %{ ++ __ xvbitclri_w($dst$$FloatRegister, $src$$FloatRegister, 0x1f); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct abs4D(vecY dst, vecY src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AbsVD src)); ++ format %{ "xvbitclri.d $dst, $src\t# @abs4D" %} ++ ins_encode %{ ++ __ xvbitclri_d($dst$$FloatRegister, $src$$FloatRegister, 0x3f); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- ABS DIFF --------------------------------- ++ ++instruct absd4I(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AbsVI (SubVI src1 src2))); ++ format %{ "vabsd.w $dst, $src1, $src2\t# @absd4I" %} ++ ins_encode %{ ++ __ vabsd_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct absd2L(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (AbsVL (SubVL src1 src2))); ++ format %{ "vabsd.d $dst, $src1, $src2\t# @absd2L" %} ++ ins_encode %{ ++ __ vabsd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct absd8I(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (AbsVI (SubVI src1 src2))); ++ format %{ "xvabsd.w $dst, $src1, $src2\t# @absd8I" %} ++ ins_encode %{ ++ __ xvabsd_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct absd4L(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AbsVL (SubVL src1 src2))); ++ format %{ "xvabsd.d $dst, $src1, $src2\t# @absd4L" %} ++ ins_encode %{ ++ __ xvabsd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- MAX -------------------------------------- ++ ++instruct max16B(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); ++ match(Set dst (MaxV src1 src2)); ++ format %{ "vmax.b $dst, $src1, $src2\t# @max16B" %} ++ ins_encode %{ ++ __ vmax_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max8S(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); ++ match(Set dst (MaxV src1 src2)); ++ format %{ "vmax.h $dst, $src1, $src2\t# @max8S" %} ++ ins_encode %{ ++ __ vmax_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max4I(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); ++ match(Set dst (MaxV src1 src2)); ++ format %{ "vmax.w $dst, $src1, $src2\t# @max4I" %} ++ ins_encode %{ ++ __ vmax_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max2L(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); ++ match(Set dst (MaxV src1 src2)); ++ format %{ "vmax.d $dst, $src1, $src2\t# @max2L" %} ++ ins_encode %{ ++ __ vmax_d($dst$$FloatRegister, 
$src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max4F(vecX dst, vecX src1, vecX src2, vecX tmp1, vecX tmp2) %{ ++ predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); ++ match(Set dst (MaxV src1 src2)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "vfmax $dst, $src1, $src2\t# TEMP($tmp1, $tmp2) @max4F" %} ++ ins_encode %{ ++ __ vfmax_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ vxor_v($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ vfdiv_s($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ vfcmp_cun_s($tmp2$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ vbitsel_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max2D(vecX dst, vecX src1, vecX src2, vecX tmp1, vecX tmp2) %{ ++ predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); ++ match(Set dst (MaxV src1 src2)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "vfmax $dst, $src1, $src2\t# TEMP($tmp1, $tmp2) @max2D" %} ++ ins_encode %{ ++ __ vfmax_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ vxor_v($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ vfdiv_d($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ vfcmp_cun_d($tmp2$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ vbitsel_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max32B(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); ++ match(Set dst (MaxV src1 src2)); ++ format %{ "xvmax.b $dst, $src1, $src2\t# @max32B" %} ++ ins_encode %{ ++ __ xvmax_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max16S(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); ++ match(Set dst (MaxV src1 src2)); ++ format %{ "xvmax.h $dst, $src1, $src2\t# @max16S" %} ++ ins_encode %{ ++ __ xvmax_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max8I(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); ++ match(Set dst (MaxV src1 src2)); ++ format %{ "xvmax.w $dst, $src1, $src2\t# @max8I" %} ++ ins_encode %{ ++ __ xvmax_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max4L(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); ++ match(Set dst (MaxV src1 src2)); ++ format %{ "xvmax.d $dst, $src1, $src2\t# @max4L" %} ++ ins_encode %{ ++ __ xvmax_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max8F(vecY dst, vecY src1, vecY src2, vecY tmp1, vecY tmp2) %{ ++ predicate(n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); ++ match(Set dst (MaxV src1 src2)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP 
tmp2); ++ format %{ "xvfmax $dst, $src1, $src2\t# TEMP($tmp1, $tmp2) @max8F" %} ++ ins_encode %{ ++ __ xvfmax_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ xvxor_v($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ xvfdiv_s($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ xvfcmp_cun_s($tmp2$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ xvbitsel_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max4D(vecY dst, vecY src1, vecY src2, vecY tmp1, vecY tmp2) %{ ++ predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); ++ match(Set dst (MaxV src1 src2)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "xvfmax $dst, $src1, $src2\t# TEMP($tmp1, $tmp2) @max4D" %} ++ ins_encode %{ ++ __ xvfmax_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ xvxor_v($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ xvfdiv_d($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ xvfcmp_cun_d($tmp2$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ xvbitsel_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- MIN -------------------------------------- ++ ++instruct min16B(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); ++ match(Set dst (MinV src1 src2)); ++ format %{ "vmin.b $dst, $src1, $src2\t# @min16B" %} ++ ins_encode %{ ++ __ vmin_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min8S(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); ++ match(Set dst (MinV src1 src2)); ++ format %{ "vmin.h $dst, $src1, $src2\t# @min8S" %} ++ ins_encode %{ ++ __ vmin_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min4I(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); ++ match(Set dst (MinV src1 src2)); ++ format %{ "vmin.w $dst, $src1, $src2\t# @min4I" %} ++ ins_encode %{ ++ __ vmin_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min2L(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); ++ match(Set dst (MinV src1 src2)); ++ format %{ "vmin.d $dst, $src1, $src2\t# @min2L" %} ++ ins_encode %{ ++ __ vmin_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min4F(vecX dst, vecX src1, vecX src2, vecX tmp1, vecX tmp2) %{ ++ predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); ++ match(Set dst (MinV src1 src2)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "vfmin $dst, $src1, $src2\t# TEMP($tmp1, $tmp2) @min4F" %} ++ ins_encode %{ ++ __ vfmin_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ vxor_v($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ 
vfdiv_s($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ vfcmp_cun_s($tmp2$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ vbitsel_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min2D(vecX dst, vecX src1, vecX src2, vecX tmp1, vecX tmp2) %{ ++ predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); ++ match(Set dst (MinV src1 src2)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "vfmin $dst, $src1, $src2\t# TEMP($tmp1, $tmp2) @min2D" %} ++ ins_encode %{ ++ __ vfmin_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ vxor_v($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ vfdiv_d($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ vfcmp_cun_d($tmp2$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ vbitsel_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min32B(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); ++ match(Set dst (MinV src1 src2)); ++ format %{ "xvmin.b $dst, $src1, $src2\t# @min32B" %} ++ ins_encode %{ ++ __ xvmin_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min16S(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); ++ match(Set dst (MinV src1 src2)); ++ format %{ "xvmin.h $dst, $src1, $src2\t# @min16S" %} ++ ins_encode %{ ++ __ xvmin_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min8I(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); ++ match(Set dst (MinV src1 src2)); ++ format %{ "xvmin.w $dst, $src1, $src2\t# @min8I" %} ++ ins_encode %{ ++ __ xvmin_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min4L(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); ++ match(Set dst (MinV src1 src2)); ++ format %{ "xvmin.d $dst, $src1, $src2\t# @min4L" %} ++ ins_encode %{ ++ __ xvmin_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min8F(vecY dst, vecY src1, vecY src2, vecY tmp1, vecY tmp2) %{ ++ predicate(n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); ++ match(Set dst (MinV src1 src2)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "xvfmin $dst, $src1, $src2\t# TEMP($tmp1, $tmp2) @min8F" %} ++ ins_encode %{ ++ __ xvfmin_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ xvxor_v($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ xvfdiv_s($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ xvfcmp_cun_s($tmp2$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ xvbitsel_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min4D(vecY dst, vecY src1, 
vecY src2, vecY tmp1, vecY tmp2) %{ ++ predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); ++ match(Set dst (MinV src1 src2)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "xvfmin $dst, $src1, $src2\t# TEMP($tmp1, $tmp2) @min4D" %} ++ ins_encode %{ ++ __ xvfmin_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ xvxor_v($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ xvfdiv_d($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ xvfcmp_cun_d($tmp2$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ xvbitsel_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- NEG -------------------------------------- ++ ++instruct neg4F(vecX dst, vecX src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (NegVF src)); ++ format %{ "vbitrevi.w $dst, $src\t# @neg4F" %} ++ ins_encode %{ ++ __ vbitrevi_w($dst$$FloatRegister, $src$$FloatRegister, 0x1f); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct neg2D(vecX dst, vecX src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (NegVD src)); ++ format %{ "vbitrevi.d $dst, $src\t# @neg2D" %} ++ ins_encode %{ ++ __ vbitrevi_d($dst$$FloatRegister, $src$$FloatRegister, 0x3f); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct neg8F(vecY dst, vecY src) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (NegVF src)); ++ format %{ "xvbitrevi.w $dst, $src\t# @neg8F" %} ++ ins_encode %{ ++ __ xvbitrevi_w($dst$$FloatRegister, $src$$FloatRegister, 0x1f); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct neg4D(vecY dst, vecY src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (NegVD src)); ++ format %{ "xvbitrevi.d $dst, $src\t# @neg4D" %} ++ ins_encode %{ ++ __ xvbitrevi_d($dst$$FloatRegister, $src$$FloatRegister, 0x3f); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- SQRT ------------------------------------- ++ ++instruct sqrt4F(vecX dst, vecX src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (SqrtVF src)); ++ format %{ "vfsqrt.s $dst, $src\t# @sqrt4F" %} ++ ins_encode %{ ++ __ vfsqrt_s($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sqrt2D(vecX dst, vecX src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (SqrtVD src)); ++ format %{ "vfsqrt.d $dst, $src\t# @sqrt2D" %} ++ ins_encode %{ ++ __ vfsqrt_d($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sqrt8F(vecY dst, vecY src) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (SqrtVF src)); ++ format %{ "xvfsqrt.s $dst, $src\t# @sqrt8F" %} ++ ins_encode %{ ++ __ xvfsqrt_s($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sqrt4D(vecY dst, vecY src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (SqrtVD src)); ++ format %{ "xvfsqrt.d $dst, $src\t# @sqrt4D" %} ++ ins_encode %{ ++ __ xvfsqrt_d($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- MADD ------------------------------------- ++ ++instruct madd16B(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (AddVB dst (MulVB src1 src2))); ++ format %{ "vmadd.b $dst, $src1, $src2\t# @madd16B" %} ++ 
ins_encode %{ ++ __ vmadd_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct madd8S(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (AddVS dst (MulVS src1 src2))); ++ format %{ "vmadd.h $dst, $src1, $src2\t# @madd8S" %} ++ ins_encode %{ ++ __ vmadd_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct madd4I(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AddVI dst (MulVI src1 src2))); ++ format %{ "vmadd $dst, $src1, $src2\t# @madd4I" %} ++ ins_encode %{ ++ __ vmadd_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct madd2L(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (AddVL dst (MulVL src1 src2))); ++ format %{ "vmadd.d $dst, $src1, $src2\t# @madd2L" %} ++ ins_encode %{ ++ __ vmadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// src1 * src2 + src3 ++instruct madd4F(vecX dst, vecX src1, vecX src2, vecX src3) %{ ++ predicate(UseFMA && n->as_Vector()->length() == 4); ++ match(Set dst (FmaVF src3 (Binary src1 src2))); ++ format %{ "vfmadd.s $dst, $src1, $src2, $src3\t# @madd4F" %} ++ ins_encode %{ ++ __ vfmadd_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// src1 * src2 + src3 ++instruct madd2D(vecX dst, vecX src1, vecX src2, vecX src3) %{ ++ predicate(UseFMA && n->as_Vector()->length() == 2); ++ match(Set dst (FmaVD src3 (Binary src1 src2))); ++ format %{ "vfmadd.d $dst, $src1, $src2, $src3\t# @madd2D" %} ++ ins_encode %{ ++ __ vfmadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct madd32B(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (AddVB dst (MulVB src1 src2))); ++ format %{ "xvmadd.b $dst, $src1, $src2\t# @madd32B" %} ++ ins_encode %{ ++ __ xvmadd_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct madd16S(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (AddVS dst (MulVS src1 src2))); ++ format %{ "xvmadd.h $dst, $src1, $src2\t# @madd16S" %} ++ ins_encode %{ ++ __ xvmadd_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct madd8I(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (AddVI dst (MulVI src1 src2))); ++ format %{ "xvmadd.w $dst, $src1, $src2\t# @madd8I" %} ++ ins_encode %{ ++ __ xvmadd_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct madd4L(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AddVL dst (MulVL src1 src2))); ++ format %{ "xvmadd.d $dst, $src1, $src2\t# @madd4L" %} ++ ins_encode %{ ++ __ xvmadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// src1 * src2 + src3 ++instruct madd8F(vecY dst, vecY src1, vecY src2, vecY src3) %{ ++ predicate(UseFMA && n->as_Vector()->length() == 8); ++ match(Set dst (FmaVF src3 (Binary src1 
src2))); ++ format %{ "xvfmadd.s $dst, $src1, $src2, $src3\t# @madd8F" %} ++ ins_encode %{ ++ __ xvfmadd_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// src1 * src2 + src3 ++instruct madd4D(vecY dst, vecY src1, vecY src2, vecY src3) %{ ++ predicate(UseFMA && n->as_Vector()->length() == 4); ++ match(Set dst (FmaVD src3 (Binary src1 src2))); ++ format %{ "xvfmadd.d $dst, $src1, $src2, $src3\t# @madd4D" %} ++ ins_encode %{ ++ __ xvfmadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- MSUB ------------------------------------- ++ ++instruct msub16B(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (SubVB dst (MulVB src1 src2))); ++ format %{ "vmsub.b $dst, $src1, $src2\t# @msub16B" %} ++ ins_encode %{ ++ __ vmsub_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct msub8S(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (SubVS dst (MulVS src1 src2))); ++ format %{ "vmsub.h $dst, $src1, $src2\t# @msub8S" %} ++ ins_encode %{ ++ __ vmsub_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct msub4I(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (SubVI dst (MulVI src1 src2))); ++ format %{ "vmsub.w $dst, $src1, $src2\t# @msub4I" %} ++ ins_encode %{ ++ __ vmsub_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct msub2L(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (SubVL dst (MulVL src1 src2))); ++ format %{ "vmsub.d $dst, $src1, $src2\t# @msub2L" %} ++ ins_encode %{ ++ __ vmsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// src1 * src2 - src3 ++instruct msub4F(vecX dst, vecX src1, vecX src2, vecX src3) %{ ++ predicate(UseFMA && n->as_Vector()->length() == 4); ++ match(Set dst (FmaVF (NegVF src3) (Binary src1 src2))); ++ format %{ "vfmsub.s $dst, $src1, $src2, $src3\t# @msub4F" %} ++ ins_encode %{ ++ __ vfmsub_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// src1 * src2 - src3 ++instruct msub2D(vecX dst, vecX src1, vecX src2, vecX src3) %{ ++ predicate(UseFMA && n->as_Vector()->length() == 2); ++ match(Set dst (FmaVD (NegVD src3) (Binary src1 src2))); ++ format %{ "vfmsub.d $dst, $src1, $src2, $src3\t# @msub2D" %} ++ ins_encode %{ ++ __ vfmsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct msub32B(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (SubVB dst (MulVB src1 src2))); ++ format %{ "xvmsub.b $dst, $src1, $src2\t# @msub32B" %} ++ ins_encode %{ ++ __ xvmsub_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct msub16S(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (SubVS dst (MulVS src1 src2))); ++ format %{ "xvmsub.h $dst, $src1, $src2\t# @msub16S" %} ++ ins_encode %{ ++ __ 
xvmsub_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct msub8I(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (SubVI dst (MulVI src1 src2))); ++ format %{ "xvmsub.w $dst, $src1, $src2\t# @msub8I" %} ++ ins_encode %{ ++ __ xvmsub_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct msub4L(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (SubVL dst (MulVL src1 src2))); ++ format %{ "xvmsub.d $dst, $src1, $src2\t# @msub4L" %} ++ ins_encode %{ ++ __ xvmsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// src1 * src2 - src3 ++instruct msub8F(vecY dst, vecY src1, vecY src2, vecY src3) %{ ++ predicate(UseFMA && n->as_Vector()->length() == 8); ++ match(Set dst (FmaVF (NegVF src3) (Binary src1 src2))); ++ format %{ "xvfmsub.s $dst, $src1, $src2, $src3\t# @msub8F" %} ++ ins_encode %{ ++ __ xvfmsub_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// src1 * src2 - src3 ++instruct msub4D(vecY dst, vecY src1, vecY src2, vecY src3) %{ ++ predicate(UseFMA && n->as_Vector()->length() == 4); ++ match(Set dst (FmaVD (NegVD src3) (Binary src1 src2))); ++ format %{ "xvfmsub.d $dst, $src1, $src2, $src3\t# @msub4D" %} ++ ins_encode %{ ++ __ xvfmsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- FNMADD ----------------------------------- ++ ++// -src1 * src2 - src3 ++instruct nmadd4F(vecX dst, vecX src1, vecX src2, vecX src3) %{ ++ predicate(UseFMA && n->as_Vector()->length() == 4); ++ match(Set dst (FmaVF (NegVF src3) (Binary (NegVF src1) src2))); ++ match(Set dst (FmaVF (NegVF src3) (Binary src1 (NegVF src2)))); ++ format %{ "vfnmadd.s $dst, $src1, $src2, $src3\t# @nmadd4F" %} ++ ins_encode %{ ++ __ vfnmadd_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// -src1 * src2 - src3 ++instruct nmadd2D(vecX dst, vecX src1, vecX src2, vecX src3) %{ ++ predicate(UseFMA && n->as_Vector()->length() == 2); ++ match(Set dst (FmaVD (NegVD src3) (Binary (NegVD src1) src2))); ++ match(Set dst (FmaVD (NegVD src3) (Binary src1 (NegVD src2)))); ++ format %{ "vfnmadd.d $dst, $src1, $src2, $src3\t# @nmadd2D" %} ++ ins_encode %{ ++ __ vfnmadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// -src1 * src2 - src3 ++instruct nmadd8F(vecY dst, vecY src1, vecY src2, vecY src3) %{ ++ predicate(UseFMA && n->as_Vector()->length() == 8); ++ match(Set dst (FmaVF (NegVF src3) (Binary (NegVF src1) src2))); ++ match(Set dst (FmaVF (NegVF src3) (Binary src1 (NegVF src2)))); ++ format %{ "xvfnmadd.s $dst, $src1, $src2, $src3\t# @nmadd8F" %} ++ ins_encode %{ ++ __ xvfnmadd_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// -src1 * src2 - src3 ++instruct nmadd4D(vecY dst, vecY src1, vecY src2, vecY src3) %{ ++ predicate(UseFMA && n->as_Vector()->length() == 4); ++ match(Set dst (FmaVD (NegVD src3) (Binary (NegVD src1) src2))); ++ match(Set dst (FmaVD (NegVD src3) (Binary src1 (NegVD src2)))); ++ format %{ 
"xvfnmadd.d $dst, $src1, $src2, $src3\t# @nmadd4D" %} ++ ins_encode %{ ++ __ xvfnmadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- FNMSUB ----------------------------------- ++ ++// -src1 * src2 + src3 ++instruct nmsub4F(vecX dst, vecX src1, vecX src2, vecX src3) %{ ++ predicate(UseFMA && n->as_Vector()->length() == 4); ++ match(Set dst (FmaVF src3 (Binary (NegVF src1) src2))); ++ match(Set dst (FmaVF src3 (Binary src1 (NegVF src2)))); ++ format %{ "vfnmsub.s $dst, $src1, $src2, $src3\t# @nmsub4F" %} ++ ins_encode %{ ++ __ vfnmsub_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// -src1 * src2 + src3 ++instruct nmsub2D(vecX dst, vecX src1, vecX src2, vecX src3) %{ ++ predicate(UseFMA && n->as_Vector()->length() == 2); ++ match(Set dst (FmaVD src3 (Binary (NegVD src1) src2))); ++ match(Set dst (FmaVD src3 (Binary src1 (NegVD src2)))); ++ format %{ "vfnmsub.d $dst, $src1, $src2, $src3\t# @nmsub2D" %} ++ ins_encode %{ ++ __ vfnmsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// -src1 * src2 + src3 ++instruct nmsub8F(vecY dst, vecY src1, vecY src2, vecY src3) %{ ++ predicate(UseFMA && n->as_Vector()->length() == 8); ++ match(Set dst (FmaVF src3 (Binary (NegVF src1) src2))); ++ match(Set dst (FmaVF src3 (Binary src1 (NegVF src2)))); ++ format %{ "xvfnmsub.s $dst, $src1, $src2, $src3\t# @nmsub8F" %} ++ ins_encode %{ ++ __ xvfnmsub_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// -src1 * src2 + src3 ++instruct nmsub4D(vecY dst, vecY src1, vecY src2, vecY src3) %{ ++ predicate(UseFMA && n->as_Vector()->length() == 4); ++ match(Set dst (FmaVD src3 (Binary (NegVD src1) src2))); ++ match(Set dst (FmaVD src3 (Binary src1 (NegVD src2)))); ++ format %{ "xvfnmsub.d $dst, $src1, $src2, $src3\t# @nmsub4D" %} ++ ins_encode %{ ++ __ xvfnmsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ------------------------------ Shift --------------------------------------- ++ ++instruct shiftcntX(vecX dst, mRegI cnt) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 16); ++ match(Set dst (LShiftCntV cnt)); ++ match(Set dst (RShiftCntV cnt)); ++ format %{ "vreplgr2vr.b $dst, $cnt\t# @shiftcntX" %} ++ ins_encode %{ ++ __ vreplgr2vr_b($dst$$FloatRegister, $cnt$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct shiftcntY(vecY dst, mRegI cnt) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 32); ++ match(Set dst (LShiftCntV cnt)); ++ match(Set dst (RShiftCntV cnt)); ++ format %{ "xvreplgr2vr.b $dst, $cnt\t# @shiftcntY" %} ++ ins_encode %{ ++ __ xvreplgr2vr_b($dst$$FloatRegister, $cnt$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ------------------------------ LeftShift ----------------------------------- ++ ++instruct sll16B(vecX dst, vecX src, vecX shift, vecX tmp) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (LShiftVB src shift)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "vsll $dst, $src, $shift\t# TEMP($tmp) @sll16B" %} ++ ins_encode %{ ++ __ vsll_b($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ __ vslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x8); ++ __ 
vand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll16B_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (LShiftVB src shift)); ++ format %{ "vslli.b $dst, $src, $shift\t# @sll16B_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 8) { ++ __ vxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ } else { ++ __ vslli_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll8S(vecX dst, vecX src, vecX shift, vecX tmp) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (LShiftVS src shift)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "vsll $dst, $src, $shift\t# TEMP($tmp) @sll8S" %} ++ ins_encode %{ ++ __ vsll_h($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ __ vslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x10); ++ __ vand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll8S_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (LShiftVS src shift)); ++ format %{ "vslli.h $dst, $src, $shift\t# @sll8S_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 16) { ++ __ vxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ } else { ++ __ vslli_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll4I(vecX dst, vecX src, vecX shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (LShiftVI src shift)); ++ format %{ "vsll.w $dst, $src, $shift\t# @sll4I" %} ++ ins_encode %{ ++ __ vsll_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll4I_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (LShiftVI src shift)); ++ format %{ "vslli.w $dst, $src, $shift\t# @sll4I_imm" %} ++ ins_encode %{ ++ __ vslli_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll2L(vecX dst, vecX src, vecX shift) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (LShiftVL src shift)); ++ format %{ "vsll.d $dst, $src, $shift\t# @sll2L" %} ++ ins_encode %{ ++ __ vsll_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll2L_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (LShiftVL src shift)); ++ format %{ "vslli.d $dst, $src, $shift\t# @sll2L_imm" %} ++ ins_encode %{ ++ __ vslli_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll32B(vecY dst, vecY src, vecY shift, vecY tmp) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (LShiftVB src shift)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "xvsll $dst, $src, $shift\t# TEMP($tmp) @sll32B" %} ++ ins_encode %{ ++ __ xvsll_b($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ __ xvslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x8); ++ __ xvand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll32B_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (LShiftVB src shift)); ++ 
format %{ "xvslli.b $dst, $src, $shift\t# @sll32B_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 8) { ++ __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ } else { ++ __ xvslli_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll16S(vecY dst, vecY src, vecY shift, vecY tmp) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (LShiftVS src shift)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "xvsll $dst, $src, $shift\t# TEMP($tmp) @sll16S" %} ++ ins_encode %{ ++ __ xvsll_h($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ __ xvslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x10); ++ __ xvand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll16S_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (LShiftVS src shift)); ++ format %{ "xvslli.h $dst, $src, $shift\t# @sll16S_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 16) { ++ __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ } else { ++ __ xvslli_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll8I(vecY dst, vecY src, vecY shift) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (LShiftVI src shift)); ++ format %{ "xvsll.w $dst, $src, $shift\t# @sll8I" %} ++ ins_encode %{ ++ __ xvsll_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll8I_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (LShiftVI src shift)); ++ format %{ "xvslli.w $dst, $src, $shift\t# @sll8I_imm" %} ++ ins_encode %{ ++ __ xvslli_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll4L(vecY dst, vecY src, vecY shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (LShiftVL src shift)); ++ format %{ "xvsll.d $dst, $src, $shift\t# @sll4L" %} ++ ins_encode %{ ++ __ xvsll_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll4L_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (LShiftVL src shift)); ++ format %{ "xvslli.d $dst, $src, $shift\t# @sll4L_imm" %} ++ ins_encode %{ ++ __ xvslli_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ----------------------- LogicalRightShift ---------------------------------- ++ ++instruct srl16B(vecX dst, vecX src, vecX shift, vecX tmp) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (URShiftVB src shift)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "vsrl $dst, $src, $shift\t# TEMP($tmp) @srl16B" %} ++ ins_encode %{ ++ __ vsrl_b($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ __ vslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x8); ++ __ vand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl16B_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (URShiftVB src shift)); ++ format %{ "vsrli.b $dst, $src, $shift\t# @srl16B_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 8) { ++ __ vxor_v($dst$$FloatRegister, 
$dst$$FloatRegister, $dst$$FloatRegister); ++ } else { ++ __ vsrli_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl8S(vecX dst, vecX src, vecX shift, vecX tmp) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (URShiftVS src shift)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "vsrl $dst, $src, $shift\t# TEMP($tmp) @srl8S" %} ++ ins_encode %{ ++ __ vsrl_h($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ __ vslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x10); ++ __ vand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl8S_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (URShiftVS src shift)); ++ format %{ "vsrli.h $dst, $src, $shift\t# @srl8S_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 16) { ++ __ vxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ } else { ++ __ vsrli_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl4I(vecX dst, vecX src, vecX shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (URShiftVI src shift)); ++ format %{ "vsrl.w $dst, $src, $shift\t# @srl4I" %} ++ ins_encode %{ ++ __ vsrl_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl4I_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (URShiftVI src shift)); ++ format %{ "vsrli.w $dst, $src, $shift\t# @srl4I_imm" %} ++ ins_encode %{ ++ __ vsrli_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl2L(vecX dst, vecX src, vecX shift) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (URShiftVL src shift)); ++ format %{ "vsrl.d $dst, $src, $shift\t# @srl2L" %} ++ ins_encode %{ ++ __ vsrl_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl2L_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (URShiftVL src shift)); ++ format %{ "vsrli.d $dst, $src, $shift\t# @srl2L_imm" %} ++ ins_encode %{ ++ __ vsrli_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl32B(vecY dst, vecY src, vecY shift, vecY tmp) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (URShiftVB src shift)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "xvsrl $dst, $src, $shift\t# TEMP($tmp) @srl32B" %} ++ ins_encode %{ ++ __ xvsrl_b($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ __ xvslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x8); ++ __ xvand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl32B_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (URShiftVB src shift)); ++ format %{ "xvsrli.b $dst, $src, $shift\t# @srl32B_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 8) { ++ __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ } else { ++ __ xvsrli_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl16S(vecY dst, vecY src, vecY shift, vecY tmp) %{ ++ 
predicate(n->as_Vector()->length() == 16); ++ match(Set dst (URShiftVS src shift)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "xvsrl $dst, $src, $shift\t# TEMP($tmp) @srl16S" %} ++ ins_encode %{ ++ __ xvsrl_h($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ __ xvslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x10); ++ __ xvand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl16S_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (URShiftVS src shift)); ++ format %{ "xvsrli.h $dst, $src, $shift\t# @srl16S_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 16) { ++ __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ } else { ++ __ xvsrli_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl8I(vecY dst, vecY src, vecY shift) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (URShiftVI src shift)); ++ format %{ "xvsrl.w $dst, $src, $shift\t# @srl8I" %} ++ ins_encode %{ ++ __ xvsrl_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl8I_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (URShiftVI src shift)); ++ format %{ "xvsrli.w $dst, $src, $shift\t# @srl8I_imm" %} ++ ins_encode %{ ++ __ xvsrli_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl4L(vecY dst, vecY src, vecY shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (URShiftVL src shift)); ++ format %{ "xvsrl.d $dst, $src, $shift\t# @srl4L" %} ++ ins_encode %{ ++ __ xvsrl_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl4L_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (URShiftVL src shift)); ++ format %{ "xvsrli.d $dst, $src, $shift\t# @srl4L_imm" %} ++ ins_encode %{ ++ __ xvsrli_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ------------------------- ArithmeticRightShift ----------------------------- ++ ++instruct sra16B(vecX dst, vecX src, vecX shift, vecX tmp) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (RShiftVB src shift)); ++ effect(TEMP tmp); ++ format %{ "vsra $dst, $src, $shift\t# TEMP($tmp) @sra16B" %} ++ ins_encode %{ ++ __ vslti_bu($tmp$$FloatRegister, $shift$$FloatRegister, 0x8); ++ __ vorn_v($tmp$$FloatRegister, $shift$$FloatRegister, $tmp$$FloatRegister); ++ __ vsra_b($dst$$FloatRegister, $src$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra16B_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (RShiftVB src shift)); ++ format %{ "vsrai.b $dst, $src, $shift\t# @sra16B_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 8) { ++ __ vsrai_b($dst$$FloatRegister, $src$$FloatRegister, 7); ++ } else { ++ __ vsrai_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra8S(vecX dst, vecX src, vecX shift, vecX tmp) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (RShiftVS src shift)); ++ effect(TEMP tmp); ++ format %{ "vsra $dst, $src, $shift\t# TEMP($tmp) @sra8S" %} ++ ins_encode %{ ++ 
__ vslti_bu($tmp$$FloatRegister, $shift$$FloatRegister, 0x10); ++ __ vorn_v($tmp$$FloatRegister, $shift$$FloatRegister, $tmp$$FloatRegister); ++ __ vsra_h($dst$$FloatRegister, $src$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra8S_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (RShiftVS src shift)); ++ format %{ "vsrai.h $dst, $src, $shift\t# @sra8S_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 16) { ++ __ vsrai_h($dst$$FloatRegister, $src$$FloatRegister, 15); ++ } else { ++ __ vsrai_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra4I(vecX dst, vecX src, vecX shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (RShiftVI src shift)); ++ format %{ "vsra.w $dst, $src, $shift\t# @sra4I" %} ++ ins_encode %{ ++ __ vsra_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra4I_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (RShiftVI src shift)); ++ format %{ "vsrai.w $dst, $src, $shift\t# @sra4I_imm" %} ++ ins_encode %{ ++ __ vsrai_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra2L(vecX dst, vecX src, vecX shift) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (RShiftVL src shift)); ++ format %{ "vsra.d $dst, $src, $shift\t# @sra2L" %} ++ ins_encode %{ ++ __ vsra_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra2L_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (RShiftVL src shift)); ++ format %{ "vsrai.d $dst, $src, $shift\t# @sra2L_imm" %} ++ ins_encode %{ ++ __ vsrai_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra32B(vecY dst, vecY src, vecY shift, vecY tmp) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (RShiftVB src shift)); ++ effect(TEMP tmp); ++ format %{ "xvsra $dst, $src, $shift\t# TEMP($tmp) @sra32B" %} ++ ins_encode %{ ++ __ xvslti_bu($tmp$$FloatRegister, $shift$$FloatRegister, 0x8); ++ __ xvorn_v($tmp$$FloatRegister, $shift$$FloatRegister, $tmp$$FloatRegister); ++ __ xvsra_b($dst$$FloatRegister, $src$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra32B_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (RShiftVB src shift)); ++ format %{ "xvsrai.b $dst, $src, $shift\t# @sra32B_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 8) { ++ __ xvsrai_b($dst$$FloatRegister, $src$$FloatRegister, 7); ++ } else { ++ __ xvsrai_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra16S(vecY dst, vecY src, vecY shift, vecY tmp) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (RShiftVS src shift)); ++ effect(TEMP tmp); ++ format %{ "xvsra $dst, $src, $shift\t# TEMP($tmp) @sra16S" %} ++ ins_encode %{ ++ __ xvslti_bu($tmp$$FloatRegister, $shift$$FloatRegister, 0x10); ++ __ xvorn_v($tmp$$FloatRegister, $shift$$FloatRegister, $tmp$$FloatRegister); ++ __ xvsra_h($dst$$FloatRegister, $src$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra16S_imm(vecY dst, vecY src, immI shift) 
%{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (RShiftVS src shift)); ++ format %{ "xvsrai.h $dst, $src, $shift\t# @sra16S_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 16) { ++ __ xvsrai_h($dst$$FloatRegister, $src$$FloatRegister, 15); ++ } else { ++ __ xvsrai_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra8I(vecY dst, vecY src, vecY shift) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (RShiftVI src shift)); ++ format %{ "xvsra.w $dst, $src, $shift\t# @sra8I" %} ++ ins_encode %{ ++ __ xvsra_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra8I_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (RShiftVI src shift)); ++ format %{ "xvsrai.w $dst, $src, $shift\t# @sra8I_imm" %} ++ ins_encode %{ ++ __ xvsrai_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra4L(vecY dst, vecY src, vecY shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (RShiftVL src shift)); ++ format %{ "xvsra.d $dst, $src, $shift\t# @sra4L" %} ++ ins_encode %{ ++ __ xvsra_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra4L_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (RShiftVL src shift)); ++ format %{ "xvsrai.d $dst, $src, $shift\t# @sra4L_imm" %} ++ ins_encode %{ ++ __ xvsrai_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- AND -------------------------------------- ++ ++instruct andV16(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 16); ++ match(Set dst (AndV src1 src2)); ++ format %{ "vand.v $dst, $src1, $src2\t# @andV16" %} ++ ins_encode %{ ++ __ vand_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct and16B_imm(vecX dst, vecX src, immIU8 imm) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (AndV src (ReplicateB imm))); ++ format %{ "vandi.b $dst, $src, $imm\t# @and16B_imm" %} ++ ins_encode %{ ++ __ vandi_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct andV32(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 32); ++ match(Set dst (AndV src1 src2)); ++ format %{ "xvand.v $dst, $src1, $src2\t# @andV32" %} ++ ins_encode %{ ++ __ xvand_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct and32B_imm(vecY dst, vecY src, immIU8 imm) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (AndV src (ReplicateB imm))); ++ format %{ "xvandi.b $dst, $src, $imm\t# @and32B_imm" %} ++ ins_encode %{ ++ __ xvandi_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- OR --------------------------------------- ++ ++instruct orV16(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 16); ++ match(Set dst (OrV src1 src2)); ++ format %{ "vor.v $dst, $src1, $src2\t# @orV16" %} ++ ins_encode %{ ++ __ vor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( 
pipe_slow ); ++%} ++ ++instruct or16B_imm(vecX dst, vecX src, immIU8 imm) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (OrV src (ReplicateB imm))); ++ format %{ "vori.b $dst, $src, $imm\t# @or16B_imm" %} ++ ins_encode %{ ++ __ vori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct orV32(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 32); ++ match(Set dst (OrV src1 src2)); ++ format %{ "xvor.v $dst, $src1, $src2\t# @orV32" %} ++ ins_encode %{ ++ __ xvor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct or32B_imm(vecY dst, vecY src, immIU8 imm) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (OrV src (ReplicateB imm))); ++ format %{ "xvori.b $dst, $src, $imm\t# @or32B_imm" %} ++ ins_encode %{ ++ __ xvori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- XOR -------------------------------------- ++ ++instruct xorV16(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 16); ++ match(Set dst (XorV src1 src2)); ++ format %{ "vxor.v $dst, $src1, $src2\t# @xorV16" %} ++ ins_encode %{ ++ __ vxor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct xor16B_imm(vecX dst, vecX src, immIU8 imm) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (XorV src (ReplicateB imm))); ++ format %{ "vxori.b $dst, $src, $imm\t# @xor16B_imm" %} ++ ins_encode %{ ++ __ vxori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct xorV32(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 32); ++ match(Set dst (XorV src1 src2)); ++ format %{ "xvxor.v $dst, $src1, $src2\t# @xorV32" %} ++ ins_encode %{ ++ __ xvxor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct xor32B_imm(vecX dst, vecX src, immIU8 imm) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (XorV src (ReplicateB imm))); ++ format %{ "xvxori.b $dst, $src, $imm\t# @xor32B_imm" %} ++ ins_encode %{ ++ __ xvxori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- NOR -------------------------------------- ++ ++instruct norV16(vecX dst, vecX src1, vecX src2, immI_M1 m1) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 16); ++ match(Set dst (XorV (OrV src1 src2) (ReplicateB m1))); ++ match(Set dst (XorV (OrV src1 src2) (ReplicateS m1))); ++ match(Set dst (XorV (OrV src1 src2) (ReplicateI m1))); ++ format %{ "vnor.v $dst, $src1, $src2\t# @norV16" %} ++ ins_encode %{ ++ __ vnor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct nor16B_imm(vecX dst, vecX src, immIU8 imm, immI_M1 m1) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (XorV (OrV src (ReplicateB imm)) (ReplicateB m1))); ++ format %{ "vnori.b $dst, $src, $imm\t# @nor16B_imm" %} ++ ins_encode %{ ++ __ vnori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct norV32(vecY dst, vecY src1, vecY src2, immI_M1 m1) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 32); ++ match(Set dst (XorV (OrV src1 
src2) (ReplicateB m1))); ++ match(Set dst (XorV (OrV src1 src2) (ReplicateS m1))); ++ match(Set dst (XorV (OrV src1 src2) (ReplicateI m1))); ++ format %{ "xvnor.v $dst, $src1, $src2\t# @norV32" %} ++ ins_encode %{ ++ __ xvnor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct nor32B_imm(vecY dst, vecY src, immIU8 imm, immI_M1 m1) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (XorV (OrV src (ReplicateB imm)) (ReplicateB m1))); ++ format %{ "xvnori.b $dst, $src, $imm\t# @nor32B_imm" %} ++ ins_encode %{ ++ __ xvnori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- ANDN ------------------------------------- ++ ++instruct andnV16(vecX dst, vecX src1, vecX src2, immI_M1 m1) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 16); ++ match(Set dst (AndV src2 (XorV src1 (ReplicateB m1)))); ++ match(Set dst (AndV src2 (XorV src1 (ReplicateS m1)))); ++ match(Set dst (AndV src2 (XorV src1 (ReplicateI m1)))); ++ format %{ "vandn.v $dst, $src1, $src2\t# @andnV16" %} ++ ins_encode %{ ++ __ vandn_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct andnV32(vecY dst, vecY src1, vecY src2, immI_M1 m1) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 32); ++ match(Set dst (AndV src2 (XorV src1 (ReplicateB m1)))); ++ match(Set dst (AndV src2 (XorV src1 (ReplicateS m1)))); ++ match(Set dst (AndV src2 (XorV src1 (ReplicateI m1)))); ++ format %{ "xvandn.v $dst, $src1, $src2\t# @andnV32" %} ++ ins_encode %{ ++ __ xvandn_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- ORN -------------------------------------- ++ ++instruct ornV16(vecX dst, vecX src1, vecX src2, immI_M1 m1) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 16); ++ match(Set dst (OrV src1 (XorV src2 (ReplicateB m1)))); ++ match(Set dst (OrV src1 (XorV src2 (ReplicateS m1)))); ++ match(Set dst (OrV src1 (XorV src2 (ReplicateI m1)))); ++ format %{ "vorn.v $dst, $src1, $src2\t# @ornV16" %} ++ ins_encode %{ ++ __ vorn_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct ornV32(vecY dst, vecY src1, vecY src2, immI_M1 m1) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 32); ++ match(Set dst (OrV src1 (XorV src2 (ReplicateB m1)))); ++ match(Set dst (OrV src1 (XorV src2 (ReplicateS m1)))); ++ match(Set dst (OrV src1 (XorV src2 (ReplicateI m1)))); ++ format %{ "xvorn.v $dst, $src1, $src2\t# @ornV32" %} ++ ins_encode %{ ++ __ xvorn_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ----------------------------- Reduction Add -------------------------------- ++ ++instruct reduce_add16B(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); ++ match(Set dst (AddReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_add16B" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_add8S(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ 
predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); ++ match(Set dst (AddReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_add8S" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_add4I(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); ++ match(Set dst (AddReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_add4I" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_add2L(mRegL dst, mRegL src, vecX vsrc, vecX tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); ++ match(Set dst (AddReductionVL src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_add2L" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp$$FloatRegister, FNOREG, T_LONG, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_add4F(regF dst, regF src, vecX vsrc, vecX tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); ++ match(Set dst (AddReductionVF src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_add4F" %} ++ ins_encode %{ ++ __ reduce($dst$$FloatRegister, $src$$FloatRegister, $vsrc$$FloatRegister, $tmp$$FloatRegister, T_FLOAT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_add2D(regD dst, regD src, vecX vsrc, vecX tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); ++ match(Set dst (AddReductionVD src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_add2D" %} ++ ins_encode %{ ++ __ reduce($dst$$FloatRegister, $src$$FloatRegister, $vsrc$$FloatRegister, $tmp$$FloatRegister, T_DOUBLE, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_add32B(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); ++ match(Set dst (AddReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_add32B" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_add16S(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); ++ match(Set dst (AddReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_add16S" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct 
reduce_add8I(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); ++ match(Set dst (AddReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_add8I" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_add4L(mRegL dst, mRegL src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); ++ match(Set dst (AddReductionVL src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_add4L" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_LONG, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_add8F(regF dst, regF src, vecY vsrc, vecY tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); ++ match(Set dst (AddReductionVF src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_add8F" %} ++ ins_encode %{ ++ __ reduce($dst$$FloatRegister, $src$$FloatRegister, $vsrc$$FloatRegister, $tmp$$FloatRegister, T_FLOAT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_add4D(regD dst, regD src, vecY vsrc, vecY tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); ++ match(Set dst (AddReductionVD src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_add4D" %} ++ ins_encode %{ ++ __ reduce($dst$$FloatRegister, $src$$FloatRegister, $vsrc$$FloatRegister, $tmp$$FloatRegister, T_DOUBLE, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// ----------------------------- Reduction Mul -------------------------------- ++ ++instruct reduce_mul16B(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); ++ match(Set dst (MulReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_mul16B" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul8S(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); ++ match(Set dst (MulReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_mul8S" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul4I(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); ++ match(Set dst (MulReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_mul4I" %} ++ ins_encode %{ 
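++    // Note: reduce() is assumed to be the shared helper declared for this port in
++    // macroAssembler_loongarch; from its uses in these rules it appears to fold the
++    // lanes of $vsrc with this node's ideal opcode (MulReductionVI here), combine the
++    // result with the scalar input $src, and use $tmp1/$tmp2 as vector scratch. The
++    // trailing 16/32 argument looks like the vector width in bytes (vecX vs. vecY).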
++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul2L(mRegL dst, mRegL src, vecX vsrc, vecX tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); ++ match(Set dst (MulReductionVL src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_mul2L" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp$$FloatRegister, FNOREG, T_LONG, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul4F(regF dst, regF src, vecX vsrc, vecX tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); ++ match(Set dst (MulReductionVF src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_mul4F" %} ++ ins_encode %{ ++ __ reduce($dst$$FloatRegister, $src$$FloatRegister, $vsrc$$FloatRegister, $tmp$$FloatRegister, T_FLOAT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul2D(regD dst, regD src, vecX vsrc, vecX tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); ++ match(Set dst (MulReductionVD src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_mul2D" %} ++ ins_encode %{ ++ __ reduce($dst$$FloatRegister, $src$$FloatRegister, $vsrc$$FloatRegister, $tmp$$FloatRegister, T_DOUBLE, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul32B(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); ++ match(Set dst (MulReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_mul32B" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul16S(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); ++ match(Set dst (MulReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_mul16S" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul8I(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); ++ match(Set dst (MulReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_mul8I" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul4L(mRegL dst, mRegL src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); ++ match(Set dst (MulReductionVL src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, 
$src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_mul4L" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_LONG, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul8F(regF dst, regF src, vecY vsrc, vecY tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); ++ match(Set dst (MulReductionVF src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_mul8F" %} ++ ins_encode %{ ++ __ reduce($dst$$FloatRegister, $src$$FloatRegister, $vsrc$$FloatRegister, $tmp$$FloatRegister, T_FLOAT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul4D(regD dst, regD src, vecY vsrc, vecY tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); ++ match(Set dst (MulReductionVD src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_mul4D" %} ++ ins_encode %{ ++ __ reduce($dst$$FloatRegister, $src$$FloatRegister, $vsrc$$FloatRegister, $tmp$$FloatRegister, T_DOUBLE, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// ----------------------------- Reduction Max -------------------------------- ++ ++instruct reduce_max16B(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); ++ match(Set dst (MaxReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_max16B" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_max8S(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); ++ match(Set dst (MaxReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_max8S" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_max4I(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); ++ match(Set dst (MaxReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_max4I" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_max2L(mRegL dst, mRegL src, vecX vsrc, vecX tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); ++ match(Set dst (MaxReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_max2L" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp$$FloatRegister, FNOREG, T_LONG, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_max32B(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ 
predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); ++ match(Set dst (MaxReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_max32B" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_max16S(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); ++ match(Set dst (MaxReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_max16S" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_max8I(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); ++ match(Set dst (MaxReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_max8I" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_max4L(mRegL dst, mRegL src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); ++ match(Set dst (MaxReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_max4L" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_LONG, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// ----------------------------- Reduction Min -------------------------------- ++ ++instruct reduce_min16B(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); ++ match(Set dst (MinReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_min16B" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_min8S(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); ++ match(Set dst (MinReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_min8S" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_min4I(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); ++ match(Set dst (MinReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_min4I" %} ++ 
ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_min2L(mRegL dst, mRegL src, vecX vsrc, vecX tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); ++ match(Set dst (MinReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_min2L" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp$$FloatRegister, FNOREG, T_LONG, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_min32B(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); ++ match(Set dst (MinReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_min32B" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_min16S(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); ++ match(Set dst (MinReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_min16S" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_min8I(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); ++ match(Set dst (MinReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_min8I" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_min4L(mRegL dst, mRegL src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); ++ match(Set dst (MinReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_min4L" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_LONG, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// ------------------------------ RoundDoubleModeV ---------------------------- ++ ++instruct round2D(vecX dst, vecX src, immI rmode) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (RoundDoubleModeV src rmode)); ++ format %{ "vfrint $dst, $src, $rmode\t# @round2D" %} ++ ins_encode %{ ++ DEBUG_ONLY(Unimplemented()); // unverified ++ switch ($rmode$$constant) { ++ case 0: __ vfrintrne_d($dst$$FloatRegister, $src$$FloatRegister); break; ++ case 1: __ vfrintrm_d($dst$$FloatRegister, $src$$FloatRegister); break; ++ case 2: __ vfrintrp_d($dst$$FloatRegister, $src$$FloatRegister); break; ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct round4D(vecY dst, vecY src, immI rmode) %{ ++ 
predicate(n->as_Vector()->length() == 4); ++ match(Set dst (RoundDoubleModeV src rmode)); ++ format %{ "xvfrint $dst, $src, $rmode\t# @round4D" %} ++ ins_encode %{ ++ DEBUG_ONLY(Unimplemented()); // unverified ++ switch ($rmode$$constant) { ++ case 0: __ xvfrintrne_d($dst$$FloatRegister, $src$$FloatRegister); break; ++ case 1: __ xvfrintrm_d($dst$$FloatRegister, $src$$FloatRegister); break; ++ case 2: __ xvfrintrp_d($dst$$FloatRegister, $src$$FloatRegister); break; ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ---------------------------- PopCount -------------------------------------- ++ ++instruct popcount4I(vecX dst, vecX src) %{ ++ predicate(UsePopCountInstruction && n->as_Vector()->length() == 4); ++ match(Set dst (PopCountVI src)); ++ format %{ "vpcnt.w $dst, $src\t# @popcount4I" %} ++ ins_encode %{ ++ DEBUG_ONLY(Unimplemented()); // unverified ++ __ vpcnt_w($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct popcount8I(vecY dst, vecY src) %{ ++ predicate(UsePopCountInstruction && n->as_Vector()->length() == 8); ++ match(Set dst (PopCountVI src)); ++ format %{ "xvpcnt.w $dst, $src\t# @popcount8I" %} ++ ins_encode %{ ++ DEBUG_ONLY(Unimplemented()); // unverified ++ __ xvpcnt_w($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++//----------PEEPHOLE RULES----------------------------------------------------- ++// These must follow all instruction definitions as they use the names ++// defined in the instructions definitions. ++// ++// peepmatch ( root_instr_name [preceeding_instruction]* ); ++// ++// peepconstraint %{ ++// (instruction_number.operand_name relational_op instruction_number.operand_name ++// [, ...] ); ++// // instruction numbers are zero-based using left to right order in peepmatch ++// ++// peepreplace ( instr_name ( [instruction_number.operand_name]* ) ); ++// // provide an instruction_number.operand_name for each operand that appears ++// // in the replacement instruction's match rule ++// ++// ---------VM FLAGS--------------------------------------------------------- ++// ++// All peephole optimizations can be turned off using -XX:-OptoPeephole ++// ++// Each peephole rule is given an identifying number starting with zero and ++// increasing by one in the order seen by the parser. An individual peephole ++// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=# ++// on the command-line. 
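++// (For example, -XX:OptoPeepholeAt=2 would presumably enable only the peephole rule
++// numbered 2 and disable every other rule.)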
++// ++// ---------CURRENT LIMITATIONS---------------------------------------------- ++// ++// Only match adjacent instructions in same basic block ++// Only equality constraints ++// Only constraints between operands, not (0.dest_reg == EAX_enc) ++// Only one replacement instruction ++// ++// ---------EXAMPLE---------------------------------------------------------- ++// ++// // pertinent parts of existing instructions in architecture description ++// instruct movI(eRegI dst, eRegI src) %{ ++// match(Set dst (CopyI src)); ++// %} ++// ++// instruct incI_eReg(eRegI dst, immI_1 src, eFlagsReg cr) %{ ++// match(Set dst (AddI dst src)); ++// effect(KILL cr); ++// %} ++// ++// // Change (inc mov) to lea ++// peephole %{ ++// // increment preceeded by register-register move ++// peepmatch ( incI_eReg movI ); ++// // require that the destination register of the increment ++// // match the destination register of the move ++// peepconstraint ( 0.dst == 1.dst ); ++// // construct a replacement instruction that sets ++// // the destination to ( move's source register + one ) ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// Implementation no longer uses movX instructions since ++// machine-independent system no longer uses CopyX nodes. ++// ++// peephole %{ ++// peepmatch ( incI_eReg movI ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// peephole %{ ++// peepmatch ( decI_eReg movI ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// peephole %{ ++// peepmatch ( addI_eReg_imm movI ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// peephole %{ ++// peepmatch ( addP_eReg_imm movP ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++ ++// // Change load of spilled value to only a spill ++// instruct storeI(memory mem, eRegI src) %{ ++// match(Set mem (StoreI mem src)); ++// %} ++// ++// instruct loadI(eRegI dst, memory mem) %{ ++// match(Set dst (LoadI mem)); ++// %} ++// ++//peephole %{ ++// peepmatch ( loadI storeI ); ++// peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem ); ++// peepreplace ( storeI( 1.mem 1.mem 1.src ) ); ++//%} ++ ++//----------SMARTSPILL RULES--------------------------------------------------- ++// These must follow all instruction definitions as they use the names ++// defined in the instructions definitions. ++ +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/loongarch.ad b/src/hotspot/cpu/loongarch/loongarch.ad +--- a/src/hotspot/cpu/loongarch/loongarch.ad 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/loongarch.ad 2024-01-30 10:00:11.838098438 +0800 +@@ -0,0 +1,25 @@ ++// ++// Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. ++// ++// ++ +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/macroAssembler_loongarch.cpp b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.cpp +--- a/src/hotspot/cpu/loongarch/macroAssembler_loongarch.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.cpp 2024-01-30 10:00:11.841431732 +0800 +@@ -0,0 +1,4567 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2017, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "jvm.h" ++#include "asm/assembler.hpp" ++#include "asm/assembler.inline.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "compiler/disassembler.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "gc/shared/collectedHeap.inline.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/resourceArea.hpp" ++#include "memory/universe.hpp" ++#include "nativeInst_loongarch.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/objectMonitor.hpp" ++#include "runtime/os.hpp" ++#include "runtime/safepoint.hpp" ++#include "runtime/safepointMechanism.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "utilities/macros.hpp" ++ ++#ifdef COMPILER2 ++#include "opto/compile.hpp" ++#include "opto/intrinsicnode.hpp" ++#endif ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++// Implementation of MacroAssembler ++ ++intptr_t MacroAssembler::i[32] = {0}; ++float MacroAssembler::f[32] = {0.0}; ++ ++void MacroAssembler::print(outputStream *s) { ++ unsigned int k; ++ for(k=0; k<32; k++) { ++ s->print_cr("i%d = 0x%.16lx", k, i[k]); ++ } ++ s->cr(); ++ ++ for(k=0; k<32; k++) { ++ s->print_cr("f%d = %f", k, f[k]); ++ } ++ s->cr(); ++} ++ ++int MacroAssembler::i_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->i[k]; } ++int MacroAssembler::f_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->f[k]; } ++ ++void MacroAssembler::save_registers(MacroAssembler *masm) { ++#define __ masm-> ++ for(int k=0; k<32; k++) { ++ __ st_w (as_Register(k), A0, i_offset(k)); ++ } ++ ++ for(int k=0; k<32; k++) { ++ __ fst_s (as_FloatRegister(k), A0, f_offset(k)); ++ } ++#undef __ ++} ++ ++void MacroAssembler::restore_registers(MacroAssembler *masm) { ++#define __ masm-> ++ for(int k=0; k<32; k++) { ++ __ ld_w (as_Register(k), A0, i_offset(k)); ++ } ++ ++ for(int k=0; k<32; k++) { ++ __ fld_s (as_FloatRegister(k), A0, f_offset(k)); ++ } ++#undef __ ++} ++ ++ ++void MacroAssembler::pd_patch_instruction(address branch, address target) { ++ jint& stub_inst = *(jint*)branch; ++ jint *pc = (jint *)branch; ++ ++ if (high(stub_inst, 7) == pcaddu18i_op) { ++ // far: ++ // pcaddu18i reg, si20 ++ // jirl r0, reg, si18 ++ ++ assert(high(pc[1], 6) == jirl_op, "Not a branch label patch"); ++ jlong offs = target - branch; ++ CodeBuffer cb(branch, 2 * BytesPerInstWord); ++ MacroAssembler masm(&cb); ++ if (reachable_from_branch_short(offs)) { ++ // convert far to short ++#define __ masm.
++ __ b(target); ++ __ nop(); ++#undef __ ++ } else { ++ masm.patchable_jump_far(R0, offs); ++ } ++ return; ++ } else if (high(stub_inst, 7) == pcaddi_op) { ++ // see MacroAssembler::set_last_Java_frame: ++ // pcaddi reg, si20 ++ ++ jint offs = (target - branch) >> 2; ++ guarantee(is_simm(offs, 20), "Not signed 20-bit offset"); ++ CodeBuffer cb(branch, 1 * BytesPerInstWord); ++ MacroAssembler masm(&cb); ++ masm.pcaddi(as_Register(low(stub_inst, 5)), offs); ++ return; ++ } else if (high(stub_inst, 7) == pcaddu12i_op) { ++ // pc-relative ++ jlong offs = target - branch; ++ guarantee(is_simm(offs, 32), "Not signed 32-bit offset"); ++ jint si12, si20; ++ jint& stub_instNext = *(jint*)(branch+4); ++ split_simm32(offs, si12, si20); ++ CodeBuffer cb(branch, 2 * BytesPerInstWord); ++ MacroAssembler masm(&cb); ++ masm.pcaddu12i(as_Register(low(stub_inst, 5)), si20); ++ masm.addi_d(as_Register(low((stub_instNext), 5)), as_Register(low((stub_instNext) >> 5, 5)), si12); ++ return; ++ } else if (high(stub_inst, 7) == lu12i_w_op) { ++ // long call (absolute) ++ CodeBuffer cb(branch, 3 * BytesPerInstWord); ++ MacroAssembler masm(&cb); ++ masm.call_long(target); ++ return; ++ } ++ ++ stub_inst = patched_branch(target - branch, stub_inst, 0); ++} ++ ++bool MacroAssembler::reachable_from_branch_short(jlong offs) { ++ if (ForceUnreachable) { ++ return false; ++ } ++ return is_simm(offs >> 2, 26); ++} ++ ++void MacroAssembler::patchable_jump_far(Register ra, jlong offs) { ++ jint si18, si20; ++ guarantee(is_simm(offs, 38), "Not signed 38-bit offset"); ++ split_simm38(offs, si18, si20); ++ pcaddu18i(T4, si20); ++ jirl(ra, T4, si18); ++} ++ ++void MacroAssembler::patchable_jump(address target, bool force_patchable) { ++ assert(ReservedCodeCacheSize < 4*G, "branch out of range"); ++ assert(CodeCache::find_blob(target) != NULL, ++ "destination of jump not found in code cache"); ++ if (force_patchable || patchable_branches()) { ++ jlong offs = target - pc(); ++ if (reachable_from_branch_short(offs)) { // Short jump ++ b(offset26(target)); ++ nop(); ++ } else { // Far jump ++ patchable_jump_far(R0, offs); ++ } ++ } else { // Real short jump ++ b(offset26(target)); ++ } ++} ++ ++void MacroAssembler::patchable_call(address target, address call_site) { ++ jlong offs = target - (call_site ? call_site : pc()); ++ if (reachable_from_branch_short(offs - BytesPerInstWord)) { // Short call ++ nop(); ++ bl((offs - BytesPerInstWord) >> 2); ++ } else { // Far call ++ patchable_jump_far(RA, offs); ++ } ++} ++ ++// Maybe emit a call via a trampoline. If the code cache is small ++// trampolines won't be emitted. ++ ++address MacroAssembler::trampoline_call(AddressLiteral entry, CodeBuffer *cbuf) { ++ assert(JavaThread::current()->is_Compiler_thread(), "just checking"); ++ assert(entry.rspec().type() == relocInfo::runtime_call_type ++ || entry.rspec().type() == relocInfo::opt_virtual_call_type ++ || entry.rspec().type() == relocInfo::static_call_type ++ || entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type"); ++ ++ // We need a trampoline if branches are far. ++ if (far_branches()) { ++ bool in_scratch_emit_size = false; ++#ifdef COMPILER2 ++ // We don't want to emit a trampoline if C2 is generating dummy ++ // code during its branch shortening phase. 
++ CompileTask* task = ciEnv::current()->task(); ++ in_scratch_emit_size = ++ (task != NULL && is_c2_compile(task->comp_level()) && ++ Compile::current()->in_scratch_emit_size()); ++#endif ++ if (!in_scratch_emit_size) { ++ address stub = emit_trampoline_stub(offset(), entry.target()); ++ if (stub == NULL) { ++ postcond(pc() == badAddress); ++ return NULL; // CodeCache is full ++ } ++ } ++ } ++ ++ if (cbuf) cbuf->set_insts_mark(); ++ relocate(entry.rspec()); ++ if (!far_branches()) { ++ bl(entry.target()); ++ } else { ++ bl(pc()); ++ } ++ // just need to return a non-null address ++ postcond(pc() != badAddress); ++ return pc(); ++} ++ ++// Emit a trampoline stub for a call to a target which is too far away. ++// ++// code sequences: ++// ++// call-site: ++// branch-and-link to <destination> or <trampoline stub> ++// ++// Related trampoline stub for this call site in the stub section: ++// load the call target from the constant pool ++// branch (RA still points to the call site above) ++ ++address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset, ++ address dest) { ++ // Start the stub ++ address stub = start_a_stub(NativeInstruction::nop_instruction_size ++ + NativeCallTrampolineStub::instruction_size); ++ if (stub == NULL) { ++ return NULL; // CodeBuffer::expand failed ++ } ++ ++ // Create a trampoline stub relocation which relates this trampoline stub ++ // with the call instruction at insts_call_instruction_offset in the ++ // instructions code-section. ++ align(wordSize); ++ relocate(trampoline_stub_Relocation::spec(code()->insts()->start() ++ + insts_call_instruction_offset)); ++ const int stub_start_offset = offset(); ++ ++ // Now, create the trampoline stub's code: ++ // - load the call ++ // - call ++ pcaddi(T4, 0); ++ ld_d(T4, T4, 16); ++ jr(T4); ++ nop(); //align ++ assert(offset() - stub_start_offset == NativeCallTrampolineStub::data_offset, ++ "should be"); ++ emit_int64((int64_t)dest); ++ ++ const address stub_start_addr = addr_at(stub_start_offset); ++ ++ NativeInstruction* ni = nativeInstruction_at(stub_start_addr); ++ assert(ni->is_NativeCallTrampolineStub_at(), "doesn't look like a trampoline"); ++ ++ end_a_stub(); ++ return stub_start_addr; ++} ++ ++void MacroAssembler::beq_far(Register rs, Register rt, address entry) { ++ if (is_simm16((entry - pc()) >> 2)) { // Short jump ++ beq(rs, rt, offset16(entry)); ++ } else { // Far jump ++ Label not_jump; ++ bne(rs, rt, not_jump); ++ b_far(entry); ++ bind(not_jump); ++ } ++} ++ ++void MacroAssembler::beq_far(Register rs, Register rt, Label& L) { ++ if (L.is_bound()) { ++ beq_far(rs, rt, target(L)); ++ } else { ++ Label not_jump; ++ bne(rs, rt, not_jump); ++ b_far(L); ++ bind(not_jump); ++ } ++} ++ ++void MacroAssembler::bne_far(Register rs, Register rt, address entry) { ++ if (is_simm16((entry - pc()) >> 2)) { // Short jump ++ bne(rs, rt, offset16(entry)); ++ } else { // Far jump ++ Label not_jump; ++ beq(rs, rt, not_jump); ++ b_far(entry); ++ bind(not_jump); ++ } ++} ++ ++void MacroAssembler::bne_far(Register rs, Register rt, Label& L) { ++ if (L.is_bound()) { ++ bne_far(rs, rt, target(L)); ++ } else { ++ Label not_jump; ++ beq(rs, rt, not_jump); ++ b_far(L); ++ bind(not_jump); ++ } ++} ++ ++void MacroAssembler::blt_far(Register rs, Register rt, address entry, bool is_signed) { ++ if (is_simm16((entry - pc()) >> 2)) { // Short jump ++ if (is_signed) { ++ blt(rs, rt, offset16(entry)); ++ } else { ++ bltu(rs, rt, offset16(entry)); ++ } ++ } else { // Far jump ++ Label not_jump; ++ if (is_signed) { ++ bge(rs, rt, not_jump); ++ } else { ++
bgeu(rs, rt, not_jump); ++ } ++ b_far(entry); ++ bind(not_jump); ++ } ++} ++ ++void MacroAssembler::blt_far(Register rs, Register rt, Label& L, bool is_signed) { ++ if (L.is_bound()) { ++ blt_far(rs, rt, target(L), is_signed); ++ } else { ++ Label not_jump; ++ if (is_signed) { ++ bge(rs, rt, not_jump); ++ } else { ++ bgeu(rs, rt, not_jump); ++ } ++ b_far(L); ++ bind(not_jump); ++ } ++} ++ ++void MacroAssembler::bge_far(Register rs, Register rt, address entry, bool is_signed) { ++ if (is_simm16((entry - pc()) >> 2)) { // Short jump ++ if (is_signed) { ++ bge(rs, rt, offset16(entry)); ++ } else { ++ bgeu(rs, rt, offset16(entry)); ++ } ++ } else { // Far jump ++ Label not_jump; ++ if (is_signed) { ++ blt(rs, rt, not_jump); ++ } else { ++ bltu(rs, rt, not_jump); ++ } ++ b_far(entry); ++ bind(not_jump); ++ } ++} ++ ++void MacroAssembler::bge_far(Register rs, Register rt, Label& L, bool is_signed) { ++ if (L.is_bound()) { ++ bge_far(rs, rt, target(L), is_signed); ++ } else { ++ Label not_jump; ++ if (is_signed) { ++ blt(rs, rt, not_jump); ++ } else { ++ bltu(rs, rt, not_jump); ++ } ++ b_far(L); ++ bind(not_jump); ++ } ++} ++ ++void MacroAssembler::beq_long(Register rs, Register rt, Label& L) { ++ Label not_taken; ++ bne(rs, rt, not_taken); ++ jmp_far(L); ++ bind(not_taken); ++} ++ ++void MacroAssembler::bne_long(Register rs, Register rt, Label& L) { ++ Label not_taken; ++ beq(rs, rt, not_taken); ++ jmp_far(L); ++ bind(not_taken); ++} ++ ++void MacroAssembler::bc1t_long(Label& L) { ++ Label not_taken; ++ bceqz(FCC0, not_taken); ++ jmp_far(L); ++ bind(not_taken); ++} ++ ++void MacroAssembler::blt_long(Register rs, Register rt, Label& L, bool is_signed) { ++ Label not_taken; ++ if (is_signed) { ++ bge(rs, rt, not_taken); ++ } else { ++ bgeu(rs, rt, not_taken); ++ } ++ jmp_far(L); ++ bind(not_taken); ++} ++ ++void MacroAssembler::bge_long(Register rs, Register rt, Label& L, bool is_signed) { ++ Label not_taken; ++ if (is_signed) { ++ blt(rs, rt, not_taken); ++ } else { ++ bltu(rs, rt, not_taken); ++ } ++ jmp_far(L); ++ bind(not_taken); ++} ++ ++void MacroAssembler::bc1f_long(Label& L) { ++ Label not_taken; ++ bcnez(FCC0, not_taken); ++ jmp_far(L); ++ bind(not_taken); ++} ++ ++void MacroAssembler::b_far(Label& L) { ++ if (L.is_bound()) { ++ b_far(target(L)); ++ } else { ++ L.add_patch_at(code(), locator()); ++ if (ForceUnreachable) { ++ patchable_jump_far(R0, 0); ++ } else { ++ b(0); ++ } ++ } ++} ++ ++void MacroAssembler::b_far(address entry) { ++ jlong offs = entry - pc(); ++ if (reachable_from_branch_short(offs)) { // Short jump ++ b(offset26(entry)); ++ } else { // Far jump ++ patchable_jump_far(R0, offs); ++ } ++} ++ ++void MacroAssembler::ld_ptr(Register rt, Register base, Register offset) { ++ ldx_d(rt, base, offset); ++} ++ ++void MacroAssembler::st_ptr(Register rt, Register base, Register offset) { ++ stx_d(rt, base, offset); ++} ++ ++Address MacroAssembler::as_Address(AddressLiteral adr) { ++ return Address(adr.target(), adr.rspec()); ++} ++ ++Address MacroAssembler::as_Address(ArrayAddress adr) { ++ return Address::make_array(adr); ++} ++ ++// tmp_reg1 and tmp_reg2 should be saved outside of atomic_inc32 (caller saved). 
++void MacroAssembler::atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2) { ++ li(tmp_reg1, inc); ++ li(tmp_reg2, counter_addr); ++ amadd_w(R0, tmp_reg1, tmp_reg2); ++} ++ ++void MacroAssembler::reserved_stack_check() { ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ get_thread(thread); ++#endif ++ // testing if reserved zone needs to be enabled ++ Label no_reserved_zone_enabling; ++ ++ ld_d(AT, Address(thread, JavaThread::reserved_stack_activation_offset())); ++ sub_d(AT, SP, AT); ++ blt(AT, R0, no_reserved_zone_enabling); ++ ++ enter(); // RA and FP are live. ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), thread); ++ leave(); ++ ++ // We have already removed our own frame. ++ // throw_delayed_StackOverflowError will think that it's been ++ // called by our caller. ++ li(AT, (long)StubRoutines::throw_delayed_StackOverflowError_entry()); ++ jr(AT); ++ should_not_reach_here(); ++ ++ bind(no_reserved_zone_enabling); ++} ++ ++int MacroAssembler::biased_locking_enter(Register lock_reg, ++ Register obj_reg, ++ Register swap_reg, ++ Register tmp_reg, ++ bool swap_reg_contains_mark, ++ Label& done, ++ Label* slow_case, ++ BiasedLockingCounters* counters) { ++ assert(UseBiasedLocking, "why call this otherwise?"); ++ bool need_tmp_reg = false; ++ if (tmp_reg == noreg) { ++ need_tmp_reg = true; ++ tmp_reg = T4; ++ } ++ assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, AT); ++ assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); ++ Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); ++ Address saved_mark_addr(lock_reg, 0); ++ ++ // Biased locking ++ // See whether the lock is currently biased toward our thread and ++ // whether the epoch is still valid ++ // Note that the runtime guarantees sufficient alignment of JavaThread ++ // pointers to allow age to be placed into low bits ++ // First check to see whether biasing is even enabled for this object ++ Label cas_label; ++ int null_check_offset = -1; ++ if (!swap_reg_contains_mark) { ++ null_check_offset = offset(); ++ ld_ptr(swap_reg, mark_addr); ++ } ++ ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ move(tmp_reg, swap_reg); ++ andi(tmp_reg, tmp_reg, markOopDesc::biased_lock_mask_in_place); ++ addi_d(AT, R0, markOopDesc::biased_lock_pattern); ++ sub_d(AT, AT, tmp_reg); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ ++ bne(AT, R0, cas_label); ++ ++ ++ // The bias pattern is present in the object's header. Need to check ++ // whether the bias owner and the epoch are both still current. ++ // Note that because there is no current thread register on LA we ++ // need to store off the mark word we read out of the object to ++ // avoid reloading it and needing to recheck invariants below. This ++ // store is unfortunate but it makes the overall code shorter and ++ // simpler. 
++ st_ptr(swap_reg, saved_mark_addr); ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ if (swap_reg_contains_mark) { ++ null_check_offset = offset(); ++ } ++ load_prototype_header(tmp_reg, obj_reg); ++ xorr(tmp_reg, tmp_reg, swap_reg); ++#ifndef OPT_THREAD ++ get_thread(swap_reg); ++ xorr(swap_reg, swap_reg, tmp_reg); ++#else ++ xorr(swap_reg, TREG, tmp_reg); ++#endif ++ ++ li(AT, ~((int) markOopDesc::age_mask_in_place)); ++ andr(swap_reg, swap_reg, AT); ++ ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(swap_reg, R0, L); ++ push(tmp_reg); ++ push(A0); ++ atomic_inc32((address)BiasedLocking::biased_lock_entry_count_addr(), 1, A0, tmp_reg); ++ pop(A0); ++ pop(tmp_reg); ++ bind(L); ++ } ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ beq(swap_reg, R0, done); ++ Label try_revoke_bias; ++ Label try_rebias; ++ ++ // At this point we know that the header has the bias pattern and ++ // that we are not the bias owner in the current epoch. We need to ++ // figure out more details about the state of the header in order to ++ // know what operations can be legally performed on the object's ++ // header. ++ ++ // If the low three bits in the xor result aren't clear, that means ++ // the prototype header is no longer biased and we have to revoke ++ // the bias on this object. ++ ++ li(AT, markOopDesc::biased_lock_mask_in_place); ++ andr(AT, swap_reg, AT); ++ bne(AT, R0, try_revoke_bias); ++ // Biasing is still enabled for this data type. See whether the ++ // epoch of the current bias is still valid, meaning that the epoch ++ // bits of the mark word are equal to the epoch bits of the ++ // prototype header. (Note that the prototype header's epoch bits ++ // only change at a safepoint.) If not, attempt to rebias the object ++ // toward the current thread. Note that we must be absolutely sure ++ // that the current epoch is invalid in order to do this because ++ // otherwise the manipulations it performs on the mark word are ++ // illegal. ++ ++ li(AT, markOopDesc::epoch_mask_in_place); ++ andr(AT,swap_reg, AT); ++ bne(AT, R0, try_rebias); ++ // The epoch of the current bias is still valid but we know nothing ++ // about the owner; it might be set or it might be clear. Try to ++ // acquire the bias of the object using an atomic operation. If this ++ // fails we will go in to the runtime to revoke the object's bias. ++ // Note that we first construct the presumed unbiased header so we ++ // don't accidentally blow away another thread's valid bias. ++ ++ ld_ptr(swap_reg, saved_mark_addr); ++ ++ li(AT, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); ++ andr(swap_reg, swap_reg, AT); ++ ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++#ifndef OPT_THREAD ++ get_thread(tmp_reg); ++ orr(tmp_reg, tmp_reg, swap_reg); ++#else ++ orr(tmp_reg, TREG, swap_reg); ++#endif ++ cmpxchg(Address(obj_reg, 0), swap_reg, tmp_reg, AT, false, false); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ // If the biasing toward our thread failed, this means that ++ // another thread succeeded in biasing it toward itself and we ++ // need to revoke that bias. The revocation will occur in the ++ // interpreter runtime in the slow case. 
++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(AT, R0, L); ++ push(tmp_reg); ++ push(A0); ++ atomic_inc32((address)BiasedLocking::anonymously_biased_lock_entry_count_addr(), 1, A0, tmp_reg); ++ pop(A0); ++ pop(tmp_reg); ++ bind(L); ++ } ++ if (slow_case != NULL) { ++ beq_far(AT, R0, *slow_case); ++ } ++ b(done); ++ ++ bind(try_rebias); ++ // At this point we know the epoch has expired, meaning that the ++ // current "bias owner", if any, is actually invalid. Under these ++ // circumstances _only_, we are allowed to use the current header's ++ // value as the comparison value when doing the cas to acquire the ++ // bias in the current epoch. In other words, we allow transfer of ++ // the bias from one thread to another directly in this situation. ++ // ++ // FIXME: due to a lack of registers we currently blow away the age ++ // bits in this situation. Should attempt to preserve them. ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ load_prototype_header(tmp_reg, obj_reg); ++#ifndef OPT_THREAD ++ get_thread(swap_reg); ++ orr(tmp_reg, tmp_reg, swap_reg); ++#else ++ orr(tmp_reg, tmp_reg, TREG); ++#endif ++ ld_ptr(swap_reg, saved_mark_addr); ++ ++ cmpxchg(Address(obj_reg, 0), swap_reg, tmp_reg, AT, false, false); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ // If the biasing toward our thread failed, then another thread ++ // succeeded in biasing it toward itself and we need to revoke that ++ // bias. The revocation will occur in the runtime in the slow case. ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(AT, R0, L); ++ push(AT); ++ push(tmp_reg); ++ atomic_inc32((address)BiasedLocking::rebiased_lock_entry_count_addr(), 1, AT, tmp_reg); ++ pop(tmp_reg); ++ pop(AT); ++ bind(L); ++ } ++ if (slow_case != NULL) { ++ beq_far(AT, R0, *slow_case); ++ } ++ ++ b(done); ++ bind(try_revoke_bias); ++ // The prototype mark in the klass doesn't have the bias bit set any ++ // more, indicating that objects of this data type are not supposed ++ // to be biased any more. We are going to try to reset the mark of ++ // this object to the prototype value and fall through to the ++ // CAS-based locking scheme. Note that if our CAS fails, it means ++ // that another thread raced us for the privilege of revoking the ++ // bias of this particular object, so it's okay to continue in the ++ // normal locking code. ++ // ++ // FIXME: due to a lack of registers we currently blow away the age ++ // bits in this situation. Should attempt to preserve them. ++ ld_ptr(swap_reg, saved_mark_addr); ++ ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ load_prototype_header(tmp_reg, obj_reg); ++ cmpxchg(Address(obj_reg, 0), swap_reg, tmp_reg, AT, false, false); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ // Fall through to the normal CAS-based lock, because no matter what ++ // the result of the above CAS, some thread must have succeeded in ++ // removing the bias bit from the object's header. ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(AT, R0, L); ++ push(AT); ++ push(tmp_reg); ++ atomic_inc32((address)BiasedLocking::revoked_lock_entry_count_addr(), 1, AT, tmp_reg); ++ pop(tmp_reg); ++ pop(AT); ++ bind(L); ++ } ++ ++ bind(cas_label); ++ return null_check_offset; ++} ++ ++void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) { ++ assert(UseBiasedLocking, "why call this otherwise?"); ++ ++ // Check for biased locking unlock case, which is a no-op ++ // Note: we do not have to check the thread ID for two reasons. 
++ // First, the interpreter checks for IllegalMonitorStateException at ++ // a higher level. Second, if the bias was revoked while we held the ++ // lock, the object could not be rebiased toward another thread, so ++ // the bias bit would be clear. ++ ld_d(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); ++ andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place); ++ addi_d(AT, R0, markOopDesc::biased_lock_pattern); ++ ++ beq(AT, temp_reg, done); ++} ++ ++// the stack pointer adjustment is needed. see InterpreterMacroAssembler::super_call_VM_leaf ++// this method will handle the stack problem, you need not to preserve the stack space for the argument now ++void MacroAssembler::call_VM_leaf_base(address entry_point, int number_of_arguments) { ++ Label L, E; ++ ++ assert(number_of_arguments <= 4, "just check"); ++ ++ andi(AT, SP, 0xf); ++ beq(AT, R0, L); ++ addi_d(SP, SP, -8); ++ call(entry_point, relocInfo::runtime_call_type); ++ addi_d(SP, SP, 8); ++ b(E); ++ ++ bind(L); ++ call(entry_point, relocInfo::runtime_call_type); ++ bind(E); ++} ++ ++void MacroAssembler::jmp(address entry) { ++ jlong offs = entry - pc(); ++ if (reachable_from_branch_short(offs)) { // Short jump ++ b(offset26(entry)); ++ } else { // Far jump ++ patchable_jump_far(R0, offs); ++ } ++} ++ ++void MacroAssembler::jmp(address entry, relocInfo::relocType rtype) { ++ switch (rtype) { ++ case relocInfo::none: ++ jmp(entry); ++ break; ++ default: ++ { ++ InstructionMark im(this); ++ relocate(rtype); ++ patchable_jump(entry); ++ } ++ break; ++ } ++} ++ ++void MacroAssembler::jmp_far(Label& L) { ++ if (L.is_bound()) { ++ assert(target(L) != NULL, "jmp most probably wrong"); ++ patchable_jump(target(L), true /* force patchable */); ++ } else { ++ L.add_patch_at(code(), locator()); ++ patchable_jump_far(R0, 0); ++ } ++} ++ ++void MacroAssembler::mov_metadata(Address dst, Metadata* obj) { ++ int oop_index; ++ if (obj) { ++ oop_index = oop_recorder()->find_index(obj); ++ } else { ++ oop_index = oop_recorder()->allocate_metadata_index(obj); ++ } ++ relocate(metadata_Relocation::spec(oop_index)); ++ patchable_li52(AT, (long)obj); ++ st_d(AT, dst); ++} ++ ++void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { ++ int oop_index; ++ if (obj) { ++ oop_index = oop_recorder()->find_index(obj); ++ } else { ++ oop_index = oop_recorder()->allocate_metadata_index(obj); ++ } ++ relocate(metadata_Relocation::spec(oop_index)); ++ patchable_li52(dst, (long)obj); ++} ++ ++void MacroAssembler::call(address entry) { ++ jlong offs = entry - pc(); ++ if (reachable_from_branch_short(offs)) { // Short call (pc-rel) ++ bl(offset26(entry)); ++ } else if (is_simm(offs, 38)) { // Far call (pc-rel) ++ patchable_jump_far(RA, offs); ++ } else { // Long call (absolute) ++ call_long(entry); ++ } ++} ++ ++void MacroAssembler::call(address entry, relocInfo::relocType rtype) { ++ switch (rtype) { ++ case relocInfo::none: ++ call(entry); ++ break; ++ case relocInfo::runtime_call_type: ++ if (!is_simm(entry - pc(), 38)) { ++ call_long(entry); ++ break; ++ } ++ // fallthrough ++ default: ++ { ++ InstructionMark im(this); ++ relocate(rtype); ++ patchable_call(entry); ++ } ++ break; ++ } ++} ++ ++void MacroAssembler::call(address entry, RelocationHolder& rh){ ++ switch (rh.type()) { ++ case relocInfo::none: ++ call(entry); ++ break; ++ case relocInfo::runtime_call_type: ++ if (!is_simm(entry - pc(), 38)) { ++ call_long(entry); ++ break; ++ } ++ // fallthrough ++ default: ++ { ++ InstructionMark im(this); ++ relocate(rh); ++ 
patchable_call(entry); ++ } ++ break; ++ } ++} ++ ++void MacroAssembler::call_long(address entry) { ++ jlong value = (jlong)entry; ++ lu12i_w(T4, split_low20(value >> 12)); ++ lu32i_d(T4, split_low20(value >> 32)); ++ jirl(RA, T4, split_low12(value)); ++} ++ ++address MacroAssembler::ic_call(address entry, jint method_index) { ++ RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index); ++ patchable_li52(IC_Klass, (long)Universe::non_oop_word()); ++ assert(entry != NULL, "call most probably wrong"); ++ InstructionMark im(this); ++ return trampoline_call(AddressLiteral(entry, rh)); ++} ++ ++void MacroAssembler::c2bool(Register r) { ++ sltu(r, R0, r); ++} ++ ++#ifndef PRODUCT ++extern "C" void findpc(intptr_t x); ++#endif ++ ++void MacroAssembler::debug(char* msg/*, RegistersForDebugging* regs*/) { ++ if ( ShowMessageBoxOnError ) { ++ JavaThreadState saved_state = JavaThread::current()->thread_state(); ++ JavaThread::current()->set_thread_state(_thread_in_vm); ++ { ++ // In order to get locks work, we need to fake a in_VM state ++ ttyLocker ttyl; ++ ::tty->print_cr("EXECUTION STOPPED: %s\n", msg); ++ if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { ++ BytecodeCounter::print(); ++ } ++ ++ } ++ ThreadStateTransition::transition(JavaThread::current(), _thread_in_vm, saved_state); ++ } ++ else ++ ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); ++} ++ ++ ++void MacroAssembler::stop(const char* msg) { ++ li(A0, (long)msg); ++ call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); ++ brk(17); ++} ++ ++void MacroAssembler::warn(const char* msg) { ++ pushad(); ++ li(A0, (long)msg); ++ push(S2); ++ li(AT, -(StackAlignmentInBytes)); ++ move(S2, SP); // use S2 as a sender SP holder ++ andr(SP, SP, AT); // align stack as required by ABI ++ call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); ++ move(SP, S2); // use S2 as a sender SP holder ++ pop(S2); ++ popad(); ++} ++ ++void MacroAssembler::increment(Register reg, int imm) { ++ if (!imm) return; ++ if (is_simm(imm, 12)) { ++ addi_d(reg, reg, imm); ++ } else { ++ li(AT, imm); ++ add_d(reg, reg, AT); ++ } ++} ++ ++void MacroAssembler::decrement(Register reg, int imm) { ++ increment(reg, -imm); ++} ++ ++void MacroAssembler::increment(Address addr, int imm) { ++ if (!imm) return; ++ assert(is_simm(imm, 12), "must be"); ++ ld_ptr(AT, addr); ++ addi_d(AT, AT, imm); ++ st_ptr(AT, addr); ++} ++ ++void MacroAssembler::decrement(Address addr, int imm) { ++ increment(addr, -imm); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ bool check_exceptions) { ++ call_VM_helper(oop_result, entry_point, 0, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions) { ++ if (arg_1!=A1) move(A1, arg_1); ++ call_VM_helper(oop_result, entry_point, 1, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ bool check_exceptions) { ++ if (arg_1!=A1) move(A1, arg_1); ++ if (arg_2!=A2) move(A2, arg_2); ++ assert(arg_2 != A1, "smashed argument"); ++ call_VM_helper(oop_result, entry_point, 2, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ Register arg_3, ++ bool check_exceptions) { ++ if (arg_1!=A1) move(A1, arg_1); ++ if (arg_2!=A2) move(A2, arg_2); 
assert(arg_2 != A1, "smashed argument"); ++ if (arg_3!=A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument"); ++ call_VM_helper(oop_result, entry_point, 3, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ call_VM_base(oop_result, NOREG, last_java_sp, entry_point, number_of_arguments, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions) { ++ if (arg_1 != A1) move(A1, arg_1); ++ call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ bool check_exceptions) { ++ if (arg_1 != A1) move(A1, arg_1); ++ if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); ++ call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ Register arg_3, ++ bool check_exceptions) { ++ if (arg_1 != A1) move(A1, arg_1); ++ if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); ++ if (arg_3 != A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument"); ++ call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); ++} ++ ++void MacroAssembler::call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ // determine java_thread register ++ if (!java_thread->is_valid()) { ++#ifndef OPT_THREAD ++ java_thread = T2; ++ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ } ++ // determine last_java_sp register ++ if (!last_java_sp->is_valid()) { ++ last_java_sp = SP; ++ } ++ // debugging support ++ assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); ++ assert(number_of_arguments <= 4 , "cannot have negative number of arguments"); ++ assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result"); ++ assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp"); ++ ++ assert(last_java_sp != FP, "this code doesn't work for last_java_sp == fp, which currently can't portably work anyway since C2 doesn't save fp"); ++ ++ // set last Java frame before call ++ Label before_call; ++ bind(before_call); ++ set_last_Java_frame(java_thread, last_java_sp, FP, before_call); ++ ++ // do the call ++ move(A0, java_thread); ++ call(entry_point, relocInfo::runtime_call_type); ++ ++ // restore the thread (cannot use the pushed argument since arguments ++ // may be overwritten by C code generated by an optimizing compiler); ++ // however can use the register value directly if it is callee saved. 
++#ifndef OPT_THREAD ++ get_thread(java_thread); ++#else ++#ifdef ASSERT ++ { ++ Label L; ++ get_thread(AT); ++ beq(java_thread, AT, L); ++ stop("MacroAssembler::call_VM_base: TREG not callee saved?"); ++ bind(L); ++ } ++#endif ++#endif ++ ++ // discard thread and arguments ++ ld_ptr(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // reset last Java frame ++ reset_last_Java_frame(java_thread, false); ++ ++ check_and_handle_popframe(java_thread); ++ check_and_handle_earlyret(java_thread); ++ if (check_exceptions) { ++ // check for pending exceptions (java_thread is set upon return) ++ Label L; ++ ld_d(AT, java_thread, in_bytes(Thread::pending_exception_offset())); ++ beq(AT, R0, L); ++ li(AT, target(before_call)); ++ push(AT); ++ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ bind(L); ++ } ++ ++ // get oop result if there is one and reset the value in the thread ++ if (oop_result->is_valid()) { ++ ld_d(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset())); ++ st_d(R0, java_thread, in_bytes(JavaThread::vm_result_offset())); ++ verify_oop(oop_result); ++ } ++} ++ ++void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { ++ move(V0, SP); ++ //we also reserve space for java_thread here ++ li(AT, -(StackAlignmentInBytes)); ++ andr(SP, SP, AT); ++ call_VM_base(oop_result, NOREG, V0, entry_point, number_of_arguments, check_exceptions); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { ++ call_VM_leaf_base(entry_point, number_of_arguments); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { ++ if (arg_0 != A0) move(A0, arg_0); ++ call_VM_leaf(entry_point, 1); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { ++ if (arg_0 != A0) move(A0, arg_0); ++ if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument"); ++ call_VM_leaf(entry_point, 2); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { ++ if (arg_0 != A0) move(A0, arg_0); ++ if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument"); ++ if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A0 && arg_2 != A1, "smashed argument"); ++ call_VM_leaf(entry_point, 3); ++} ++ ++void MacroAssembler::super_call_VM_leaf(address entry_point) { ++ MacroAssembler::call_VM_leaf_base(entry_point, 0); ++} ++ ++void MacroAssembler::super_call_VM_leaf(address entry_point, ++ Register arg_1) { ++ if (arg_1 != A0) move(A0, arg_1); ++ MacroAssembler::call_VM_leaf_base(entry_point, 1); ++} ++ ++void MacroAssembler::super_call_VM_leaf(address entry_point, ++ Register arg_1, ++ Register arg_2) { ++ if (arg_1 != A0) move(A0, arg_1); ++ if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument"); ++ MacroAssembler::call_VM_leaf_base(entry_point, 2); ++} ++ ++void MacroAssembler::super_call_VM_leaf(address entry_point, ++ Register arg_1, ++ Register arg_2, ++ Register arg_3) { ++ if (arg_1 != A0) move(A0, arg_1); ++ if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument"); ++ if (arg_3 != A2) move(A2, arg_3); assert(arg_3 != A0 && arg_3 != A1, "smashed argument"); ++ MacroAssembler::call_VM_leaf_base(entry_point, 3); ++} ++ ++void MacroAssembler::check_and_handle_earlyret(Register java_thread) { ++} ++ ++void MacroAssembler::check_and_handle_popframe(Register java_thread) { ++} ++ ++void 
MacroAssembler::null_check(Register reg, int offset) { ++ if (needs_explicit_null_check(offset)) { ++ // provoke OS NULL exception if reg = NULL by ++ // accessing M[reg] w/o changing any (non-CC) registers ++ // NOTE: cmpl is plenty here to provoke a segv ++ ld_w(AT, reg, 0); ++ } else { ++ // nothing to do, (later) access of M[reg + offset] ++ // will provoke OS NULL exception if reg = NULL ++ } ++} ++ ++void MacroAssembler::enter() { ++ push2(RA, FP); ++ move(FP, SP); ++} ++ ++void MacroAssembler::leave() { ++ move(SP, FP); ++ pop2(RA, FP); ++} ++ ++void MacroAssembler::build_frame(int framesize) { ++ assert(framesize >= 2 * wordSize, "framesize must include space for FP/RA"); ++ assert(framesize % (2 * wordSize) == 0, "must preserve 2 * wordSize alignment"); ++ if (Assembler::is_simm(-framesize, 12)) { ++ addi_d(SP, SP, -framesize); ++ st_ptr(FP, Address(SP, framesize - 2 * wordSize)); ++ st_ptr(RA, Address(SP, framesize - 1 * wordSize)); ++ if (PreserveFramePointer) ++ addi_d(FP, SP, framesize - 2 * wordSize); ++ } else { ++ addi_d(SP, SP, -2 * wordSize); ++ st_ptr(FP, Address(SP, 0 * wordSize)); ++ st_ptr(RA, Address(SP, 1 * wordSize)); ++ if (PreserveFramePointer) ++ move(FP, SP); ++ li(SCR1, framesize - 2 * wordSize); ++ sub_d(SP, SP, SCR1); ++ } ++} ++ ++void MacroAssembler::remove_frame(int framesize) { ++ assert(framesize >= 2 * wordSize, "framesize must include space for FP/RA"); ++ assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment"); ++ if (Assembler::is_simm(framesize, 12)) { ++ ld_ptr(FP, Address(SP, framesize - 2 * wordSize)); ++ ld_ptr(RA, Address(SP, framesize - 1 * wordSize)); ++ addi_d(SP, SP, framesize); ++ } else { ++ li(SCR1, framesize - 2 * wordSize); ++ add_d(SP, SP, SCR1); ++ ld_ptr(FP, Address(SP, 0 * wordSize)); ++ ld_ptr(RA, Address(SP, 1 * wordSize)); ++ addi_d(SP, SP, 2 * wordSize); ++ } ++} ++ ++void MacroAssembler::unimplemented(const char* what) { ++ const char* buf = NULL; ++ { ++ ResourceMark rm; ++ stringStream ss; ++ ss.print("unimplemented: %s", what); ++ buf = code_string(ss.as_string()); ++ } ++ stop(buf); ++} ++ ++void MacroAssembler::get_thread(Register thread) { ++#ifdef MINIMIZE_RAM_USAGE ++ Register tmp; ++ ++ if (thread == AT) ++ tmp = T4; ++ else ++ tmp = AT; ++ ++ move(thread, SP); ++ shr(thread, PAGE_SHIFT); ++ ++ push(tmp); ++ li(tmp, ((1UL << (SP_BITLENGTH - PAGE_SHIFT)) - 1)); ++ andr(thread, thread, tmp); ++ shl(thread, Address::times_ptr); // sizeof(Thread *) ++ li(tmp, (long)ThreadLocalStorage::sp_map_addr()); ++ add_d(tmp, tmp, thread); ++ ld_ptr(thread, tmp, 0); ++ pop(tmp); ++#else ++ if (thread != V0) { ++ push(V0); ++ } ++ pushad_except_v0(); ++ ++ push(S5); ++ move(S5, SP); ++ li(AT, -StackAlignmentInBytes); ++ andr(SP, SP, AT); ++ // TODO: confirm reloc ++ call(CAST_FROM_FN_PTR(address, Thread::current), relocInfo::runtime_call_type); ++ move(SP, S5); ++ pop(S5); ++ ++ popad_except_v0(); ++ if (thread != V0) { ++ move(thread, V0); ++ pop(V0); ++ } ++#endif // MINIMIZE_RAM_USAGE ++} ++ ++void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp) { ++ // determine java_thread register ++ if (!java_thread->is_valid()) { ++#ifndef OPT_THREAD ++ java_thread = T1; ++ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ } ++ // we must set sp to zero to clear frame ++ st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // must clear fp, so that compiled frames are not confused; it is possible ++ // that we need it only for debugging ++ if(clear_fp) { ++ 
st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_fp_offset())); ++ } ++ ++ // Always clear the pc because it could have been set by make_walkable() ++ st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_pc_offset())); ++} ++ ++void MacroAssembler::reset_last_Java_frame(bool clear_fp) { ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ get_thread(thread); ++#endif ++ // we must set sp to zero to clear frame ++ st_d(R0, Address(thread, JavaThread::last_Java_sp_offset())); ++ // must clear fp, so that compiled frames are not confused; it is ++ // possible that we need it only for debugging ++ if (clear_fp) { ++ st_d(R0, Address(thread, JavaThread::last_Java_fp_offset())); ++ } ++ ++ // Always clear the pc because it could have been set by make_walkable() ++ st_d(R0, Address(thread, JavaThread::last_Java_pc_offset())); ++} ++ ++// Write serialization page so VM thread can do a pseudo remote membar. ++// We use the current thread pointer to calculate a thread specific ++// offset to write to within the page. This minimizes bus traffic ++// due to cache line collision. ++void MacroAssembler::serialize_memory(Register thread, Register tmp) { ++ assert_different_registers(AT, tmp); ++ juint sps = os::get_serialize_page_shift_count(); ++ juint lsb = sps + 2; ++ juint msb = sps + log2_uint(os::vm_page_size()) - 1; ++ bstrpick_w(AT, thread, msb, lsb); ++ li(tmp, os::get_memory_serialize_page()); ++ alsl_d(tmp, AT, tmp, Address::times_2 - 1); ++ st_w(R0, tmp, 0); ++} ++ ++void MacroAssembler::safepoint_poll(Label& slow_path, Register thread_reg) { ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ ld_d(AT, thread_reg, in_bytes(Thread::polling_page_offset())); ++ andi(AT, AT, SafepointMechanism::poll_bit()); ++ bne(AT, R0, slow_path); ++ } else { ++ li(AT, SafepointSynchronize::address_of_state()); ++ ld_w(AT, AT, 0); ++ addi_d(AT, AT, -SafepointSynchronize::_not_synchronized); ++ bne(AT, R0, slow_path); ++ } ++} ++ ++// Just like safepoint_poll, but use an acquiring load for thread- ++// local polling. ++// ++// We need an acquire here to ensure that any subsequent load of the ++// global SafepointSynchronize::_state flag is ordered after this load ++// of the local Thread::_polling page. We don't want this poll to ++// return false (i.e. not safepointing) and a later poll of the global ++// SafepointSynchronize::_state spuriously to return true. ++// ++// This is to avoid a race when we're in a native->Java transition ++// racing the code which wakes up from a safepoint. ++// ++void MacroAssembler::safepoint_poll_acquire(Label& slow_path, Register thread_reg) { ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ ld_d(AT, thread_reg, in_bytes(Thread::polling_page_offset())); ++ membar(Assembler::Membar_mask_bits(LoadLoad|LoadStore)); ++ andi(AT, AT, SafepointMechanism::poll_bit()); ++ bne(AT, R0, slow_path); ++ } else { ++ safepoint_poll(slow_path, thread_reg); ++ } ++} ++ ++// Calls to C land ++// ++// When entering C land, the fp, & sp of the last Java frame have to be recorded ++// in the (thread-local) JavaThread object. When leaving C land, the last Java fp ++// has to be reset to 0. This is required to allow proper stack traversal. 
++void MacroAssembler::set_last_Java_frame(Register java_thread, ++ Register last_java_sp, ++ Register last_java_fp, ++ Label& last_java_pc) { ++ // determine java_thread register ++ if (!java_thread->is_valid()) { ++#ifndef OPT_THREAD ++ java_thread = T2; ++ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ } ++ ++ // determine last_java_sp register ++ if (!last_java_sp->is_valid()) { ++ last_java_sp = SP; ++ } ++ ++ // last_java_fp is optional ++ if (last_java_fp->is_valid()) { ++ st_ptr(last_java_fp, java_thread, in_bytes(JavaThread::last_Java_fp_offset())); ++ } ++ ++ // last_java_pc ++ lipc(AT, last_java_pc); ++ st_ptr(AT, java_thread, in_bytes(JavaThread::frame_anchor_offset() + ++ JavaFrameAnchor::last_Java_pc_offset())); ++ ++ st_ptr(last_java_sp, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++} ++ ++void MacroAssembler::set_last_Java_frame(Register last_java_sp, ++ Register last_java_fp, ++ Label& last_java_pc) { ++ set_last_Java_frame(NOREG, last_java_sp, last_java_fp, last_java_pc); ++} ++ ++// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes. ++void MacroAssembler::tlab_allocate(Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1, ++ Register t2, ++ Label& slow_case) { ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->tlab_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, t1, t2, slow_case); ++} ++ ++// Defines obj, preserves var_size_in_bytes ++void MacroAssembler::eden_allocate(Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1, ++ Label& slow_case) { ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->eden_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, t1, slow_case); ++} ++ ++ ++void MacroAssembler::incr_allocated_bytes(Register thread, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1) { ++ if (!thread->is_valid()) { ++#ifndef OPT_THREAD ++ assert(t1->is_valid(), "need temp reg"); ++ thread = t1; ++ get_thread(thread); ++#else ++ thread = TREG; ++#endif ++ } ++ ++ ld_ptr(AT, thread, in_bytes(JavaThread::allocated_bytes_offset())); ++ if (var_size_in_bytes->is_valid()) { ++ add_d(AT, AT, var_size_in_bytes); ++ } else { ++ addi_d(AT, AT, con_size_in_bytes); ++ } ++ st_ptr(AT, thread, in_bytes(JavaThread::allocated_bytes_offset())); ++} ++ ++void MacroAssembler::li(Register rd, jlong value) { ++ jlong hi12 = bitfield(value, 52, 12); ++ jlong lo52 = bitfield(value, 0, 52); ++ ++ if (hi12 != 0 && lo52 == 0) { ++ lu52i_d(rd, R0, hi12); ++ } else { ++ jlong hi20 = bitfield(value, 32, 20); ++ jlong lo20 = bitfield(value, 12, 20); ++ jlong lo12 = bitfield(value, 0, 12); ++ ++ if (lo20 == 0) { ++ ori(rd, R0, lo12); ++ } else if (bitfield(simm12(lo12), 12, 20) == lo20) { ++ addi_w(rd, R0, simm12(lo12)); ++ } else { ++ lu12i_w(rd, lo20); ++ if (lo12 != 0) ++ ori(rd, rd, lo12); ++ } ++ if (hi20 != bitfield(simm20(lo20), 20, 20)) ++ lu32i_d(rd, hi20); ++ if (hi12 != bitfield(simm20(hi20), 20, 12)) ++ lu52i_d(rd, rd, hi12); ++ } ++} ++ ++void MacroAssembler::patchable_li52(Register rd, jlong value) { ++ int count = 0; ++ ++ if (value <= max_jint && value >= min_jint) { ++ if (is_simm(value, 12)) { ++ addi_d(rd, R0, value); ++ count++; ++ } else { ++ lu12i_w(rd, split_low20(value >> 12)); ++ count++; ++ if (split_low12(value)) { ++ ori(rd, rd, split_low12(value)); ++ count++; ++ } ++ } ++ } else if (is_simm(value, 52)) { ++ lu12i_w(rd, 
split_low20(value >> 12)); ++ count++; ++ if (split_low12(value)) { ++ ori(rd, rd, split_low12(value)); ++ count++; ++ } ++ lu32i_d(rd, split_low20(value >> 32)); ++ count++; ++ } else { ++ tty->print_cr("value = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 3) { ++ nop(); ++ count++; ++ } ++} ++ ++void MacroAssembler::lipc(Register rd, Label& L) { ++ if (L.is_bound()) { ++ jint offs = (target(L) - pc()) >> 2; ++ guarantee(is_simm(offs, 20), "Not signed 20-bit offset"); ++ pcaddi(rd, offs); ++ } else { ++ InstructionMark im(this); ++ L.add_patch_at(code(), locator()); ++ pcaddi(rd, 0); ++ } ++} ++ ++void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { ++ assert(UseCompressedClassPointers, "should only be used for compressed header"); ++ assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); ++ ++ int klass_index = oop_recorder()->find_index(k); ++ RelocationHolder rspec = metadata_Relocation::spec(klass_index); ++ long narrowKlass = (long)Klass::encode_klass(k); ++ ++ relocate(rspec, Assembler::narrow_oop_operand); ++ patchable_li52(dst, narrowKlass); ++} ++ ++void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { ++ assert(UseCompressedOops, "should only be used for compressed header"); ++ assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); ++ ++ int oop_index = oop_recorder()->find_index(obj); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ ++ relocate(rspec, Assembler::narrow_oop_operand); ++ patchable_li52(dst, oop_index); ++} ++ ++// ((OopHandle)result).resolve(); ++void MacroAssembler::resolve_oop_handle(Register result, Register tmp) { ++ // OopHandle::resolve is an indirection. ++ access_load_at(T_OBJECT, IN_NATIVE, result, Address(result, 0), tmp, NOREG); ++} ++ ++void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp) { ++ // get mirror ++ const int mirror_offset = in_bytes(Klass::java_mirror_offset()); ++ ld_ptr(mirror, method, in_bytes(Method::const_offset())); ++ ld_ptr(mirror, mirror, in_bytes(ConstMethod::constants_offset())); ++ ld_ptr(mirror, mirror, ConstantPool::pool_holder_offset_in_bytes()); ++ ld_ptr(mirror, mirror, mirror_offset); ++ resolve_oop_handle(mirror, tmp); ++} ++ ++void MacroAssembler::verify_oop(Register reg, const char* s) { ++ if (!VerifyOops) return; ++ ++ const char * b = NULL; ++ stringStream ss; ++ ss.print("verify_oop: %s: %s", reg->name(), s); ++ b = code_string(ss.as_string()); ++ ++ addi_d(SP, SP, -6 * wordSize); ++ st_ptr(SCR1, Address(SP, 0 * wordSize)); ++ st_ptr(SCR2, Address(SP, 1 * wordSize)); ++ st_ptr(RA, Address(SP, 2 * wordSize)); ++ st_ptr(A0, Address(SP, 3 * wordSize)); ++ st_ptr(A1, Address(SP, 4 * wordSize)); ++ ++ move(A1, reg); ++ patchable_li52(A0, (uintptr_t)(address)b); // Fixed size instructions ++ li(SCR2, StubRoutines::verify_oop_subroutine_entry_address()); ++ ld_ptr(SCR2, Address(SCR2)); ++ jalr(SCR2); ++ ++ ld_ptr(SCR1, Address(SP, 0 * wordSize)); ++ ld_ptr(SCR2, Address(SP, 1 * wordSize)); ++ ld_ptr(RA, Address(SP, 2 * wordSize)); ++ ld_ptr(A0, Address(SP, 3 * wordSize)); ++ ld_ptr(A1, Address(SP, 4 * wordSize)); ++ addi_d(SP, SP, 6 * wordSize); ++} ++ ++void MacroAssembler::verify_oop_addr(Address addr, const char* s) { ++ if (!VerifyOops) return; ++ ++ const char* b = NULL; ++ { ++ ResourceMark rm; ++ stringStream ss; ++ ss.print("verify_oop_addr: %s", s); ++ b = code_string(ss.as_string()); ++ } ++ ++ addi_d(SP, SP, -6 * wordSize); ++ st_ptr(SCR1, Address(SP, 0 * wordSize)); ++ 
st_ptr(SCR2, Address(SP, 1 * wordSize)); ++ st_ptr(RA, Address(SP, 2 * wordSize)); ++ st_ptr(A0, Address(SP, 3 * wordSize)); ++ st_ptr(A1, Address(SP, 4 * wordSize)); ++ ++ patchable_li52(A0, (uintptr_t)(address)b); // Fixed size instructions ++ // addr may contain sp so we will have to adjust it based on the ++ // pushes that we just did. ++ if (addr.uses(SP)) { ++ lea(A1, addr); ++ ld_ptr(A1, Address(A1, 6 * wordSize)); ++ } else { ++ ld_ptr(A1, addr); ++ } ++ ++ // call indirectly to solve generation ordering problem ++ li(SCR2, StubRoutines::verify_oop_subroutine_entry_address()); ++ ld_ptr(SCR2, Address(SCR2)); ++ jalr(SCR2); ++ ++ ld_ptr(SCR1, Address(SP, 0 * wordSize)); ++ ld_ptr(SCR2, Address(SP, 1 * wordSize)); ++ ld_ptr(RA, Address(SP, 2 * wordSize)); ++ ld_ptr(A0, Address(SP, 3 * wordSize)); ++ ld_ptr(A1, Address(SP, 4 * wordSize)); ++ addi_d(SP, SP, 6 * wordSize); ++} ++ ++// used registers : SCR1, SCR2 ++void MacroAssembler::verify_oop_subroutine() { ++ // RA: ra ++ // A0: char* error message ++ // A1: oop object to verify ++ Label exit, error; ++ // increment counter ++ li(SCR2, (long)StubRoutines::verify_oop_count_addr()); ++ ld_w(SCR1, SCR2, 0); ++ addi_d(SCR1, SCR1, 1); ++ st_w(SCR1, SCR2, 0); ++ ++ // make sure object is 'reasonable' ++ beqz(A1, exit); // if obj is NULL it is ok ++ ++ // Check if the oop is in the right area of memory ++ // const int oop_mask = Universe::verify_oop_mask(); ++ // const int oop_bits = Universe::verify_oop_bits(); ++ const uintptr_t oop_mask = Universe::verify_oop_mask(); ++ const uintptr_t oop_bits = Universe::verify_oop_bits(); ++ li(SCR1, oop_mask); ++ andr(SCR2, A1, SCR1); ++ li(SCR1, oop_bits); ++ bne(SCR2, SCR1, error); ++ ++ // make sure klass is 'reasonable' ++ // add for compressedoops ++ load_klass(SCR2, A1); ++ beqz(SCR2, error); // if klass is NULL it is broken ++ // return if everything seems ok ++ bind(exit); ++ ++ jr(RA); ++ ++ // handle errors ++ bind(error); ++ pushad(); ++ call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); ++ popad(); ++ jr(RA); ++} ++ ++void MacroAssembler::verify_tlab(Register t1, Register t2) { ++#ifdef ASSERT ++ assert_different_registers(t1, t2, AT); ++ if (UseTLAB && VerifyOops) { ++ Label next, ok; ++ ++ get_thread(t1); ++ ++ ld_ptr(t2, t1, in_bytes(JavaThread::tlab_top_offset())); ++ ld_ptr(AT, t1, in_bytes(JavaThread::tlab_start_offset())); ++ bgeu(t2, AT, next); ++ ++ stop("assert(top >= start)"); ++ ++ bind(next); ++ ld_ptr(AT, t1, in_bytes(JavaThread::tlab_end_offset())); ++ bgeu(AT, t2, ok); ++ ++ stop("assert(top <= end)"); ++ ++ bind(ok); ++ ++ } ++#endif ++} ++ ++RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, ++ Register tmp, ++ int offset) { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++ return RegisterOrConstant(tmp); ++} ++ ++void MacroAssembler::hswap(Register reg) { ++ //short ++ //andi(reg, reg, 0xffff); ++ srli_w(AT, reg, 8); ++ slli_w(reg, reg, 24); ++ srai_w(reg, reg, 16); ++ orr(reg, reg, AT); ++} ++ ++void MacroAssembler::huswap(Register reg) { ++ srli_d(AT, reg, 8); ++ slli_d(reg, reg, 24); ++ srli_d(reg, reg, 16); ++ orr(reg, reg, AT); ++ bstrpick_d(reg, reg, 15, 0); ++} ++ ++// something funny to do this will only one more register AT ++// 32 bits ++void MacroAssembler::swap(Register reg) { ++ srli_w(AT, reg, 8); ++ slli_w(reg, reg, 24); ++ orr(reg, reg, AT); ++ //reg : 4 1 2 3 ++ srli_w(AT, AT, 16); ++ xorr(AT, AT, reg); ++ andi(AT, AT, 0xff); ++ //AT : 0 0 0 1^3); ++ xorr(reg, reg, AT); ++ 
//reg : 4 1 2 1 ++ slli_w(AT, AT, 16); ++ xorr(reg, reg, AT); ++ //reg : 4 3 2 1 ++} ++ ++void MacroAssembler::cmpxchg(Address addr, Register oldval, Register newval, ++ Register resflag, bool retold, bool barrier) { ++ assert(oldval != resflag, "oldval != resflag"); ++ assert(newval != resflag, "newval != resflag"); ++ Label again, succ, fail; ++ ++ bind(again); ++ ll_d(resflag, addr); ++ bne(resflag, oldval, fail); ++ move(resflag, newval); ++ sc_d(resflag, addr); ++ beqz(resflag, again); ++ b(succ); ++ ++ bind(fail); ++ if (barrier) ++ dbar(0x700); ++ if (retold && oldval != R0) ++ move(oldval, resflag); ++ move(resflag, R0); ++ bind(succ); ++} ++ ++void MacroAssembler::cmpxchg(Address addr, Register oldval, Register newval, ++ Register tmp, bool retold, bool barrier, Label& succ, Label* fail) { ++ assert(oldval != tmp, "oldval != tmp"); ++ assert(newval != tmp, "newval != tmp"); ++ Label again, neq; ++ ++ bind(again); ++ ll_d(tmp, addr); ++ bne(tmp, oldval, neq); ++ move(tmp, newval); ++ sc_d(tmp, addr); ++ beqz(tmp, again); ++ b(succ); ++ ++ bind(neq); ++ if (barrier) ++ dbar(0x700); ++ if (retold && oldval != R0) ++ move(oldval, tmp); ++ if (fail) ++ b(*fail); ++} ++ ++void MacroAssembler::cmpxchg32(Address addr, Register oldval, Register newval, ++ Register resflag, bool sign, bool retold, bool barrier) { ++ assert(oldval != resflag, "oldval != resflag"); ++ assert(newval != resflag, "newval != resflag"); ++ Label again, succ, fail; ++ ++ bind(again); ++ ll_w(resflag, addr); ++ if (!sign) ++ lu32i_d(resflag, 0); ++ bne(resflag, oldval, fail); ++ move(resflag, newval); ++ sc_w(resflag, addr); ++ beqz(resflag, again); ++ b(succ); ++ ++ bind(fail); ++ if (barrier) ++ dbar(0x700); ++ if (retold && oldval != R0) ++ move(oldval, resflag); ++ move(resflag, R0); ++ bind(succ); ++} ++ ++void MacroAssembler::cmpxchg32(Address addr, Register oldval, Register newval, Register tmp, ++ bool sign, bool retold, bool barrier, Label& succ, Label* fail) { ++ assert(oldval != tmp, "oldval != tmp"); ++ assert(newval != tmp, "newval != tmp"); ++ Label again, neq; ++ ++ bind(again); ++ ll_w(tmp, addr); ++ if (!sign) ++ lu32i_d(tmp, 0); ++ bne(tmp, oldval, neq); ++ move(tmp, newval); ++ sc_w(tmp, addr); ++ beqz(tmp, again); ++ b(succ); ++ ++ bind(neq); ++ if (barrier) ++ dbar(0x700); ++ if (retold && oldval != R0) ++ move(oldval, tmp); ++ if (fail) ++ b(*fail); ++} ++ ++// be sure the three register is different ++void MacroAssembler::rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++} ++ ++// be sure the three register is different ++void MacroAssembler::rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++} ++ ++#ifdef COMPILER2 ++// Fast_Lock and Fast_Unlock used by C2 ++ ++// Because the transitions from emitted code to the runtime ++// monitorenter/exit helper stubs are so slow it's critical that ++// we inline both the stack-locking fast-path and the inflated fast path. ++// ++// See also: cmpFastLock and cmpFastUnlock. ++// ++// What follows is a specialized inline transliteration of the code ++// in slow_enter() and slow_exit(). If we're concerned about I$ bloat ++// another option would be to emit TrySlowEnter and TrySlowExit methods ++// at startup-time. These methods would accept arguments as ++// (Obj, Self, box, Scratch) and return success-failure ++// indications in the icc.ZFlag. 
Fast_Lock and Fast_Unlock would simply ++// marshal the arguments and emit calls to TrySlowEnter and TrySlowExit. ++// In practice, however, the # of lock sites is bounded and is usually small. ++// Besides the call overhead, TrySlowEnter and TrySlowExit might suffer ++// if the processor uses simple bimodal branch predictors keyed by EIP, ++// since the helper routines would be called from multiple synchronization ++// sites. ++// ++// An even better approach would be to write "MonitorEnter()" and "MonitorExit()" ++// in java - using j.u.c and unsafe - and just bind the lock and unlock sites ++// to those specialized methods. That'd give us a mostly platform-independent ++// implementation that the JITs could optimize and inline at their pleasure. ++// Done correctly, the only time we'd need to cross to native code would be ++// to park() or unpark() threads. We'd also need a few more unsafe operators ++// to (a) prevent compiler-JIT reordering of non-volatile accesses, and ++// (b) explicit barriers or fence operations. ++// ++// TODO: ++// ++// * Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr). ++// This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals. ++// Given TLAB allocation, Self is usually manifested in a register, so passing it into ++// the lock operators would typically be faster than reifying Self. ++// ++// * Ideally I'd define the primitives as: ++// fast_lock (nax Obj, nax box, res, tmp, nax scr) where tmp and scr are KILLED. ++// fast_unlock (nax Obj, box, res, nax tmp) where tmp is KILLED. ++// Unfortunately ADLC bugs prevent us from expressing the ideal form. ++// Instead, we're stuck with rather awkward and brittle register assignments below. ++// Furthermore, the register assignments are overconstrained, possibly resulting in ++// sub-optimal code near the synchronization site. ++// ++// * Eliminate the sp-proximity tests and just use "== Self" tests instead. ++// Alternately, use a better sp-proximity test. ++// ++// * Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value. ++// Either one is sufficient to uniquely identify a thread. ++// TODO: eliminate use of sp in _owner and use get_thread(tr) instead. ++// ++// * Intrinsify notify() and notifyAll() for the common cases where the ++// object is locked by the calling thread but the waitlist is empty. ++// Avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll(). ++// ++// * Use jccb and jmpb instead of jcc and jmp to improve code density. ++// But beware of excessive branch density on AMD Opterons. ++// ++// * Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success ++// or failure of the fast-path. If the fast-path fails then we pass ++// control to the slow-path, typically in C. In Fast_Lock and ++// Fast_Unlock we often branch to DONE_LABEL, just to find that C2 ++// will emit a conditional branch immediately after the node. ++// So we have branches to branches and lots of ICC.ZF games. ++// Instead, it might be better to have C2 pass a "FailureLabel" ++// into Fast_Lock and Fast_Unlock. In the case of success, control ++// will drop through the node. ICC.ZF is undefined at exit.
++// In the case of failure, the node will branch directly to the ++// FailureLabel ++ ++// obj: object to lock ++// box: on-stack box address (displaced header location) ++// tmp: tmp -- KILLED ++// scr: tmp -- KILLED ++void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register resReg, ++ Register tmpReg, Register scrReg) { ++ Label IsInflated, DONE, DONE_SET; ++ ++ // Ensure the register assignents are disjoint ++ guarantee(objReg != boxReg, ""); ++ guarantee(objReg != tmpReg, ""); ++ guarantee(objReg != scrReg, ""); ++ guarantee(boxReg != tmpReg, ""); ++ guarantee(boxReg != scrReg, ""); ++ ++ block_comment("FastLock"); ++ ++ if (PrintBiasedLockingStatistics) { ++ atomic_inc32((address)BiasedLocking::total_entry_count_addr(), 1, tmpReg, scrReg); ++ } ++ ++ if (EmitSync & 1) { ++ move(AT, R0); ++ return; ++ } else ++ if (EmitSync & 2) { ++ Label DONE_LABEL ; ++ if (UseBiasedLocking) { ++ // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument. ++ biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL); ++ } ++ ++ ld_d(tmpReg, Address(objReg, 0)) ; // fetch markword ++ ori(tmpReg, tmpReg, 0x1); ++ st_d(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS ++ ++ cmpxchg(Address(objReg, 0), tmpReg, boxReg, scrReg, true, false, DONE_LABEL); // Updates tmpReg ++ ++ // Recursive locking ++ sub_d(tmpReg, tmpReg, SP); ++ li(AT, (7 - os::vm_page_size() )); ++ andr(tmpReg, tmpReg, AT); ++ st_d(tmpReg, Address(boxReg, 0)); ++ bind(DONE_LABEL) ; ++ } else { ++ // Possible cases that we'll encounter in fast_lock ++ // ------------------------------------------------ ++ // * Inflated ++ // -- unlocked ++ // -- Locked ++ // = by self ++ // = by other ++ // * biased ++ // -- by Self ++ // -- by other ++ // * neutral ++ // * stack-locked ++ // -- by self ++ // = sp-proximity test hits ++ // = sp-proximity test generates false-negative ++ // -- by other ++ // ++ ++ // TODO: optimize away redundant LDs of obj->mark and improve the markword triage ++ // order to reduce the number of conditional branches in the most common cases. ++ // Beware -- there's a subtle invariant that fetch of the markword ++ // at [FETCH], below, will never observe a biased encoding (*101b). ++ // If this invariant is not held we risk exclusion (safety) failure. ++ if (UseBiasedLocking && !UseOptoBiasInlining) { ++ Label succ, fail; ++ biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, succ, NULL); ++ b(fail); ++ bind(succ); ++ li(resReg, 1); ++ b(DONE); ++ bind(fail); ++ } ++ ++ ld_d(tmpReg, Address(objReg, 0)); //Fetch the markword of the object. ++ andi(AT, tmpReg, markOopDesc::monitor_value); ++ bnez(AT, IsInflated); // inflated vs stack-locked|neutral|bias ++ ++ // Attempt stack-locking ... ++ ori(tmpReg, tmpReg, markOopDesc::unlocked_value); ++ st_d(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS ++ ++ if (PrintBiasedLockingStatistics) { ++ Label SUCC, FAIL; ++ cmpxchg(Address(objReg, 0), tmpReg, boxReg, scrReg, true, false, SUCC, &FAIL); // Updates tmpReg ++ bind(SUCC); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scrReg); ++ li(resReg, 1); ++ b(DONE); ++ bind(FAIL); ++ } else { ++ // If cmpxchg is succ, then scrReg = 1 ++ cmpxchg(Address(objReg, 0), tmpReg, boxReg, scrReg, true, false, DONE_SET); // Updates tmpReg ++ } ++ ++ // Recursive locking ++ // The object is stack-locked: markword contains stack pointer to BasicLock. ++ // Locked by current thread if difference with current SP is less than one page. 
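++      // Illustrative note, not part of the upstream change: the instructions below are
++      // the usual HotSpot stack-lock recursion test. In plain C++ the predicate is roughly
++      //   bool recursive = (((uintptr_t)mark - (uintptr_t)sp)
++      //                     & (uintptr_t)(7 - os::vm_page_size())) == 0;
++      // where 'mark' stands for the value just reloaded by the failed CAS (a pointer to a
++      // BasicLock when the object is stack-locked). A zero result means that pointer is
++      // 8-byte aligned and lies within one page above SP, i.e. it is this thread's own
++      // stack lock; the zero is stored into the box and later turned into success.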
++ sub_d(tmpReg, tmpReg, SP); ++ li(AT, 7 - os::vm_page_size()); ++ andr(tmpReg, tmpReg, AT); ++ st_d(tmpReg, Address(boxReg, 0)); ++ ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ // tmpReg == 0 => BiasedLocking::_fast_path_entry_count++ ++ bnez(tmpReg, L); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scrReg); ++ bind(L); ++ } ++ ++ sltui(resReg, tmpReg, 1); // resReg = (tmpReg == 0) ? 1 : 0 ++ b(DONE); ++ ++ bind(IsInflated); ++ // The object's monitor m is unlocked iff m->owner == NULL, ++ // otherwise m->owner may contain a thread or a stack address. ++ ++ // TODO: someday avoid the ST-before-CAS penalty by ++ // relocating (deferring) the following ST. ++ // We should also think about trying a CAS without having ++ // fetched _owner. If the CAS is successful we may ++ // avoid an RTO->RTS upgrade on the $line. ++ // Without cast to int32_t a movptr will destroy r10 which is typically obj ++ li(AT, (int32_t)intptr_t(markOopDesc::unused_mark())); ++ st_d(AT, Address(boxReg, 0)); ++ ++ ld_d(AT, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2)); ++ // if (m->owner != 0) => AT = 0, goto slow path. ++ move(scrReg, R0); ++ bnez(AT, DONE_SET); ++ ++#ifndef OPT_THREAD ++ get_thread(TREG) ; ++#endif ++ // It's inflated and appears unlocked ++ addi_d(tmpReg, tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2); ++ cmpxchg(Address(tmpReg, 0), R0, TREG, scrReg, false, false); ++ // Intentional fall-through into DONE ... ++ ++ bind(DONE_SET); ++ move(resReg, scrReg); ++ ++ // DONE is a hot target - we'd really like to place it at the ++ // start of cache line by padding with NOPs. ++ // See the AMD and Intel software optimization manuals for the ++ // most efficient "long" NOP encodings. ++ // Unfortunately none of our alignment mechanisms suffice. ++ bind(DONE); ++ // At DONE the resReg is set as follows ... ++ // Fast_Unlock uses the same protocol. ++ // resReg == 1 -> Success ++ // resREg == 0 -> Failure - force control through the slow-path ++ ++ // Avoid branch-to-branch on AMD processors ++ // This appears to be superstition. ++ if (EmitSync & 32) nop() ; ++ ++ } ++} ++ ++// obj: object to unlock ++// box: box address (displaced header location), killed. ++// tmp: killed tmp; cannot be obj nor box. ++// ++// Some commentary on balanced locking: ++// ++// Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites. ++// Methods that don't have provably balanced locking are forced to run in the ++// interpreter - such methods won't be compiled to use fast_lock and fast_unlock. ++// The interpreter provides two properties: ++// I1: At return-time the interpreter automatically and quietly unlocks any ++// objects acquired the current activation (frame). Recall that the ++// interpreter maintains an on-stack list of locks currently held by ++// a frame. ++// I2: If a method attempts to unlock an object that is not held by the ++// the frame the interpreter throws IMSX. ++// ++// Lets say A(), which has provably balanced locking, acquires O and then calls B(). ++// B() doesn't have provably balanced locking so it runs in the interpreter. ++// Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O ++// is still locked by A(). ++// ++// The only other source of unbalanced locking would be JNI. The "Java Native Interface: ++// Programmer's Guide and Specification" claims that an object locked by jni_monitorenter ++// should not be unlocked by "normal" java-level locking and vice-versa. 
The specification ++// doesn't specify what will occur if a program engages in such mixed-mode locking, however. ++ ++void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register resReg, ++ Register tmpReg, Register scrReg) { ++ Label DONE, DONE_SET, Stacked, Inflated; ++ ++ guarantee(objReg != boxReg, ""); ++ guarantee(objReg != tmpReg, ""); ++ guarantee(objReg != scrReg, ""); ++ guarantee(boxReg != tmpReg, ""); ++ guarantee(boxReg != scrReg, ""); ++ ++ block_comment("FastUnlock"); ++ ++ if (EmitSync & 4) { ++ // Disable - inhibit all inlining. Force control through the slow-path ++ move(AT, R0); ++ return; ++ } else ++ if (EmitSync & 8) { ++ Label DONE_LABEL ; ++ if (UseBiasedLocking) { ++ biased_locking_exit(objReg, tmpReg, DONE_LABEL); ++ } ++ // classic stack-locking code ... ++ ld_d(tmpReg, Address(boxReg, 0)) ; ++ assert_different_registers(AT, tmpReg); ++ li(AT, 0x1); ++ beq(tmpReg, R0, DONE_LABEL) ; ++ ++ cmpxchg(Address(objReg, 0), boxReg, tmpReg, AT, false, false); ++ bind(DONE_LABEL); ++ } else { ++ Label CheckSucc; ++ ++ // Critically, the biased locking test must have precedence over ++ // and appear before the (box->dhw == 0) recursive stack-lock test. ++ if (UseBiasedLocking && !UseOptoBiasInlining) { ++ Label succ, fail; ++ biased_locking_exit(objReg, tmpReg, succ); ++ b(fail); ++ bind(succ); ++ li(resReg, 1); ++ b(DONE); ++ bind(fail); ++ } ++ ++ ld_d(tmpReg, Address(boxReg, 0)); // Examine the displaced header ++ sltui(AT, tmpReg, 1); ++ beqz(tmpReg, DONE_SET); // 0 indicates recursive stack-lock ++ ++ ld_d(tmpReg, Address(objReg, 0)); // Examine the object's markword ++ andi(AT, tmpReg, markOopDesc::monitor_value); ++ beqz(AT, Stacked); // Inflated? ++ ++ bind(Inflated); ++ // It's inflated. ++ // Despite our balanced locking property we still check that m->_owner == Self ++ // as java routines or native JNI code called by this thread might ++ // have released the lock. ++ // Refer to the comments in synchronizer.cpp for how we might encode extra ++ // state in _succ so we can avoid fetching EntryList|cxq. ++ // ++ // I'd like to add more cases in fast_lock() and fast_unlock() -- ++ // such as recursive enter and exit -- but we have to be wary of ++ // I$ bloat, T$ effects and BP$ effects. ++ // ++ // If there's no contention try a 1-0 exit. That is, exit without ++ // a costly MEMBAR or CAS. See synchronizer.cpp for details on how ++ // we detect and recover from the race that the 1-0 exit admits. ++ // ++ // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier ++ // before it STs null into _owner, releasing the lock. Updates ++ // to data protected by the critical section must be visible before ++ // we drop the lock (and thus before any other thread could acquire ++ // the lock and observe the fields protected by the lock). 
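++      // Illustrative sketch, an assumption rather than part of the patch: the tests emitted
++      // below are the assembler form of this C-level 1-0 exit on the inflated ObjectMonitor m:
++      //   if (m->_owner == self && m->_recursions == 0 &&
++      //       m->_cxq == NULL && m->_EntryList == NULL) {
++      //     // release barrier (LoadStore|StoreStore), then a plain store
++      //     m->_owner = NULL;   // 1-0 exit, no CAS required
++      //   } else {
++      //     // force control through the slow path
++      //   }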
++#ifndef OPT_THREAD ++ get_thread(TREG); ++#endif ++ ++ // It's inflated ++ ld_d(scrReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2)); ++ xorr(scrReg, scrReg, TREG); ++ ++ ld_d(AT, Address(tmpReg, ObjectMonitor::recursions_offset_in_bytes() - 2)); ++ orr(scrReg, scrReg, AT); ++ ++ move(AT, R0); ++ bnez(scrReg, DONE_SET); ++ ++ ld_d(scrReg, Address(tmpReg, ObjectMonitor::cxq_offset_in_bytes() - 2)); ++ ld_d(AT, Address(tmpReg, ObjectMonitor::EntryList_offset_in_bytes() - 2)); ++ orr(scrReg, scrReg, AT); ++ ++ move(AT, R0); ++ bnez(scrReg, DONE_SET); ++ ++ membar(Assembler::Membar_mask_bits(LoadStore|StoreStore)); ++ st_d(R0, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2)); ++ li(resReg, 1); ++ b(DONE); ++ ++ bind(Stacked); ++ ld_d(tmpReg, Address(boxReg, 0)); ++ cmpxchg(Address(objReg, 0), boxReg, tmpReg, AT, false, false); ++ ++ bind(DONE_SET); ++ move(resReg, AT); ++ ++ if (EmitSync & 65536) { ++ bind (CheckSucc); ++ } ++ ++ bind(DONE); ++ ++ // Avoid branch to branch on AMD processors ++ if (EmitSync & 32768) { nop() ; } ++ } ++} ++#endif // COMPILER2 ++ ++void MacroAssembler::align(int modulus) { ++ while (offset() % modulus != 0) nop(); ++} ++ ++ ++void MacroAssembler::verify_FPU(int stack_depth, const char* s) { ++ //Unimplemented(); ++} ++ ++Register caller_saved_registers[] = {T7, T5, T6, A0, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T4, S8, RA, FP}; ++Register caller_saved_registers_except_v0[] = {T7, T5, T6, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T4, S8, RA, FP}; ++ ++ //TODO: LA ++//In LA, F0~23 are all caller-saved registers ++FloatRegister caller_saved_fpu_registers[] = {F0, F12, F13}; ++ ++// We preserve all caller-saved register ++void MacroAssembler::pushad(){ ++ int i; ++ // Fixed-point registers ++ int len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]); ++ addi_d(SP, SP, -1 * len * wordSize); ++ for (i = 0; i < len; i++) { ++ st_d(caller_saved_registers[i], SP, (len - i - 1) * wordSize); ++ } ++ ++ // Floating-point registers ++ len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); ++ addi_d(SP, SP, -1 * len * wordSize); ++ for (i = 0; i < len; i++) { ++ fst_d(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); ++ } ++}; ++ ++void MacroAssembler::popad(){ ++ int i; ++ // Floating-point registers ++ int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); ++ for (i = 0; i < len; i++) ++ { ++ fld_d(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); ++ } ++ addi_d(SP, SP, len * wordSize); ++ ++ // Fixed-point registers ++ len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]); ++ for (i = 0; i < len; i++) ++ { ++ ld_d(caller_saved_registers[i], SP, (len - i - 1) * wordSize); ++ } ++ addi_d(SP, SP, len * wordSize); ++}; ++ ++// We preserve all caller-saved register except V0 ++void MacroAssembler::pushad_except_v0() { ++ int i; ++ // Fixed-point registers ++ int len = sizeof(caller_saved_registers_except_v0) / sizeof(caller_saved_registers_except_v0[0]); ++ addi_d(SP, SP, -1 * len * wordSize); ++ for (i = 0; i < len; i++) { ++ st_d(caller_saved_registers_except_v0[i], SP, (len - i - 1) * wordSize); ++ } ++ ++ // Floating-point registers ++ len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); ++ addi_d(SP, SP, -1 * len * wordSize); ++ for (i = 0; i < len; i++) { ++ fst_d(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); ++ } ++} ++ ++void MacroAssembler::popad_except_v0() { ++ 
int i; ++ // Floating-point registers ++ int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); ++ for (i = 0; i < len; i++) { ++ fld_d(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); ++ } ++ addi_d(SP, SP, len * wordSize); ++ ++ // Fixed-point registers ++ len = sizeof(caller_saved_registers_except_v0) / sizeof(caller_saved_registers_except_v0[0]); ++ for (i = 0; i < len; i++) { ++ ld_d(caller_saved_registers_except_v0[i], SP, (len - i - 1) * wordSize); ++ } ++ addi_d(SP, SP, len * wordSize); ++} ++ ++void MacroAssembler::push2(Register reg1, Register reg2) { ++ addi_d(SP, SP, -16); ++ st_d(reg1, SP, 8); ++ st_d(reg2, SP, 0); ++} ++ ++void MacroAssembler::pop2(Register reg1, Register reg2) { ++ ld_d(reg1, SP, 8); ++ ld_d(reg2, SP, 0); ++ addi_d(SP, SP, 16); ++} ++ ++void MacroAssembler::push(unsigned int bitset) { ++ unsigned char regs[31]; ++ int count = 0; ++ ++ bitset >>= 1; ++ for (int reg = 1; reg < 31; reg++) { ++ if (1 & bitset) ++ regs[count++] = reg; ++ bitset >>= 1; ++ } ++ ++ addi_d(SP, SP, -align_up(count, 2) * wordSize); ++ for (int i = 0; i < count; i ++) ++ st_d(as_Register(regs[i]), SP, i * wordSize); ++} ++ ++void MacroAssembler::pop(unsigned int bitset) { ++ unsigned char regs[31]; ++ int count = 0; ++ ++ bitset >>= 1; ++ for (int reg = 1; reg < 31; reg++) { ++ if (1 & bitset) ++ regs[count++] = reg; ++ bitset >>= 1; ++ } ++ ++ for (int i = 0; i < count; i ++) ++ ld_d(as_Register(regs[i]), SP, i * wordSize); ++ addi_d(SP, SP, align_up(count, 2) * wordSize); ++} ++ ++// for UseCompressedOops Option ++void MacroAssembler::load_klass(Register dst, Register src) { ++ if(UseCompressedClassPointers){ ++ ld_wu(dst, Address(src, oopDesc::klass_offset_in_bytes())); ++ decode_klass_not_null(dst); ++ } else { ++ ld_d(dst, src, oopDesc::klass_offset_in_bytes()); ++ } ++} ++ ++void MacroAssembler::store_klass(Register dst, Register src) { ++ if(UseCompressedClassPointers){ ++ encode_klass_not_null(src); ++ st_w(src, dst, oopDesc::klass_offset_in_bytes()); ++ } else { ++ st_d(src, dst, oopDesc::klass_offset_in_bytes()); ++ } ++} ++ ++void MacroAssembler::load_prototype_header(Register dst, Register src) { ++ load_klass(dst, src); ++ ld_d(dst, Address(dst, Klass::prototype_header_offset())); ++} ++ ++void MacroAssembler::store_klass_gap(Register dst, Register src) { ++ if (UseCompressedClassPointers) { ++ st_w(src, dst, oopDesc::klass_gap_offset_in_bytes()); ++ } ++} ++ ++void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src, ++ Register tmp1, Register thread_tmp) { ++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ decorators = AccessInternal::decorator_fixup(decorators); ++ bool as_raw = (decorators & AS_RAW) != 0; ++ if (as_raw) { ++ bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, thread_tmp); ++ } else { ++ bs->load_at(this, decorators, type, dst, src, tmp1, thread_tmp); ++ } ++} ++ ++void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register src, ++ Register tmp1, Register tmp2) { ++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ decorators = AccessInternal::decorator_fixup(decorators); ++ bool as_raw = (decorators & AS_RAW) != 0; ++ if (as_raw) { ++ bs->BarrierSetAssembler::store_at(this, decorators, type, dst, src, tmp1, tmp2); ++ } else { ++ bs->store_at(this, decorators, type, dst, src, tmp1, tmp2); ++ } ++} ++ ++void 
MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1, ++ Register thread_tmp, DecoratorSet decorators) { ++ access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp); ++} ++ ++// Doesn't do verfication, generates fixed size code ++void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1, ++ Register thread_tmp, DecoratorSet decorators) { ++ access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL | decorators, dst, src, tmp1, thread_tmp); ++} ++ ++void MacroAssembler::store_heap_oop(Address dst, Register src, Register tmp1, ++ Register tmp2, DecoratorSet decorators) { ++ access_store_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, tmp2); ++} ++ ++// Used for storing NULLs. ++void MacroAssembler::store_heap_oop_null(Address dst) { ++ access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg); ++} ++ ++#ifdef ASSERT ++void MacroAssembler::verify_heapbase(const char* msg) { ++ assert (UseCompressedOops || UseCompressedClassPointers, "should be compressed"); ++ assert (Universe::heap() != NULL, "java heap should be initialized"); ++} ++#endif ++ ++// Algorithm must match oop.inline.hpp encode_heap_oop. ++void MacroAssembler::encode_heap_oop(Register r) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?"); ++#endif ++ verify_oop(r, "broken oop in encode_heap_oop"); ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(r, LogMinObjAlignmentInBytes); ++ } ++ return; ++ } ++ ++ sub_d(AT, r, S5_heapbase); ++ maskeqz(r, AT, r); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(r, LogMinObjAlignmentInBytes); ++ } ++} ++ ++void MacroAssembler::encode_heap_oop(Register dst, Register src) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?"); ++#endif ++ verify_oop(src, "broken oop in encode_heap_oop"); ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ srli_d(dst, src, LogMinObjAlignmentInBytes); ++ } else { ++ if (dst != src) { ++ move(dst, src); ++ } ++ } ++ return; ++ } ++ ++ sub_d(AT, src, S5_heapbase); ++ maskeqz(dst, AT, src); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(dst, LogMinObjAlignmentInBytes); ++ } ++} ++ ++void MacroAssembler::encode_heap_oop_not_null(Register r) { ++ assert (UseCompressedOops, "should be compressed"); ++#ifdef ASSERT ++ if (CheckCompressedOops) { ++ Label ok; ++ bne(r, R0, ok); ++ stop("null oop passed to encode_heap_oop_not_null"); ++ bind(ok); ++ } ++#endif ++ verify_oop(r, "broken oop in encode_heap_oop_not_null"); ++ if (Universe::narrow_oop_base() != NULL) { ++ sub_d(r, r, S5_heapbase); ++ } ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(r, LogMinObjAlignmentInBytes); ++ } ++} ++ ++void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) { ++ assert (UseCompressedOops, "should be compressed"); ++#ifdef ASSERT ++ if (CheckCompressedOops) { ++ Label ok; ++ bne(src, R0, ok); ++ stop("null oop passed to encode_heap_oop_not_null2"); ++ bind(ok); ++ } ++#endif ++ verify_oop(src, 
"broken oop in encode_heap_oop_not_null2"); ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ srli_d(dst, src, LogMinObjAlignmentInBytes); ++ } else { ++ if (dst != src) { ++ move(dst, src); ++ } ++ } ++ return; ++ } ++ ++ sub_d(dst, src, S5_heapbase); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(dst, LogMinObjAlignmentInBytes); ++ } ++} ++ ++void MacroAssembler::decode_heap_oop(Register r) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::decode_heap_oop corrupted?"); ++#endif ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shl(r, LogMinObjAlignmentInBytes); ++ } ++ return; ++ } ++ ++ move(AT, r); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ if (LogMinObjAlignmentInBytes <= 4) { ++ alsl_d(r, r, S5_heapbase, LogMinObjAlignmentInBytes - 1); ++ } else { ++ shl(r, LogMinObjAlignmentInBytes); ++ add_d(r, r, S5_heapbase); ++ } ++ } else { ++ add_d(r, r, S5_heapbase); ++ } ++ maskeqz(r, r, AT); ++ verify_oop(r, "broken oop in decode_heap_oop"); ++} ++ ++void MacroAssembler::decode_heap_oop(Register dst, Register src) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::decode_heap_oop corrupted?"); ++#endif ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ slli_d(dst, src, LogMinObjAlignmentInBytes); ++ } else { ++ if (dst != src) { ++ move(dst, src); ++ } ++ } ++ return; ++ } ++ ++ Register cond; ++ if (dst == src) { ++ cond = AT; ++ move(cond, src); ++ } else { ++ cond = src; ++ } ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ if (LogMinObjAlignmentInBytes <= 4) { ++ alsl_d(dst, src, S5_heapbase, LogMinObjAlignmentInBytes - 1); ++ } else { ++ slli_d(dst, src, LogMinObjAlignmentInBytes); ++ add_d(dst, dst, S5_heapbase); ++ } ++ } else { ++ add_d(dst, src, S5_heapbase); ++ } ++ maskeqz(dst, dst, cond); ++ verify_oop(dst, "broken oop in decode_heap_oop"); ++} ++ ++void MacroAssembler::decode_heap_oop_not_null(Register r) { ++ // Note: it will change flags ++ assert(UseCompressedOops, "should only be used for compressed headers"); ++ assert(Universe::heap() != NULL, "java heap should be initialized"); ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. 
++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ if (Universe::narrow_oop_base() != NULL) { ++ if (LogMinObjAlignmentInBytes <= 4) { ++ alsl_d(r, r, S5_heapbase, LogMinObjAlignmentInBytes - 1); ++ } else { ++ shl(r, LogMinObjAlignmentInBytes); ++ add_d(r, r, S5_heapbase); ++ } ++ } else { ++ shl(r, LogMinObjAlignmentInBytes); ++ } ++ } else { ++ assert(Universe::narrow_oop_base() == NULL, "sanity"); ++ } ++} ++ ++void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { ++ assert(UseCompressedOops, "should only be used for compressed headers"); ++ assert(Universe::heap() != NULL, "java heap should be initialized"); ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ if (Universe::narrow_oop_base() != NULL) { ++ if (LogMinObjAlignmentInBytes <= 4) { ++ alsl_d(dst, src, S5_heapbase, LogMinObjAlignmentInBytes - 1); ++ } else { ++ slli_d(dst, src, LogMinObjAlignmentInBytes); ++ add_d(dst, dst, S5_heapbase); ++ } ++ } else { ++ slli_d(dst, src, LogMinObjAlignmentInBytes); ++ } ++ } else { ++ assert (Universe::narrow_oop_base() == NULL, "sanity"); ++ if (dst != src) { ++ move(dst, src); ++ } ++ } ++} ++ ++void MacroAssembler::encode_klass_not_null(Register r) { ++ if (Universe::narrow_klass_base() != NULL) { ++ if (((uint64_t)Universe::narrow_klass_base() & 0xffffffff) == 0 ++ && Universe::narrow_klass_shift() == 0) { ++ bstrpick_d(r, r, 31, 0); ++ return; ++ } ++ assert(r != AT, "Encoding a klass in AT"); ++ li(AT, (int64_t)Universe::narrow_klass_base()); ++ sub_d(r, r, AT); ++ } ++ if (Universe::narrow_klass_shift() != 0) { ++ assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ shr(r, LogKlassAlignmentInBytes); ++ } ++} ++ ++void MacroAssembler::encode_klass_not_null(Register dst, Register src) { ++ if (dst == src) { ++ encode_klass_not_null(src); ++ } else { ++ if (Universe::narrow_klass_base() != NULL) { ++ if (((uint64_t)Universe::narrow_klass_base() & 0xffffffff) == 0 ++ && Universe::narrow_klass_shift() == 0) { ++ bstrpick_d(dst, src, 31, 0); ++ return; ++ } ++ li(dst, (int64_t)Universe::narrow_klass_base()); ++ sub_d(dst, src, dst); ++ if (Universe::narrow_klass_shift() != 0) { ++ assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ shr(dst, LogKlassAlignmentInBytes); ++ } ++ } else { ++ if (Universe::narrow_klass_shift() != 0) { ++ assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ srli_d(dst, src, LogKlassAlignmentInBytes); ++ } else { ++ move(dst, src); ++ } ++ } ++ } ++} ++ ++void MacroAssembler::decode_klass_not_null(Register r) { ++ assert(UseCompressedClassPointers, "should only be used for compressed headers"); ++ assert(r != AT, "Decoding a klass in AT"); ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. 
++ if (Universe::narrow_klass_base() != NULL) { ++ if (Universe::narrow_klass_shift() == 0) { ++ if (((uint64_t)Universe::narrow_klass_base() & 0xffffffff) == 0) { ++ lu32i_d(r, (uint64_t)Universe::narrow_klass_base() >> 32); ++ } else { ++ li(AT, (int64_t)Universe::narrow_klass_base()); ++ add_d(r, r, AT); ++ } ++ } else { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?"); ++ li(AT, (int64_t)Universe::narrow_klass_base()); ++ alsl_d(r, r, AT, Address::times_8 - 1); ++ } ++ } else { ++ if (Universe::narrow_klass_shift() != 0) { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ shl(r, LogKlassAlignmentInBytes); ++ } ++ } ++} ++ ++void MacroAssembler::decode_klass_not_null(Register dst, Register src) { ++ assert(UseCompressedClassPointers, "should only be used for compressed headers"); ++ if (dst == src) { ++ decode_klass_not_null(dst); ++ } else { ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. ++ if (Universe::narrow_klass_base() != NULL) { ++ if (Universe::narrow_klass_shift() == 0) { ++ if (((uint64_t)Universe::narrow_klass_base() & 0xffffffff) == 0) { ++ move(dst, src); ++ lu32i_d(dst, (uint64_t)Universe::narrow_klass_base() >> 32); ++ } else { ++ li(dst, (int64_t)Universe::narrow_klass_base()); ++ add_d(dst, dst, src); ++ } ++ } else { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?"); ++ li(dst, (int64_t)Universe::narrow_klass_base()); ++ alsl_d(dst, src, dst, Address::times_8 - 1); ++ } ++ } else { ++ if (Universe::narrow_klass_shift() != 0) { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ slli_d(dst, src, LogKlassAlignmentInBytes); ++ } else { ++ move(dst, src); ++ } ++ } ++ } ++} ++ ++void MacroAssembler::reinit_heapbase() { ++ if (UseCompressedOops || UseCompressedClassPointers) { ++ if (Universe::heap() != NULL) { ++ if (Universe::narrow_oop_base() == NULL) { ++ move(S5_heapbase, R0); ++ } else { ++ li(S5_heapbase, (int64_t)Universe::narrow_ptrs_base()); ++ } ++ } else { ++ li(S5_heapbase, (intptr_t)Universe::narrow_ptrs_base_addr()); ++ ld_d(S5_heapbase, S5_heapbase, 0); ++ } ++ } ++} ++ ++void MacroAssembler::check_klass_subtype(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label& L_success) { ++//implement ind gen_subtype_check ++ Label L_failure; ++ check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL); ++ check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL); ++ bind(L_failure); ++} ++ ++void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label* L_success, ++ Label* L_failure, ++ Label* L_slow_path, ++ RegisterOrConstant super_check_offset) { ++ assert_different_registers(sub_klass, super_klass, temp_reg); ++ bool must_load_sco = (super_check_offset.constant_or_zero() == -1); ++ if (super_check_offset.is_register()) { ++ assert_different_registers(sub_klass, super_klass, ++ super_check_offset.as_register()); ++ } else if (must_load_sco) { ++ assert(temp_reg != noreg, "supply either a temp or a register offset"); 
++ } ++ ++ Label L_fallthrough; ++ int label_nulls = 0; ++ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } ++ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } ++ if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } ++ assert(label_nulls <= 1, "at most one NULL in the batch"); ++ ++ int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); ++ int sco_offset = in_bytes(Klass::super_check_offset_offset()); ++ // If the pointers are equal, we are done (e.g., String[] elements). ++ // This self-check enables sharing of secondary supertype arrays among ++ // non-primary types such as array-of-interface. Otherwise, each such ++ // type would need its own customized SSA. ++ // We move this check to the front of the fast path because many ++ // type checks are in fact trivially successful in this manner, ++ // so we get a nicely predicted branch right at the start of the check. ++ beq(sub_klass, super_klass, *L_success); ++ // Check the supertype display: ++ if (must_load_sco) { ++ ld_wu(temp_reg, super_klass, sco_offset); ++ super_check_offset = RegisterOrConstant(temp_reg); ++ } ++ slli_d(AT, super_check_offset.register_or_noreg(), Address::times_1); ++ add_d(AT, sub_klass, AT); ++ ld_d(AT, AT, super_check_offset.constant_or_zero()*Address::times_1); ++ ++ // This check has worked decisively for primary supers. ++ // Secondary supers are sought in the super_cache ('super_cache_addr'). ++ // (Secondary supers are interfaces and very deeply nested subtypes.) ++ // This works in the same check above because of a tricky aliasing ++ // between the super_cache and the primary super display elements. ++ // (The 'super_check_addr' can address either, as the case requires.) ++ // Note that the cache is updated below if it does not help us find ++ // what we need immediately. ++ // So if it was a primary super, we can just fail immediately. ++ // Otherwise, it's the slow path for us (no success at this point). ++ ++ if (super_check_offset.is_register()) { ++ beq(super_klass, AT, *L_success); ++ addi_d(AT, super_check_offset.as_register(), -sc_offset); ++ if (L_failure == &L_fallthrough) { ++ beq(AT, R0, *L_slow_path); ++ } else { ++ bne_far(AT, R0, *L_failure); ++ b(*L_slow_path); ++ } ++ } else if (super_check_offset.as_constant() == sc_offset) { ++ // Need a slow path; fast failure is impossible. ++ if (L_slow_path == &L_fallthrough) { ++ beq(super_klass, AT, *L_success); ++ } else { ++ bne(super_klass, AT, *L_slow_path); ++ b(*L_success); ++ } ++ } else { ++ // No slow path; it's a fast decision. 
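++  // Illustrative note, not from the patch: the load emitted above is the primary-supers
++  // "display" probe; in C++ terms the fast path tests
++  //   *(Klass**)((address)sub_klass + super_check_offset) == super_klass
++  // where super_check_offset either indexes the fixed-depth display or equals
++  // secondary_super_cache_offset(), in which case only the slow path can prove the
++  // relation; the branches that follow just pick the right continuation.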
++ if (L_failure == &L_fallthrough) { ++ beq(super_klass, AT, *L_success); ++ } else { ++ bne_far(super_klass, AT, *L_failure); ++ b(*L_success); ++ } ++ } ++ ++ bind(L_fallthrough); ++} ++ ++void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Register temp2_reg, ++ Label* L_success, ++ Label* L_failure, ++ bool set_cond_codes) { ++ if (temp2_reg == noreg) ++ temp2_reg = TSR; ++ assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg); ++#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg) ++ ++ Label L_fallthrough; ++ int label_nulls = 0; ++ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } ++ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } ++ assert(label_nulls <= 1, "at most one NULL in the batch"); ++ ++ // a couple of useful fields in sub_klass: ++ int ss_offset = in_bytes(Klass::secondary_supers_offset()); ++ int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); ++ Address secondary_supers_addr(sub_klass, ss_offset); ++ Address super_cache_addr( sub_klass, sc_offset); ++ ++ // Do a linear scan of the secondary super-klass chain. ++ // This code is rarely used, so simplicity is a virtue here. ++ // The repne_scan instruction uses fixed registers, which we must spill. ++ // Don't worry too much about pre-existing connections with the input regs. ++ ++#ifndef PRODUCT ++ int* pst_counter = &SharedRuntime::_partial_subtype_ctr; ++ ExternalAddress pst_counter_addr((address) pst_counter); ++#endif //PRODUCT ++ ++ // We will consult the secondary-super array. ++ ld_d(temp_reg, secondary_supers_addr); ++ // Load the array length. ++ ld_w(temp2_reg, Address(temp_reg, Array::length_offset_in_bytes())); ++ // Skip to start of data. ++ addi_d(temp_reg, temp_reg, Array::base_offset_in_bytes()); ++ ++ Label Loop, subtype; ++ bind(Loop); ++ beq(temp2_reg, R0, *L_failure); ++ ld_d(AT, temp_reg, 0); ++ addi_d(temp_reg, temp_reg, 1 * wordSize); ++ beq(AT, super_klass, subtype); ++ addi_d(temp2_reg, temp2_reg, -1); ++ b(Loop); ++ ++ bind(subtype); ++ st_d(super_klass, super_cache_addr); ++ if (L_success != &L_fallthrough) { ++ b(*L_success); ++ } ++ ++ // Success. Cache the super we found and proceed in triumph. ++#undef IS_A_TEMP ++ ++ bind(L_fallthrough); ++} ++ ++void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) { ++ ld_d(oop_result, Address(java_thread, JavaThread::vm_result_offset())); ++ st_d(R0, Address(java_thread, JavaThread::vm_result_offset())); ++ verify_oop(oop_result, "broken oop in call_VM_base"); ++} ++ ++void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) { ++ ld_d(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset())); ++ st_d(R0, Address(java_thread, JavaThread::vm_result_2_offset())); ++} ++ ++Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, ++ int extra_slot_offset) { ++ // cf. TemplateTable::prepare_invoke(), if (load_receiver). 
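++  // Illustrative note, not from the patch: the address returned below is, in C++ terms,
++  //   SP + Interpreter::expr_offset_in_bytes(extra_slot_offset)
++  //      + arg_slot * Interpreter::stackElementSize
++  // with the slot term folded into the base register (via alsl_d) when arg_slot is
++  // not a compile-time constant.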
++ int stackElementSize = Interpreter::stackElementSize; ++ int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0); ++#ifdef ASSERT ++ int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1); ++ assert(offset1 - offset == stackElementSize, "correct arithmetic"); ++#endif ++ Register scale_reg = NOREG; ++ Address::ScaleFactor scale_factor = Address::no_scale; ++ if (arg_slot.is_constant()) { ++ offset += arg_slot.as_constant() * stackElementSize; ++ } else { ++ scale_reg = arg_slot.as_register(); ++ scale_factor = Address::times_8; ++ } ++ // We don't push RA on stack in prepare_invoke. ++ // offset += wordSize; // return PC is on stack ++ if(scale_reg==NOREG) return Address(SP, offset); ++ else { ++ alsl_d(scale_reg, scale_reg, SP, scale_factor - 1); ++ return Address(scale_reg, offset); ++ } ++} ++ ++SkipIfEqual::~SkipIfEqual() { ++ _masm->bind(_label); ++} ++ ++void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) { ++ switch (size_in_bytes) { ++ case 8: ld_d(dst, src); break; ++ case 4: ld_w(dst, src); break; ++ case 2: is_signed ? ld_h(dst, src) : ld_hu(dst, src); break; ++ case 1: is_signed ? ld_b( dst, src) : ld_bu( dst, src); break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) { ++ switch (size_in_bytes) { ++ case 8: st_d(src, dst); break; ++ case 4: st_w(src, dst); break; ++ case 2: st_h(src, dst); break; ++ case 1: st_b(src, dst); break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++// Look up the method for a megamorphic invokeinterface call. ++// The target method is determined by . ++// The receiver klass is in recv_klass. ++// On success, the result will be in method_result, and execution falls through. ++// On failure, execution transfers to the given label. ++void MacroAssembler::lookup_interface_method(Register recv_klass, ++ Register intf_klass, ++ RegisterOrConstant itable_index, ++ Register method_result, ++ Register scan_temp, ++ Label& L_no_such_interface, ++ bool return_method) { ++ assert_different_registers(recv_klass, intf_klass, scan_temp, AT); ++ assert_different_registers(method_result, intf_klass, scan_temp, AT); ++ assert(recv_klass != method_result || !return_method, ++ "recv_klass can be destroyed when method isn't needed"); ++ ++ assert(itable_index.is_constant() || itable_index.as_register() == method_result, ++ "caller must use same register for non-constant itable index as for method"); ++ ++ // Compute start of first itableOffsetEntry (which is at the end of the vtable) ++ int vtable_base = in_bytes(Klass::vtable_start_offset()); ++ int itentry_off = itableMethodEntry::method_offset_in_bytes(); ++ int scan_step = itableOffsetEntry::size() * wordSize; ++ int vte_size = vtableEntry::size() * wordSize; ++ Address::ScaleFactor times_vte_scale = Address::times_ptr; ++ assert(vte_size == wordSize, "else adjust times_vte_scale"); ++ ++ ld_w(scan_temp, Address(recv_klass, Klass::vtable_length_offset())); ++ ++ // %%% Could store the aligned, prescaled offset in the klassoop. ++ alsl_d(scan_temp, scan_temp, recv_klass, times_vte_scale - 1); ++ addi_d(scan_temp, scan_temp, vtable_base); ++ ++ if (return_method) { ++ // Adjust recv_klass by scaled itable_index, so we can free itable_index. 
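++    // Illustrative note, not from the patch: after this adjustment recv_klass points at
++    // the itableMethodEntry for the requested index, i.e.
++    //   recv_klass += itable_index * itableMethodEntry::size() * wordSize
++    //                 + itableMethodEntry::method_offset_in_bytes();
++    // so once the matching itableOffsetEntry is found, the method is fetched with a
++    // single indexed load (ldx_d) using that entry's offset.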
++ if (itable_index.is_constant()) { ++ li(AT, (itable_index.as_constant() * itableMethodEntry::size() * wordSize) + itentry_off); ++ add_d(recv_klass, recv_klass, AT); ++ } else { ++ assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); ++ alsl_d(AT, itable_index.as_register(), recv_klass, (int)Address::times_ptr - 1); ++ addi_d(recv_klass, AT, itentry_off); ++ } ++ } ++ ++ Label search, found_method; ++ ++ ld_d(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes())); ++ beq(intf_klass, method_result, found_method); ++ ++ bind(search); ++ // Check that the previous entry is non-null. A null entry means that ++ // the receiver class doesn't implement the interface, and wasn't the ++ // same as when the caller was compiled. ++ beqz(method_result, L_no_such_interface); ++ addi_d(scan_temp, scan_temp, scan_step); ++ ld_d(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes())); ++ bne(intf_klass, method_result, search); ++ ++ bind(found_method); ++ if (return_method) { ++ // Got a hit. ++ ld_wu(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes())); ++ ldx_d(method_result, recv_klass, scan_temp); ++ } ++} ++ ++// virtual method calling ++void MacroAssembler::lookup_virtual_method(Register recv_klass, ++ RegisterOrConstant vtable_index, ++ Register method_result) { ++ const int base = in_bytes(Klass::vtable_start_offset()); ++ assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below"); ++ ++ if (vtable_index.is_constant()) { ++ li(AT, vtable_index.as_constant()); ++ alsl_d(AT, AT, recv_klass, Address::times_ptr - 1); ++ } else { ++ alsl_d(AT, vtable_index.as_register(), recv_klass, Address::times_ptr - 1); ++ } ++ ++ ld_d(method_result, AT, base + vtableEntry::method_offset_in_bytes()); ++} ++ ++#ifdef COMPILER2 ++// Compare strings, used for char[] and byte[]. ++void MacroAssembler::string_compare(Register str1, Register str2, ++ Register cnt1, Register cnt2, Register result, ++ int ae) { ++ Label L, Loop, haveResult, done; ++ ++ bool isLL = ae == StrIntrinsicNode::LL; ++ bool isLU = ae == StrIntrinsicNode::LU; ++ bool isUL = ae == StrIntrinsicNode::UL; ++ ++ bool str1_isL = isLL || isLU; ++ bool str2_isL = isLL || isUL; ++ ++ if (!str1_isL) srli_w(cnt1, cnt1, 1); ++ if (!str2_isL) srli_w(cnt2, cnt2, 1); ++ ++ // compute the and difference of lengths (in result) ++ sub_d(result, cnt1, cnt2); // result holds the difference of two lengths ++ ++ // compute the shorter length (in cnt1) ++ bge(cnt2, cnt1, Loop); ++ move(cnt1, cnt2); ++ ++ // Now the shorter length is in cnt1 and cnt2 can be used as a tmp register ++ bind(Loop); // Loop begin ++ if (str1_isL) { ++ ld_bu(AT, str1, 0); ++ } else { ++ ld_hu(AT, str1, 0); ++ } ++ beq(cnt1, R0, done); ++ ++ // compare current character ++ if (str2_isL) { ++ ld_bu(cnt2, str2, 0); ++ } else { ++ ld_hu(cnt2, str2, 0); ++ } ++ addi_d(str1, str1, str1_isL ? 1 : 2); ++ bne(AT, cnt2, haveResult); ++ addi_d(str2, str2, str2_isL ? 1 : 2); ++ addi_d(cnt1, cnt1, -1); ++ b(Loop); ++ ++ bind(haveResult); ++ sub_d(result, AT, cnt2); ++ ++ bind(done); ++} ++ ++// Compare char[] or byte[] arrays or substrings. ++void MacroAssembler::arrays_equals(Register str1, Register str2, ++ Register cnt, Register tmp1, Register tmp2, Register result, ++ bool is_char) { ++ Label Loop, LoopEnd, True, False; ++ ++ addi_d(result, R0, 1); ++ beq(str1, str2, True); // same char[] ? ++ beqz(cnt, True); ++ ++ addi_d(AT, R0, is_char ? 
wordSize/2 : wordSize); ++ bind(Loop); ++ blt(cnt, AT, LoopEnd); ++ ld_d(tmp1, str1, 0); ++ ld_d(tmp2, str2, 0); ++ bne(tmp1, tmp2, False); ++ addi_d(str1, str1, 8); ++ addi_d(str2, str2, 8); ++ addi_d(cnt, cnt, is_char ? -wordSize/2 : -wordSize); ++ b(Loop); ++ ++ bind(LoopEnd); ++ beqz(cnt, True); ++ // compare current character ++ if (is_char) { ++ ld_hu(tmp1, str1, 0); ++ ld_hu(tmp2, str2, 0); ++ } else { ++ ld_bu(tmp1, str1, 0); ++ ld_bu(tmp2, str2, 0); ++ } ++ bne(tmp1, tmp2, False); ++ addi_d(str1, str1, is_char ? 2 : 1); ++ addi_d(str2, str2, is_char ? 2 : 1); ++ addi_d(cnt, cnt, -1); ++ b(LoopEnd); ++ ++ bind(False); ++ addi_d(result, R0, 0); ++ ++ bind(True); ++} ++#endif // COMPILER2 ++ ++void MacroAssembler::load_byte_map_base(Register reg) { ++ jbyte *byte_map_base = ++ ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base(); ++ ++ // Strictly speaking the byte_map_base isn't an address at all, and it might ++ // even be negative. It is thus materialised as a constant. ++ li(reg, (uint64_t)byte_map_base); ++} ++ ++// This method checks if provided byte array contains byte with highest bit set. ++void MacroAssembler::has_negatives(Register ary1, Register len, Register result) { ++ Label Loop, End, Nega, Done; ++ ++ orr(result, R0, R0); ++ bge(R0, len, Done); ++ ++ li(AT, 0x8080808080808080); ++ ++ addi_d(len, len, -8); ++ blt(len, R0, End); ++ ++ bind(Loop); ++ ld_d(result, ary1, 0); ++ andr(result, result, AT); ++ bnez(result, Nega); ++ beqz(len, Done); ++ addi_d(len, len, -8); ++ addi_d(ary1, ary1, 8); ++ bge(len, R0, Loop); ++ ++ bind(End); ++ ld_d(result, ary1, 0); ++ slli_d(len, len, 3); ++ sub_d(len, R0, len); ++ sll_d(result, result, len); ++ andr(result, result, AT); ++ beqz(result, Done); ++ ++ bind(Nega); ++ ori(result, R0, 1); ++ ++ bind(Done); ++} ++ ++// Compress char[] to byte[]. len must be positive int. ++// jtreg: TestStringIntrinsicRangeChecks.java ++void MacroAssembler::char_array_compress(Register src, Register dst, ++ Register len, Register result, ++ Register tmp1, Register tmp2, ++ Register tmp3) { ++ Label Loop, Done, Once, Fail; ++ ++ move(result, len); ++ bge(R0, result, Done); ++ ++ srli_w(AT, len, 2); ++ andi(len, len, 3); ++ ++ li(tmp3, 0xff00ff00ff00ff00); ++ ++ bind(Loop); ++ beqz(AT, Once); ++ ld_d(tmp1, src, 0); ++ andr(tmp2, tmp3, tmp1); // not latin-1, stop here ++ bnez(tmp2, Fail); ++ ++ // 0x00a100b200c300d4 -> 0x00000000a1b2c3d4 ++ srli_d(tmp2, tmp1, 8); ++ orr(tmp2, tmp2, tmp1); // 0x00a1a1b2b2c3c3d4 ++ bstrpick_d(tmp1, tmp2, 47, 32); // 0x0000a1b2 ++ slli_d(tmp1, tmp1, 16); // 0xa1b20000 ++ bstrins_d(tmp1, tmp2, 15, 0); // 0xa1b2c3d4 ++ ++ st_w(tmp1, dst, 0); ++ addi_w(AT, AT, -1); ++ addi_d(dst, dst, 4); ++ addi_d(src, src, 8); ++ b(Loop); ++ ++ bind(Once); ++ beqz(len, Done); ++ ld_d(AT, src, 0); ++ ++ bstrpick_d(tmp1, AT, 15, 0); ++ andr(tmp2, tmp3, tmp1); ++ bnez(tmp2, Fail); ++ st_b(tmp1, dst, 0); ++ addi_w(len, len, -1); ++ ++ beqz(len, Done); ++ bstrpick_d(tmp1, AT, 31, 16); ++ andr(tmp2, tmp3, tmp1); ++ bnez(tmp2, Fail); ++ st_b(tmp1, dst, 1); ++ addi_w(len, len, -1); ++ ++ beqz(len, Done); ++ bstrpick_d(tmp1, AT, 47, 32); ++ andr(tmp2, tmp3, tmp1); ++ bnez(tmp2, Fail); ++ st_b(tmp1, dst, 2); ++ b(Done); ++ ++ bind(Fail); ++ move(result, R0); ++ ++ bind(Done); ++} ++ ++// Inflate byte[] to char[]. len must be positive int. 
++// jtreg:test/jdk/sun/nio/cs/FindDecoderBugs.java ++void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len, ++ Register tmp1, Register tmp2) { ++ Label Loop, Once, Done; ++ ++ bge(R0, len, Done); ++ ++ srli_w(AT, len, 2); ++ andi(len, len, 3); ++ ++ bind(Loop); ++ beqz(AT, Once); ++ ld_wu(tmp1, src, 0); ++ ++ // 0x00000000a1b2c3d4 -> 0x00a100b200c300d4 ++ bstrpick_d(tmp2, tmp1, 7, 0); ++ srli_d(tmp1, tmp1, 8); ++ bstrins_d(tmp2, tmp1, 23, 16); ++ srli_d(tmp1, tmp1, 8); ++ bstrins_d(tmp2, tmp1, 39, 32); ++ srli_d(tmp1, tmp1, 8); ++ bstrins_d(tmp2, tmp1, 55, 48); ++ ++ st_d(tmp2, dst, 0); ++ addi_w(AT, AT, -1); ++ addi_d(dst, dst, 8); ++ addi_d(src, src, 4); ++ b(Loop); ++ ++ bind(Once); ++ beqz(len, Done); ++ ld_wu(tmp1, src, 0); ++ ++ bstrpick_d(tmp2, tmp1, 7, 0); ++ st_h(tmp2, dst, 0); ++ addi_w(len, len, -1); ++ ++ beqz(len, Done); ++ bstrpick_d(tmp2, tmp1, 15, 8); ++ st_h(tmp2, dst, 2); ++ addi_w(len, len, -1); ++ ++ beqz(len, Done); ++ bstrpick_d(tmp2, tmp1, 23, 16); ++ st_h(tmp2, dst, 4); ++ ++ bind(Done); ++} ++ ++void MacroAssembler::string_indexof_char(Register str1, Register cnt1, ++ Register ch, Register result, ++ Register tmp1, Register tmp2, ++ Register tmp3) ++{ ++ Label CH1_LOOP, HAS_ZERO, DO1_SHORT, DO1_LOOP, NOMATCH, DONE; ++ ++ beqz(cnt1, NOMATCH); ++ ++ move(result, R0); ++ ori(tmp1, R0, 4); ++ blt(cnt1, tmp1, DO1_LOOP); ++ ++ // UTF-16 char occupies 16 bits ++ // ch -> chchchch ++ bstrins_d(ch, ch, 31, 16); ++ bstrins_d(ch, ch, 63, 32); ++ ++ li(tmp2, 0x0001000100010001); ++ li(tmp3, 0x7fff7fff7fff7fff); ++ ++ bind(CH1_LOOP); ++ ld_d(AT, str1, 0); ++ xorr(AT, ch, AT); ++ sub_d(tmp1, AT, tmp2); ++ orr(AT, AT, tmp3); ++ andn(tmp1, tmp1, AT); ++ bnez(tmp1, HAS_ZERO); ++ addi_d(str1, str1, 8); ++ addi_d(result, result, 4); ++ ++ // meet the end of string ++ beq(cnt1, result, NOMATCH); ++ ++ addi_d(tmp1, result, 4); ++ bge(tmp1, cnt1, DO1_SHORT); ++ b(CH1_LOOP); ++ ++ bind(HAS_ZERO); ++ ctz_d(tmp1, tmp1); ++ srli_d(tmp1, tmp1, 4); ++ add_d(result, result, tmp1); ++ b(DONE); ++ ++ // restore ch ++ bind(DO1_SHORT); ++ bstrpick_d(ch, ch, 15, 0); ++ ++ bind(DO1_LOOP); ++ ld_hu(tmp1, str1, 0); ++ beq(ch, tmp1, DONE); ++ addi_d(str1, str1, 2); ++ addi_d(result, result, 1); ++ blt(result, cnt1, DO1_LOOP); ++ ++ bind(NOMATCH); ++ addi_d(result, R0, -1); ++ ++ bind(DONE); ++} ++ ++void MacroAssembler::clear_jweak_tag(Register possibly_jweak) { ++ const int32_t inverted_jweak_mask = ~static_cast(JNIHandles::weak_tag_mask); ++ STATIC_ASSERT(inverted_jweak_mask == -2); // otherwise check this code ++ // The inverted mask is sign-extended ++ li(AT, inverted_jweak_mask); ++ andr(possibly_jweak, AT, possibly_jweak); ++} ++ ++void MacroAssembler::resolve_jobject(Register value, ++ Register thread, ++ Register tmp) { ++ assert_different_registers(value, thread, tmp); ++ Label done, not_weak; ++ beq(value, R0, done); // Use NULL as-is. ++ li(AT, JNIHandles::weak_tag_mask); // Test for jweak tag. ++ andr(AT, value, AT); ++ beq(AT, R0, not_weak); ++ // Resolve jweak. ++ access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, ++ value, Address(value, -JNIHandles::weak_tag_value), tmp, thread); ++ verify_oop(value); ++ b(done); ++ bind(not_weak); ++ // Resolve (untagged) jobject. 
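++  // Illustrative note, not from the patch: JNI handles carry a tag in their low bits,
++  // and resolve_jobject is, in C++ terms (with 'handle' standing for the value register),
++  //   if (handle & JNIHandles::weak_tag_mask) {                  // jweak
++  //     value = *(oop*)(handle - JNIHandles::weak_tag_value);    // phantom-ref load
++  //   } else {                                                   // ordinary jobject
++  //     value = *(oop*)handle;
++  //   }
++  // with both loads routed through the GC barrier via access_load_at().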
++ access_load_at(T_OBJECT, IN_NATIVE, value, Address(value, 0), tmp, thread); ++ verify_oop(value); ++ bind(done); ++} ++ ++void MacroAssembler::lea(Register rd, Address src) { ++ Register dst = rd; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index == noreg) { ++ if (is_simm(disp, 12)) { ++ addi_d(dst, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ add_d(dst, base, AT); ++ } ++ } else { ++ if (scale == 0) { ++ if (is_simm(disp, 12)) { ++ add_d(AT, base, index); ++ addi_d(dst, AT, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ add_d(AT, base, AT); ++ add_d(dst, AT, index); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ alsl_d(AT, index, base, scale - 1); ++ addi_d(dst, AT, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ add_d(AT, AT, base); ++ alsl_d(dst, index, AT, scale - 1); ++ } ++ } ++ } ++} ++ ++void MacroAssembler::lea(Register dst, AddressLiteral adr) { ++ code_section()->relocate(pc(), adr.rspec()); ++ pcaddi(dst, (adr.target() - pc()) >> 2); ++} ++ ++int MacroAssembler::patched_branch(int dest_pos, int inst, int inst_pos) { ++ int v = (dest_pos - inst_pos) >> 2; ++ switch(high(inst, 6)) { ++ case beq_op: ++ case bne_op: ++ case blt_op: ++ case bge_op: ++ case bltu_op: ++ case bgeu_op: ++ assert(is_simm16(v), "must be simm16"); ++#ifndef PRODUCT ++ if(!is_simm16(v)) ++ { ++ tty->print_cr("must be simm16"); ++ tty->print_cr("Inst: %x", inst); ++ } ++#endif ++ ++ inst &= 0xfc0003ff; ++ inst |= ((v & 0xffff) << 10); ++ break; ++ case beqz_op: ++ case bnez_op: ++ case bccondz_op: ++ assert(is_simm(v, 21), "must be simm21"); ++#ifndef PRODUCT ++ if(!is_simm(v, 21)) ++ { ++ tty->print_cr("must be simm21"); ++ tty->print_cr("Inst: %x", inst); ++ } ++#endif ++ ++ inst &= 0xfc0003e0; ++ inst |= ( ((v & 0xffff) << 10) | ((v >> 16) & 0x1f) ); ++ break; ++ case b_op: ++ case bl_op: ++ assert(is_simm(v, 26), "must be simm26"); ++#ifndef PRODUCT ++ if(!is_simm(v, 26)) ++ { ++ tty->print_cr("must be simm26"); ++ tty->print_cr("Inst: %x", inst); ++ } ++#endif ++ ++ inst &= 0xfc000000; ++ inst |= ( ((v & 0xffff) << 10) | ((v >> 16) & 0x3ff) ); ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ return inst; ++} ++ ++void MacroAssembler::cmp_cmov(Register op1, ++ Register op2, ++ Register dst, ++ Register src1, ++ Register src2, ++ CMCompare cmp, ++ bool is_signed) { ++ switch (cmp) { ++ case EQ: ++ sub_d(AT, op1, op2); ++ if (dst == src2) { ++ masknez(dst, src2, AT); ++ maskeqz(AT, src1, AT); ++ } else { ++ maskeqz(dst, src1, AT); ++ masknez(AT, src2, AT); ++ } ++ break; ++ ++ case NE: ++ sub_d(AT, op1, op2); ++ if (dst == src2) { ++ maskeqz(dst, src2, AT); ++ masknez(AT, src1, AT); ++ } else { ++ masknez(dst, src1, AT); ++ maskeqz(AT, src2, AT); ++ } ++ break; ++ ++ case GT: ++ if (is_signed) { ++ slt(AT, op2, op1); ++ } else { ++ sltu(AT, op2, op1); ++ } ++ if(dst == src2) { ++ maskeqz(dst, src2, AT); ++ masknez(AT, src1, AT); ++ } else { ++ masknez(dst, src1, AT); ++ maskeqz(AT, src2, AT); ++ } ++ break; ++ case GE: ++ if (is_signed) { ++ slt(AT, op1, op2); ++ } else { ++ sltu(AT, op1, op2); ++ } ++ if(dst == src2) { ++ masknez(dst, src2, AT); ++ maskeqz(AT, src1, AT); ++ } else { ++ maskeqz(dst, src1, AT); ++ masknez(AT, src2, AT); ++ } ++ break; ++ ++ case LT: 
++ if (is_signed) { ++ slt(AT, op1, op2); ++ } else { ++ sltu(AT, op1, op2); ++ } ++ if(dst == src2) { ++ maskeqz(dst, src2, AT); ++ masknez(AT, src1, AT); ++ } else { ++ masknez(dst, src1, AT); ++ maskeqz(AT, src2, AT); ++ } ++ break; ++ case LE: ++ if (is_signed) { ++ slt(AT, op2, op1); ++ } else { ++ sltu(AT, op2, op1); ++ } ++ if(dst == src2) { ++ masknez(dst, src2, AT); ++ maskeqz(AT, src1, AT); ++ } else { ++ maskeqz(dst, src1, AT); ++ masknez(AT, src2, AT); ++ } ++ break; ++ default: ++ Unimplemented(); ++ } ++ OR(dst, dst, AT); ++} ++ ++void MacroAssembler::cmp_cmov(Register op1, ++ Register op2, ++ Register dst, ++ Register src, ++ CMCompare cmp, ++ bool is_signed) { ++ switch (cmp) { ++ case EQ: ++ sub_d(AT, op1, op2); ++ maskeqz(dst, dst, AT); ++ masknez(AT, src, AT); ++ break; ++ ++ case NE: ++ sub_d(AT, op1, op2); ++ masknez(dst, dst, AT); ++ maskeqz(AT, src, AT); ++ break; ++ ++ case GT: ++ if (is_signed) { ++ slt(AT, op2, op1); ++ } else { ++ sltu(AT, op2, op1); ++ } ++ masknez(dst, dst, AT); ++ maskeqz(AT, src, AT); ++ break; ++ ++ case GE: ++ if (is_signed) { ++ slt(AT, op1, op2); ++ } else { ++ sltu(AT, op1, op2); ++ } ++ maskeqz(dst, dst, AT); ++ masknez(AT, src, AT); ++ break; ++ ++ case LT: ++ if (is_signed) { ++ slt(AT, op1, op2); ++ } else { ++ sltu(AT, op1, op2); ++ } ++ masknez(dst, dst, AT); ++ maskeqz(AT, src, AT); ++ break; ++ ++ case LE: ++ if (is_signed) { ++ slt(AT, op2, op1); ++ } else { ++ sltu(AT, op2, op1); ++ } ++ maskeqz(dst, dst, AT); ++ masknez(AT, src, AT); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ OR(dst, dst, AT); ++} ++ ++ ++void MacroAssembler::cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ Register dst, ++ Register src, ++ FloatRegister tmp1, ++ FloatRegister tmp2, ++ CMCompare cmp, ++ bool is_float) { ++ movgr2fr_d(tmp1, dst); ++ movgr2fr_d(tmp2, src); ++ ++ switch(cmp) { ++ case EQ: ++ if (is_float) { ++ fcmp_ceq_s(FCC0, op1, op2); ++ } else { ++ fcmp_ceq_d(FCC0, op1, op2); ++ } ++ fsel(tmp1, tmp1, tmp2, FCC0); ++ break; ++ ++ case NE: ++ if (is_float) { ++ fcmp_ceq_s(FCC0, op1, op2); ++ } else { ++ fcmp_ceq_d(FCC0, op1, op2); ++ } ++ fsel(tmp1, tmp2, tmp1, FCC0); ++ break; ++ ++ case GT: ++ if (is_float) { ++ fcmp_cule_s(FCC0, op1, op2); ++ } else { ++ fcmp_cule_d(FCC0, op1, op2); ++ } ++ fsel(tmp1, tmp2, tmp1, FCC0); ++ break; ++ ++ case GE: ++ if (is_float) { ++ fcmp_cult_s(FCC0, op1, op2); ++ } else { ++ fcmp_cult_d(FCC0, op1, op2); ++ } ++ fsel(tmp1, tmp2, tmp1, FCC0); ++ break; ++ ++ case LT: ++ if (is_float) { ++ fcmp_cult_s(FCC0, op1, op2); ++ } else { ++ fcmp_cult_d(FCC0, op1, op2); ++ } ++ fsel(tmp1, tmp1, tmp2, FCC0); ++ break; ++ ++ case LE: ++ if (is_float) { ++ fcmp_cule_s(FCC0, op1, op2); ++ } else { ++ fcmp_cule_d(FCC0, op1, op2); ++ } ++ fsel(tmp1, tmp1, tmp2, FCC0); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ ++ movfr2gr_d(dst, tmp1); ++} ++ ++void MacroAssembler::cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ FloatRegister dst, ++ FloatRegister src, ++ CMCompare cmp, ++ bool is_float) { ++ switch(cmp) { ++ case EQ: ++ if (!is_float) { ++ fcmp_ceq_d(FCC0, op1, op2); ++ } else { ++ fcmp_ceq_s(FCC0, op1, op2); ++ } ++ fsel(dst, dst, src, FCC0); ++ break; ++ ++ case NE: ++ if (!is_float) { ++ fcmp_ceq_d(FCC0, op1, op2); ++ } else { ++ fcmp_ceq_s(FCC0, op1, op2); ++ } ++ fsel(dst, src, dst, FCC0); ++ break; ++ ++ case GT: ++ if (!is_float) { ++ fcmp_cule_d(FCC0, op1, op2); ++ } else { ++ fcmp_cule_s(FCC0, op1, op2); ++ } ++ fsel(dst, src, dst, FCC0); ++ break; ++ ++ case GE: ++ if (!is_float) { ++ 
fcmp_cult_d(FCC0, op1, op2); ++ } else { ++ fcmp_cult_s(FCC0, op1, op2); ++ } ++ fsel(dst, src, dst, FCC0); ++ break; ++ ++ case LT: ++ if (!is_float) { ++ fcmp_cult_d(FCC0, op1, op2); ++ } else { ++ fcmp_cult_s(FCC0, op1, op2); ++ } ++ fsel(dst, dst, src, FCC0); ++ break; ++ ++ case LE: ++ if (!is_float) { ++ fcmp_cule_d(FCC0, op1, op2); ++ } else { ++ fcmp_cule_s(FCC0, op1, op2); ++ } ++ fsel(dst, dst, src, FCC0); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::cmp_cmov(Register op1, ++ Register op2, ++ FloatRegister dst, ++ FloatRegister src, ++ FloatRegister tmp1, ++ FloatRegister tmp2, ++ CMCompare cmp) { ++ movgr2fr_w(tmp1, R0); ++ ++ switch (cmp) { ++ case EQ: ++ sub_d(AT, op1, op2); ++ movgr2fr_w(tmp2, AT); ++ fcmp_ceq_s(FCC0, tmp1, tmp2); ++ fsel(dst, dst, src, FCC0); ++ break; ++ ++ case NE: ++ sub_d(AT, op1, op2); ++ movgr2fr_w(tmp2, AT); ++ fcmp_ceq_s(FCC0, tmp1, tmp2); ++ fsel(dst, src, dst, FCC0); ++ break; ++ ++ case GT: ++ slt(AT, op2, op1); ++ movgr2fr_w(tmp2, AT); ++ fcmp_ceq_s(FCC0, tmp1, tmp2); ++ fsel(dst, src, dst, FCC0); ++ break; ++ ++ case GE: ++ slt(AT, op1, op2); ++ movgr2fr_w(tmp2, AT); ++ fcmp_ceq_s(FCC0, tmp1, tmp2); ++ fsel(dst, dst, src, FCC0); ++ break; ++ ++ case LT: ++ slt(AT, op1, op2); ++ movgr2fr_w(tmp2, AT); ++ fcmp_ceq_s(FCC0, tmp1, tmp2); ++ fsel(dst, src, dst, FCC0); ++ break; ++ ++ case LE: ++ slt(AT, op2, op1); ++ movgr2fr_w(tmp2, AT); ++ fcmp_ceq_s(FCC0, tmp1, tmp2); ++ fsel(dst, dst, src, FCC0); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::loadstore(Register reg, Register base, int disp, int type) { ++ switch (type) { ++ case STORE_BYTE: st_b (reg, base, disp); break; ++ case STORE_CHAR: ++ case STORE_SHORT: st_h (reg, base, disp); break; ++ case STORE_INT: st_w (reg, base, disp); break; ++ case STORE_LONG: st_d (reg, base, disp); break; ++ case LOAD_BYTE: ld_b (reg, base, disp); break; ++ case LOAD_U_BYTE: ld_bu(reg, base, disp); break; ++ case LOAD_SHORT: ld_h (reg, base, disp); break; ++ case LOAD_U_SHORT: ld_hu(reg, base, disp); break; ++ case LOAD_INT: ld_w (reg, base, disp); break; ++ case LOAD_U_INT: ld_wu(reg, base, disp); break; ++ case LOAD_LONG: ld_d (reg, base, disp); break; ++ case LOAD_LINKED_LONG: ++ ll_d(reg, base, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::loadstore(Register reg, Register base, Register disp, int type) { ++ switch (type) { ++ case STORE_BYTE: stx_b (reg, base, disp); break; ++ case STORE_CHAR: ++ case STORE_SHORT: stx_h (reg, base, disp); break; ++ case STORE_INT: stx_w (reg, base, disp); break; ++ case STORE_LONG: stx_d (reg, base, disp); break; ++ case LOAD_BYTE: ldx_b (reg, base, disp); break; ++ case LOAD_U_BYTE: ldx_bu(reg, base, disp); break; ++ case LOAD_SHORT: ldx_h (reg, base, disp); break; ++ case LOAD_U_SHORT: ldx_hu(reg, base, disp); break; ++ case LOAD_INT: ldx_w (reg, base, disp); break; ++ case LOAD_U_INT: ldx_wu(reg, base, disp); break; ++ case LOAD_LONG: ldx_d (reg, base, disp); break; ++ case LOAD_LINKED_LONG: ++ add_d(AT, base, disp); ++ ll_d(reg, AT, 0); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::loadstore(FloatRegister reg, Register base, int disp, int type) { ++ switch (type) { ++ case STORE_FLOAT: fst_s(reg, base, disp); break; ++ case STORE_DOUBLE: fst_d(reg, base, disp); break; ++ case STORE_VECTORX: vst (reg, base, disp); break; ++ case STORE_VECTORY: xvst (reg, base, disp); break; ++ case LOAD_FLOAT: fld_s(reg, base, disp); break; ++ 
case LOAD_DOUBLE: fld_d(reg, base, disp); break; ++ case LOAD_VECTORX: vld (reg, base, disp); break; ++ case LOAD_VECTORY: xvld (reg, base, disp); break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::loadstore(FloatRegister reg, Register base, Register disp, int type) { ++ switch (type) { ++ case STORE_FLOAT: fstx_s(reg, base, disp); break; ++ case STORE_DOUBLE: fstx_d(reg, base, disp); break; ++ case STORE_VECTORX: vstx (reg, base, disp); break; ++ case STORE_VECTORY: xvstx (reg, base, disp); break; ++ case LOAD_FLOAT: fldx_s(reg, base, disp); break; ++ case LOAD_DOUBLE: fldx_d(reg, base, disp); break; ++ case LOAD_VECTORX: vldx (reg, base, disp); break; ++ case LOAD_VECTORY: xvldx (reg, base, disp); break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++#ifdef COMPILER2 ++void MacroAssembler::reduce_ins_v(FloatRegister vec1, FloatRegister vec2, FloatRegister vec3, BasicType type, int opcode) { ++ switch (type) { ++ case T_BYTE: ++ switch (opcode) { ++ case Op_AddReductionVI: vadd_b(vec1, vec2, vec3); break; ++ case Op_MulReductionVI: vmul_b(vec1, vec2, vec3); break; ++ case Op_MaxReductionV: vmax_b(vec1, vec2, vec3); break; ++ case Op_MinReductionV: vmin_b(vec1, vec2, vec3); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ break; ++ case T_SHORT: ++ switch (opcode) { ++ case Op_AddReductionVI: vadd_h(vec1, vec2, vec3); break; ++ case Op_MulReductionVI: vmul_h(vec1, vec2, vec3); break; ++ case Op_MaxReductionV: vmax_h(vec1, vec2, vec3); break; ++ case Op_MinReductionV: vmin_h(vec1, vec2, vec3); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ break; ++ case T_INT: ++ switch (opcode) { ++ case Op_AddReductionVI: vadd_w(vec1, vec2, vec3); break; ++ case Op_MulReductionVI: vmul_w(vec1, vec2, vec3); break; ++ case Op_MaxReductionV: vmax_w(vec1, vec2, vec3); break; ++ case Op_MinReductionV: vmin_w(vec1, vec2, vec3); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ break; ++ case T_LONG: ++ switch (opcode) { ++ case Op_AddReductionVL: vadd_d(vec1, vec2, vec3); break; ++ case Op_MulReductionVL: vmul_d(vec1, vec2, vec3); break; ++ case Op_MaxReductionV: vmax_d(vec1, vec2, vec3); break; ++ case Op_MinReductionV: vmin_d(vec1, vec2, vec3); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::reduce_ins_r(Register reg1, Register reg2, Register reg3, BasicType type, int opcode) { ++ switch (type) { ++ case T_BYTE: ++ case T_SHORT: ++ case T_INT: ++ switch (opcode) { ++ case Op_AddReductionVI: add_w(reg1, reg2, reg3); break; ++ case Op_MulReductionVI: mul_w(reg1, reg2, reg3); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ break; ++ case T_LONG: ++ switch (opcode) { ++ case Op_AddReductionVL: add_d(reg1, reg2, reg3); break; ++ case Op_MulReductionVL: mul_d(reg1, reg2, reg3); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::reduce_ins_f(FloatRegister reg1, FloatRegister reg2, FloatRegister reg3, BasicType type, int opcode) { ++ switch (type) { ++ case T_FLOAT: ++ switch (opcode) { ++ case Op_AddReductionVF: fadd_s(reg1, reg2, reg3); break; ++ case Op_MulReductionVF: fmul_s(reg1, reg2, reg3); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ break; ++ case T_DOUBLE: ++ switch (opcode) { ++ case Op_AddReductionVD: fadd_d(reg1, reg2, reg3); break; ++ case Op_MulReductionVD: fmul_d(reg1, reg2, reg3); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ 
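For reference, here is a minimal scalar model of what the integer reductions generated by reduce() below compute: the vector lanes are folded with the reduction operator first, and the scalar input src is combined last, exactly as the trailing reduce_ins_r()/masknez/maskeqz sequence does. This is an editorial sketch with hypothetical names (add_reduction_vi, max_reduction_v, a plain array standing in for the LSX/LASX lanes), not code from this port; sub-int element types would additionally be sign-extended at the end.

#include <cstdint>

// Scalar semantics of Op_AddReductionVI over T_INT lanes (sketch only).
static int32_t add_reduction_vi(int32_t src, const int32_t* lanes, int lane_count) {
  int32_t acc = lanes[0];
  for (int i = 1; i < lane_count; i++) {
    acc += lanes[i];               // the per-lane folds done by reduce_ins_v()
  }
  return acc + src;                // final combine with the scalar input (reduce_ins_r)
}

// Scalar semantics of Op_MaxReductionV over T_INT lanes (sketch only).
static int32_t max_reduction_v(int32_t src, const int32_t* lanes, int lane_count) {
  int32_t acc = lanes[0];
  for (int i = 1; i < lane_count; i++) {
    if (lanes[i] > acc) acc = lanes[i];   // the vmax_* folds
  }
  return (acc > src) ? acc : src;         // the slt/masknez/maskeqz combine in reduce()
}

The generated code performs the same fold in log2(lane count) steps, using xvpermi_d/vpermi_w/vshuf4i shuffles to pair lanes instead of a linear loop.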
++void MacroAssembler::reduce(Register dst, Register src, FloatRegister vsrc, FloatRegister tmp1, FloatRegister tmp2, BasicType type, int opcode, int vector_size) { ++ if (vector_size == 32) { ++ xvpermi_d(tmp1, vsrc, 0b00001110); ++ reduce_ins_v(tmp1, vsrc, tmp1, type, opcode); ++ vpermi_w(tmp2, tmp1, 0b00001110); ++ reduce_ins_v(tmp1, tmp2, tmp1, type, opcode); ++ } else if (vector_size == 16) { ++ vpermi_w(tmp1, vsrc, 0b00001110); ++ reduce_ins_v(tmp1, vsrc, tmp1, type, opcode); ++ } else { ++ ShouldNotReachHere(); ++ } ++ ++ if (type != T_LONG) { ++ vshuf4i_w(tmp2, tmp1, 0b00000001); ++ reduce_ins_v(tmp1, tmp2, tmp1, type, opcode); ++ if (type != T_INT) { ++ vshuf4i_h(tmp2, tmp1, 0b00000001); ++ reduce_ins_v(tmp1, tmp2, tmp1, type, opcode); ++ if (type != T_SHORT) { ++ vshuf4i_b(tmp2, tmp1, 0b00000001); ++ reduce_ins_v(tmp1, tmp2, tmp1, type, opcode); ++ } ++ } ++ } ++ ++ switch (type) { ++ case T_BYTE: vpickve2gr_b(dst, tmp1, 0); break; ++ case T_SHORT: vpickve2gr_h(dst, tmp1, 0); break; ++ case T_INT: vpickve2gr_w(dst, tmp1, 0); break; ++ case T_LONG: vpickve2gr_d(dst, tmp1, 0); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ if (opcode == Op_MaxReductionV) { ++ slt(AT, dst, src); ++ masknez(dst, dst, AT); ++ maskeqz(AT, src, AT); ++ orr(dst, dst, AT); ++ } else if (opcode == Op_MinReductionV) { ++ slt(AT, src, dst); ++ masknez(dst, dst, AT); ++ maskeqz(AT, src, AT); ++ orr(dst, dst, AT); ++ } else { ++ reduce_ins_r(dst, dst, src, type, opcode); ++ } ++ switch (type) { ++ case T_BYTE: ext_w_b(dst, dst); break; ++ case T_SHORT: ext_w_h(dst, dst); break; ++ default: ++ break; ++ } ++} ++ ++void MacroAssembler::reduce(FloatRegister dst, FloatRegister src, FloatRegister vsrc, FloatRegister tmp, BasicType type, int opcode, int vector_size) { ++ if (vector_size == 32) { ++ switch (type) { ++ case T_FLOAT: ++ reduce_ins_f(dst, vsrc, src, type, opcode); ++ xvpickve_w(tmp, vsrc, 1); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ xvpickve_w(tmp, vsrc, 2); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ xvpickve_w(tmp, vsrc, 3); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ xvpickve_w(tmp, vsrc, 4); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ xvpickve_w(tmp, vsrc, 5); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ xvpickve_w(tmp, vsrc, 6); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ xvpickve_w(tmp, vsrc, 7); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ break; ++ case T_DOUBLE: ++ reduce_ins_f(dst, vsrc, src, type, opcode); ++ xvpickve_d(tmp, vsrc, 1); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ xvpickve_d(tmp, vsrc, 2); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ xvpickve_d(tmp, vsrc, 3); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else if (vector_size == 16) { ++ switch (type) { ++ case T_FLOAT: ++ reduce_ins_f(dst, vsrc, src, type, opcode); ++ vpermi_w(tmp, vsrc, 0b00000001); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ vpermi_w(tmp, vsrc, 0b00000010); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ vpermi_w(tmp, vsrc, 0b00000011); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ break; ++ case T_DOUBLE: ++ reduce_ins_f(dst, vsrc, src, type, opcode); ++ vpermi_w(tmp, vsrc, 0b00001110); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++#endif // COMPILER2 ++ ++/** ++ * Emits code to update CRC-32 with a byte value according to constants in table ++ * ++ * @param [in,out]crc Register containing the 
crc. ++ * @param [in]val Register containing the byte to fold into the CRC. ++ * @param [in]table Register containing the table of crc constants. ++ * ++ * uint32_t crc; ++ * val = crc_table[(val ^ crc) & 0xFF]; ++ * crc = val ^ (crc >> 8); ++**/ ++void MacroAssembler::update_byte_crc32(Register crc, Register val, Register table) { ++ xorr(val, val, crc); ++ andi(val, val, 0xff); ++ ld_w(val, Address(table, val, Address::times_4, 0)); ++ srli_w(crc, crc, 8); ++ xorr(crc, val, crc); ++} ++ ++/** ++ * @param crc register containing existing CRC (32-bit) ++ * @param buf register pointing to input byte buffer (byte*) ++ * @param len register containing number of bytes ++ * @param tmp scratch register ++**/ ++void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len, Register tmp) { ++ Label CRC_by64_loop, CRC_by4_loop, CRC_by1_loop, CRC_less64, CRC_by64_pre, CRC_by32_loop, CRC_less32, L_exit; ++ assert_different_registers(crc, buf, len, tmp); ++ ++ nor(crc, crc, R0); ++ ++ addi_d(len, len, -64); ++ bge(len, R0, CRC_by64_loop); ++ addi_d(len, len, 64-4); ++ bge(len, R0, CRC_by4_loop); ++ addi_d(len, len, 4); ++ blt(R0, len, CRC_by1_loop); ++ b(L_exit); ++ ++ bind(CRC_by64_loop); ++ ld_d(tmp, buf, 0); ++ crc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 8); ++ crc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 16); ++ crc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 24); ++ crc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 32); ++ crc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 40); ++ crc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 48); ++ crc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 56); ++ crc_w_d_w(crc, tmp, crc); ++ addi_d(buf, buf, 64); ++ addi_d(len, len, -64); ++ bge(len, R0, CRC_by64_loop); ++ addi_d(len, len, 64-4); ++ bge(len, R0, CRC_by4_loop); ++ addi_d(len, len, 4); ++ blt(R0, len, CRC_by1_loop); ++ b(L_exit); ++ ++ bind(CRC_by4_loop); ++ ld_w(tmp, buf, 0); ++ crc_w_w_w(crc, tmp, crc); ++ addi_d(buf, buf, 4); ++ addi_d(len, len, -4); ++ bge(len, R0, CRC_by4_loop); ++ addi_d(len, len, 4); ++ bge(R0, len, L_exit); ++ ++ bind(CRC_by1_loop); ++ ld_b(tmp, buf, 0); ++ crc_w_b_w(crc, tmp, crc); ++ addi_d(buf, buf, 1); ++ addi_d(len, len, -1); ++ blt(R0, len, CRC_by1_loop); ++ ++ bind(L_exit); ++ nor(crc, crc, R0); ++} ++ ++/** ++ * @param crc register containing existing CRC (32-bit) ++ * @param buf register pointing to input byte buffer (byte*) ++ * @param len register containing number of bytes ++ * @param tmp scratch register ++**/ ++void MacroAssembler::kernel_crc32c(Register crc, Register buf, Register len, Register tmp) { ++ Label CRC_by64_loop, CRC_by4_loop, CRC_by1_loop, CRC_less64, CRC_by64_pre, CRC_by32_loop, CRC_less32, L_exit; ++ assert_different_registers(crc, buf, len, tmp); ++ ++ addi_d(len, len, -64); ++ bge(len, R0, CRC_by64_loop); ++ addi_d(len, len, 64-4); ++ bge(len, R0, CRC_by4_loop); ++ addi_d(len, len, 4); ++ blt(R0, len, CRC_by1_loop); ++ b(L_exit); ++ ++ bind(CRC_by64_loop); ++ ld_d(tmp, buf, 0); ++ crcc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 8); ++ crcc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 16); ++ crcc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 24); ++ crcc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 32); ++ crcc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 40); ++ crcc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 48); ++ crcc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 56); ++ crcc_w_d_w(crc, tmp, crc); ++ addi_d(buf, buf, 64); ++ addi_d(len, len, -64); ++ bge(len, R0, CRC_by64_loop); ++ addi_d(len, len, 64-4); ++ bge(len, R0, CRC_by4_loop); ++ addi_d(len, len, 4); ++ blt(R0, len, 
CRC_by1_loop); ++ b(L_exit); ++ ++ bind(CRC_by4_loop); ++ ld_w(tmp, buf, 0); ++ crcc_w_w_w(crc, tmp, crc); ++ addi_d(buf, buf, 4); ++ addi_d(len, len, -4); ++ bge(len, R0, CRC_by4_loop); ++ addi_d(len, len, 4); ++ bge(R0, len, L_exit); ++ ++ bind(CRC_by1_loop); ++ ld_b(tmp, buf, 0); ++ crcc_w_b_w(crc, tmp, crc); ++ addi_d(buf, buf, 1); ++ addi_d(len, len, -1); ++ blt(R0, len, CRC_by1_loop); ++ ++ bind(L_exit); ++} ++ ++#ifdef COMPILER2 ++void MacroAssembler::cmp_branch_short(int flag, Register op1, Register op2, Label& L, bool is_signed) { ++ ++ switch(flag) { ++ case 0x01: //equal ++ beq(op1, op2, L); ++ break; ++ case 0x02: //not_equal ++ bne(op1, op2, L); ++ break; ++ case 0x03: //above ++ if (is_signed) ++ blt(op2, op1, L); ++ else ++ bltu(op2, op1, L); ++ break; ++ case 0x04: //above_equal ++ if (is_signed) ++ bge(op1, op2, L); ++ else ++ bgeu(op1, op2, L); ++ break; ++ case 0x05: //below ++ if (is_signed) ++ blt(op1, op2, L); ++ else ++ bltu(op1, op2, L); ++ break; ++ case 0x06: //below_equal ++ if (is_signed) ++ bge(op2, op1, L); ++ else ++ bgeu(op2, op1, L); ++ break; ++ default: ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::cmp_branch_long(int flag, Register op1, Register op2, Label* L, bool is_signed) { ++ switch(flag) { ++ case 0x01: //equal ++ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ if (is_signed) ++ blt_long(op2, op1, *L, true /* signed */); ++ else ++ blt_long(op2, op1, *L, false); ++ break; ++ case 0x04: //above_equal ++ if (is_signed) ++ bge_long(op1, op2, *L, true /* signed */); ++ else ++ bge_long(op1, op2, *L, false); ++ break; ++ case 0x05: //below ++ if (is_signed) ++ blt_long(op1, op2, *L, true /* signed */); ++ else ++ blt_long(op1, op2, *L, false); ++ break; ++ case 0x06: //below_equal ++ if (is_signed) ++ bge_long(op2, op1, *L, true /* signed */); ++ else ++ bge_long(op2, op1, *L, false); ++ break; ++ default: ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::cmp_branchEqNe_off21(int flag, Register op1, Label& L) { ++ switch(flag) { ++ case 0x01: //equal ++ beqz(op1, L); ++ break; ++ case 0x02: //not_equal ++ bnez(op1, L); ++ break; ++ default: ++ Unimplemented(); ++ } ++} ++#endif // COMPILER2 ++ ++void MacroAssembler::membar(Membar_mask_bits hint){ ++ address prev = pc() - NativeInstruction::sync_instruction_size; ++ address last = code()->last_insn(); ++ if (last != NULL && ((NativeInstruction*)last)->is_sync() && prev == last) { ++ code()->set_last_insn(NULL); ++ NativeMembar *membar = (NativeMembar*)prev; ++ // merged membar ++ // e.g. 
LoadLoad and LoadLoad|LoadStore to LoadLoad|LoadStore ++ membar->set_hint(membar->get_hint() & (~hint & 0xF)); ++ block_comment("merged membar"); ++ } else { ++ code()->set_last_insn(pc()); ++ Assembler::membar(hint); ++ } ++} ++ ++// Code for BigInteger::mulAdd intrinsic ++// out = A0 ++// in = A1 ++// offset = A2 (already out.length-offset) ++// len = A3 ++// k = A4 ++// ++// pseudo code from java implementation: ++// long kLong = k & LONG_MASK; ++// carry = 0; ++// offset = out.length-offset - 1; ++// for (int j = len - 1; j >= 0; j--) { ++// product = (in[j] & LONG_MASK) * kLong + (out[offset] & LONG_MASK) + carry; ++// out[offset--] = (int)product; ++// carry = product >>> 32; ++// } ++// return (int)carry; ++void MacroAssembler::mul_add(Register out, Register in, Register offset, ++ Register len, Register k) { ++ Label L_tail_loop, L_unroll, L_end; ++ ++ move(SCR2, out); ++ move(out, R0); // should clear out ++ bge(R0, len, L_end); ++ ++ alsl_d(offset, offset, SCR2, LogBytesPerInt - 1); ++ alsl_d(in, len, in, LogBytesPerInt - 1); ++ ++ const int unroll = 16; ++ li(SCR2, unroll); ++ blt(len, SCR2, L_tail_loop); ++ ++ bind(L_unroll); ++ ++ addi_d(in, in, -unroll * BytesPerInt); ++ addi_d(offset, offset, -unroll * BytesPerInt); ++ ++ for (int i = unroll - 1; i >= 0; i--) { ++ ld_wu(SCR1, in, i * BytesPerInt); ++ mulw_d_wu(SCR1, SCR1, k); ++ add_d(out, out, SCR1); // out as scratch ++ ld_wu(SCR1, offset, i * BytesPerInt); ++ add_d(SCR1, SCR1, out); ++ st_w(SCR1, offset, i * BytesPerInt); ++ srli_d(out, SCR1, 32); // keep carry ++ } ++ ++ sub_w(len, len, SCR2); ++ bge(len, SCR2, L_unroll); ++ ++ bge(R0, len, L_end); // check tail ++ ++ bind(L_tail_loop); ++ ++ addi_d(in, in, -BytesPerInt); ++ ld_wu(SCR1, in, 0); ++ mulw_d_wu(SCR1, SCR1, k); ++ add_d(out, out, SCR1); // out as scratch ++ ++ addi_d(offset, offset, -BytesPerInt); ++ ld_wu(SCR1, offset, 0); ++ add_d(SCR1, SCR1, out); ++ st_w(SCR1, offset, 0); ++ ++ srli_d(out, SCR1, 32); // keep carry ++ ++ addi_w(len, len, -1); ++ blt(R0, len, L_tail_loop); ++ ++ bind(L_end); ++} ++ +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/macroAssembler_loongarch.hpp b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/macroAssembler_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.hpp 2024-01-30 10:00:11.841431732 +0800 +@@ -0,0 +1,825 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_MACROASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_MACROASSEMBLER_LOONGARCH_HPP ++ ++#include "asm/assembler.hpp" ++#include "runtime/rtmLocking.hpp" ++#include "utilities/macros.hpp" ++ ++// MacroAssembler extends Assembler by frequently used macros. ++// ++// Instructions for which a 'better' code sequence exists depending ++// on arguments should also go in here. ++ ++class MacroAssembler: public Assembler { ++ friend class LIR_Assembler; ++ friend class Runtime1; // as_Address() ++ ++ public: ++ // Compare code ++ typedef enum { ++ EQ = 0x01, ++ NE = 0x02, ++ GT = 0x03, ++ GE = 0x04, ++ LT = 0x05, ++ LE = 0x06 ++ } CMCompare; ++ ++ protected: ++ ++ // Support for VM calls ++ // ++ // This is the base routine called by the different versions of call_VM_leaf. The interpreter ++ // may customize this version by overriding it for its purposes (e.g., to save/restore ++ // additional registers when doing a VM call). ++ #define VIRTUAL virtual ++ ++ VIRTUAL void call_VM_leaf_base( ++ address entry_point, // the entry point ++ int number_of_arguments // the number of arguments to pop after the call ++ ); ++ ++ // This is the base routine called by the different versions of call_VM. The interpreter ++ // may customize this version by overriding it for its purposes (e.g., to save/restore ++ // additional registers when doing a VM call). ++ // ++ // If no java_thread register is specified (noreg) than TREG will be used instead. call_VM_base ++ // returns the register which contains the thread upon return. If a thread register has been ++ // specified, the return value will correspond to that register. If no last_java_sp is specified ++ // (noreg) than sp will be used instead. ++ VIRTUAL void call_VM_base( // returns the register containing the thread upon return ++ Register oop_result, // where an oop-result ends up if any; use noreg otherwise ++ Register java_thread, // the thread if computed before ; use noreg otherwise ++ Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise ++ address entry_point, // the entry point ++ int number_of_arguments, // the number of arguments (w/o thread) to pop after the call ++ bool check_exceptions // whether to check for pending exceptions after return ++ ); ++ ++ void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions = true); ++ ++ // helpers for FPU flag access ++ // tmp is a temporary register, if none is available use noreg ++ ++ public: ++ MacroAssembler(CodeBuffer* code) : Assembler(code) {} ++ ++ // These routines should emit JVMTI PopFrame and ForceEarlyReturn handling code. ++ // The implementation is only non-empty for the InterpreterMacroAssembler, ++ // as only the interpreter handles PopFrame and ForceEarlyReturn requests. 
++ virtual void check_and_handle_popframe(Register java_thread); ++ virtual void check_and_handle_earlyret(Register java_thread); ++ ++ Address as_Address(AddressLiteral adr); ++ Address as_Address(ArrayAddress adr); ++ ++ static intptr_t i[32]; ++ static float f[32]; ++ static void print(outputStream *s); ++ ++ static int i_offset(unsigned int k); ++ static int f_offset(unsigned int k); ++ ++ static void save_registers(MacroAssembler *masm); ++ static void restore_registers(MacroAssembler *masm); ++ ++ // Support for NULL-checks ++ // ++ // Generates code that causes a NULL OS exception if the content of reg is NULL. ++ // If the accessed location is M[reg + offset] and the offset is known, provide the ++ // offset. No explicit code generation is needed if the offset is within a certain ++ // range (0 <= offset <= page_size). ++ ++ void null_check(Register reg, int offset = -1); ++ static bool needs_explicit_null_check(intptr_t offset); ++ ++ // Required platform-specific helpers for Label::patch_instructions. ++ // They _shadow_ the declarations in AbstractAssembler, which are undefined. ++ static void pd_patch_instruction(address branch, address target); ++ ++ address emit_trampoline_stub(int insts_call_instruction_offset, address target); ++ ++ // Support for inc/dec with optimal instruction selection depending on value ++ // void incrementl(Register reg, int value = 1); ++ // void decrementl(Register reg, int value = 1); ++ ++ ++ // Alignment ++ void align(int modulus); ++ ++ ++ // Stack frame creation/removal ++ void enter(); ++ void leave(); ++ ++ // Frame creation and destruction shared between JITs. ++ void build_frame(int framesize); ++ void remove_frame(int framesize); ++ ++ // Support for getting the JavaThread pointer (i.e.; a reference to thread-local information) ++ // The pointer will be loaded into the thread register. ++ void get_thread(Register thread); ++ ++ ++ // Support for VM calls ++ // ++ // It is imperative that all calls into the VM are handled via the call_VM macros. ++ // They make sure that the stack linkage is setup correctly. call_VM's correspond ++ // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points. 
++ ++ ++ void call_VM(Register oop_result, ++ address entry_point, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, Register arg_2, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, Register arg_2, Register arg_3, ++ bool check_exceptions = true); ++ ++ // Overloadings with last_Java_sp ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments = 0, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, bool ++ check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, Register arg_2, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, Register arg_2, Register arg_3, ++ bool check_exceptions = true); ++ ++ void get_vm_result (Register oop_result, Register thread); ++ void get_vm_result_2(Register metadata_result, Register thread); ++ void call_VM_leaf(address entry_point, ++ int number_of_arguments = 0); ++ void call_VM_leaf(address entry_point, ++ Register arg_1); ++ void call_VM_leaf(address entry_point, ++ Register arg_1, Register arg_2); ++ void call_VM_leaf(address entry_point, ++ Register arg_1, Register arg_2, Register arg_3); ++ ++ // Super call_VM calls - correspond to MacroAssembler::call_VM(_leaf) calls ++ void super_call_VM_leaf(address entry_point); ++ void super_call_VM_leaf(address entry_point, Register arg_1); ++ void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2); ++ void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3); ++ ++ // last Java Frame (fills frame anchor) ++ void set_last_Java_frame(Register thread, ++ Register last_java_sp, ++ Register last_java_fp, ++ Label& last_java_pc); ++ ++ // thread in the default location (S6) ++ void set_last_Java_frame(Register last_java_sp, ++ Register last_java_fp, ++ Label& last_java_pc); ++ ++ void reset_last_Java_frame(Register thread, bool clear_fp); ++ ++ // thread in the default location (S6) ++ void reset_last_Java_frame(bool clear_fp); ++ ++ // jobjects ++ void clear_jweak_tag(Register possibly_jweak); ++ void resolve_jobject(Register value, Register thread, Register tmp); ++ ++ // C 'boolean' to Java boolean: x == 0 ? 
0 : 1 ++ void c2bool(Register x); ++ ++ void resolve_oop_handle(Register result, Register tmp); ++ void load_mirror(Register dst, Register method, Register tmp); ++ ++ // oop manipulations ++ void load_klass(Register dst, Register src); ++ void store_klass(Register dst, Register src); ++ ++ void access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src, ++ Register tmp1, Register thread_tmp); ++ void access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register src, ++ Register tmp1, Register tmp2); ++ ++ void load_heap_oop(Register dst, Address src, Register tmp1 = noreg, ++ Register thread_tmp = noreg, DecoratorSet decorators = 0); ++ void load_heap_oop_not_null(Register dst, Address src, Register tmp1 = noreg, ++ Register thread_tmp = noreg, DecoratorSet decorators = 0); ++ void store_heap_oop(Address dst, Register src, Register tmp1 = noreg, ++ Register tmp2 = noreg, DecoratorSet decorators = 0); ++ ++ // Used for storing NULL. All other oop constants should be ++ // stored using routines that take a jobject. ++ void store_heap_oop_null(Address dst); ++ ++ void load_prototype_header(Register dst, Register src); ++ ++ void store_klass_gap(Register dst, Register src); ++ ++ void encode_heap_oop(Register r); ++ void encode_heap_oop(Register dst, Register src); ++ void decode_heap_oop(Register r); ++ void decode_heap_oop(Register dst, Register src); ++ void encode_heap_oop_not_null(Register r); ++ void decode_heap_oop_not_null(Register r); ++ void encode_heap_oop_not_null(Register dst, Register src); ++ void decode_heap_oop_not_null(Register dst, Register src); ++ ++ void encode_klass_not_null(Register r); ++ void decode_klass_not_null(Register r); ++ void encode_klass_not_null(Register dst, Register src); ++ void decode_klass_not_null(Register dst, Register src); ++ ++ // if heap base register is used - reinit it with the correct value ++ void reinit_heapbase(); ++ ++ DEBUG_ONLY(void verify_heapbase(const char* msg);) ++ ++ void set_narrow_klass(Register dst, Klass* k); ++ void set_narrow_oop(Register dst, jobject obj); ++ ++ // Sign extension ++ void sign_extend_short(Register reg) { ext_w_h(reg, reg); } ++ void sign_extend_byte(Register reg) { ext_w_b(reg, reg); } ++ void rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp); ++ void rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp); ++ ++ // allocation ++ void eden_allocate( ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Label& slow_case // continuation point if fast allocation fails ++ ); ++ void tlab_allocate( ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Register t2, // temp register ++ Label& slow_case // continuation point if fast allocation fails ++ ); ++ void incr_allocated_bytes(Register thread, ++ Register var_size_in_bytes, int con_size_in_bytes, ++ Register t1 = noreg); ++ // interface method calling ++ void lookup_interface_method(Register recv_klass, ++ Register intf_klass, ++ RegisterOrConstant itable_index, ++ Register method_result, ++ Register scan_temp, 
++ Label& no_such_interface, ++ bool return_method = true); ++ ++ // virtual method calling ++ void lookup_virtual_method(Register recv_klass, ++ RegisterOrConstant vtable_index, ++ Register method_result); ++ ++ // Test sub_klass against super_klass, with fast and slow paths. ++ ++ // The fast path produces a tri-state answer: yes / no / maybe-slow. ++ // One of the three labels can be NULL, meaning take the fall-through. ++ // If super_check_offset is -1, the value is loaded up from super_klass. ++ // No registers are killed, except temp_reg. ++ void check_klass_subtype_fast_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label* L_success, ++ Label* L_failure, ++ Label* L_slow_path, ++ RegisterOrConstant super_check_offset = RegisterOrConstant(-1)); ++ ++ // The rest of the type check; must be wired to a corresponding fast path. ++ // It does not repeat the fast path logic, so don't use it standalone. ++ // The temp_reg and temp2_reg can be noreg, if no temps are available. ++ // Updates the sub's secondary super cache as necessary. ++ // If set_cond_codes, condition codes will be Z on success, NZ on failure. ++ void check_klass_subtype_slow_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Register temp2_reg, ++ Label* L_success, ++ Label* L_failure, ++ bool set_cond_codes = false); ++ ++ // Simplified, combined version, good for typical uses. ++ // Falls through on failure. ++ void check_klass_subtype(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label& L_success); ++ ++ ++ // Debugging ++ ++ // only if +VerifyOops ++ void verify_oop(Register reg, const char* s = "broken oop"); ++ void verify_oop_addr(Address addr, const char * s = "broken oop addr"); ++ void verify_oop_subroutine(); ++ // TODO: verify method and klass metadata (compare against vptr?) ++ void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {} ++ void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line){} ++ ++ #define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__) ++ #define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__) ++ ++ // only if +VerifyFPU ++ void verify_FPU(int stack_depth, const char* s = "illegal FPU state"); ++ ++ // prints msg, dumps registers and stops execution ++ void stop(const char* msg); ++ ++ // prints msg and continues ++ void warn(const char* msg); ++ ++ static void debug(char* msg/*, RegistersForDebugging* regs*/); ++ static void debug64(char* msg, int64_t pc, int64_t regs[]); ++ ++ void untested() { stop("untested"); } ++ ++ void unimplemented(const char* what = ""); ++ ++ void should_not_reach_here() { stop("should not reach here"); } ++ ++ void print_CPU_state(); ++ ++ // Stack overflow checking ++ void bang_stack_with_offset(int offset) { ++ // stack grows down, caller passes positive offset ++ assert(offset > 0, "must bang with negative offset"); ++ if (offset <= 2048) { ++ st_w(A0, SP, -offset); ++ } else if (offset <= 32768 && !(offset & 3)) { ++ stptr_w(A0, SP, -offset); ++ } else { ++ li(AT, offset); ++ sub_d(AT, SP, AT); ++ st_w(A0, AT, 0); ++ } ++ } ++ ++ // Writes to stack successive pages until offset reached to check for ++ // stack overflow + shadow pages. 
Also, clobbers tmp ++ void bang_stack_size(Register size, Register tmp); ++ ++ // Check for reserved stack access in method being exited (for JIT) ++ void reserved_stack_check(); ++ ++ virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, ++ Register tmp, ++ int offset); ++ ++ // Support for serializing memory accesses between threads ++ void serialize_memory(Register thread, Register tmp); ++ ++ void safepoint_poll(Label& slow_path, Register thread_reg); ++ void safepoint_poll_acquire(Label& slow_path, Register thread_reg); ++ ++ //void verify_tlab(); ++ void verify_tlab(Register t1, Register t2); ++ ++ // Biased locking support ++ // lock_reg and obj_reg must be loaded up with the appropriate values. ++ // tmp_reg is optional. If it is supplied (i.e., != noreg) it will ++ // be killed; if not supplied, push/pop will be used internally to ++ // allocate a temporary (inefficient, avoid if possible). ++ // Optional slow case is for implementations (interpreter and C1) which branch to ++ // slow case directly. Leaves condition codes set for C2's Fast_Lock node. ++ // Returns offset of first potentially-faulting instruction for null ++ // check info (currently consumed only by C1). If ++ // swap_reg_contains_mark is true then returns -1 as it is assumed ++ // the calling code has already passed any potential faults. ++ int biased_locking_enter(Register lock_reg, Register obj_reg, ++ Register swap_reg, Register tmp_reg, ++ bool swap_reg_contains_mark, ++ Label& done, Label* slow_case = NULL, ++ BiasedLockingCounters* counters = NULL); ++ void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done); ++#ifdef COMPILER2 ++ void cmp_branch_short(int flag, Register op1, Register op2, Label& L, bool is_signed); ++ void cmp_branch_long(int flag, Register op1, Register op2, Label* L, bool is_signed); ++ void cmp_branchEqNe_off21(int flag, Register op1, Label& L); ++ void fast_lock(Register obj, Register box, Register res, Register tmp, Register scr); ++ void fast_unlock(Register obj, Register box, Register res, Register tmp, Register scr); ++#endif ++ ++ ++ // the follow two might use AT register, be sure you have no meanful data in AT before you call them ++ void increment(Register reg, int imm); ++ void decrement(Register reg, int imm); ++ void increment(Address addr, int imm = 1); ++ void decrement(Address addr, int imm = 1); ++ void shl(Register reg, int sa) { slli_d(reg, reg, sa); } ++ void shr(Register reg, int sa) { srli_d(reg, reg, sa); } ++ void sar(Register reg, int sa) { srai_d(reg, reg, sa); } ++ // Helper functions for statistics gathering. 
++ void atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2); ++ ++ // Calls ++ void call(address entry); ++ void call(address entry, relocInfo::relocType rtype); ++ void call(address entry, RelocationHolder& rh); ++ void call_long(address entry); ++ ++ address trampoline_call(AddressLiteral entry, CodeBuffer *cbuf = NULL); ++ ++ static const unsigned long branch_range = NOT_DEBUG(128 * M) DEBUG_ONLY(2 * M); ++ ++ static bool far_branches() { ++ if (ForceUnreachable) { ++ return true; ++ } else { ++ return ReservedCodeCacheSize > branch_range; ++ } ++ } ++ ++ // Emit the CompiledIC call idiom ++ address ic_call(address entry, jint method_index = 0); ++ ++ // Jumps ++ void jmp(address entry); ++ void jmp(address entry, relocInfo::relocType rtype); ++ void jmp_far(Label& L); // patchable ++ ++ /* branches may exceed 16-bit offset */ ++ void b_far(address entry); ++ void b_far(Label& L); ++ ++ void bne_far (Register rs, Register rt, address entry); ++ void bne_far (Register rs, Register rt, Label& L); ++ ++ void beq_far (Register rs, Register rt, address entry); ++ void beq_far (Register rs, Register rt, Label& L); ++ ++ void blt_far (Register rs, Register rt, address entry, bool is_signed); ++ void blt_far (Register rs, Register rt, Label& L, bool is_signed); ++ ++ void bge_far (Register rs, Register rt, address entry, bool is_signed); ++ void bge_far (Register rs, Register rt, Label& L, bool is_signed); ++ ++ // For C2 to support long branches ++ void beq_long (Register rs, Register rt, Label& L); ++ void bne_long (Register rs, Register rt, Label& L); ++ void blt_long (Register rs, Register rt, Label& L, bool is_signed); ++ void bge_long (Register rs, Register rt, Label& L, bool is_signed); ++ void bc1t_long (Label& L); ++ void bc1f_long (Label& L); ++ ++ static bool patchable_branches() { ++ const unsigned long branch_range = NOT_DEBUG(128 * M) DEBUG_ONLY(2 * M); ++ return ReservedCodeCacheSize > branch_range; ++ } ++ ++ static bool reachable_from_branch_short(jlong offs); ++ ++ void patchable_jump_far(Register ra, jlong offs); ++ void patchable_jump(address target, bool force_patchable = false); ++ void patchable_call(address target, address call_size = 0); ++ ++ // Floating ++ void generate_dsin_dcos(bool isCos, address npio2_hw, address two_over_pi, ++ address pio2, address dsin_coef, address dcos_coef); ++ ++ // Data ++ ++ // Load and store values by size and signed-ness ++ void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg); ++ void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg); ++ ++ // ld_ptr will perform lw for 32 bit VMs and ld for 64 bit VMs ++ inline void ld_ptr(Register rt, Address a) { ++ ld_d(rt, a); ++ } ++ ++ inline void ld_ptr(Register rt, Register base, int offset16) { ++ ld_d(rt, base, offset16); ++ } ++ ++ // st_ptr will perform sw for 32 bit VMs and sd for 64 bit VMs ++ inline void st_ptr(Register rt, Address a) { ++ st_d(rt, a); ++ } ++ ++ inline void st_ptr(Register rt, Register base, int offset16) { ++ st_d(rt, base, offset16); ++ } ++ ++ void ld_ptr(Register rt, Register base, Register offset); ++ void st_ptr(Register rt, Register base, Register offset); ++ ++ // swap the two byte of the low 16-bit halfword ++ // this directive will use AT, be sure the high 16-bit of reg is zero ++ void hswap(Register reg); ++ void huswap(Register reg); ++ ++ // convert big endian integer to little endian integer ++ void swap(Register reg); ++ ++ void 
cmpxchg(Address addr, Register oldval, Register newval, Register resflag, ++ bool retold, bool barrier); ++ void cmpxchg(Address addr, Register oldval, Register newval, Register tmp, ++ bool retold, bool barrier, Label& succ, Label* fail = NULL); ++ void cmpxchg32(Address addr, Register oldval, Register newval, Register resflag, ++ bool sign, bool retold, bool barrier); ++ void cmpxchg32(Address addr, Register oldval, Register newval, Register tmp, ++ bool sign, bool retold, bool barrier, Label& succ, Label* fail = NULL); ++ ++ void extend_sign(Register rh, Register rl) { /*stop("extend_sign");*/ guarantee(0, "LA not implemented yet");} ++ void neg(Register reg) { /*dsubu(reg, R0, reg);*/ guarantee(0, "LA not implemented yet");} ++ void push (Register reg) { addi_d(SP, SP, -8); st_d (reg, SP, 0); } ++ void push (FloatRegister reg) { addi_d(SP, SP, -8); fst_d (reg, SP, 0); } ++ void pop (Register reg) { ld_d (reg, SP, 0); addi_d(SP, SP, 8); } ++ void pop (FloatRegister reg) { fld_d (reg, SP, 0); addi_d(SP, SP, 8); } ++ void pop () { addi_d(SP, SP, 8); } ++ void pop2 () { addi_d(SP, SP, 16); } ++ void push2(Register reg1, Register reg2); ++ void pop2 (Register reg1, Register reg2); ++ //we need 2 fun to save and resotre general register ++ void pushad(); ++ void popad(); ++ void pushad_except_v0(); ++ void popad_except_v0(); ++ void push(RegSet regs) { if (regs.bits()) push(regs.bits()); } ++ void pop(RegSet regs) { if (regs.bits()) pop(regs.bits()); } ++ ++ void li(Register rd, jlong value); ++ void li(Register rd, address addr) { li(rd, (long)addr); } ++ void patchable_li52(Register rd, jlong value); ++ void lipc(Register rd, Label& L); ++ ++ void move(Register rd, Register rs) { orr(rd, rs, R0); } ++ void move_u32(Register rd, Register rs) { add_w(rd, rs, R0); } ++ void mov_metadata(Register dst, Metadata* obj); ++ void mov_metadata(Address dst, Metadata* obj); ++ ++ // Load the base of the cardtable byte map into reg. ++ void load_byte_map_base(Register reg); ++ ++ // Code for java.lang.StringCoding::hasNegatives() instrinsic. ++ void has_negatives(Register ary1, Register len, Register result); ++ ++ // Code for java.lang.StringUTF16::compress intrinsic. ++ void char_array_compress(Register src, Register dst, Register len, ++ Register result, Register tmp1, ++ Register tmp2, Register tmp3); ++ ++ // Code for java.lang.StringLatin1::inflate intrinsic. ++ void byte_array_inflate(Register src, Register dst, Register len, ++ Register tmp1, Register tmp2); ++ ++ // Find index of char in UTF-16 string ++ void string_indexof_char(Register str1, Register cnt1, ++ Register ch, Register result, ++ Register tmp1, Register tmp2, ++ Register tmp3); ++ ++ //FIXME ++ void empty_FPU_stack(){/*need implemented*/}; ++ ++#ifdef COMPILER2 ++ // Compare strings. ++ void string_compare(Register str1, Register str2, ++ Register cnt1, Register cnt2, Register result, ++ int ae); ++ ++ // Compare char[] or byte[] arrays. 
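For context, the arrays_equals() declared just below boils down to an element-wise comparison that yields a boolean in the result register. A scalar editorial sketch (hypothetical name arrays_equals_model; byte flavour shown, the char flavour compares 16-bit units):

#include <cstddef>
#include <cstdint>

// Result contract of the array-equality helper (sketch only).
static bool arrays_equals_model(const uint8_t* a, const uint8_t* b, size_t cnt) {
  for (size_t i = 0; i < cnt; i++) {
    if (a[i] != b[i]) {
      return false;   // any mismatching element makes the arrays unequal
    }
  }
  return true;        // all cnt elements matched
}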
++ void arrays_equals(Register str1, Register str2, ++ Register cnt, Register tmp1, Register tmp2, Register result, ++ bool is_char); ++#endif ++ ++ // method handles (JSR 292) ++ Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0); ++ ++ ++ // LA added: ++ void jr (Register reg) { jirl(R0, reg, 0); } ++ void jalr(Register reg) { jirl(RA, reg, 0); } ++ void nop () { andi(R0, R0, 0); } ++ void andr(Register rd, Register rj, Register rk) { AND(rd, rj, rk); } ++ void xorr(Register rd, Register rj, Register rk) { XOR(rd, rj, rk); } ++ void orr (Register rd, Register rj, Register rk) { OR(rd, rj, rk); } ++ void lea (Register rd, Address src); ++ void lea(Register dst, AddressLiteral adr); ++ static int patched_branch(int dest_pos, int inst, int inst_pos); ++ ++ // Conditional move ++ void cmp_cmov(Register op1, ++ Register op2, ++ Register dst, ++ Register src1, ++ Register src2, ++ CMCompare cmp = EQ, ++ bool is_signed = true); ++ void cmp_cmov(Register op1, ++ Register op2, ++ Register dst, ++ Register src, ++ CMCompare cmp = EQ, ++ bool is_signed = true); ++ void cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ Register dst, ++ Register src, ++ FloatRegister tmp1, ++ FloatRegister tmp2, ++ CMCompare cmp = EQ, ++ bool is_float = true); ++ void cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ FloatRegister dst, ++ FloatRegister src, ++ CMCompare cmp = EQ, ++ bool is_float = true); ++ void cmp_cmov(Register op1, ++ Register op2, ++ FloatRegister dst, ++ FloatRegister src, ++ FloatRegister tmp1, ++ FloatRegister tmp2, ++ CMCompare cmp = EQ); ++ ++ // CRC32 code for java.util.zip.CRC32::update() instrinsic. ++ void update_byte_crc32(Register crc, Register val, Register table); ++ ++ // CRC32 code for java.util.zip.CRC32::updateBytes() instrinsic. ++ void kernel_crc32(Register crc, Register buf, Register len, Register tmp); ++ ++ // CRC32C code for java.util.zip.CRC32C::updateBytes() instrinsic. ++ void kernel_crc32c(Register crc, Register buf, Register len, Register tmp); ++ ++ void membar(Membar_mask_bits hint); ++ ++ void bind(Label& L) { ++ Assembler::bind(L); ++ code()->clear_last_insn(); ++ } ++ ++ // Code for java.math.BigInteger::mulAdd intrinsic. 
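As a reference for the mul_add() declared just below, this is an editorial C++ model of the java.math.BigInteger::mulAdd loop it implements (hypothetical name mul_add_model; plain uint32_t arrays stand in for the Java int[] operands, and offset is taken as already equal to out.length - offset, matching the register convention documented at the definition):

#include <cstdint>

// Multiply-accumulate of 32-bit words with a 64-bit carry, highest index first (sketch only).
static uint32_t mul_add_model(uint32_t* out, const uint32_t* in,
                              int offset, int len, uint32_t k) {
  uint64_t carry = 0;
  int o = offset - 1;
  for (int j = len - 1; j >= 0; j--) {
    uint64_t product = (uint64_t)in[j] * k + out[o] + carry;
    out[o--] = (uint32_t)product;   // store the low 32 bits
    carry = product >> 32;          // carry the high 32 bits into the next word
  }
  return (uint32_t)carry;
}

The assembly version unrolls this loop sixteen words at a time and keeps the running carry in the out register between iterations.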
++ void mul_add(Register out, Register in, Register offset, ++ Register len, Register k); ++ ++#undef VIRTUAL ++ ++public: ++// Memory Data Type ++#define INT_TYPE 0x100 ++#define FLOAT_TYPE 0x200 ++#define SIGNED_TYPE 0x10 ++#define UNSIGNED_TYPE 0x20 ++ ++ typedef enum { ++ LOAD_BYTE = INT_TYPE | SIGNED_TYPE | 0x1, ++ LOAD_CHAR = INT_TYPE | SIGNED_TYPE | 0x2, ++ LOAD_SHORT = INT_TYPE | SIGNED_TYPE | 0x3, ++ LOAD_INT = INT_TYPE | SIGNED_TYPE | 0x4, ++ LOAD_LONG = INT_TYPE | SIGNED_TYPE | 0x5, ++ STORE_BYTE = INT_TYPE | SIGNED_TYPE | 0x6, ++ STORE_CHAR = INT_TYPE | SIGNED_TYPE | 0x7, ++ STORE_SHORT = INT_TYPE | SIGNED_TYPE | 0x8, ++ STORE_INT = INT_TYPE | SIGNED_TYPE | 0x9, ++ STORE_LONG = INT_TYPE | SIGNED_TYPE | 0xa, ++ LOAD_LINKED_LONG = INT_TYPE | SIGNED_TYPE | 0xb, ++ ++ LOAD_U_BYTE = INT_TYPE | UNSIGNED_TYPE | 0x1, ++ LOAD_U_SHORT = INT_TYPE | UNSIGNED_TYPE | 0x2, ++ LOAD_U_INT = INT_TYPE | UNSIGNED_TYPE | 0x3, ++ ++ LOAD_FLOAT = FLOAT_TYPE | SIGNED_TYPE | 0x1, ++ LOAD_DOUBLE = FLOAT_TYPE | SIGNED_TYPE | 0x2, ++ LOAD_VECTORX = FLOAT_TYPE | SIGNED_TYPE | 0x3, ++ LOAD_VECTORY = FLOAT_TYPE | SIGNED_TYPE | 0x4, ++ STORE_FLOAT = FLOAT_TYPE | SIGNED_TYPE | 0x5, ++ STORE_DOUBLE = FLOAT_TYPE | SIGNED_TYPE | 0x6, ++ STORE_VECTORX = FLOAT_TYPE | SIGNED_TYPE | 0x7, ++ STORE_VECTORY = FLOAT_TYPE | SIGNED_TYPE | 0x8 ++ } CMLoadStoreDataType; ++ ++ void loadstore_enc(Register reg, int base, int index, int scale, int disp, int type) { ++ assert((type & INT_TYPE), "must be General reg type"); ++ loadstore_t(reg, base, index, scale, disp, type); ++ } ++ ++ void loadstore_enc(FloatRegister reg, int base, int index, int scale, int disp, int type) { ++ assert((type & FLOAT_TYPE), "must be Float reg type"); ++ loadstore_t(reg, base, index, scale, disp, type); ++ } ++ ++#ifdef COMPILER2 ++ void reduce(Register dst, Register src, FloatRegister vsrc, FloatRegister tmp1, FloatRegister tmp2, BasicType type, int opcode, int vector_size); ++ void reduce(FloatRegister dst, FloatRegister src, FloatRegister vsrc, FloatRegister tmp, BasicType type, int opcode, int vector_size); ++#endif ++ ++private: ++ void push(unsigned int bitset); ++ void pop(unsigned int bitset); ++ ++ template ++ void loadstore_t(T reg, int base, int index, int scale, int disp, int type) { ++ if (index != 0) { ++ assert(((scale==0)&&(disp==0)), "only support base+index"); ++ loadstore(reg, as_Register(base), as_Register(index), type); ++ } else { ++ loadstore(reg, as_Register(base), disp, type); ++ } ++ } ++ void loadstore(Register reg, Register base, int disp, int type); ++ void loadstore(Register reg, Register base, Register disp, int type); ++ void loadstore(FloatRegister reg, Register base, int disp, int type); ++ void loadstore(FloatRegister reg, Register base, Register disp, int type); ++ ++#ifdef COMPILER2 ++ void reduce_ins_v(FloatRegister vec1, FloatRegister vec2, FloatRegister vec3, BasicType type, int opcode); ++ void reduce_ins_r(Register reg1, Register reg2, Register reg3, BasicType type, int opcode); ++ void reduce_ins_f(FloatRegister reg1, FloatRegister reg2, FloatRegister reg3, BasicType type, int opcode); ++#endif ++ void generate_kernel_sin(FloatRegister x, bool iyIsOne, address dsin_coef); ++ void generate_kernel_cos(FloatRegister x, address dcos_coef); ++ void generate__ieee754_rem_pio2(address npio2_hw, address two_over_pi, address pio2); ++ void generate__kernel_rem_pio2(address two_over_pi, address pio2); ++}; ++ ++/** ++ * class SkipIfEqual: ++ * ++ * Instantiating this class will result in assembly code being output 
that will ++ * jump around any code emitted between the creation of the instance and it's ++ * automatic destruction at the end of a scope block, depending on the value of ++ * the flag passed to the constructor, which will be checked at run-time. ++ */ ++class SkipIfEqual { ++private: ++ MacroAssembler* _masm; ++ Label _label; ++ ++public: ++ inline SkipIfEqual(MacroAssembler* masm, const bool* flag_addr, bool value) ++ : _masm(masm) { ++ _masm->li(AT, (address)flag_addr); ++ _masm->ld_b(AT, AT, 0); ++ if (value) { ++ _masm->bne(AT, R0, _label); ++ } else { ++ _masm->beq(AT, R0, _label); ++ } ++ } ++ ++ ~SkipIfEqual(); ++}; ++ ++#ifdef ASSERT ++inline bool AbstractAssembler::pd_check_instruction_mark() { return true; } ++#endif ++ ++struct tableswitch { ++ Register _reg; ++ int _insn_index; jint _first_key; jint _last_key; ++ Label _after; ++ Label _branches; ++}; ++ ++#endif // CPU_LOONGARCH_MACROASSEMBLER_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/macroAssembler_loongarch.inline.hpp b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.inline.hpp +--- a/src/hotspot/cpu/loongarch/macroAssembler_loongarch.inline.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.inline.hpp 2024-01-30 10:00:11.841431732 +0800 +@@ -0,0 +1,34 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2017, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_MACROASSEMBLER_LOONGARCH_INLINE_HPP ++#define CPU_LOONGARCH_MACROASSEMBLER_LOONGARCH_INLINE_HPP ++ ++#include "asm/assembler.inline.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/codeBuffer.hpp" ++#include "code/codeCache.hpp" ++ ++#endif // CPU_LOONGARCH_MACROASSEMBLER_LOONGARCH_INLINE_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/macroAssembler_loongarch_trig.cpp b/src/hotspot/cpu/loongarch/macroAssembler_loongarch_trig.cpp +--- a/src/hotspot/cpu/loongarch/macroAssembler_loongarch_trig.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/macroAssembler_loongarch_trig.cpp 2024-01-30 10:00:11.841431732 +0800 +@@ -0,0 +1,1625 @@ ++/* Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Cavium. All rights reserved. (By BELLSOFT) ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. 
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/assembler.inline.hpp" ++#include "macroAssembler_loongarch.hpp" ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++// The following code is a optimized version of fdlibm sin/cos implementation ++// (C code is in share/runtime/sharedRuntimeTrig.cpp) adapted for LOONGARCH64. ++ ++// Please refer to sin/cos approximation via polynomial and ++// trigonometric argument reduction techniques to the following literature: ++// ++// [1] Muller, Jean-Michel, Nicolas Brisebarre, Florent De Dinechin, ++// Claude-Pierre Jeannerod, Vincent Lefevre, Guillaume Melquiond, ++// Nathalie Revol, Damien Stehlé, and Serge Torres: ++// Handbook of floating-point arithmetic. ++// Springer Science & Business Media, 2009. ++// [2] K. C. Ng ++// Argument Reduction for Huge Arguments: Good to the Last Bit ++// July 13, 1992, SunPro ++// ++// HOW TO READ THIS CODE: ++// This code consists of several functions. Each function has following header: ++// 1) Description ++// 2) C-pseudo code with differences from fdlibm marked by comments starting ++// with "NOTE". Check unmodified fdlibm code in ++// share/runtime/SharedRuntimeTrig.cpp ++// 3) Brief textual description of changes between fdlibm and current ++// implementation along with optimization notes (if applicable) ++// 4) Assumptions, input and output ++// 5) (Optional) additional notes about intrinsic implementation ++// Each function is separated in blocks which follow the pseudo-code structure ++// ++// HIGH-LEVEL ALGORITHM DESCRIPTION: ++// - entry point: generate_dsin_dcos(...); ++// - check corner cases: NaN, INF, tiny argument. ++// - check if |x| < Pi/4. Then approximate sin/cos via polynomial (kernel_sin/kernel_cos) ++// -- else proceed to argument reduction routine (__ieee754_rem_pio2) and ++// use reduced argument to get result via kernel_sin/kernel_cos ++// ++// HIGH-LEVEL CHANGES BETWEEN INTRINSICS AND FDLIBM: ++// 1) two_over_pi table fdlibm representation is int[], while intrinsic version ++// has these int values converted to double representation to load converted ++// double values directly (see stubRoutines_aarch4::_two_over_pi) ++// 2) Several loops are unrolled and vectorized: see comments in code after ++// labels: SKIP_F_LOAD, RECOMP_FOR1_CHECK, RECOMP_FOR2 ++// 3) fdlibm npio2_hw table now has "prefix" with constants used in ++// calculation. 
These constants are loaded from npio2_hw table instead of ++// constructing it in code (see stubRoutines_loongarch64.cpp) ++// 4) Polynomial coefficients for sin and cos are moved to table sin_coef ++// and cos_coef to use the same optimization as in 3). It allows to load most of ++// required constants via single instruction ++// ++// ++// ++///* __ieee754_rem_pio2(x,y) ++// * ++// * returns the remainder of x rem pi/2 in y[0]+y[1] (i.e. like x div pi/2) ++// * x is input argument, y[] is hi and low parts of reduced argument (x) ++// * uses __kernel_rem_pio2() ++// */ ++// // use tables(see stubRoutines_loongarch64.cpp): two_over_pi and modified npio2_hw ++// ++// BEGIN __ieee754_rem_pio2 PSEUDO CODE ++// ++//static int __ieee754_rem_pio2(double x, double *y) { ++// double z,w,t,r,fn; ++// double tx[3]; ++// int e0,i,j,nx,n,ix,hx,i0; ++// ++// i0 = ((*(int*)&two24A)>>30)^1; /* high word index */ ++// hx = *(i0+(int*)&x); /* high word of x */ ++// ix = hx&0x7fffffff; ++// if(ix<0x4002d97c) { /* |x| < 3pi/4, special case with n=+-1 */ ++// if(hx>0) { ++// z = x - pio2_1; ++// if(ix!=0x3ff921fb) { /* 33+53 bit pi is good enough */ ++// y[0] = z - pio2_1t; ++// y[1] = (z-y[0])-pio2_1t; ++// } else { /* near pi/2, use 33+33+53 bit pi */ ++// z -= pio2_2; ++// y[0] = z - pio2_2t; ++// y[1] = (z-y[0])-pio2_2t; ++// } ++// return 1; ++// } else { /* negative x */ ++// z = x + pio2_1; ++// if(ix!=0x3ff921fb) { /* 33+53 bit pi is good enough */ ++// y[0] = z + pio2_1t; ++// y[1] = (z-y[0])+pio2_1t; ++// } else { /* near pi/2, use 33+33+53 bit pi */ ++// z += pio2_2; ++// y[0] = z + pio2_2t; ++// y[1] = (z-y[0])+pio2_2t; ++// } ++// return -1; ++// } ++// } ++// if(ix<=0x413921fb) { /* |x| ~<= 2^19*(pi/2), medium size */ ++// t = fabsd(x); ++// n = (int) (t*invpio2+half); ++// fn = (double)n; ++// r = t-fn*pio2_1; ++// w = fn*pio2_1t; /* 1st round good to 85 bit */ ++// // NOTE: y[0] = r-w; is moved from if/else below to be before "if" ++// y[0] = r-w; ++// if(n<32&&ix!=npio2_hw[n-1]) { ++// // y[0] = r-w; /* quick check no cancellation */ // NOTE: moved earlier ++// } else { ++// j = ix>>20; ++// // y[0] = r-w; // NOTE: moved earlier ++// i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff); ++// if(i>16) { /* 2nd iteration needed, good to 118 */ ++// t = r; ++// w = fn*pio2_2; ++// r = t-w; ++// w = fn*pio2_2t-((t-r)-w); ++// y[0] = r-w; ++// i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff); ++// if(i>49) { /* 3rd iteration need, 151 bits acc */ ++// t = r; /* will cover all possible cases */ ++// w = fn*pio2_3; ++// r = t-w; ++// w = fn*pio2_3t-((t-r)-w); ++// y[0] = r-w; ++// } ++// } ++// } ++// y[1] = (r-y[0])-w; ++// if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;} ++// else return n; ++// } ++// /* ++// * all other (large) arguments ++// */ ++// // NOTE: this check is removed, because it was checked in dsin/dcos ++// // if(ix>=0x7ff00000) { /* x is inf or NaN */ ++// // y[0]=y[1]=x-x; return 0; ++// // } ++// /* set z = scalbn(|x|,ilogb(x)-23) */ ++// *(1-i0+(int*)&z) = *(1-i0+(int*)&x); ++// e0 = (ix>>20)-1046; /* e0 = ilogb(z)-23; */ ++// *(i0+(int*)&z) = ix - (e0<<20); ++// ++// // NOTE: "for" loop below in unrolled. See comments in asm code ++// for(i=0;i<2;i++) { ++// tx[i] = (double)((int)(z)); ++// z = (z-tx[i])*two24A; ++// } ++// ++// tx[2] = z; ++// nx = 3; ++// ++// // NOTE: while(tx[nx-1]==zeroA) nx--; is unrolled. 
See comments in asm code ++// while(tx[nx-1]==zeroA) nx--; /* skip zero term */ ++// ++// n = __kernel_rem_pio2(tx,y,e0,nx,2,two_over_pi); ++// if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;} ++// return n; ++//} ++// ++// END __ieee754_rem_pio2 PSEUDO CODE ++// ++// Changes between fdlibm and intrinsic for __ieee754_rem_pio2: ++// 1. INF/NaN check for huge argument is removed in comparison with fdlibm ++// code, because this check is already done in dcos/dsin code ++// 2. Most constants are now loaded from table instead of direct initialization ++// 3. Two loops are unrolled ++// Assumptions: ++// 1. Assume |X| >= PI/4 ++// 2. Assume SCR1 = 0x3fe921fb00000000 (~ PI/4) ++// 3. Assume ix = A3 ++// Input and output: ++// 1. Input: X = A0 ++// 2. Return n in A2, y[0] == y0 == FA4, y[1] == y1 == FA5 ++// NOTE: general purpose register names match local variable names in C code ++// NOTE: fpu registers are actively reused. See comments in code about their usage ++void MacroAssembler::generate__ieee754_rem_pio2(address npio2_hw, address two_over_pi, address pio2) { ++ const int64_t PIO2_1t = 0x3DD0B4611A626331ULL; ++ const int64_t PIO2_2 = 0x3DD0B4611A600000ULL; ++ const int64_t PIO2_2t = 0x3BA3198A2E037073ULL; ++ Label X_IS_NEGATIVE, X_IS_MEDIUM_OR_LARGE, X_IS_POSITIVE_LONG_PI, LARGE_ELSE, ++ REDUCTION_DONE, X_IS_MEDIUM_BRANCH_DONE, X_IS_LARGE, NX_SET, ++ X_IS_NEGATIVE_LONG_PI; ++ Register X = A0, n = A2, ix = A3, jv = A4, tmp5 = A5, jx = A6, ++ tmp3 = A7, iqBase = T0, ih = T1, i = T2; ++ FloatRegister v0 = FA0, v1 = FA1, v2 = FA2, v3 = FA3, v4 = FA4, v5 = FA5, v6 = FA6, v7 = FA7, ++ vt = FT1, v24 = FT8, v26 = FT10, v27 = FT11, v28 = FT12, v29 = FT13, v31 = FT15; ++ ++ push2(S0, S1); ++ ++ // initializing constants first ++ li(SCR1, 0x3ff921fb54400000); // PIO2_1 ++ li(SCR2, 0x4002d97c); // 3*PI/4 high word ++ movgr2fr_d(v1, SCR1); // v1 = PIO2_1 ++ bge(ix, SCR2, X_IS_MEDIUM_OR_LARGE); ++ ++ block_comment("if(ix<0x4002d97c) {... 
/* |x| ~< 3pi/4 */ "); { ++ blt(X, R0, X_IS_NEGATIVE); ++ ++ block_comment("if(hx>0) {"); { ++ fsub_d(v2, v0, v1); // v2 = z = x - pio2_1 ++ srli_d(SCR1, SCR1, 32); ++ li(n, 1); ++ beq(ix, SCR1, X_IS_POSITIVE_LONG_PI); ++ ++ block_comment("case: hx > 0 && ix!=0x3ff921fb {"); { /* 33+53 bit pi is good enough */ ++ li(SCR2, PIO2_1t); ++ movgr2fr_d(v27, SCR2); ++ fsub_d(v4, v2, v27); // v4 = y[0] = z - pio2_1t; ++ fsub_d(v5, v2, v4); ++ fsub_d(v5, v5, v27); // v5 = y[1] = (z-y[0])-pio2_1t ++ b(REDUCTION_DONE); ++ } ++ ++ block_comment("case: hx > 0 &*& ix==0x3ff921fb {"); { /* near pi/2, use 33+33+53 bit pi */ ++ bind(X_IS_POSITIVE_LONG_PI); ++ li(SCR1, PIO2_2); ++ li(SCR2, PIO2_2t); ++ movgr2fr_d(v27, SCR1); ++ movgr2fr_d(v6, SCR2); ++ fsub_d(v2, v2, v27); // z-= pio2_2 ++ fsub_d(v4, v2, v6); // y[0] = z - pio2_2t ++ fsub_d(v5, v2, v4); ++ fsub_d(v5, v5, v6); // v5 = (z - y[0]) - pio2_2t ++ b(REDUCTION_DONE); ++ } ++ } ++ ++ block_comment("case: hx <= 0)"); { ++ bind(X_IS_NEGATIVE); ++ fadd_d(v2, v0, v1); // v2 = z = x + pio2_1 ++ srli_d(SCR1, SCR1, 32); ++ li(n, -1); ++ beq(ix, SCR1, X_IS_NEGATIVE_LONG_PI); ++ ++ block_comment("case: hx <= 0 && ix!=0x3ff921fb) {"); { /* 33+53 bit pi is good enough */ ++ li(SCR2, PIO2_1t); ++ movgr2fr_d(v27, SCR2); ++ fadd_d(v4, v2, v27); // v4 = y[0] = z + pio2_1t; ++ fsub_d(v5, v2, v4); ++ fadd_d(v5, v5, v27); // v5 = y[1] = (z-y[0]) + pio2_1t ++ b(REDUCTION_DONE); ++ } ++ ++ block_comment("case: hx <= 0 && ix==0x3ff921fb"); { /* near pi/2, use 33+33+53 bit pi */ ++ bind(X_IS_NEGATIVE_LONG_PI); ++ li(SCR1, PIO2_2); ++ li(SCR2, PIO2_2t); ++ movgr2fr_d(v27, SCR1); ++ movgr2fr_d(v6, SCR2); ++ fadd_d(v2, v2, v27); // z += pio2_2 ++ fadd_d(v4, v2, v6); // y[0] = z + pio2_2t ++ fsub_d(v5, v2, v4); ++ fadd_d(v5, v5, v6); // v5 = (z - y[0]) + pio2_2t ++ b(REDUCTION_DONE); ++ } ++ } ++ } ++ bind(X_IS_MEDIUM_OR_LARGE); ++ li(SCR1, 0x413921fb); ++ blt(SCR1, ix, X_IS_LARGE); // ix < = 0x413921fb ? ++ ++ block_comment("|x| ~<= 2^19*(pi/2), medium size"); { ++ li(ih, npio2_hw); ++ fld_d(v4, ih, 0); ++ fld_d(v5, ih, 8); ++ fld_d(v6, ih, 16); ++ fld_d(v7, ih, 24); ++ fabs_d(v31, v0); // v31 = t = |x| ++ addi_d(ih, ih, 64); ++ fmadd_d(v2, v31, v5, v4); // v2 = t * invpio2 + half (invpio2 = 53 bits of 2/pi, half = 0.5) ++ ftintrz_w_d(vt, v2); // n = (int) v2 ++ movfr2gr_s(n, vt); ++ vfrintrz_d(v2, v2); ++ fnmsub_d(v3, v2, v6, v31); // v3 = r = t - fn * pio2_1 ++ fmul_d(v26, v2, v7); // v26 = w = fn * pio2_1t ++ fsub_d(v4, v3, v26); // y[0] = r - w. Calculated before branch ++ li(SCR1, 32); ++ blt(SCR1, n, LARGE_ELSE); ++ addi_w(tmp5, n, -1); // tmp5 = n - 1 ++ alsl_d(tmp5, tmp5, ih, 2 - 1); ++ ld_w(jv, tmp5, 0); ++ bne(ix, jv, X_IS_MEDIUM_BRANCH_DONE); ++ ++ block_comment("else block for if(n<32&&ix!=npio2_hw[n-1])"); { ++ bind(LARGE_ELSE); ++ movfr2gr_d(jx, v4); ++ srli_d(tmp5, ix, 20); // j = ix >> 20 ++ slli_d(jx, jx, 1); ++ srli_d(tmp3, jx, 32 + 20 + 1); // r7 = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff); ++ sub_d(tmp3, tmp5, tmp3); ++ ++ block_comment("if(i>16)"); { ++ li(SCR1, 16); ++ bge(SCR1, tmp3, X_IS_MEDIUM_BRANCH_DONE); ++ // i > 16. 
2nd iteration needed ++ fld_d(v6, ih, -32); ++ fld_d(v7, ih, -24); ++ fmov_d(v28, v3); // t = r ++ fmul_d(v29, v2, v6); // w = v29 = fn * pio2_2 ++ fsub_d(v3, v28, v29); // r = t - w ++ fsub_d(v31, v28, v3); // v31 = (t - r) ++ fsub_d(v31, v29, v31); // v31 = w - (t - r) = - ((t - r) - w) ++ fmadd_d(v26, v2, v7, v31); // v26 = w = fn*pio2_2t - ((t - r) - w) ++ fsub_d(v4, v3, v26); // y[0] = r - w ++ movfr2gr_d(jx, v4); ++ slli_d(jx, jx, 1); ++ srli_d(tmp3, jx, 32 + 20 + 1); // r7 = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff); ++ sub_d(tmp3, tmp5, tmp3); ++ ++ block_comment("if(i>49)"); { ++ li(SCR1, 49); ++ bge(SCR1, tmp3, X_IS_MEDIUM_BRANCH_DONE); ++ // 3rd iteration need, 151 bits acc ++ fld_d(v6, ih, -16); ++ fld_d(v7, ih, -8); ++ fmov_d(v28, v3); // save "r" ++ fmul_d(v29, v2, v6); // v29 = fn * pio2_3 ++ fsub_d(v3, v28, v29); // r = r - w ++ fsub_d(v31, v28, v3); // v31 = (t - r) ++ fsub_d(v31, v29, v31); // v31 = w - (t - r) = - ((t - r) - w) ++ fmadd_d(v26, v2, v7, v31); // v26 = w = fn*pio2_3t - ((t - r) - w) ++ fsub_d(v4, v3, v26); // y[0] = r - w ++ } ++ } ++ } ++ block_comment("medium x tail"); { ++ bind(X_IS_MEDIUM_BRANCH_DONE); ++ fsub_d(v5, v3, v4); // v5 = y[1] = (r - y[0]) ++ fsub_d(v5, v5, v26); // v5 = y[1] = (r - y[0]) - w ++ blt(R0, X, REDUCTION_DONE); ++ fneg_d(v4, v4); ++ sub_w(n, R0, n); ++ fneg_d(v5, v5); ++ b(REDUCTION_DONE); ++ } ++ } ++ ++ block_comment("all other (large) arguments"); { ++ bind(X_IS_LARGE); ++ srli_d(SCR1, ix, 20); // ix >> 20 ++ li(tmp5, 0x4170000000000000); ++ addi_w(SCR1, SCR1, -1046); // e0 ++ movgr2fr_d(v24, tmp5); // init two24A value ++ slli_w(jv, SCR1, 20); // ix - (e0<<20) ++ sub_w(jv, ix, jv); ++ slli_d(jv, jv, 32); ++ addi_w(SCR2, SCR1, -3); ++ bstrins_d(jv, X, 31, 0); // jv = z ++ li(i, 24); ++ movgr2fr_d(v26, jv); // v26 = z ++ ++ block_comment("unrolled for(i=0;i<2;i++) {tx[i] = (double)((int)(z));z = (z-tx[i])*two24A;}"); { ++ // tx[0,1,2] = v6,v7,v26 ++ vfrintrz_d(v6, v26); // v6 = (double)((int)v26) ++ div_w(jv, SCR2, i); // jv = (e0 - 3)/24 ++ fsub_d(v26, v26, v6); ++ addi_d(SP, SP, -560); ++ fmul_d(v26, v26, v24); ++ vfrintrz_d(v7, v26); // v7 = (double)((int)v26) ++ li(jx, 2); // calculate jx as nx - 1, which is initially 2. Not a part of unrolled loop ++ fsub_d(v26, v26, v7); ++ } ++ ++ block_comment("nx calculation with unrolled while(tx[nx-1]==zeroA) nx--;"); { ++ vxor_v(vt, vt, vt); ++ fcmp_cne_d(FCC0, v26, vt); // if NE then jx == 2. else it's 1 or 0 ++ addi_d(iqBase, SP, 480); // base of iq[] ++ fmul_d(v3, v26, v24); ++ bcnez(FCC0, NX_SET); ++ fcmp_cne_d(FCC0, v7, vt); // v7 == 0 => jx = 0. Else jx = 1 ++ movcf2gr(jx, FCC0); ++ } ++ bind(NX_SET); ++ generate__kernel_rem_pio2(two_over_pi, pio2); ++ // now we have y[0] = v4, y[1] = v5 and n = r2 ++ bge(X, R0, REDUCTION_DONE); ++ fneg_d(v4, v4); ++ fneg_d(v5, v5); ++ sub_w(n, R0, n); ++ } ++ bind(REDUCTION_DONE); ++ ++ pop2(S0, S1); ++} ++ ++///* ++// * __kernel_rem_pio2(x,y,e0,nx,prec,ipio2) ++// * double x[],y[]; int e0,nx,prec; int ipio2[]; ++// * ++// * __kernel_rem_pio2 return the last three digits of N with ++// * y = x - N*pi/2 ++// * so that |y| < pi/2. ++// * ++// * The method is to compute the integer (mod 8) and fraction parts of ++// * (2/pi)*x without doing the full multiplication. In general we ++// * skip the part of the product that are known to be a huge integer ( ++// * more accurately, = 0 mod 8 ). Thus the number of operations are ++// * independent of the exponent of the input. 
++// * ++// * NOTE: 2/pi int representation is converted to double ++// * // (2/pi) is represented by an array of 24-bit integers in ipio2[]. ++// * ++// * Input parameters: ++// * x[] The input value (must be positive) is broken into nx ++// * pieces of 24-bit integers in double precision format. ++// * x[i] will be the i-th 24 bit of x. The scaled exponent ++// * of x[0] is given in input parameter e0 (i.e., x[0]*2^e0 ++// * match x's up to 24 bits. ++// * ++// * Example of breaking a double positive z into x[0]+x[1]+x[2]: ++// * e0 = ilogb(z)-23 ++// * z = scalbn(z,-e0) ++// * for i = 0,1,2 ++// * x[i] = floor(z) ++// * z = (z-x[i])*2**24 ++// * ++// * ++// * y[] ouput result in an array of double precision numbers. ++// * The dimension of y[] is: ++// * 24-bit precision 1 ++// * 53-bit precision 2 ++// * 64-bit precision 2 ++// * 113-bit precision 3 ++// * The actual value is the sum of them. Thus for 113-bit ++// * precsion, one may have to do something like: ++// * ++// * long double t,w,r_head, r_tail; ++// * t = (long double)y[2] + (long double)y[1]; ++// * w = (long double)y[0]; ++// * r_head = t+w; ++// * r_tail = w - (r_head - t); ++// * ++// * e0 The exponent of x[0] ++// * ++// * nx dimension of x[] ++// * ++// * prec an interger indicating the precision: ++// * 0 24 bits (single) ++// * 1 53 bits (double) ++// * 2 64 bits (extended) ++// * 3 113 bits (quad) ++// * ++// * NOTE: ipio2[] array below is converted to double representation ++// * //ipio2[] ++// * // integer array, contains the (24*i)-th to (24*i+23)-th ++// * // bit of 2/pi after binary point. The corresponding ++// * // floating value is ++// * ++// * ipio2[i] * 2^(-24(i+1)). ++// * ++// * Here is the description of some local variables: ++// * ++// * jk jk+1 is the initial number of terms of ipio2[] needed ++// * in the computation. The recommended value is 2,3,4, ++// * 6 for single, double, extended,and quad. ++// * ++// * jz local integer variable indicating the number of ++// * terms of ipio2[] used. ++// * ++// * jx nx - 1 ++// * ++// * jv index for pointing to the suitable ipio2[] for the ++// * computation. In general, we want ++// * ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8 ++// * is an integer. Thus ++// * e0-3-24*jv >= 0 or (e0-3)/24 >= jv ++// * Hence jv = max(0,(e0-3)/24). ++// * ++// * jp jp+1 is the number of terms in PIo2[] needed, jp = jk. ++// * ++// * q[] double array with integral value, representing the ++// * 24-bits chunk of the product of x and 2/pi. ++// * ++// * q0 the corresponding exponent of q[0]. Note that the ++// * exponent for q[i] would be q0-24*i. ++// * ++// * PIo2[] double precision array, obtained by cutting pi/2 ++// * into 24 bits chunks. ++// * ++// * f[] ipio2[] in floating point ++// * ++// * iq[] integer array by breaking up q[] in 24-bits chunk. ++// * ++// * fq[] final product of x*(2/pi) in fq[0],..,fq[jk] ++// * ++// * ih integer. If >0 it indicates q[] is >= 0.5, hence ++// * it also indicates the *sign* of the result. ++// * ++// */ ++// ++// Use PIo2 table(see stubRoutines_loongarch64.cpp) ++// ++// BEGIN __kernel_rem_pio2 PSEUDO CODE ++// ++//static int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec, /* NOTE: converted to double */ const double *ipio2 // const int *ipio2) { ++// int jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih; ++// double z,fw,f[20],fq[20],q[20]; ++// ++// /* initialize jk*/ ++// // jk = init_jk[prec]; // NOTE: prec==2 for double. jk is always 4. 
++// jp = jk; // NOTE: always 4 ++// ++// /* determine jx,jv,q0, note that 3>q0 */ ++// jx = nx-1; ++// jv = (e0-3)/24; if(jv<0) jv=0; ++// q0 = e0-24*(jv+1); ++// ++// /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */ ++// j = jv-jx; m = jx+jk; ++// ++// // NOTE: split into two for-loops: one with zeroB and one with ipio2[j]. It ++// // allows the use of wider loads/stores ++// for(i=0;i<=m;i++,j++) f[i] = (j<0)? zeroB : /* NOTE: converted to double */ ipio2[j]; //(double) ipio2[j]; ++// ++// // NOTE: unrolled and vectorized "for". See comments in asm code ++// /* compute q[0],q[1],...q[jk] */ ++// for (i=0;i<=jk;i++) { ++// for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; q[i] = fw; ++// } ++// ++// jz = jk; ++//recompute: ++// /* distill q[] into iq[] reversingly */ ++// for(i=0,j=jz,z=q[jz];j>0;i++,j--) { ++// fw = (double)((int)(twon24* z)); ++// iq[i] = (int)(z-two24B*fw); ++// z = q[j-1]+fw; ++// } ++// ++// /* compute n */ ++// z = scalbnA(z,q0); /* actual value of z */ ++// z -= 8.0*floor(z*0.125); /* trim off integer >= 8 */ ++// n = (int) z; ++// z -= (double)n; ++// ih = 0; ++// if(q0>0) { /* need iq[jz-1] to determine n */ ++// i = (iq[jz-1]>>(24-q0)); n += i; ++// iq[jz-1] -= i<<(24-q0); ++// ih = iq[jz-1]>>(23-q0); ++// } ++// else if(q0==0) ih = iq[jz-1]>>23; ++// else if(z>=0.5) ih=2; ++// ++// if(ih>0) { /* q > 0.5 */ ++// n += 1; carry = 0; ++// for(i=0;i<jz;i++) { /* compute 1-q */ ++// j = iq[i]; ++// if(carry==0) { ++// if(j!=0) { ++// carry = 1; iq[i] = 0x1000000- j; ++// } ++// } else iq[i] = 0xffffff - j; ++// } ++// if(q0>0) { /* rare case: chance is 1 in 12 */ ++// switch(q0) { ++// case 1: ++// iq[jz-1] &= 0x7fffff; break; ++// case 2: ++// iq[jz-1] &= 0x3fffff; break; ++// } ++// } ++// if(ih==2) { ++// z = one - z; ++// if(carry!=0) z -= scalbnA(one,q0); ++// } ++// } ++// ++// /* check if recomputation is needed */ ++// if(z==zeroB) { ++// j = 0; ++// for (i=jz-1;i>=jk;i--) j |= iq[i]; ++// if(j==0) { /* need recomputation */ ++// for(k=1;iq[jk-k]==0;k++); /* k = no. of terms needed */ ++// ++// for(i=jz+1;i<=jz+k;i++) { /* add q[jz+1] to q[jz+k] */ ++// f[jx+i] = /* NOTE: converted to double */ ipio2[jv+i]; //(double) ipio2[jv+i]; ++// for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; ++// q[i] = fw; ++// } ++// jz += k; ++// goto recompute; ++// } ++// } ++// ++// /* chop off zero terms */ ++// if(z==0.0) { ++// jz -= 1; q0 -= 24; ++// while(iq[jz]==0) { jz--; q0-=24;} ++// } else { /* break z into 24-bit if necessary */ ++// z = scalbnA(z,-q0); ++// if(z>=two24B) { ++// fw = (double)((int)(twon24*z)); ++// iq[jz] = (int)(z-two24B*fw); ++// jz += 1; q0 += 24; ++// iq[jz] = (int) fw; ++// } else iq[jz] = (int) z ; ++// } ++// ++// /* convert integer "bit" chunk to floating-point value */ ++// fw = scalbnA(one,q0); ++// for(i=jz;i>=0;i--) { ++// q[i] = fw*(double)iq[i]; fw*=twon24; ++// } ++// ++// /* compute PIo2[0,...,jp]*q[jz,...,0] */ ++// for(i=jz;i>=0;i--) { ++// for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k]; ++// fq[jz-i] = fw; ++// } ++// ++// // NOTE: switch below is eliminated, because prec is always 2 for doubles ++// /* compress fq[] into y[] */ ++// //switch(prec) { ++// //case 0: ++// // fw = 0.0; ++// // for (i=jz;i>=0;i--) fw += fq[i]; ++// // y[0] = (ih==0)? fw: -fw; ++// // break; ++// //case 1: ++// //case 2: ++// fw = 0.0; ++// for (i=jz;i>=0;i--) fw += fq[i]; ++// y[0] = (ih==0)? fw: -fw; ++// fw = fq[0]-fw; ++// for (i=1;i<=jz;i++) fw += fq[i]; ++// y[1] = (ih==0)?
fw: -fw; ++// // break; ++// //case 3: /* painful */ ++// // for (i=jz;i>0;i--) { ++// // fw = fq[i-1]+fq[i]; ++// // fq[i] += fq[i-1]-fw; ++// // fq[i-1] = fw; ++// // } ++// // for (i=jz;i>1;i--) { ++// // fw = fq[i-1]+fq[i]; ++// // fq[i] += fq[i-1]-fw; ++// // fq[i-1] = fw; ++// // } ++// // for (fw=0.0,i=jz;i>=2;i--) fw += fq[i]; ++// // if(ih==0) { ++// // y[0] = fq[0]; y[1] = fq[1]; y[2] = fw; ++// // } else { ++// // y[0] = -fq[0]; y[1] = -fq[1]; y[2] = -fw; ++// // } ++// //} ++// return n&7; ++//} ++// ++// END __kernel_rem_pio2 PSEUDO CODE ++// ++// Changes between fdlibm and intrinsic: ++// 1. One loop is unrolled and vectorized (see comments in code) ++// 2. One loop is split into 2 loops (see comments in code) ++// 3. Non-double code is removed(last switch). Sevaral variables became ++// constants because of that (see comments in code) ++// 4. Use of jx, which is nx-1 instead of nx ++// Assumptions: ++// 1. Assume |X| >= PI/4 ++// Input and output: ++// 1. Input: X = A0, jx == nx - 1 == A6, e0 == SCR1 ++// 2. Return n in A2, y[0] == y0 == FA4, y[1] == y1 == FA5 ++// NOTE: general purpose register names match local variable names in C code ++// NOTE: fpu registers are actively reused. See comments in code about their usage ++void MacroAssembler::generate__kernel_rem_pio2(address two_over_pi, address pio2) { ++ Label Q_DONE, JX_IS_0, JX_IS_2, COMP_INNER_LOOP, RECOMP_FOR2, Q0_ZERO_CMP_LT, ++ RECOMP_CHECK_DONE_NOT_ZERO, Q0_ZERO_CMP_DONE, COMP_FOR, Q0_ZERO_CMP_EQ, ++ INIT_F_ZERO, RECOMPUTE, IH_FOR_INCREMENT, IH_FOR_STORE, RECOMP_CHECK_DONE, ++ Z_IS_LESS_THAN_TWO24B, Z_IS_ZERO, FW_Y1_NO_NEGATION, ++ RECOMP_FW_UPDATED, Z_ZERO_CHECK_DONE, FW_FOR1, IH_AFTER_SWITCH, IH_HANDLED, ++ CONVERTION_FOR, FW_Y0_NO_NEGATION, FW_FOR1_DONE, FW_FOR2, FW_FOR2_DONE, ++ IH_FOR, SKIP_F_LOAD, RECOMP_FOR1, RECOMP_FIRST_FOR, INIT_F_COPY, ++ RECOMP_FOR1_CHECK; ++ Register tmp2 = A1, n = A2, jv = A4, tmp5 = A5, jx = A6, ++ tmp3 = A7, iqBase = T0, ih = T1, i = T2, tmp1 = T3, ++ jz = S0, j = T5, twoOverPiBase = T6, tmp4 = S1, qBase = T8; ++ FloatRegister v0 = FA0, v1 = FA1, v2 = FA2, v3 = FA3, v4 = FA4, v5 = FA5, v6 = FA6, v7 = FA7, ++ vt = FT1, v17 = FT2, v18 = FT3, v19 = FT4, v20 = FT5, v21 = FT6, v22 = FT7, v24 = FT8, ++ v25 = FT9, v26 = FT10, v27 = FT11, v28 = FT12, v29 = FT13, v30 = FT14, v31 = FT15; ++ // jp = jk == init_jk[prec] = init_jk[2] == {2,3,4,6}[2] == 4 ++ // jx = nx - 1 ++ li(twoOverPiBase, two_over_pi); ++ slti(SCR2, jv, 0); ++ addi_w(tmp4, jx, 4); // tmp4 = m = jx + jk = jx + 4. jx is in {0,1,2} so m is in [4,5,6] ++ masknez(jv, jv, SCR2); ++ if (UseLASX) ++ xvxor_v(v26, v26, v26); ++ else ++ vxor_v(v26, v26, v26); ++ addi_w(tmp5, jv, 1); // jv+1 ++ sub_w(j, jv, jx); ++ addi_d(qBase, SP, 320); // base of q[] ++ mul_w(SCR2, i, tmp5); // q0 = e0-24*(jv+1) ++ sub_w(SCR1, SCR1, SCR2); ++ // use double f[20], fq[20], q[20], iq[20] on stack, which is ++ // (20 + 20 + 20) x 8 + 20 x 4 = 560 bytes. From lower to upper addresses it ++ // will contain f[20], fq[20], q[20], iq[20] ++ // now initialize f[20] indexes 0..m (inclusive) ++ // for(i=0;i<=m;i++,j++) f[i] = (j<0)? zeroB : /* NOTE: converted to double */ ipio2[j]; // (double) ipio2[j]; ++ move(tmp5, SP); ++ ++ block_comment("for(i=0;i<=m;i++,j++) f[i] = (j<0)? 
zeroB : /* NOTE: converted to double */ ipio2[j]; // (double) ipio2[j];"); { ++ xorr(i, i, i); ++ bge(j, R0, INIT_F_COPY); ++ bind(INIT_F_ZERO); ++ if (UseLASX) { ++ xvst(v26, tmp5, 0); ++ } else { ++ vst(v26, tmp5, 0); ++ vst(v26, tmp5, 16); ++ } ++ addi_d(tmp5, tmp5, 32); ++ addi_w(i, i, 4); ++ addi_w(j, j, 4); ++ blt(j, R0, INIT_F_ZERO); ++ sub_w(i, i, j); ++ move(j, R0); ++ bind(INIT_F_COPY); ++ alsl_d(tmp1, j, twoOverPiBase, 3 - 1); // ipio2[j] start address ++ if (UseLASX) { ++ xvld(v18, tmp1, 0); ++ xvld(v19, tmp1, 32); ++ } else { ++ vld(v18, tmp1, 0); ++ vld(v19, tmp1, 16); ++ vld(v20, tmp1, 32); ++ vld(v21, tmp1, 48); ++ } ++ alsl_d(tmp5, i, SP, 3 - 1); ++ if (UseLASX) { ++ xvst(v18, tmp5, 0); ++ xvst(v19, tmp5, 32); ++ } else { ++ vst(v18, tmp5, 0); ++ vst(v19, tmp5, 16); ++ vst(v20, tmp5, 32); ++ vst(v21, tmp5, 48); ++ } ++ } ++ // v18..v21 can actually contain f[0..7] ++ beqz(i, SKIP_F_LOAD); // i == 0 => f[i] == f[0] => already loaded ++ if (UseLASX) { ++ xvld(v18, SP, 0); // load f[0..7] ++ xvld(v19, SP, 32); ++ } else { ++ vld(v18, SP, 0); // load f[0..7] ++ vld(v19, SP, 16); ++ vld(v20, SP, 32); ++ vld(v21, SP, 48); ++ } ++ bind(SKIP_F_LOAD); ++ // calculate 2^q0 and 2^-q0, which we'll need further. ++ // q0 is exponent. So, calculate biased exponent(q0+1023) ++ sub_w(tmp4, R0, SCR1); ++ addi_w(tmp5, SCR1, 1023); ++ addi_w(tmp4, tmp4, 1023); ++ // Unroll following for(s) depending on jx in [0,1,2] ++ // for (i=0;i<=jk;i++) { ++ // for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; q[i] = fw; ++ // } ++ // Unrolling for jx == 0 case: ++ // q[0] = x[0] * f[0] ++ // q[1] = x[0] * f[1] ++ // q[2] = x[0] * f[2] ++ // q[3] = x[0] * f[3] ++ // q[4] = x[0] * f[4] ++ // ++ // Vectorization for unrolled jx == 0 case: ++ // {q[0], q[1]} = {f[0], f[1]} * x[0] ++ // {q[2], q[3]} = {f[2], f[3]} * x[0] ++ // q[4] = f[4] * x[0] ++ // ++ // Unrolling for jx == 1 case: ++ // q[0] = x[0] * f[1] + x[1] * f[0] ++ // q[1] = x[0] * f[2] + x[1] * f[1] ++ // q[2] = x[0] * f[3] + x[1] * f[2] ++ // q[3] = x[0] * f[4] + x[1] * f[3] ++ // q[4] = x[0] * f[5] + x[1] * f[4] ++ // ++ // Vectorization for unrolled jx == 1 case: ++ // {q[0], q[1]} = {f[0], f[1]} * x[1] ++ // {q[2], q[3]} = {f[2], f[3]} * x[1] ++ // q[4] = f[4] * x[1] ++ // {q[0], q[1]} += {f[1], f[2]} * x[0] ++ // {q[2], q[3]} += {f[3], f[4]} * x[0] ++ // q[4] += f[5] * x[0] ++ // ++ // Unrolling for jx == 2 case: ++ // q[0] = x[0] * f[2] + x[1] * f[1] + x[2] * f[0] ++ // q[1] = x[0] * f[3] + x[1] * f[2] + x[2] * f[1] ++ // q[2] = x[0] * f[4] + x[1] * f[3] + x[2] * f[2] ++ // q[3] = x[0] * f[5] + x[1] * f[4] + x[2] * f[3] ++ // q[4] = x[0] * f[6] + x[1] * f[5] + x[2] * f[4] ++ // ++ // Vectorization for unrolled jx == 2 case: ++ // {q[0], q[1]} = {f[0], f[1]} * x[2] ++ // {q[2], q[3]} = {f[2], f[3]} * x[2] ++ // q[4] = f[4] * x[2] ++ // {q[0], q[1]} += {f[1], f[2]} * x[1] ++ // {q[2], q[3]} += {f[3], f[4]} * x[1] ++ // q[4] += f[5] * x[1] ++ // {q[0], q[1]} += {f[2], f[3]} * x[0] ++ // {q[2], q[3]} += {f[4], f[5]} * x[0] ++ // q[4] += f[6] * x[0] ++ block_comment("unrolled and vectorized computation of q[0]..q[jk]"); { ++ li(SCR2, 1); ++ slli_d(tmp5, tmp5, 52); // now it's 2^q0 double value ++ slli_d(tmp4, tmp4, 52); // now it's 2^-q0 double value ++ if (UseLASX) ++ xvpermi_d(v6, v6, 0); ++ else ++ vreplvei_d(v6, v6, 0); ++ blt(jx, SCR2, JX_IS_0); ++ addi_d(i, SP, 8); ++ if (UseLASX) { ++ xvld(v26, i, 0); // load f[1..4] ++ xvpermi_d(v3, v3, 0); ++ xvpermi_d(v7, v7, 0); ++ xvpermi_d(v20, v19, 85); ++ xvpermi_d(v21, v19, 170); ++ } else { ++ 
vld(v26, i, 0); // load f[1..4] ++ vld(v27, i, 16); ++ vreplvei_d(v3, v3, 0); ++ vreplvei_d(v7, v7, 0); ++ vreplvei_d(vt, v20, 1); ++ vreplvei_d(v21, v21, 0); ++ } ++ blt(SCR2, jx, JX_IS_2); ++ // jx == 1 ++ if (UseLASX) { ++ xvfmul_d(v28, v18, v7); // f[0,3] * x[1] ++ fmul_d(v30, v19, v7); // f[4] * x[1] ++ xvfmadd_d(v28, v26, v6, v28); ++ fmadd_d(v30, v6, v20, v30); // v30 += f[5] * x[0] ++ } else { ++ vfmul_d(v28, v18, v7); // f[0,1] * x[1] ++ vfmul_d(v29, v19, v7); // f[2,3] * x[1] ++ fmul_d(v30, v20, v7); // f[4] * x[1] ++ vfmadd_d(v28, v26, v6, v28); ++ vfmadd_d(v29, v27, v6, v29); ++ fmadd_d(v30, v6, vt, v30); // v30 += f[5] * x[0] ++ } ++ b(Q_DONE); ++ bind(JX_IS_2); ++ if (UseLASX) { ++ xvfmul_d(v28, v18, v3); // f[0,3] * x[2] ++ fmul_d(v30, v19, v3); // f[4] * x[2] ++ xvfmadd_d(v28, v26, v7, v28); ++ fmadd_d(v30, v7, v20, v30); // v30 += f[5] * x[1] ++ xvpermi_q(v18, v19, 3); ++ xvfmadd_d(v28, v18, v6, v28); ++ } else { ++ vfmul_d(v28, v18, v3); // f[0,1] * x[2] ++ vfmul_d(v29, v19, v3); // f[2,3] * x[2] ++ fmul_d(v30, v20, v3); // f[4] * x[2] ++ vfmadd_d(v28, v26, v7, v28); ++ vfmadd_d(v29, v27, v7, v29); ++ fmadd_d(v30, v7, vt, v30); // v30 += f[5] * x[1] ++ vfmadd_d(v28, v19, v6, v28); ++ vfmadd_d(v29, v20, v6, v29); ++ } ++ fmadd_d(v30, v6, v21, v30); // v30 += f[6] * x[0] ++ b(Q_DONE); ++ bind(JX_IS_0); ++ if (UseLASX) { ++ xvfmul_d(v28, v18, v6); // f[0,1] * x[0] ++ fmul_d(v30, v19, v6); // f[4] * x[0] ++ } else { ++ vfmul_d(v28, v18, v6); // f[0,1] * x[0] ++ vfmul_d(v29, v19, v6); // f[2,3] * x[0] ++ fmul_d(v30, v20, v6); // f[4] * x[0] ++ } ++ bind(Q_DONE); ++ if (UseLASX) { ++ xvst(v28, qBase, 0); // save calculated q[0]...q[jk] ++ } else { ++ vst(v28, qBase, 0); // save calculated q[0]...q[jk] ++ vst(v29, qBase, 16); ++ } ++ fst_d(v30, qBase, 32); ++ } ++ li(i, 0x3E70000000000000); ++ li(jz, 4); ++ movgr2fr_d(v17, i); // v17 = twon24 ++ movgr2fr_d(v30, tmp5); // 2^q0 ++ vldi(v21, -960); // 0.125 (0x3fc0000000000000) ++ vldi(v20, -992); // 8.0 (0x4020000000000000) ++ movgr2fr_d(v22, tmp4); // 2^-q0 ++ ++ block_comment("recompute loop"); { ++ bind(RECOMPUTE); ++ // for(i=0,j=jz,z=q[jz];j>0;i++,j--) { ++ // fw = (double)((int)(twon24* z)); ++ // iq[i] = (int)(z-two24A*fw); ++ // z = q[j-1]+fw; ++ // } ++ block_comment("distill q[] into iq[] reversingly"); { ++ xorr(i, i, i); ++ move(j, jz); ++ alsl_d(tmp2, jz, qBase, 3 - 1); // q[jz] address ++ fld_d(v18, tmp2, 0); // z = q[j] and moving address to q[j-1] ++ addi_d(tmp2, tmp2, -8); ++ bind(RECOMP_FIRST_FOR); ++ fld_d(v27, tmp2, 0); ++ addi_d(tmp2, tmp2, -8); ++ fmul_d(v29, v17, v18); // twon24*z ++ vfrintrz_d(v29, v29); // (double)(int) ++ fnmsub_d(v28, v24, v29, v18); // v28 = z-two24A*fw ++ ftintrz_w_d(vt, v28); // (int)(z-two24A*fw) ++ alsl_d(SCR2, i, iqBase, 2 - 1); ++ fst_s(vt, SCR2, 0); ++ fadd_d(v18, v27, v29); ++ addi_w(i, i, 1); ++ addi_w(j, j, -1); ++ blt(R0, j, RECOMP_FIRST_FOR); ++ } ++ // compute n ++ fmul_d(v18, v18, v30); ++ fmul_d(v2, v18, v21); ++ vfrintrm_d(v2, v2); // v2 = floor(v2) == rounding towards -inf ++ fnmsub_d(v18, v2, v20, v18); // z -= 8.0*floor(z*0.125); ++ li(ih, 2); ++ vfrintrz_d(v2, v18); // v2 = (double)((int)z) ++ ftintrz_w_d(vt, v18); // n = (int) z; ++ movfr2gr_s(n, vt); ++ fsub_d(v18, v18, v2); // z -= (double)n; ++ ++ block_comment("q0-dependent initialization"); { ++ blt(SCR1, R0, Q0_ZERO_CMP_LT); // if (q0 > 0) ++ addi_w(j, jz, -1); // j = jz - 1 ++ alsl_d(SCR2, j, iqBase, 2 - 1); ++ ld_w(tmp2, SCR2, 0); // tmp2 = iq[jz-1] ++ beq(SCR1, R0, Q0_ZERO_CMP_EQ); ++ li(tmp4, 24); ++ 
sub_w(tmp4, tmp4, SCR1); // == 24 - q0 ++ srl_w(i, tmp2, tmp4); // i = iq[jz-1] >> (24-q0) ++ sll_w(tmp5, i, tmp4); ++ sub_w(tmp2, tmp2, tmp5); // iq[jz-1] -= i<<(24-q0); ++ alsl_d(SCR2, j, iqBase, 2 - 1); ++ st_w(tmp2, SCR2, 0); // store iq[jz-1] ++ addi_w(SCR2, tmp4, -1); // == 23 - q0 ++ add_w(n, n, i); // n+=i ++ srl_w(ih, tmp2, SCR2); // ih = iq[jz-1] >> (23-q0) ++ b(Q0_ZERO_CMP_DONE); ++ bind(Q0_ZERO_CMP_EQ); ++ srli_d(ih, tmp2, 23); // ih = iq[z-1] >> 23 ++ b(Q0_ZERO_CMP_DONE); ++ bind(Q0_ZERO_CMP_LT); ++ vldi(v4, -928); // 0.5 (0x3fe0000000000000) ++ fcmp_clt_d(FCC0, v18, v4); ++ movcf2gr(SCR2, FCC0); ++ masknez(ih, ih, SCR2); // if (z<0.5) ih = 0 ++ } ++ bind(Q0_ZERO_CMP_DONE); ++ bge(R0, ih, IH_HANDLED); ++ ++ block_comment("if(ih>) {"); { ++ // use rscratch2 as carry ++ ++ block_comment("for(i=0;i0) {"); { ++ bge(R0, SCR1, IH_AFTER_SWITCH); ++ // tmp3 still has iq[jz-1] value. no need to reload ++ // now, zero high tmp3 bits (rscratch1 number of bits) ++ li(j, 0xffffffff); ++ addi_w(i, jz, -1); // set i to jz-1 ++ srl_d(j, j, SCR1); ++ srli_w(tmp1, j, 8); ++ andr(tmp3, tmp3, tmp1); // we have 24-bit-based constants ++ alsl_d(tmp1, i, iqBase, 2 - 1); ++ st_w(tmp3, tmp1, 0); // save iq[jz-1] ++ } ++ bind(IH_AFTER_SWITCH); ++ li(tmp1, 2); ++ bne(ih, tmp1, IH_HANDLED); ++ ++ block_comment("if(ih==2) {"); { ++ vldi(v25, -912); // 1.0 (0x3ff0000000000000) ++ fsub_d(v18, v25, v18); // z = one - z; ++ beqz(SCR2, IH_HANDLED); ++ fsub_d(v18, v18, v30); // z -= scalbnA(one,q0); ++ } ++ } ++ bind(IH_HANDLED); ++ // check if recomputation is needed ++ vxor_v(vt, vt, vt); ++ fcmp_cne_d(FCC0, v18, vt); ++ bcnez(FCC0, RECOMP_CHECK_DONE_NOT_ZERO); ++ ++ block_comment("if(z==zeroB) {"); { ++ ++ block_comment("for (i=jz-1;i>=jk;i--) j |= iq[i];"); { ++ addi_w(i, jz, -1); ++ xorr(j, j, j); ++ b(RECOMP_FOR1_CHECK); ++ bind(RECOMP_FOR1); ++ alsl_d(tmp1, i, iqBase, 2 - 1); ++ ld_w(tmp1, tmp1, 0); ++ orr(j, j, tmp1); ++ addi_w(i, i, -1); ++ bind(RECOMP_FOR1_CHECK); ++ li(SCR2, 4); ++ bge(i, SCR2, RECOMP_FOR1); ++ } ++ bnez(j, RECOMP_CHECK_DONE); ++ ++ block_comment("if(j==0) {"); { ++ // for(k=1;iq[jk-k]==0;k++); // let's unroll it. jk == 4. So, read ++ // iq[3], iq[2], iq[1], iq[0] until non-zero value ++ ld_d(tmp1, iqBase, 0); // iq[0..3] ++ ld_d(tmp3, iqBase, 8); ++ li(j, 2); ++ masknez(tmp1, tmp1, tmp3); // set register for further consideration ++ orr(tmp1, tmp1, tmp3); ++ masknez(j, j, tmp3); // set initial k. Use j as k ++ srli_d(SCR2, tmp1, 32); ++ sltu(SCR2, R0, SCR2); ++ addi_w(i, jz, 1); ++ add_w(j, j, SCR2); ++ ++ block_comment("for(i=jz+1;i<=jz+k;i++) {...}"); { ++ add_w(jz, i, j); // i = jz+1, j = k-1. 
j+i = jz+k (which is a new jz) ++ bind(RECOMP_FOR2); ++ add_w(tmp1, jv, i); ++ alsl_d(SCR2, tmp1, twoOverPiBase, 3 - 1); ++ fld_d(v29, SCR2, 0); ++ add_w(tmp2, jx, i); ++ alsl_d(SCR2, tmp2, SP, 3 - 1); ++ fst_d(v29, SCR2, 0); ++ // f[jx+i] = /* NOTE: converted to double */ ipio2[jv+i]; //(double) ipio2[jv+i]; ++ // since jx = 0, 1 or 2 we can unroll it: ++ // for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; ++ // f[jx+i-j] == (for first iteration) f[jx+i], which is already v29 ++ alsl_d(tmp2, tmp2, SP, 3 - 1); // address of f[jx+i] ++ fld_d(v4, tmp2, -16); // load f[jx+i-2] and f[jx+i-1] ++ fld_d(v5, tmp2, -8); ++ fmul_d(v26, v6, v29); // initial fw ++ beqz(jx, RECOMP_FW_UPDATED); ++ fmadd_d(v26, v7, v5, v26); ++ li(SCR2, 1); ++ beq(jx, SCR2, RECOMP_FW_UPDATED); ++ fmadd_d(v26, v3, v4, v26); ++ bind(RECOMP_FW_UPDATED); ++ alsl_d(SCR2, i, qBase, 3 - 1); ++ fst_d(v26, SCR2, 0); // q[i] = fw; ++ addi_w(i, i, 1); ++ bge(jz, i, RECOMP_FOR2); // jz here is "old jz" + k ++ } ++ b(RECOMPUTE); ++ } ++ } ++ } ++ bind(RECOMP_CHECK_DONE); ++ // chop off zero terms ++ vxor_v(vt, vt, vt); ++ fcmp_ceq_d(FCC0, v18, vt); ++ bcnez(FCC0, Z_IS_ZERO); ++ ++ block_comment("else block of if(z==0.0) {"); { ++ bind(RECOMP_CHECK_DONE_NOT_ZERO); ++ fmul_d(v18, v18, v22); ++ fcmp_clt_d(FCC0, v18, v24); // v24 is stil two24A ++ bcnez(FCC0, Z_IS_LESS_THAN_TWO24B); ++ fmul_d(v1, v18, v17); // twon24*z ++ vfrintrz_d(v1, v1); // v1 = (double)(int)(v1) ++ fnmsub_d(v2, v24, v1, v18); ++ ftintrz_w_d(vt, v1); // (int)fw ++ movfr2gr_s(tmp3, vt); ++ ftintrz_w_d(vt, v2); // double to int ++ movfr2gr_s(tmp2, vt); ++ alsl_d(SCR2, jz, iqBase, 2 - 1); ++ st_w(tmp2, SCR2, 0); ++ addi_w(SCR1, SCR1, 24); ++ addi_w(jz, jz, 1); ++ st_w(tmp3, SCR2, 0); // iq[jz] = (int) fw ++ b(Z_ZERO_CHECK_DONE); ++ bind(Z_IS_LESS_THAN_TWO24B); ++ ftintrz_w_d(vt, v18); // (int)z ++ movfr2gr_s(tmp3, vt); ++ alsl_d(SCR2, jz, iqBase, 2 - 1); ++ st_w(tmp3, SCR2, 0); // iq[jz] = (int) z ++ b(Z_ZERO_CHECK_DONE); ++ } ++ ++ block_comment("if(z==0.0) {"); { ++ bind(Z_IS_ZERO); ++ addi_w(jz, jz, -1); ++ alsl_d(SCR2, jz, iqBase, 2 - 1); ++ ld_w(tmp1, SCR2, 0); ++ addi_w(SCR1, SCR1, -24); ++ beqz(tmp1, Z_IS_ZERO); ++ } ++ bind(Z_ZERO_CHECK_DONE); ++ // convert integer "bit" chunk to floating-point value ++ // v17 = twon24 ++ // update v30, which was scalbnA(1.0, ); ++ addi_w(tmp2, SCR1, 1023); // biased exponent ++ slli_d(tmp2, tmp2, 52); // put at correct position ++ move(i, jz); ++ movgr2fr_d(v30, tmp2); ++ ++ block_comment("for(i=jz;i>=0;i--) {q[i] = fw*(double)iq[i]; fw*=twon24;}"); { ++ bind(CONVERTION_FOR); ++ alsl_d(SCR2, i, iqBase, 2 - 1); ++ fld_s(v31, SCR2, 0); ++ vffintl_d_w(v31, v31); ++ fmul_d(v31, v31, v30); ++ alsl_d(SCR2, i, qBase, 3 - 1); ++ fst_d(v31, SCR2, 0); ++ fmul_d(v30, v30, v17); ++ addi_w(i, i, -1); ++ bge(i, R0, CONVERTION_FOR); ++ } ++ addi_d(SCR2, SP, 160); // base for fq ++ // reusing twoOverPiBase ++ li(twoOverPiBase, pio2); ++ ++ block_comment("compute PIo2[0,...,jp]*q[jz,...,0]. 
for(i=jz;i>=0;i--) {...}"); { ++ move(i, jz); ++ move(tmp2, R0); // tmp2 will keep jz - i == 0 at start ++ bind(COMP_FOR); ++ // for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k]; ++ vxor_v(v30, v30, v30); ++ alsl_d(tmp5, i, qBase, 3 - 1); // address of q[i+k] for k==0 ++ li(tmp3, 4); ++ slti(tmp4, tmp2, 5); ++ alsl_d(tmp1, i, qBase, 3 - 1); // used as q[i] address ++ masknez(tmp3, tmp3, tmp4); // min(jz - i, jp); ++ maskeqz(tmp4, tmp2, tmp4); ++ orr(tmp3, tmp3, tmp4); ++ move(tmp4, R0); // used as k ++ ++ block_comment("for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k];"); { ++ bind(COMP_INNER_LOOP); ++ alsl_d(tmp5, tmp4, tmp1, 3 - 1); ++ fld_d(v18, tmp5, 0); // q[i+k] ++ alsl_d(tmp5, tmp4, twoOverPiBase, 3 - 1); ++ fld_d(v19, tmp5, 0); // PIo2[k] ++ fmadd_d(v30, v18, v19, v30); // fw += PIo2[k]*q[i+k]; ++ addi_w(tmp4, tmp4, 1); // k++ ++ bge(tmp3, tmp4, COMP_INNER_LOOP); ++ } ++ alsl_d(tmp5, tmp2, SCR2, 3 - 1); ++ fst_d(v30, tmp5, 0); // fq[jz-i] ++ addi_d(tmp2, tmp2, 1); ++ addi_w(i, i, -1); ++ bge(i, R0, COMP_FOR); ++ } ++ ++ block_comment("switch(prec) {...}. case 2:"); { ++ // compress fq into y[] ++ // remember prec == 2 ++ ++ block_comment("for (i=jz;i>=0;i--) fw += fq[i];"); { ++ vxor_v(v4, v4, v4); ++ move(i, jz); ++ bind(FW_FOR1); ++ alsl_d(tmp5, i, SCR2, 3 - 1); ++ fld_d(v1, tmp5, 0); ++ addi_w(i, i, -1); ++ fadd_d(v4, v4, v1); ++ bge(i, R0, FW_FOR1); ++ } ++ bind(FW_FOR1_DONE); ++ // v1 contains fq[0]. so, keep it so far ++ fsub_d(v5, v1, v4); // fw = fq[0] - fw ++ beqz(ih, FW_Y0_NO_NEGATION); ++ fneg_d(v4, v4); ++ bind(FW_Y0_NO_NEGATION); ++ ++ block_comment("for (i=1;i<=jz;i++) fw += fq[i];"); { ++ li(i, 1); ++ blt(jz, i, FW_FOR2_DONE); ++ bind(FW_FOR2); ++ alsl_d(tmp5, i, SCR2, 3 - 1); ++ fld_d(v1, tmp5, 0); ++ addi_w(i, i, 1); ++ fadd_d(v5, v5, v1); ++ bge(jz, i, FW_FOR2); ++ } ++ bind(FW_FOR2_DONE); ++ beqz(ih, FW_Y1_NO_NEGATION); ++ fneg_d(v5, v5); ++ bind(FW_Y1_NO_NEGATION); ++ addi_d(SP, SP, 560); ++ } ++} ++ ++///* __kernel_sin( x, y, iy) ++// * kernel sin function on [-pi/4, pi/4], pi/4 ~ 0.7854 ++// * Input x is assumed to be bounded by ~pi/4 in magnitude. ++// * Input y is the tail of x. ++// * Input iy indicates whether y is 0. (if iy=0, y assume to be 0). ++// * ++// * Algorithm ++// * 1. Since sin(-x) = -sin(x), we need only to consider positive x. ++// * 2. if x < 2^-27 (hx<0x3e400000 0), return x with inexact if x!=0. ++// * 3. sin(x) is approximated by a polynomial of degree 13 on ++// * [0,pi/4] ++// * 3 13 ++// * sin(x) ~ x + S1*x + ... + S6*x ++// * where ++// * ++// * |sin(x) 2 4 6 8 10 12 | -58 ++// * |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x +S6*x )| <= 2 ++// * | x | ++// * ++// * 4. 
sin(x+y) = sin(x) + sin'(x')*y ++// * ~ sin(x) + (1-x*x/2)*y ++// * For better accuracy, let ++// * 3 2 2 2 2 ++// * r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6)))) ++// * then 3 2 ++// * sin(x) = x + (S1*x + (x *(r-y/2)+y)) ++// */ ++//static const double ++//S1 = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */ ++//S2 = 8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */ ++//S3 = -1.98412698298579493134e-04, /* 0xBF2A01A0, 0x19C161D5 */ ++//S4 = 2.75573137070700676789e-06, /* 0x3EC71DE3, 0x57B1FE7D */ ++//S5 = -2.50507602534068634195e-08, /* 0xBE5AE5E6, 0x8A2B9CEB */ ++//S6 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */ ++// ++// NOTE: S1..S6 were moved into a table: StubRoutines::la::_dsin_coef ++// ++// BEGIN __kernel_sin PSEUDO CODE ++// ++//static double __kernel_sin(double x, double y, bool iy) ++//{ ++// double z,r,v; ++// ++// // NOTE: not needed. moved to dsin/dcos ++// //int ix; ++// //ix = high(x)&0x7fffffff; /* high word of x */ ++// ++// // NOTE: moved to dsin/dcos ++// //if(ix<0x3e400000) /* |x| < 2**-27 */ ++// // {if((int)x==0) return x;} /* generate inexact */ ++// ++// z = x*x; ++// v = z*x; ++// r = S2+z*(S3+z*(S4+z*(S5+z*S6))); ++// if(iy==0) return x+v*(S1+z*r); ++// else return x-((z*(half*y-v*r)-y)-v*S1); ++//} ++// ++// END __kernel_sin PSEUDO CODE ++// ++// Changes between fdlibm and intrinsic: ++// 1. Removed |x| < 2**-27 check, because if was done earlier in dsin/dcos ++// 2. Constants are now loaded from table dsin_coef ++// 3. C code parameter "int iy" was modified to "bool iyIsOne", because ++// iy is always 0 or 1. Also, iyIsOne branch was moved into ++// generation phase instead of taking it during code execution ++// Input ans output: ++// 1. Input for generated function: X argument = x ++// 2. Input for generator: x = register to read argument from, iyIsOne ++// = flag to use low argument low part or not, dsin_coef = coefficients ++// table address ++// 3. Return sin(x) value in FA0 ++void MacroAssembler::generate_kernel_sin(FloatRegister x, bool iyIsOne, address dsin_coef) { ++ FloatRegister y = FA5, z = FA6, v = FA7, r = FT0, s1 = FT1, s2 = FT2, ++ s3 = FT3, s4 = FT4, s5 = FT5, s6 = FT6, half = FT7; ++ li(SCR2, dsin_coef); ++ fld_d(s5, SCR2, 32); ++ fld_d(s6, SCR2, 40); ++ fmul_d(z, x, x); // z = x*x; ++ fld_d(s1, SCR2, 0); ++ fld_d(s2, SCR2, 8); ++ fld_d(s3, SCR2, 16); ++ fld_d(s4, SCR2, 24); ++ fmul_d(v, z, x); // v = z*x; ++ ++ block_comment("calculate r = S2+z*(S3+z*(S4+z*(S5+z*S6)))"); { ++ fmadd_d(r, z, s6, s5); ++ // initialize "half" in current block to utilize 2nd FPU. However, it's ++ // not a part of this block ++ vldi(half, -928); // 0.5 (0x3fe0000000000000) ++ fmadd_d(r, z, r, s4); ++ fmadd_d(r, z, r, s3); ++ fmadd_d(r, z, r, s2); ++ } ++ ++ if (!iyIsOne) { ++ // return x+v*(S1+z*r); ++ fmadd_d(s1, z, r, s1); ++ fmadd_d(FA0, v, s1, x); ++ } else { ++ // return x-((z*(half*y-v*r)-y)-v*S1); ++ fmul_d(s6, half, y); // half*y ++ fnmsub_d(s6, v, r, s6); // half*y-v*r ++ fnmsub_d(s6, z, s6, y); // y - z*(half*y-v*r) = - (z*(half*y-v*r)-y) ++ fmadd_d(s6, v, s1, s6); // - (z*(half*y-v*r)-y) + v*S1 == -((z*(half*y-v*r)-y)-v*S1) ++ fadd_d(FA0, x, s6); ++ } ++} ++ ++///* ++// * __kernel_cos( x, y ) ++// * kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164 ++// * Input x is assumed to be bounded by ~pi/4 in magnitude. ++// * Input y is the tail of x. ++// * ++// * Algorithm ++// * 1. Since cos(-x) = cos(x), we need only to consider positive x. ++// * 2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0. ++// * 3. 
cos(x) is approximated by a polynomial of degree 14 on ++// * [0,pi/4] ++// * 4 14 ++// * cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x ++// * where the remez error is ++// * ++// * | 2 4 6 8 10 12 14 | -58 ++// * |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x +C6*x )| <= 2 ++// * | | ++// * ++// * 4 6 8 10 12 14 ++// * 4. let r = C1*x +C2*x +C3*x +C4*x +C5*x +C6*x , then ++// * cos(x) = 1 - x*x/2 + r ++// * since cos(x+y) ~ cos(x) - sin(x)*y ++// * ~ cos(x) - x*y, ++// * a correction term is necessary in cos(x) and hence ++// * cos(x+y) = 1 - (x*x/2 - (r - x*y)) ++// * For better accuracy when x > 0.3, let qx = |x|/4 with ++// * the last 32 bits mask off, and if x > 0.78125, let qx = 0.28125. ++// * Then ++// * cos(x+y) = (1-qx) - ((x*x/2-qx) - (r-x*y)). ++// * Note that 1-qx and (x*x/2-qx) is EXACT here, and the ++// * magnitude of the latter is at least a quarter of x*x/2, ++// * thus, reducing the rounding error in the subtraction. ++// */ ++// ++//static const double ++//C1 = 4.16666666666666019037e-02, /* 0x3FA55555, 0x5555554C */ ++//C2 = -1.38888888888741095749e-03, /* 0xBF56C16C, 0x16C15177 */ ++//C3 = 2.48015872894767294178e-05, /* 0x3EFA01A0, 0x19CB1590 */ ++//C4 = -2.75573143513906633035e-07, /* 0xBE927E4F, 0x809C52AD */ ++//C5 = 2.08757232129817482790e-09, /* 0x3E21EE9E, 0xBDB4B1C4 */ ++//C6 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */ ++// ++// NOTE: C1..C6 were moved into a table: StubRoutines::la::_dcos_coef ++// ++// BEGIN __kernel_cos PSEUDO CODE ++// ++//static double __kernel_cos(double x, double y) ++//{ ++// double a,h,z,r,qx=0; ++// ++// // NOTE: ix is already initialized in dsin/dcos. Reuse value from register ++// //int ix; ++// //ix = high(x)&0x7fffffff; /* ix = |x|'s high word*/ ++// ++// // NOTE: moved to dsin/dcos ++// //if(ix<0x3e400000) { /* if x < 2**27 */ ++// // if(((int)x)==0) return one; /* generate inexact */ ++// //} ++// ++// z = x*x; ++// r = z*(C1+z*(C2+z*(C3+z*(C4+z*(C5+z*C6))))); ++// if(ix < 0x3FD33333) /* if |x| < 0.3 */ ++// return one - (0.5*z - (z*r - x*y)); ++// else { ++// if(ix > 0x3fe90000) { /* x > 0.78125 */ ++// qx = 0.28125; ++// } else { ++// set_high(&qx, ix-0x00200000); /* x/4 */ ++// set_low(&qx, 0); ++// } ++// h = 0.5*z-qx; ++// a = one-qx; ++// return a - (h - (z*r-x*y)); ++// } ++//} ++// ++// END __kernel_cos PSEUDO CODE ++// ++// Changes between fdlibm and intrinsic: ++// 1. Removed |x| < 2**-27 check, because if was done earlier in dsin/dcos ++// 2. Constants are now loaded from table dcos_coef ++// Input and output: ++// 1. Input for generated function: X argument = x ++// 2. Input for generator: x = register to read argument from, dcos_coef ++// = coefficients table address ++// 3. 
Return cos(x) value in FA0 ++void MacroAssembler::generate_kernel_cos(FloatRegister x, address dcos_coef) { ++ Register ix = A3; ++ FloatRegister qx = FA1, h = FA2, a = FA3, y = FA5, z = FA6, r = FA7, C1 = FT0, ++ C2 = FT1, C3 = FT2, C4 = FT3, C5 = FT4, C6 = FT5, one = FT6, half = FT7; ++ Label IX_IS_LARGE, SET_QX_CONST, DONE, QX_SET; ++ li(SCR2, dcos_coef); ++ fld_d(C1, SCR2, 0); ++ fld_d(C2, SCR2, 8); ++ fld_d(C3, SCR2, 16); ++ fld_d(C4, SCR2, 24); ++ fld_d(C5, SCR2, 32); ++ fld_d(C6, SCR2, 40); ++ fmul_d(z, x, x); // z=x^2 ++ block_comment("calculate r = z*(C1+z*(C2+z*(C3+z*(C4+z*(C5+z*C6)))))"); { ++ fmadd_d(r, z, C6, C5); ++ vldi(half, -928); // 0.5 (0x3fe0000000000000) ++ fmadd_d(r, z, r, C4); ++ fmul_d(y, x, y); ++ fmadd_d(r, z, r, C3); ++ li(SCR1, 0x3FD33333); ++ fmadd_d(r, z, r, C2); ++ fmul_d(x, z, z); // x = z^2 ++ fmadd_d(r, z, r, C1); // r = C1+z(C2+z(C4+z(C5+z*C6))) ++ } ++ // need to multiply r by z to have "final" r value ++ vldi(one, -912); // 1.0 (0x3ff0000000000000) ++ bge(ix, SCR1, IX_IS_LARGE); ++ block_comment("if(ix < 0x3FD33333) return one - (0.5*z - (z*r - x*y))"); { ++ // return 1.0 - (0.5*z - (z*r - x*y)) = 1.0 - (0.5*z + (x*y - z*r)) ++ fnmsub_d(FA0, x, r, y); ++ fmadd_d(FA0, half, z, FA0); ++ fsub_d(FA0, one, FA0); ++ b(DONE); ++ } ++ block_comment("if(ix >= 0x3FD33333)"); { ++ bind(IX_IS_LARGE); ++ li(SCR2, 0x3FE90000); ++ blt(SCR2, ix, SET_QX_CONST); ++ block_comment("set_high(&qx, ix-0x00200000); set_low(&qx, 0);"); { ++ li(SCR2, 0x00200000); ++ sub_w(SCR2, ix, SCR2); ++ slli_d(SCR2, SCR2, 32); ++ movgr2fr_d(qx, SCR2); ++ } ++ b(QX_SET); ++ bind(SET_QX_CONST); ++ block_comment("if(ix > 0x3fe90000) qx = 0.28125;"); { ++ vldi(qx, -942); // 0.28125 (0x3fd2000000000000) ++ } ++ bind(QX_SET); ++ fmsub_d(C6, x, r, y); // z*r - xy ++ fmsub_d(h, half, z, qx); // h = 0.5*z - qx ++ fsub_d(a, one, qx); // a = 1-qx ++ fsub_d(C6, h, C6); // = h - (z*r - x*y) ++ fsub_d(FA0, a, C6); ++ } ++ bind(DONE); ++} ++ ++// generate_dsin_dcos creates stub for dsin and dcos ++// Generation is done via single call because dsin and dcos code is almost the ++// same(see C code below). These functions work as follows: ++// 1) handle corner cases: |x| ~< pi/4, x is NaN or INF, |x| < 2**-27 ++// 2) perform argument reduction if required ++// 3) call kernel_sin or kernel_cos which approximate sin/cos via polynomial ++// ++// BEGIN dsin/dcos PSEUDO CODE ++// ++//dsin_dcos(jdouble x, bool isCos) { ++// double y[2],z=0.0; ++// int n, ix; ++// ++// /* High word of x. */ ++// ix = high(x); ++// ++// /* |x| ~< pi/4 */ ++// ix &= 0x7fffffff; ++// if(ix <= 0x3fe921fb) return isCos ? __kernel_cos : __kernel_sin(x,z,0); ++// ++// /* sin/cos(Inf or NaN) is NaN */ ++// else if (ix>=0x7ff00000) return x-x; ++// else if (ix<0x3e400000) { /* if ix < 2**27 */ ++// if(((int)x)==0) return isCos ? one : x; /* generate inexact */ ++// } ++// /* argument reduction needed */ ++// else { ++// n = __ieee754_rem_pio2(x,y); ++// switch(n&3) { ++// case 0: return isCos ? __kernel_cos(y[0],y[1]) : __kernel_sin(y[0],y[1], true); ++// case 1: return isCos ? -__kernel_sin(y[0],y[1],true) : __kernel_cos(y[0],y[1]); ++// case 2: return isCos ? -__kernel_cos(y[0],y[1]) : -__kernel_sin(y[0],y[1], true); ++// default: ++// return isCos ? __kernel_sin(y[0],y[1],1) : -__kernel_cos(y[0],y[1]); ++// } ++// } ++//} ++// END dsin/dcos PSEUDO CODE ++// ++// Changes between fdlibm and intrinsic: ++// 1. Moved ix < 2**27 from kernel_sin/kernel_cos into dsin/dcos ++// 2. 
Final switch use equivalent bit checks(tbz/tbnz) ++// Input ans output: ++// 1. Input for generated function: X = A0 ++// 2. Input for generator: isCos = generate sin or cos, npio2_hw = address ++// of npio2_hw table, two_over_pi = address of two_over_pi table, ++// pio2 = address if pio2 table, dsin_coef = address if dsin_coef table, ++// dcos_coef = address of dcos_coef table ++// 3. Return result in FA0 ++// NOTE: general purpose register names match local variable names in C code ++void MacroAssembler::generate_dsin_dcos(bool isCos, address npio2_hw, ++ address two_over_pi, address pio2, ++ address dsin_coef, address dcos_coef) { ++ Label DONE, ARG_REDUCTION, TINY_X, RETURN_SIN, EARLY_CASE; ++ Register X = A0, absX = A1, n = A2, ix = A3; ++ FloatRegister y0 = FA4, y1 = FA5; ++ ++ block_comment("check |x| ~< pi/4, NaN, Inf and |x| < 2**-27 cases"); { ++ movfr2gr_d(X, FA0); ++ li(SCR2, 0x3e400000); ++ li(SCR1, 0x3fe921fb); // high word of pi/4. ++ bstrpick_d(absX, X, 62, 0); // absX ++ li(T0, 0x7ff0000000000000); ++ srli_d(ix, absX, 32); // set ix ++ blt(ix, SCR2, TINY_X); // handle tiny x (|x| < 2^-27) ++ bge(SCR1, ix, EARLY_CASE); // if(ix <= 0x3fe921fb) return ++ blt(absX, T0, ARG_REDUCTION); ++ // X is NaN or INF(i.e. 0x7FF* or 0xFFF*). Return NaN (mantissa != 0). ++ // Set last bit unconditionally to make it NaN ++ ori(T0, T0, 1); ++ movgr2fr_d(FA0, T0); ++ jr(RA); ++ } ++ block_comment("kernel_sin/kernel_cos: if(ix<0x3e400000) {}"); { ++ bind(TINY_X); ++ if (isCos) { ++ vldi(FA0, -912); // 1.0 (0x3ff0000000000000) ++ } ++ jr(RA); ++ } ++ bind(ARG_REDUCTION); /* argument reduction needed */ ++ block_comment("n = __ieee754_rem_pio2(x,y);"); { ++ generate__ieee754_rem_pio2(npio2_hw, two_over_pi, pio2); ++ } ++ block_comment("switch(n&3) {case ... }"); { ++ if (isCos) { ++ srli_w(T0, n, 1); ++ xorr(absX, n, T0); ++ andi(T0, n, 1); ++ bnez(T0, RETURN_SIN); ++ } else { ++ andi(T0, n, 1); ++ beqz(T0, RETURN_SIN); ++ } ++ generate_kernel_cos(y0, dcos_coef); ++ if (isCos) { ++ andi(T0, absX, 1); ++ beqz(T0, DONE); ++ } else { ++ andi(T0, n, 2); ++ beqz(T0, DONE); ++ } ++ fneg_d(FA0, FA0); ++ jr(RA); ++ bind(RETURN_SIN); ++ generate_kernel_sin(y0, true, dsin_coef); ++ if (isCos) { ++ andi(T0, absX, 1); ++ beqz(T0, DONE); ++ } else { ++ andi(T0, n, 2); ++ beqz(T0, DONE); ++ } ++ fneg_d(FA0, FA0); ++ jr(RA); ++ } ++ bind(EARLY_CASE); ++ vxor_v(y1, y1, y1); ++ if (isCos) { ++ generate_kernel_cos(FA0, dcos_coef); ++ } else { ++ generate_kernel_sin(FA0, false, dsin_coef); ++ } ++ bind(DONE); ++ jr(RA); ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/methodHandles_loongarch.cpp b/src/hotspot/cpu/loongarch/methodHandles_loongarch.cpp +--- a/src/hotspot/cpu/loongarch/methodHandles_loongarch.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/methodHandles_loongarch.cpp 2024-01-30 10:00:11.841431732 +0800 +@@ -0,0 +1,564 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "classfile/javaClasses.inline.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "memory/allocation.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "utilities/preserveException.hpp" ++ ++#define __ _masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T8 RT8 ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) // nothing ++#define STOP(error) stop(error) ++#else ++#define BLOCK_COMMENT(str) __ block_comment(str) ++#define STOP(error) block_comment(error); __ stop(error) ++#endif ++ ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++ ++void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg) { ++ if (VerifyMethodHandles) ++ verify_klass(_masm, klass_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_Class), ++ "MH argument is a Class"); ++ __ ld_d(klass_reg, Address(klass_reg, java_lang_Class::klass_offset_in_bytes())); ++} ++ ++#ifdef ASSERT ++static int check_nonzero(const char* xname, int x) { ++ assert(x != 0, "%s should be nonzero", xname); ++ return x; ++} ++#define NONZERO(x) check_nonzero(#x, x) ++#else //ASSERT ++#define NONZERO(x) (x) ++#endif //ASSERT ++ ++#ifdef ASSERT ++void MethodHandles::verify_klass(MacroAssembler* _masm, ++ Register obj, SystemDictionary::WKID klass_id, ++ const char* error_message) { ++} ++ ++void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) { ++ Label L; ++ BLOCK_COMMENT("verify_ref_kind {"); ++ __ ld_w(temp, Address(member_reg, NONZERO(java_lang_invoke_MemberName::flags_offset_in_bytes()))); ++ __ srai_w(temp, temp, java_lang_invoke_MemberName::MN_REFERENCE_KIND_SHIFT); ++ __ li(AT, java_lang_invoke_MemberName::MN_REFERENCE_KIND_MASK); ++ __ andr(temp, temp, AT); ++ __ li(AT, ref_kind); ++ __ beq(temp, AT, L); ++ { char* buf = NEW_C_HEAP_ARRAY(char, 100, mtInternal); ++ jio_snprintf(buf, 100, "verify_ref_kind expected %x", ref_kind); ++ if (ref_kind == JVM_REF_invokeVirtual || ++ ref_kind == JVM_REF_invokeSpecial) ++ // could do this for all ref_kinds, but would explode assembly code size ++ trace_method_handle(_masm, buf); ++ __ STOP(buf); ++ } ++ BLOCK_COMMENT("} verify_ref_kind"); ++ __ bind(L); ++} ++ ++#endif //ASSERT ++ ++void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, ++ bool for_compiler_entry) { ++ assert(method == Rmethod, "interpreter calling convention"); ++ ++ Label L_no_such_method; ++ __ beq(method, R0, L_no_such_method); ++ ++ __ verify_method_ptr(method); ++ ++ if (!for_compiler_entry && 
JvmtiExport::can_post_interpreter_events()) { ++ Label run_compiled_code; ++ // JVMTI events, such as single-stepping, are implemented partly by avoiding running ++ // compiled code in threads for which the event is enabled. Check here for ++ // interp_only_mode if these events CAN be enabled. ++ Register rthread = TREG; ++ // interp_only is an int, on little endian it is sufficient to test the byte only ++ // Is a cmpl faster? ++ __ ld_bu(AT, rthread, in_bytes(JavaThread::interp_only_mode_offset())); ++ __ beq(AT, R0, run_compiled_code); ++ __ ld_d(T4, method, in_bytes(Method::interpreter_entry_offset())); ++ __ jr(T4); ++ __ BIND(run_compiled_code); ++ } ++ ++ const ByteSize entry_offset = for_compiler_entry ? Method::from_compiled_offset() : ++ Method::from_interpreted_offset(); ++ __ ld_d(T4, method, in_bytes(entry_offset)); ++ __ jr(T4); ++ ++ __ bind(L_no_such_method); ++ address wrong_method = StubRoutines::throw_AbstractMethodError_entry(); ++ __ jmp(wrong_method, relocInfo::runtime_call_type); ++} ++ ++void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm, ++ Register recv, Register method_temp, ++ Register temp2, ++ bool for_compiler_entry) { ++ BLOCK_COMMENT("jump_to_lambda_form {"); ++ // This is the initial entry point of a lazy method handle. ++ // After type checking, it picks up the invoker from the LambdaForm. ++ assert_different_registers(recv, method_temp, temp2); ++ assert(recv != noreg, "required register"); ++ assert(method_temp == Rmethod, "required register for loading method"); ++ ++ //NOT_PRODUCT({ FlagSetting fs(TraceMethodHandles, true); trace_method_handle(_masm, "LZMH"); }); ++ ++ // Load the invoker, as MH -> MH.form -> LF.vmentry ++ __ verify_oop(recv); ++ __ load_heap_oop(method_temp, Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset_in_bytes())), temp2); ++ __ verify_oop(method_temp); ++ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset_in_bytes())), temp2); ++ __ verify_oop(method_temp); ++ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::method_offset_in_bytes()))); ++ __ verify_oop(method_temp); ++ __ access_load_at(T_ADDRESS, IN_HEAP, method_temp, Address(method_temp, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset_in_bytes())), noreg, noreg); ++ ++ if (VerifyMethodHandles && !for_compiler_entry) { ++ // make sure recv is already on stack ++ __ ld_d(temp2, Address(method_temp, Method::const_offset())); ++ __ load_sized_value(temp2, ++ Address(temp2, ConstMethod::size_of_parameters_offset()), ++ sizeof(u2), false); ++ // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); ++ Label L; ++ Address recv_addr = __ argument_address(temp2, -1); ++ __ ld_d(AT, recv_addr); ++ __ beq(recv, AT, L); ++ ++ recv_addr = __ argument_address(temp2, -1); ++ __ ld_d(V0, recv_addr); ++ __ STOP("receiver not on stack"); ++ __ BIND(L); ++ } ++ ++ jump_from_method_handle(_masm, method_temp, temp2, for_compiler_entry); ++ BLOCK_COMMENT("} jump_to_lambda_form"); ++} ++ ++ ++// Code generation ++address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm, ++ vmIntrinsics::ID iid) { ++ const bool not_for_compiler_entry = false; // this is the interpreter entry ++ assert(is_signature_polymorphic(iid), "expected invoke iid"); ++ if (iid == vmIntrinsics::_invokeGeneric || ++ iid == vmIntrinsics::_compiledLambdaForm) { ++ // Perhaps surprisingly, the symbolic references visible to Java are not directly used. 
++ // They are linked to Java-generated adapters via MethodHandleNatives.linkMethod. ++ // They all allow an appendix argument. ++ __ stop("empty stubs make SG sick"); ++ return NULL; ++ } ++ ++ // Rmethod: Method* ++ // T4: argument locator (parameter slot count, added to sp) ++ // S7: used as temp to hold mh or receiver ++ Register t4_argp = T4; // argument list ptr, live on error paths ++ Register s7_mh = S7; // MH receiver; dies quickly and is recycled ++ Register rm_method = Rmethod; // eventual target of this invocation ++ ++ // here's where control starts out: ++ __ align(CodeEntryAlignment); ++ address entry_point = __ pc(); ++ ++ if (VerifyMethodHandles) { ++ assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2"); ++ ++ Label L; ++ BLOCK_COMMENT("verify_intrinsic_id {"); ++ __ ld_hu(AT, rm_method, Method::intrinsic_id_offset_in_bytes()); ++ guarantee(Assembler::is_simm(iid, 12), "Oops, iid is not simm12! Change the instructions."); ++ __ addi_d(AT, AT, -1 * (int) iid); ++ __ beq(AT, R0, L); ++ if (iid == vmIntrinsics::_linkToVirtual || ++ iid == vmIntrinsics::_linkToSpecial) { ++ // could do this for all kinds, but would explode assembly code size ++ trace_method_handle(_masm, "bad Method*::intrinsic_id"); ++ } ++ __ STOP("bad Method*::intrinsic_id"); ++ __ bind(L); ++ BLOCK_COMMENT("} verify_intrinsic_id"); ++ } ++ ++ // First task: Find out how big the argument list is. ++ Address t4_first_arg_addr; ++ int ref_kind = signature_polymorphic_intrinsic_ref_kind(iid); ++ assert(ref_kind != 0 || iid == vmIntrinsics::_invokeBasic, "must be _invokeBasic or a linkTo intrinsic"); ++ if (ref_kind == 0 || MethodHandles::ref_kind_has_receiver(ref_kind)) { ++ __ ld_d(t4_argp, Address(rm_method, Method::const_offset())); ++ __ load_sized_value(t4_argp, ++ Address(t4_argp, ConstMethod::size_of_parameters_offset()), ++ sizeof(u2), false); ++ // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); ++ t4_first_arg_addr = __ argument_address(t4_argp, -1); ++ } else { ++ DEBUG_ONLY(t4_argp = noreg); ++ } ++ ++ if (!is_signature_polymorphic_static(iid)) { ++ __ ld_d(s7_mh, t4_first_arg_addr); ++ DEBUG_ONLY(t4_argp = noreg); ++ } ++ ++ // t4_first_arg_addr is live! ++ ++ trace_method_handle_interpreter_entry(_masm, iid); ++ ++ if (iid == vmIntrinsics::_invokeBasic) { ++ generate_method_handle_dispatch(_masm, iid, s7_mh, noreg, not_for_compiler_entry); ++ ++ } else { ++ // Adjust argument list by popping the trailing MemberName argument. ++ Register r_recv = noreg; ++ if (MethodHandles::ref_kind_has_receiver(ref_kind)) { ++ // Load the receiver (not the MH; the actual MemberName's receiver) up from the interpreter stack. 
++ __ ld_d(r_recv = T2, t4_first_arg_addr); ++ } ++ DEBUG_ONLY(t4_argp = noreg); ++ Register rm_member = rm_method; // MemberName ptr; incoming method ptr is dead now ++ __ pop(rm_member); // extract last argument ++ generate_method_handle_dispatch(_masm, iid, r_recv, rm_member, not_for_compiler_entry); ++ } ++ ++ return entry_point; ++} ++ ++void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, ++ vmIntrinsics::ID iid, ++ Register receiver_reg, ++ Register member_reg, ++ bool for_compiler_entry) { ++ assert(is_signature_polymorphic(iid), "expected invoke iid"); ++ Register rm_method = Rmethod; // eventual target of this invocation ++ // temps used in this code are not used in *either* compiled or interpreted calling sequences ++ Register j_rarg0 = T0; ++ Register j_rarg1 = A0; ++ Register j_rarg2 = A1; ++ Register j_rarg3 = A2; ++ Register j_rarg4 = A3; ++ Register j_rarg5 = A4; ++ ++ Register temp1 = T8; ++ Register temp2 = T4; ++ Register temp3 = T5; ++ if (for_compiler_entry) { ++ assert(receiver_reg == (iid == vmIntrinsics::_linkToStatic ? noreg : j_rarg0), "only valid assignment"); ++ assert_different_registers(temp1, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); ++ assert_different_registers(temp2, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); ++ assert_different_registers(temp3, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); ++ } ++ else { ++ assert_different_registers(temp1, temp2, temp3, saved_last_sp_register()); // don't trash lastSP ++ } ++ assert_different_registers(temp1, temp2, temp3, receiver_reg); ++ assert_different_registers(temp1, temp2, temp3, member_reg); ++ ++ if (iid == vmIntrinsics::_invokeBasic) { ++ // indirect through MH.form.vmentry.vmtarget ++ jump_to_lambda_form(_masm, receiver_reg, rm_method, temp1, for_compiler_entry); ++ ++ } else { ++ // The method is a member invoker used by direct method handles. ++ if (VerifyMethodHandles) { ++ // make sure the trailing argument really is a MemberName (caller responsibility) ++ verify_klass(_masm, member_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MemberName), ++ "MemberName required for invokeVirtual etc."); ++ } ++ ++ Address member_clazz( member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset_in_bytes())); ++ Address member_vmindex( member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset_in_bytes())); ++ Address member_vmtarget( member_reg, NONZERO(java_lang_invoke_MemberName::method_offset_in_bytes())); ++ Address vmtarget_method( rm_method, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset_in_bytes())); ++ ++ Register temp1_recv_klass = temp1; ++ if (iid != vmIntrinsics::_linkToStatic) { ++ __ verify_oop(receiver_reg); ++ if (iid == vmIntrinsics::_linkToSpecial) { ++ // Don't actually load the klass; just null-check the receiver. ++ __ null_check(receiver_reg); ++ } else { ++ // load receiver klass itself ++ __ null_check(receiver_reg, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(temp1_recv_klass, receiver_reg); ++ __ verify_klass_ptr(temp1_recv_klass); ++ } ++ BLOCK_COMMENT("check_receiver {"); ++ // The receiver for the MemberName must be in receiver_reg. ++ // Check the receiver against the MemberName.clazz ++ if (VerifyMethodHandles && iid == vmIntrinsics::_linkToSpecial) { ++ // Did not load it above... 
++ __ load_klass(temp1_recv_klass, receiver_reg); ++ __ verify_klass_ptr(temp1_recv_klass); ++ } ++ if (VerifyMethodHandles && iid != vmIntrinsics::_linkToInterface) { ++ Label L_ok; ++ Register temp2_defc = temp2; ++ __ load_heap_oop(temp2_defc, member_clazz, temp3); ++ load_klass_from_Class(_masm, temp2_defc); ++ __ verify_klass_ptr(temp2_defc); ++ __ check_klass_subtype(temp1_recv_klass, temp2_defc, temp3, L_ok); ++ // If we get here, the type check failed! ++ __ STOP("receiver class disagrees with MemberName.clazz"); ++ __ bind(L_ok); ++ } ++ BLOCK_COMMENT("} check_receiver"); ++ } ++ if (iid == vmIntrinsics::_linkToSpecial || ++ iid == vmIntrinsics::_linkToStatic) { ++ DEBUG_ONLY(temp1_recv_klass = noreg); // these guys didn't load the recv_klass ++ } ++ ++ // Live registers at this point: ++ // member_reg - MemberName that was the trailing argument ++ // temp1_recv_klass - klass of stacked receiver, if needed ++ ++ Label L_incompatible_class_change_error; ++ switch (iid) { ++ case vmIntrinsics::_linkToSpecial: ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeSpecial, member_reg, temp3); ++ } ++ __ load_heap_oop(rm_method, member_vmtarget); ++ __ access_load_at(T_ADDRESS, IN_HEAP, rm_method, vmtarget_method, noreg, noreg); ++ break; ++ ++ case vmIntrinsics::_linkToStatic: ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeStatic, member_reg, temp3); ++ } ++ __ load_heap_oop(rm_method, member_vmtarget); ++ __ access_load_at(T_ADDRESS, IN_HEAP, rm_method, vmtarget_method, noreg, noreg); ++ break; ++ ++ case vmIntrinsics::_linkToVirtual: ++ { ++ // same as TemplateTable::invokevirtual, ++ // minus the CP setup and profiling: ++ ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeVirtual, member_reg, temp3); ++ } ++ ++ // pick out the vtable index from the MemberName, and then we can discard it: ++ Register temp2_index = temp2; ++ __ access_load_at(T_ADDRESS, IN_HEAP, temp2_index, member_vmindex, noreg, noreg); ++ if (VerifyMethodHandles) { ++ Label L_index_ok; ++ __ blt(R0, temp2_index, L_index_ok); ++ __ STOP("no virtual index"); ++ __ BIND(L_index_ok); ++ } ++ ++ // Note: The verifier invariants allow us to ignore MemberName.clazz and vmtarget ++ // at this point. And VerifyMethodHandles has already checked clazz, if needed. 
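As a rough illustration only (not part of the patch, and assuming Klass::method_at_vtable behaves as in mainline HotSpot), the dispatch that lookup_virtual_method encodes into the stub corresponds to this VM-level lookup:

// Illustration only -- hypothetical helper, requires HotSpot's Klass/Method types.
// vtable_index is the value just loaded from MemberName.vmindex.
static Method* link_to_virtual_target(Klass* recv_klass, int vtable_index) {
  assert(vtable_index >= 0, "no virtual index");     // mirrors the VerifyMethodHandles check
  return recv_klass->method_at_vtable(vtable_index); // what the emitted vtable loads compute
}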
++ ++ // get target Method* & entry point ++ __ lookup_virtual_method(temp1_recv_klass, temp2_index, rm_method); ++ break; ++ } ++ ++ case vmIntrinsics::_linkToInterface: ++ { ++ // same as TemplateTable::invokeinterface ++ // (minus the CP setup and profiling, with different argument motion) ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeInterface, member_reg, temp3); ++ } ++ ++ Register temp3_intf = temp3; ++ __ load_heap_oop(temp3_intf, member_clazz); ++ load_klass_from_Class(_masm, temp3_intf); ++ __ verify_klass_ptr(temp3_intf); ++ ++ Register rm_index = rm_method; ++ __ access_load_at(T_ADDRESS, IN_HEAP, rm_index, member_vmindex, noreg, noreg); ++ if (VerifyMethodHandles) { ++ Label L; ++ __ bge(rm_index, R0, L); ++ __ STOP("invalid vtable index for MH.invokeInterface"); ++ __ bind(L); ++ } ++ ++ // given intf, index, and recv klass, dispatch to the implementation method ++ __ lookup_interface_method(temp1_recv_klass, temp3_intf, ++ // note: next two args must be the same: ++ rm_index, rm_method, ++ temp2, ++ L_incompatible_class_change_error); ++ break; ++ } ++ ++ default: ++ fatal("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid)); ++ break; ++ } ++ ++ // Live at this point: ++ // rm_method ++ ++ // After figuring out which concrete method to call, jump into it. ++ // Note that this works in the interpreter with no data motion. ++ // But the compiled version will require that r_recv be shifted out. ++ __ verify_method_ptr(rm_method); ++ jump_from_method_handle(_masm, rm_method, temp1, for_compiler_entry); ++ ++ if (iid == vmIntrinsics::_linkToInterface) { ++ __ bind(L_incompatible_class_change_error); ++ address icce_entry= StubRoutines::throw_IncompatibleClassChangeError_entry(); ++ __ jmp(icce_entry, relocInfo::runtime_call_type); ++ } ++ } ++} ++ ++#ifndef PRODUCT ++void trace_method_handle_stub(const char* adaptername, ++ oop mh, ++ intptr_t* saved_regs, ++ intptr_t* entry_sp) { ++ // called as a leaf from native code: do not block the JVM! ++ bool has_mh = (strstr(adaptername, "/static") == NULL && ++ strstr(adaptername, "linkTo") == NULL); // static linkers don't have MH ++ const char* mh_reg_name = has_mh ? "s7_mh" : "s7"; ++ tty->print_cr("MH %s %s=" PTR_FORMAT " sp=" PTR_FORMAT, ++ adaptername, mh_reg_name, ++ p2i(mh), p2i(entry_sp)); ++ ++ if (Verbose) { ++ tty->print_cr("Registers:"); ++ const int saved_regs_count = RegisterImpl::number_of_registers; ++ for (int i = 0; i < saved_regs_count; i++) { ++ Register r = as_Register(i); ++ // The registers are stored in reverse order on the stack (by pusha). ++ tty->print("%3s=" PTR_FORMAT, r->name(), saved_regs[((saved_regs_count - 1) - i)]); ++ if ((i + 1) % 4 == 0) { ++ tty->cr(); ++ } else { ++ tty->print(", "); ++ } ++ } ++ tty->cr(); ++ ++ { ++ // dumping last frame with frame::describe ++ ++ JavaThread* p = JavaThread::active(); ++ ++ ResourceMark rm; ++ PRESERVE_EXCEPTION_MARK; // may not be needed by safer and unexpensive here ++ FrameValues values; ++ ++ // Note: We want to allow trace_method_handle from any call site. ++ // While trace_method_handle creates a frame, it may be entered ++ // without a PC on the stack top (e.g. not just after a call). ++ // Walking that frame could lead to failures due to that invalid PC. ++ // => carefully detect that frame when doing the stack walking ++ ++ // Current C frame ++ frame cur_frame = os::current_frame(); ++ ++ // Robust search of trace_calling_frame (independant of inlining). 
++ // Assumes saved_regs comes from a pusha in the trace_calling_frame. ++ assert(cur_frame.sp() < saved_regs, "registers not saved on stack ?"); ++ frame trace_calling_frame = os::get_sender_for_C_frame(&cur_frame); ++ while (trace_calling_frame.fp() < saved_regs) { ++ trace_calling_frame = os::get_sender_for_C_frame(&trace_calling_frame); ++ } ++ ++ // safely create a frame and call frame::describe ++ intptr_t *dump_sp = trace_calling_frame.sender_sp(); ++ intptr_t *dump_fp = trace_calling_frame.link(); ++ ++ bool walkable = has_mh; // whether the traced frame shoud be walkable ++ ++ if (walkable) { ++ // The previous definition of walkable may have to be refined ++ // if new call sites cause the next frame constructor to start ++ // failing. Alternatively, frame constructors could be ++ // modified to support the current or future non walkable ++ // frames (but this is more intrusive and is not considered as ++ // part of this RFE, which will instead use a simpler output). ++ frame dump_frame = frame(dump_sp, dump_fp); ++ dump_frame.describe(values, 1); ++ } else { ++ // Stack may not be walkable (invalid PC above FP): ++ // Add descriptions without building a Java frame to avoid issues ++ values.describe(-1, dump_fp, "fp for #1 "); ++ values.describe(-1, dump_sp, "sp for #1"); ++ } ++ values.describe(-1, entry_sp, "raw top of stack"); ++ ++ tty->print_cr("Stack layout:"); ++ values.print(p); ++ } ++ if (has_mh && oopDesc::is_oop(mh)) { ++ mh->print(); ++ if (java_lang_invoke_MethodHandle::is_instance(mh)) { ++ if (java_lang_invoke_MethodHandle::form_offset_in_bytes() != 0) ++ java_lang_invoke_MethodHandle::form(mh)->print(); ++ } ++ } ++ } ++} ++ ++// The stub wraps the arguments in a struct on the stack to avoid ++// dealing with the different calling conventions for passing 6 ++// arguments. ++struct MethodHandleStubArguments { ++ const char* adaptername; ++ oopDesc* mh; ++ intptr_t* saved_regs; ++ intptr_t* entry_sp; ++}; ++void trace_method_handle_stub_wrapper(MethodHandleStubArguments* args) { ++ trace_method_handle_stub(args->adaptername, ++ args->mh, ++ args->saved_regs, ++ args->entry_sp); ++} ++ ++void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) { ++} ++#endif //PRODUCT +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/methodHandles_loongarch.hpp b/src/hotspot/cpu/loongarch/methodHandles_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/methodHandles_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/methodHandles_loongarch.hpp 2024-01-30 10:00:11.841431732 +0800 +@@ -0,0 +1,62 @@ ++/* ++ * Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++// Platform-specific definitions for method handles. ++// These definitions are inlined into class MethodHandles. ++ ++// Adapters ++enum /* platform_dependent_constants */ { ++ adapter_code_size = 32000 DEBUG_ONLY(+ 150000) ++}; ++ ++// Additional helper methods for MethodHandles code generation: ++public: ++ static void load_klass_from_Class(MacroAssembler* _masm, Register klass_reg); ++ ++ static void verify_klass(MacroAssembler* _masm, ++ Register obj, SystemDictionary::WKID klass_id, ++ const char* error_message = "wrong klass") NOT_DEBUG_RETURN; ++ ++ static void verify_method_handle(MacroAssembler* _masm, Register mh_reg) { ++ verify_klass(_masm, mh_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MethodHandle), ++ "reference is a MH"); ++ } ++ ++ static void verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) NOT_DEBUG_RETURN; ++ ++ // Similar to InterpreterMacroAssembler::jump_from_interpreted. ++ // Takes care of special dispatch from single stepping too. ++ static void jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, ++ bool for_compiler_entry); ++ ++ static void jump_to_lambda_form(MacroAssembler* _masm, ++ Register recv, Register method_temp, ++ Register temp2, ++ bool for_compiler_entry); ++ ++ static Register saved_last_sp_register() { ++ // Should be in sharedRuntime, not here. ++ return R3; ++ } +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/nativeInst_loongarch.cpp b/src/hotspot/cpu/loongarch/nativeInst_loongarch.cpp +--- a/src/hotspot/cpu/loongarch/nativeInst_loongarch.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/nativeInst_loongarch.cpp 2024-01-30 10:00:11.841431732 +0800 +@@ -0,0 +1,511 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/codeCache.hpp" ++#include "code/compiledIC.hpp" ++#include "memory/resourceArea.hpp" ++#include "nativeInst_loongarch.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/handles.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "utilities/ostream.hpp" ++ ++#ifndef PRODUCT ++#include "compiler/disassembler.hpp" ++#endif ++ ++#include ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++void NativeInstruction::wrote(int offset) { ++ ICache::invalidate_word(addr_at(offset)); ++} ++ ++void NativeInstruction::set_long_at(int offset, long i) { ++ address addr = addr_at(offset); ++ *(long*)addr = i; ++ ICache::invalidate_range(addr, 8); ++} ++ ++bool NativeInstruction::is_int_branch() { ++ int op = Assembler::high(insn_word(), 6); ++ return op == Assembler::beqz_op || op == Assembler::bnez_op || ++ op == Assembler::beq_op || op == Assembler::bne_op || ++ op == Assembler::blt_op || op == Assembler::bge_op || ++ op == Assembler::bltu_op || op == Assembler::bgeu_op; ++} ++ ++bool NativeInstruction::is_float_branch() { ++ return Assembler::high(insn_word(), 6) == Assembler::bccondz_op; ++} ++ ++bool NativeInstruction::is_lu12iw_lu32id() const { ++ return Assembler::high(int_at(0), 7) == Assembler::lu12i_w_op && ++ Assembler::high(int_at(4), 7) == Assembler::lu32i_d_op; ++} ++ ++bool NativeInstruction::is_pcaddu12i_add() const { ++ return Assembler::high(int_at(0), 7) == Assembler::pcaddu12i_op && ++ Assembler::high(int_at(4), 10) == Assembler::addi_d_op; ++} ++ ++bool NativeCall::is_bl() const { ++ return Assembler::high(int_at(0), 6) == Assembler::bl_op; ++} ++ ++void NativeCall::verify() { ++ assert(is_bl(), "not a NativeCall"); ++} ++ ++address NativeCall::target_addr_for_bl(address orig_addr) const { ++ address addr = orig_addr ? orig_addr : addr_at(0); ++ ++ // bl ++ if (is_bl()) { ++ return addr + (Assembler::simm26(((int_at(0) & 0x3ff) << 16) | ++ ((int_at(0) >> 10) & 0xffff)) << 2); ++ } ++ ++ fatal("not a NativeCall"); ++ return NULL; ++} ++ ++address NativeCall::destination() const { ++ address addr = (address)this; ++ address destination = target_addr_for_bl(); ++ // Do we use a trampoline stub for this call? ++ // Trampoline stubs are located behind the main code. ++ if (destination > addr) { ++ // Filter out recursive method invocation (call to verified/unverified entry point). ++ CodeBlob* cb = CodeCache::find_blob_unsafe(addr); // Else we get assertion if nmethod is zombie. ++ assert(cb && cb->is_nmethod(), "sanity"); ++ nmethod *nm = (nmethod *)cb; ++ NativeInstruction* ni = nativeInstruction_at(destination); ++ if (nm->stub_contains(destination) && ni->is_NativeCallTrampolineStub_at()) { ++ // Yes we do, so get the destination from the trampoline stub. ++ const address trampoline_stub_addr = destination; ++ destination = nativeCallTrampolineStub_at(trampoline_stub_addr)->destination(); ++ } ++ } ++ return destination; ++} ++ ++// Similar to replace_mt_safe, but just changes the destination. The ++// important thing is that free-running threads are able to execute this ++// call instruction at all times. ++// ++// Used in the runtime linkage of calls; see class CompiledIC. ++// ++// Add parameter assert_lock to switch off assertion ++// during code generation, where no patching lock is needed. 
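A minimal sketch of the reachability decision used below (illustration only; it assumes MacroAssembler::reachable_from_branch_short reduces to a simple range check on the signed 26-bit, 4-byte-scaled bl offset, i.e. roughly +-128 MB):

#include <cstdint>

typedef unsigned char* address;   // as in HotSpot's globalDefinitions

// Illustration only: can a bl at branch_pc reach dest directly?
// 26-bit signed immediate in 4-byte units => +-2^27 bytes.
static bool bl_can_reach(address branch_pc, address dest) {
  intptr_t offs = dest - branch_pc;
  return offs >= -(intptr_t(1) << 27) && offs < (intptr_t(1) << 27);
}
// When this is false, set_destination_mt_safe leaves the bl pointing at the
// trampoline stub and only rewrites the stub's 64-bit destination word.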
++void NativeCall::set_destination_mt_safe(address dest, bool assert_lock) { ++ assert(!assert_lock || ++ (Patching_lock->is_locked() || SafepointSynchronize::is_at_safepoint()), ++ "concurrent code patching"); ++ ++ ResourceMark rm; ++ address addr_call = addr_at(0); ++ bool reachable = MacroAssembler::reachable_from_branch_short(dest - addr_call); ++ assert(NativeCall::is_call_at(addr_call), "unexpected code at call site"); ++ ++ // Patch the call. ++ if (!reachable) { ++ address trampoline_stub_addr = get_trampoline(); ++ assert (trampoline_stub_addr != NULL, "we need a trampoline"); ++ guarantee(Assembler::is_simm((trampoline_stub_addr - addr_call) >> 2, 26), "cannot reach trampoline stub"); ++ ++ // Patch the constant in the call's trampoline stub. ++ NativeInstruction* ni = nativeInstruction_at(dest); ++ assert (! ni->is_NativeCallTrampolineStub_at(), "chained trampolines"); ++ nativeCallTrampolineStub_at(trampoline_stub_addr)->set_destination(dest); ++ dest = trampoline_stub_addr; ++ } ++ set_destination(dest); ++} ++ ++address NativeCall::get_trampoline() { ++ address call_addr = addr_at(0); ++ ++ CodeBlob *code = CodeCache::find_blob(call_addr); ++ assert(code != NULL, "Could not find the containing code blob"); ++ ++ address bl_destination ++ = nativeCall_at(call_addr)->target_addr_for_bl(); ++ NativeInstruction* ni = nativeInstruction_at(bl_destination); ++ if (code->contains(bl_destination) && ++ ni->is_NativeCallTrampolineStub_at()) ++ return bl_destination; ++ ++ if (code->is_nmethod()) { ++ return trampoline_stub_Relocation::get_trampoline_for(call_addr, (nmethod*)code); ++ } ++ ++ return NULL; ++} ++ ++void NativeCall::set_destination(address dest) { ++ address addr_call = addr_at(0); ++ CodeBuffer cb(addr_call, instruction_size); ++ MacroAssembler masm(&cb); ++ assert(is_call_at(addr_call), "unexpected call type"); ++ jlong offs = dest - addr_call; ++ masm.bl(offs >> 2); ++ ICache::invalidate_range(addr_call, instruction_size); ++} ++ ++// Generate a trampoline for a branch to dest. If there's no need for a ++// trampoline, simply patch the call directly to dest. ++address NativeCall::trampoline_jump(CodeBuffer &cbuf, address dest) { ++ MacroAssembler a(&cbuf); ++ address stub = NULL; ++ ++ if (a.far_branches() ++ && ! is_NativeCallTrampolineStub_at()) { ++ stub = a.emit_trampoline_stub(instruction_address() - cbuf.insts()->start(), dest); ++ } ++ ++ if (stub == NULL) { ++ // If we generated no stub, patch this call directly to dest. ++ // This will happen if we don't need far branches or if there ++ // already was a trampoline. ++ set_destination(dest); ++ } ++ ++ return stub; ++} ++ ++void NativeCall::print() { ++ tty->print_cr(PTR_FORMAT ": call " PTR_FORMAT, ++ p2i(instruction_address()), p2i(destination())); ++} ++ ++// Inserts a native call instruction at a given pc ++void NativeCall::insert(address code_pos, address entry) { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++} ++ ++// MT-safe patching of a call instruction. ++// First patches first word of instruction to two jmp's that jmps to them ++// selfs (spinlock). Then patches the last byte, and then atomicly replaces ++// the jmp's with the first 4 byte of the new instruction. 
++void NativeCall::replace_mt_safe(address instr_addr, address code_buffer) { ++ Unimplemented(); ++} ++ ++bool NativeFarCall::is_short() const { ++ return Assembler::high(int_at(0), 10) == Assembler::andi_op && ++ Assembler::low(int_at(0), 22) == 0 && ++ Assembler::high(int_at(4), 6) == Assembler::bl_op; ++} ++ ++bool NativeFarCall::is_far() const { ++ return Assembler::high(int_at(0), 7) == Assembler::pcaddu18i_op && ++ Assembler::high(int_at(4), 6) == Assembler::jirl_op && ++ Assembler::low(int_at(4), 5) == RA->encoding(); ++} ++ ++address NativeFarCall::destination(address orig_addr) const { ++ address addr = orig_addr ? orig_addr : addr_at(0); ++ ++ if (is_short()) { ++ // short ++ return addr + BytesPerInstWord + ++ (Assembler::simm26(((int_at(4) & 0x3ff) << 16) | ++ ((int_at(4) >> 10) & 0xffff)) << 2); ++ } ++ ++ if (is_far()) { ++ // far ++ return addr + ((intptr_t)Assembler::simm20(int_at(0) >> 5 & 0xfffff) << 18) + ++ (Assembler::simm16(int_at(4) >> 10 & 0xffff) << 2); ++ } ++ ++ fatal("not a NativeFarCall"); ++ return NULL; ++} ++ ++void NativeFarCall::set_destination(address dest) { ++ address addr_call = addr_at(0); ++ CodeBuffer cb(addr_call, instruction_size); ++ MacroAssembler masm(&cb); ++ assert(is_far_call_at(addr_call), "unexpected call type"); ++ masm.patchable_call(dest, addr_call); ++ ICache::invalidate_range(addr_call, instruction_size); ++} ++ ++void NativeFarCall::verify() { ++ assert(is_short() || is_far(), "not a NativeFarcall"); ++} ++ ++//------------------------------------------------------------------- ++ ++bool NativeMovConstReg::is_lu12iw_ori_lu32id() const { ++ return Assembler::high(int_at(0), 7) == Assembler::lu12i_w_op && ++ Assembler::high(int_at(4), 10) == Assembler::ori_op && ++ Assembler::high(int_at(8), 7) == Assembler::lu32i_d_op; ++} ++ ++bool NativeMovConstReg::is_lu12iw_lu32id_nop() const { ++ return Assembler::high(int_at(0), 7) == Assembler::lu12i_w_op && ++ Assembler::high(int_at(4), 7) == Assembler::lu32i_d_op && ++ Assembler::high(int_at(8), 10) == Assembler::andi_op; ++} ++ ++bool NativeMovConstReg::is_lu12iw_2nop() const { ++ return Assembler::high(int_at(0), 7) == Assembler::lu12i_w_op && ++ Assembler::high(int_at(4), 10) == Assembler::andi_op && ++ Assembler::high(int_at(8), 10) == Assembler::andi_op; ++} ++ ++bool NativeMovConstReg::is_lu12iw_ori_nop() const { ++ return Assembler::high(int_at(0), 7) == Assembler::lu12i_w_op && ++ Assembler::high(int_at(4), 10) == Assembler::ori_op && ++ Assembler::high(int_at(8), 10) == Assembler::andi_op; ++} ++ ++bool NativeMovConstReg::is_addid_2nop() const { ++ return Assembler::high(int_at(0), 10) == Assembler::addi_d_op && ++ Assembler::high(int_at(4), 10) == Assembler::andi_op && ++ Assembler::high(int_at(8), 10) == Assembler::andi_op; ++} ++ ++void NativeMovConstReg::verify() { ++ assert(is_li52(), "not a mov reg, imm52"); ++} ++ ++void NativeMovConstReg::print() { ++ tty->print_cr(PTR_FORMAT ": mov reg, " INTPTR_FORMAT, ++ p2i(instruction_address()), data()); ++} ++ ++intptr_t NativeMovConstReg::data() const { ++ if (is_lu12iw_ori_lu32id()) { ++ return Assembler::merge((intptr_t)((int_at(4) >> 10) & 0xfff), ++ (intptr_t)((int_at(0) >> 5) & 0xfffff), ++ (intptr_t)((int_at(8) >> 5) & 0xfffff)); ++ } ++ ++ if (is_lu12iw_lu32id_nop()) { ++ return Assembler::merge((intptr_t)0, ++ (intptr_t)((int_at(0) >> 5) & 0xfffff), ++ (intptr_t)((int_at(4) >> 5) & 0xfffff)); ++ } ++ ++ if (is_lu12iw_2nop()) { ++ return Assembler::merge((intptr_t)0, ++ (intptr_t)((int_at(0) >> 5) & 0xfffff)); ++ } ++ ++ if 
(is_lu12iw_ori_nop()) { ++ return Assembler::merge((intptr_t)((int_at(4) >> 10) & 0xfff), ++ (intptr_t)((int_at(0) >> 5) & 0xfffff)); ++ } ++ ++ if (is_addid_2nop()) { ++ return Assembler::simm12((int_at(0) >> 10) & 0xfff); ++ } ++ ++#ifndef PRODUCT ++ Disassembler::decode(addr_at(0), addr_at(0) + 16, tty); ++#endif ++ fatal("not a mov reg, imm52"); ++ return 0; // unreachable ++} ++ ++void NativeMovConstReg::set_data(intptr_t x, intptr_t o) { ++ CodeBuffer cb(addr_at(0), instruction_size); ++ MacroAssembler masm(&cb); ++ masm.patchable_li52(as_Register(int_at(0) & 0x1f), x); ++ ICache::invalidate_range(addr_at(0), instruction_size); ++ ++ // Find and replace the oop/metadata corresponding to this ++ // instruction in oops section. ++ CodeBlob* blob = CodeCache::find_blob_unsafe(instruction_address()); ++ nmethod* nm = blob->as_nmethod_or_null(); ++ if (nm != NULL) { ++ o = o ? o : x; ++ RelocIterator iter(nm, instruction_address(), next_instruction_address()); ++ while (iter.next()) { ++ if (iter.type() == relocInfo::oop_type) { ++ oop* oop_addr = iter.oop_reloc()->oop_addr(); ++ *oop_addr = cast_to_oop(o); ++ break; ++ } else if (iter.type() == relocInfo::metadata_type) { ++ Metadata** metadata_addr = iter.metadata_reloc()->metadata_addr(); ++ *metadata_addr = (Metadata*)o; ++ break; ++ } ++ } ++ } ++} ++ ++//------------------------------------------------------------------- ++ ++int NativeMovRegMem::offset() const{ ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++ return 0; // mute compiler ++} ++ ++void NativeMovRegMem::set_offset(int x) { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++} ++ ++void NativeMovRegMem::verify() { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++} ++ ++ ++void NativeMovRegMem::print() { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++} ++ ++bool NativeInstruction::is_sigill_zombie_not_entrant() { ++ return uint_at(0) == NativeIllegalInstruction::instruction_code; ++} ++ ++void NativeIllegalInstruction::insert(address code_pos) { ++ *(juint*)code_pos = instruction_code; ++ ICache::invalidate_range(code_pos, instruction_size); ++} ++ ++void NativeJump::verify() { ++ assert(is_short() || is_far(), "not a general jump instruction"); ++} ++ ++bool NativeJump::is_short() { ++ return Assembler::high(insn_word(), 6) == Assembler::b_op; ++} ++ ++bool NativeJump::is_far() { ++ return Assembler::high(int_at(0), 7) == Assembler::pcaddu18i_op && ++ Assembler::high(int_at(4), 6) == Assembler::jirl_op && ++ Assembler::low(int_at(4), 5) == R0->encoding(); ++} ++ ++address NativeJump::jump_destination(address orig_addr) { ++ address addr = orig_addr ? orig_addr : addr_at(0); ++ address ret = (address)-1; ++ ++ // short ++ if (is_short()) { ++ ret = addr + (Assembler::simm26(((int_at(0) & 0x3ff) << 16) | ++ ((int_at(0) >> 10) & 0xffff)) << 2); ++ return ret == instruction_address() ? (address)-1 : ret; ++ } ++ ++ // far ++ if (is_far()) { ++ ret = addr + ((intptr_t)Assembler::simm20(int_at(0) >> 5 & 0xfffff) << 18) + ++ (Assembler::simm16(int_at(4) >> 10 & 0xffff) << 2); ++ return ret == instruction_address() ? 
(address)-1 : ret; ++ } ++ ++ fatal("not a jump"); ++ return NULL; ++} ++ ++void NativeJump::set_jump_destination(address dest) { ++ OrderAccess::fence(); ++ ++ CodeBuffer cb(addr_at(0), instruction_size); ++ MacroAssembler masm(&cb); ++ masm.patchable_jump(dest); ++ ICache::invalidate_range(addr_at(0), instruction_size); ++} ++ ++void NativeGeneralJump::insert_unconditional(address code_pos, address entry) { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++} ++ ++// MT-safe patching of a long jump instruction. ++// First patches first word of instruction to two jmp's that jmps to them ++// selfs (spinlock). Then patches the last byte, and then atomicly replaces ++// the jmp's with the first 4 byte of the new instruction. ++void NativeGeneralJump::replace_mt_safe(address instr_addr, address code_buffer) { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++} ++ ++// Must ensure atomicity ++void NativeJump::patch_verified_entry(address entry, address verified_entry, address dest) { ++ assert(dest == SharedRuntime::get_handle_wrong_method_stub(), "expected fixed destination of patch"); ++ jlong offs = dest - verified_entry; ++ ++ if (MacroAssembler::reachable_from_branch_short(offs)) { ++ CodeBuffer cb(verified_entry, 1 * BytesPerInstWord); ++ MacroAssembler masm(&cb); ++ masm.b(dest); ++ } else { ++ // We use an illegal instruction for marking a method as ++ // not_entrant or zombie ++ NativeIllegalInstruction::insert(verified_entry); ++ } ++ ICache::invalidate_range(verified_entry, 1 * BytesPerInstWord); ++} ++ ++bool NativeInstruction::is_dtrace_trap() { ++ //return (*(int32_t*)this & 0xff) == 0xcc; ++ Unimplemented(); ++ return false; ++} ++ ++bool NativeInstruction::is_safepoint_poll() { ++ // ++ // 390 li T2, 0x0000000000400000 #@loadConP ++ // 394 st_w [SP + #12], V1 # spill 9 ++ // 398 Safepoint @ [T2] : poll for GC @ safePoint_poll # spec.benchmarks.compress.Decompressor::decompress @ bci:224 L[0]=A6 L[1]=_ L[2]=sp + #28 L[3]=_ L[4]=V1 ++ // ++ // 0x000000ffe5815130: lu12i_w t2, 0x40 ++ // 0x000000ffe5815134: st_w v1, 0xc(sp) ; OopMap{a6=Oop off=920} ++ // ;*goto ++ // ; - spec.benchmarks.compress.Decompressor::decompress@224 (line 584) ++ // ++ // 0x000000ffe5815138: ld_w at, 0x0(t2) ;*goto <--- PC ++ // ; - spec.benchmarks.compress.Decompressor::decompress@224 (line 584) ++ // ++ ++ // Since there may be some spill instructions between the safePoint_poll and loadConP, ++ // we check the safepoint instruction like this. ++ return Assembler::high(insn_word(), 10) == Assembler::ld_w_op && ++ Assembler::low(insn_word(), 5) == AT->encoding(); ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/nativeInst_loongarch.hpp b/src/hotspot/cpu/loongarch/nativeInst_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/nativeInst_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/nativeInst_loongarch.hpp 2024-01-30 10:00:11.841431732 +0800 +@@ -0,0 +1,528 @@ ++/* ++ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_NATIVEINST_LOONGARCH_HPP ++#define CPU_LOONGARCH_NATIVEINST_LOONGARCH_HPP ++ ++#include "asm/assembler.hpp" ++#include "runtime/icache.hpp" ++#include "runtime/os.hpp" ++#include "runtime/orderAccess.hpp" ++#include "runtime/safepointMechanism.hpp" ++ ++// We have interfaces for the following instructions: ++// - NativeInstruction ++// - - NativeCall ++// - - NativeMovConstReg ++// - - NativeMovConstRegPatching ++// - - NativeMovRegMem ++// - - NativeMovRegMemPatching ++// - - NativeIllegalOpCode ++// - - NativeGeneralJump ++// - - NativePushConst ++// - - NativeTstRegMem ++ ++// The base class for different kinds of native instruction abstractions. ++// Provides the primitive operations to manipulate code relative to this. ++ ++class NativeInstruction { ++ friend class Relocation; ++ ++ public: ++ enum loongarch_specific_constants { ++ nop_instruction_code = 0, ++ nop_instruction_size = 4, ++ sync_instruction_code = 0xf, ++ sync_instruction_size = 4 ++ }; ++ ++ bool is_nop() { guarantee(0, "LA not implemented yet"); return long_at(0) == nop_instruction_code; } ++ bool is_sync() { return Assembler::high(insn_word(), 17) == Assembler::dbar_op; } ++ bool is_dtrace_trap(); ++ inline bool is_call(); ++ inline bool is_far_call(); ++ inline bool is_illegal(); ++ bool is_jump(); ++ bool is_safepoint_poll(); ++ ++ // Helper func for jvmci ++ bool is_lu12iw_lu32id() const; ++ bool is_pcaddu12i_add() const; ++ ++ // LoongArch has no instruction to generate a illegal instrucion exception? ++ // But `break 11` is not illegal instruction for LoongArch. ++ static int illegal_instruction(); ++ ++ bool is_int_branch(); ++ bool is_float_branch(); ++ ++ inline bool is_NativeCallTrampolineStub_at(); ++ //We use an illegal instruction for marking a method as not_entrant or zombie. 
++ bool is_sigill_zombie_not_entrant(); ++ ++ protected: ++ address addr_at(int offset) const { return address(this) + offset; } ++ address instruction_address() const { return addr_at(0); } ++ address next_instruction_address() const { return addr_at(BytesPerInstWord); } ++ address prev_instruction_address() const { return addr_at(-BytesPerInstWord); } ++ ++ s_char sbyte_at(int offset) const { return *(s_char*) addr_at(offset); } ++ u_char ubyte_at(int offset) const { return *(u_char*) addr_at(offset); } ++ ++ jint int_at(int offset) const { return *(jint*) addr_at(offset); } ++ juint uint_at(int offset) const { return *(juint*) addr_at(offset); } ++ ++ intptr_t ptr_at(int offset) const { return *(intptr_t*) addr_at(offset); } ++ ++ oop oop_at (int offset) const { return *(oop*) addr_at(offset); } ++ int long_at(int offset) const { return *(jint*)addr_at(offset); } ++ ++ ++ void set_char_at(int offset, char c) { *addr_at(offset) = (u_char)c; wrote(offset); } ++ void set_int_at(int offset, jint i) { *(jint*)addr_at(offset) = i; wrote(offset); } ++ void set_ptr_at (int offset, intptr_t ptr) { *(intptr_t*) addr_at(offset) = ptr; wrote(offset); } ++ void set_oop_at (int offset, oop o) { *(oop*) addr_at(offset) = o; wrote(offset); } ++ void set_long_at(int offset, long i); ++ ++ int insn_word() const { return long_at(0); } ++ ++ void wrote(int offset); ++ ++ public: ++ ++ // unit test stuff ++ static void test() {} // override for testing ++ ++ inline friend NativeInstruction* nativeInstruction_at(address address); ++}; ++ ++inline NativeInstruction* nativeInstruction_at(address address) { ++ NativeInstruction* inst = (NativeInstruction*)address; ++#ifdef ASSERT ++ //inst->verify(); ++#endif ++ return inst; ++} ++ ++inline NativeCall* nativeCall_at(address address); ++ ++// The NativeCall is an abstraction for accessing/manipulating native call ++// instructions (used to manipulate inline caches, primitive & dll calls, etc.). ++class NativeCall: public NativeInstruction { ++ public: ++ enum loongarch_specific_constants { ++ instruction_offset = 0, ++ instruction_size = 1 * BytesPerInstWord, ++ return_address_offset = 1 * BytesPerInstWord, ++ displacement_offset = 0 ++ }; ++ ++ // We have only bl. ++ bool is_bl() const; ++ ++ address instruction_address() const { return addr_at(instruction_offset); } ++ ++ address next_instruction_address() const { ++ return addr_at(return_address_offset); ++ } ++ ++ address return_address() const { ++ return next_instruction_address(); ++ } ++ ++ address target_addr_for_bl(address orig_addr = 0) const; ++ address destination() const; ++ void set_destination(address dest); ++ ++ void verify_alignment() {} ++ void verify(); ++ void print(); ++ ++ // Creation ++ inline friend NativeCall* nativeCall_at(address address); ++ inline friend NativeCall* nativeCall_before(address return_address); ++ ++ static bool is_call_at(address instr) { ++ return nativeInstruction_at(instr)->is_call(); ++ } ++ ++ static bool is_call_before(address return_address) { ++ return is_call_at(return_address - return_address_offset); ++ } ++ ++ // MT-safe patching of a call instruction. ++ static void insert(address code_pos, address entry); ++ static void replace_mt_safe(address instr_addr, address code_buffer); ++ ++ // Similar to replace_mt_safe, but just changes the destination. The ++ // important thing is that free-running threads are able to execute ++ // this call instruction at all times. 
If the call is an immediate bl ++ // instruction we can simply rely on atomicity of 32-bit writes to ++ // make sure other threads will see no intermediate states. ++ ++ // We cannot rely on locks here, since the free-running threads must run at ++ // full speed. ++ // ++ // Used in the runtime linkage of calls; see class CompiledIC. ++ ++ // The parameter assert_lock disables the assertion during code generation. ++ void set_destination_mt_safe(address dest, bool assert_lock = true); ++ ++ address get_trampoline(); ++ address trampoline_jump(CodeBuffer &cbuf, address dest); ++}; ++ ++inline NativeCall* nativeCall_at(address address) { ++ NativeCall* call = (NativeCall*)(address - NativeCall::instruction_offset); ++#ifdef ASSERT ++ call->verify(); ++#endif ++ return call; ++} ++ ++inline NativeCall* nativeCall_before(address return_address) { ++ NativeCall* call = (NativeCall*)(return_address - NativeCall::return_address_offset); ++#ifdef ASSERT ++ call->verify(); ++#endif ++ return call; ++} ++ ++// The NativeFarCall is an abstraction for accessing/manipulating native ++// call-anywhere instructions. ++// Used to call native methods which may be loaded anywhere in the address ++// space, possibly out of reach of a call instruction. ++class NativeFarCall: public NativeInstruction { ++ public: ++ enum loongarch_specific_constants { ++ instruction_offset = 0, ++ instruction_size = 2 * BytesPerInstWord ++ }; ++ ++ address instruction_address() const { return addr_at(instruction_offset); } ++ ++ // We use MacroAssembler::patchable_call() for implementing a ++ // call-anywhere instruction. ++ bool is_short() const; ++ bool is_far() const; ++ ++ // Checks whether instr points at a NativeFarCall instruction. ++ static bool is_far_call_at(address address) { ++ return nativeInstruction_at(address)->is_far_call(); ++ } ++ ++ // Returns the NativeFarCall's destination. ++ address destination(address orig_addr = 0) const; ++ ++ // Sets the NativeFarCall's destination, not necessarily mt-safe. ++ // Used when relocating code. ++ void set_destination(address dest); ++ ++ void verify(); ++}; ++ ++// Instantiates a NativeFarCall object starting at the given instruction ++// address and returns the NativeFarCall object. ++inline NativeFarCall* nativeFarCall_at(address address) { ++ NativeFarCall* call = (NativeFarCall*)address; ++#ifdef ASSERT ++ call->verify(); ++#endif ++ return call; ++} ++ ++// An interface for accessing/manipulating native set_oop imm, reg instructions ++// (used to manipulate inlined data references, etc.). 
++class NativeMovConstReg: public NativeInstruction { ++ public: ++ enum loongarch_specific_constants { ++ instruction_offset = 0, ++ instruction_size = 3 * BytesPerInstWord, ++ next_instruction_offset = 3 * BytesPerInstWord, ++ }; ++ ++ int insn_word() const { return long_at(instruction_offset); } ++ address instruction_address() const { return addr_at(0); } ++ address next_instruction_address() const { return addr_at(next_instruction_offset); } ++ intptr_t data() const; ++ void set_data(intptr_t x, intptr_t o = 0); ++ ++ bool is_li52() const { ++ return is_lu12iw_ori_lu32id() || ++ is_lu12iw_lu32id_nop() || ++ is_lu12iw_2nop() || ++ is_lu12iw_ori_nop() || ++ is_addid_2nop(); ++ } ++ bool is_lu12iw_ori_lu32id() const; ++ bool is_lu12iw_lu32id_nop() const; ++ bool is_lu12iw_2nop() const; ++ bool is_lu12iw_ori_nop() const; ++ bool is_addid_2nop() const; ++ void verify(); ++ void print(); ++ ++ // unit test stuff ++ static void test() {} ++ ++ // Creation ++ inline friend NativeMovConstReg* nativeMovConstReg_at(address address); ++ inline friend NativeMovConstReg* nativeMovConstReg_before(address address); ++}; ++ ++inline NativeMovConstReg* nativeMovConstReg_at(address address) { ++ NativeMovConstReg* test = (NativeMovConstReg*)(address - NativeMovConstReg::instruction_offset); ++#ifdef ASSERT ++ test->verify(); ++#endif ++ return test; ++} ++ ++inline NativeMovConstReg* nativeMovConstReg_before(address address) { ++ NativeMovConstReg* test = (NativeMovConstReg*)(address - NativeMovConstReg::instruction_size - NativeMovConstReg::instruction_offset); ++#ifdef ASSERT ++ test->verify(); ++#endif ++ return test; ++} ++ ++class NativeMovConstRegPatching: public NativeMovConstReg { ++ private: ++ friend NativeMovConstRegPatching* nativeMovConstRegPatching_at(address address) { ++ NativeMovConstRegPatching* test = (NativeMovConstRegPatching*)(address - instruction_offset); ++ #ifdef ASSERT ++ test->verify(); ++ #endif ++ return test; ++ } ++}; ++ ++class NativeMovRegMem: public NativeInstruction { ++ public: ++ enum loongarch_specific_constants { ++ instruction_offset = 0, ++ instruction_size = 4, ++ hiword_offset = 4, ++ ldst_offset = 12, ++ immediate_size = 4, ++ ldst_size = 16 ++ }; ++ ++ address instruction_address() const { return addr_at(instruction_offset); } ++ ++ int num_bytes_to_end_of_patch() const { return instruction_offset + instruction_size; } ++ ++ int offset() const; ++ ++ void set_offset(int x); ++ ++ void add_offset_in_bytes(int add_offset) { set_offset ( ( offset() + add_offset ) ); } ++ ++ void verify(); ++ void print (); ++ ++ // unit test stuff ++ static void test() {} ++ ++ private: ++ inline friend NativeMovRegMem* nativeMovRegMem_at (address address); ++}; ++ ++inline NativeMovRegMem* nativeMovRegMem_at (address address) { ++ NativeMovRegMem* test = (NativeMovRegMem*)(address - NativeMovRegMem::instruction_offset); ++#ifdef ASSERT ++ test->verify(); ++#endif ++ return test; ++} ++ ++class NativeMovRegMemPatching: public NativeMovRegMem { ++ private: ++ friend NativeMovRegMemPatching* nativeMovRegMemPatching_at (address address) { ++ NativeMovRegMemPatching* test = (NativeMovRegMemPatching*)(address - instruction_offset); ++ #ifdef ASSERT ++ test->verify(); ++ #endif ++ return test; ++ } ++}; ++ ++ ++// Handles all kinds of jump on Loongson. 
++// short: ++// b offs26 ++// nop ++// ++// far: ++// pcaddu18i reg, si20 ++// jirl r0, reg, si18 ++// ++class NativeJump: public NativeInstruction { ++ public: ++ enum loongarch_specific_constants { ++ instruction_offset = 0, ++ instruction_size = 2 * BytesPerInstWord ++ }; ++ ++ bool is_short(); ++ bool is_far(); ++ ++ address instruction_address() const { return addr_at(instruction_offset); } ++ address jump_destination(address orig_addr = 0); ++ void set_jump_destination(address dest); ++ ++ // Creation ++ inline friend NativeJump* nativeJump_at(address address); ++ ++ // Insertion of native jump instruction ++ static void insert(address code_pos, address entry) { Unimplemented(); } ++ // MT-safe insertion of native jump at verified method entry ++ static void check_verified_entry_alignment(address entry, address verified_entry){} ++ static void patch_verified_entry(address entry, address verified_entry, address dest); ++ ++ void verify(); ++}; ++ ++inline NativeJump* nativeJump_at(address address) { ++ NativeJump* jump = (NativeJump*)(address - NativeJump::instruction_offset); ++ debug_only(jump->verify();) ++ return jump; ++} ++ ++class NativeGeneralJump: public NativeJump { ++ public: ++ // Creation ++ inline friend NativeGeneralJump* nativeGeneralJump_at(address address); ++ ++ // Insertion of native general jump instruction ++ static void insert_unconditional(address code_pos, address entry); ++ static void replace_mt_safe(address instr_addr, address code_buffer); ++}; ++ ++inline NativeGeneralJump* nativeGeneralJump_at(address address) { ++ NativeGeneralJump* jump = (NativeGeneralJump*)(address); ++ debug_only(jump->verify();) ++ return jump; ++} ++ ++class NativeIllegalInstruction: public NativeInstruction { ++public: ++ enum loongarch_specific_constants { ++ instruction_code = 0xbadc0de0, // TODO: LA ++ // Temporary LoongArch reserved instruction ++ instruction_size = 4, ++ instruction_offset = 0, ++ next_instruction_offset = 4 ++ }; ++ ++ // Insert illegal opcode as specific address ++ static void insert(address code_pos); ++}; ++ ++inline bool NativeInstruction::is_illegal() { return insn_word() == illegal_instruction(); } ++ ++inline bool NativeInstruction::is_call() { ++ NativeCall *call = (NativeCall*)instruction_address(); ++ return call->is_bl(); ++} ++ ++inline bool NativeInstruction::is_far_call() { ++ NativeFarCall *call = (NativeFarCall*)instruction_address(); ++ ++ // short ++ if (call->is_short()) { ++ return true; ++ } ++ ++ // far ++ if (call->is_far()) { ++ return true; ++ } ++ ++ return false; ++} ++ ++inline bool NativeInstruction::is_jump() ++{ ++ NativeGeneralJump *jump = (NativeGeneralJump*)instruction_address(); ++ ++ // short ++ if (jump->is_short()) { ++ return true; ++ } ++ ++ // far ++ if (jump->is_far()) { ++ return true; ++ } ++ ++ return false; ++} ++ ++// Call trampoline stubs. ++class NativeCallTrampolineStub : public NativeInstruction { ++ public: ++ ++ enum la_specific_constants { ++ instruction_size = 6 * 4, ++ instruction_offset = 0, ++ data_offset = 4 * 4, ++ next_instruction_offset = 6 * 4 ++ }; ++ ++ address destination() const { ++ return (address)ptr_at(data_offset); ++ } ++ ++ void set_destination(address new_destination) { ++ set_ptr_at(data_offset, (intptr_t)new_destination); ++ OrderAccess::fence(); ++ } ++}; ++ ++// Note: Other stubs must not begin with this pattern. 
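For the far form sketched above, the target is rebuilt exactly as NativeJump::jump_destination() and NativeFarCall::destination() do it; a minimal illustration (helper name hypothetical, immediates assumed already sign-extended by the decoder, the 16-bit jirl field scaled by 4):

#include <cstdint>

typedef unsigned char* address;   // as in HotSpot's globalDefinitions

// Illustration only: target of "pcaddu18i reg, si20; jirl rd, reg, si16".
static address far_branch_target(address pc, intptr_t si20, intptr_t si16) {
  return pc + (si20 << 18) + (si16 << 2);
}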
++inline bool NativeInstruction::is_NativeCallTrampolineStub_at() { ++ // pcaddi ++ // ld_d ++ // jirl ++ return Assembler::high(int_at(0), 7) == Assembler::pcaddi_op && ++ Assembler::high(int_at(4), 10) == Assembler::ld_d_op && ++ Assembler::high(int_at(8), 6) == Assembler::jirl_op && ++ Assembler::low(int_at(8), 5) == R0->encoding(); ++} ++ ++inline NativeCallTrampolineStub* nativeCallTrampolineStub_at(address addr) { ++ NativeInstruction* ni = nativeInstruction_at(addr); ++ assert(ni->is_NativeCallTrampolineStub_at(), "no call trampoline found"); ++ return (NativeCallTrampolineStub*)addr; ++} ++ ++class NativeMembar : public NativeInstruction { ++public: ++ unsigned int get_hint() { return Assembler::low(insn_word(), 4); } ++ void set_hint(int hint) { Assembler::patch(addr_at(0), 4, hint); } ++}; ++ ++#endif // CPU_LOONGARCH_NATIVEINST_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/register_definitions_loongarch.cpp b/src/hotspot/cpu/loongarch/register_definitions_loongarch.cpp +--- a/src/hotspot/cpu/loongarch/register_definitions_loongarch.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/register_definitions_loongarch.cpp 2024-01-30 10:00:11.841431732 +0800 +@@ -0,0 +1,103 @@ ++/* ++ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/register.hpp" ++#include "register_loongarch.hpp" ++#ifdef TARGET_ARCH_MODEL_loongarch_32 ++# include "interp_masm_loongarch_32.hpp" ++#endif ++#ifdef TARGET_ARCH_MODEL_loongarch_64 ++# include "interp_masm_loongarch_64.hpp" ++#endif ++ ++REGISTER_DEFINITION(Register, noreg); ++REGISTER_DEFINITION(Register, r0); ++REGISTER_DEFINITION(Register, r1); ++REGISTER_DEFINITION(Register, r2); ++REGISTER_DEFINITION(Register, r3); ++REGISTER_DEFINITION(Register, r4); ++REGISTER_DEFINITION(Register, r5); ++REGISTER_DEFINITION(Register, r6); ++REGISTER_DEFINITION(Register, r7); ++REGISTER_DEFINITION(Register, r8); ++REGISTER_DEFINITION(Register, r9); ++REGISTER_DEFINITION(Register, r10); ++REGISTER_DEFINITION(Register, r11); ++REGISTER_DEFINITION(Register, r12); ++REGISTER_DEFINITION(Register, r13); ++REGISTER_DEFINITION(Register, r14); ++REGISTER_DEFINITION(Register, r15); ++REGISTER_DEFINITION(Register, r16); ++REGISTER_DEFINITION(Register, r17); ++REGISTER_DEFINITION(Register, r18); ++REGISTER_DEFINITION(Register, r19); ++REGISTER_DEFINITION(Register, r20); ++REGISTER_DEFINITION(Register, r21); ++REGISTER_DEFINITION(Register, r22); ++REGISTER_DEFINITION(Register, r23); ++REGISTER_DEFINITION(Register, r24); ++REGISTER_DEFINITION(Register, r25); ++REGISTER_DEFINITION(Register, r26); ++REGISTER_DEFINITION(Register, r27); ++REGISTER_DEFINITION(Register, r28); ++REGISTER_DEFINITION(Register, r29); ++REGISTER_DEFINITION(Register, r30); ++REGISTER_DEFINITION(Register, r31); ++ ++REGISTER_DEFINITION(FloatRegister, fnoreg); ++REGISTER_DEFINITION(FloatRegister, f0); ++REGISTER_DEFINITION(FloatRegister, f1); ++REGISTER_DEFINITION(FloatRegister, f2); ++REGISTER_DEFINITION(FloatRegister, f3); ++REGISTER_DEFINITION(FloatRegister, f4); ++REGISTER_DEFINITION(FloatRegister, f5); ++REGISTER_DEFINITION(FloatRegister, f6); ++REGISTER_DEFINITION(FloatRegister, f7); ++REGISTER_DEFINITION(FloatRegister, f8); ++REGISTER_DEFINITION(FloatRegister, f9); ++REGISTER_DEFINITION(FloatRegister, f10); ++REGISTER_DEFINITION(FloatRegister, f11); ++REGISTER_DEFINITION(FloatRegister, f12); ++REGISTER_DEFINITION(FloatRegister, f13); ++REGISTER_DEFINITION(FloatRegister, f14); ++REGISTER_DEFINITION(FloatRegister, f15); ++REGISTER_DEFINITION(FloatRegister, f16); ++REGISTER_DEFINITION(FloatRegister, f17); ++REGISTER_DEFINITION(FloatRegister, f18); ++REGISTER_DEFINITION(FloatRegister, f19); ++REGISTER_DEFINITION(FloatRegister, f20); ++REGISTER_DEFINITION(FloatRegister, f21); ++REGISTER_DEFINITION(FloatRegister, f22); ++REGISTER_DEFINITION(FloatRegister, f23); ++REGISTER_DEFINITION(FloatRegister, f24); ++REGISTER_DEFINITION(FloatRegister, f25); ++REGISTER_DEFINITION(FloatRegister, f26); ++REGISTER_DEFINITION(FloatRegister, f27); ++REGISTER_DEFINITION(FloatRegister, f28); ++REGISTER_DEFINITION(FloatRegister, f29); ++REGISTER_DEFINITION(FloatRegister, f30); ++REGISTER_DEFINITION(FloatRegister, f31); +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/register_loongarch.cpp b/src/hotspot/cpu/loongarch/register_loongarch.cpp +--- a/src/hotspot/cpu/loongarch/register_loongarch.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/register_loongarch.cpp 2024-01-30 10:00:11.841431732 +0800 +@@ -0,0 +1,59 @@ ++/* ++ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. 
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "register_loongarch.hpp" ++ ++const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers * RegisterImpl::max_slots_per_register; ++const int ConcreteRegisterImpl::max_fpr = ConcreteRegisterImpl::max_gpr + ++ FloatRegisterImpl::number_of_registers * FloatRegisterImpl::max_slots_per_register; ++ ++ ++const char* RegisterImpl::name() const { ++ const char* names[number_of_registers] = { ++ "zero", "ra", "tp", "sp", "a0/v0", "a1/v1", "a2", "a3", ++ "a4", "a5", "a6", "a7", "t0", "t1", "t2", "t3", ++ "t4", "t5", "t6", "t7", "t8", "x", "fp", "s0", ++ "s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8" ++ }; ++ return is_valid() ? names[encoding()] : "noreg"; ++} ++ ++const char* FloatRegisterImpl::name() const { ++ const char* names[number_of_registers] = { ++ "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", ++ "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", ++ "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", ++ "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", ++ }; ++ return is_valid() ? names[encoding()] : "fnoreg"; ++} ++ ++const char* ConditionalFlagRegisterImpl::name() const { ++ const char* names[number_of_registers] = { ++ "fcc0", "fcc1", "fcc2", "fcc3", "fcc4", "fcc5", "fcc6", "fcc7", ++ }; ++ return is_valid() ? names[encoding()] : "fccnoreg"; ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/register_loongarch.hpp b/src/hotspot/cpu/loongarch/register_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/register_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/register_loongarch.hpp 2024-01-30 10:00:11.841431732 +0800 +@@ -0,0 +1,495 @@ ++/* ++ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_REGISTER_LOONGARCH_HPP ++#define CPU_LOONGARCH_REGISTER_LOONGARCH_HPP ++ ++#include "asm/register.hpp" ++#include "utilities/formatBuffer.hpp" ++ ++class VMRegImpl; ++typedef VMRegImpl* VMReg; ++ ++// Use Register as shortcut ++class RegisterImpl; ++typedef RegisterImpl* Register; ++ ++inline Register as_Register(int encoding) { ++ return (Register)(intptr_t) encoding; ++} ++ ++class RegisterImpl: public AbstractRegisterImpl { ++ public: ++ enum { ++ number_of_registers = 32, ++ max_slots_per_register = 2 ++ }; ++ ++ // derived registers, offsets, and addresses ++ Register successor() const { return as_Register(encoding() + 1); } ++ ++ // construction ++ inline friend Register as_Register(int encoding); ++ ++ VMReg as_VMReg(); ++ ++ // accessors ++ int encoding() const { assert(is_valid(), "invalid register (%d)", (int)(intptr_t)this ); return (intptr_t)this; } ++ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } ++ const char* name() const; ++}; ++ ++// The integer registers of the LoongArch architecture ++CONSTANT_REGISTER_DECLARATION(Register, noreg, (-1)); ++ ++ ++CONSTANT_REGISTER_DECLARATION(Register, r0, (0)); ++CONSTANT_REGISTER_DECLARATION(Register, r1, (1)); ++CONSTANT_REGISTER_DECLARATION(Register, r2, (2)); ++CONSTANT_REGISTER_DECLARATION(Register, r3, (3)); ++CONSTANT_REGISTER_DECLARATION(Register, r4, (4)); ++CONSTANT_REGISTER_DECLARATION(Register, r5, (5)); ++CONSTANT_REGISTER_DECLARATION(Register, r6, (6)); ++CONSTANT_REGISTER_DECLARATION(Register, r7, (7)); ++CONSTANT_REGISTER_DECLARATION(Register, r8, (8)); ++CONSTANT_REGISTER_DECLARATION(Register, r9, (9)); ++CONSTANT_REGISTER_DECLARATION(Register, r10, (10)); ++CONSTANT_REGISTER_DECLARATION(Register, r11, (11)); ++CONSTANT_REGISTER_DECLARATION(Register, r12, (12)); ++CONSTANT_REGISTER_DECLARATION(Register, r13, (13)); ++CONSTANT_REGISTER_DECLARATION(Register, r14, (14)); ++CONSTANT_REGISTER_DECLARATION(Register, r15, (15)); ++CONSTANT_REGISTER_DECLARATION(Register, r16, (16)); ++CONSTANT_REGISTER_DECLARATION(Register, r17, (17)); ++CONSTANT_REGISTER_DECLARATION(Register, r18, (18)); ++CONSTANT_REGISTER_DECLARATION(Register, r19, (19)); ++CONSTANT_REGISTER_DECLARATION(Register, r20, (20)); ++CONSTANT_REGISTER_DECLARATION(Register, r21, (21)); ++CONSTANT_REGISTER_DECLARATION(Register, r22, (22)); ++CONSTANT_REGISTER_DECLARATION(Register, r23, (23)); ++CONSTANT_REGISTER_DECLARATION(Register, r24, (24)); ++CONSTANT_REGISTER_DECLARATION(Register, r25, (25)); ++CONSTANT_REGISTER_DECLARATION(Register, r26, (26)); ++CONSTANT_REGISTER_DECLARATION(Register, r27, (27)); ++CONSTANT_REGISTER_DECLARATION(Register, r28, (28)); ++CONSTANT_REGISTER_DECLARATION(Register, r29, (29)); ++CONSTANT_REGISTER_DECLARATION(Register, r30, (30)); ++CONSTANT_REGISTER_DECLARATION(Register, r31, (31)); ++ ++#ifndef DONT_USE_REGISTER_DEFINES ++#define NOREG ((Register)(noreg_RegisterEnumValue)) ++ ++#define R0 ((Register)(r0_RegisterEnumValue)) ++#define R1 ((Register)(r1_RegisterEnumValue)) ++#define R2 ((Register)(r2_RegisterEnumValue)) ++#define R3 
((Register)(r3_RegisterEnumValue)) ++#define R4 ((Register)(r4_RegisterEnumValue)) ++#define R5 ((Register)(r5_RegisterEnumValue)) ++#define R6 ((Register)(r6_RegisterEnumValue)) ++#define R7 ((Register)(r7_RegisterEnumValue)) ++#define R8 ((Register)(r8_RegisterEnumValue)) ++#define R9 ((Register)(r9_RegisterEnumValue)) ++#define R10 ((Register)(r10_RegisterEnumValue)) ++#define R11 ((Register)(r11_RegisterEnumValue)) ++#define R12 ((Register)(r12_RegisterEnumValue)) ++#define R13 ((Register)(r13_RegisterEnumValue)) ++#define R14 ((Register)(r14_RegisterEnumValue)) ++#define R15 ((Register)(r15_RegisterEnumValue)) ++#define R16 ((Register)(r16_RegisterEnumValue)) ++#define R17 ((Register)(r17_RegisterEnumValue)) ++#define R18 ((Register)(r18_RegisterEnumValue)) ++#define R19 ((Register)(r19_RegisterEnumValue)) ++#define R20 ((Register)(r20_RegisterEnumValue)) ++#define R21 ((Register)(r21_RegisterEnumValue)) ++#define R22 ((Register)(r22_RegisterEnumValue)) ++#define R23 ((Register)(r23_RegisterEnumValue)) ++#define R24 ((Register)(r24_RegisterEnumValue)) ++#define R25 ((Register)(r25_RegisterEnumValue)) ++#define R26 ((Register)(r26_RegisterEnumValue)) ++#define R27 ((Register)(r27_RegisterEnumValue)) ++#define R28 ((Register)(r28_RegisterEnumValue)) ++#define R29 ((Register)(r29_RegisterEnumValue)) ++#define R30 ((Register)(r30_RegisterEnumValue)) ++#define R31 ((Register)(r31_RegisterEnumValue)) ++ ++ ++#define RA R1 ++#define TP R2 ++#define SP R3 ++#define A0 R4 ++#define A1 R5 ++#define A2 R6 ++#define A3 R7 ++#define A4 R8 ++#define A5 R9 ++#define A6 R10 ++#define A7 R11 ++#define RT0 R12 ++#define RT1 R13 ++#define RT2 R14 ++#define RT3 R15 ++#define RT4 R16 ++#define RT5 R17 ++#define RT6 R18 ++#define RT7 R19 ++#define RT8 R20 ++#define RX R21 ++#define FP R22 ++#define S0 R23 ++#define S1 R24 ++#define S2 R25 ++#define S3 R26 ++#define S4 R27 ++#define S5 R28 ++#define S6 R29 ++#define S7 R30 ++#define S8 R31 ++ ++#define c_rarg0 RT0 ++#define c_rarg1 RT1 ++#define Rmethod S3 ++#define Rsender S4 ++#define Rnext S1 ++ ++#define V0 A0 ++#define V1 A1 ++ ++#define SCR1 RT7 ++#define SCR2 RT4 ++ ++//for interpreter frame ++// bytecode pointer register ++#define BCP S0 ++// local variable pointer register ++#define LVP S7 ++// temporary callee-saved register; we use it to save registers that may be clobbered across call_VM ++// be sure to save and restore its value in call_stub ++#define TSR S2 ++ ++#define OPT_THREAD 1 ++ ++#define TREG S6 ++ ++#define S5_heapbase S5 ++ ++#define FSR V0 ++#define SSR T6 ++#define FSF FV0 ++ ++#define RECEIVER T0 ++#define IC_Klass T1 ++ ++#define SHIFT_count T3 ++ ++// ---------- Scratch Register ---------- ++#define AT RT7 ++#define fscratch F23 ++ ++#endif // DONT_USE_REGISTER_DEFINES ++ ++// Use FloatRegister as shortcut ++class FloatRegisterImpl; ++typedef FloatRegisterImpl* FloatRegister; ++ ++inline FloatRegister as_FloatRegister(int encoding) { ++ return (FloatRegister)(intptr_t) encoding; ++} ++ ++// The implementation of floating point registers for the LoongArch architecture ++class FloatRegisterImpl: public AbstractRegisterImpl { ++ public: ++ enum { ++ number_of_registers = 32, ++ save_slots_per_register = 2, ++ slots_per_lsx_register = 4, ++ slots_per_lasx_register = 8, ++ max_slots_per_register = 8 ++ }; ++ ++ // construction ++ inline friend FloatRegister as_FloatRegister(int encoding); ++ ++ VMReg as_VMReg(); ++ ++ // derived registers, offsets, and addresses ++ FloatRegister successor() const { return 
as_FloatRegister(encoding() + 1); } ++ ++ // accessors ++ int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } ++ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } ++ const char* name() const; ++ ++}; ++ ++CONSTANT_REGISTER_DECLARATION(FloatRegister, fnoreg , (-1)); ++ ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f0 , ( 0)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f1 , ( 1)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f2 , ( 2)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f3 , ( 3)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f4 , ( 4)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f5 , ( 5)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f6 , ( 6)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f7 , ( 7)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f8 , ( 8)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f9 , ( 9)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f10 , (10)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f11 , (11)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f12 , (12)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f13 , (13)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f14 , (14)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f15 , (15)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f16 , (16)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f17 , (17)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f18 , (18)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f19 , (19)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f20 , (20)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f21 , (21)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f22 , (22)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f23 , (23)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f24 , (24)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f25 , (25)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f26 , (26)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f27 , (27)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f28 , (28)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f29 , (29)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f30 , (30)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f31 , (31)); ++ ++#ifndef DONT_USE_REGISTER_DEFINES ++#define FNOREG ((FloatRegister)(fnoreg_FloatRegisterEnumValue)) ++#define F0 ((FloatRegister)( f0_FloatRegisterEnumValue)) ++#define F1 ((FloatRegister)( f1_FloatRegisterEnumValue)) ++#define F2 ((FloatRegister)( f2_FloatRegisterEnumValue)) ++#define F3 ((FloatRegister)( f3_FloatRegisterEnumValue)) ++#define F4 ((FloatRegister)( f4_FloatRegisterEnumValue)) ++#define F5 ((FloatRegister)( f5_FloatRegisterEnumValue)) ++#define F6 ((FloatRegister)( f6_FloatRegisterEnumValue)) ++#define F7 ((FloatRegister)( f7_FloatRegisterEnumValue)) ++#define F8 ((FloatRegister)( f8_FloatRegisterEnumValue)) ++#define F9 ((FloatRegister)( f9_FloatRegisterEnumValue)) ++#define F10 ((FloatRegister)( f10_FloatRegisterEnumValue)) ++#define F11 ((FloatRegister)( f11_FloatRegisterEnumValue)) ++#define F12 ((FloatRegister)( f12_FloatRegisterEnumValue)) ++#define F13 ((FloatRegister)( f13_FloatRegisterEnumValue)) ++#define F14 ((FloatRegister)( f14_FloatRegisterEnumValue)) ++#define F15 ((FloatRegister)( f15_FloatRegisterEnumValue)) ++#define F16 ((FloatRegister)( f16_FloatRegisterEnumValue)) ++#define F17 ((FloatRegister)( f17_FloatRegisterEnumValue)) ++#define F18 ((FloatRegister)( f18_FloatRegisterEnumValue)) ++#define F19 ((FloatRegister)( 
f19_FloatRegisterEnumValue)) ++#define F20 ((FloatRegister)( f20_FloatRegisterEnumValue)) ++#define F21 ((FloatRegister)( f21_FloatRegisterEnumValue)) ++#define F22 ((FloatRegister)( f22_FloatRegisterEnumValue)) ++#define F23 ((FloatRegister)( f23_FloatRegisterEnumValue)) ++#define F24 ((FloatRegister)( f24_FloatRegisterEnumValue)) ++#define F25 ((FloatRegister)( f25_FloatRegisterEnumValue)) ++#define F26 ((FloatRegister)( f26_FloatRegisterEnumValue)) ++#define F27 ((FloatRegister)( f27_FloatRegisterEnumValue)) ++#define F28 ((FloatRegister)( f28_FloatRegisterEnumValue)) ++#define F29 ((FloatRegister)( f29_FloatRegisterEnumValue)) ++#define F30 ((FloatRegister)( f30_FloatRegisterEnumValue)) ++#define F31 ((FloatRegister)( f31_FloatRegisterEnumValue)) ++ ++#define FA0 F0 ++#define FA1 F1 ++#define FA2 F2 ++#define FA3 F3 ++#define FA4 F4 ++#define FA5 F5 ++#define FA6 F6 ++#define FA7 F7 ++ ++#define FV0 F0 ++#define FV1 F1 ++ ++#define FT0 F8 ++#define FT1 F9 ++#define FT2 F10 ++#define FT3 F11 ++#define FT4 F12 ++#define FT5 F13 ++#define FT6 F14 ++#define FT7 F15 ++#define FT8 F16 ++#define FT9 F17 ++#define FT10 F18 ++#define FT11 F19 ++#define FT12 F20 ++#define FT13 F21 ++#define FT14 F22 ++#define FT15 F23 ++ ++#define FS0 F24 ++#define FS1 F25 ++#define FS2 F26 ++#define FS3 F27 ++#define FS4 F28 ++#define FS5 F29 ++#define FS6 F30 ++#define FS7 F31 ++ ++#endif // DONT_USE_REGISTER_DEFINES ++ ++// Use ConditionalFlagRegister as shortcut ++class ConditionalFlagRegisterImpl; ++typedef ConditionalFlagRegisterImpl* ConditionalFlagRegister; ++ ++inline ConditionalFlagRegister as_ConditionalFlagRegister(int encoding) { ++ return (ConditionalFlagRegister)(intptr_t) encoding; ++} ++ ++// The implementation of condition flag registers for the LoongArch architecture ++class ConditionalFlagRegisterImpl: public AbstractRegisterImpl { ++ public: ++ enum { ++// conditionalflag_arg_base = 12, ++ number_of_registers = 8 ++ }; ++ ++ // construction ++ inline friend ConditionalFlagRegister as_ConditionalFlagRegister(int encoding); ++ ++ VMReg as_VMReg(); ++ ++ // derived registers, offsets, and addresses ++ ConditionalFlagRegister successor() const { return as_ConditionalFlagRegister(encoding() + 1); } ++ ++ // accessors ++ int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } ++ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } ++ const char* name() const; ++ ++}; ++ ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fccnoreg , (-1)); ++ ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc0 , ( 0)); ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc1 , ( 1)); ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc2 , ( 2)); ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc3 , ( 3)); ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc4 , ( 4)); ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc5 , ( 5)); ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc6 , ( 6)); ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc7 , ( 7)); ++ ++#ifndef DONT_USE_REGISTER_DEFINES ++#define FCCNOREG ((ConditionalFlagRegister)(fccnoreg_ConditionalFlagRegisterEnumValue)) ++#define FCC0 ((ConditionalFlagRegister)( fcc0_ConditionalFlagRegisterEnumValue)) ++#define FCC1 ((ConditionalFlagRegister)( fcc1_ConditionalFlagRegisterEnumValue)) ++#define FCC2 ((ConditionalFlagRegister)( fcc2_ConditionalFlagRegisterEnumValue)) ++#define FCC3 ((ConditionalFlagRegister)( 
fcc3_ConditionalFlagRegisterEnumValue)) ++#define FCC4 ((ConditionalFlagRegister)( fcc4_ConditionalFlagRegisterEnumValue)) ++#define FCC5 ((ConditionalFlagRegister)( fcc5_ConditionalFlagRegisterEnumValue)) ++#define FCC6 ((ConditionalFlagRegister)( fcc6_ConditionalFlagRegisterEnumValue)) ++#define FCC7 ((ConditionalFlagRegister)( fcc7_ConditionalFlagRegisterEnumValue)) ++ ++#endif // DONT_USE_REGISTER_DEFINES ++ ++// Need to know the total number of registers of all sorts for SharedInfo. ++// Define a class that exports it. ++class ConcreteRegisterImpl : public AbstractRegisterImpl { ++ public: ++ enum { ++ // A big enough number for C2: all the registers plus flags ++ // This number must be large enough to cover REG_COUNT (defined by c2) registers. ++ // There is no requirement that any ordering here matches any ordering c2 gives ++ // its optoregs. ++ number_of_registers = RegisterImpl::max_slots_per_register * RegisterImpl::number_of_registers + ++ FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers ++ }; ++ ++ static const int max_gpr; ++ static const int max_fpr; ++}; ++ ++// A set of registers ++template <class RegImpl> ++class AbstractRegSet { ++ uint32_t _bitset; ++ ++ AbstractRegSet(uint32_t bitset) : _bitset(bitset) { } ++ ++public: ++ ++ AbstractRegSet() : _bitset(0) { } ++ ++ AbstractRegSet(RegImpl r1) : _bitset(1 << r1->encoding()) { } ++ ++ AbstractRegSet operator+(const AbstractRegSet aSet) const { ++ AbstractRegSet result(_bitset | aSet._bitset); ++ return result; ++ } ++ ++ AbstractRegSet operator-(const AbstractRegSet aSet) const { ++ AbstractRegSet result(_bitset & ~aSet._bitset); ++ return result; ++ } ++ ++ AbstractRegSet &operator+=(const AbstractRegSet aSet) { ++ *this = *this + aSet; ++ return *this; ++ } ++ ++ AbstractRegSet &operator-=(const AbstractRegSet aSet) { ++ *this = *this - aSet; ++ return *this; ++ } ++ ++ static AbstractRegSet of(RegImpl r1) { ++ return AbstractRegSet(r1); ++ } ++ ++ static AbstractRegSet of(RegImpl r1, RegImpl r2) { ++ return of(r1) + r2; ++ } ++ ++ static AbstractRegSet of(RegImpl r1, RegImpl r2, RegImpl r3) { ++ return of(r1, r2) + r3; ++ } ++ ++ static AbstractRegSet of(RegImpl r1, RegImpl r2, RegImpl r3, RegImpl r4) { ++ return of(r1, r2, r3) + r4; ++ } ++ ++ static AbstractRegSet of(RegImpl r1, RegImpl r2, RegImpl r3, RegImpl r4, RegImpl r5) { ++ return of(r1, r2, r3, r4) + r5; ++ } ++ ++ static AbstractRegSet range(RegImpl start, RegImpl end) { ++ uint32_t bits = ~0; ++ bits <<= start->encoding(); ++ bits <<= 31 - end->encoding(); ++ bits >>= 31 - end->encoding(); ++ ++ return AbstractRegSet(bits); ++ } ++ ++ uint32_t bits() const { return _bitset; } ++}; ++ ++typedef AbstractRegSet<Register> RegSet; ++ ++#endif //CPU_LOONGARCH_REGISTER_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/registerMap_loongarch.hpp b/src/hotspot/cpu/loongarch/registerMap_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/registerMap_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/registerMap_loongarch.hpp 2024-01-30 10:00:11.841431732 +0800 +@@ -0,0 +1,47 @@ ++/* ++ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_REGISTERMAP_LOONGARCH_HPP ++#define CPU_LOONGARCH_REGISTERMAP_LOONGARCH_HPP ++ ++// machine-dependent implementation for register maps ++ friend class frame; ++ ++ private: ++#ifndef CORE ++ // This is the hook for finding a register in a "well-known" location, ++ // such as a register block of a predetermined format. ++ // Since there is none, we just return NULL. ++ // See registerMap_sparc.hpp for an example of grabbing registers ++ // from register save areas of a standard layout. ++ address pd_location(VMReg reg) const {return NULL;} ++#endif ++ ++ // no PD state to clear or copy: ++ void pd_clear() {} ++ void pd_initialize() {} ++ void pd_initialize_from(const RegisterMap* map) {} ++ ++#endif // CPU_LOONGARCH_REGISTERMAP_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/relocInfo_loongarch.cpp b/src/hotspot/cpu/loongarch/relocInfo_loongarch.cpp +--- a/src/hotspot/cpu/loongarch/relocInfo_loongarch.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/relocInfo_loongarch.cpp 2024-01-30 10:00:11.841431732 +0800 +@@ -0,0 +1,132 @@ ++/* ++ * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/relocInfo.hpp" ++#include "compiler/disassembler.hpp" ++#include "nativeInst_loongarch.hpp" ++#include "oops/compressedOops.inline.hpp" ++#include "oops/oop.hpp" ++#include "oops/klass.inline.hpp" ++#include "runtime/safepoint.hpp" ++ ++ ++void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) { ++ x += o; ++ typedef Assembler::WhichOperand WhichOperand; ++ WhichOperand which = (WhichOperand) format(); // that is, disp32 or imm, call32, narrow oop ++ assert(which == Assembler::disp32_operand || ++ which == Assembler::narrow_oop_operand || ++ which == Assembler::imm_operand, "format unpacks ok"); ++ if (type() == relocInfo::internal_word_type || ++ type() == relocInfo::section_word_type) { ++ MacroAssembler::pd_patch_instruction(addr(), x); ++ } else if (which == Assembler::imm_operand) { ++ if (verify_only) { ++ assert(nativeMovConstReg_at(addr())->data() == (long)x, "instructions must match"); ++ } else { ++ nativeMovConstReg_at(addr())->set_data((intptr_t)(x)); ++ } ++ } else if (which == Assembler::narrow_oop_operand) { ++ // both compressed oops and compressed classes look the same ++ if (Universe::heap()->is_in_reserved((oop)x)) { ++ if (verify_only) { ++ assert(nativeMovConstReg_at(addr())->data() == (long)CompressedOops::encode((oop)x), "instructions must match"); ++ } else { ++ nativeMovConstReg_at(addr())->set_data((intptr_t)(CompressedOops::encode(oop(x))), (intptr_t)(x)); ++ } ++ } else { ++ if (verify_only) { ++ assert(nativeMovConstReg_at(addr())->data() == (long)Klass::encode_klass((Klass*)x), "instructions must match"); ++ } else { ++ nativeMovConstReg_at(addr())->set_data((intptr_t)(Klass::encode_klass((Klass*)x)), (intptr_t)(x)); ++ } ++ } ++ } else { ++ // Note: Use runtime_call_type relocations for call32_operand. ++ assert(0, "call32_operand not supported in LoongArch64"); ++ } ++} ++ ++ ++address Relocation::pd_call_destination(address orig_addr) { ++ NativeInstruction* ni = nativeInstruction_at(addr()); ++ if (ni->is_far_call()) { ++ return nativeFarCall_at(addr())->destination(orig_addr); ++ } else if (ni->is_call()) { ++ address trampoline = nativeCall_at(addr())->get_trampoline(); ++ if (trampoline) { ++ return nativeCallTrampolineStub_at(trampoline)->destination(); ++ } else { ++ address new_addr = nativeCall_at(addr())->target_addr_for_bl(orig_addr); ++ // If call is branch to self, don't try to relocate it, just leave it ++ // as branch to self. This happens during code generation if the code ++ // buffer expands. It will be relocated to the trampoline above once ++ // code generation is complete. ++ return (new_addr == orig_addr) ? 
addr() : new_addr; ++ } ++ } else if (ni->is_jump()) { ++ return nativeGeneralJump_at(addr())->jump_destination(orig_addr); ++ } else { ++ tty->print_cr("\nError!\ncall destination: " INTPTR_FORMAT, p2i(addr())); ++ Disassembler::decode(addr() - 10 * BytesPerInstWord, addr() + 10 * BytesPerInstWord, tty); ++ ShouldNotReachHere(); ++ return NULL; ++ } ++} ++ ++void Relocation::pd_set_call_destination(address x) { ++ NativeInstruction* ni = nativeInstruction_at(addr()); ++ if (ni->is_far_call()) { ++ nativeFarCall_at(addr())->set_destination(x); ++ } else if (ni->is_call()) { ++ address trampoline = nativeCall_at(addr())->get_trampoline(); ++ if (trampoline) { ++ nativeCall_at(addr())->set_destination_mt_safe(x, false); ++ } else { ++ nativeCall_at(addr())->set_destination(x); ++ } ++ } else if (ni->is_jump()) { ++ nativeGeneralJump_at(addr())->set_jump_destination(x); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++address* Relocation::pd_address_in_code() { ++ return (address*)addr(); ++} ++ ++address Relocation::pd_get_address_from_code() { ++ NativeMovConstReg* ni = nativeMovConstReg_at(addr()); ++ return (address)ni->data(); ++} ++ ++void poll_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { ++} ++ ++void metadata_Relocation::pd_fix_value(address x) { ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/relocInfo_loongarch.hpp b/src/hotspot/cpu/loongarch/relocInfo_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/relocInfo_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/relocInfo_loongarch.hpp 2024-01-30 10:00:11.841431732 +0800 +@@ -0,0 +1,44 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_RELOCINFO_LOONGARCH_HPP ++#define CPU_LOONGARCH_RELOCINFO_LOONGARCH_HPP ++ ++ // machine-dependent parts of class relocInfo ++ private: ++ enum { ++ // Since LoongArch instructions are whole words, ++ // the two low-order offset bits can always be discarded. ++ offset_unit = 4, ++ ++ // imm_oop_operand vs. 
narrow_oop_operand ++ format_width = 2 ++ }; ++ ++ public: ++ ++ static bool mustIterateImmediateOopsInCode() { return false; } ++ ++#endif // CPU_LOONGARCH_RELOCINFO_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/runtime_loongarch_64.cpp b/src/hotspot/cpu/loongarch/runtime_loongarch_64.cpp +--- a/src/hotspot/cpu/loongarch/runtime_loongarch_64.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/runtime_loongarch_64.cpp 2024-01-30 10:00:11.841431732 +0800 +@@ -0,0 +1,191 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#ifdef COMPILER2 ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "classfile/systemDictionary.hpp" ++#include "code/vmreg.hpp" ++#include "interpreter/interpreter.hpp" ++#include "opto/runtime.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/vframeArray.hpp" ++#include "utilities/globalDefinitions.hpp" ++#include "vmreg_loongarch.inline.hpp" ++#endif ++ ++#define __ masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++//-------------- generate_exception_blob ----------- ++// creates _exception_blob. ++// The exception blob is jumped to from a compiled method. ++// (see emit_exception_handler in sparc.ad file) ++// ++// Given an exception pc at a call we call into the runtime for the ++// handler in this method. This handler might merely restore state ++// (i.e. callee save registers) unwind the frame and jump to the ++// exception handler for the nmethod if there is no Java level handler ++// for the nmethod. ++// ++// This code is entered with a jump, and left with a jump. ++// ++// Arguments: ++// V0: exception oop ++// V1: exception pc ++// ++// Results: ++// A0: exception oop ++// A1: exception pc in caller or ??? 
++// jumps to: exception handler of caller ++// ++// Note: the exception pc MUST be at a call (precise debug information) ++// ++// [stubGenerator_loongarch_64.cpp] generate_forward_exception() ++// |- V0, V1 are created ++// |- T4 <= SharedRuntime::exception_handler_for_return_address ++// `- jr T4 ++// `- the caller's exception_handler ++// `- jr OptoRuntime::exception_blob ++// `- here ++// ++void OptoRuntime::generate_exception_blob() { ++ // Capture info about frame layout ++ enum layout { ++ fp_off, ++ return_off, // slot for return address ++ framesize ++ }; ++ ++ // allocate space for the code ++ ResourceMark rm; ++ // setup code generation tools ++ CodeBuffer buffer("exception_blob", 5120, 5120); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ ++ address start = __ pc(); ++ ++ __ addi_d(SP, SP, -1 * framesize * wordSize); // Prolog! ++ ++ // this frame will be treated as the original caller method. ++ // So, the return pc should be filled with the original exception pc. ++ // ref: X86's implementation ++ __ st_d(V1, SP, return_off * wordSize); // return address ++ __ st_d(FP, SP, fp_off * wordSize); ++ ++ // Save callee saved registers. None for UseSSE=0, ++ // floats-only for UseSSE=1, and doubles for UseSSE=2. ++ ++ __ addi_d(FP, SP, fp_off * wordSize); ++ ++ // Store exception in Thread object. We cannot pass any arguments to the ++ // handle_exception call, since we do not want to make any assumption ++ // about the size of the frame where the exception happened in. ++ Register thread = TREG; ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ __ st_d(V0, Address(thread, JavaThread::exception_oop_offset())); ++ __ st_d(V1, Address(thread, JavaThread::exception_pc_offset())); ++ ++ // This call does all the hard work. It checks if an exception handler ++ // exists in the method. ++ // If so, it returns the handler address. ++ // If not, it prepares for stack-unwinding, restoring the callee-save ++ // registers of the frame being removed. ++ Label L; ++ address the_pc = __ pc(); ++ __ bind(L); ++ __ set_last_Java_frame(thread, NOREG, NOREG, L); ++ ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); // Fix stack alignment as required by ABI ++ ++ __ move(A0, thread); ++ // TODO: confirm reloc ++ __ call((address)OptoRuntime::handle_exception_C, relocInfo::runtime_call_type); ++ ++ // Set an oopmap for the call site ++ OopMapSet *oop_maps = new OopMapSet(); ++ ++ oop_maps->add_gc_map(the_pc - start, new OopMap(framesize, 0)); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ reset_last_Java_frame(thread, true); ++ ++ // Pop self-frame. ++ __ leave(); // Epilog! ++ ++ // V0: exception handler ++ ++ // We have a handler in V0, (could be deopt blob) ++ __ move(T4, V0); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // Get the exception ++ __ ld_d(A0, Address(thread, JavaThread::exception_oop_offset())); ++ // Get the exception pc in case we are deoptimized ++ __ ld_d(A1, Address(thread, JavaThread::exception_pc_offset())); ++#ifdef ASSERT ++ __ st_d(R0, Address(thread, JavaThread::exception_handler_pc_offset())); ++ __ st_d(R0, Address(thread, JavaThread::exception_pc_offset())); ++#endif ++ // Clear the exception oop so GC no longer processes it as a root. 
++ __ st_d(R0, Address(thread, JavaThread::exception_oop_offset())); ++ ++ // Fix seg fault when running: ++ // Eclipse + Plugin + Debug As ++ // This is the only condition where C2 calls SharedRuntime::generate_deopt_blob() ++ // ++ __ move(V0, A0); ++ __ move(V1, A1); ++ ++ // V0: exception oop ++ // T4: exception handler ++ // A1: exception pc ++ __ jr(T4); ++ ++ // make sure all code is generated ++ masm->flush(); ++ _exception_blob = ExceptionBlob::create(&buffer, oop_maps, framesize); ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/sharedRuntime_loongarch_64.cpp b/src/hotspot/cpu/loongarch/sharedRuntime_loongarch_64.cpp +--- a/src/hotspot/cpu/loongarch/sharedRuntime_loongarch_64.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/sharedRuntime_loongarch_64.cpp 2024-01-30 10:00:11.841431732 +0800 +@@ -0,0 +1,3621 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/debugInfoRec.hpp" ++#include "code/icBuffer.hpp" ++#include "code/vtableStubs.hpp" ++#include "interpreter/interpreter.hpp" ++#include "nativeInst_loongarch.hpp" ++#include "oops/compiledICHolder.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/vframeArray.hpp" ++#include "vmreg_loongarch.inline.hpp" ++#ifdef COMPILER2 ++#include "opto/runtime.hpp" ++#endif ++#if INCLUDE_JVMCI ++#include "jvmci/jvmciJavaClasses.hpp" ++#endif ++ ++#include ++ ++#define __ masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size; ++ ++class RegisterSaver { ++ // Capture info about frame layout ++ enum layout { ++ fpr0_off = 0, ++ fpr1_off, ++ fpr2_off, ++ fpr3_off, ++ fpr4_off, ++ fpr5_off, ++ fpr6_off, ++ fpr7_off, ++ fpr8_off, ++ fpr9_off, ++ fpr10_off, ++ fpr11_off, ++ fpr12_off, ++ fpr13_off, ++ fpr14_off, ++ fpr15_off, ++ fpr16_off, ++ fpr17_off, ++ fpr18_off, ++ fpr19_off, ++ fpr20_off, ++ fpr21_off, ++ fpr22_off, ++ fpr23_off, ++ fpr24_off, ++ fpr25_off, ++ fpr26_off, ++ fpr27_off, ++ fpr28_off, ++ fpr29_off, ++ fpr30_off, ++ fpr31_off, ++ a0_off, ++ a1_off, ++ a2_off, ++ a3_off, ++ a4_off, ++ a5_off, ++ a6_off, ++ a7_off, ++ t0_off, ++ t1_off, ++ t2_off, ++ t3_off, ++ t4_off, ++ t5_off, ++ t6_off, ++ t7_off, ++ t8_off, ++ s0_off, ++ s1_off, ++ s2_off, ++ s3_off, ++ s4_off, ++ s5_off, ++ s6_off, ++ s7_off, ++ s8_off, ++ fp_off, ++ ra_off, ++ fpr_size = fpr31_off - fpr0_off + 1, ++ gpr_size = ra_off - a0_off + 1, ++ }; ++ ++ const bool _save_vectors; ++ public: ++ RegisterSaver(bool save_vectors) : _save_vectors(save_vectors) {} ++ ++ OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words); ++ void restore_live_registers(MacroAssembler* masm); ++ ++ int slots_save() { ++ int slots = gpr_size * VMRegImpl::slots_per_word; ++ ++ if (_save_vectors && UseLASX) ++ slots += FloatRegisterImpl::slots_per_lasx_register * fpr_size; ++ else if (_save_vectors && UseLSX) ++ slots += FloatRegisterImpl::slots_per_lsx_register * fpr_size; ++ else ++ slots += FloatRegisterImpl::save_slots_per_register * fpr_size; ++ ++ return slots; ++ } ++ ++ int gpr_offset(int off) { ++ int slots_per_fpr = FloatRegisterImpl::save_slots_per_register; ++ int slots_per_gpr = VMRegImpl::slots_per_word; ++ ++ if (_save_vectors && UseLASX) ++ slots_per_fpr = FloatRegisterImpl::slots_per_lasx_register; ++ else if (_save_vectors && UseLSX) ++ slots_per_fpr = FloatRegisterImpl::slots_per_lsx_register; ++ ++ return (fpr_size * slots_per_fpr + (off - a0_off) * slots_per_gpr) * VMRegImpl::stack_slot_size; ++ } ++ ++ int fpr_offset(int off) { ++ int slots_per_fpr = FloatRegisterImpl::save_slots_per_register; ++ ++ if (_save_vectors && UseLASX) ++ slots_per_fpr = FloatRegisterImpl::slots_per_lasx_register; ++ else if (_save_vectors && UseLSX) ++ slots_per_fpr = FloatRegisterImpl::slots_per_lsx_register; ++ ++ return off * slots_per_fpr * VMRegImpl::stack_slot_size; ++ } ++ ++ int ra_offset() { return gpr_offset(ra_off); } ++ int t5_offset() { return gpr_offset(t5_off); } ++ int s3_offset() { return gpr_offset(s3_off); } ++ int v0_offset() { return gpr_offset(a0_off); } ++ int v1_offset() { return gpr_offset(a1_off); } ++ ++ int fpr0_offset() { return 
fpr_offset(fpr0_off); } ++ int fpr1_offset() { return fpr_offset(fpr1_off); } ++ ++ // During deoptimization only the result register need to be restored ++ // all the other values have already been extracted. ++ void restore_result_registers(MacroAssembler* masm); ++}; ++ ++OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) { ++ ++ // Always make the frame size 16-byte aligned ++ int frame_size_in_bytes = align_up(additional_frame_words * wordSize + slots_save() * VMRegImpl::stack_slot_size, StackAlignmentInBytes); ++ // OopMap frame size is in compiler stack slots (jint's) not bytes or words ++ int frame_size_in_slots = frame_size_in_bytes / VMRegImpl::stack_slot_size; ++ // The caller will allocate additional_frame_words ++ int additional_frame_slots = additional_frame_words * wordSize / VMRegImpl::stack_slot_size; ++ // CodeBlob frame size is in words. ++ int frame_size_in_words = frame_size_in_bytes / wordSize; ++ ++ *total_frame_words = frame_size_in_words; ++ ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map = new OopMap(frame_size_in_slots, 0); ++ ++ // save registers ++ __ addi_d(SP, SP, -slots_save() * VMRegImpl::stack_slot_size); ++ ++ for (int i = 0; i < fpr_size; i++) { ++ FloatRegister fpr = as_FloatRegister(i); ++ int off = fpr_offset(i); ++ ++ if (_save_vectors && UseLASX) ++ __ xvst(fpr, SP, off); ++ else if (_save_vectors && UseLSX) ++ __ vst(fpr, SP, off); ++ else ++ __ fst_d(fpr, SP, off); ++ map->set_callee_saved(VMRegImpl::stack2reg(off / VMRegImpl::stack_slot_size + additional_frame_slots), fpr->as_VMReg()); ++ } ++ ++ for (int i = a0_off; i <= a7_off; i++) { ++ Register gpr = as_Register(A0->encoding() + (i - a0_off)); ++ int off = gpr_offset(i); ++ ++ __ st_d(gpr, SP, gpr_offset(i)); ++ map->set_callee_saved(VMRegImpl::stack2reg(off / VMRegImpl::stack_slot_size + additional_frame_slots), gpr->as_VMReg()); ++ } ++ ++ for (int i = t0_off; i <= t6_off; i++) { ++ Register gpr = as_Register(T0->encoding() + (i - t0_off)); ++ int off = gpr_offset(i); ++ ++ __ st_d(gpr, SP, gpr_offset(i)); ++ map->set_callee_saved(VMRegImpl::stack2reg(off / VMRegImpl::stack_slot_size + additional_frame_slots), gpr->as_VMReg()); ++ } ++ __ st_d(T8, SP, gpr_offset(t8_off)); ++ map->set_callee_saved(VMRegImpl::stack2reg(gpr_offset(t8_off) / VMRegImpl::stack_slot_size + additional_frame_slots), T8->as_VMReg()); ++ ++ for (int i = s0_off; i <= s8_off; i++) { ++ Register gpr = as_Register(S0->encoding() + (i - s0_off)); ++ int off = gpr_offset(i); ++ ++ __ st_d(gpr, SP, gpr_offset(i)); ++ map->set_callee_saved(VMRegImpl::stack2reg(off / VMRegImpl::stack_slot_size + additional_frame_slots), gpr->as_VMReg()); ++ } ++ ++ __ st_d(FP, SP, gpr_offset(fp_off)); ++ map->set_callee_saved(VMRegImpl::stack2reg(gpr_offset(fp_off) / VMRegImpl::stack_slot_size + additional_frame_slots), FP->as_VMReg()); ++ __ st_d(RA, SP, gpr_offset(ra_off)); ++ map->set_callee_saved(VMRegImpl::stack2reg(gpr_offset(ra_off) / VMRegImpl::stack_slot_size + additional_frame_slots), RA->as_VMReg()); ++ ++ __ addi_d(FP, SP, gpr_offset(fp_off)); ++ ++ return map; ++} ++ ++ ++// Pop the current frame and restore all the registers that we ++// saved. 
++void RegisterSaver::restore_live_registers(MacroAssembler* masm) { ++ for (int i = 0; i < fpr_size; i++) { ++ FloatRegister fpr = as_FloatRegister(i); ++ int off = fpr_offset(i); ++ ++ if (_save_vectors && UseLASX) ++ __ xvld(fpr, SP, off); ++ else if (_save_vectors && UseLSX) ++ __ vld(fpr, SP, off); ++ else ++ __ fld_d(fpr, SP, off); ++ } ++ ++ for (int i = a0_off; i <= a7_off; i++) { ++ Register gpr = as_Register(A0->encoding() + (i - a0_off)); ++ int off = gpr_offset(i); ++ ++ __ ld_d(gpr, SP, gpr_offset(i)); ++ } ++ ++ for (int i = t0_off; i <= t6_off; i++) { ++ Register gpr = as_Register(T0->encoding() + (i - t0_off)); ++ int off = gpr_offset(i); ++ ++ __ ld_d(gpr, SP, gpr_offset(i)); ++ } ++ __ ld_d(T8, SP, gpr_offset(t8_off)); ++ ++ for (int i = s0_off; i <= s8_off; i++) { ++ Register gpr = as_Register(S0->encoding() + (i - s0_off)); ++ int off = gpr_offset(i); ++ ++ __ ld_d(gpr, SP, gpr_offset(i)); ++ } ++ ++ __ ld_d(FP, SP, gpr_offset(fp_off)); ++ __ ld_d(RA, SP, gpr_offset(ra_off)); ++ ++ __ addi_d(SP, SP, slots_save() * VMRegImpl::stack_slot_size); ++} ++ ++// Pop the current frame and restore the registers that might be holding ++// a result. ++void RegisterSaver::restore_result_registers(MacroAssembler* masm) { ++ // Just restore result register. Only used by deoptimization. By ++ // now any callee save register that needs to be restore to a c2 ++ // caller of the deoptee has been extracted into the vframeArray ++ // and will be stuffed into the c2i adapter we create for later ++ // restoration so only result registers need to be restored here. ++ ++ __ ld_d(V0, SP, gpr_offset(a0_off)); ++ __ ld_d(V1, SP, gpr_offset(a1_off)); ++ ++ __ fld_d(F0, SP, fpr_offset(fpr0_off)); ++ __ fld_d(F1, SP, fpr_offset(fpr1_off)); ++ ++ __ addi_d(SP, SP, gpr_offset(ra_off)); ++} ++ ++// Is vector's size (in bytes) bigger than a size saved by default? ++// 8 bytes registers are saved by default using fld/fst instructions. ++bool SharedRuntime::is_wide_vector(int size) { ++ return size > 8; ++} ++ ++size_t SharedRuntime::trampoline_size() { ++ return 32; ++} ++ ++void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) { ++ // trampoline is not in CodeCache ++ __ li(T4, (long)destination); ++ __ jr(T4); ++} ++ ++// The java_calling_convention describes stack locations as ideal slots on ++// a frame with no abi restrictions. Since we must observe abi restrictions ++// (like the placement of the register window) the slots must be biased by ++// the following value. ++ ++static int reg2offset_in(VMReg r) { ++ // Account for saved fp and return address ++ // This should really be in_preserve_stack_slots ++ return (r->reg2stack() + 2 * VMRegImpl::slots_per_word) * VMRegImpl::stack_slot_size; // + 2 * VMRegImpl::stack_slot_size); ++} ++ ++static int reg2offset_out(VMReg r) { ++ return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; ++} ++ ++// --------------------------------------------------------------------------- ++// Read the array of BasicTypes from a signature, and compute where the ++// arguments should go. Values in the VMRegPair regs array refer to 4-byte ++// quantities. Values less than SharedInfo::stack0 are registers, those above ++// refer to 4-byte stack slots. All stack slots are based off of the stack pointer ++// as framesizes are fixed. ++// VMRegImpl::stack0 refers to the first slot 0(sp). ++// and VMRegImpl::stack0+1 refers to the memory word 4-byes higher. 
Register ++// up to RegisterImpl::number_of_registers) are the 32-bit ++// integer registers. ++ ++// Pass first five oop/int args in registers T0, A0 - A3. ++// Pass float/double/long args in stack. ++// Doubles have precedence, so if you pass a mix of floats and doubles ++// the doubles will grab the registers before the floats will. ++ ++// Note: the INPUTS in sig_bt are in units of Java argument words, which are ++// either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit ++// units regardless of build. ++ ++ ++// --------------------------------------------------------------------------- ++// The compiled Java calling convention. ++// Pass first five oop/int args in registers T0, A0 - A3. ++// Pass float/double/long args in stack. ++// Doubles have precedence, so if you pass a mix of floats and doubles ++// the doubles will grab the registers before the floats will. ++ ++int SharedRuntime::java_calling_convention(const BasicType *sig_bt, ++ VMRegPair *regs, ++ int total_args_passed, ++ int is_outgoing) { ++ ++ // Create the mapping between argument positions and registers. ++ static const Register INT_ArgReg[Argument::n_register_parameters + 1] = { ++ T0, A0, A1, A2, A3, A4, A5, A6, A7 ++ }; ++ static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = { ++ FA0, FA1, FA2, FA3, FA4, FA5, FA6, FA7 ++ }; ++ ++ uint int_args = 0; ++ uint fp_args = 0; ++ uint stk_args = 0; // inc by 2 each time ++ ++ for (int i = 0; i < total_args_passed; i++) { ++ switch (sig_bt[i]) { ++ case T_VOID: ++ // halves of T_LONG or T_DOUBLE ++ assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); ++ regs[i].set_bad(); ++ break; ++ case T_BOOLEAN: ++ case T_CHAR: ++ case T_BYTE: ++ case T_SHORT: ++ case T_INT: ++ if (int_args < Argument::n_register_parameters + 1) { ++ regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_LONG: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ // fall through ++ case T_OBJECT: ++ case T_ARRAY: ++ case T_ADDRESS: ++ if (int_args < Argument::n_register_parameters + 1) { ++ regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_FLOAT: ++ if (fp_args < Argument::n_float_register_parameters) { ++ regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_DOUBLE: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ if (fp_args < Argument::n_float_register_parameters) { ++ regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } ++ ++ return round_to(stk_args, 2); ++} ++ ++// Patch the callers callsite with entry to compiled code if it exists. ++static void patch_callers_callsite(MacroAssembler *masm) { ++ Label L; ++ __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset())); ++ __ beq(AT, R0, L); ++ // Schedule the branch target address early. 
++ // Call into the VM to patch the caller, then jump to compiled callee ++ // T5 isn't live so capture return address while we easily can ++ __ move(T5, RA); ++ ++ __ pushad(); ++#ifdef COMPILER2 ++ // C2 may leave the stack dirty if not in SSE2+ mode ++ __ empty_FPU_stack(); ++#endif ++ ++ // VM needs caller's callsite ++ // VM needs target method ++ ++ __ move(A0, Rmethod); ++ __ move(A1, T5); ++ // we should preserve the return address ++ __ move(TSR, SP); ++ __ li(AT, -(StackAlignmentInBytes)); // align the stack ++ __ andr(SP, SP, AT); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), ++ relocInfo::runtime_call_type); ++ ++ __ move(SP, TSR); ++ __ popad(); ++ __ bind(L); ++} ++ ++static void gen_c2i_adapter(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs, ++ Label& skip_fixup) { ++ ++ // Before we get into the guts of the C2I adapter, see if we should be here ++ // at all. We've come from compiled code and are attempting to jump to the ++ // interpreter, which means the caller made a static call to get here ++ // (vcalls always get a compiled target if there is one). Check for a ++ // compiled target. If there is one, we need to patch the caller's call. ++ // However we will run interpreted if we come thru here. The next pass ++ // thru the call site will run compiled. If we ran compiled here then ++ // we can (theorectically) do endless i2c->c2i->i2c transitions during ++ // deopt/uncommon trap cycles. If we always go interpreted here then ++ // we can have at most one and don't need to play any tricks to keep ++ // from endlessly growing the stack. ++ // ++ // Actually if we detected that we had an i2c->c2i transition here we ++ // ought to be able to reset the world back to the state of the interpreted ++ // call and not bother building another interpreter arg area. We don't ++ // do that at this point. ++ ++ patch_callers_callsite(masm); ++ __ bind(skip_fixup); ++ ++#ifdef COMPILER2 ++ __ empty_FPU_stack(); ++#endif ++ //this is for native ? ++ // Since all args are passed on the stack, total_args_passed * interpreter_ ++ // stack_element_size is the ++ // space we need. ++ int extraspace = total_args_passed * Interpreter::stackElementSize; ++ ++ // stack is aligned, keep it that way ++ extraspace = round_to(extraspace, 2*wordSize); ++ ++ // Get return address ++ __ move(T5, RA); ++ // set senderSP value ++ //refer to interpreter_loongarch.cpp:generate_asm_entry ++ __ move(Rsender, SP); ++ __ addi_d(SP, SP, -extraspace); ++ ++ // Now write the args into the outgoing interpreter space ++ for (int i = 0; i < total_args_passed; i++) { ++ if (sig_bt[i] == T_VOID) { ++ assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); ++ continue; ++ } ++ ++ // st_off points to lowest address on stack. 
++ int st_off = ((total_args_passed - 1) - i) * Interpreter::stackElementSize; ++ // Say 4 args: ++ // i st_off ++ // 0 12 T_LONG ++ // 1 8 T_VOID ++ // 2 4 T_OBJECT ++ // 3 0 T_BOOL ++ VMReg r_1 = regs[i].first(); ++ VMReg r_2 = regs[i].second(); ++ if (!r_1->is_valid()) { ++ assert(!r_2->is_valid(), ""); ++ continue; ++ } ++ if (r_1->is_stack()) { ++ // memory to memory use fpu stack top ++ int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace; ++ if (!r_2->is_valid()) { ++ __ ld_ptr(AT, Address(SP, ld_off)); ++ __ st_ptr(AT, Address(SP, st_off)); ++ ++ } else { ++ ++ ++ int next_off = st_off - Interpreter::stackElementSize; ++ __ ld_ptr(AT, Address(SP, ld_off)); ++ __ st_ptr(AT, Address(SP, st_off)); ++ ++ // Ref to is_Register condition ++ if(sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ++ __ st_ptr(AT, SP, st_off - 8); ++ } ++ } else if (r_1->is_Register()) { ++ Register r = r_1->as_Register(); ++ if (!r_2->is_valid()) { ++ __ st_d(r, SP, st_off); ++ } else { ++ //FIXME, LA will not enter here ++ // long/double in gpr ++ __ st_d(r, SP, st_off); ++ // In [java/util/zip/ZipFile.java] ++ // ++ // private static native long open(String name, int mode, long lastModified); ++ // private static native int getTotal(long jzfile); ++ // ++ // We need to transfer T_LONG paramenters from a compiled method to a native method. ++ // It's a complex process: ++ // ++ // Caller -> lir_static_call -> gen_resolve_stub ++ // -> -- resolve_static_call_C ++ // `- gen_c2i_adapter() [*] ++ // | ++ // `- AdapterHandlerLibrary::get_create_apapter_index ++ // -> generate_native_entry ++ // -> InterpreterRuntime::SignatureHandlerGenerator::pass_long [**] ++ // ++ // In [**], T_Long parameter is stored in stack as: ++ // ++ // (high) ++ // | | ++ // ----------- ++ // | 8 bytes | ++ // | (void) | ++ // ----------- ++ // | 8 bytes | ++ // | (long) | ++ // ----------- ++ // | | ++ // (low) ++ // ++ // However, the sequence is reversed here: ++ // ++ // (high) ++ // | | ++ // ----------- ++ // | 8 bytes | ++ // | (long) | ++ // ----------- ++ // | 8 bytes | ++ // | (void) | ++ // ----------- ++ // | | ++ // (low) ++ // ++ // So I stored another 8 bytes in the T_VOID slot. It then can be accessed from generate_native_entry(). ++ // ++ if (sig_bt[i] == T_LONG) ++ __ st_d(r, SP, st_off - 8); ++ } ++ } else if (r_1->is_FloatRegister()) { ++ assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register"); ++ ++ FloatRegister fr = r_1->as_FloatRegister(); ++ if (sig_bt[i] == T_FLOAT) ++ __ fst_s(fr, SP, st_off); ++ else { ++ __ fst_d(fr, SP, st_off); ++ __ fst_d(fr, SP, st_off - 8); // T_DOUBLE needs two slots ++ } ++ } ++ } ++ ++ // Schedule the branch target address early. ++ __ ld_ptr(AT, Rmethod, in_bytes(Method::interpreter_entry_offset()) ); ++ // And repush original return address ++ __ move(RA, T5); ++ __ jr (AT); ++} ++ ++void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs) { ++ ++ // Generate an I2C adapter: adjust the I-frame to make space for the C-frame ++ // layout. Lesp was saved by the calling I-frame and will be restored on ++ // return. Meanwhile, outgoing arg space is all owned by the callee ++ // C-frame, so we can mangle it at will. After adjusting the frame size, ++ // hoist register arguments and repack other args according to the compiled ++ // code convention. Finally, end in a jump to the compiled code. 
The entry ++ // point address is the start of the buffer. ++ ++ // We will only enter here from an interpreted frame and never from after ++ // passing thru a c2i. Azul allowed this but we do not. If we lose the ++ // race and use a c2i we will remain interpreted for the race loser(s). ++ // This removes all sorts of headaches on the LA side and also eliminates ++ // the possibility of having c2i -> i2c -> c2i -> ... endless transitions. ++ ++ __ move(T4, SP); ++ ++ // Cut-out for having no stack args. Since up to 2 int/oop args are passed ++ // in registers, we will occasionally have no stack args. ++ int comp_words_on_stack = 0; ++ if (comp_args_on_stack) { ++ // Sig words on the stack are greater-than VMRegImpl::stack0. Those in ++ // registers are below. By subtracting stack0, we either get a negative ++ // number (all values in registers) or the maximum stack slot accessed. ++ // int comp_args_on_stack = VMRegImpl::reg2stack(max_arg); ++ // Convert 4-byte stack slots to words. ++ // did LA need round? FIXME ++ comp_words_on_stack = round_to(comp_args_on_stack*4, wordSize)>>LogBytesPerWord; ++ // Round up to miminum stack alignment, in wordSize ++ comp_words_on_stack = round_to(comp_words_on_stack, 2); ++ __ addi_d(SP, SP, -comp_words_on_stack * wordSize); ++ } ++ ++ // Align the outgoing SP ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ // push the return address on the stack (note that pushing, rather ++ // than storing it, yields the correct frame alignment for the callee) ++ // Put saved SP in another register ++ const Register saved_sp = T5; ++ __ move(saved_sp, T4); ++ ++ ++ // Will jump to the compiled code just as if compiled code was doing it. ++ // Pre-load the register-jump target early, to schedule it better. ++ __ ld_d(T4, Rmethod, in_bytes(Method::from_compiled_offset())); ++ ++#if INCLUDE_JVMCI ++ if (EnableJVMCI) { ++ // check if this call should be routed towards a specific entry point ++ __ ld_d(AT, Address(TREG, in_bytes(JavaThread::jvmci_alternate_call_target_offset()))); ++ Label no_alternative_target; ++ __ beqz(AT, no_alternative_target); ++ __ move(T4, AT); ++ __ st_d(R0, Address(TREG, in_bytes(JavaThread::jvmci_alternate_call_target_offset()))); ++ __ bind(no_alternative_target); ++ } ++#endif // INCLUDE_JVMCI ++ ++ // Now generate the shuffle code. Pick up all register args and move the ++ // rest through the floating point stack top. ++ for (int i = 0; i < total_args_passed; i++) { ++ if (sig_bt[i] == T_VOID) { ++ // Longs and doubles are passed in native word order, but misaligned ++ // in the 32-bit build. ++ assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); ++ continue; ++ } ++ ++ // Pick up 0, 1 or 2 words from SP+offset. ++ ++ assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "scrambled load targets?"); ++ // Load in argument order going down. ++ int ld_off = (total_args_passed -1 - i)*Interpreter::stackElementSize; ++ // Point to interpreter value (vs. tag) ++ int next_off = ld_off - Interpreter::stackElementSize; ++ VMReg r_1 = regs[i].first(); ++ VMReg r_2 = regs[i].second(); ++ if (!r_1->is_valid()) { ++ assert(!r_2->is_valid(), ""); ++ continue; ++ } ++ if (r_1->is_stack()) { ++ // Convert stack slot to an SP offset (+ wordSize to ++ // account for return address ) ++ // NOTICE HERE!!!! 
I sub a wordSize here ++ int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size; ++ //+ wordSize; ++ ++ if (!r_2->is_valid()) { ++ __ ld_d(AT, saved_sp, ld_off); ++ __ st_d(AT, SP, st_off); ++ } else { ++ // Interpreter local[n] == MSW, local[n+1] == LSW however locals ++ // are accessed as negative so LSW is at LOW address ++ ++ // ld_off is MSW so get LSW ++ // st_off is LSW (i.e. reg.first()) ++ ++ // [./org/eclipse/swt/graphics/GC.java] ++ // void drawImageXRender(Image srcImage, int srcX, int srcY, int srcWidth, int srcHeight, ++ // int destX, int destY, int destWidth, int destHeight, ++ // boolean simple, ++ // int imgWidth, int imgHeight, ++ // long maskPixmap, <-- Pass T_LONG in stack ++ // int maskType); ++ // Before this modification, Eclipse displays icons with solid black background. ++ // ++ __ ld_d(AT, saved_sp, ld_off); ++ if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ++ __ ld_d(AT, saved_sp, ld_off - 8); ++ __ st_d(AT, SP, st_off); ++ } ++ } else if (r_1->is_Register()) { // Register argument ++ Register r = r_1->as_Register(); ++ if (r_2->is_valid()) { ++ // Remember r_1 is low address (and LSB on LA) ++ // So r_2 gets loaded from high address regardless of the platform ++ assert(r_2->as_Register() == r_1->as_Register(), ""); ++ __ ld_d(r, saved_sp, ld_off); ++ ++ // ++ // For T_LONG type, the real layout is as below: ++ // ++ // (high) ++ // | | ++ // ----------- ++ // | 8 bytes | ++ // | (void) | ++ // ----------- ++ // | 8 bytes | ++ // | (long) | ++ // ----------- ++ // | | ++ // (low) ++ // ++ // We should load the low-8 bytes. ++ // ++ if (sig_bt[i] == T_LONG) ++ __ ld_d(r, saved_sp, ld_off - 8); ++ } else { ++ __ ld_w(r, saved_sp, ld_off); ++ } ++ } else if (r_1->is_FloatRegister()) { // Float Register ++ assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register"); ++ ++ FloatRegister fr = r_1->as_FloatRegister(); ++ if (sig_bt[i] == T_FLOAT) ++ __ fld_s(fr, saved_sp, ld_off); ++ else { ++ __ fld_d(fr, saved_sp, ld_off); ++ __ fld_d(fr, saved_sp, ld_off - 8); ++ } ++ } ++ } ++ ++ // 6243940 We might end up in handle_wrong_method if ++ // the callee is deoptimized as we race thru here. If that ++ // happens we don't want to take a safepoint because the ++ // caller frame will look interpreted and arguments are now ++ // "compiled" so it is much better to make this transition ++ // invisible to the stack walking code. Unfortunately if ++ // we try and find the callee by normal means a safepoint ++ // is possible. So we stash the desired callee in the thread ++ // and the vm will find there should this case occur. ++#ifndef OPT_THREAD ++ Register thread = T8; ++ __ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ __ st_d(Rmethod, thread, in_bytes(JavaThread::callee_target_offset())); ++ ++ // move methodOop to T5 in case we end up in an c2i adapter. ++ // the c2i adapters expect methodOop in T5 (c2) because c2's ++ // resolve stubs return the result (the method) in T5. ++ // I'd love to fix this. 
++  __ move(T5, Rmethod);
++  __ jr(T4);
++}
++
++// ---------------------------------------------------------------
++AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
++                                                            int total_args_passed,
++                                                            int comp_args_on_stack,
++                                                            const BasicType *sig_bt,
++                                                            const VMRegPair *regs,
++                                                            AdapterFingerPrint* fingerprint) {
++  address i2c_entry = __ pc();
++
++  gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);
++
++  // -------------------------------------------------------------------------
++  // Generate a C2I adapter.  On entry we know Rmethod holds the method.  The
++  // args start out packed in the compiled layout.  They need to be unpacked
++  // into the interpreter layout.  This will almost always require some stack
++  // space.  We grow the current (compiled) stack, then repack the args.  We
++  // finally end in a jump to the generic interpreter entry point.  On exit
++  // from the interpreter, the interpreter will restore our SP (lest the
++  // compiled code, which relies solely on SP and not FP, get sick).
++
++  address c2i_unverified_entry = __ pc();
++  Label skip_fixup;
++  {
++    Register holder = T1;
++    Register receiver = T0;
++    Register temp = T8;
++    address ic_miss = SharedRuntime::get_ic_miss_stub();
++
++    Label missed;
++
++    // add for compressed oops
++    __ load_klass(temp, receiver);
++
++    __ ld_ptr(AT, holder, CompiledICHolder::holder_klass_offset());
++    __ ld_ptr(Rmethod, holder, CompiledICHolder::holder_metadata_offset());
++    __ bne(AT, temp, missed);
++    // Method might have been compiled since the call site was patched to
++    // interpreted; if that is the case, treat it as a miss so we can get
++    // the call site corrected.
++    __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset()));
++    __ beq(AT, R0, skip_fixup);
++    __ bind(missed);
++
++    __ jmp(ic_miss, relocInfo::runtime_call_type);
++  }
++  address c2i_entry = __ pc();
++
++  gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
++
++  __ flush();
++  return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
++}
++
++int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
++                                        VMRegPair *regs,
++                                        VMRegPair *regs2,
++                                        int total_args_passed) {
++  assert(regs2 == NULL, "not needed on LA");
++  // Return the number of VMReg stack_slots needed for the args.
++  // This value does not include an abi space (like register window
++  // save area).
++
++  // We return the amount of VMRegImpl stack slots we need to reserve for all
++  // the arguments NOT counting out_preserve_stack_slots.  Since we always
++  // have space for storing at least 6 registers to memory we start with that.
++  // See int_stk_helper for a further discussion.
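++  // Rough sketch of the mapping implemented below (this port's LP64 C ABI):
++  // the first eight integer/pointer arguments go in A0..A7, the first eight
++  // FP arguments go in FA0..FA7, FP arguments that no longer fit fall back
++  // to the remaining integer registers, and anything left is given two
++  // 32-bit stack slots per value.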
++ static const Register INT_ArgReg[Argument::n_register_parameters] = { ++ A0, A1, A2, A3, A4, A5, A6, A7 ++ }; ++ static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = { ++ FA0, FA1, FA2, FA3, FA4, FA5, FA6, FA7 ++ }; ++ uint int_args = 0; ++ uint fp_args = 0; ++ uint stk_args = 0; // inc by 2 each time ++ ++// Example: ++// n java.lang.UNIXProcess::forkAndExec ++// private native int forkAndExec(byte[] prog, ++// byte[] argBlock, int argc, ++// byte[] envBlock, int envc, ++// byte[] dir, ++// boolean redirectErrorStream, ++// FileDescriptor stdin_fd, ++// FileDescriptor stdout_fd, ++// FileDescriptor stderr_fd) ++// JNIEXPORT jint JNICALL ++// Java_java_lang_UNIXProcess_forkAndExec(JNIEnv *env, ++// jobject process, ++// jbyteArray prog, ++// jbyteArray argBlock, jint argc, ++// jbyteArray envBlock, jint envc, ++// jbyteArray dir, ++// jboolean redirectErrorStream, ++// jobject stdin_fd, ++// jobject stdout_fd, ++// jobject stderr_fd) ++// ++// ::c_calling_convention ++// 0: // env <-- a0 ++// 1: L // klass/obj <-- t0 => a1 ++// 2: [ // prog[] <-- a0 => a2 ++// 3: [ // argBlock[] <-- a1 => a3 ++// 4: I // argc <-- a2 => a4 ++// 5: [ // envBlock[] <-- a3 => a5 ++// 6: I // envc <-- a4 => a5 ++// 7: [ // dir[] <-- a5 => a7 ++// 8: Z // redirectErrorStream <-- a6 => sp[0] ++// 9: L // stdin <-- a7 => sp[8] ++// 10: L // stdout fp[16] => sp[16] ++// 11: L // stderr fp[24] => sp[24] ++// ++ for (int i = 0; i < total_args_passed; i++) { ++ switch (sig_bt[i]) { ++ case T_VOID: // Halves of longs and doubles ++ assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); ++ regs[i].set_bad(); ++ break; ++ case T_BOOLEAN: ++ case T_CHAR: ++ case T_BYTE: ++ case T_SHORT: ++ case T_INT: ++ if (int_args < Argument::n_register_parameters) { ++ regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_LONG: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ // fall through ++ case T_OBJECT: ++ case T_ARRAY: ++ case T_ADDRESS: ++ case T_METADATA: ++ if (int_args < Argument::n_register_parameters) { ++ regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_FLOAT: ++ if (fp_args < Argument::n_float_register_parameters) { ++ regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg()); ++ } else if (int_args < Argument::n_register_parameters) { ++ regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_DOUBLE: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ if (fp_args < Argument::n_float_register_parameters) { ++ regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg()); ++ } else if (int_args < Argument::n_register_parameters) { ++ regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } ++ ++ return round_to(stk_args, 2); ++} ++ ++// --------------------------------------------------------------------------- ++void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { ++ // We always ignore the frame_slots arg and just use the space just below frame pointer ++ // which by this time is free to use ++ switch (ret_type) { ++ case T_FLOAT: ++ __ fst_s(FSF, FP, -wordSize); ++ break; ++ case 
T_DOUBLE: ++ __ fst_d(FSF, FP, -wordSize ); ++ break; ++ case T_VOID: break; ++ case T_LONG: ++ __ st_d(V0, FP, -wordSize); ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ __ st_d(V0, FP, -wordSize); ++ break; ++ default: { ++ __ st_w(V0, FP, -wordSize); ++ } ++ } ++} ++ ++void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { ++ // We always ignore the frame_slots arg and just use the space just below frame pointer ++ // which by this time is free to use ++ switch (ret_type) { ++ case T_FLOAT: ++ __ fld_s(FSF, FP, -wordSize); ++ break; ++ case T_DOUBLE: ++ __ fld_d(FSF, FP, -wordSize ); ++ break; ++ case T_LONG: ++ __ ld_d(V0, FP, -wordSize); ++ break; ++ case T_VOID: break; ++ case T_OBJECT: ++ case T_ARRAY: ++ __ ld_d(V0, FP, -wordSize); ++ break; ++ default: { ++ __ ld_w(V0, FP, -wordSize); ++ } ++ } ++} ++ ++static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { ++ for ( int i = first_arg ; i < arg_count ; i++ ) { ++ if (args[i].first()->is_Register()) { ++ __ push(args[i].first()->as_Register()); ++ } else if (args[i].first()->is_FloatRegister()) { ++ __ push(args[i].first()->as_FloatRegister()); ++ } ++ } ++} ++ ++static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { ++ for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) { ++ if (args[i].first()->is_Register()) { ++ __ pop(args[i].first()->as_Register()); ++ } else if (args[i].first()->is_FloatRegister()) { ++ __ pop(args[i].first()->as_FloatRegister()); ++ } ++ } ++} ++ ++// A simple move of integer like type ++static void simple_move32(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ // stack to stack ++ __ ld_w(AT, FP, reg2offset_in(src.first())); ++ __ st_d(AT, SP, reg2offset_out(dst.first())); ++ } else { ++ // stack to reg ++ __ ld_w(dst.first()->as_Register(), FP, reg2offset_in(src.first())); ++ } ++ } else if (dst.first()->is_stack()) { ++ // reg to stack ++ __ st_d(src.first()->as_Register(), SP, reg2offset_out(dst.first())); ++ } else { ++ if (dst.first() != src.first()){ ++ __ move(dst.first()->as_Register(), src.first()->as_Register()); ++ } ++ } ++} ++ ++// An oop arg. Must pass a handle not the oop itself ++static void object_move(MacroAssembler* masm, ++ OopMap* map, ++ int oop_handle_offset, ++ int framesize_in_slots, ++ VMRegPair src, ++ VMRegPair dst, ++ bool is_receiver, ++ int* receiver_offset) { ++ ++ // must pass a handle. 
First figure out the location we use as a handle ++ ++ if (src.first()->is_stack()) { ++ // Oop is already on the stack as an argument ++ Register rHandle = T5; ++ Label nil; ++ __ xorr(rHandle, rHandle, rHandle); ++ __ ld_d(AT, FP, reg2offset_in(src.first())); ++ __ beq(AT, R0, nil); ++ __ lea(rHandle, Address(FP, reg2offset_in(src.first()))); ++ __ bind(nil); ++ if(dst.first()->is_stack())__ st_d( rHandle, SP, reg2offset_out(dst.first())); ++ else __ move( (dst.first())->as_Register(), rHandle); ++ ++ int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots(); ++ map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); ++ if (is_receiver) { ++ *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size; ++ } ++ } else { ++ // Oop is in an a register we must store it to the space we reserve ++ // on the stack for oop_handles ++ const Register rOop = src.first()->as_Register(); ++ assert( (rOop->encoding() >= A0->encoding()) && (rOop->encoding() <= T0->encoding()),"wrong register"); ++ const Register rHandle = T5; ++ //Important: refer to java_calling_convertion ++ int oop_slot = (rOop->encoding() - A0->encoding()) * VMRegImpl::slots_per_word + oop_handle_offset; ++ int offset = oop_slot*VMRegImpl::stack_slot_size; ++ Label skip; ++ __ st_d( rOop , SP, offset ); ++ map->set_oop(VMRegImpl::stack2reg(oop_slot)); ++ __ xorr( rHandle, rHandle, rHandle); ++ __ beq(rOop, R0, skip); ++ __ lea(rHandle, Address(SP, offset)); ++ __ bind(skip); ++ // Store the handle parameter ++ if(dst.first()->is_stack())__ st_d( rHandle, SP, reg2offset_out(dst.first())); ++ else __ move((dst.first())->as_Register(), rHandle); ++ ++ if (is_receiver) { ++ *receiver_offset = offset; ++ } ++ } ++} ++ ++// A float arg may have to do float reg int reg conversion ++static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move"); ++ if (src.first()->is_stack()) { ++ // stack to stack/reg ++ if (dst.first()->is_stack()) { ++ __ ld_w(AT, FP, reg2offset_in(src.first())); ++ __ st_w(AT, SP, reg2offset_out(dst.first())); ++ } else if (dst.first()->is_FloatRegister()) { ++ __ fld_s(dst.first()->as_FloatRegister(), FP, reg2offset_in(src.first())); ++ } else { ++ __ ld_w(dst.first()->as_Register(), FP, reg2offset_in(src.first())); ++ } ++ } else { ++ // reg to stack/reg ++ if(dst.first()->is_stack()) { ++ __ fst_s(src.first()->as_FloatRegister(), SP, reg2offset_out(dst.first())); ++ } else if (dst.first()->is_FloatRegister()) { ++ __ fmov_s(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); ++ } else { ++ __ movfr2gr_s(dst.first()->as_Register(), src.first()->as_FloatRegister()); ++ } ++ } ++} ++ ++// A long move ++static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ ++ // The only legal possibility for a long_move VMRegPair is: ++ // 1: two stack slots (possibly unaligned) ++ // as neither the java or C calling convention will use registers ++ // for longs. 
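++  // Note: on this 64-bit port a long can in fact sit in a single integer
++  // register, which is why the register cases below reduce to plain 64-bit
++  // moves, loads and stores.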
++ if (src.first()->is_stack()) { ++ assert(src.second()->is_stack() && dst.second()->is_stack(), "must be all stack"); ++ if( dst.first()->is_stack()){ ++ __ ld_d(AT, FP, reg2offset_in(src.first())); ++ __ st_d(AT, SP, reg2offset_out(dst.first())); ++ } else { ++ __ ld_d(dst.first()->as_Register(), FP, reg2offset_in(src.first())); ++ } ++ } else { ++ if( dst.first()->is_stack()){ ++ __ st_d(src.first()->as_Register(), SP, reg2offset_out(dst.first())); ++ } else { ++ __ move(dst.first()->as_Register(), src.first()->as_Register()); ++ } ++ } ++} ++ ++// A double move ++static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ ++ // The only legal possibilities for a double_move VMRegPair are: ++ // The painful thing here is that like long_move a VMRegPair might be ++ ++ // Because of the calling convention we know that src is either ++ // 1: a single physical register (xmm registers only) ++ // 2: two stack slots (possibly unaligned) ++ // dst can only be a pair of stack slots. ++ ++ if (src.first()->is_stack()) { ++ // source is all stack ++ if( dst.first()->is_stack()){ ++ __ ld_d(AT, FP, reg2offset_in(src.first())); ++ __ st_d(AT, SP, reg2offset_out(dst.first())); ++ } else if (dst.first()->is_FloatRegister()) { ++ __ fld_d(dst.first()->as_FloatRegister(), FP, reg2offset_in(src.first())); ++ } else { ++ __ ld_d(dst.first()->as_Register(), FP, reg2offset_in(src.first())); ++ } ++ } else { ++ // reg to stack/reg ++ // No worries about stack alignment ++ if( dst.first()->is_stack()){ ++ __ fst_d(src.first()->as_FloatRegister(), SP, reg2offset_out(dst.first())); ++ } else if (dst.first()->is_FloatRegister()) { ++ __ fmov_d(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); ++ } else { ++ __ movfr2gr_d(dst.first()->as_Register(), src.first()->as_FloatRegister()); ++ } ++ } ++} ++ ++static void verify_oop_args(MacroAssembler* masm, ++ methodHandle method, ++ const BasicType* sig_bt, ++ const VMRegPair* regs) { ++ Register temp_reg = T4; // not part of any compiled calling seq ++ if (VerifyOops) { ++ for (int i = 0; i < method->size_of_parameters(); i++) { ++ if (sig_bt[i] == T_OBJECT || ++ sig_bt[i] == T_ARRAY) { ++ VMReg r = regs[i].first(); ++ assert(r->is_valid(), "bad oop arg"); ++ if (r->is_stack()) { ++ __ ld_d(temp_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); ++ __ verify_oop(temp_reg); ++ } else { ++ __ verify_oop(r->as_Register()); ++ } ++ } ++ } ++ } ++} ++ ++static void gen_special_dispatch(MacroAssembler* masm, ++ methodHandle method, ++ const BasicType* sig_bt, ++ const VMRegPair* regs) { ++ verify_oop_args(masm, method, sig_bt, regs); ++ vmIntrinsics::ID iid = method->intrinsic_id(); ++ ++ // Now write the args into the outgoing interpreter space ++ bool has_receiver = false; ++ Register receiver_reg = noreg; ++ int member_arg_pos = -1; ++ Register member_reg = noreg; ++ int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); ++ if (ref_kind != 0) { ++ member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument ++ member_reg = S3; // known to be free at this point ++ has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); ++ } else if (iid == vmIntrinsics::_invokeBasic) { ++ has_receiver = true; ++ } else { ++ fatal("unexpected intrinsic id %d", iid); ++ } ++ ++ if (member_reg != noreg) { ++ // Load the member_arg into register, if necessary. 
++ SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs); ++ VMReg r = regs[member_arg_pos].first(); ++ if (r->is_stack()) { ++ __ ld_d(member_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size)); ++ } else { ++ // no data motion is needed ++ member_reg = r->as_Register(); ++ } ++ } ++ ++ if (has_receiver) { ++ // Make sure the receiver is loaded into a register. ++ assert(method->size_of_parameters() > 0, "oob"); ++ assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); ++ VMReg r = regs[0].first(); ++ assert(r->is_valid(), "bad receiver arg"); ++ if (r->is_stack()) { ++ // Porting note: This assumes that compiled calling conventions always ++ // pass the receiver oop in a register. If this is not true on some ++ // platform, pick a temp and load the receiver from stack. ++ fatal("receiver always in a register"); ++ receiver_reg = SSR; // known to be free at this point ++ __ ld_d(receiver_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size)); ++ } else { ++ // no data motion is needed ++ receiver_reg = r->as_Register(); ++ } ++ } ++ ++ // Figure out which address we are really jumping to: ++ MethodHandles::generate_method_handle_dispatch(masm, iid, ++ receiver_reg, member_reg, /*for_compiler_entry:*/ true); ++} ++ ++// --------------------------------------------------------------------------- ++// Generate a native wrapper for a given method. The method takes arguments ++// in the Java compiled code convention, marshals them to the native ++// convention (handlizes oops, etc), transitions to native, makes the call, ++// returns to java state (possibly blocking), unhandlizes any result and ++// returns. ++nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler* masm, ++ const methodHandle& method, ++ int compile_id, ++ BasicType* in_sig_bt, ++ VMRegPair* in_regs, ++ BasicType ret_type, ++ address critical_entry) { ++ if (method->is_method_handle_intrinsic()) { ++ vmIntrinsics::ID iid = method->intrinsic_id(); ++ intptr_t start = (intptr_t)__ pc(); ++ int vep_offset = ((intptr_t)__ pc()) - start; ++ gen_special_dispatch(masm, ++ method, ++ in_sig_bt, ++ in_regs); ++ assert(((intptr_t)__ pc() - start - vep_offset) >= 1 * BytesPerInstWord, ++ "valid size for make_non_entrant"); ++ int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period ++ __ flush(); ++ int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually ++ return nmethod::new_native_nmethod(method, ++ compile_id, ++ masm->code(), ++ vep_offset, ++ frame_complete, ++ stack_slots / VMRegImpl::slots_per_word, ++ in_ByteSize(-1), ++ in_ByteSize(-1), ++ (OopMapSet*)NULL); ++ } ++ ++ bool is_critical_native = true; ++ address native_func = critical_entry; ++ if (native_func == NULL) { ++ native_func = method->native_function(); ++ is_critical_native = false; ++ } ++ assert(native_func != NULL, "must have function"); ++ ++ // Native nmethod wrappers never take possesion of the oop arguments. ++ // So the caller will gc the arguments. The only thing we need an ++ // oopMap for is if the call is static ++ // ++ // An OopMap for lock (and class if static), and one for the VM call itself ++ OopMapSet *oop_maps = new OopMapSet(); ++ ++ // We have received a description of where all the java arg are located ++ // on entry to the wrapper. We need to convert these args to where ++ // the jni function will expect them. 
To figure out where they go ++ // we convert the java signature to a C signature by inserting ++ // the hidden arguments as arg[0] and possibly arg[1] (static method) ++ ++ const int total_in_args = method->size_of_parameters(); ++ int total_c_args = total_in_args; ++ if (!is_critical_native) { ++ total_c_args += 1; ++ if (method->is_static()) { ++ total_c_args++; ++ } ++ } else { ++ for (int i = 0; i < total_in_args; i++) { ++ if (in_sig_bt[i] == T_ARRAY) { ++ total_c_args++; ++ } ++ } ++ } ++ ++ BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); ++ VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); ++ BasicType* in_elem_bt = NULL; ++ ++ int argc = 0; ++ if (!is_critical_native) { ++ out_sig_bt[argc++] = T_ADDRESS; ++ if (method->is_static()) { ++ out_sig_bt[argc++] = T_OBJECT; ++ } ++ ++ for (int i = 0; i < total_in_args ; i++ ) { ++ out_sig_bt[argc++] = in_sig_bt[i]; ++ } ++ } else { ++ Thread* THREAD = Thread::current(); ++ in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args); ++ SignatureStream ss(method->signature()); ++ for (int i = 0; i < total_in_args ; i++ ) { ++ if (in_sig_bt[i] == T_ARRAY) { ++ // Arrays are passed as int, elem* pair ++ out_sig_bt[argc++] = T_INT; ++ out_sig_bt[argc++] = T_ADDRESS; ++ Symbol* atype = ss.as_symbol(CHECK_NULL); ++ const char* at = atype->as_C_string(); ++ if (strlen(at) == 2) { ++ assert(at[0] == '[', "must be"); ++ switch (at[1]) { ++ case 'B': in_elem_bt[i] = T_BYTE; break; ++ case 'C': in_elem_bt[i] = T_CHAR; break; ++ case 'D': in_elem_bt[i] = T_DOUBLE; break; ++ case 'F': in_elem_bt[i] = T_FLOAT; break; ++ case 'I': in_elem_bt[i] = T_INT; break; ++ case 'J': in_elem_bt[i] = T_LONG; break; ++ case 'S': in_elem_bt[i] = T_SHORT; break; ++ case 'Z': in_elem_bt[i] = T_BOOLEAN; break; ++ default: ShouldNotReachHere(); ++ } ++ } ++ } else { ++ out_sig_bt[argc++] = in_sig_bt[i]; ++ in_elem_bt[i] = T_VOID; ++ } ++ if (in_sig_bt[i] != T_VOID) { ++ assert(in_sig_bt[i] == ss.type(), "must match"); ++ ss.next(); ++ } ++ } ++ } ++ ++ // Now figure out where the args must be stored and how much stack space ++ // they require (neglecting out_preserve_stack_slots but space for storing ++ // the 1st six register arguments). It's weird see int_stk_helper. ++ // ++ int out_arg_slots; ++ out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); ++ ++ // Compute framesize for the wrapper. We need to handlize all oops in ++ // registers. We must create space for them here that is disjoint from ++ // the windowed save area because we have no control over when we might ++ // flush the window again and overwrite values that gc has since modified. ++ // (The live window race) ++ // ++ // We always just allocate 6 word for storing down these object. This allow ++ // us to simply record the base and use the Ireg number to decide which ++ // slot to use. (Note that the reg number is the inbound number not the ++ // outbound number). ++ // We must shuffle args to match the native convention, and include var-args space. ++ ++ // Calculate the total number of stack slots we will need. ++ ++ // First count the abi requirement plus all of the outgoing args ++ int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; ++ ++ // Now the space for the inbound oop handle area ++ int total_save_slots = 9 * VMRegImpl::slots_per_word; // 9 arguments passed in registers ++ if (is_critical_native) { ++ // Critical natives may have to call out so they need a save area ++ // for register arguments. 
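++    // The loop below reserves one VMRegImpl slot per 32-bit register
++    // argument and two per 64-bit one (T_LONG, T_DOUBLE, and T_ARRAY, which
++    // is passed as a pointer); the running slot count is then 2-slot aligned
++    // whenever any 64-bit values were seen.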
++ int double_slots = 0; ++ int single_slots = 0; ++ for ( int i = 0; i < total_in_args; i++) { ++ if (in_regs[i].first()->is_Register()) { ++ const Register reg = in_regs[i].first()->as_Register(); ++ switch (in_sig_bt[i]) { ++ case T_BOOLEAN: ++ case T_BYTE: ++ case T_SHORT: ++ case T_CHAR: ++ case T_INT: single_slots++; break; ++ case T_ARRAY: ++ case T_LONG: double_slots++; break; ++ default: ShouldNotReachHere(); ++ } ++ } else if (in_regs[i].first()->is_FloatRegister()) { ++ switch (in_sig_bt[i]) { ++ case T_FLOAT: single_slots++; break; ++ case T_DOUBLE: double_slots++; break; ++ default: ShouldNotReachHere(); ++ } ++ } ++ } ++ total_save_slots = double_slots * 2 + single_slots; ++ // align the save area ++ if (double_slots != 0) { ++ stack_slots = round_to(stack_slots, 2); ++ } ++ } ++ ++ int oop_handle_offset = stack_slots; ++ stack_slots += total_save_slots; ++ ++ // Now any space we need for handlizing a klass if static method ++ ++ int klass_slot_offset = 0; ++ int klass_offset = -1; ++ int lock_slot_offset = 0; ++ bool is_static = false; ++ ++ if (method->is_static()) { ++ klass_slot_offset = stack_slots; ++ stack_slots += VMRegImpl::slots_per_word; ++ klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; ++ is_static = true; ++ } ++ ++ // Plus a lock if needed ++ ++ if (method->is_synchronized()) { ++ lock_slot_offset = stack_slots; ++ stack_slots += VMRegImpl::slots_per_word; ++ } ++ ++ // Now a place to save return value or as a temporary for any gpr -> fpr moves ++ // + 2 for return address (which we own) and saved fp ++ stack_slots += 2 + 9 * VMRegImpl::slots_per_word; // (T0, A0, A1, A2, A3, A4, A5, A6, A7) ++ ++ // Ok The space we have allocated will look like: ++ // ++ // ++ // FP-> | | ++ // |---------------------| ++ // | 2 slots for moves | ++ // |---------------------| ++ // | lock box (if sync) | ++ // |---------------------| <- lock_slot_offset ++ // | klass (if static) | ++ // |---------------------| <- klass_slot_offset ++ // | oopHandle area | ++ // |---------------------| <- oop_handle_offset ++ // | outbound memory | ++ // | based arguments | ++ // | | ++ // |---------------------| ++ // | vararg area | ++ // |---------------------| ++ // | | ++ // SP-> | out_preserved_slots | ++ // ++ // ++ ++ ++ // Now compute actual number of stack words we need rounding to make ++ // stack properly aligned. ++ stack_slots = round_to(stack_slots, StackAlignmentInSlots); ++ ++ int stack_size = stack_slots * VMRegImpl::stack_slot_size; ++ ++ intptr_t start = (intptr_t)__ pc(); ++ ++ ++ ++ // First thing make an ic check to see if we should even be here ++ address ic_miss = SharedRuntime::get_ic_miss_stub(); ++ ++ // We are free to use all registers as temps without saving them and ++ // restoring them except fp. fp is the only callee save register ++ // as far as the interpreter and the compiler(s) are concerned. ++ ++ //refer to register_loongarch.hpp:IC_Klass ++ const Register ic_reg = T1; ++ const Register receiver = T0; ++ ++ Label hit; ++ Label exception_pending; ++ ++ __ verify_oop(receiver); ++ //add for compressedoops ++ __ load_klass(T4, receiver); ++ __ beq(T4, ic_reg, hit); ++ __ jmp(ic_miss, relocInfo::runtime_call_type); ++ __ bind(hit); ++ ++ int vep_offset = ((intptr_t)__ pc()) - start; ++#ifdef COMPILER1 ++ if (InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) { ++ // Object.hashCode can pull the hashCode from the header word ++ // instead of doing a full VM transition once it's been computed. 
++ // Since hashCode is usually polymorphic at call sites we can't do ++ // this optimization at the call site without a lot of work. ++ Label slowCase; ++ Register receiver = T0; ++ Register result = V0; ++ __ ld_d ( result, receiver, oopDesc::mark_offset_in_bytes()); ++ // check if locked ++ __ andi(AT, result, markOopDesc::unlocked_value); ++ __ beq(AT, R0, slowCase); ++ if (UseBiasedLocking) { ++ // Check if biased and fall through to runtime if so ++ __ andi (AT, result, markOopDesc::biased_lock_bit_in_place); ++ __ bne(AT, R0, slowCase); ++ } ++ // get hash ++ __ li(AT, markOopDesc::hash_mask_in_place); ++ __ andr (AT, result, AT); ++ // test if hashCode exists ++ __ beq (AT, R0, slowCase); ++ __ shr(result, markOopDesc::hash_shift); ++ __ jr(RA); ++ __ bind (slowCase); ++ } ++#endif // COMPILER1 ++ ++ // Generate stack overflow check ++ if (UseStackBanging) { ++ __ bang_stack_with_offset((int)JavaThread::stack_shadow_zone_size()); ++ } ++ ++ // The instruction at the verified entry point must be 4 bytes or longer ++ // because it can be patched on the fly by make_non_entrant. ++ if (((intptr_t)__ pc() - start - vep_offset) < 1 * BytesPerInstWord) { ++ __ nop(); ++ } ++ ++ // Generate a new frame for the wrapper. ++ // do LA need this ? ++#ifndef OPT_THREAD ++ __ get_thread(TREG); ++#endif ++ __ st_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset())); ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ ++ __ enter(); ++ // -2 because return address is already present and so is saved fp ++ __ addi_d(SP, SP, -1 * (stack_size - 2*wordSize)); ++ ++ // Frame is now completed as far a size and linkage. ++ ++ int frame_complete = ((intptr_t)__ pc()) - start; ++ ++ // Calculate the difference between sp and fp. We need to know it ++ // after the native call because on windows Java Natives will pop ++ // the arguments and it is painful to do sp relative addressing ++ // in a platform independent way. So after the call we switch to ++ // fp relative addressing. ++ //FIXME actually , the fp_adjustment may not be the right, because andr(sp, sp, at) may change ++ //the SP ++ int fp_adjustment = stack_size - 2*wordSize; ++ ++#ifdef COMPILER2 ++ // C2 may leave the stack dirty if not in SSE2+ mode ++ __ empty_FPU_stack(); ++#endif ++ ++ // Compute the fp offset for any slots used after the jni call ++ ++ int lock_slot_fp_offset = (lock_slot_offset*VMRegImpl::stack_slot_size) - fp_adjustment; ++ // We use TREG as a thread pointer because it is callee save and ++ // if we load it once it is usable thru the entire wrapper ++ const Register thread = TREG; ++ ++ // We use S4 as the oop handle for the receiver/klass ++ // It is callee save so it survives the call to native ++ ++ const Register oop_handle_reg = S4; ++ if (is_critical_native) { ++ Unimplemented(); ++ // check_needs_gc_for_critical_native(masm, stack_slots, total_c_args, total_in_args, ++ // oop_handle_offset, oop_maps, in_regs, in_sig_bt); ++ } ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ // ++ // We immediately shuffle the arguments so that any vm call we have to ++ // make from here on out (sync slow path, jvmpi, etc.) we will have ++ // captured the oops from our caller and have a valid oopMap for ++ // them. ++ ++ // ----------------- ++ // The Grand Shuffle ++ // ++ // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv* ++ // and, if static, the class mirror instead of a receiver. 
This pretty much ++ // guarantees that register layout will not match (and LA doesn't use reg ++ // parms though amd does). Since the native abi doesn't use register args ++ // and the java conventions does we don't have to worry about collisions. ++ // All of our moved are reg->stack or stack->stack. ++ // We ignore the extra arguments during the shuffle and handle them at the ++ // last moment. The shuffle is described by the two calling convention ++ // vectors we have in our possession. We simply walk the java vector to ++ // get the source locations and the c vector to get the destinations. ++ ++ int c_arg = method->is_static() ? 2 : 1 ; ++ ++ // Record sp-based slot for receiver on stack for non-static methods ++ int receiver_offset = -1; ++ ++ // This is a trick. We double the stack slots so we can claim ++ // the oops in the caller's frame. Since we are sure to have ++ // more args than the caller doubling is enough to make ++ // sure we can capture all the incoming oop args from the ++ // caller. ++ // ++ OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); ++ ++ // Mark location of fp (someday) ++ // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, vmreg(fp)); ++ ++#ifdef ASSERT ++ bool reg_destroyed[RegisterImpl::number_of_registers]; ++ bool freg_destroyed[FloatRegisterImpl::number_of_registers]; ++ for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { ++ reg_destroyed[r] = false; ++ } ++ for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { ++ freg_destroyed[f] = false; ++ } ++ ++#endif /* ASSERT */ ++ ++ // This may iterate in two different directions depending on the ++ // kind of native it is. The reason is that for regular JNI natives ++ // the incoming and outgoing registers are offset upwards and for ++ // critical natives they are offset down. 
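++  // The loop below queues (java_arg, c_arg) index pairs from the last
++  // argument backwards; the shuffle loop then consumes them two at a time.
++  // For critical natives a cycle-free move order would be computed instead
++  // (left as Unimplemented() in this port).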
++ GrowableArray arg_order(2 * total_in_args); ++ VMRegPair tmp_vmreg; ++ tmp_vmreg.set2(T8->as_VMReg()); ++ ++ if (!is_critical_native) { ++ for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { ++ arg_order.push(i); ++ arg_order.push(c_arg); ++ } ++ } else { ++ // Compute a valid move order, using tmp_vmreg to break any cycles ++ Unimplemented(); ++ // ComputeMoveOrder cmo(total_in_args, in_regs, total_c_args, out_regs, in_sig_bt, arg_order, tmp_vmreg); ++ } ++ ++ int temploc = -1; ++ for (int ai = 0; ai < arg_order.length(); ai += 2) { ++ int i = arg_order.at(ai); ++ int c_arg = arg_order.at(ai + 1); ++ __ block_comment(err_msg("move %d -> %d", i, c_arg)); ++ if (c_arg == -1) { ++ assert(is_critical_native, "should only be required for critical natives"); ++ // This arg needs to be moved to a temporary ++ __ move(tmp_vmreg.first()->as_Register(), in_regs[i].first()->as_Register()); ++ in_regs[i] = tmp_vmreg; ++ temploc = i; ++ continue; ++ } else if (i == -1) { ++ assert(is_critical_native, "should only be required for critical natives"); ++ // Read from the temporary location ++ assert(temploc != -1, "must be valid"); ++ i = temploc; ++ temploc = -1; ++ } ++#ifdef ASSERT ++ if (in_regs[i].first()->is_Register()) { ++ assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!"); ++ } else if (in_regs[i].first()->is_FloatRegister()) { ++ assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding()], "destroyed reg!"); ++ } ++ if (out_regs[c_arg].first()->is_Register()) { ++ reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; ++ } else if (out_regs[c_arg].first()->is_FloatRegister()) { ++ freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; ++ } ++#endif /* ASSERT */ ++ switch (in_sig_bt[i]) { ++ case T_ARRAY: ++ if (is_critical_native) { ++ Unimplemented(); ++ // unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]); ++ c_arg++; ++#ifdef ASSERT ++ if (out_regs[c_arg].first()->is_Register()) { ++ reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; ++ } else if (out_regs[c_arg].first()->is_FloatRegister()) { ++ freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; ++ } ++#endif ++ break; ++ } ++ case T_OBJECT: ++ assert(!is_critical_native, "no oop arguments"); ++ object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], ++ ((i == 0) && (!is_static)), ++ &receiver_offset); ++ break; ++ case T_VOID: ++ break; ++ ++ case T_FLOAT: ++ float_move(masm, in_regs[i], out_regs[c_arg]); ++ break; ++ ++ case T_DOUBLE: ++ assert( i + 1 < total_in_args && ++ in_sig_bt[i + 1] == T_VOID && ++ out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); ++ double_move(masm, in_regs[i], out_regs[c_arg]); ++ break; ++ ++ case T_LONG : ++ long_move(masm, in_regs[i], out_regs[c_arg]); ++ break; ++ ++ case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); ++ ++ default: ++ simple_move32(masm, in_regs[i], out_regs[c_arg]); ++ } ++ } ++ ++ // point c_arg at the first arg that is already loaded in case we ++ // need to spill before we call out ++ c_arg = total_c_args - total_in_args; ++ // Pre-load a static method's oop. Used both by locking code and ++ // the normal JNI call code. 
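++  // When the method is static (and not a critical native), the code below
++  // handlizes the class mirror: the mirror oop is stored into the frame's
++  // klass slot, recorded in the oop map, and the address of that slot is
++  // passed as the second C argument (the jclass).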
++ ++ __ move(oop_handle_reg, A1); ++ ++ if (method->is_static() && !is_critical_native) { ++ ++ // load opp into a register ++ int oop_index = __ oop_recorder()->find_index(JNIHandles::make_local( ++ (method->method_holder())->java_mirror())); ++ ++ ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ __ relocate(rspec); ++ __ patchable_li52(oop_handle_reg, (long)JNIHandles::make_local((method->method_holder())->java_mirror())); ++ // Now handlize the static class mirror it's known not-null. ++ __ st_d( oop_handle_reg, SP, klass_offset); ++ map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); ++ ++ // Now get the handle ++ __ lea(oop_handle_reg, Address(SP, klass_offset)); ++ // store the klass handle as second argument ++ __ move(A1, oop_handle_reg); ++ // and protect the arg if we must spill ++ c_arg--; ++ } ++ ++ // Change state to native (we save the return address in the thread, since it might not ++ // be pushed on the stack when we do a a stack traversal). It is enough that the pc() ++ // points into the right code segment. It does not have to be the correct return pc. ++ // We use the same pc/oopMap repeatedly when we call out ++ ++ Label native_return; ++ __ set_last_Java_frame(SP, noreg, native_return); ++ ++ // We have all of the arguments setup at this point. We must not touch any register ++ // argument registers at this point (what if we save/restore them there are no oop? ++ { ++ SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0); ++ save_args(masm, total_c_args, c_arg, out_regs); ++ int metadata_index = __ oop_recorder()->find_index(method()); ++ RelocationHolder rspec = metadata_Relocation::spec(metadata_index); ++ __ relocate(rspec); ++ __ patchable_li52(AT, (long)(method())); ++ ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), ++ thread, AT); ++ ++ restore_args(masm, total_c_args, c_arg, out_regs); ++ } ++ ++ // These are register definitions we need for locking/unlocking ++ const Register swap_reg = T8; // Must use T8 for cmpxchg instruction ++ const Register obj_reg = T4; // Will contain the oop ++ //const Register lock_reg = T6; // Address of compiler lock object (BasicLock) ++ const Register lock_reg = c_rarg0; // Address of compiler lock object (BasicLock) ++ ++ ++ ++ Label slow_path_lock; ++ Label lock_done; ++ ++ // Lock a synchronized method ++ if (method->is_synchronized()) { ++ assert(!is_critical_native, "unhandled"); ++ ++ const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); ++ ++ // Get the handle (the 2nd argument) ++ __ move(oop_handle_reg, A1); ++ ++ // Get address of the box ++ __ lea(lock_reg, Address(FP, lock_slot_fp_offset)); ++ ++ // Load the oop from the handle ++ __ ld_d(obj_reg, oop_handle_reg, 0); ++ ++ if (UseBiasedLocking) { ++ // Note that oop_handle_reg is trashed during this call ++ __ biased_locking_enter(lock_reg, obj_reg, swap_reg, A1, false, lock_done, &slow_path_lock); ++ } ++ ++ // Load immediate 1 into swap_reg %T8 ++ __ li(swap_reg, 1); ++ ++ __ ld_d(AT, obj_reg, 0); ++ __ orr(swap_reg, swap_reg, AT); ++ ++ __ st_d(swap_reg, lock_reg, mark_word_offset); ++ __ cmpxchg(Address(obj_reg, 0), swap_reg, lock_reg, AT, true, false, lock_done); ++ // Test if the oopMark is an obvious stack pointer, i.e., ++ // 1) (mark & 3) == 0, and ++ // 2) sp <= mark < mark + os::pagesize() ++ // These 3 tests can be done by evaluating the following ++ // expression: ((mark - sp) & (3 - os::vm_page_size())), ++ // assuming both stack pointer and pagesize have their ++ // least 
significant 2 bits clear. ++ // NOTE: the oopMark is in swap_reg %T8 as the result of cmpxchg ++ ++ __ sub_d(swap_reg, swap_reg, SP); ++ __ li(AT, 3 - os::vm_page_size()); ++ __ andr(swap_reg , swap_reg, AT); ++ // Save the test result, for recursive case, the result is zero ++ __ st_d(swap_reg, lock_reg, mark_word_offset); ++ __ bne(swap_reg, R0, slow_path_lock); ++ // Slow path will re-enter here ++ __ bind(lock_done); ++ ++ if (UseBiasedLocking) { ++ // Re-fetch oop_handle_reg as we trashed it above ++ __ move(A1, oop_handle_reg); ++ } ++ } ++ ++ ++ // Finally just about ready to make the JNI call ++ ++ ++ // get JNIEnv* which is first argument to native ++ if (!is_critical_native) { ++ __ addi_d(A0, thread, in_bytes(JavaThread::jni_environment_offset())); ++ } ++ ++ // Example: Java_java_lang_ref_Finalizer_invokeFinalizeMethod(JNIEnv *env, jclass clazz, jobject ob) ++ // Load the second arguments into A1 ++ //__ ld(A1, SP , wordSize ); // klass ++ ++ // Now set thread in native ++ __ addi_d(AT, R0, _thread_in_native); ++ if (os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); // store release ++ } ++ __ st_w(AT, thread, in_bytes(JavaThread::thread_state_offset())); ++ // do the call ++ __ call(native_func, relocInfo::runtime_call_type); ++ __ bind(native_return); ++ ++ oop_maps->add_gc_map(((intptr_t)__ pc()) - start, map); ++ ++ // WARNING - on Windows Java Natives use pascal calling convention and pop the ++ // arguments off of the stack. We could just re-adjust the stack pointer here ++ // and continue to do SP relative addressing but we instead switch to FP ++ // relative addressing. ++ ++ // Unpack native results. ++ switch (ret_type) { ++ case T_BOOLEAN: __ c2bool(V0); break; ++ case T_CHAR : __ bstrpick_d(V0, V0, 15, 0); break; ++ case T_BYTE : __ sign_extend_byte (V0); break; ++ case T_SHORT : __ sign_extend_short(V0); break; ++ case T_INT : // nothing to do break; ++ case T_DOUBLE : ++ case T_FLOAT : ++ // Result is in st0 we'll save as needed ++ break; ++ case T_ARRAY: // Really a handle ++ case T_OBJECT: // Really a handle ++ break; // can't de-handlize until after safepoint check ++ case T_VOID: break; ++ case T_LONG: break; ++ default : ShouldNotReachHere(); ++ } ++ // Switch thread to "native transition" state before reading the synchronization state. ++ // This additional state is necessary because reading and testing the synchronization ++ // state is not atomic w.r.t. GC, as this scenario demonstrates: ++ // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted. ++ // VM thread changes sync state to synchronizing and suspends threads for GC. ++ // Thread A is resumed to finish this native method, but doesn't block here since it ++ // didn't see any synchronization is progress, and escapes. ++ __ addi_d(AT, R0, _thread_in_native_trans); ++ if (os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); // store release ++ } ++ __ st_w(AT, thread, in_bytes(JavaThread::thread_state_offset())); ++ ++ if(os::is_MP()) { ++ if (UseMembar) { ++ // Force this write out before the read below ++ __ membar(__ AnyAny); ++ } else { ++ // Write serialization page so VM thread can do a pseudo remote membar. ++ // We use the current thread pointer to calculate a thread specific ++ // offset to write to within the page. This minimizes bus traffic ++ // due to cache line collision. 
++ __ serialize_memory(thread, T5); ++ } ++ } ++ ++ Label after_transition; ++ ++ // check for safepoint operation in progress and/or pending suspend requests ++ { ++ Label Continue; ++ Label slow_path; ++ ++ __ safepoint_poll_acquire(slow_path, thread); ++ __ ld_w(AT, thread, in_bytes(JavaThread::suspend_flags_offset())); ++ __ beq(AT, R0, Continue); ++ __ bind(slow_path); ++ ++ // Don't use call_VM as it will see a possible pending exception and forward it ++ // and never return here preventing us from clearing _last_native_pc down below. ++ // ++ save_native_result(masm, ret_type, stack_slots); ++ __ move(A0, thread); ++ __ addi_d(SP, SP, -wordSize); ++ __ push(S2); ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ move(S2, SP); // use S2 as a sender SP holder ++ __ andr(SP, SP, AT); // align stack as required by ABI ++ if (!is_critical_native) { ++ __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), relocInfo::runtime_call_type); ++ } else { ++ __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition), relocInfo::runtime_call_type); ++ } ++ __ move(SP, S2); // use S2 as a sender SP holder ++ __ pop(S2); ++ __ addi_d(SP, SP, wordSize); ++ // Restore any method result value ++ restore_native_result(masm, ret_type, stack_slots); ++ ++ if (is_critical_native) { ++ // The call above performed the transition to thread_in_Java so ++ // skip the transition logic below. ++ __ beq(R0, R0, after_transition); ++ } ++ ++ __ bind(Continue); ++ } ++ ++ // change thread state ++ __ addi_d(AT, R0, _thread_in_Java); ++ if (os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); // store release ++ } ++ __ st_w(AT, thread, in_bytes(JavaThread::thread_state_offset())); ++ __ bind(after_transition); ++ Label reguard; ++ Label reguard_done; ++ __ ld_w(AT, thread, in_bytes(JavaThread::stack_guard_state_offset())); ++ __ addi_d(AT, AT, -JavaThread::stack_guard_yellow_reserved_disabled); ++ __ beq(AT, R0, reguard); ++ // slow path reguard re-enters here ++ __ bind(reguard_done); ++ ++ // Handle possible exception (will unlock if necessary) ++ ++ // native result if any is live ++ ++ // Unlock ++ Label slow_path_unlock; ++ Label unlock_done; ++ if (method->is_synchronized()) { ++ ++ Label done; ++ ++ // Get locked oop from the handle we passed to jni ++ __ ld_d( obj_reg, oop_handle_reg, 0); ++ if (UseBiasedLocking) { ++ __ biased_locking_exit(obj_reg, T8, done); ++ ++ } ++ ++ // Simple recursive lock? 
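++    // The fast lock path above stores zero into the lock box for a
++    // recursive lock; in that case there is nothing to write back to the
++    // object's mark word, so unlocking is already done.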
++ ++ __ ld_d(AT, FP, lock_slot_fp_offset); ++ __ beq(AT, R0, done); ++ // Must save FSF if if it is live now because cmpxchg must use it ++ if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { ++ save_native_result(masm, ret_type, stack_slots); ++ } ++ ++ // get old displaced header ++ __ ld_d (T8, FP, lock_slot_fp_offset); ++ // get address of the stack lock ++ __ addi_d (c_rarg0, FP, lock_slot_fp_offset); ++ // Atomic swap old header if oop still contains the stack lock ++ __ cmpxchg(Address(obj_reg, 0), c_rarg0, T8, AT, false, false, unlock_done, &slow_path_unlock); ++ ++ // slow path re-enters here ++ __ bind(unlock_done); ++ if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { ++ restore_native_result(masm, ret_type, stack_slots); ++ } ++ ++ __ bind(done); ++ ++ } ++ { ++ SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0); ++ // Tell dtrace about this method exit ++ save_native_result(masm, ret_type, stack_slots); ++ int metadata_index = __ oop_recorder()->find_index( (method())); ++ RelocationHolder rspec = metadata_Relocation::spec(metadata_index); ++ __ relocate(rspec); ++ __ patchable_li52(AT, (long)(method())); ++ ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), ++ thread, AT); ++ restore_native_result(masm, ret_type, stack_slots); ++ } ++ ++ // We can finally stop using that last_Java_frame we setup ages ago ++ ++ __ reset_last_Java_frame(false); ++ ++ // Unpack oop result, e.g. JNIHandles::resolve value. ++ if (ret_type == T_OBJECT || ret_type == T_ARRAY) { ++ __ resolve_jobject(V0, thread, T4); ++ } ++ ++ if (CheckJNICalls) { ++ // clear_pending_jni_exception_check ++ __ st_d(R0, thread, in_bytes(JavaThread::pending_jni_exception_check_fn_offset())); ++ } ++ ++ if (!is_critical_native) { ++ // reset handle block ++ __ ld_d(AT, thread, in_bytes(JavaThread::active_handles_offset())); ++ __ st_w(R0, AT, JNIHandleBlock::top_offset_in_bytes()); ++ } ++ ++ if (!is_critical_native) { ++ // Any exception pending? ++ __ ld_d(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, exception_pending); ++ } ++ // no exception, we're almost done ++ ++ // check that only result value is on FPU stack ++ __ verify_FPU(ret_type == T_FLOAT || ret_type == T_DOUBLE ? 1 : 0, "native_wrapper normal exit"); ++ ++ // Return ++#ifndef OPT_THREAD ++ __ get_thread(TREG); ++#endif ++ //__ ld_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset())); ++ __ leave(); ++ ++ __ jr(RA); ++ // Unexpected paths are out of line and go here ++ // Slow path locking & unlocking ++ if (method->is_synchronized()) { ++ ++ // BEGIN Slow path lock ++ __ bind(slow_path_lock); ++ ++ // protect the args we've loaded ++ save_args(masm, total_c_args, c_arg, out_regs); ++ ++ // has last_Java_frame setup. 
No exceptions so do vanilla call not call_VM ++ // args are (oop obj, BasicLock* lock, JavaThread* thread) ++ ++ __ move(A0, obj_reg); ++ __ move(A1, lock_reg); ++ __ move(A2, thread); ++ __ addi_d(SP, SP, - 3*wordSize); ++ ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ move(S2, SP); // use S2 as a sender SP holder ++ __ andr(SP, SP, AT); // align stack as required by ABI ++ ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), relocInfo::runtime_call_type); ++ __ move(SP, S2); ++ __ addi_d(SP, SP, 3*wordSize); ++ ++ restore_args(masm, total_c_args, c_arg, out_regs); ++ ++#ifdef ASSERT ++ { Label L; ++ __ ld_d(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ __ stop("no pending exception allowed on exit from monitorenter"); ++ __ bind(L); ++ } ++#endif ++ __ b(lock_done); ++ // END Slow path lock ++ ++ // BEGIN Slow path unlock ++ __ bind(slow_path_unlock); ++ ++ // Slow path unlock ++ ++ if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { ++ save_native_result(masm, ret_type, stack_slots); ++ } ++ // Save pending exception around call to VM (which contains an EXCEPTION_MARK) ++ ++ __ ld_d(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ push(AT); ++ __ st_d(R0, thread, in_bytes(Thread::pending_exception_offset())); ++ ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ move(S2, SP); // use S2 as a sender SP holder ++ __ andr(SP, SP, AT); // align stack as required by ABI ++ ++ // should be a peal ++ // +wordSize because of the push above ++ __ addi_d(A1, FP, lock_slot_fp_offset); ++ ++ __ move(A0, obj_reg); ++ __ move(A2, thread); ++ __ addi_d(SP, SP, -2*wordSize); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), ++ relocInfo::runtime_call_type); ++ __ addi_d(SP, SP, 2*wordSize); ++ __ move(SP, S2); ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld_d( AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ __ stop("no pending exception allowed on exit complete_monitor_unlocking_C"); ++ __ bind(L); ++ } ++#endif /* ASSERT */ ++ ++ __ pop(AT); ++ __ st_d(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { ++ restore_native_result(masm, ret_type, stack_slots); ++ } ++ __ b(unlock_done); ++ // END Slow path unlock ++ ++ } ++ ++ // SLOW PATH Reguard the stack if needed ++ ++ __ bind(reguard); ++ save_native_result(masm, ret_type, stack_slots); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), ++ relocInfo::runtime_call_type); ++ restore_native_result(masm, ret_type, stack_slots); ++ __ b(reguard_done); ++ ++ // BEGIN EXCEPTION PROCESSING ++ if (!is_critical_native) { ++ // Forward the exception ++ __ bind(exception_pending); ++ ++ // remove possible return value from FPU register stack ++ __ empty_FPU_stack(); ++ ++ // pop our frame ++ //forward_exception_entry need return address on stack ++ __ move(SP, FP); ++ __ pop(FP); ++ ++ // and forward the exception ++ __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ } ++ __ flush(); ++ ++ nmethod *nm = nmethod::new_native_nmethod(method, ++ compile_id, ++ masm->code(), ++ vep_offset, ++ frame_complete, ++ stack_slots / VMRegImpl::slots_per_word, ++ (is_static ? 
in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
++                                            in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size),
++                                            oop_maps);
++
++  if (is_critical_native) {
++    nm->set_lazy_critical_native(true);
++  }
++  return nm;
++}
++
++#ifdef HAVE_DTRACE_H
++// ---------------------------------------------------------------------------
++// Generate a dtrace nmethod for a given signature.  The method takes arguments
++// in the Java compiled code convention, marshals them to the native
++// abi and then leaves nops at the position you would expect to call a native
++// function.  When the probe is enabled the nops are replaced with a trap
++// instruction that dtrace inserts and the trace will cause a notification
++// to dtrace.
++//
++// The probes are only able to take primitive types and java/lang/String as
++// arguments.  No other java types are allowed.  Strings are converted to utf8
++// strings so that from dtrace's point of view java strings are converted to C
++// strings.  There is an arbitrary fixed limit on the total space that a method
++// can use for converting the strings (256 chars per string in the signature).
++// So any java string larger than this is truncated.
++
++static int  fp_offset[ConcreteRegisterImpl::number_of_registers] = { 0 };
++static bool offsets_initialized = false;
++
++static VMRegPair reg64_to_VMRegPair(Register r) {
++  VMRegPair ret;
++  if (wordSize == 8) {
++    ret.set2(r->as_VMReg());
++  } else {
++    ret.set_pair(r->successor()->as_VMReg(), r->as_VMReg());
++  }
++  return ret;
++}
++
++nmethod *SharedRuntime::generate_dtrace_nmethod(MacroAssembler *masm,
++                                                methodHandle method) {
++
++  // generate_dtrace_nmethod is guarded by a mutex so we are sure to
++  // be single threaded in this method.
++  assert(AdapterHandlerLibrary_lock->owned_by_self(), "must be");
++
++  // Fill in the signature array, for the calling-convention call.
++  int total_args_passed = method->size_of_parameters();
++
++  BasicType* in_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed);
++  VMRegPair *in_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed);
++
++  // The signature we are going to use for the trap that dtrace will see
++  // java/lang/String is converted.  We drop "this" and any other object
++  // is converted to NULL.  (A one-slot java/lang/Long object reference
++  // is converted to a two-slot long, which is why we double the allocation).
++ BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed * 2); ++ VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed * 2); ++ ++ int i=0; ++ int total_strings = 0; ++ int first_arg_to_pass = 0; ++ int total_c_args = 0; ++ ++ // Skip the receiver as dtrace doesn't want to see it ++ if( !method->is_static() ) { ++ in_sig_bt[i++] = T_OBJECT; ++ first_arg_to_pass = 1; ++ } ++ ++ SignatureStream ss(method->signature()); ++ for ( ; !ss.at_return_type(); ss.next()) { ++ BasicType bt = ss.type(); ++ in_sig_bt[i++] = bt; // Collect remaining bits of signature ++ out_sig_bt[total_c_args++] = bt; ++ if( bt == T_OBJECT) { ++ symbolOop s = ss.as_symbol_or_null(); ++ if (s == vmSymbols::java_lang_String()) { ++ total_strings++; ++ out_sig_bt[total_c_args-1] = T_ADDRESS; ++ } else if (s == vmSymbols::java_lang_Boolean() || ++ s == vmSymbols::java_lang_Byte()) { ++ out_sig_bt[total_c_args-1] = T_BYTE; ++ } else if (s == vmSymbols::java_lang_Character() || ++ s == vmSymbols::java_lang_Short()) { ++ out_sig_bt[total_c_args-1] = T_SHORT; ++ } else if (s == vmSymbols::java_lang_Integer() || ++ s == vmSymbols::java_lang_Float()) { ++ out_sig_bt[total_c_args-1] = T_INT; ++ } else if (s == vmSymbols::java_lang_Long() || ++ s == vmSymbols::java_lang_Double()) { ++ out_sig_bt[total_c_args-1] = T_LONG; ++ out_sig_bt[total_c_args++] = T_VOID; ++ } ++ } else if ( bt == T_LONG || bt == T_DOUBLE ) { ++ in_sig_bt[i++] = T_VOID; // Longs & doubles take 2 Java slots ++ // We convert double to long ++ out_sig_bt[total_c_args-1] = T_LONG; ++ out_sig_bt[total_c_args++] = T_VOID; ++ } else if ( bt == T_FLOAT) { ++ // We convert float to int ++ out_sig_bt[total_c_args-1] = T_INT; ++ } ++ } ++ ++ assert(i==total_args_passed, "validly parsed signature"); ++ ++ // Now get the compiled-Java layout as input arguments ++ int comp_args_on_stack; ++ comp_args_on_stack = SharedRuntime::java_calling_convention( ++ in_sig_bt, in_regs, total_args_passed, false); ++ ++ // We have received a description of where all the java arg are located ++ // on entry to the wrapper. We need to convert these args to where ++ // the a native (non-jni) function would expect them. To figure out ++ // where they go we convert the java signature to a C signature and remove ++ // T_VOID for any long/double we might have received. ++ ++ ++ // Now figure out where the args must be stored and how much stack space ++ // they require (neglecting out_preserve_stack_slots but space for storing ++ // the 1st six register arguments). It's weird see int_stk_helper. ++ ++ int out_arg_slots; ++ out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); ++ ++ // Calculate the total number of stack slots we will need. ++ ++ // First count the abi requirement plus all of the outgoing args ++ int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; ++ ++ // Plus a temp for possible converion of float/double/long register args ++ ++ int conversion_temp = stack_slots; ++ stack_slots += 2; ++ ++ ++ // Now space for the string(s) we must convert ++ ++ int string_locs = stack_slots; ++ stack_slots += total_strings * ++ (max_dtrace_string_size / VMRegImpl::stack_slot_size); ++ ++ // Ok The space we have allocated will look like: ++ // ++ // ++ // FP-> | | ++ // |---------------------| ++ // | string[n] | ++ // |---------------------| <- string_locs[n] ++ // | string[n-1] | ++ // |---------------------| <- string_locs[n-1] ++ // | ... | ++ // | ... 
| ++ // |---------------------| <- string_locs[1] ++ // | string[0] | ++ // |---------------------| <- string_locs[0] ++ // | temp | ++ // |---------------------| <- conversion_temp ++ // | outbound memory | ++ // | based arguments | ++ // | | ++ // |---------------------| ++ // | | ++ // SP-> | out_preserved_slots | ++ // ++ // ++ ++ // Now compute actual number of stack words we need rounding to make ++ // stack properly aligned. ++ stack_slots = round_to(stack_slots, 4 * VMRegImpl::slots_per_word); ++ ++ int stack_size = stack_slots * VMRegImpl::stack_slot_size; ++ intptr_t start = (intptr_t)__ pc(); ++ ++ // First thing make an ic check to see if we should even be here ++ ++ { ++ Label L; ++ const Register temp_reg = G3_scratch; ++ Address ic_miss(temp_reg, SharedRuntime::get_ic_miss_stub()); ++ __ verify_oop(O0); ++ __ ld_ptr(O0, oopDesc::klass_offset_in_bytes(), temp_reg); ++ __ cmp(temp_reg, G5_inline_cache_reg); ++ __ brx(Assembler::equal, true, Assembler::pt, L); ++ ++ __ jump_to(ic_miss, 0); ++ __ align(CodeEntryAlignment); ++ __ bind(L); ++ } ++ ++ int vep_offset = ((intptr_t)__ pc()) - start; ++ ++ // The instruction at the verified entry point must be 4 bytes or longer ++ // because it can be patched on the fly by make_non_entrant. The stack bang ++ // instruction fits that requirement. ++ ++ // Generate stack overflow check before creating frame ++ __ generate_stack_overflow_check(stack_size); ++ ++ assert(((intptr_t)__ pc() - start - vep_offset) >= 1 * BytesPerInstWord, ++ "valid size for make_non_entrant"); ++ ++ // Generate a new frame for the wrapper. ++ __ save(SP, -stack_size, SP); ++ ++ // Frame is now completed as far a size and linkage. ++ ++ int frame_complete = ((intptr_t)__ pc()) - start; ++ ++#ifdef ASSERT ++ bool reg_destroyed[RegisterImpl::number_of_registers]; ++ bool freg_destroyed[FloatRegisterImpl::number_of_registers]; ++ for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { ++ reg_destroyed[r] = false; ++ } ++ for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { ++ freg_destroyed[f] = false; ++ } ++ ++#endif /* ASSERT */ ++ ++ VMRegPair zero; ++ const Register g0 = G0; // without this we get a compiler warning (why??) 
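The `round_to(stack_slots, 4 * VMRegImpl::slots_per_word)` in this hunk is an ordinary round-up to a power-of-two multiple so the frame size keeps SP properly aligned; a minimal sketch of the arithmetic (names are illustrative):

    #include <cassert>

    // Round x up to the next multiple of a power-of-two alignment, as in
    // round_to(stack_slots, 4 * slots_per_word) above.
    inline int round_up(int x, int alignment) {
      assert(alignment > 0 && (alignment & (alignment - 1)) == 0);
      return (x + alignment - 1) & ~(alignment - 1);
    }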
++ zero.set2(g0->as_VMReg()); ++ ++ int c_arg, j_arg; ++ ++ Register conversion_off = noreg; ++ ++ for (j_arg = first_arg_to_pass, c_arg = 0 ; ++ j_arg < total_args_passed ; j_arg++, c_arg++ ) { ++ ++ VMRegPair src = in_regs[j_arg]; ++ VMRegPair dst = out_regs[c_arg]; ++ ++#ifdef ASSERT ++ if (src.first()->is_Register()) { ++ assert(!reg_destroyed[src.first()->as_Register()->encoding()], "ack!"); ++ } else if (src.first()->is_FloatRegister()) { ++ assert(!freg_destroyed[src.first()->as_FloatRegister()->encoding( ++ FloatRegisterImpl::S)], "ack!"); ++ } ++ if (dst.first()->is_Register()) { ++ reg_destroyed[dst.first()->as_Register()->encoding()] = true; ++ } else if (dst.first()->is_FloatRegister()) { ++ freg_destroyed[dst.first()->as_FloatRegister()->encoding( ++ FloatRegisterImpl::S)] = true; ++ } ++#endif /* ASSERT */ ++ ++ switch (in_sig_bt[j_arg]) { ++ case T_ARRAY: ++ case T_OBJECT: ++ { ++ if (out_sig_bt[c_arg] == T_BYTE || out_sig_bt[c_arg] == T_SHORT || ++ out_sig_bt[c_arg] == T_INT || out_sig_bt[c_arg] == T_LONG) { ++ // need to unbox a one-slot value ++ Register in_reg = L0; ++ Register tmp = L2; ++ if ( src.first()->is_reg() ) { ++ in_reg = src.first()->as_Register(); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(src.first()) + STACK_BIAS), ++ "must be"); ++ __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, in_reg); ++ } ++ // If the final destination is an acceptable register ++ if ( dst.first()->is_reg() ) { ++ if ( dst.is_single_phys_reg() || out_sig_bt[c_arg] != T_LONG ) { ++ tmp = dst.first()->as_Register(); ++ } ++ } ++ ++ Label skipUnbox; ++ if ( wordSize == 4 && out_sig_bt[c_arg] == T_LONG ) { ++ __ mov(G0, tmp->successor()); ++ } ++ __ mov(G0, tmp); ++ __ br_null(in_reg, true, Assembler::pn, skipUnbox); ++ ++ BasicType bt = out_sig_bt[c_arg]; ++ int box_offset = java_lang_boxing_object::value_offset_in_bytes(bt); ++ switch (bt) { ++ case T_BYTE: ++ __ ldub(in_reg, box_offset, tmp); break; ++ case T_SHORT: ++ __ lduh(in_reg, box_offset, tmp); break; ++ case T_INT: ++ __ ld(in_reg, box_offset, tmp); break; ++ case T_LONG: ++ __ ld_long(in_reg, box_offset, tmp); break; ++ default: ShouldNotReachHere(); ++ } ++ ++ __ bind(skipUnbox); ++ // If tmp wasn't final destination copy to final destination ++ if (tmp == L2) { ++ VMRegPair tmp_as_VM = reg64_to_VMRegPair(L2); ++ if (out_sig_bt[c_arg] == T_LONG) { ++ long_move(masm, tmp_as_VM, dst); ++ } else { ++ move32_64(masm, tmp_as_VM, out_regs[c_arg]); ++ } ++ } ++ if (out_sig_bt[c_arg] == T_LONG) { ++ assert(out_sig_bt[c_arg+1] == T_VOID, "must be"); ++ ++c_arg; // move over the T_VOID to keep the loop indices in sync ++ } ++ } else if (out_sig_bt[c_arg] == T_ADDRESS) { ++ Register s = ++ src.first()->is_reg() ? src.first()->as_Register() : L2; ++ Register d = ++ dst.first()->is_reg() ? dst.first()->as_Register() : L2; ++ ++ // We store the oop now so that the conversion pass can reach ++ // while in the inner frame. This will be the only store if ++ // the oop is NULL. 
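The unboxing branch above loads the primitive payload from a fixed offset inside the box object and substitutes zero for a null reference; a standalone sketch with an invented box layout (the real offset comes from java_lang_boxing_object::value_offset_in_bytes):

    // Hypothetical layout; only the idea of "payload at a fixed offset" matters.
    struct BoxedLong {
      void* header;  // stand-in for the object header
      long  value;   // primitive payload
    };

    // Mirrors the br_null path: a null box unboxes to 0, otherwise the
    // payload is loaded from its fixed offset.
    long unbox_or_zero(const BoxedLong* box) {
      return box == nullptr ? 0 : box->value;
    }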
++ if (s != L2) { ++ // src is register ++ if (d != L2) { ++ // dst is register ++ __ mov(s, d); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + ++ STACK_BIAS), "must be"); ++ __ st_ptr(s, SP, reg2offset(dst.first()) + STACK_BIAS); ++ } ++ } else { ++ // src not a register ++ assert(Assembler::is_simm13(reg2offset(src.first()) + ++ STACK_BIAS), "must be"); ++ __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, d); ++ if (d == L2) { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + ++ STACK_BIAS), "must be"); ++ __ st_ptr(d, SP, reg2offset(dst.first()) + STACK_BIAS); ++ } ++ } ++ } else if (out_sig_bt[c_arg] != T_VOID) { ++ // Convert the arg to NULL ++ if (dst.first()->is_reg()) { ++ __ mov(G0, dst.first()->as_Register()); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + ++ STACK_BIAS), "must be"); ++ __ st_ptr(G0, SP, reg2offset(dst.first()) + STACK_BIAS); ++ } ++ } ++ } ++ break; ++ case T_VOID: ++ break; ++ ++ case T_FLOAT: ++ if (src.first()->is_stack()) { ++ // Stack to stack/reg is simple ++ move32_64(masm, src, dst); ++ } else { ++ if (dst.first()->is_reg()) { ++ // freg -> reg ++ int off = ++ STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size; ++ Register d = dst.first()->as_Register(); ++ if (Assembler::is_simm13(off)) { ++ __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), ++ SP, off); ++ __ ld(SP, off, d); ++ } else { ++ if (conversion_off == noreg) { ++ __ set(off, L6); ++ conversion_off = L6; ++ } ++ __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), ++ SP, conversion_off); ++ __ ld(SP, conversion_off , d); ++ } ++ } else { ++ // freg -> mem ++ int off = STACK_BIAS + reg2offset(dst.first()); ++ if (Assembler::is_simm13(off)) { ++ __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), ++ SP, off); ++ } else { ++ if (conversion_off == noreg) { ++ __ set(off, L6); ++ conversion_off = L6; ++ } ++ __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), ++ SP, conversion_off); ++ } ++ } ++ } ++ break; ++ ++ case T_DOUBLE: ++ assert( j_arg + 1 < total_args_passed && ++ in_sig_bt[j_arg + 1] == T_VOID && ++ out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); ++ if (src.first()->is_stack()) { ++ // Stack to stack/reg is simple ++ long_move(masm, src, dst); ++ } else { ++ Register d = dst.first()->is_reg() ? dst.first()->as_Register() : L2; ++ ++ // Destination could be an odd reg on 32bit in which case ++ // we can't load direct to the destination. 
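The freg -> reg path above has no direct move available, so it spills the float to the conversion temp slot and reloads it as an integer; in portable C++ the same bit move is a memcpy (sketch only):

    #include <cstdint>
    #include <cstring>

    // Equivalent of the stf/ld pair through the conversion temp: transfer the
    // raw bits of a float into an integer register without converting the value.
    uint32_t float_bits_via_memory(float f) {
      uint32_t bits;
      std::memcpy(&bits, &f, sizeof(bits));
      return bits;
    }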
++ ++ if (!d->is_even() && wordSize == 4) { ++ d = L2; ++ } ++ int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size; ++ if (Assembler::is_simm13(off)) { ++ __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(), ++ SP, off); ++ __ ld_long(SP, off, d); ++ } else { ++ if (conversion_off == noreg) { ++ __ set(off, L6); ++ conversion_off = L6; ++ } ++ __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(), ++ SP, conversion_off); ++ __ ld_long(SP, conversion_off, d); ++ } ++ if (d == L2) { ++ long_move(masm, reg64_to_VMRegPair(L2), dst); ++ } ++ } ++ break; ++ ++ case T_LONG : ++ // 32bit can't do a split move of something like g1 -> O0, O1 ++ // so use a memory temp ++ if (src.is_single_phys_reg() && wordSize == 4) { ++ Register tmp = L2; ++ if (dst.first()->is_reg() && ++ (wordSize == 8 || dst.first()->as_Register()->is_even())) { ++ tmp = dst.first()->as_Register(); ++ } ++ ++ int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size; ++ if (Assembler::is_simm13(off)) { ++ __ stx(src.first()->as_Register(), SP, off); ++ __ ld_long(SP, off, tmp); ++ } else { ++ if (conversion_off == noreg) { ++ __ set(off, L6); ++ conversion_off = L6; ++ } ++ __ stx(src.first()->as_Register(), SP, conversion_off); ++ __ ld_long(SP, conversion_off, tmp); ++ } ++ ++ if (tmp == L2) { ++ long_move(masm, reg64_to_VMRegPair(L2), dst); ++ } ++ } else { ++ long_move(masm, src, dst); ++ } ++ break; ++ ++ case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); ++ ++ default: ++ move32_64(masm, src, dst); ++ } ++ } ++ ++ ++ // If we have any strings we must store any register based arg to the stack ++ // This includes any still live xmm registers too. ++ ++ if (total_strings > 0 ) { ++ ++ // protect all the arg registers ++ __ save_frame(0); ++ __ mov(G2_thread, L7_thread_cache); ++ const Register L2_string_off = L2; ++ ++ // Get first string offset ++ __ set(string_locs * VMRegImpl::stack_slot_size, L2_string_off); ++ ++ for (c_arg = 0 ; c_arg < total_c_args ; c_arg++ ) { ++ if (out_sig_bt[c_arg] == T_ADDRESS) { ++ ++ VMRegPair dst = out_regs[c_arg]; ++ const Register d = dst.first()->is_reg() ? ++ dst.first()->as_Register()->after_save() : noreg; ++ ++ // It's a string the oop and it was already copied to the out arg ++ // position ++ if (d != noreg) { ++ __ mov(d, O0); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS), ++ "must be"); ++ __ ld_ptr(FP, reg2offset(dst.first()) + STACK_BIAS, O0); ++ } ++ Label skip; ++ ++ __ add_d(FP, L2_string_off, O1); ++ __ br_null(O0, false, Assembler::pn, skip); ++ ++ if (d != noreg) { ++ __ mov(O1, d); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS), ++ "must be"); ++ __ st_ptr(O1, FP, reg2offset(dst.first()) + STACK_BIAS); ++ } ++ ++ __ addi_d(L2_string_off, max_dtrace_string_size, L2_string_off); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::get_utf), ++ relocInfo::runtime_call_type); ++ ++ __ bind(skip); ++ ++ } ++ ++ } ++ __ mov(L7_thread_cache, G2_thread); ++ __ restore(); ++ ++ } ++ ++ ++ // Ok now we are done. 
Need to place the nop that dtrace wants in order to ++ // patch in the trap ++ ++ int patch_offset = ((intptr_t)__ pc()) - start; ++ ++ __ nop(); ++ ++ ++ // Return ++ ++ __ restore(); ++ __ ret(); ++ ++ __ flush(); ++ nmethod *nm = nmethod::new_dtrace_nmethod( ++ method, masm->code(), vep_offset, patch_offset, frame_complete, ++ stack_slots / VMRegImpl::slots_per_word); ++ return nm; ++} ++ ++#endif // HAVE_DTRACE_H ++ ++// this function returns the adjust size (in number of words) to a c2i adapter ++// activation for use during deoptimization ++int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) { ++ return (callee_locals - callee_parameters) * Interpreter::stackElementWords; ++} ++ ++// "Top of Stack" slots that may be unused by the calling convention but must ++// otherwise be preserved. ++// On Intel these are not necessary and the value can be zero. ++// On Sparc this describes the words reserved for storing a register window ++// when an interrupt occurs. ++uint SharedRuntime::out_preserve_stack_slots() { ++ return 0; ++} ++ ++//------------------------------generate_deopt_blob---------------------------- ++// Ought to generate an ideal graph & compile, but here's some SPARC ASM ++// instead. ++void SharedRuntime::generate_deopt_blob() { ++ // allocate space for the code ++ ResourceMark rm; ++ // setup code generation tools ++ int pad = 0; ++#if INCLUDE_JVMCI ++ if (EnableJVMCI) { ++ pad += 512; // Increase the buffer size when compiling for JVMCI ++ } ++#endif ++ //CodeBuffer buffer ("deopt_blob", 4000, 2048); ++ CodeBuffer buffer ("deopt_blob", 8000+pad, 2048); // FIXME for debug ++ MacroAssembler* masm = new MacroAssembler( & buffer); ++ int frame_size_in_words; ++ OopMap* map = NULL; ++ // Account for the extra args we place on the stack ++ // by the time we call fetch_unroll_info ++ const int additional_words = 2; // deopt kind, thread ++ ++ OopMapSet *oop_maps = new OopMapSet(); ++ RegisterSaver reg_save(COMPILER2_OR_JVMCI != 0); ++ ++ address start = __ pc(); ++ Label cont; ++ // we use S3 for DeOpt reason register ++ Register reason = S3; ++ // use S6 for thread register ++ Register thread = TREG; ++ // use S7 for fetch_unroll_info returned UnrollBlock ++ Register unroll = S7; ++ // Prolog for non exception case! ++ ++ // We have been called from the deopt handler of the deoptee. ++ // ++ // deoptee: ++ // ... ++ // call X ++ // ... ++ // deopt_handler: call_deopt_stub ++ // cur. return pc --> ... ++ // ++ // So currently RA points behind the call in the deopt handler. ++ // We adjust it such that it points to the start of the deopt handler. ++ // The return_pc has been stored in the frame of the deoptee and ++ // will replace the address of the deopt_handler in the call ++ // to Deoptimization::fetch_unroll_info below. ++ ++ // HandlerImpl::size_deopt_handler() ++ __ addi_d(RA, RA, - NativeFarCall::instruction_size); ++ // Save everything in sight. 
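The deopt blob keeps one body and several entry points that differ only in the unpack reason recorded in S3 before falling into the shared `cont` path; schematically (function names are illustrative, the Unpack_* names are the ones used in the code):

    enum UnpackKind { Unpack_deopt, Unpack_exception, Unpack_reexecute, Unpack_uncommon_trap };

    // Shared continuation: save registers, call fetch_unroll_info,
    // then build the skeletal interpreter frames.
    static void unpack_common(UnpackKind reason) { (void)reason; /* ... */ }

    void deopt_entry()     { unpack_common(Unpack_deopt); }
    void reexecute_entry() { unpack_common(Unpack_reexecute); }
    void exception_entry() { unpack_common(Unpack_exception); }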
++ map = reg_save.save_live_registers(masm, additional_words, &frame_size_in_words); ++ // Normal deoptimization ++ __ li(reason, Deoptimization::Unpack_deopt); ++ __ b(cont); ++ ++ int reexecute_offset = __ pc() - start; ++#if INCLUDE_JVMCI && !defined(COMPILER1) ++ if (EnableJVMCI && UseJVMCICompiler) { ++ // JVMCI does not use this kind of deoptimization ++ __ should_not_reach_here(); ++ } ++#endif ++ ++ // Reexecute case ++ // return address is the pc describes what bci to do re-execute at ++ ++ // No need to update map as each call to save_live_registers will produce identical oopmap ++ (void) reg_save.save_live_registers(masm, additional_words, &frame_size_in_words); ++ __ li(reason, Deoptimization::Unpack_reexecute); ++ __ b(cont); ++ ++#if INCLUDE_JVMCI ++ Label after_fetch_unroll_info_call; ++ int implicit_exception_uncommon_trap_offset = 0; ++ int uncommon_trap_offset = 0; ++ ++ if (EnableJVMCI) { ++ implicit_exception_uncommon_trap_offset = __ pc() - start; ++ ++ __ ld_d(RA, Address(TREG, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset()))); ++ __ st_d(R0, Address(TREG, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset()))); ++ ++ uncommon_trap_offset = __ pc() - start; ++ ++ // Save everything in sight. ++ (void) reg_save.save_live_registers(masm, additional_words, &frame_size_in_words); ++ __ addi_d(SP, SP, -additional_words * wordSize); ++ // fetch_unroll_info needs to call last_java_frame() ++ Label retaddr; ++ __ set_last_Java_frame(NOREG, NOREG, retaddr); ++ ++ __ ld_w(A1, Address(TREG, in_bytes(JavaThread::pending_deoptimization_offset()))); ++ __ li(AT, -1); ++ __ st_w(AT, Address(TREG, in_bytes(JavaThread::pending_deoptimization_offset()))); ++ ++ __ li(reason, (int32_t)Deoptimization::Unpack_reexecute); ++ __ move(A0, TREG); ++ __ move(A2, reason); // exec mode ++ __ call((address)Deoptimization::uncommon_trap, relocInfo::runtime_call_type); ++ __ bind(retaddr); ++ oop_maps->add_gc_map( __ pc()-start, map->deep_copy()); ++ __ addi_d(SP, SP, additional_words * wordSize); ++ ++ __ reset_last_Java_frame(false); ++ ++ __ b(after_fetch_unroll_info_call); ++ } // EnableJVMCI ++#endif // INCLUDE_JVMCI ++ ++ int exception_offset = __ pc() - start; ++ // Prolog for exception case ++ ++ // all registers are dead at this entry point, except for V0 and ++ // V1 which contain the exception oop and exception pc ++ // respectively. Set them in TLS and fall thru to the ++ // unpack_with_exception_in_tls entry point. ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ st_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset())); ++ int exception_in_tls_offset = __ pc() - start; ++ // new implementation because exception oop is now passed in JavaThread ++ ++ // Prolog for exception case ++ // All registers must be preserved because they might be used by LinearScan ++ // Exceptiop oop and throwing PC are passed in JavaThread ++ // tos: stack at point of call to method that threw the exception (i.e. only ++ // args are on the stack, no return address) ++ ++ // Return address will be patched later with the throwing pc. The correct value is not ++ // available now because loading it from memory would destroy registers. ++ // Save everything in sight. 
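Because the throwing pc cannot be loaded before the register save without clobbering a register, it is parked in the JavaThread and only patched into the frame's return-address slot afterwards (the ld_ptr/st_ptr pair just below); a minimal model of that hand-off (struct and names are illustrative):

    // Illustrative stand-in for the exception_oop / exception_pc fields of JavaThread.
    struct ExceptionState {
      void* exception_oop = nullptr;
      void* exception_pc  = nullptr;
    };

    // After the registers are saved it is safe to consume the stashed pc:
    // it becomes the frame's return address and the field is cleared.
    void patch_return_pc(void** saved_ra_slot, ExceptionState& state) {
      *saved_ra_slot     = state.exception_pc;
      state.exception_pc = nullptr;
    }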
++ // No need to update map as each call to save_live_registers will produce identical oopmap ++ (void) reg_save.save_live_registers(masm, additional_words, &frame_size_in_words); ++ ++ // Now it is safe to overwrite any register ++ // store the correct deoptimization type ++ __ li(reason, Deoptimization::Unpack_exception); ++ // load throwing pc from JavaThread and patch it as the return address ++ // of the current frame. Then clear the field in JavaThread ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(V1, SP, reg_save.ra_offset()); //save ra ++ __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset())); ++ ++ ++#ifdef ASSERT ++ // verify that there is really an exception oop in JavaThread ++ __ ld_ptr(AT, thread, in_bytes(JavaThread::exception_oop_offset())); ++ __ verify_oop(AT); ++ // verify that there is no pending exception ++ Label no_pending_exception; ++ __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, no_pending_exception); ++ __ stop("must not have pending exception here"); ++ __ bind(no_pending_exception); ++#endif ++ __ bind(cont); ++ // Compiled code leaves the floating point stack dirty, empty it. ++ __ empty_FPU_stack(); ++ ++ ++ // Call C code. Need thread and this frame, but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ __ move(A0, thread); ++ __ move(A1, reason); // exec_mode ++ __ addi_d(SP, SP, -additional_words * wordSize); ++ ++ Label retaddr; ++ __ set_last_Java_frame(NOREG, NOREG, retaddr); ++ ++ // Call fetch_unroll_info(). Need thread and this frame, but NOT official VM entry - cannot block on ++ // this call, no GC can happen. Call should capture return values. ++ ++ // TODO: confirm reloc ++ __ call(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), relocInfo::runtime_call_type); ++ __ bind(retaddr); ++ oop_maps->add_gc_map(__ pc() - start, map); ++ __ addi_d(SP, SP, additional_words * wordSize); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ reset_last_Java_frame(false); ++ ++#if INCLUDE_JVMCI ++ if (EnableJVMCI) { ++ __ bind(after_fetch_unroll_info_call); ++ } ++#endif ++ ++ // Load UnrollBlock into S7 ++ __ move(unroll, V0); ++ ++ ++ // Move the unpack kind to a safe place in the UnrollBlock because ++ // we are very short of registers ++ ++ Address unpack_kind(unroll, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()); ++ __ st_w(reason, unpack_kind); ++ // save the unpack_kind value ++ // Retrieve the possible live values (return values) ++ // All callee save registers representing jvm state ++ // are now in the vframeArray. ++ ++ Label noException; ++ __ li(AT, Deoptimization::Unpack_exception); ++ __ bne(AT, reason, noException);// Was exception pending? ++ __ ld_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset())); ++ __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(R0, thread, in_bytes(JavaThread::exception_oop_offset())); ++ ++ __ verify_oop(V0); ++ ++ // Overwrite the result registers with the exception results. ++ __ st_ptr(V0, SP, reg_save.v0_offset()); ++ __ st_ptr(V1, SP, reg_save.v1_offset()); ++ ++ __ bind(noException); ++ ++ ++ // Stack is back to only having register save data on the stack. ++ // Now restore the result registers. 
Everything else is either dead or captured ++ // in the vframeArray. ++ ++ reg_save.restore_result_registers(masm); ++ // All of the register save area has been popped of the stack. Only the ++ // return address remains. ++ // Pop all the frames we must move/replace. ++ // Frame picture (youngest to oldest) ++ // 1: self-frame (no frame link) ++ // 2: deopting frame (no frame link) ++ // 3: caller of deopting frame (could be compiled/interpreted). ++ // ++ // Note: by leaving the return address of self-frame on the stack ++ // and using the size of frame 2 to adjust the stack ++ // when we are done the return to frame 3 will still be on the stack. ++ ++ // register for the sender's sp ++ Register sender_sp = Rsender; ++ // register for frame pcs ++ Register pcs = T0; ++ // register for frame sizes ++ Register sizes = T1; ++ // register for frame count ++ Register count = T3; ++ ++ // Pop deoptimized frame ++ __ ld_w(AT, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()); ++ __ add_d(SP, SP, AT); ++ // sp should be pointing at the return address to the caller (3) ++ ++ // Load array of frame pcs into pcs ++ __ ld_ptr(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()); ++ __ addi_d(SP, SP, wordSize); // trash the old pc ++ // Load array of frame sizes into T6 ++ __ ld_ptr(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()); ++ ++ ++ ++ // Load count of frams into T3 ++ __ ld_w(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()); ++ // Pick up the initial fp we should save ++ __ ld_d(FP, unroll, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()); ++ // Now adjust the caller's stack to make up for the extra locals ++ // but record the original sp so that we can save it in the skeletal interpreter ++ // frame and the stack walking of interpreter_sender will get the unextended sp ++ // value and not the "real" sp value. ++ __ move(sender_sp, SP); ++ __ ld_w(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()); ++ __ sub_d(SP, SP, AT); ++ ++ Label loop; ++ __ bind(loop); ++ __ ld_d(T2, sizes, 0); // Load frame size ++ __ ld_ptr(AT, pcs, 0); // save return address ++ __ addi_d(T2, T2, -2 * wordSize); // we'll push pc and fp, by hand ++ __ push2(AT, FP); ++ __ move(FP, SP); ++ __ sub_d(SP, SP, T2); // Prolog! ++ // This value is corrected by layout_activation_impl ++ __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ st_d(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable ++ __ move(sender_sp, SP); // pass to next frame ++ __ addi_d(count, count, -1); // decrement counter ++ __ addi_d(sizes, sizes, wordSize); // Bump array pointer (sizes) ++ __ addi_d(pcs, pcs, wordSize); // Bump array pointer (pcs) ++ __ bne(count, R0, loop); ++ __ ld_d(AT, pcs, 0); // frame_pcs[number_of_frames] = Interpreter::deopt_entry(vtos, 0); ++ // Re-push self-frame ++ __ push2(AT, FP); ++ __ move(FP, SP); ++ __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ st_d(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize); ++ __ addi_d(SP, SP, -(frame_size_in_words - 2 - additional_words) * wordSize); ++ ++ // Restore frame locals after moving the frame ++ __ st_d(V0, SP, reg_save.v0_offset()); ++ __ st_d(V1, SP, reg_save.v1_offset()); ++ __ fst_d(F0, SP, reg_save.fpr0_offset()); ++ __ fst_d(F1, SP, reg_save.fpr1_offset()); ++ ++ // Call unpack_frames(). 
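The frame-pushing loop above walks the UnrollBlock's parallel arrays of frame sizes and return pcs, carving one skeletal interpreter frame per entry out of the stack; a simulation of the pointer arithmetic (plain integers instead of real stack memory, names are illustrative):

    #include <cstddef>
    #include <cstdint>

    struct SkeletalFrame { uintptr_t fp; uintptr_t return_pc; };

    // Each iteration "pushes" pc and fp by hand (2 words) and then extends the
    // frame by the rest of its size, mirroring push2 + move(FP, SP) + sub_d.
    void unroll_frames(uintptr_t sp, const uintptr_t* sizes, const uintptr_t* pcs,
                       int count, SkeletalFrame* out) {
      const size_t word = sizeof(uintptr_t);
      for (int i = 0; i < count; i++) {
        sp -= 2 * word;                 // room for return pc and saved fp
        uintptr_t fp = sp;              // new frame pointer
        sp -= sizes[i] - 2 * word;      // remainder of the skeletal frame
        out[i] = { fp, pcs[i] };
      }
    }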
Need thread and this frame, but NOT official VM entry - cannot block on ++ // this call, no GC can happen. ++ __ move(A1, reason); // exec_mode ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ move(A0, thread); // thread ++ __ addi_d(SP, SP, (-additional_words) *wordSize); ++ ++ // set last_Java_sp, last_Java_fp ++ Label L; ++ address the_pc = __ pc(); ++ __ bind(L); ++ __ set_last_Java_frame(NOREG, FP, L); ++ ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); // Fix stack alignment as required by ABI ++ ++ __ call(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), relocInfo::runtime_call_type); ++ // Revert SP alignment after call since we're going to do some SP relative addressing below ++ __ ld_d(SP, thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // Set an oopmap for the call site ++ oop_maps->add_gc_map(the_pc - start, new OopMap(frame_size_in_words, 0)); ++ ++ __ push(V0); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ reset_last_Java_frame(true); ++ ++ // Collect return values ++ __ ld_d(V0, SP, reg_save.v0_offset() + (additional_words + 1) * wordSize); ++ __ ld_d(V1, SP, reg_save.v1_offset() + (additional_words + 1) * wordSize); ++ // Pop float stack and store in local ++ __ fld_d(F0, SP, reg_save.fpr0_offset() + (additional_words + 1) * wordSize); ++ __ fld_d(F1, SP, reg_save.fpr1_offset() + (additional_words + 1) * wordSize); ++ ++ //FIXME, ++ // Clear floating point stack before returning to interpreter ++ __ empty_FPU_stack(); ++ //FIXME, we should consider about float and double ++ // Push a float or double return value if necessary. ++ __ leave(); ++ ++ // Jump to interpreter ++ __ jr(RA); ++ ++ masm->flush(); ++ _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words); ++ _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); ++#if INCLUDE_JVMCI ++ if (EnableJVMCI) { ++ _deopt_blob->set_uncommon_trap_offset(uncommon_trap_offset); ++ _deopt_blob->set_implicit_exception_uncommon_trap_offset(implicit_exception_uncommon_trap_offset); ++ } ++#endif ++} ++ ++#ifdef COMPILER2 ++ ++//------------------------------generate_uncommon_trap_blob-------------------- ++// Ought to generate an ideal graph & compile, but here's some SPARC ASM ++// instead. ++void SharedRuntime::generate_uncommon_trap_blob() { ++ // allocate space for the code ++ ResourceMark rm; ++ // setup code generation tools ++ CodeBuffer buffer ("uncommon_trap_blob", 512*80 , 512*40 ); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ ++ enum frame_layout { ++ fp_off, fp_off2, ++ return_off, return_off2, ++ framesize ++ }; ++ assert(framesize % 4 == 0, "sp not 16-byte aligned"); ++ address start = __ pc(); ++ ++ // S8 be used in C2 ++ __ li(S8, (long)Interpreter::dispatch_table(itos)); ++ // Push self-frame. ++ __ addi_d(SP, SP, -framesize * BytesPerInt); ++ ++ __ st_d(RA, SP, return_off * BytesPerInt); ++ __ st_d(FP, SP, fp_off * BytesPerInt); ++ ++ __ addi_d(FP, SP, fp_off * BytesPerInt); ++ ++ // Clear the floating point exception stack ++ __ empty_FPU_stack(); ++ ++ Register thread = TREG; ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // set last_Java_sp ++ Label retaddr; ++ __ set_last_Java_frame(NOREG, FP, retaddr); ++ // Call C code. Need thread but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. Call should ++ // capture callee-saved registers as well as return values. 
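The `li(AT, -(StackAlignmentInBytes)); andr(SP, SP, AT)` idiom used above rounds SP down to the ABI alignment: AND-ing with the negative of a power of two clears the low bits. Sketch:

    #include <cstdint>

    // Round a stack pointer down to a power-of-two alignment;
    // sp & -alignment is the same operation the andr performs.
    inline uintptr_t align_down(uintptr_t sp, uintptr_t alignment) {
      return sp & ~(alignment - 1);
    }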
++ __ move(A0, thread); ++ // argument already in T0 ++ __ move(A1, T0); ++ __ addi_d(A2, R0, Deoptimization::Unpack_uncommon_trap); ++ __ call((address)Deoptimization::uncommon_trap, relocInfo::runtime_call_type); ++ __ bind(retaddr); ++ ++ // Set an oopmap for the call site ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map = new OopMap( framesize, 0 ); ++ ++ oop_maps->add_gc_map(__ pc() - start, map); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ reset_last_Java_frame(false); ++ ++ // Load UnrollBlock into S7 ++ Register unroll = S7; ++ __ move(unroll, V0); ++ ++#ifdef ASSERT ++ { Label L; ++ __ ld_ptr(AT, unroll, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()); ++ __ li(T4, Deoptimization::Unpack_uncommon_trap); ++ __ beq(AT, T4, L); ++ __ stop("SharedRuntime::generate_deopt_blob: expected Unpack_uncommon_trap"); ++ __ bind(L); ++ } ++#endif ++ ++ // Pop all the frames we must move/replace. ++ // ++ // Frame picture (youngest to oldest) ++ // 1: self-frame (no frame link) ++ // 2: deopting frame (no frame link) ++ // 3: possible-i2c-adapter-frame ++ // 4: caller of deopting frame (could be compiled/interpreted. If interpreted we will create an ++ // and c2i here) ++ ++ __ addi_d(SP, SP, framesize * BytesPerInt); ++ ++ // Pop deoptimized frame ++ __ ld_w(AT, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()); ++ __ add_d(SP, SP, AT); ++ ++ // register for frame pcs ++ Register pcs = T8; ++ // register for frame sizes ++ Register sizes = T4; ++ // register for frame count ++ Register count = T3; ++ // register for the sender's sp ++ Register sender_sp = T1; ++ ++ // sp should be pointing at the return address to the caller (4) ++ // Load array of frame pcs ++ __ ld_d(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()); ++ ++ // Load array of frame sizes ++ __ ld_d(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()); ++ __ ld_wu(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()); ++ ++ // Pick up the initial fp we should save ++ __ ld_d(FP, unroll, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()); ++ // Now adjust the caller's stack to make up for the extra locals ++ // but record the original sp so that we can save it in the skeletal interpreter ++ // frame and the stack walking of interpreter_sender will get the unextended sp ++ // value and not the "real" sp value. ++ ++ __ move(sender_sp, SP); ++ __ ld_w(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()); ++ __ sub_d(SP, SP, AT); ++ // Push interpreter frames in a loop ++ Label loop; ++ __ bind(loop); ++ __ ld_d(T2, sizes, 0); // Load frame size ++ __ ld_d(AT, pcs, 0); // save return address ++ __ addi_d(T2, T2, -2*wordSize); // we'll push pc and fp, by hand ++ __ push2(AT, FP); ++ __ move(FP, SP); ++ __ sub_d(SP, SP, T2); // Prolog! 
++ // This value is corrected by layout_activation_impl ++ __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ st_d(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable ++ __ move(sender_sp, SP); // pass to next frame ++ __ addi_d(count, count, -1); // decrement counter ++ __ addi_d(sizes, sizes, wordSize); // Bump array pointer (sizes) ++ __ addi_d(pcs, pcs, wordSize); // Bump array pointer (pcs) ++ __ bne(count, R0, loop); ++ ++ __ ld_d(RA, pcs, 0); ++ ++ // Re-push self-frame ++ // save old & set new FP ++ // save final return address ++ __ enter(); ++ ++ // Use FP because the frames look interpreted now ++ // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP. ++ // Don't need the precise return PC here, just precise enough to point into this code blob. ++ Label L; ++ address the_pc = __ pc(); ++ __ bind(L); ++ __ set_last_Java_frame(NOREG, FP, L); ++ ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); // Fix stack alignment as required by ABI ++ ++ // Call C code. Need thread but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. Call should ++ // restore return values to their stack-slots with the new SP. ++ __ move(A0, thread); ++ __ li(A1, Deoptimization::Unpack_uncommon_trap); ++ __ call((address)Deoptimization::unpack_frames, relocInfo::runtime_call_type); ++ // Set an oopmap for the call site ++ oop_maps->add_gc_map(the_pc - start, new OopMap(framesize, 0)); ++ ++ __ reset_last_Java_frame(true); ++ ++ // Pop self-frame. ++ __ leave(); // Epilog! ++ ++ // Jump to interpreter ++ __ jr(RA); ++ // ------------- ++ // make sure all code is generated ++ masm->flush(); ++ _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps, framesize / 2); ++} ++ ++#endif // COMPILER2 ++ ++//------------------------------generate_handler_blob------------------- ++// ++// Generate a special Compile2Runtime blob that saves all registers, and sets ++// up an OopMap and calls safepoint code to stop the compiled code for ++// a safepoint. ++// ++// This blob is jumped to (via a breakpoint and the signal handler) from a ++// safepoint in compiled code. ++ ++SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) { ++ ++ // Account for thread arg in our frame ++ const int additional_words = 0; ++ int frame_size_in_words; ++ ++ assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); ++ ++ ResourceMark rm; ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map; ++ ++ // allocate space for the code ++ // setup code generation tools ++ CodeBuffer buffer ("handler_blob", 2048, 512); ++ MacroAssembler* masm = new MacroAssembler( &buffer); ++ ++ const Register thread = TREG; ++ address start = __ pc(); ++ bool cause_return = (poll_type == POLL_AT_RETURN); ++ RegisterSaver reg_save(poll_type == POLL_AT_VECTOR_LOOP /* save_vectors */); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ map = reg_save.save_live_registers(masm, additional_words, &frame_size_in_words); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ // The following is basically a call_VM. However, we need the precise ++ // address of the call in order to generate an oopmap. Hence, we do all the ++ // work outselvs. 
++ ++ Label retaddr; ++ __ set_last_Java_frame(NOREG, NOREG, retaddr); ++ ++ if (!cause_return) { ++ // overwrite the return address pushed by save_live_registers ++ // Additionally, TSR is a callee-saved register so we can look at ++ // it later to determine if someone changed the return address for ++ // us! ++ __ ld_ptr(TSR, thread, in_bytes(JavaThread::saved_exception_pc_offset())); ++ __ st_ptr(TSR, SP, reg_save.ra_offset()); ++ } ++ ++ // Do the call ++ __ move(A0, thread); ++ // TODO: confirm reloc ++ __ call(call_ptr, relocInfo::runtime_call_type); ++ __ bind(retaddr); ++ ++ // Set an oopmap for the call site. This oopmap will map all ++ // oop-registers and debug-info registers as callee-saved. This ++ // will allow deoptimization at this safepoint to find all possible ++ // debug-info recordings, as well as let GC find all oops. ++ oop_maps->add_gc_map(__ pc() - start, map); ++ ++ Label noException; ++ ++ // Clear last_Java_sp again ++ __ reset_last_Java_frame(false); ++ ++ __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, noException); ++ ++ // Exception pending ++ ++ reg_save.restore_live_registers(masm); ++ //forward_exception_entry need return address on the stack ++ __ push(RA); ++ // TODO: confirm reloc ++ __ jmp((address)StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ ++ // No exception case ++ __ bind(noException); ++ ++ Label no_adjust, bail; ++ if (SafepointMechanism::uses_thread_local_poll() && !cause_return) { ++ // If our stashed return pc was modified by the runtime we avoid touching it ++ __ ld_ptr(AT, SP, reg_save.ra_offset()); ++ __ bne(AT, TSR, no_adjust); ++ ++#ifdef ASSERT ++ // Verify the correct encoding of the poll we're about to skip. ++ // See NativeInstruction::is_safepoint_poll() ++ __ ld_wu(AT, TSR, 0); ++ __ push(T5); ++ __ li(T5, 0xffc0001f); ++ __ andr(AT, AT, T5); ++ __ li(T5, 0x28800013); ++ __ xorr(AT, AT, T5); ++ __ pop(T5); ++ __ bne(AT, R0, bail); ++#endif ++ // Adjust return pc forward to step over the safepoint poll instruction ++ __ addi_d(RA, TSR, 4); // NativeInstruction::instruction_size=4 ++ __ st_ptr(RA, SP, reg_save.ra_offset()); ++ } ++ ++ __ bind(no_adjust); ++ // Normal exit, register restoring and exit ++ reg_save.restore_live_registers(masm); ++ __ jr(RA); ++ ++#ifdef ASSERT ++ __ bind(bail); ++ __ stop("Attempting to adjust pc to skip safepoint poll but the return point is not what we expected"); ++#endif ++ ++ // Make sure all code is generated ++ masm->flush(); ++ // Fill-out other meta info ++ return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words); ++} ++ ++// ++// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss ++// ++// Generate a stub that calls into vm to find out the proper destination ++// of a java call. All the argument registers are live at this point ++// but since this is generic code we don't know what they are and the caller ++// must do any gc of the args. ++// ++RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) { ++ assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); ++ ++ // allocate space for the code ++ ResourceMark rm; ++ ++ //CodeBuffer buffer(name, 1000, 512); ++ //FIXME. 
code_size ++ CodeBuffer buffer(name, 2000, 2048); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ ++ int frame_size_words; ++ RegisterSaver reg_save(false /* save_vectors */); ++ //we put the thread in A0 ++ ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map = NULL; ++ ++ address start = __ pc(); ++ map = reg_save.save_live_registers(masm, 0, &frame_size_words); ++ ++ ++ int frame_complete = __ offset(); ++#ifndef OPT_THREAD ++ const Register thread = T8; ++ __ get_thread(thread); ++#else ++ const Register thread = TREG; ++#endif ++ ++ __ move(A0, thread); ++ Label retaddr; ++ __ set_last_Java_frame(noreg, FP, retaddr); ++ // align the stack before invoke native ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ ++ // TODO: confirm reloc ++ __ call(destination, relocInfo::runtime_call_type); ++ __ bind(retaddr); ++ ++ // Set an oopmap for the call site. ++ // We need this not only for callee-saved registers, but also for volatile ++ // registers that the compiler might be keeping live across a safepoint. ++ oop_maps->add_gc_map(__ pc() - start, map); ++ // V0 contains the address we are going to jump to assuming no exception got installed ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ ld_ptr(SP, thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // clear last_Java_sp ++ __ reset_last_Java_frame(true); ++ // check for pending exceptions ++ Label pending; ++ __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, pending); ++ // get the returned Method* ++ __ get_vm_result_2(Rmethod, thread); ++ __ st_ptr(Rmethod, SP, reg_save.s3_offset()); ++ __ st_ptr(V0, SP, reg_save.t5_offset()); ++ reg_save.restore_live_registers(masm); ++ ++ // We are back the the original state on entry and ready to go the callee method. ++ __ jr(T5); ++ // Pending exception after the safepoint ++ ++ __ bind(pending); ++ ++ reg_save.restore_live_registers(masm); ++ ++ // exception pending => remove activation and forward to exception handler ++ //forward_exception_entry need return address on the stack ++ __ push(RA); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ st_ptr(R0, thread, in_bytes(JavaThread::vm_result_offset())); ++ __ ld_ptr(V0, thread, in_bytes(Thread::pending_exception_offset())); ++ __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ // ++ // make sure all code is generated ++ masm->flush(); ++ RuntimeStub* tmp= RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true); ++ return tmp; ++} ++ ++extern "C" int SpinPause() {return 0;} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/stubGenerator_loongarch_64.cpp b/src/hotspot/cpu/loongarch/stubGenerator_loongarch_64.cpp +--- a/src/hotspot/cpu/loongarch/stubGenerator_loongarch_64.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/stubGenerator_loongarch_64.cpp 2024-01-30 10:00:11.841431732 +0800 +@@ -0,0 +1,4804 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "interpreter/interpreter.hpp" ++#include "nativeInst_loongarch.hpp" ++#include "oops/instanceOop.hpp" ++#include "oops/method.hpp" ++#include "oops/objArrayKlass.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++#ifdef COMPILER2 ++#include "opto/runtime.hpp" ++#endif ++ ++// Declaration and definition of StubGenerator (no .hpp file). ++// For a more detailed description of the stub routine structure ++// see the comment in stubRoutines.hpp ++ ++#define __ _masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++#define TIMES_OOP (UseCompressedOops ? Address::times_4 : Address::times_8) ++ ++//#ifdef PRODUCT ++//#define BLOCK_COMMENT(str) /* nothing */ ++//#else ++//#define BLOCK_COMMENT(str) __ block_comment(str) ++//#endif ++ ++//#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++const int MXCSR_MASK = 0xFFC0; // Mask out any pending exceptions ++ ++// Stub Code definitions ++ ++class StubGenerator: public StubCodeGenerator { ++ private: ++ ++ // This fig is not LA ABI. It is call Java from C ABI. ++ // Call stubs are used to call Java from C ++ // ++ // [ return_from_Java ] ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ // ... ++ // -8 [ S6 ] ++ // -7 [ S5 ] ++ // -6 [ S4 ] ++ // -5 [ S3 ] ++ // -4 [ S1 ] ++ // -3 [ TSR(S2) ] ++ // -2 [ LVP(S7) ] ++ // -1 [ BCP(S0) ] ++ // 0 [ saved fp ] <--- fp_after_call ++ // 1 [ return address ] ++ // 2 [ ptr. to call wrapper ] <--- a0 (old sp -->)fp ++ // 3 [ result ] <--- a1 ++ // 4 [ result_type ] <--- a2 ++ // 5 [ method ] <--- a3 ++ // 6 [ entry_point ] <--- a4 ++ // 7 [ parameters ] <--- a5 ++ // 8 [ parameter_size ] <--- a6 ++ // 9 [ thread ] <--- a7 ++ ++ // ++ // LA ABI does not save paras in sp. ++ // ++ // [ return_from_Java ] ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ //-22 [ F31 ] ++ // ... ++ //-15 [ F24 ] ++ //-14 [ S8 ] ++ //-13 [ thread ] ++ //-12 [ result_type ] <--- a2 ++ //-11 [ result ] <--- a1 ++ //-10 [ ] ++ // -9 [ ptr. 
to call wrapper ] <--- a0 ++ // -8 [ S6 ] ++ // -7 [ S5 ] ++ // -6 [ S4 ] ++ // -5 [ S3 ] ++ // -4 [ S1 ] ++ // -3 [ TSR(S2) ] ++ // -2 [ LVP(S7) ] ++ // -1 [ BCP(S0) ] ++ // 0 [ saved fp ] <--- fp_after_call ++ // 1 [ return address ] ++ // 2 [ ] <--- old sp ++ // ++ // Find a right place in the call_stub for S8. ++ // S8 will point to the starting point of Interpreter::dispatch_table(itos). ++ // It should be saved/restored before/after Java calls. ++ // ++ enum call_stub_layout { ++ RA_off = 1, ++ FP_off = 0, ++ BCP_off = -1, ++ LVP_off = -2, ++ TSR_off = -3, ++ S1_off = -4, ++ S3_off = -5, ++ S4_off = -6, ++ S5_off = -7, ++ S6_off = -8, ++ call_wrapper_off = -9, ++ result_off = -11, ++ result_type_off = -12, ++ thread_off = -13, ++ S8_off = -14, ++ F24_off = -15, ++ F25_off = -16, ++ F26_off = -17, ++ F27_off = -18, ++ F28_off = -19, ++ F29_off = -20, ++ F30_off = -21, ++ F31_off = -22, ++ total_off = F31_off, ++ }; ++ ++ address generate_call_stub(address& return_address) { ++ assert((int)frame::entry_frame_call_wrapper_offset == (int)call_wrapper_off, "adjust this code"); ++ StubCodeMark mark(this, "StubRoutines", "call_stub"); ++ address start = __ pc(); ++ ++ // same as in generate_catch_exception()! ++ ++ // stub code ++ // save ra and fp ++ __ enter(); ++ // I think 14 is the max gap between argument and callee saved register ++ __ addi_d(SP, SP, total_off * wordSize); ++ __ st_d(BCP, FP, BCP_off * wordSize); ++ __ st_d(LVP, FP, LVP_off * wordSize); ++ __ st_d(TSR, FP, TSR_off * wordSize); ++ __ st_d(S1, FP, S1_off * wordSize); ++ __ st_d(S3, FP, S3_off * wordSize); ++ __ st_d(S4, FP, S4_off * wordSize); ++ __ st_d(S5, FP, S5_off * wordSize); ++ __ st_d(S6, FP, S6_off * wordSize); ++ __ st_d(A0, FP, call_wrapper_off * wordSize); ++ __ st_d(A1, FP, result_off * wordSize); ++ __ st_d(A2, FP, result_type_off * wordSize); ++ __ st_d(A7, FP, thread_off * wordSize); ++ __ st_d(S8, FP, S8_off * wordSize); ++ ++ __ fst_d(F24, FP, F24_off * wordSize); ++ __ fst_d(F25, FP, F25_off * wordSize); ++ __ fst_d(F26, FP, F26_off * wordSize); ++ __ fst_d(F27, FP, F27_off * wordSize); ++ __ fst_d(F28, FP, F28_off * wordSize); ++ __ fst_d(F29, FP, F29_off * wordSize); ++ __ fst_d(F30, FP, F30_off * wordSize); ++ __ fst_d(F31, FP, F31_off * wordSize); ++ ++ __ li(S8, (long)Interpreter::dispatch_table(itos)); ++ ++#ifdef OPT_THREAD ++ __ move(TREG, A7); ++#endif ++ //add for compressedoops ++ __ reinit_heapbase(); ++ ++#ifdef ASSERT ++ // make sure we have no pending exceptions ++ { ++ Label L; ++ __ ld_d(AT, A7, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ /* FIXME: I do not know how to realize stop in LA, do it in the future */ ++ __ stop("StubRoutines::call_stub: entered with pending exception"); ++ __ bind(L); ++ } ++#endif ++ ++ // pass parameters if any ++ // A5: parameter ++ // A6: parameter_size ++ // T0: parameter_size_tmp(--) ++ // T2: offset(++) ++ // T3: tmp ++ Label parameters_done; ++ // judge if the parameter_size equals 0 ++ __ beq(A6, R0, parameters_done); ++ __ slli_d(AT, A6, Interpreter::logStackElementSize); ++ __ sub_d(SP, SP, AT); ++ __ li(AT, -StackAlignmentInBytes); ++ __ andr(SP, SP, AT); ++ // Copy Java parameters in reverse order (receiver last) ++ // Note that the argument order is inverted in the process ++ Label loop; ++ __ move(T0, A6); ++ __ move(T2, R0); ++ __ bind(loop); ++ ++ // get parameter ++ __ alsl_d(T3, T0, A5, LogBytesPerWord - 1); ++ __ ld_d(AT, T3, -wordSize); ++ __ alsl_d(T3, T2, SP, LogBytesPerWord - 1); ++ __ st_d(AT, T3, 
Interpreter::expr_offset_in_bytes(0)); ++ __ addi_d(T2, T2, 1); ++ __ addi_d(T0, T0, -1); ++ __ bne(T0, R0, loop); ++ // advance to next parameter ++ ++ // call Java function ++ __ bind(parameters_done); ++ ++ // receiver in V0, methodOop in Rmethod ++ ++ __ move(Rmethod, A3); ++ __ move(Rsender, SP); //set sender sp ++ __ jalr(A4); ++ return_address = __ pc(); ++ ++ Label common_return; ++ __ bind(common_return); ++ ++ // store result depending on type ++ // (everything that is not T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT) ++ __ ld_d(T0, FP, result_off * wordSize); // result --> T0 ++ Label is_long, is_float, is_double, exit; ++ __ ld_d(T2, FP, result_type_off * wordSize); // result_type --> T2 ++ __ addi_d(T3, T2, (-1) * T_LONG); ++ __ beq(T3, R0, is_long); ++ __ addi_d(T3, T2, (-1) * T_FLOAT); ++ __ beq(T3, R0, is_float); ++ __ addi_d(T3, T2, (-1) * T_DOUBLE); ++ __ beq(T3, R0, is_double); ++ ++ // handle T_INT case ++ __ st_d(V0, T0, 0 * wordSize); ++ __ bind(exit); ++ ++ // restore ++ __ ld_d(BCP, FP, BCP_off * wordSize); ++ __ ld_d(LVP, FP, LVP_off * wordSize); ++ __ ld_d(S8, FP, S8_off * wordSize); ++ __ ld_d(TSR, FP, TSR_off * wordSize); ++ ++ __ ld_d(S1, FP, S1_off * wordSize); ++ __ ld_d(S3, FP, S3_off * wordSize); ++ __ ld_d(S4, FP, S4_off * wordSize); ++ __ ld_d(S5, FP, S5_off * wordSize); ++ __ ld_d(S6, FP, S6_off * wordSize); ++ ++ __ fld_d(F24, FP, F24_off * wordSize); ++ __ fld_d(F25, FP, F25_off * wordSize); ++ __ fld_d(F26, FP, F26_off * wordSize); ++ __ fld_d(F27, FP, F27_off * wordSize); ++ __ fld_d(F28, FP, F28_off * wordSize); ++ __ fld_d(F29, FP, F29_off * wordSize); ++ __ fld_d(F30, FP, F30_off * wordSize); ++ __ fld_d(F31, FP, F31_off * wordSize); ++ ++ __ leave(); ++ ++ // return ++ __ jr(RA); ++ ++ // handle return types different from T_INT ++ __ bind(is_long); ++ __ st_d(V0, T0, 0 * wordSize); ++ __ b(exit); ++ ++ __ bind(is_float); ++ __ fst_s(FV0, T0, 0 * wordSize); ++ __ b(exit); ++ ++ __ bind(is_double); ++ __ fst_d(FV0, T0, 0 * wordSize); ++ __ b(exit); ++ StubRoutines::la::set_call_stub_compiled_return(__ pc()); ++ __ b(common_return); ++ return start; ++ } ++ ++ // Return point for a Java call if there's an exception thrown in ++ // Java code. The exception is caught and transformed into a ++ // pending exception stored in JavaThread that can be tested from ++ // within the VM. ++ // ++ // Note: Usually the parameters are removed by the callee. In case ++ // of an exception crossing an activation frame boundary, that is ++ // not the case if the callee is compiled code => need to setup the ++ // sp. 
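The parameter-copy loop above walks the Java argument array from the last element down while the stack index counts up, matching the "copy in reverse order (receiver last)" comment; the effect in plain C++ (an editor's sketch, not the stub itself):

    #include <cstdint>

    // dest[j] receives params[count - 1 - j]: arguments land on the
    // expression-stack area in reverse order.
    void copy_args_reversed(const uint64_t* params, int count, uint64_t* dest) {
      for (int j = 0; j < count; j++) {
        dest[j] = params[count - 1 - j];
      }
    }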
++ // ++ // V0: exception oop ++ ++ address generate_catch_exception() { ++ StubCodeMark mark(this, "StubRoutines", "catch_exception"); ++ address start = __ pc(); ++ ++ Register thread = TREG; ++ ++ // get thread directly ++#ifndef OPT_THREAD ++ __ ld_d(thread, FP, thread_off * wordSize); ++#endif ++ ++#ifdef ASSERT ++ // verify that threads correspond ++ { Label L; ++ __ get_thread(T8); ++ __ beq(T8, thread, L); ++ __ stop("StubRoutines::catch_exception: threads must correspond"); ++ __ bind(L); ++ } ++#endif ++ // set pending exception ++ __ verify_oop(V0); ++ __ st_d(V0, thread, in_bytes(Thread::pending_exception_offset())); ++ __ li(AT, (long)__FILE__); ++ __ st_d(AT, thread, in_bytes(Thread::exception_file_offset ())); ++ __ li(AT, (long)__LINE__); ++ __ st_d(AT, thread, in_bytes(Thread::exception_line_offset ())); ++ ++ // complete return to VM ++ assert(StubRoutines::_call_stub_return_address != NULL, "_call_stub_return_address must have been generated before"); ++ __ jmp(StubRoutines::_call_stub_return_address, relocInfo::none); ++ return start; ++ } ++ ++ // Continuation point for runtime calls returning with a pending ++ // exception. The pending exception check happened in the runtime ++ // or native call stub. The pending exception in Thread is ++ // converted into a Java-level exception. ++ // ++ // Contract with Java-level exception handlers: ++ // V0: exception ++ // V1: throwing pc ++ // ++ // NOTE: At entry of this stub, exception-pc must be on stack !! ++ ++ address generate_forward_exception() { ++ StubCodeMark mark(this, "StubRoutines", "forward exception"); ++ //Register thread = TREG; ++ Register thread = TREG; ++ address start = __ pc(); ++ ++ // Upon entry, the sp points to the return address returning into ++ // Java (interpreted or compiled) code; i.e., the return address ++ // throwing pc. ++ // ++ // Arguments pushed before the runtime call are still on the stack ++ // but the exception handler will reset the stack pointer -> ++ // ignore them. A potential result in registers can be ignored as ++ // well. ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++#ifdef ASSERT ++ // make sure this code is only executed if there is a pending exception ++ { ++ Label L; ++ __ ld_d(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, L); ++ __ stop("StubRoutines::forward exception: no pending exception (1)"); ++ __ bind(L); ++ } ++#endif ++ ++ // compute exception handler into T4 ++ __ ld_d(A1, SP, 0); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, A1); ++ __ move(T4, V0); ++ __ pop(V1); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ ld_d(V0, thread, in_bytes(Thread::pending_exception_offset())); ++ __ st_d(R0, thread, in_bytes(Thread::pending_exception_offset())); ++ ++#ifdef ASSERT ++ // make sure exception is set ++ { ++ Label L; ++ __ bne(V0, R0, L); ++ __ stop("StubRoutines::forward exception: no pending exception (2)"); ++ __ bind(L); ++ } ++#endif ++ ++ // continue at exception handler (return address removed) ++ // V0: exception ++ // T4: exception handler ++ // V1: throwing pc ++ __ verify_oop(V0); ++ __ jr(T4); ++ return start; ++ } ++ ++ // Non-destructive plausibility checks for oops ++ // ++ address generate_verify_oop() { ++ StubCodeMark mark(this, "StubRoutines", "verify_oop"); ++ address start = __ pc(); ++ __ verify_oop_subroutine(); ++ address end = __ pc(); ++ return start; ++ } ++ ++ // ++ // Generate stub for array fill. 
If "aligned" is true, the ++ // "to" address is assumed to be heapword aligned. ++ // ++ // Arguments for generated stub: ++ // to: A0 ++ // value: A1 ++ // count: A2 treated as signed ++ // ++ address generate_fill(BasicType t, bool aligned, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ const Register to = A0; // source array address ++ const Register value = A1; // value ++ const Register count = A2; // elements count ++ ++ const Register end = T5; // source array address end ++ const Register tmp = T8; // temp register ++ ++ Label L_fill_elements; ++ ++ int shift = -1; ++ switch (t) { ++ case T_BYTE: ++ shift = 0; ++ __ slti(AT, count, 9); // Short arrays (<= 8 bytes) fill by element ++ __ bstrins_d(value, value, 15, 8); // 8 bit -> 16 bit ++ __ bstrins_d(value, value, 31, 16); // 16 bit -> 32 bit ++ __ bstrins_d(value, value, 63, 32); // 32 bit -> 64 bit ++ __ bnez(AT, L_fill_elements); ++ break; ++ case T_SHORT: ++ shift = 1; ++ __ slti(AT, count, 5); // Short arrays (<= 8 bytes) fill by element ++ __ bstrins_d(value, value, 31, 16); // 16 bit -> 32 bit ++ __ bstrins_d(value, value, 63, 32); // 32 bit -> 64 bit ++ __ bnez(AT, L_fill_elements); ++ break; ++ case T_INT: ++ shift = 2; ++ __ slti(AT, count, 3); // Short arrays (<= 8 bytes) fill by element ++ __ bstrins_d(value, value, 63, 32); // 32 bit -> 64 bit ++ __ bnez(AT, L_fill_elements); ++ break; ++ default: ShouldNotReachHere(); ++ } ++ ++ switch (t) { ++ case T_BYTE: ++ __ add_d(end, to, count); ++ break; ++ case T_SHORT: ++ case T_INT: ++ __ alsl_d(end, count, to, shift-1); ++ break; ++ default: ShouldNotReachHere(); ++ } ++ if (!aligned) { ++ __ st_d(value, to, 0); ++ __ bstrins_d(to, R0, 2, 0); ++ __ addi_d(to, to, 8); ++ } ++ __ st_d(value, end, -8); ++ __ bstrins_d(end, R0, 2, 0); ++ ++ // ++ // Fill large chunks ++ // ++ Label L_loop_begin, L_not_64bytes_fill, L_loop_end, L_jtab1, L_jtab2; ++ __ addi_d(AT, to, 64); ++ __ blt(end, AT, L_not_64bytes_fill); ++ __ addi_d(to, to, 64); ++ __ bind(L_loop_begin); ++ __ st_d(value, to, -8); ++ __ st_d(value, to, -16); ++ __ st_d(value, to, -24); ++ __ st_d(value, to, -32); ++ __ st_d(value, to, -40); ++ __ st_d(value, to, -48); ++ __ st_d(value, to, -56); ++ __ st_d(value, to, -64); ++ __ addi_d(to, to, 64); ++ __ bge(end, to, L_loop_begin); ++ __ addi_d(to, to, -64); ++ __ beq(to, end, L_loop_end); ++ ++ __ bind(L_not_64bytes_fill); ++ // There are 0 - 7 words ++ __ lipc(AT, L_jtab1); ++ __ sub_d(tmp, end, to); ++ __ alsl_d(AT, tmp, AT, 1); ++ __ jr(AT); ++ ++ __ bind(L_jtab1); ++ // 0: ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 1: ++ __ st_d(value, to, 0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 2: ++ __ st_d(value, to, 0); ++ __ st_d(value, to, 8); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 3: ++ __ st_d(value, to, 0); ++ __ st_d(value, to, 8); ++ __ st_d(value, to, 16); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 4: ++ __ st_d(value, to, 0); ++ __ st_d(value, to, 8); ++ __ st_d(value, to, 16); ++ __ st_d(value, to, 24); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 5: ++ __ st_d(value, to, 0); ++ __ st_d(value, to, 8); ++ __ st_d(value, to, 16); ++ __ st_d(value, to, 24); ++ __ st_d(value, to, 32); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 6: ++ __ st_d(value, 
to, 0); ++ __ st_d(value, to, 8); ++ __ st_d(value, to, 16); ++ __ st_d(value, to, 24); ++ __ st_d(value, to, 32); ++ __ st_d(value, to, 40); ++ __ jr(RA); ++ __ nop(); ++ ++ // 7: ++ __ st_d(value, to, 0); ++ __ st_d(value, to, 8); ++ __ st_d(value, to, 16); ++ __ st_d(value, to, 24); ++ __ st_d(value, to, 32); ++ __ st_d(value, to, 40); ++ __ st_d(value, to, 48); ++ ++ __ bind(L_loop_end); ++ __ jr(RA); ++ ++ // Short arrays (<= 8 bytes) ++ __ bind(L_fill_elements); ++ __ lipc(AT, L_jtab2); ++ __ slli_d(tmp, count, 4 + shift); ++ __ add_d(AT, AT, tmp); ++ __ jr(AT); ++ ++ __ bind(L_jtab2); ++ // 0: ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 1: ++ __ st_b(value, to, 0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 2: ++ __ st_h(value, to, 0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 3: ++ __ st_h(value, to, 0); ++ __ st_b(value, to, 2); ++ __ jr(RA); ++ __ nop(); ++ ++ // 4: ++ __ st_w(value, to, 0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 5: ++ __ st_w(value, to, 0); ++ __ st_b(value, to, 4); ++ __ jr(RA); ++ __ nop(); ++ ++ // 6: ++ __ st_w(value, to, 0); ++ __ st_h(value, to, 4); ++ __ jr(RA); ++ __ nop(); ++ ++ // 7: ++ __ st_w(value, to, 0); ++ __ st_w(value, to, 3); ++ __ jr(RA); ++ __ nop(); ++ ++ // 8: ++ __ st_d(value, to, 0); ++ __ jr(RA); ++ return start; ++ } ++ ++ // ++ // Generate overlap test for array copy stubs ++ // ++ // Input: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count ++ // ++ // Temp: ++ // AT - destination array address - source array address ++ // T4 - element count * element size ++ // ++ void array_overlap_test(address no_overlap_target, int log2_elem_size) { ++ __ slli_d(T4, A2, log2_elem_size); ++ __ sub_d(AT, A1, A0); ++ __ bgeu(AT, T4, no_overlap_target); ++ } ++ ++ // disjoint large copy ++ void generate_disjoint_large_copy(Label &entry, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ Label loop, le32, le16, le8, lt8; ++ ++ __ bind(entry); ++ __ add_d(A3, A1, A2); ++ __ add_d(A2, A0, A2); ++ __ ld_d(A6, A0, 0); ++ __ ld_d(A7, A2, -8); ++ ++ __ andi(T1, A0, 7); ++ __ sub_d(T0, R0, T1); ++ __ addi_d(T0, T0, 8); ++ ++ __ add_d(A0, A0, T0); ++ __ add_d(A5, A1, T0); ++ ++ __ addi_d(A4, A2, -64); ++ __ bgeu(A0, A4, le32); ++ ++ __ bind(loop); ++ __ ld_d(T0, A0, 0); ++ __ ld_d(T1, A0, 8); ++ __ ld_d(T2, A0, 16); ++ __ ld_d(T3, A0, 24); ++ __ ld_d(T4, A0, 32); ++ __ ld_d(T5, A0, 40); ++ __ ld_d(T6, A0, 48); ++ __ ld_d(T7, A0, 56); ++ __ addi_d(A0, A0, 64); ++ __ st_d(T0, A5, 0); ++ __ st_d(T1, A5, 8); ++ __ st_d(T2, A5, 16); ++ __ st_d(T3, A5, 24); ++ __ st_d(T4, A5, 32); ++ __ st_d(T5, A5, 40); ++ __ st_d(T6, A5, 48); ++ __ st_d(T7, A5, 56); ++ __ addi_d(A5, A5, 64); ++ __ bltu(A0, A4, loop); ++ ++ __ bind(le32); ++ __ addi_d(A4, A2, -32); ++ __ bgeu(A0, A4, le16); ++ __ ld_d(T0, A0, 0); ++ __ ld_d(T1, A0, 8); ++ __ ld_d(T2, A0, 16); ++ __ ld_d(T3, A0, 24); ++ __ addi_d(A0, A0, 32); ++ __ st_d(T0, A5, 0); ++ __ st_d(T1, A5, 8); ++ __ st_d(T2, A5, 16); ++ __ st_d(T3, A5, 24); ++ __ addi_d(A5, A5, 32); ++ ++ __ bind(le16); ++ __ addi_d(A4, A2, -16); ++ __ bgeu(A0, A4, le8); ++ __ ld_d(T0, A0, 0); ++ __ ld_d(T1, A0, 8); ++ __ addi_d(A0, A0, 16); ++ __ st_d(T0, A5, 0); ++ __ st_d(T1, A5, 8); ++ __ addi_d(A5, A5, 16); ++ ++ __ bind(le8); ++ __ addi_d(A4, A2, -8); ++ __ bgeu(A0, A4, lt8); ++ __ ld_d(T0, A0, 0); ++ __ st_d(T0, A5, 0); ++ ++ __ bind(lt8); ++ __ st_d(A6, A1, 0); ++ __ st_d(A7, A3, -8); ++ __ move(A0, R0); ++ 
__ jr(RA); ++ } ++ ++ // disjoint large copy lsx ++ void generate_disjoint_large_copy_lsx(Label &entry, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ Label loop, le64, le32, le16, lt16; ++ ++ __ bind(entry); ++ __ add_d(A3, A1, A2); ++ __ add_d(A2, A0, A2); ++ __ vld(F0, A0, 0); ++ __ vld(F1, A2, -16); ++ ++ __ andi(T1, A0, 15); ++ __ sub_d(T0, R0, T1); ++ __ addi_d(T0, T0, 16); ++ ++ __ add_d(A0, A0, T0); ++ __ add_d(A5, A1, T0); ++ ++ __ addi_d(A4, A2, -128); ++ __ bgeu(A0, A4, le64); ++ ++ __ bind(loop); ++ __ vld(FT0, A0, 0); ++ __ vld(FT1, A0, 16); ++ __ vld(FT2, A0, 32); ++ __ vld(FT3, A0, 48); ++ __ vld(FT4, A0, 64); ++ __ vld(FT5, A0, 80); ++ __ vld(FT6, A0, 96); ++ __ vld(FT7, A0, 112); ++ __ addi_d(A0, A0, 128); ++ __ vst(FT0, A5, 0); ++ __ vst(FT1, A5, 16); ++ __ vst(FT2, A5, 32); ++ __ vst(FT3, A5, 48); ++ __ vst(FT4, A5, 64); ++ __ vst(FT5, A5, 80); ++ __ vst(FT6, A5, 96); ++ __ vst(FT7, A5, 112); ++ __ addi_d(A5, A5, 128); ++ __ bltu(A0, A4, loop); ++ ++ __ bind(le64); ++ __ addi_d(A4, A2, -64); ++ __ bgeu(A0, A4, le32); ++ __ vld(FT0, A0, 0); ++ __ vld(FT1, A0, 16); ++ __ vld(FT2, A0, 32); ++ __ vld(FT3, A0, 48); ++ __ addi_d(A0, A0, 64); ++ __ vst(FT0, A5, 0); ++ __ vst(FT1, A5, 16); ++ __ vst(FT2, A5, 32); ++ __ vst(FT3, A5, 48); ++ __ addi_d(A5, A5, 64); ++ ++ __ bind(le32); ++ __ addi_d(A4, A2, -32); ++ __ bgeu(A0, A4, le16); ++ __ vld(FT0, A0, 0); ++ __ vld(FT1, A0, 16); ++ __ addi_d(A0, A0, 32); ++ __ vst(FT0, A5, 0); ++ __ vst(FT1, A5, 16); ++ __ addi_d(A5, A5, 32); ++ ++ __ bind(le16); ++ __ addi_d(A4, A2, -16); ++ __ bgeu(A0, A4, lt16); ++ __ vld(FT0, A0, 0); ++ __ vst(FT0, A5, 0); ++ ++ __ bind(lt16); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A3, -16); ++ ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ // disjoint large copy lasx ++ void generate_disjoint_large_copy_lasx(Label &entry, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ Label loop, le128, le64, le32, lt32; ++ ++ __ bind(entry); ++ __ add_d(A3, A1, A2); ++ __ add_d(A2, A0, A2); ++ __ xvld(F0, A0, 0); ++ __ xvld(F1, A2, -32); ++ ++ __ andi(T1, A0, 31); ++ __ sub_d(T0, R0, T1); ++ __ addi_d(T0, T0, 32); ++ ++ __ add_d(A0, A0, T0); ++ __ add_d(A5, A1, T0); ++ ++ __ addi_d(A4, A2, -256); ++ __ bgeu(A0, A4, le128); ++ ++ __ bind(loop); ++ __ xvld(FT0, A0, 0); ++ __ xvld(FT1, A0, 32); ++ __ xvld(FT2, A0, 64); ++ __ xvld(FT3, A0, 96); ++ __ xvld(FT4, A0, 128); ++ __ xvld(FT5, A0, 160); ++ __ xvld(FT6, A0, 192); ++ __ xvld(FT7, A0, 224); ++ __ addi_d(A0, A0, 256); ++ __ xvst(FT0, A5, 0); ++ __ xvst(FT1, A5, 32); ++ __ xvst(FT2, A5, 64); ++ __ xvst(FT3, A5, 96); ++ __ xvst(FT4, A5, 128); ++ __ xvst(FT5, A5, 160); ++ __ xvst(FT6, A5, 192); ++ __ xvst(FT7, A5, 224); ++ __ addi_d(A5, A5, 256); ++ __ bltu(A0, A4, loop); ++ ++ __ bind(le128); ++ __ addi_d(A4, A2, -128); ++ __ bgeu(A0, A4, le64); ++ __ xvld(FT0, A0, 0); ++ __ xvld(FT1, A0, 32); ++ __ xvld(FT2, A0, 64); ++ __ xvld(FT3, A0, 96); ++ __ addi_d(A0, A0, 128); ++ __ xvst(FT0, A5, 0); ++ __ xvst(FT1, A5, 32); ++ __ xvst(FT2, A5, 64); ++ __ xvst(FT3, A5, 96); ++ __ addi_d(A5, A5, 128); ++ ++ __ bind(le64); ++ __ addi_d(A4, A2, -64); ++ __ bgeu(A0, A4, le32); ++ __ xvld(FT0, A0, 0); ++ __ xvld(FT1, A0, 32); ++ __ addi_d(A0, A0, 64); ++ __ xvst(FT0, A5, 0); ++ __ xvst(FT1, A5, 32); ++ __ addi_d(A5, A5, 64); ++ ++ __ bind(le32); ++ __ addi_d(A4, A2, -32); ++ __ bgeu(A0, A4, lt32); ++ __ xvld(FT0, A0, 0); ++ __ xvst(FT0, A5, 0); ++ ++ __ bind(lt32); ++ 
__ xvst(F0, A1, 0); ++ __ xvst(F1, A3, -32); ++ ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ // conjoint large copy ++ void generate_conjoint_large_copy(Label &entry, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ Label loop, le32, le16, le8, lt8; ++ ++ __ bind(entry); ++ __ add_d(A3, A1, A2); ++ __ add_d(A2, A0, A2); ++ __ ld_d(A6, A0, 0); ++ __ ld_d(A7, A2, -8); ++ ++ __ andi(T1, A2, 7); ++ __ sub_d(A2, A2, T1); ++ __ sub_d(A5, A3, T1); ++ ++ __ addi_d(A4, A0, 64); ++ __ bgeu(A4, A2, le32); ++ ++ __ bind(loop); ++ __ ld_d(T0, A2, -8); ++ __ ld_d(T1, A2, -16); ++ __ ld_d(T2, A2, -24); ++ __ ld_d(T3, A2, -32); ++ __ ld_d(T4, A2, -40); ++ __ ld_d(T5, A2, -48); ++ __ ld_d(T6, A2, -56); ++ __ ld_d(T7, A2, -64); ++ __ addi_d(A2, A2, -64); ++ __ st_d(T0, A5, -8); ++ __ st_d(T1, A5, -16); ++ __ st_d(T2, A5, -24); ++ __ st_d(T3, A5, -32); ++ __ st_d(T4, A5, -40); ++ __ st_d(T5, A5, -48); ++ __ st_d(T6, A5, -56); ++ __ st_d(T7, A5, -64); ++ __ addi_d(A5, A5, -64); ++ __ bltu(A4, A2, loop); ++ ++ __ bind(le32); ++ __ addi_d(A4, A0, 32); ++ __ bgeu(A4, A2, le16); ++ __ ld_d(T0, A2, -8); ++ __ ld_d(T1, A2, -16); ++ __ ld_d(T2, A2, -24); ++ __ ld_d(T3, A2, -32); ++ __ addi_d(A2, A2, -32); ++ __ st_d(T0, A5, -8); ++ __ st_d(T1, A5, -16); ++ __ st_d(T2, A5, -24); ++ __ st_d(T3, A5, -32); ++ __ addi_d(A5, A5, -32); ++ ++ __ bind(le16); ++ __ addi_d(A4, A0, 16); ++ __ bgeu(A4, A2, le8); ++ __ ld_d(T0, A2, -8); ++ __ ld_d(T1, A2, -16); ++ __ addi_d(A2, A2, -16); ++ __ st_d(T0, A5, -8); ++ __ st_d(T1, A5, -16); ++ __ addi_d(A5, A5, -16); ++ ++ __ bind(le8); ++ __ addi_d(A4, A0, 8); ++ __ bgeu(A4, A2, lt8); ++ __ ld_d(T0, A2, -8); ++ __ st_d(T0, A5, -8); ++ ++ __ bind(lt8); ++ __ st_d(A6, A1, 0); ++ __ st_d(A7, A3, -8); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ // conjoint large copy lsx ++ void generate_conjoint_large_copy_lsx(Label &entry, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ Label loop, le64, le32, le16, lt16; ++ ++ __ bind(entry); ++ __ add_d(A3, A1, A2); ++ __ add_d(A2, A0, A2); ++ __ vld(F0, A0, 0); ++ __ vld(F1, A2, -16); ++ ++ __ andi(T1, A2, 15); ++ __ sub_d(A2, A2, T1); ++ __ sub_d(A5, A3, T1); ++ ++ __ addi_d(A4, A0, 128); ++ __ bgeu(A4, A2, le64); ++ ++ __ bind(loop); ++ __ vld(FT0, A2, -16); ++ __ vld(FT1, A2, -32); ++ __ vld(FT2, A2, -48); ++ __ vld(FT3, A2, -64); ++ __ vld(FT4, A2, -80); ++ __ vld(FT5, A2, -96); ++ __ vld(FT6, A2, -112); ++ __ vld(FT7, A2, -128); ++ __ addi_d(A2, A2, -128); ++ __ vst(FT0, A5, -16); ++ __ vst(FT1, A5, -32); ++ __ vst(FT2, A5, -48); ++ __ vst(FT3, A5, -64); ++ __ vst(FT4, A5, -80); ++ __ vst(FT5, A5, -96); ++ __ vst(FT6, A5, -112); ++ __ vst(FT7, A5, -128); ++ __ addi_d(A5, A5, -128); ++ __ bltu(A4, A2, loop); ++ ++ __ bind(le64); ++ __ addi_d(A4, A0, 64); ++ __ bgeu(A4, A2, le32); ++ __ vld(FT0, A2, -16); ++ __ vld(FT1, A2, -32); ++ __ vld(FT2, A2, -48); ++ __ vld(FT3, A2, -64); ++ __ addi_d(A2, A2, -64); ++ __ vst(FT0, A5, -16); ++ __ vst(FT1, A5, -32); ++ __ vst(FT2, A5, -48); ++ __ vst(FT3, A5, -64); ++ __ addi_d(A5, A5, -64); ++ ++ __ bind(le32); ++ __ addi_d(A4, A0, 32); ++ __ bgeu(A4, A2, le16); ++ __ vld(FT0, A2, -16); ++ __ vld(FT1, A2, -32); ++ __ addi_d(A2, A2, -32); ++ __ vst(FT0, A5, -16); ++ __ vst(FT1, A5, -32); ++ __ addi_d(A5, A5, -32); ++ ++ __ bind(le16); ++ __ addi_d(A4, A0, 16); ++ __ bgeu(A4, A2, lt16); ++ __ vld(FT0, A2, -16); ++ __ vst(FT0, A5, -16); ++ ++ __ bind(lt16); ++ __ vst(F0, A1, 0); ++ __ 
vst(F1, A3, -16); ++ ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ // conjoint large copy lasx ++ void generate_conjoint_large_copy_lasx(Label &entry, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ Label loop, le128, le64, le32, lt32; ++ ++ __ bind(entry); ++ __ add_d(A3, A1, A2); ++ __ add_d(A2, A0, A2); ++ __ xvld(F0, A0, 0); ++ __ xvld(F1, A2, -32); ++ ++ __ andi(T1, A2, 31); ++ __ sub_d(A2, A2, T1); ++ __ sub_d(A5, A3, T1); ++ ++ __ addi_d(A4, A0, 256); ++ __ bgeu(A4, A2, le128); ++ ++ __ bind(loop); ++ __ xvld(FT0, A2, -32); ++ __ xvld(FT1, A2, -64); ++ __ xvld(FT2, A2, -96); ++ __ xvld(FT3, A2, -128); ++ __ xvld(FT4, A2, -160); ++ __ xvld(FT5, A2, -192); ++ __ xvld(FT6, A2, -224); ++ __ xvld(FT7, A2, -256); ++ __ addi_d(A2, A2, -256); ++ __ xvst(FT0, A5, -32); ++ __ xvst(FT1, A5, -64); ++ __ xvst(FT2, A5, -96); ++ __ xvst(FT3, A5, -128); ++ __ xvst(FT4, A5, -160); ++ __ xvst(FT5, A5, -192); ++ __ xvst(FT6, A5, -224); ++ __ xvst(FT7, A5, -256); ++ __ addi_d(A5, A5, -256); ++ __ bltu(A4, A2, loop); ++ ++ __ bind(le128); ++ __ addi_d(A4, A0, 128); ++ __ bgeu(A4, A2, le64); ++ __ xvld(FT0, A2, -32); ++ __ xvld(FT1, A2, -64); ++ __ xvld(FT2, A2, -96); ++ __ xvld(FT3, A2, -128); ++ __ addi_d(A2, A2, -128); ++ __ xvst(FT0, A5, -32); ++ __ xvst(FT1, A5, -64); ++ __ xvst(FT2, A5, -96); ++ __ xvst(FT3, A5, -128); ++ __ addi_d(A5, A5, -128); ++ ++ __ bind(le64); ++ __ addi_d(A4, A0, 64); ++ __ bgeu(A4, A2, le32); ++ __ xvld(FT0, A2, -32); ++ __ xvld(FT1, A2, -64); ++ __ addi_d(A2, A2, -64); ++ __ xvst(FT0, A5, -32); ++ __ xvst(FT1, A5, -64); ++ __ addi_d(A5, A5, -64); ++ ++ __ bind(le32); ++ __ addi_d(A4, A0, 32); ++ __ bgeu(A4, A2, lt32); ++ __ xvld(FT0, A2, -32); ++ __ xvst(FT0, A5, -32); ++ ++ __ bind(lt32); ++ __ xvst(F0, A1, 0); ++ __ xvst(F1, A3, -32); ++ ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ // Byte small copy: less than { int:9, lsx:17, lasx:33 } elements. 
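Before the per-element jump tables that follow, one detail of the six large-copy loops just generated is worth spelling out: the first and last word (or vector) of the source are loaded before the main loop and stored after it, so the source-aligned bulk loop never needs special cases for a ragged head or tail. A minimal C++ sketch of the forward (disjoint) variant, assuming at least 8 bytes to copy (which the small-copy thresholds guarantee); the function name is ours, not the patch's:

#include <cstddef>
#include <cstdint>
#include <cstring>

void large_copy_sketch(uint8_t* dst, const uint8_t* src, size_t nbytes) {
  uint64_t head, tail;                      // A6/A7 (or F0/F1) in the stubs
  std::memcpy(&head, src, 8);               // first source word, read up front
  std::memcpy(&tail, src + nbytes - 8, 8);  // last source word, read up front
  size_t off = 8 - ((uintptr_t)src & 7);    // first 8-byte-aligned source offset
  for (; off + 8 <= nbytes; off += 8)       // aligned middle; the stubs unroll this
    std::memcpy(dst + off, src + off, 8);   //  8x and widen it for LSX/LASX
  std::memcpy(dst, &head, 8);               // ragged head, written last
  std::memcpy(dst + nbytes - 8, &tail, 8);  // ragged tail (may overlap the loop's stores)
}

The conjoint variants apply the same idea while walking backwards from the end of the arrays.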
++ void generate_byte_small_copy(Label &entry, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ Label L; ++ __ bind(entry); ++ __ lipc(AT, L); ++ __ slli_d(A2, A2, 5); ++ __ add_d(AT, AT, A2); ++ __ jr(AT); ++ ++ __ bind(L); ++ // 0: ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 1: ++ __ ld_b(AT, A0, 0); ++ __ st_b(AT, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 2: ++ __ ld_h(AT, A0, 0); ++ __ st_h(AT, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 3: ++ __ ld_h(AT, A0, 0); ++ __ ld_b(A2, A0, 2); ++ __ st_h(AT, A1, 0); ++ __ st_b(A2, A1, 2); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 4: ++ __ ld_w(AT, A0, 0); ++ __ st_w(AT, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 5: ++ __ ld_w(AT, A0, 0); ++ __ ld_b(A2, A0, 4); ++ __ st_w(AT, A1, 0); ++ __ st_b(A2, A1, 4); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 6: ++ __ ld_w(AT, A0, 0); ++ __ ld_h(A2, A0, 4); ++ __ st_w(AT, A1, 0); ++ __ st_h(A2, A1, 4); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 7: ++ __ ld_w(AT, A0, 0); ++ __ ld_w(A2, A0, 3); ++ __ st_w(AT, A1, 0); ++ __ st_w(A2, A1, 3); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 8: ++ __ ld_d(AT, A0, 0); ++ __ st_d(AT, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ ++ if (!UseLSX) ++ return; ++ ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 9: ++ __ ld_d(AT, A0, 0); ++ __ ld_b(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_b(A2, A1, 8); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 10: ++ __ ld_d(AT, A0, 0); ++ __ ld_h(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_h(A2, A1, 8); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 11: ++ __ ld_d(AT, A0, 0); ++ __ ld_w(A2, A0, 7); ++ __ st_d(AT, A1, 0); ++ __ st_w(A2, A1, 7); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 12: ++ __ ld_d(AT, A0, 0); ++ __ ld_w(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_w(A2, A1, 8); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 13: ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 5); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 5); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 14: ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 6); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 6); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 15: ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 7); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 7); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 16: ++ __ vld(F0, A0, 0); ++ __ vst(F0, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ if (!UseLASX) ++ return; ++ ++ // 17: ++ __ vld(F0, A0, 0); ++ __ ld_b(AT, A0, 16); ++ __ vst(F0, A1, 0); ++ __ st_b(AT, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 18: ++ __ vld(F0, A0, 0); ++ __ ld_h(AT, A0, 16); ++ __ vst(F0, A1, 0); ++ __ st_h(AT, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 19: ++ __ vld(F0, A0, 0); ++ __ ld_w(AT, A0, 15); ++ __ vst(F0, A1, 0); ++ __ st_w(AT, A1, 15); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 20: ++ __ vld(F0, A0, 0); ++ __ ld_w(AT, A0, 16); 
++ __ vst(F0, A1, 0); ++ __ st_w(AT, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 21: ++ __ vld(F0, A0, 0); ++ __ ld_d(AT, A0, 13); ++ __ vst(F0, A1, 0); ++ __ st_d(AT, A1, 13); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 22: ++ __ vld(F0, A0, 0); ++ __ ld_d(AT, A0, 14); ++ __ vst(F0, A1, 0); ++ __ st_d(AT, A1, 14); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 23: ++ __ vld(F0, A0, 0); ++ __ ld_d(AT, A0, 15); ++ __ vst(F0, A1, 0); ++ __ st_d(AT, A1, 15); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 24: ++ __ vld(F0, A0, 0); ++ __ ld_d(AT, A0, 16); ++ __ vst(F0, A1, 0); ++ __ st_d(AT, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 25: ++ __ vld(F0, A0, 0); ++ __ vld(F1, A0, 9); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 9); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 26: ++ __ vld(F0, A0, 0); ++ __ vld(F1, A0, 10); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 10); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 27: ++ __ vld(F0, A0, 0); ++ __ vld(F1, A0, 11); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 11); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 28: ++ __ vld(F0, A0, 0); ++ __ vld(F1, A0, 12); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 12); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 29: ++ __ vld(F0, A0, 0); ++ __ vld(F1, A0, 13); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 13); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 30: ++ __ vld(F0, A0, 0); ++ __ vld(F1, A0, 14); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 14); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 31: ++ __ vld(F0, A0, 0); ++ __ vld(F1, A0, 15); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 15); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 32: ++ __ xvld(F0, A0, 0); ++ __ xvst(F0, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, ++ // we let the hardware handle it. The one to eight bytes within words, ++ // dwords or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ // Side Effects: ++ // disjoint_byte_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_byte_copy(). 
++ // ++ address generate_disjoint_byte_copy(bool aligned, Label &small, Label &large, ++ Label &large_aligned, const char * name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ if (UseLASX) ++ __ sltui(T0, A2, 33); ++ else if (UseLSX) ++ __ sltui(T0, A2, 17); ++ else ++ __ sltui(T0, A2, 9); ++ __ bnez(T0, small); ++ ++ if (large_aligned.is_bound()) { ++ __ orr(T0, A0, A1); ++ __ andi(T0, T0, 7); ++ __ beqz(T0, large_aligned); ++ } ++ ++ __ b(large); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, ++ // we let the hardware handle it. The one to eight bytes within words, ++ // dwords or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ address generate_conjoint_byte_copy(bool aligned, Label &small, Label &large, ++ Label &large_aligned, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ array_overlap_test(StubRoutines::jbyte_disjoint_arraycopy(), 0); ++ ++ if (UseLASX) ++ __ sltui(T0, A2, 33); ++ else if (UseLSX) ++ __ sltui(T0, A2, 17); ++ else ++ __ sltui(T0, A2, 9); ++ __ bnez(T0, small); ++ ++ if (large_aligned.is_bound()) { ++ __ orr(T0, A0, A1); ++ __ andi(T0, T0, 7); ++ __ beqz(T0, large_aligned); ++ } ++ ++ __ b(large); ++ ++ return start; ++ } ++ ++ // Short small copy: less than { int:9, lsx:9, lasx:17 } elements. 
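The small-copy stubs (generate_byte_small_copy above, generate_short_small_copy below) dispatch on the raw element count through a computed jump: every case body is padded with nops to a fixed 32-byte slot, that is eight 4-byte instructions, so the target address is simply the table base plus count * 32, which is exactly what the lipc / slli_d(A2, A2, 5) / add_d / jr sequence computes. A one-line equivalent, with a name of our choosing:

#include <cstddef>
#include <cstdint>

// Each jump-table case occupies a fixed 32-byte slot, so dispatch is a shift and add.
static inline const void* small_copy_entry(const void* table_base, size_t count) {
  return static_cast<const uint8_t*>(table_base) + (count << 5);  // count * 32 bytes
}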
++ void generate_short_small_copy(Label &entry, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ Label L; ++ __ bind(entry); ++ __ lipc(AT, L); ++ __ slli_d(A2, A2, 5); ++ __ add_d(AT, AT, A2); ++ __ jr(AT); ++ ++ __ bind(L); ++ // 0: ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 1: ++ __ ld_h(AT, A0, 0); ++ __ st_h(AT, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 2: ++ __ ld_w(AT, A0, 0); ++ __ st_w(AT, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 3: ++ __ ld_w(AT, A0, 0); ++ __ ld_h(A2, A0, 4); ++ __ st_w(AT, A1, 0); ++ __ st_h(A2, A1, 4); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 4: ++ __ ld_d(AT, A0, 0); ++ __ st_d(AT, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 5: ++ __ ld_d(AT, A0, 0); ++ __ ld_h(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_h(A2, A1, 8); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 6: ++ __ ld_d(AT, A0, 0); ++ __ ld_w(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_w(A2, A1, 8); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 7: ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 6); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 6); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 8: ++ if (UseLSX) { ++ __ vld(F0, A0, 0); ++ __ vst(F0, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ } else { ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 8); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ if (!UseLASX) ++ return; ++ ++ __ nop(); ++ __ nop(); ++ ++ // 9: ++ __ vld(F0, A0, 0); ++ __ ld_h(AT, A0, 16); ++ __ vst(F0, A1, 0); ++ __ st_h(AT, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 10: ++ __ vld(F0, A0, 0); ++ __ ld_w(AT, A0, 16); ++ __ vst(F0, A1, 0); ++ __ st_w(AT, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 11: ++ __ vld(F0, A0, 0); ++ __ ld_d(AT, A0, 14); ++ __ vst(F0, A1, 0); ++ __ st_d(AT, A1, 14); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 12: ++ __ vld(F0, A0, 0); ++ __ ld_d(AT, A0, 16); ++ __ vst(F0, A1, 0); ++ __ st_d(AT, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 13: ++ __ vld(F0, A0, 0); ++ __ vld(F1, A0, 10); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 10); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 14: ++ __ vld(F0, A0, 0); ++ __ vld(F1, A0, 12); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 12); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 15: ++ __ vld(F0, A0, 0); ++ __ vld(F1, A0, 14); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 14); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 16: ++ __ xvld(F0, A0, 0); ++ __ xvst(F0, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, ++ // we let the hardware handle it. 
The one to eight bytes within words, ++ // dwords or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ // Side Effects: ++ // disjoint_short_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_short_copy(). ++ // ++ address generate_disjoint_short_copy(bool aligned, Label &small, Label &large, ++ Label &large_aligned, const char * name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ if (UseLASX) ++ __ sltui(T0, A2, 17); ++ else ++ __ sltui(T0, A2, 9); ++ __ bnez(T0, small); ++ ++ __ slli_d(A2, A2, 1); ++ ++ if (large_aligned.is_bound()) { ++ __ orr(T0, A0, A1); ++ __ andi(T0, T0, 7); ++ __ beqz(T0, large_aligned); ++ } ++ ++ __ b(large); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we ++ // let the hardware handle it. The two or four words within dwords ++ // or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ address generate_conjoint_short_copy(bool aligned, Label &small, Label &large, ++ Label &large_aligned, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ array_overlap_test(StubRoutines::jshort_disjoint_arraycopy(), 1); ++ ++ if (UseLASX) ++ __ sltui(T0, A2, 17); ++ else ++ __ sltui(T0, A2, 9); ++ __ bnez(T0, small); ++ ++ __ slli_d(A2, A2, 1); ++ ++ if (large_aligned.is_bound()) { ++ __ orr(T0, A0, A1); ++ __ andi(T0, T0, 7); ++ __ beqz(T0, large_aligned); ++ } ++ ++ __ b(large); ++ ++ return start; ++ } ++ ++ // Int small copy: less than { int:7, lsx:7, lasx:9 } elements. 
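The entry stubs just generated all make the same two decisions before tail-calling a copy loop, so a compilable summary may help (function and enum names are ours): the conjoint variants first run array_overlap_test, where a forward copy is safe whenever the unsigned distance from source to destination is at least the byte count, and every variant then routes small counts to the jump table, rescales the count to bytes where needed, and prefers the 8-byte-aligned bulk loop when one was bound and both pointers qualify.

#include <cstddef>
#include <cstdint>

// Mirrors array_overlap_test: slli_d(T4, A2, log2); sub_d(AT, A1, A0); bgeu(AT, T4, ...).
// If dst is below src the subtraction wraps to a large unsigned value, which is safe.
bool forward_copy_is_safe(const void* src, const void* dst,
                          size_t count, int log2_elem_size) {
  return (uintptr_t)dst - (uintptr_t)src >= (count << log2_elem_size);
}

enum CopyPath { SMALL, LARGE, LARGE_ALIGNED };

// Mirrors the sltui/bnez threshold test and the orr/andi(T0, T0, 7)/beqz alignment gate.
// The real stubs also rescale count to bytes with slli_d before jumping to the bulk loop.
CopyPath choose_copy_path(const void* src, const void* dst, size_t count,
                          size_t small_limit, bool have_aligned_variant) {
  if (count < small_limit) return SMALL;     // 9, 17 or 33, depending on LSX/LASX
  bool both_8_byte_aligned = (((uintptr_t)src | (uintptr_t)dst) & 7) == 0;
  return (have_aligned_variant && both_8_byte_aligned) ? LARGE_ALIGNED : LARGE;
}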
++ void generate_int_small_copy(Label &entry, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ Label L; ++ __ bind(entry); ++ __ lipc(AT, L); ++ __ slli_d(A2, A2, 5); ++ __ add_d(AT, AT, A2); ++ __ jr(AT); ++ ++ __ bind(L); ++ // 0: ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 1: ++ __ ld_w(AT, A0, 0); ++ __ st_w(AT, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 2: ++ __ ld_d(AT, A0, 0); ++ __ st_d(AT, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 3: ++ __ ld_d(AT, A0, 0); ++ __ ld_w(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_w(A2, A1, 8); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 4: ++ if (UseLSX) { ++ __ vld(F0, A0, 0); ++ __ vst(F0, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ } else { ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 8); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ __ nop(); ++ __ nop(); ++ ++ // 5: ++ if (UseLSX) { ++ __ vld(F0, A0, 0); ++ __ ld_w(AT, A0, 16); ++ __ vst(F0, A1, 0); ++ __ st_w(AT, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ } else { ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 8); ++ __ ld_w(A3, A0, 16); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 8); ++ __ st_w(A3, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ // 6: ++ if (UseLSX) { ++ __ vld(F0, A0, 0); ++ __ ld_d(AT, A0, 16); ++ __ vst(F0, A1, 0); ++ __ st_d(AT, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ } else { ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 8); ++ __ ld_d(A3, A0, 16); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 8); ++ __ st_d(A3, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ if (!UseLASX) ++ return; ++ ++ // 7: ++ __ vld(F0, A0, 0); ++ __ vld(F1, A0, 12); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 12); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 8: ++ __ xvld(F0, A0, 0); ++ __ xvst(F0, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ // Generate maybe oop copy ++ void gen_maybe_oop_copy(bool is_oop, bool disjoint, bool aligned, Label &small, ++ Label &large, Label &large_aligned, const char *name, ++ int small_limit, int log2_elem_size, bool dest_uninitialized = false) { ++ Label post, _large; ++ DecoratorSet decorators = 0; ++ BarrierSetAssembler *bs = NULL; ++ ++ if (is_oop) { ++ decorators = IN_HEAP | IS_ARRAY; ++ ++ if (disjoint) { ++ decorators |= ARRAYCOPY_DISJOINT; ++ } ++ ++ if (aligned) { ++ decorators |= ARRAYCOPY_ALIGNED; ++ } ++ ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } ++ ++ __ addi_d(SP, SP, -4 * wordSize); ++ __ st_d(A2, SP, 3 * wordSize); ++ __ st_d(A1, SP, 2 * wordSize); ++ __ st_d(A0, SP, 1 * wordSize); ++ __ st_d(RA, SP, 0 * wordSize); ++ ++ bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->arraycopy_prologue(_masm, decorators, is_oop, A1, A2, RegSet()); ++ ++ __ ld_d(A2, SP, 3 * wordSize); ++ __ ld_d(A1, SP, 2 * wordSize); ++ __ ld_d(A0, SP, 1 * wordSize); ++ } ++ ++ __ sltui(T0, A2, small_limit); ++ if (is_oop) { ++ __ beqz(T0, _large); ++ __ bl(small); ++ __ b(post); ++ } else { ++ __ bnez(T0, small); ++ } ++ ++ __ bind(_large); ++ __ slli_d(A2, A2, log2_elem_size); ++ ++ if (large_aligned.is_bound()) { ++ __ orr(T0, A0, A1); ++ __ andi(T0, T0, (1 << (log2_elem_size + 1)) - 1); ++ if (is_oop) { 
++ Label skip; ++ __ bnez(T0, skip); ++ __ bl(large_aligned); ++ __ b(post); ++ __ bind(skip); ++ } else { ++ __ beqz(T0, large_aligned); ++ } ++ } ++ ++ if (is_oop) { ++ __ bl(large); ++ } else { ++ __ b(large); ++ } ++ ++ if (is_oop) { ++ __ bind(post); ++ __ ld_d(A2, SP, 3 * wordSize); ++ __ ld_d(A1, SP, 2 * wordSize); ++ ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, A1, A2, T1, RegSet()); ++ ++ __ ld_d(RA, SP, 0 * wordSize); ++ __ addi_d(SP, SP, 4 * wordSize); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ // Side Effects: ++ // disjoint_int_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_int_oop_copy(). ++ // ++ address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, Label &small, ++ Label &large, Label &large_aligned, const char *name, ++ int small_limit, bool dest_uninitialized = false) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ gen_maybe_oop_copy(is_oop, true, aligned, small, large, large_aligned, ++ name, small_limit, 2, dest_uninitialized); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, Label &small, ++ Label &large, Label &large_aligned, const char *name, ++ int small_limit, bool dest_uninitialized = false) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ if (is_oop) { ++ array_overlap_test(StubRoutines::oop_disjoint_arraycopy(), 2); ++ } else { ++ array_overlap_test(StubRoutines::jint_disjoint_arraycopy(), 2); ++ } ++ ++ gen_maybe_oop_copy(is_oop, false, aligned, small, large, large_aligned, ++ name, small_limit, 2, dest_uninitialized); ++ ++ return start; ++ } ++ ++ // Long small copy: less than { int:4, lsx:4, lasx:5 } elements. 
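gen_maybe_oop_copy above wraps the same small/large dispatch with GC barrier work when is_oop is set: the argument registers are spilled, BarrierSetAssembler::arraycopy_prologue runs, the copy is performed as a call (bl) rather than a tail jump, and arraycopy_epilogue then runs on the original element count. The outline below only illustrates that bracketing; the hook struct and function names are placeholders, not HotSpot API.

#include <cstddef>

struct BarrierHooks {                              // stand-in for BarrierSetAssembler
  void (*prologue)(void** dst, size_t count);      // e.g. pre-write barrier work
  void (*epilogue)(void** dst, size_t count);      // e.g. card marking
};

void oop_copy_sketch(void** src, void** dst, size_t count,
                     const BarrierHooks& bs,
                     void (*do_copy)(void**, void**, size_t)) {
  bs.prologue(dst, count);     // may clobber arguments, hence the A0..A2/RA spills above
  do_copy(src, dst, count);    // jump table or bulk loop, chosen as in the entry stubs
  bs.epilogue(dst, count);     // runs on the destination and the original element count
}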
++ void generate_long_small_copy(Label &entry, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ Label L; ++ __ bind(entry); ++ __ lipc(AT, L); ++ __ slli_d(A2, A2, 5); ++ __ add_d(AT, AT, A2); ++ __ jr(AT); ++ ++ __ bind(L); ++ // 0: ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 1: ++ __ ld_d(AT, A0, 0); ++ __ st_d(AT, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 2: ++ if (UseLSX) { ++ __ vld(F0, A0, 0); ++ __ vst(F0, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ } else { ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 8); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ __ nop(); ++ __ nop(); ++ ++ // 3: ++ if (UseLSX) { ++ __ vld(F0, A0, 0); ++ __ ld_d(AT, A0, 16); ++ __ vst(F0, A1, 0); ++ __ st_d(AT, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ } else { ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 8); ++ __ ld_d(A3, A0, 16); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 8); ++ __ st_d(A3, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ if (!UseLASX) ++ return; ++ ++ // 4: ++ __ xvld(F0, A0, 0); ++ __ xvst(F0, A1, 0); ++ ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ // Side Effects: ++ // disjoint_int_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_int_oop_copy(). ++ // ++ address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, Label &small, ++ Label &large, Label &large_aligned, const char *name, ++ int small_limit, bool dest_uninitialized = false) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ gen_maybe_oop_copy(is_oop, true, aligned, small, large, large_aligned, ++ name, small_limit, 3, dest_uninitialized); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. 
++ // ++ address generate_conjoint_long_oop_copy(bool aligned, bool is_oop, Label &small, ++ Label &large, Label &large_aligned, const char *name, ++ int small_limit, bool dest_uninitialized = false) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ if (is_oop) { ++ array_overlap_test(StubRoutines::oop_disjoint_arraycopy(), 3); ++ } else { ++ array_overlap_test(StubRoutines::jlong_disjoint_arraycopy(), 3); ++ } ++ ++ gen_maybe_oop_copy(is_oop, false, aligned, small, large, large_aligned, ++ name, small_limit, 3, dest_uninitialized); ++ ++ return start; ++ } ++ ++ // Helper for generating a dynamic type check. ++ // Smashes scratch1, scratch2. ++ void generate_type_check(Register sub_klass, ++ Register super_check_offset, ++ Register super_klass, ++ Register tmp1, ++ Register tmp2, ++ Label& L_success) { ++ assert_different_registers(sub_klass, super_check_offset, super_klass); ++ ++ __ block_comment("type_check:"); ++ ++ Label L_miss; ++ ++ __ check_klass_subtype_fast_path(sub_klass, super_klass, tmp1, &L_success, &L_miss, NULL, ++ super_check_offset); ++ __ check_klass_subtype_slow_path(sub_klass, super_klass, tmp1, tmp2, &L_success, NULL); ++ ++ // Fall through on failure! ++ __ bind(L_miss); ++ } ++ ++ // ++ // Generate checkcasting array copy stub ++ // ++ // Input: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // A3 - size_t ckoff (super_check_offset) ++ // A4 - oop ckval (super_klass) ++ // ++ // Output: ++ // V0 == 0 - success ++ // V0 == -1^K - failure, where K is partial transfer count ++ // ++ address generate_checkcast_copy(const char *name, bool dest_uninitialized = false) { ++ Label L_load_element, L_store_element, L_do_card_marks, L_done, L_done_pop; ++ ++ // Input registers (after setup_arg_regs) ++ const Register from = A0; // source array address ++ const Register to = A1; // destination array address ++ const Register count = A2; // elementscount ++ const Register ckoff = A3; // super_check_offset ++ const Register ckval = A4; // super_klass ++ ++ RegSet wb_pre_saved_regs = RegSet::range(A0, A4); ++ RegSet wb_post_saved_regs = RegSet::of(count); ++ ++ // Registers used as temps (S0, S1, S2, S3 are save-on-entry) ++ const Register copied_oop = S0; // actual oop copied ++ const Register count_save = S1; // orig elementscount ++ const Register start_to = S2; // destination array start address ++ const Register oop_klass = S3; // oop._klass ++ const Register tmp1 = A5; ++ const Register tmp2 = A6; ++ ++ //--------------------------------------------------------------- ++ // Assembler stub will be used for this call to arraycopy ++ // if the two arrays are subtypes of Object[] but the ++ // destination array type is not equal to or a supertype ++ // of the source type. Each element must be separately ++ // checked. ++ ++ assert_different_registers(from, to, count, ckoff, ckval, start_to, ++ copied_oop, oop_klass, count_save); ++ ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ // caller guarantees that the arrays really are different ++ // otherwise, we would have to make conjoint checks ++ ++ // Caller of this entry point must set up the argument registers. ++ __ block_comment("Entry:"); ++ ++ // Empty array: Nothing to do. 
++ __ beqz(count, L_done); ++ ++ __ push(RegSet::of(S0, S1, S2, S3, RA)); ++ ++#ifdef ASSERT ++ __ block_comment("assert consistent ckoff/ckval"); ++ // The ckoff and ckval must be mutually consistent, ++ // even though caller generates both. ++ { Label L; ++ int sco_offset = in_bytes(Klass::super_check_offset_offset()); ++ __ ld_w(start_to, Address(ckval, sco_offset)); ++ __ beq(ckoff, start_to, L); ++ __ stop("super_check_offset inconsistent"); ++ __ bind(L); ++ } ++#endif //ASSERT ++ ++ DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_CHECKCAST | ARRAYCOPY_DISJOINT; ++ bool is_oop = true; ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } ++ ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->arraycopy_prologue(_masm, decorators, is_oop, to, count, wb_pre_saved_regs); ++ ++ // save the original count ++ __ move(count_save, count); ++ ++ // Copy from low to high addresses ++ __ move(start_to, to); // Save destination array start address ++ __ b(L_load_element); ++ ++ // ======== begin loop ======== ++ // (Loop is rotated; its entry is L_load_element.) ++ // Loop control: ++ // for (; count != 0; count--) { ++ // copied_oop = load_heap_oop(from++); ++ // ... generate_type_check ...; ++ // store_heap_oop(to++, copied_oop); ++ // } ++ __ align(OptoLoopAlignment); ++ ++ __ bind(L_store_element); ++ __ store_heap_oop(Address(to, 0), copied_oop, tmp1, tmp2, AS_RAW); // store the oop ++ __ addi_d(to, to, UseCompressedOops ? 4 : 8); ++ __ addi_d(count, count, -1); ++ __ beqz(count, L_do_card_marks); ++ ++ // ======== loop entry is here ======== ++ __ bind(L_load_element); ++ __ load_heap_oop(copied_oop, Address(from, 0), tmp1, tmp2, AS_RAW); // load the oop ++ __ addi_d(from, from, UseCompressedOops ? 4 : 8); ++ __ beqz(copied_oop, L_store_element); ++ ++ __ load_klass(oop_klass, copied_oop); // query the object klass ++ generate_type_check(oop_klass, ckoff, ckval, tmp1, tmp2, L_store_element); ++ // ======== end loop ======== ++ ++ // Register count = remaining oops, count_orig = total oops. ++ // Emit GC store barriers for the oops we have copied and report ++ // their number to the caller. ++ ++ __ sub_d(tmp1, count_save, count); // K = partially copied oop count ++ __ nor(count, tmp1, R0); // report (-1^K) to caller ++ __ beqz(tmp1, L_done_pop); ++ ++ __ bind(L_do_card_marks); ++ ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, start_to, count_save, tmp2, wb_post_saved_regs); ++ ++ __ bind(L_done_pop); ++ __ pop(RegSet::of(S0, S1, S2, S3, RA)); ++ ++#ifndef PRODUCT ++ __ li(SCR2, (address)&SharedRuntime::_checkcast_array_copy_ctr); ++ __ increment(Address(SCR2, 0), 1); ++#endif ++ ++ __ bind(L_done); ++ __ move(A0, count); ++ __ jr(RA); ++ ++ return start; ++ } ++ ++ // ++ // Generate 'unsafe' array copy stub ++ // Though just as safe as the other stubs, it takes an unscaled ++ // size_t argument instead of an element count. ++ // ++ // Input: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - byte count, treated as ssize_t, can be zero ++ // ++ // Examines the alignment of the operands and dispatches ++ // to a long, int, short, or byte copy loop. 
++ // ++ address generate_unsafe_copy(const char *name) { ++ Label L_long_aligned, L_int_aligned, L_short_aligned; ++ Register s = A0, d = A1, count = A2; ++ ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ __ orr(AT, s, d); ++ __ orr(AT, AT, count); ++ ++ __ andi(AT, AT, BytesPerLong-1); ++ __ beqz(AT, L_long_aligned); ++ __ andi(AT, AT, BytesPerInt-1); ++ __ beqz(AT, L_int_aligned); ++ __ andi(AT, AT, BytesPerShort-1); ++ __ beqz(AT, L_short_aligned); ++ __ b(StubRoutines::_jbyte_arraycopy); ++ ++ __ bind(L_short_aligned); ++ __ srli_d(count, count, LogBytesPerShort); // size => short_count ++ __ b(StubRoutines::_jshort_arraycopy); ++ __ bind(L_int_aligned); ++ __ srli_d(count, count, LogBytesPerInt); // size => int_count ++ __ b(StubRoutines::_jint_arraycopy); ++ __ bind(L_long_aligned); ++ __ srli_d(count, count, LogBytesPerLong); // size => long_count ++ __ b(StubRoutines::_jlong_arraycopy); ++ ++ return start; ++ } ++ ++ // Perform range checks on the proposed arraycopy. ++ // Kills temp, but nothing else. ++ // Also, clean the sign bits of src_pos and dst_pos. ++ void arraycopy_range_checks(Register src, // source array oop (A0) ++ Register src_pos, // source position (A1) ++ Register dst, // destination array oo (A2) ++ Register dst_pos, // destination position (A3) ++ Register length, ++ Register temp, ++ Label& L_failed) { ++ __ block_comment("arraycopy_range_checks:"); ++ ++ assert_different_registers(SCR1, temp); ++ ++ // if (src_pos + length > arrayOop(src)->length()) FAIL; ++ __ ld_w(SCR1, Address(src, arrayOopDesc::length_offset_in_bytes())); ++ __ add_w(temp, length, src_pos); ++ __ bltu(SCR1, temp, L_failed); ++ ++ // if (dst_pos + length > arrayOop(dst)->length()) FAIL; ++ __ ld_w(SCR1, Address(dst, arrayOopDesc::length_offset_in_bytes())); ++ __ add_w(temp, length, dst_pos); ++ __ bltu(SCR1, temp, L_failed); ++ ++ // Have to clean up high 32 bits of 'src_pos' and 'dst_pos'. ++ __ move(src_pos, src_pos); ++ __ move(dst_pos, dst_pos); ++ ++ __ block_comment("arraycopy_range_checks done"); ++ } ++ ++ // ++ // Generate generic array copy stubs ++ // ++ // Input: ++ // A0 - src oop ++ // A1 - src_pos (32-bits) ++ // A2 - dst oop ++ // A3 - dst_pos (32-bits) ++ // A4 - element count (32-bits) ++ // ++ // Output: ++ // V0 == 0 - success ++ // V0 == -1^K - failure, where K is partial transfer count ++ // ++ address generate_generic_copy(const char *name) { ++ Label L_failed, L_objArray; ++ Label L_copy_bytes, L_copy_shorts, L_copy_ints, L_copy_longs; ++ ++ // Input registers ++ const Register src = A0; // source array oop ++ const Register src_pos = A1; // source position ++ const Register dst = A2; // destination array oop ++ const Register dst_pos = A3; // destination position ++ const Register length = A4; ++ ++ // Registers used as temps ++ const Register dst_klass = A5; ++ ++ __ align(CodeEntryAlignment); ++ ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ address start = __ pc(); ++ ++#ifndef PRODUCT ++ // bump this on entry, not on exit: ++ __ li(SCR2, (address)&SharedRuntime::_generic_array_copy_ctr); ++ __ increment(Address(SCR2, 0), 1); ++#endif ++ ++ //----------------------------------------------------------------------- ++ // Assembler stub will be used for this call to arraycopy ++ // if the following conditions are met: ++ // ++ // (1) src and dst must not be null. ++ // (2) src_pos must not be negative. ++ // (3) dst_pos must not be negative. ++ // (4) length must not be negative. 
++ // (5) src klass and dst klass should be the same and not NULL. ++ // (6) src and dst should be arrays. ++ // (7) src_pos + length must not exceed length of src. ++ // (8) dst_pos + length must not exceed length of dst. ++ // ++ ++ // if (src == NULL) return -1; ++ __ beqz(src, L_failed); ++ ++ // if (src_pos < 0) return -1; ++ __ blt(src_pos, R0, L_failed); ++ ++ // if (dst == NULL) return -1; ++ __ beqz(dst, L_failed); ++ ++ // if (dst_pos < 0) return -1; ++ __ blt(dst_pos, R0, L_failed); ++ ++ // registers used as temp ++ const Register scratch_length = T0; // elements count to copy ++ const Register scratch_src_klass = T1; // array klass ++ const Register lh = T2; // layout helper ++ const Register tmp1 = T3; ++ const Register tmp2 = T4; ++ ++ // if (length < 0) return -1; ++ __ move(scratch_length, length); // length (elements count, 32-bits value) ++ __ blt(scratch_length, R0, L_failed); ++ ++ __ load_klass(scratch_src_klass, src); ++#ifdef ASSERT ++ // assert(src->klass() != NULL); ++ { ++ __ block_comment("assert klasses not null {"); ++ Label L1, L2; ++ __ bnez(scratch_src_klass, L2); // it is broken if klass is NULL ++ __ bind(L1); ++ __ stop("broken null klass"); ++ __ bind(L2); ++ __ load_klass(SCR2, dst); ++ __ beqz(SCR2, L1); // this would be broken also ++ __ block_comment("} assert klasses not null done"); ++ } ++#endif ++ ++ // Load layout helper (32-bits) ++ // ++ // |array_tag| | header_size | element_type | |log2_element_size| ++ // 32 30 24 16 8 2 0 ++ // ++ // array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0 ++ // ++ ++ const int lh_offset = in_bytes(Klass::layout_helper_offset()); ++ ++ // Handle objArrays completely differently... ++ const jint objArray_lh = Klass::array_layout_helper(T_OBJECT); ++ __ ld_w(lh, Address(scratch_src_klass, lh_offset)); ++ __ li(SCR1, objArray_lh); ++ __ xorr(SCR2, lh, SCR1); ++ __ beqz(SCR2, L_objArray); ++ ++ // if (src->klass() != dst->klass()) return -1; ++ __ load_klass(SCR2, dst); ++ __ xorr(SCR2, SCR2, scratch_src_klass); ++ __ bnez(SCR2, L_failed); ++ ++ // if (!src->is_Array()) return -1; ++ __ bge(lh, R0, L_failed); // i.e. (lh >= 0) ++ ++ // At this point, it is known to be a typeArray (array_tag 0x3). 
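The layout-helper comment above compresses a lot of information; a hypothetical decoder (ours, not part of the patch) makes the field boundaries the generic stub relies on explicit. Array layout helpers are negative because the tag occupies the top bits, which is why bge(lh, R0, L_failed) works as the is-array test.

#include <cstdint>

struct LayoutHelper { unsigned tag; int header_bytes; int etype; int log2_esize; };

static inline LayoutHelper decode_layout_helper(int32_t lh) {
  LayoutHelper d;
  d.tag          = (uint32_t)lh >> 30;  // 0x3 = typeArray, 0x2 = objArray
  d.header_bytes = (lh >> 16) & 0xff;   // offset of element 0 from the array oop
  d.etype        = (lh >> 8)  & 0xff;   // BasicType of the elements
  d.log2_esize   = lh & 0xff;           // 0..3 for primitive arrays
  return d;
}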
++#ifdef ASSERT ++ { ++ __ block_comment("assert primitive array {"); ++ Label L; ++ __ li(SCR2, (int)(Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift)); ++ __ bge(lh, SCR2, L); ++ __ stop("must be a primitive array"); ++ __ bind(L); ++ __ block_comment("} assert primitive array done"); ++ } ++#endif ++ ++ arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length, SCR2, L_failed); ++ ++ // TypeArrayKlass ++ // ++ // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize); ++ // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize); ++ // ++ ++ const Register scr1_offset = SCR1; // array offset ++ const Register elsize = lh; // element size ++ ++ __ bstrpick_d(scr1_offset, lh, Klass::_lh_header_size_shift + ++ exact_log2(Klass::_lh_header_size_mask+1) - 1, ++ Klass::_lh_header_size_shift); // array_offset ++ __ add_d(src, src, scr1_offset); // src array offset ++ __ add_d(dst, dst, scr1_offset); // dst array offset ++ __ block_comment("choose copy loop based on element size"); ++ ++ // next registers should be set before the jump to corresponding stub ++ const Register from = A0; // source array address ++ const Register to = A1; // destination array address ++ const Register count = A2; // elements count ++ ++ // 'from', 'to', 'count' registers should be set in such order ++ // since they are the same as 'src', 'src_pos', 'dst'. ++ ++ assert(Klass::_lh_log2_element_size_shift == 0, "fix this code"); ++ ++ // The possible values of elsize are 0-3, i.e. exact_log2(element ++ // size in bytes). We do a simple bitwise binary search. ++ __ bind(L_copy_bytes); ++ __ andi(tmp1, elsize, 2); ++ __ bnez(tmp1, L_copy_ints); ++ __ andi(tmp1, elsize, 1); ++ __ bnez(tmp1, L_copy_shorts); ++ __ lea(from, Address(src, src_pos, Address::times_1)); // src_addr ++ __ lea(to, Address(dst, dst_pos, Address::times_1)); // dst_addr ++ __ move(count, scratch_length); // length ++ __ b(StubRoutines::_jbyte_arraycopy); ++ ++ __ bind(L_copy_shorts); ++ __ lea(from, Address(src, src_pos, Address::times_2)); // src_addr ++ __ lea(to, Address(dst, dst_pos, Address::times_2)); // dst_addr ++ __ move(count, scratch_length); // length ++ __ b(StubRoutines::_jshort_arraycopy); ++ ++ __ bind(L_copy_ints); ++ __ andi(tmp1, elsize, 1); ++ __ bnez(tmp1, L_copy_longs); ++ __ lea(from, Address(src, src_pos, Address::times_4)); // src_addr ++ __ lea(to, Address(dst, dst_pos, Address::times_4)); // dst_addr ++ __ move(count, scratch_length); // length ++ __ b(StubRoutines::_jint_arraycopy); ++ ++ __ bind(L_copy_longs); ++#ifdef ASSERT ++ { ++ __ block_comment("assert long copy {"); ++ Label L; ++ __ andi(lh, lh, Klass::_lh_log2_element_size_mask); // lh -> elsize ++ __ li(tmp1, LogBytesPerLong); ++ __ beq(elsize, tmp1, L); ++ __ stop("must be long copy, but elsize is wrong"); ++ __ bind(L); ++ __ block_comment("} assert long copy done"); ++ } ++#endif ++ __ lea(from, Address(src, src_pos, Address::times_8)); // src_addr ++ __ lea(to, Address(dst, dst_pos, Address::times_8)); // dst_addr ++ __ move(count, scratch_length); // length ++ __ b(StubRoutines::_jlong_arraycopy); ++ ++ // ObjArrayKlass ++ __ bind(L_objArray); ++ // live at this point: scratch_src_klass, scratch_length, src[_pos], dst[_pos] ++ ++ Label L_plain_copy, L_checkcast_copy; ++ // test array classes for subtyping ++ __ load_klass(tmp1, dst); ++ __ bne(scratch_src_klass, tmp1, L_checkcast_copy); // usual case is exact equality ++ ++ // Identically typed arrays can be copied without element-wise checks. 
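When the two object arrays are not identically typed, the code below falls back to StubRoutines::_checkcast_arraycopy, whose return convention is the V0 == 0 / V0 == -1^K scheme documented in the checkcast stub header above. A caller-side decode might look like the helper below; it is purely illustrative and the name is ours. The generic stub's own -1 failure value decodes the same way, to zero elements copied.

#include <cstddef>
#include <cstdint>

// result == 0 means the whole range was stored; otherwise result == ~K,
// where K is the number of leading elements stored before a type check failed.
size_t elements_actually_copied(intptr_t stub_result, size_t requested) {
  if (stub_result == 0) return requested;
  return (size_t)~stub_result;
}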
++ arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length, SCR2, L_failed); ++ ++ __ lea(from, Address(src, src_pos, Address::ScaleFactor(LogBytesPerHeapOop))); ++ __ addi_d(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); ++ __ lea(to, Address(dst, dst_pos, Address::ScaleFactor(LogBytesPerHeapOop))); ++ __ addi_d(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); ++ __ move(count, scratch_length); // length ++ __ bind(L_plain_copy); ++ __ b(StubRoutines::_oop_arraycopy); ++ ++ __ bind(L_checkcast_copy); ++ // live at this point: scratch_src_klass, scratch_length, tmp1 (dst_klass) ++ { ++ // Before looking at dst.length, make sure dst is also an objArray. ++ __ ld_w(SCR1, Address(tmp1, lh_offset)); ++ __ li(SCR2, objArray_lh); ++ __ xorr(SCR1, SCR1, SCR2); ++ __ bnez(SCR1, L_failed); ++ ++ // It is safe to examine both src.length and dst.length. ++ arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length, tmp1, L_failed); ++ ++ __ load_klass(dst_klass, dst); // reload ++ ++ // Marshal the base address arguments now, freeing registers. ++ __ lea(from, Address(src, src_pos, Address::ScaleFactor(LogBytesPerHeapOop))); ++ __ addi_d(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); ++ __ lea(to, Address(dst, dst_pos, Address::ScaleFactor(LogBytesPerHeapOop))); ++ __ addi_d(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); ++ __ move(count, length); // length (reloaded) ++ Register sco_temp = A3; // this register is free now ++ assert_different_registers(from, to, count, sco_temp, dst_klass, scratch_src_klass); ++ // assert_clean_int(count, sco_temp); ++ ++ // Generate the type check. ++ const int sco_offset = in_bytes(Klass::super_check_offset_offset()); ++ __ ld_w(sco_temp, Address(dst_klass, sco_offset)); ++ ++ // Smashes SCR1, SCR2 ++ generate_type_check(scratch_src_klass, sco_temp, dst_klass, tmp1, tmp2, L_plain_copy); ++ ++ // Fetch destination element klass from the ObjArrayKlass header. ++ int ek_offset = in_bytes(ObjArrayKlass::element_klass_offset()); ++ __ ld_d(dst_klass, Address(dst_klass, ek_offset)); ++ __ ld_w(sco_temp, Address(dst_klass, sco_offset)); ++ ++ // the checkcast_copy loop needs two extra arguments: ++ assert(A3 == sco_temp, "#3 already in place"); ++ // Set up arguments for checkcast_arraycopy. 
++ __ move(A4, dst_klass); // dst.klass.element_klass ++ __ b(StubRoutines::_checkcast_arraycopy); ++ } ++ ++ __ bind(L_failed); ++ __ li(V0, -1); ++ __ jr(RA); ++ ++ return start; ++ } ++ ++ void generate_arraycopy_stubs() { ++ Label disjoint_large_copy, conjoint_large_copy; ++ Label disjoint_large_copy_lsx, conjoint_large_copy_lsx; ++ Label disjoint_large_copy_lasx, conjoint_large_copy_lasx; ++ Label byte_small_copy, short_small_copy, int_small_copy, long_small_copy; ++ Label none; ++ ++ generate_disjoint_large_copy(disjoint_large_copy, "disjoint_large_copy"); ++ generate_conjoint_large_copy(conjoint_large_copy, "conjoint_large_copy"); ++ if (UseLSX) { ++ generate_disjoint_large_copy_lsx(disjoint_large_copy_lsx, "disjoint_large_copy_lsx"); ++ generate_conjoint_large_copy_lsx(conjoint_large_copy_lsx, "conjoint_large_copy_lsx"); ++ } ++ if (UseLASX) { ++ generate_disjoint_large_copy_lasx(disjoint_large_copy_lasx, "disjoint_large_copy_lasx"); ++ generate_conjoint_large_copy_lasx(conjoint_large_copy_lasx, "conjoint_large_copy_lasx"); ++ } ++ generate_byte_small_copy(byte_small_copy, "jbyte_small_copy"); ++ generate_short_small_copy(short_small_copy, "jshort_small_copy"); ++ generate_int_small_copy(int_small_copy, "jint_small_copy"); ++ generate_long_small_copy(long_small_copy, "jlong_small_copy"); ++ ++ if (UseCompressedOops) { ++ if (UseLSX) { ++ StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, true, int_small_copy, disjoint_large_copy_lsx, disjoint_large_copy, "oop_disjoint_arraycopy", 7); ++ StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_int_oop_copy(false, true, int_small_copy, disjoint_large_copy_lsx, disjoint_large_copy, "oop_disjoint_arraycopy_uninit", 7, true); ++ } else { ++ StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, true, int_small_copy, disjoint_large_copy, none, "oop_disjoint_arraycopy", 7); ++ StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_int_oop_copy(false, true, int_small_copy, disjoint_large_copy, none, "oop_disjoint_arraycopy_uninit", 7, true); ++ } ++ if (UseLASX) { ++ StubRoutines::_oop_arraycopy = generate_conjoint_int_oop_copy(false, true, int_small_copy, conjoint_large_copy_lasx, conjoint_large_copy, "oop_arraycopy", 9); ++ StubRoutines::_oop_arraycopy_uninit = generate_conjoint_int_oop_copy(false, true, int_small_copy, conjoint_large_copy_lasx, conjoint_large_copy, "oop_arraycopy_uninit", 9, true); ++ } else if (UseLSX) { ++ StubRoutines::_oop_arraycopy = generate_conjoint_int_oop_copy(false, true, int_small_copy, conjoint_large_copy_lsx, conjoint_large_copy, "oop_arraycopy", 7); ++ StubRoutines::_oop_arraycopy_uninit = generate_conjoint_int_oop_copy(false, true, int_small_copy, conjoint_large_copy_lsx, conjoint_large_copy, "oop_arraycopy_uninit", 7, true); ++ } else { ++ StubRoutines::_oop_arraycopy = generate_conjoint_int_oop_copy(false, true, int_small_copy, conjoint_large_copy, none, "oop_arraycopy", 7); ++ StubRoutines::_oop_arraycopy_uninit = generate_conjoint_int_oop_copy(false, true, int_small_copy, conjoint_large_copy, none, "oop_arraycopy_uninit", 7, true); ++ } ++ } else { ++ if (UseLASX) { ++ StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, true, long_small_copy, disjoint_large_copy, disjoint_large_copy_lasx, "oop_disjoint_arraycopy", 5); ++ StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_long_oop_copy(false, true, long_small_copy, disjoint_large_copy, disjoint_large_copy_lasx, "oop_disjoint_arraycopy_uninit", 
5, true); ++ StubRoutines::_oop_arraycopy = generate_conjoint_long_oop_copy(false, true, long_small_copy, conjoint_large_copy, conjoint_large_copy_lasx, "oop_arraycopy", 5); ++ StubRoutines::_oop_arraycopy_uninit = generate_conjoint_long_oop_copy(false, true, long_small_copy, conjoint_large_copy, conjoint_large_copy_lasx, "oop_arraycopy_uninit", 5, true); ++ } else if (UseLSX) { ++ StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, true, long_small_copy, disjoint_large_copy, disjoint_large_copy_lsx, "oop_disjoint_arraycopy", 4); ++ StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_long_oop_copy(false, true, long_small_copy, disjoint_large_copy, disjoint_large_copy_lsx, "oop_disjoint_arraycopy_uninit", 4, true); ++ StubRoutines::_oop_arraycopy = generate_conjoint_long_oop_copy(false, true, long_small_copy, conjoint_large_copy, conjoint_large_copy_lsx, "oop_arraycopy", 4); ++ StubRoutines::_oop_arraycopy_uninit = generate_conjoint_long_oop_copy(false, true, long_small_copy, conjoint_large_copy, conjoint_large_copy_lsx, "oop_arraycopy_uninit", 4, true); ++ } else { ++ StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, true, long_small_copy, disjoint_large_copy, none, "oop_disjoint_arraycopy", 4); ++ StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_long_oop_copy(false, true, long_small_copy, disjoint_large_copy, none, "oop_disjoint_arraycopy_uninit", 4, true); ++ StubRoutines::_oop_arraycopy = generate_conjoint_long_oop_copy(false, true, long_small_copy, conjoint_large_copy, none, "oop_arraycopy", 4); ++ StubRoutines::_oop_arraycopy_uninit = generate_conjoint_long_oop_copy(false, true, long_small_copy, conjoint_large_copy, conjoint_large_copy_lsx, "oop_arraycopy_uninit", 4, true); ++ } ++ } ++ ++ if (UseLASX) { ++ StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, byte_small_copy, disjoint_large_copy_lasx, disjoint_large_copy_lsx, "jbyte_disjoint_arraycopy"); ++ StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, short_small_copy, disjoint_large_copy_lasx, disjoint_large_copy, "jshort_disjoint_arraycopy"); ++ StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, false, int_small_copy, disjoint_large_copy_lasx, disjoint_large_copy, "jint_disjoint_arraycopy", 9); ++ ++ StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, byte_small_copy, conjoint_large_copy_lasx, conjoint_large_copy_lsx, "jbyte_arraycopy"); ++ StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, short_small_copy, conjoint_large_copy_lasx, conjoint_large_copy, "jshort_arraycopy"); ++ StubRoutines::_jint_arraycopy = generate_conjoint_int_oop_copy(false, false, int_small_copy, conjoint_large_copy_lasx, conjoint_large_copy, "jint_arraycopy", 9); ++ } else if (UseLSX) { ++ StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, byte_small_copy, disjoint_large_copy_lsx, none, "jbyte_disjoint_arraycopy"); ++ StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, short_small_copy, disjoint_large_copy_lsx, disjoint_large_copy, "jshort_disjoint_arraycopy"); ++ StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, false, int_small_copy, disjoint_large_copy_lsx, disjoint_large_copy, "jint_disjoint_arraycopy", 7); ++ ++ StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, byte_small_copy, conjoint_large_copy_lsx, none, "jbyte_arraycopy"); ++ 
StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, short_small_copy, conjoint_large_copy_lsx, conjoint_large_copy, "jshort_arraycopy"); ++ StubRoutines::_jint_arraycopy = generate_conjoint_int_oop_copy(false, false, int_small_copy, conjoint_large_copy_lsx, conjoint_large_copy, "jint_arraycopy", 7); ++ } else { ++ StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, byte_small_copy, disjoint_large_copy, none, "jbyte_disjoint_arraycopy"); ++ StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, short_small_copy, disjoint_large_copy, none, "jshort_disjoint_arraycopy"); ++ StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, false, int_small_copy, disjoint_large_copy, none, "jint_disjoint_arraycopy", 7); ++ ++ StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, byte_small_copy, conjoint_large_copy, none, "jbyte_arraycopy"); ++ StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, short_small_copy, conjoint_large_copy, none, "jshort_arraycopy"); ++ StubRoutines::_jint_arraycopy = generate_conjoint_int_oop_copy(false, false, int_small_copy, conjoint_large_copy, none, "jint_arraycopy", 7); ++ } ++ ++ if (UseLASX) { ++ StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, false, long_small_copy, disjoint_large_copy, disjoint_large_copy_lasx, "jlong_disjoint_arraycopy", 5); ++ StubRoutines::_jlong_arraycopy = generate_conjoint_long_oop_copy(false, false, long_small_copy, conjoint_large_copy, conjoint_large_copy_lasx, "jlong_arraycopy", 5); ++ } else if (UseLSX) { ++ StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, false, long_small_copy, disjoint_large_copy, disjoint_large_copy_lsx, "jlong_disjoint_arraycopy", 4); ++ StubRoutines::_jlong_arraycopy = generate_conjoint_long_oop_copy(false, false, long_small_copy, conjoint_large_copy, conjoint_large_copy_lsx, "jlong_arraycopy", 4); ++ } else { ++ StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, false, long_small_copy, disjoint_large_copy, none, "jlong_disjoint_arraycopy", 4); ++ StubRoutines::_jlong_arraycopy = generate_conjoint_long_oop_copy(false, false, long_small_copy, conjoint_large_copy, none, "jlong_arraycopy", 4); ++ } ++ ++ // We don't generate specialized code for HeapWord-aligned source ++ // arrays, so just use the code we've already generated ++ StubRoutines::_arrayof_jbyte_disjoint_arraycopy = StubRoutines::_jbyte_disjoint_arraycopy; ++ StubRoutines::_arrayof_jbyte_arraycopy = StubRoutines::_jbyte_arraycopy; ++ ++ StubRoutines::_arrayof_jshort_disjoint_arraycopy = StubRoutines::_jshort_disjoint_arraycopy; ++ StubRoutines::_arrayof_jshort_arraycopy = StubRoutines::_jshort_arraycopy; ++ ++ StubRoutines::_arrayof_jint_disjoint_arraycopy = StubRoutines::_jint_disjoint_arraycopy; ++ StubRoutines::_arrayof_jint_arraycopy = StubRoutines::_jint_arraycopy; ++ ++ StubRoutines::_arrayof_jlong_disjoint_arraycopy = StubRoutines::_jlong_disjoint_arraycopy; ++ StubRoutines::_arrayof_jlong_arraycopy = StubRoutines::_jlong_arraycopy; ++ ++ StubRoutines::_arrayof_oop_disjoint_arraycopy = StubRoutines::_oop_disjoint_arraycopy; ++ StubRoutines::_arrayof_oop_arraycopy = StubRoutines::_oop_arraycopy; ++ ++ StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = StubRoutines::_oop_disjoint_arraycopy_uninit; ++ StubRoutines::_arrayof_oop_arraycopy_uninit = StubRoutines::_oop_arraycopy_uninit; ++ ++ StubRoutines::_checkcast_arraycopy = 
generate_checkcast_copy("checkcast_arraycopy"); ++ StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", true); ++ ++ StubRoutines::_unsafe_arraycopy = generate_unsafe_copy("unsafe_arraycopy"); ++ ++ StubRoutines::_generic_arraycopy = generate_generic_copy("generic_arraycopy"); ++ ++ StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill"); ++ StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill"); ++ StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill"); ++ StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill"); ++ StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill"); ++ StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill"); ++ } ++ ++ // Arguments: ++ // ++ // Inputs: ++ // A0 - source byte array address ++ // A1 - destination byte array address ++ // A2 - K (key) in little endian int array ++ // A3 - r vector byte array address ++ // A4 - input length ++ // ++ // Output: ++ // A0 - input length ++ // ++ address generate_aescrypt_encryptBlock(bool cbc) { ++ static const uint32_t ft_consts[256] = { ++ 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d, ++ 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554, ++ 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d, ++ 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a, ++ 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87, ++ 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b, ++ 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea, ++ 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b, ++ 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a, ++ 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f, ++ 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108, ++ 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f, ++ 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e, ++ 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5, ++ 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d, ++ 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f, ++ 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e, ++ 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb, ++ 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce, ++ 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497, ++ 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c, ++ 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed, ++ 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b, ++ 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a, ++ 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16, ++ 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594, ++ 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81, ++ 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3, ++ 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a, ++ 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504, ++ 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163, ++ 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d, ++ 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f, ++ 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739, ++ 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47, ++ 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395, ++ 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f, ++ 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883, ++ 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c, ++ 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76, ++ 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e, ++ 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4, ++ 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6, ++ 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b, ++ 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7, ++ 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0, ++ 
0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25, ++ 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818, ++ 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72, ++ 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651, ++ 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21, ++ 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85, ++ 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa, ++ 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12, ++ 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0, ++ 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9, ++ 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133, ++ 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7, ++ 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920, ++ 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a, ++ 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17, ++ 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8, ++ 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11, ++ 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a ++ }; ++ static const uint8_t fsb_consts[256] = { ++ 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, ++ 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, ++ 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, ++ 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, ++ 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, ++ 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, ++ 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, ++ 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, ++ 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, ++ 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, ++ 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, ++ 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, ++ 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, ++ 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, ++ 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, ++ 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, ++ 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, ++ 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, ++ 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, ++ 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, ++ 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, ++ 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, ++ 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, ++ 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, ++ 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, ++ 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, ++ 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, ++ 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, ++ 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, ++ 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, ++ 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, ++ 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 ++ }; ++ ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock"); ++ ++ // Allocate registers ++ Register src = A0; ++ Register dst = A1; ++ Register key = A2; ++ Register rve = A3; ++ Register srclen = A4; ++ Register keylen = T8; ++ Register srcend = A5; ++ Register keyold = A6; ++ Register t0 = A7; ++ Register t1, t2, t3, ftp; ++ Register xa[4] = { T0, T1, T2, T3 }; ++ Register ya[4] = { T4, T5, T6, T7 }; ++ ++ Label loop, tail, done; ++ address start = __ pc(); ++ ++ if (cbc) { ++ t1 = S0; ++ t2 = S1; ++ t3 = S2; ++ ftp = S3; ++ ++ __ beqz(srclen, done); ++ ++ __ addi_d(SP, SP, -4 * wordSize); ++ __ st_d(S3, SP, 3 * wordSize); ++ __ st_d(S2, SP, 2 * wordSize); ++ __ st_d(S1, SP, 1 * wordSize); ++ __ st_d(S0, SP, 0 * wordSize); ++ ++ __ add_d(srcend, src, srclen); ++ __ move(keyold, key); ++ } else { ++ t1 = A3; ++ t2 = A4; ++ t3 = A5; ++ ftp = A6; ++ } ++ ++ __ ld_w(keylen, key, 
arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)); ++ ++ // Round 1 ++ if (cbc) { ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(xa[i], rve, 4 * i); ++ } ++ ++ __ bind(loop); ++ ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(ya[i], src, 4 * i); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ XOR(xa[i], xa[i], ya[i]); ++ } ++ } else { ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(xa[i], src, 4 * i); ++ } ++ } ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(ya[i], key, 4 * i); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ revb_2h(xa[i], xa[i]); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ rotri_w(xa[i], xa[i], 16); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ XOR(xa[i], xa[i], ya[i]); ++ } ++ ++ __ li(ftp, (intptr_t)ft_consts); ++ ++ // Round 2 - (N-1) ++ for (int r = 0; r < 14; r++) { ++ Register *xp; ++ Register *yp; ++ ++ if (r & 1) { ++ xp = xa; ++ yp = ya; ++ } else { ++ xp = ya; ++ yp = xa; ++ } ++ ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(xp[i], key, 4 * (4 * (r + 1) + i)); ++ } ++ ++ for (int i = 0; i < 4; i++) { ++ __ bstrpick_d(t0, yp[(i + 3) & 3], 7, 0); ++ __ bstrpick_d(t1, yp[(i + 2) & 3], 15, 8); ++ __ bstrpick_d(t2, yp[(i + 1) & 3], 23, 16); ++ __ bstrpick_d(t3, yp[(i + 0) & 3], 31, 24); ++ __ slli_w(t0, t0, 2); ++ __ slli_w(t1, t1, 2); ++ __ slli_w(t2, t2, 2); ++ __ slli_w(t3, t3, 2); ++ __ ldx_w(t0, ftp, t0); ++ __ ldx_w(t1, ftp, t1); ++ __ ldx_w(t2, ftp, t2); ++ __ ldx_w(t3, ftp, t3); ++ __ rotri_w(t0, t0, 24); ++ __ rotri_w(t1, t1, 16); ++ __ rotri_w(t2, t2, 8); ++ __ XOR(xp[i], xp[i], t0); ++ __ XOR(t0, t1, t2); ++ __ XOR(xp[i], xp[i], t3); ++ __ XOR(xp[i], xp[i], t0); ++ } ++ ++ if (r == 8) { ++ // AES 128 ++ __ li(t0, 44); ++ __ beq(t0, keylen, tail); ++ } else if (r == 10) { ++ // AES 192 ++ __ li(t0, 52); ++ __ beq(t0, keylen, tail); ++ } ++ } ++ ++ __ bind(tail); ++ __ li(ftp, (intptr_t)fsb_consts); ++ __ alsl_d(key, keylen, key, 2 - 1); ++ ++ // Round N ++ for (int i = 0; i < 4; i++) { ++ __ bstrpick_d(t0, ya[(i + 3) & 3], 7, 0); ++ __ bstrpick_d(t1, ya[(i + 2) & 3], 15, 8); ++ __ bstrpick_d(t2, ya[(i + 1) & 3], 23, 16); ++ __ bstrpick_d(t3, ya[(i + 0) & 3], 31, 24); ++ __ ldx_bu(t0, ftp, t0); ++ __ ldx_bu(t1, ftp, t1); ++ __ ldx_bu(t2, ftp, t2); ++ __ ldx_bu(t3, ftp, t3); ++ __ ld_w(xa[i], key, 4 * i - 16); ++ __ slli_w(t1, t1, 8); ++ __ slli_w(t2, t2, 16); ++ __ slli_w(t3, t3, 24); ++ __ XOR(xa[i], xa[i], t0); ++ __ XOR(t0, t1, t2); ++ __ XOR(xa[i], xa[i], t3); ++ __ XOR(xa[i], xa[i], t0); ++ } ++ ++ for (int i = 0; i < 4; i++) { ++ __ revb_2h(xa[i], xa[i]); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ rotri_w(xa[i], xa[i], 16); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ st_w(xa[i], dst, 4 * i); ++ } ++ ++ if (cbc) { ++ __ move(key, keyold); ++ __ addi_d(src, src, 16); ++ __ addi_d(dst, dst, 16); ++ __ blt(src, srcend, loop); ++ ++ for (int i = 0; i < 4; i++) { ++ __ st_w(xa[i], rve, 4 * i); ++ } ++ ++ __ ld_d(S3, SP, 3 * wordSize); ++ __ ld_d(S2, SP, 2 * wordSize); ++ __ ld_d(S1, SP, 1 * wordSize); ++ __ ld_d(S0, SP, 0 * wordSize); ++ __ addi_d(SP, SP, 4 * wordSize); ++ ++ __ bind(done); ++ __ move(A0, srclen); ++ } ++ ++ __ jr(RA); ++ ++ return start; ++ } ++ ++ address generate_mulAdd() { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "mulAdd"); ++ ++ address entry = __ pc(); ++ ++ const Register out = A0; ++ const Register in = A1; ++ const Register offset = A2; ++ const Register len = A3; ++ const Register k = A4; ++ ++ __ block_comment("Entry:"); ++ __ mul_add(out, in, offset, len, k); ++ __ jr(RA); ++ ++ return 
entry; ++ } ++ ++ // Arguments: ++ // ++ // Inputs: ++ // A0 - source byte array address ++ // A1 - destination byte array address ++ // A2 - K (key) in little endian int array ++ // A3 - r vector byte array address ++ // A4 - input length ++ // ++ // Output: ++ // A0 - input length ++ // ++ address generate_aescrypt_decryptBlock(bool cbc) { ++ static const uint32_t rt_consts[256] = { ++ 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96, ++ 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393, ++ 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25, ++ 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f, ++ 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1, ++ 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6, ++ 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da, ++ 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844, ++ 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd, ++ 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4, ++ 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45, ++ 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94, ++ 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7, ++ 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a, ++ 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5, ++ 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c, ++ 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1, ++ 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a, ++ 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75, ++ 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051, ++ 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46, ++ 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff, ++ 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77, ++ 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb, ++ 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000, ++ 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e, ++ 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927, ++ 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a, ++ 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e, ++ 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16, ++ 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d, ++ 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8, ++ 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd, ++ 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34, ++ 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163, ++ 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120, ++ 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d, ++ 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0, ++ 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422, ++ 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef, ++ 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36, ++ 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4, ++ 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662, ++ 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5, ++ 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3, ++ 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b, ++ 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8, ++ 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6, ++ 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6, ++ 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0, ++ 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815, ++ 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f, ++ 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df, ++ 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f, ++ 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e, ++ 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713, ++ 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89, ++ 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c, ++ 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf, ++ 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86, ++ 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f, ++ 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541, ++ 
0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190, ++ 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742 ++ }; ++ static const uint8_t rsb_consts[256] = { ++ 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, ++ 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, ++ 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, ++ 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, ++ 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, ++ 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e, ++ 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, ++ 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25, ++ 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, ++ 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, ++ 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, ++ 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84, ++ 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, ++ 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06, ++ 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, ++ 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, ++ 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, ++ 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73, ++ 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, ++ 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e, ++ 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, ++ 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, ++ 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, ++ 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, ++ 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, ++ 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f, ++ 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, ++ 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef, ++ 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, ++ 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61, ++ 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, ++ 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d ++ }; ++ ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock"); ++ ++ // Allocate registers ++ Register src = A0; ++ Register dst = A1; ++ Register key = A2; ++ Register rve = A3; ++ Register srclen = A4; ++ Register keylen = T8; ++ Register srcend = A5; ++ Register t0 = A6; ++ Register t1 = A7; ++ Register t2, t3, rtp, rvp; ++ Register xa[4] = { T0, T1, T2, T3 }; ++ Register ya[4] = { T4, T5, T6, T7 }; ++ ++ Label loop, tail, done; ++ address start = __ pc(); ++ ++ if (cbc) { ++ t2 = S0; ++ t3 = S1; ++ rtp = S2; ++ rvp = S3; ++ ++ __ beqz(srclen, done); ++ ++ __ addi_d(SP, SP, -4 * wordSize); ++ __ st_d(S3, SP, 3 * wordSize); ++ __ st_d(S2, SP, 2 * wordSize); ++ __ st_d(S1, SP, 1 * wordSize); ++ __ st_d(S0, SP, 0 * wordSize); ++ ++ __ add_d(srcend, src, srclen); ++ __ move(rvp, rve); ++ } else { ++ t2 = A3; ++ t3 = A4; ++ rtp = A5; ++ } ++ ++ __ ld_w(keylen, key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)); ++ ++ __ bind(loop); ++ ++ // Round 1 ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(xa[i], src, 4 * i); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(ya[i], key, 4 * (4 + i)); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ revb_2h(xa[i], xa[i]); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ rotri_w(xa[i], xa[i], 16); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ XOR(xa[i], xa[i], ya[i]); ++ } ++ ++ __ li(rtp, (intptr_t)rt_consts); ++ ++ // Round 2 - (N-1) ++ for (int r = 0; r < 14; r++) { ++ Register *xp; ++ Register *yp; ++ ++ if (r & 1) { ++ xp = xa; ++ yp = ya; ++ } else { ++ xp = ya; ++ yp = xa; ++ } ++ ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(xp[i], key, 4 * (4 * (r + 1) + 4 + i)); ++ } ++ ++ for (int i = 0; i < 4; i++) { ++ __ bstrpick_d(t0, yp[(i + 1) 
& 3], 7, 0); ++ __ bstrpick_d(t1, yp[(i + 2) & 3], 15, 8); ++ __ bstrpick_d(t2, yp[(i + 3) & 3], 23, 16); ++ __ bstrpick_d(t3, yp[(i + 0) & 3], 31, 24); ++ __ slli_w(t0, t0, 2); ++ __ slli_w(t1, t1, 2); ++ __ slli_w(t2, t2, 2); ++ __ slli_w(t3, t3, 2); ++ __ ldx_w(t0, rtp, t0); ++ __ ldx_w(t1, rtp, t1); ++ __ ldx_w(t2, rtp, t2); ++ __ ldx_w(t3, rtp, t3); ++ __ rotri_w(t0, t0, 24); ++ __ rotri_w(t1, t1, 16); ++ __ rotri_w(t2, t2, 8); ++ __ XOR(xp[i], xp[i], t0); ++ __ XOR(t0, t1, t2); ++ __ XOR(xp[i], xp[i], t3); ++ __ XOR(xp[i], xp[i], t0); ++ } ++ ++ if (r == 8) { ++ // AES 128 ++ __ li(t0, 44); ++ __ beq(t0, keylen, tail); ++ } else if (r == 10) { ++ // AES 192 ++ __ li(t0, 52); ++ __ beq(t0, keylen, tail); ++ } ++ } ++ ++ __ bind(tail); ++ __ li(rtp, (intptr_t)rsb_consts); ++ ++ // Round N ++ for (int i = 0; i < 4; i++) { ++ __ bstrpick_d(t0, ya[(i + 1) & 3], 7, 0); ++ __ bstrpick_d(t1, ya[(i + 2) & 3], 15, 8); ++ __ bstrpick_d(t2, ya[(i + 3) & 3], 23, 16); ++ __ bstrpick_d(t3, ya[(i + 0) & 3], 31, 24); ++ __ ldx_bu(t0, rtp, t0); ++ __ ldx_bu(t1, rtp, t1); ++ __ ldx_bu(t2, rtp, t2); ++ __ ldx_bu(t3, rtp, t3); ++ __ ld_w(xa[i], key, 4 * i); ++ __ slli_w(t1, t1, 8); ++ __ slli_w(t2, t2, 16); ++ __ slli_w(t3, t3, 24); ++ __ XOR(xa[i], xa[i], t0); ++ __ XOR(t0, t1, t2); ++ __ XOR(xa[i], xa[i], t3); ++ __ XOR(xa[i], xa[i], t0); ++ } ++ ++ if (cbc) { ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(ya[i], rvp, 4 * i); ++ } ++ } ++ for (int i = 0; i < 4; i++) { ++ __ revb_2h(xa[i], xa[i]); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ rotri_w(xa[i], xa[i], 16); ++ } ++ if (cbc) { ++ for (int i = 0; i < 4; i++) { ++ __ XOR(xa[i], xa[i], ya[i]); ++ } ++ } ++ for (int i = 0; i < 4; i++) { ++ __ st_w(xa[i], dst, 4 * i); ++ } ++ ++ if (cbc) { ++ __ move(rvp, src); ++ __ addi_d(src, src, 16); ++ __ addi_d(dst, dst, 16); ++ __ blt(src, srcend, loop); ++ ++ __ ld_d(t0, src, -16); ++ __ ld_d(t1, src, -8); ++ __ st_d(t0, rve, 0); ++ __ st_d(t1, rve, 8); ++ ++ __ ld_d(S3, SP, 3 * wordSize); ++ __ ld_d(S2, SP, 2 * wordSize); ++ __ ld_d(S1, SP, 1 * wordSize); ++ __ ld_d(S0, SP, 0 * wordSize); ++ __ addi_d(SP, SP, 4 * wordSize); ++ ++ __ bind(done); ++ __ move(A0, srclen); ++ } ++ ++ __ jr(RA); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // ++ // Inputs: ++ // A0 - byte[] source+offset ++ // A1 - int[] SHA.state ++ // A2 - int offset ++ // A3 - int limit ++ // ++ void generate_sha1_implCompress(const char *name, address &entry, address &entry_mb) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ Label keys, loop; ++ ++ // Keys ++ __ bind(keys); ++ __ emit_int32(0x5a827999); ++ __ emit_int32(0x6ed9eba1); ++ __ emit_int32(0x8f1bbcdc); ++ __ emit_int32(0xca62c1d6); ++ ++ // Allocate registers ++ Register t0 = T5; ++ Register t1 = T6; ++ Register t2 = T7; ++ Register t3 = T8; ++ Register buf = A0; ++ Register state = A1; ++ Register ofs = A2; ++ Register limit = A3; ++ Register ka[4] = { A4, A5, A6, A7 }; ++ Register sa[5] = { T0, T1, T2, T3, T4 }; ++ ++ // Entry ++ entry = __ pc(); ++ __ move(ofs, R0); ++ __ move(limit, R0); ++ ++ // Entry MB ++ entry_mb = __ pc(); ++ ++ // Allocate scratch space ++ __ addi_d(SP, SP, -64); ++ ++ // Load keys ++ __ lipc(t0, keys); ++ __ ld_w(ka[0], t0, 0); ++ __ ld_w(ka[1], t0, 4); ++ __ ld_w(ka[2], t0, 8); ++ __ ld_w(ka[3], t0, 12); ++ ++ __ bind(loop); ++ // Load arguments ++ __ ld_w(sa[0], state, 0); ++ __ ld_w(sa[1], state, 4); ++ __ ld_w(sa[2], state, 8); ++ __ ld_w(sa[3], state, 12); ++ __ ld_w(sa[4], state, 16); ++ ++ // 80 rounds of 
hashing ++ for (int i = 0; i < 80; i++) { ++ Register a = sa[(5 - (i % 5)) % 5]; ++ Register b = sa[(6 - (i % 5)) % 5]; ++ Register c = sa[(7 - (i % 5)) % 5]; ++ Register d = sa[(8 - (i % 5)) % 5]; ++ Register e = sa[(9 - (i % 5)) % 5]; ++ ++ if (i < 16) { ++ __ ld_w(t0, buf, i * 4); ++ __ revb_2h(t0, t0); ++ __ rotri_w(t0, t0, 16); ++ __ add_w(e, e, t0); ++ __ st_w(t0, SP, i * 4); ++ __ XOR(t0, c, d); ++ __ AND(t0, t0, b); ++ __ XOR(t0, t0, d); ++ } else { ++ __ ld_w(t0, SP, ((i - 3) & 0xF) * 4); ++ __ ld_w(t1, SP, ((i - 8) & 0xF) * 4); ++ __ ld_w(t2, SP, ((i - 14) & 0xF) * 4); ++ __ ld_w(t3, SP, ((i - 16) & 0xF) * 4); ++ __ XOR(t0, t0, t1); ++ __ XOR(t0, t0, t2); ++ __ XOR(t0, t0, t3); ++ __ rotri_w(t0, t0, 31); ++ __ add_w(e, e, t0); ++ __ st_w(t0, SP, (i & 0xF) * 4); ++ ++ if (i < 20) { ++ __ XOR(t0, c, d); ++ __ AND(t0, t0, b); ++ __ XOR(t0, t0, d); ++ } else if (i < 40 || i >= 60) { ++ __ XOR(t0, b, c); ++ __ XOR(t0, t0, d); ++ } else if (i < 60) { ++ __ OR(t0, c, d); ++ __ AND(t0, t0, b); ++ __ AND(t2, c, d); ++ __ OR(t0, t0, t2); ++ } ++ } ++ ++ __ rotri_w(b, b, 2); ++ __ add_w(e, e, t0); ++ __ add_w(e, e, ka[i / 20]); ++ __ rotri_w(t0, a, 27); ++ __ add_w(e, e, t0); ++ } ++ ++ // Save updated state ++ __ ld_w(t0, state, 0); ++ __ ld_w(t1, state, 4); ++ __ ld_w(t2, state, 8); ++ __ ld_w(t3, state, 12); ++ __ add_w(sa[0], sa[0], t0); ++ __ ld_w(t0, state, 16); ++ __ add_w(sa[1], sa[1], t1); ++ __ add_w(sa[2], sa[2], t2); ++ __ add_w(sa[3], sa[3], t3); ++ __ add_w(sa[4], sa[4], t0); ++ __ st_w(sa[0], state, 0); ++ __ st_w(sa[1], state, 4); ++ __ st_w(sa[2], state, 8); ++ __ st_w(sa[3], state, 12); ++ __ st_w(sa[4], state, 16); ++ ++ __ addi_w(ofs, ofs, 64); ++ __ addi_d(buf, buf, 64); ++ __ bge(limit, ofs, loop); ++ __ move(V0, ofs); // return ofs ++ ++ __ addi_d(SP, SP, 64); ++ __ jr(RA); ++ } ++ ++ // Arguments: ++ // ++ // Inputs: ++ // A0 - byte[] source+offset ++ // A1 - int[] SHA.state ++ // A2 - int offset ++ // A3 - int limit ++ // ++ void generate_sha256_implCompress(const char *name, address &entry, address &entry_mb) { ++ static const uint32_t round_consts[64] = { ++ 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, ++ 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, ++ 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, ++ 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, ++ 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, ++ 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, ++ 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, ++ 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, ++ 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, ++ 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, ++ 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, ++ 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, ++ 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, ++ 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, ++ 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, ++ 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2, ++ }; ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ Label loop; ++ ++ // Allocate registers ++ Register t0 = A4; ++ Register t1 = A5; ++ Register t2 = A6; ++ Register t3 = A7; ++ Register buf = A0; ++ Register state = A1; ++ Register ofs = A2; ++ Register limit = A3; ++ Register kptr = T8; ++ Register sa[8] = { T0, T1, T2, T3, T4, T5, T6, T7 }; ++ ++ // Entry ++ entry = __ pc(); ++ __ move(ofs, R0); ++ __ move(limit, R0); ++ ++ // Entry MB ++ entry_mb = __ pc(); ++ ++ // Allocate scratch space ++ __ addi_d(SP, SP, -64); ++ ++ // Load keys base address ++ __ li(kptr, 
(intptr_t)round_consts); ++ ++ __ bind(loop); ++ // Load state ++ __ ld_w(sa[0], state, 0); ++ __ ld_w(sa[1], state, 4); ++ __ ld_w(sa[2], state, 8); ++ __ ld_w(sa[3], state, 12); ++ __ ld_w(sa[4], state, 16); ++ __ ld_w(sa[5], state, 20); ++ __ ld_w(sa[6], state, 24); ++ __ ld_w(sa[7], state, 28); ++ ++ // Do 64 rounds of hashing ++ for (int i = 0; i < 64; i++) { ++ Register a = sa[(0 - i) & 7]; ++ Register b = sa[(1 - i) & 7]; ++ Register c = sa[(2 - i) & 7]; ++ Register d = sa[(3 - i) & 7]; ++ Register e = sa[(4 - i) & 7]; ++ Register f = sa[(5 - i) & 7]; ++ Register g = sa[(6 - i) & 7]; ++ Register h = sa[(7 - i) & 7]; ++ ++ if (i < 16) { ++ __ ld_w(t1, buf, i * 4); ++ __ revb_2h(t1, t1); ++ __ rotri_w(t1, t1, 16); ++ } else { ++ __ ld_w(t0, SP, ((i - 15) & 0xF) * 4); ++ __ ld_w(t1, SP, ((i - 16) & 0xF) * 4); ++ __ ld_w(t2, SP, ((i - 7) & 0xF) * 4); ++ __ add_w(t1, t1, t2); ++ __ rotri_w(t2, t0, 18); ++ __ srli_w(t3, t0, 3); ++ __ rotri_w(t0, t0, 7); ++ __ XOR(t2, t2, t3); ++ __ XOR(t0, t0, t2); ++ __ add_w(t1, t1, t0); ++ __ ld_w(t0, SP, ((i - 2) & 0xF) * 4); ++ __ rotri_w(t2, t0, 19); ++ __ srli_w(t3, t0, 10); ++ __ rotri_w(t0, t0, 17); ++ __ XOR(t2, t2, t3); ++ __ XOR(t0, t0, t2); ++ __ add_w(t1, t1, t0); ++ } ++ ++ __ rotri_w(t2, e, 11); ++ __ rotri_w(t3, e, 25); ++ __ rotri_w(t0, e, 6); ++ __ XOR(t2, t2, t3); ++ __ XOR(t0, t0, t2); ++ __ XOR(t2, g, f); ++ __ ld_w(t3, kptr, i * 4); ++ __ AND(t2, t2, e); ++ __ XOR(t2, t2, g); ++ __ add_w(t0, t0, t2); ++ __ add_w(t0, t0, t3); ++ __ add_w(h, h, t1); ++ __ add_w(h, h, t0); ++ __ add_w(d, d, h); ++ __ rotri_w(t2, a, 13); ++ __ rotri_w(t3, a, 22); ++ __ rotri_w(t0, a, 2); ++ __ XOR(t2, t2, t3); ++ __ XOR(t0, t0, t2); ++ __ add_w(h, h, t0); ++ __ OR(t0, c, b); ++ __ AND(t2, c, b); ++ __ AND(t0, t0, a); ++ __ OR(t0, t0, t2); ++ __ add_w(h, h, t0); ++ __ st_w(t1, SP, (i & 0xF) * 4); ++ } ++ ++ // Add to state ++ __ ld_w(t0, state, 0); ++ __ ld_w(t1, state, 4); ++ __ ld_w(t2, state, 8); ++ __ ld_w(t3, state, 12); ++ __ add_w(sa[0], sa[0], t0); ++ __ add_w(sa[1], sa[1], t1); ++ __ add_w(sa[2], sa[2], t2); ++ __ add_w(sa[3], sa[3], t3); ++ __ ld_w(t0, state, 16); ++ __ ld_w(t1, state, 20); ++ __ ld_w(t2, state, 24); ++ __ ld_w(t3, state, 28); ++ __ add_w(sa[4], sa[4], t0); ++ __ add_w(sa[5], sa[5], t1); ++ __ add_w(sa[6], sa[6], t2); ++ __ add_w(sa[7], sa[7], t3); ++ __ st_w(sa[0], state, 0); ++ __ st_w(sa[1], state, 4); ++ __ st_w(sa[2], state, 8); ++ __ st_w(sa[3], state, 12); ++ __ st_w(sa[4], state, 16); ++ __ st_w(sa[5], state, 20); ++ __ st_w(sa[6], state, 24); ++ __ st_w(sa[7], state, 28); ++ ++ __ addi_w(ofs, ofs, 64); ++ __ addi_d(buf, buf, 64); ++ __ bge(limit, ofs, loop); ++ __ move(V0, ofs); // return ofs ++ ++ __ addi_d(SP, SP, 64); ++ __ jr(RA); ++ } ++ ++ // Do NOT delete this node which stands for stub routine placeholder ++ address generate_updateBytesCRC32() { ++ assert(UseCRC32Intrinsics, "need CRC32 instructions support"); ++ ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32"); ++ ++ address start = __ pc(); ++ ++ const Register crc = A0; // crc ++ const Register buf = A1; // source java byte array address ++ const Register len = A2; // length ++ const Register tmp = A3; ++ ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ ++ __ kernel_crc32(crc, buf, len, tmp); ++ ++ __ leave(); // required for proper stackwalking of RuntimeStub frame ++ __ jr(RA); ++ ++ return start; ++ } ++ ++ // Do NOT delete this node which stands for stub routine placeholder ++ 
address generate_updateBytesCRC32C() { ++ assert(UseCRC32CIntrinsics, "need CRC32C instructions support"); ++ ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32C"); ++ ++ address start = __ pc(); ++ ++ const Register crc = A0; // crc ++ const Register buf = A1; // source java byte array address ++ const Register len = A2; // length ++ const Register tmp = A3; ++ ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ ++ __ kernel_crc32c(crc, buf, len, tmp); ++ ++ __ leave(); // required for proper stackwalking of RuntimeStub frame ++ __ jr(RA); ++ ++ return start; ++ } ++ ++ address generate_dsin_dcos(bool isCos) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", isCos ? "libmDcos" : "libmDsin"); ++ address start = __ pc(); ++ __ generate_dsin_dcos(isCos, (address)StubRoutines::la::_npio2_hw, ++ (address)StubRoutines::la::_two_over_pi, ++ (address)StubRoutines::la::_pio2, ++ (address)StubRoutines::la::_dsin_coef, ++ (address)StubRoutines::la::_dcos_coef); ++ return start; ++ } ++ ++ // add a function to implement SafeFetch32 and SafeFetchN ++ void generate_safefetch(const char* name, int size, address* entry, ++ address* fault_pc, address* continuation_pc) { ++ // safefetch signatures: ++ // int SafeFetch32(int* adr, int errValue); ++ // intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue); ++ // ++ // arguments: ++ // A0 = adr ++ // A1 = errValue ++ // ++ // result: ++ // PPC_RET = *adr or errValue ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ // Entry point, pc or function descriptor. ++ *entry = __ pc(); ++ ++ // Load *adr into A1, may fault. ++ *fault_pc = __ pc(); ++ switch (size) { ++ case 4: ++ // int32_t ++ __ ld_w(A1, A0, 0); ++ break; ++ case 8: ++ // int64_t ++ __ ld_d(A1, A0, 0); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ // return errValue or *adr ++ *continuation_pc = __ pc(); ++ __ add_d(V0, A1, R0); ++ __ jr(RA); ++ } ++ ++ ++#undef __ ++#define __ masm-> ++ ++ // Continuation point for throwing of implicit exceptions that are ++ // not handled in the current activation. Fabricates an exception ++ // oop and initiates normal exception dispatching in this ++ // frame. Since we need to preserve callee-saved values (currently ++ // only for C2, but done for C1 as well) we need a callee-saved oop ++ // map and therefore have to make these stubs into RuntimeStubs ++ // rather than BufferBlobs. If the compiler needs all registers to ++ // be preserved between the fault point and the exception handler ++ // then it must assume responsibility for that in ++ // AbstractCompiler::continuation_for_implicit_null_exception or ++ // continuation_for_implicit_division_by_zero_exception. All other ++ // implicit exceptions (e.g., NullPointerException or ++ // AbstractMethodError on entry) are either at call sites or ++ // otherwise assume that stack unwinding will be initiated, so ++ // caller saved registers were assumed volatile in the compiler. ++ address generate_throw_exception(const char* name, ++ address runtime_entry, ++ bool restore_saved_exception_pc) { ++ // Information about frame layout at time of blocking runtime call. ++ // Note that we only have to preserve callee-saved registers since ++ // the compilers are responsible for supplying a continuation point ++ // if they expect all registers to be preserved. 
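++ // In outline, each stub built by this generator does the equivalent of:
++ //   save the callee-saved registers (layout below);
++ //   set_last_Java_frame(thread, SP, FP, pc);
++ //   runtime_entry(thread);              // installs the pending exception
++ //   reset_last_Java_frame(thread); restore the saved registers;
++ //   jump to StubRoutines::forward_exception_entry();
++ // (summary sketch of the code that follows)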
++ enum layout { ++ thread_off, // last_java_sp ++ S7_off, // callee saved register sp + 1 ++ S6_off, // callee saved register sp + 2 ++ S5_off, // callee saved register sp + 3 ++ S4_off, // callee saved register sp + 4 ++ S3_off, // callee saved register sp + 5 ++ S2_off, // callee saved register sp + 6 ++ S1_off, // callee saved register sp + 7 ++ S0_off, // callee saved register sp + 8 ++ FP_off, ++ ret_address, ++ framesize ++ }; ++ ++ int insts_size = 2048; ++ int locs_size = 32; ++ ++ // CodeBuffer* code = new CodeBuffer(insts_size, locs_size, 0, 0, 0, false, ++ // NULL, NULL, NULL, false, NULL, name, false); ++ CodeBuffer code (name , insts_size, locs_size); ++ OopMapSet* oop_maps = new OopMapSet(); ++ MacroAssembler* masm = new MacroAssembler(&code); ++ ++ address start = __ pc(); ++ ++ // This is an inlined and slightly modified version of call_VM ++ // which has the ability to fetch the return PC out of ++ // thread-local storage and also sets up last_Java_sp slightly ++ // differently than the real call_VM ++#ifndef OPT_THREAD ++ Register java_thread = TREG; ++ __ get_thread(java_thread); ++#else ++ Register java_thread = TREG; ++#endif ++ if (restore_saved_exception_pc) { ++ __ ld_d(RA, java_thread, in_bytes(JavaThread::saved_exception_pc_offset())); ++ } ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ ++ __ addi_d(SP, SP, (-1) * (framesize-2) * wordSize); // prolog ++ __ st_d(S0, SP, S0_off * wordSize); ++ __ st_d(S1, SP, S1_off * wordSize); ++ __ st_d(S2, SP, S2_off * wordSize); ++ __ st_d(S3, SP, S3_off * wordSize); ++ __ st_d(S4, SP, S4_off * wordSize); ++ __ st_d(S5, SP, S5_off * wordSize); ++ __ st_d(S6, SP, S6_off * wordSize); ++ __ st_d(S7, SP, S7_off * wordSize); ++ ++ int frame_complete = __ pc() - start; ++ // push java thread (becomes first argument of C function) ++ __ st_d(java_thread, SP, thread_off * wordSize); ++ if (java_thread != A0) ++ __ move(A0, java_thread); ++ ++ // Set up last_Java_sp and last_Java_fp ++ Label before_call; ++ address the_pc = __ pc(); ++ __ bind(before_call); ++ __ set_last_Java_frame(java_thread, SP, FP, before_call); ++ // Align stack ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ ++ // Call runtime ++ // TODO: confirm reloc ++ __ call(runtime_entry, relocInfo::runtime_call_type); ++ // Generate oop map ++ OopMap* map = new OopMap(framesize, 0); ++ oop_maps->add_gc_map(the_pc - start, map); ++ ++ // restore the thread (cannot use the pushed argument since arguments ++ // may be overwritten by C code generated by an optimizing compiler); ++ // however can use the register value directly if it is callee saved. ++#ifndef OPT_THREAD ++ __ get_thread(java_thread); ++#endif ++ ++ __ ld_d(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ __ reset_last_Java_frame(java_thread, true); ++ ++ // Restore callee save registers. 
This must be done after resetting the Java frame ++ __ ld_d(S0, SP, S0_off * wordSize); ++ __ ld_d(S1, SP, S1_off * wordSize); ++ __ ld_d(S2, SP, S2_off * wordSize); ++ __ ld_d(S3, SP, S3_off * wordSize); ++ __ ld_d(S4, SP, S4_off * wordSize); ++ __ ld_d(S5, SP, S5_off * wordSize); ++ __ ld_d(S6, SP, S6_off * wordSize); ++ __ ld_d(S7, SP, S7_off * wordSize); ++ ++ // discard arguments ++ __ move(SP, FP); // epilog ++ __ pop(FP); ++ // check for pending exceptions ++#ifdef ASSERT ++ Label L; ++ __ ld_d(AT, java_thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, L); ++ __ should_not_reach_here(); ++ __ bind(L); ++#endif //ASSERT ++ __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, ++ &code, ++ frame_complete, ++ framesize, ++ oop_maps, false); ++ return stub->entry_point(); ++ } ++ ++ class MontgomeryMultiplyGenerator : public MacroAssembler { ++ ++ Register Pa_base, Pb_base, Pn_base, Pm_base, inv, Rlen, Rlen2, Ra, Rb, Rm, ++ Rn, Iam, Ibn, Rhi_ab, Rlo_ab, Rhi_mn, Rlo_mn, t0, t1, t2, Ri, Rj; ++ ++ bool _squaring; ++ ++ public: ++ MontgomeryMultiplyGenerator (Assembler *as, bool squaring) ++ : MacroAssembler(as->code()), _squaring(squaring) { ++ ++ // Register allocation ++ ++ Register reg = A0; ++ Pa_base = reg; // Argument registers: ++ if (squaring) ++ Pb_base = Pa_base; ++ else ++ Pb_base = ++reg; ++ Pn_base = ++reg; ++ Rlen = ++reg; ++ inv = ++reg; ++ Rlen2 = inv; // Reuse inv ++ Pm_base = ++reg; ++ ++ // Working registers: ++ Ra = ++reg; // The current digit of a, b, n, and m. ++ Rb = ++reg; ++ Rm = ++reg; ++ Rn = ++reg; ++ ++ Iam = ++reg; // Index to the current/next digit of a, b, n, and m. ++ Ibn = ++reg; ++ ++ t0 = ++reg; // Three registers which form a ++ t1 = ++reg; // triple-precision accumuator. ++ t2 = ++reg; ++ ++ Ri = ++reg; // Inner and outer loop indexes. ++ Rj = ++reg; ++ ++ if (squaring) { ++ Rhi_ab = ++reg; // Product registers: low and high parts ++ reg = S0; ++ Rlo_ab = ++reg; // of a*b and m*n. ++ } else { ++ reg = S0; ++ Rhi_ab = reg; // Product registers: low and high parts ++ Rlo_ab = ++reg; // of a*b and m*n. 
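++ // The product registers allocated from S0 onward are callee-saved;
++ // save_regs()/restore_regs() below spill precisely those (plus Pm_base)
++ // so the caller's values are preserved while this stub uses them.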
++ } ++ ++ Rhi_mn = ++reg; ++ Rlo_mn = ++reg; ++ } ++ ++ private: ++ void enter() { ++ addi_d(SP, SP, -6 * wordSize); ++ st_d(FP, SP, 0 * wordSize); ++ move(FP, SP); ++ } ++ ++ void leave() { ++ addi_d(T0, FP, 6 * wordSize); ++ ld_d(FP, FP, 0 * wordSize); ++ move(SP, T0); ++ } ++ ++ void save_regs() { ++ if (!_squaring) ++ st_d(Rhi_ab, FP, 5 * wordSize); ++ st_d(Rlo_ab, FP, 4 * wordSize); ++ st_d(Rhi_mn, FP, 3 * wordSize); ++ st_d(Rlo_mn, FP, 2 * wordSize); ++ st_d(Pm_base, FP, 1 * wordSize); ++ } ++ ++ void restore_regs() { ++ if (!_squaring) ++ ld_d(Rhi_ab, FP, 5 * wordSize); ++ ld_d(Rlo_ab, FP, 4 * wordSize); ++ ld_d(Rhi_mn, FP, 3 * wordSize); ++ ld_d(Rlo_mn, FP, 2 * wordSize); ++ ld_d(Pm_base, FP, 1 * wordSize); ++ } ++ ++ template ++ void unroll_2(Register count, T block, Register tmp) { ++ Label loop, end, odd; ++ andi(tmp, count, 1); ++ bnez(tmp, odd); ++ beqz(count, end); ++ align(16); ++ bind(loop); ++ (this->*block)(); ++ bind(odd); ++ (this->*block)(); ++ addi_w(count, count, -2); ++ blt(R0, count, loop); ++ bind(end); ++ } ++ ++ template ++ void unroll_2(Register count, T block, Register d, Register s, Register tmp) { ++ Label loop, end, odd; ++ andi(tmp, count, 1); ++ bnez(tmp, odd); ++ beqz(count, end); ++ align(16); ++ bind(loop); ++ (this->*block)(d, s, tmp); ++ bind(odd); ++ (this->*block)(d, s, tmp); ++ addi_w(count, count, -2); ++ blt(R0, count, loop); ++ bind(end); ++ } ++ ++ void acc(Register Rhi, Register Rlo, ++ Register t0, Register t1, Register t2, Register t, Register c) { ++ add_d(t0, t0, Rlo); ++ OR(t, t1, Rhi); ++ sltu(c, t0, Rlo); ++ add_d(t1, t1, Rhi); ++ add_d(t1, t1, c); ++ sltu(c, t1, t); ++ add_d(t2, t2, c); ++ } ++ ++ void pre1(Register i) { ++ block_comment("pre1"); ++ // Iam = 0; ++ // Ibn = i; ++ ++ slli_w(Ibn, i, LogBytesPerWord); ++ ++ // Ra = Pa_base[Iam]; ++ // Rb = Pb_base[Ibn]; ++ // Rm = Pm_base[Iam]; ++ // Rn = Pn_base[Ibn]; ++ ++ ld_d(Ra, Pa_base, 0); ++ ldx_d(Rb, Pb_base, Ibn); ++ ld_d(Rm, Pm_base, 0); ++ ldx_d(Rn, Pn_base, Ibn); ++ ++ move(Iam, R0); ++ ++ // Zero the m*n result. ++ move(Rhi_mn, R0); ++ move(Rlo_mn, R0); ++ } ++ ++ // The core multiply-accumulate step of a Montgomery ++ // multiplication. The idea is to schedule operations as a ++ // pipeline so that instructions with long latencies (loads and ++ // multiplies) have time to complete before their results are ++ // used. This most benefits in-order implementations of the ++ // architecture but out-of-order ones also benefit. ++ void step() { ++ block_comment("step"); ++ // MACC(Ra, Rb, t0, t1, t2); ++ // Ra = Pa_base[++Iam]; ++ // Rb = Pb_base[--Ibn]; ++ addi_d(Iam, Iam, wordSize); ++ addi_d(Ibn, Ibn, -wordSize); ++ mul_d(Rlo_ab, Ra, Rb); ++ mulh_du(Rhi_ab, Ra, Rb); ++ acc(Rhi_mn, Rlo_mn, t0, t1, t2, Ra, Rb); // The pending m*n from the ++ // previous iteration. 
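++ // (acc() above adds the 128-bit product Rhi:Rlo into the triple-precision
++ //  accumulator t2:t1:t0. In C, roughly:
++ //    t0 += Rlo;  carry = (t0 < Rlo);
++ //    t1 += Rhi + carry;  t2 += carry_out_of(t1);      // sketch only
++ //  Each MACC(x, y, t0, t1, t2) of the C model further below is thus a
++ //  mul_d/mulh_du pair followed by one acc() call.)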
++ ldx_d(Ra, Pa_base, Iam); ++ ldx_d(Rb, Pb_base, Ibn); ++ ++ // MACC(Rm, Rn, t0, t1, t2); ++ // Rm = Pm_base[Iam]; ++ // Rn = Pn_base[Ibn]; ++ mul_d(Rlo_mn, Rm, Rn); ++ mulh_du(Rhi_mn, Rm, Rn); ++ acc(Rhi_ab, Rlo_ab, t0, t1, t2, Rm, Rn); ++ ldx_d(Rm, Pm_base, Iam); ++ ldx_d(Rn, Pn_base, Ibn); ++ } ++ ++ void post1() { ++ block_comment("post1"); ++ ++ // MACC(Ra, Rb, t0, t1, t2); ++ mul_d(Rlo_ab, Ra, Rb); ++ mulh_du(Rhi_ab, Ra, Rb); ++ acc(Rhi_mn, Rlo_mn, t0, t1, t2, Ra, Rb); // The pending m*n ++ acc(Rhi_ab, Rlo_ab, t0, t1, t2, Ra, Rb); ++ ++ // Pm_base[Iam] = Rm = t0 * inv; ++ mul_d(Rm, t0, inv); ++ stx_d(Rm, Pm_base, Iam); ++ ++ // MACC(Rm, Rn, t0, t1, t2); ++ // t0 = t1; t1 = t2; t2 = 0; ++ mulh_du(Rhi_mn, Rm, Rn); ++ ++#ifndef PRODUCT ++ // assert(m[i] * n[0] + t0 == 0, "broken Montgomery multiply"); ++ { ++ mul_d(Rlo_mn, Rm, Rn); ++ add_d(Rlo_mn, t0, Rlo_mn); ++ Label ok; ++ beqz(Rlo_mn, ok); { ++ stop("broken Montgomery multiply"); ++ } bind(ok); ++ } ++#endif ++ ++ // We have very carefully set things up so that ++ // m[i]*n[0] + t0 == 0 (mod b), so we don't have to calculate ++ // the lower half of Rm * Rn because we know the result already: ++ // it must be -t0. t0 + (-t0) must generate a carry iff ++ // t0 != 0. So, rather than do a mul and an adds we just set ++ // the carry flag iff t0 is nonzero. ++ // ++ // mul_d(Rlo_mn, Rm, Rn); ++ // add_d(t0, t0, Rlo_mn); ++ OR(Ra, t1, Rhi_mn); ++ sltu(Rb, R0, t0); ++ add_d(t0, t1, Rhi_mn); ++ add_d(t0, t0, Rb); ++ sltu(Rb, t0, Ra); ++ add_d(t1, t2, Rb); ++ move(t2, R0); ++ } ++ ++ void pre2(Register i, Register len) { ++ block_comment("pre2"); ++ ++ // Rj == i-len ++ sub_w(Rj, i, len); ++ ++ // Iam = i - len; ++ // Ibn = len; ++ slli_w(Iam, Rj, LogBytesPerWord); ++ slli_w(Ibn, len, LogBytesPerWord); ++ ++ // Ra = Pa_base[++Iam]; ++ // Rb = Pb_base[--Ibn]; ++ // Rm = Pm_base[++Iam]; ++ // Rn = Pn_base[--Ibn]; ++ addi_d(Iam, Iam, wordSize); ++ addi_d(Ibn, Ibn, -wordSize); ++ ++ ldx_d(Ra, Pa_base, Iam); ++ ldx_d(Rb, Pb_base, Ibn); ++ ldx_d(Rm, Pm_base, Iam); ++ ldx_d(Rn, Pn_base, Ibn); ++ ++ move(Rhi_mn, R0); ++ move(Rlo_mn, R0); ++ } ++ ++ void post2(Register i, Register len) { ++ block_comment("post2"); ++ ++ sub_w(Rj, i, len); ++ slli_w(Iam, Rj, LogBytesPerWord); ++ ++ add_d(t0, t0, Rlo_mn); // The pending m*n, low part ++ ++ // As soon as we know the least significant digit of our result, ++ // store it. ++ // Pm_base[i-len] = t0; ++ stx_d(t0, Pm_base, Iam); ++ ++ // t0 = t1; t1 = t2; t2 = 0; ++ OR(Ra, t1, Rhi_mn); ++ sltu(Rb, t0, Rlo_mn); ++ add_d(t0, t1, Rhi_mn); // The pending m*n, high part ++ add_d(t0, t0, Rb); ++ sltu(Rb, t0, Ra); ++ add_d(t1, t2, Rb); ++ move(t2, R0); ++ } ++ ++ // A carry in t0 after Montgomery multiplication means that we ++ // should subtract multiples of n from our result in m. We'll ++ // keep doing that until there is no carry. 
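++ // In C, approximately:
++ //   while (t0) {
++ //     unsigned long b = 0;                    // running borrow
++ //     for (i = 0; i < len; i++) {
++ //       unsigned long t = (Pm_base[i] < b);
++ //       unsigned long m = Pm_base[i] - b;
++ //       b = (m < Pn_base[i]) | t;
++ //       Pm_base[i] = m - Pn_base[i];
++ //     }
++ //     t0 -= b;
++ //   }
++ // (hand transcription of the assembly below, for reference only)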
++ void normalize(Register len) { ++ block_comment("normalize"); ++ // while (t0) ++ // t0 = sub(Pm_base, Pn_base, t0, len); ++ Label loop, post, again; ++ Register cnt = t1, i = t2, b = Ra, t = Rb; // Re-use registers; we're done with them now ++ beqz(t0, post); { ++ bind(again); { ++ move(i, R0); ++ move(b, R0); ++ slli_w(cnt, len, LogBytesPerWord); ++ align(16); ++ bind(loop); { ++ ldx_d(Rm, Pm_base, i); ++ ldx_d(Rn, Pn_base, i); ++ sltu(t, Rm, b); ++ sub_d(Rm, Rm, b); ++ sltu(b, Rm, Rn); ++ sub_d(Rm, Rm, Rn); ++ OR(b, b, t); ++ stx_d(Rm, Pm_base, i); ++ addi_w(i, i, BytesPerWord); ++ } blt(i, cnt, loop); ++ sub_d(t0, t0, b); ++ } bnez(t0, again); ++ } bind(post); ++ } ++ ++ // Move memory at s to d, reversing words. ++ // Increments d to end of copied memory ++ // Destroys tmp1, tmp2, tmp3 ++ // Preserves len ++ // Leaves s pointing to the address which was in d at start ++ void reverse(Register d, Register s, Register len, Register tmp1, Register tmp2) { ++ assert(tmp1 < S0 && tmp2 < S0, "register corruption"); ++ ++ alsl_d(s, len, s, LogBytesPerWord - 1); ++ move(tmp1, len); ++ unroll_2(tmp1, &MontgomeryMultiplyGenerator::reverse1, d, s, tmp2); ++ slli_w(s, len, LogBytesPerWord); ++ sub_d(s, d, s); ++ } ++ ++ // where ++ void reverse1(Register d, Register s, Register tmp) { ++ ld_d(tmp, s, -wordSize); ++ addi_d(s, s, -wordSize); ++ addi_d(d, d, wordSize); ++ rotri_d(tmp, tmp, 32); ++ st_d(tmp, d, -wordSize); ++ } ++ ++ public: ++ /** ++ * Fast Montgomery multiplication. The derivation of the ++ * algorithm is in A Cryptographic Library for the Motorola ++ * DSP56000, Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237. ++ * ++ * Arguments: ++ * ++ * Inputs for multiplication: ++ * A0 - int array elements a ++ * A1 - int array elements b ++ * A2 - int array elements n (the modulus) ++ * A3 - int length ++ * A4 - int inv ++ * A5 - int array elements m (the result) ++ * ++ * Inputs for squaring: ++ * A0 - int array elements a ++ * A1 - int array elements n (the modulus) ++ * A2 - int length ++ * A3 - int inv ++ * A4 - int array elements m (the result) ++ * ++ */ ++ address generate_multiply() { ++ Label argh, nothing; ++ bind(argh); ++ stop("MontgomeryMultiply total_allocation must be <= 8192"); ++ ++ align(CodeEntryAlignment); ++ address entry = pc(); ++ ++ beqz(Rlen, nothing); ++ ++ enter(); ++ ++ // Make room. ++ sltui(Ra, Rlen, 513); ++ beqz(Ra, argh); ++ slli_w(Ra, Rlen, exact_log2(4 * sizeof (jint))); ++ sub_d(Ra, SP, Ra); ++ ++ srli_w(Rlen, Rlen, 1); // length in longwords = len/2 ++ ++ { ++ // Copy input args, reversing as we go. We use Ra as a ++ // temporary variable. ++ reverse(Ra, Pa_base, Rlen, t0, t1); ++ if (!_squaring) ++ reverse(Ra, Pb_base, Rlen, t0, t1); ++ reverse(Ra, Pn_base, Rlen, t0, t1); ++ } ++ ++ // Push all call-saved registers and also Pm_base which we'll need ++ // at the end. 
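++ // (Reminder: 'inv' is the usual Montgomery constant -n[0]^-1 mod 2^64;
++ //  the debug-only check just after save_regs() verifies inv * n[0] == -1.)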
++ save_regs(); ++ ++#ifndef PRODUCT ++ // assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); ++ { ++ ld_d(Rn, Pn_base, 0); ++ li(t0, -1); ++ mul_d(Rlo_mn, Rn, inv); ++ Label ok; ++ beq(Rlo_mn, t0, ok); { ++ stop("broken inverse in Montgomery multiply"); ++ } bind(ok); ++ } ++#endif ++ ++ move(Pm_base, Ra); ++ ++ move(t0, R0); ++ move(t1, R0); ++ move(t2, R0); ++ ++ block_comment("for (int i = 0; i < len; i++) {"); ++ move(Ri, R0); { ++ Label loop, end; ++ bge(Ri, Rlen, end); ++ ++ bind(loop); ++ pre1(Ri); ++ ++ block_comment(" for (j = i; j; j--) {"); { ++ move(Rj, Ri); ++ unroll_2(Rj, &MontgomeryMultiplyGenerator::step, Rlo_ab); ++ } block_comment(" } // j"); ++ ++ post1(); ++ addi_w(Ri, Ri, 1); ++ blt(Ri, Rlen, loop); ++ bind(end); ++ block_comment("} // i"); ++ } ++ ++ block_comment("for (int i = len; i < 2*len; i++) {"); ++ move(Ri, Rlen); ++ slli_w(Rlen2, Rlen, 1); { ++ Label loop, end; ++ bge(Ri, Rlen2, end); ++ ++ bind(loop); ++ pre2(Ri, Rlen); ++ ++ block_comment(" for (j = len*2-i-1; j; j--) {"); { ++ sub_w(Rj, Rlen2, Ri); ++ addi_w(Rj, Rj, -1); ++ unroll_2(Rj, &MontgomeryMultiplyGenerator::step, Rlo_ab); ++ } block_comment(" } // j"); ++ ++ post2(Ri, Rlen); ++ addi_w(Ri, Ri, 1); ++ blt(Ri, Rlen2, loop); ++ bind(end); ++ } ++ block_comment("} // i"); ++ ++ normalize(Rlen); ++ ++ move(Ra, Pm_base); // Save Pm_base in Ra ++ restore_regs(); // Restore caller's Pm_base ++ ++ // Copy our result into caller's Pm_base ++ reverse(Pm_base, Ra, Rlen, t0, t1); ++ ++ leave(); ++ bind(nothing); ++ jr(RA); ++ ++ return entry; ++ } ++ // In C, approximately: ++ ++ // void ++ // montgomery_multiply(unsigned long Pa_base[], unsigned long Pb_base[], ++ // unsigned long Pn_base[], unsigned long Pm_base[], ++ // unsigned long inv, int len) { ++ // unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator ++ // unsigned long Ra, Rb, Rn, Rm; ++ // int i, Iam, Ibn; ++ ++ // assert(inv * Pn_base[0] == -1UL, "broken inverse in Montgomery multiply"); ++ ++ // for (i = 0; i < len; i++) { ++ // int j; ++ ++ // Iam = 0; ++ // Ibn = i; ++ ++ // Ra = Pa_base[Iam]; ++ // Rb = Pb_base[Iam]; ++ // Rm = Pm_base[Ibn]; ++ // Rn = Pn_base[Ibn]; ++ ++ // int iters = i; ++ // for (j = 0; iters--; j++) { ++ // assert(Ra == Pa_base[j] && Rb == Pb_base[i-j], "must be"); ++ // MACC(Ra, Rb, t0, t1, t2); ++ // Ra = Pa_base[++Iam]; ++ // Rb = pb_base[--Ibn]; ++ // assert(Rm == Pm_base[j] && Rn == Pn_base[i-j], "must be"); ++ // MACC(Rm, Rn, t0, t1, t2); ++ // Rm = Pm_base[++Iam]; ++ // Rn = Pn_base[--Ibn]; ++ // } ++ ++ // assert(Ra == Pa_base[i] && Rb == Pb_base[0], "must be"); ++ // MACC(Ra, Rb, t0, t1, t2); ++ // Pm_base[Iam] = Rm = t0 * inv; ++ // assert(Rm == Pm_base[i] && Rn == Pn_base[0], "must be"); ++ // MACC(Rm, Rn, t0, t1, t2); ++ ++ // assert(t0 == 0, "broken Montgomery multiply"); ++ ++ // t0 = t1; t1 = t2; t2 = 0; ++ // } ++ ++ // for (i = len; i < 2*len; i++) { ++ // int j; ++ ++ // Iam = i - len; ++ // Ibn = len; ++ ++ // Ra = Pa_base[++Iam]; ++ // Rb = Pb_base[--Ibn]; ++ // Rm = Pm_base[++Iam]; ++ // Rn = Pn_base[--Ibn]; ++ ++ // int iters = len*2-i-1; ++ // for (j = i-len+1; iters--; j++) { ++ // assert(Ra == Pa_base[j] && Rb == Pb_base[i-j], "must be"); ++ // MACC(Ra, Rb, t0, t1, t2); ++ // Ra = Pa_base[++Iam]; ++ // Rb = Pb_base[--Ibn]; ++ // assert(Rm == Pm_base[j] && Rn == Pn_base[i-j], "must be"); ++ // MACC(Rm, Rn, t0, t1, t2); ++ // Rm = Pm_base[++Iam]; ++ // Rn = Pn_base[--Ibn]; ++ // } ++ ++ // Pm_base[i-len] = t0; ++ // t0 = t1; t1 = t2; t2 = 0; ++ // } ++ ++ // while 
(t0) ++ // t0 = sub(Pm_base, Pn_base, t0, len); ++ // } ++ }; ++ ++ // Initialization ++ void generate_initial() { ++ // Generates all stubs and initializes the entry points ++ ++ //------------------------------------------------------------- ++ //----------------------------------------------------------- ++ // entry points that exist in all platforms ++ // Note: This is code that could be shared among different platforms - however the benefit seems to be smaller ++ // than the disadvantage of having a much more complicated generator structure. ++ // See also comment in stubRoutines.hpp. ++ StubRoutines::_forward_exception_entry = generate_forward_exception(); ++ StubRoutines::_call_stub_entry = generate_call_stub(StubRoutines::_call_stub_return_address); ++ // is referenced by megamorphic call ++ StubRoutines::_catch_exception_entry = generate_catch_exception(); ++ ++ StubRoutines::_throw_StackOverflowError_entry = ++ generate_throw_exception("StackOverflowError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError), ++ false); ++ StubRoutines::_throw_delayed_StackOverflowError_entry = ++ generate_throw_exception("delayed StackOverflowError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_delayed_StackOverflowError), ++ false); ++ ++ if (UseCRC32Intrinsics) { ++ // set table address before stub generation which use it ++ StubRoutines::_crc_table_adr = (address)StubRoutines::la::_crc_table; ++ StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32(); ++ } ++ ++ if (UseCRC32CIntrinsics) { ++ StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C(); ++ } ++ } ++ ++ void generate_all() { ++ // Generates all stubs and initializes the entry points ++ ++ // These entry points require SharedInfo::stack0 to be set up in ++ // non-core builds and need to be relocatable, so they each ++ // fabricate a RuntimeStub internally. ++ StubRoutines::_throw_AbstractMethodError_entry = generate_throw_exception("AbstractMethodError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError), false); ++ ++ StubRoutines::_throw_IncompatibleClassChangeError_entry = generate_throw_exception("IncompatibleClassChangeError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime:: throw_IncompatibleClassChangeError), false); ++ ++ StubRoutines::_throw_NullPointerException_at_call_entry = generate_throw_exception("NullPointerException at call throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call), false); ++ ++ // entry points that are platform specific ++ ++ // support for verify_oop (must happen after universe_init) ++ StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop(); ++#ifndef CORE ++ // arraycopy stubs used by compilers ++ generate_arraycopy_stubs(); ++#endif ++ ++ if (UseLSX && vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dsin)) { ++ StubRoutines::_dsin = generate_dsin_dcos(/* isCos = */ false); ++ } ++ ++ if (UseLSX && vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dcos)) { ++ StubRoutines::_dcos = generate_dsin_dcos(/* isCos = */ true); ++ } ++ ++ // Safefetch stubs. 
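++ // SafeFetch32/SafeFetchN let VM code probe memory that may be unmapped,
++ // e.g. SafeFetch32(adr, -1) yields *adr, or -1 if the load faults; the
++ // fault_pc/continuation_pc recorded below are what the signal handler
++ // uses to resume at the "return errValue" path instead of crashing.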
++ generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, ++ &StubRoutines::_safefetch32_fault_pc, ++ &StubRoutines::_safefetch32_continuation_pc); ++ generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry, ++ &StubRoutines::_safefetchN_fault_pc, ++ &StubRoutines::_safefetchN_continuation_pc); ++ ++#ifdef COMPILER2 ++ if (UseMulAddIntrinsic) { ++ StubRoutines::_mulAdd = generate_mulAdd(); ++ } ++ ++ if (UseMontgomeryMultiplyIntrinsic) { ++ StubCodeMark mark(this, "StubRoutines", "montgomeryMultiply"); ++ MontgomeryMultiplyGenerator g(_masm, false /* squaring */); ++ StubRoutines::_montgomeryMultiply = g.generate_multiply(); ++ } ++ ++ if (UseMontgomerySquareIntrinsic) { ++ StubCodeMark mark(this, "StubRoutines", "montgomerySquare"); ++ MontgomeryMultiplyGenerator g(_masm, true /* squaring */); ++ // We use generate_multiply() rather than generate_square() ++ // because it's faster for the sizes of modulus we care about. ++ StubRoutines::_montgomerySquare = g.generate_multiply(); ++ } ++#endif ++ ++ if (UseAESIntrinsics) { ++ StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock(false); ++ StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock(false); ++ StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_aescrypt_encryptBlock(true); ++ StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_aescrypt_decryptBlock(true); ++ } ++ ++ if (UseSHA1Intrinsics) { ++ generate_sha1_implCompress("sha1_implCompress", StubRoutines::_sha1_implCompress, StubRoutines::_sha1_implCompressMB); ++ } ++ ++ if (UseSHA256Intrinsics) { ++ generate_sha256_implCompress("sha256_implCompress", StubRoutines::_sha256_implCompress, StubRoutines::_sha256_implCompressMB); ++ } ++ } ++ ++ public: ++ StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) { ++ if (all) { ++ generate_all(); ++ } else { ++ generate_initial(); ++ } ++ } ++}; // end class declaration ++ ++void StubGenerator_generate(CodeBuffer* code, bool all) { ++ StubGenerator g(code, all); ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/stubRoutines_loongarch_64.cpp b/src/hotspot/cpu/loongarch/stubRoutines_loongarch_64.cpp +--- a/src/hotspot/cpu/loongarch/stubRoutines_loongarch_64.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/stubRoutines_loongarch_64.cpp 2024-01-30 10:00:11.841431732 +0800 +@@ -0,0 +1,178 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "runtime/deoptimization.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++ ++// a description of how to extend it, see the stubRoutines.hpp file. ++ ++//find the last fp value ++address StubRoutines::la::_call_stub_compiled_return = NULL; ++ ++/** ++ * crc_table[] from jdk/src/share/native/java/util/zip/zlib-1.2.5/crc32.h ++ */ ++juint StubRoutines::la::_crc_table[] = ++{ ++ 0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL, ++ 0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL, ++ 0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL, ++ 0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL, ++ 0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL, ++ 0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL, ++ 0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL, ++ 0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL, ++ 0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL, ++ 0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL, ++ 0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL, ++ 0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL, ++ 0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL, ++ 0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL, ++ 0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL, ++ 0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL, ++ 0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL, ++ 0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL, ++ 0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL, ++ 0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL, ++ 0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL, ++ 0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL, ++ 0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL, ++ 0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL, ++ 0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL, ++ 0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL, ++ 0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL, ++ 0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL, ++ 0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL, ++ 0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL, ++ 0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL, ++ 0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL, ++ 0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL, ++ 0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL, ++ 0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL, ++ 0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL, ++ 0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL, ++ 0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL, ++ 0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL, ++ 0x026d930aUL, 0x9c0906a9UL, 
0xeb0e363fUL, 0x72076785UL, 0x05005713UL, ++ 0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL, ++ 0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL, ++ 0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL, ++ 0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL, ++ 0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL, ++ 0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL, ++ 0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL, ++ 0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL, ++ 0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL, ++ 0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL, ++ 0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL, ++ 0x2d02ef8dUL ++}; ++ ++ATTRIBUTE_ALIGNED(64) juint StubRoutines::la::_npio2_hw[] = { ++ // first, various coefficient values: 0.5, invpio2, pio2_1, pio2_1t, pio2_2, ++ // pio2_2t, pio2_3, pio2_3t ++ // This is a small optimization wich keeping double[8] values in int[] table ++ // to have less address calculation instructions ++ // ++ // invpio2: 53 bits of 2/pi (enough for cases when trigonometric argument is small) ++ // pio2_1: first 33 bit of pi/2 ++ // pio2_1t: pi/2 - pio2_1 ++ // pio2_2: second 33 bit of pi/2 ++ // pio2_2t: pi/2 - (pio2_1+pio2_2) ++ // pio2_3: third 33 bit of pi/2 ++ // pio2_3t: pi/2 - (pio2_1+pio2_2+pio2_3) ++ 0x00000000, 0x3fe00000, // 0.5 ++ 0x6DC9C883, 0x3FE45F30, // invpio2 = 6.36619772367581382433e-01 ++ 0x54400000, 0x3FF921FB, // pio2_1 = 1.57079632673412561417e+00 ++ 0x1A626331, 0x3DD0B461, // pio2_1t = 6.07710050650619224932e-11 ++ 0x1A600000, 0x3DD0B461, // pio2_2 = 6.07710050630396597660e-11 ++ 0x2E037073, 0x3BA3198A, // pio2_2t = 2.02226624879595063154e-21 ++ 0x2E000000, 0x3BA3198A, // pio2_3 = 2.02226624871116645580e-21 ++ 0x252049C1, 0x397B839A, // pio2_3t = 8.47842766036889956997e-32 ++ // now, npio2_hw itself ++ 0x3FF921FB, 0x400921FB, 0x4012D97C, 0x401921FB, 0x401F6A7A, 0x4022D97C, ++ 0x4025FDBB, 0x402921FB, 0x402C463A, 0x402F6A7A, 0x4031475C, 0x4032D97C, ++ 0x40346B9C, 0x4035FDBB, 0x40378FDB, 0x403921FB, 0x403AB41B, 0x403C463A, ++ 0x403DD85A, 0x403F6A7A, 0x40407E4C, 0x4041475C, 0x4042106C, 0x4042D97C, ++ 0x4043A28C, 0x40446B9C, 0x404534AC, 0x4045FDBB, 0x4046C6CB, 0x40478FDB, ++ 0x404858EB, 0x404921FB ++}; ++ ++// Coefficients for sin(x) polynomial approximation: S1..S6. ++// See kernel_sin comments in macroAssembler_loongarch64_trig.cpp for details ++ATTRIBUTE_ALIGNED(64) jdouble StubRoutines::la::_dsin_coef[] = { ++ -1.66666666666666324348e-01, // 0xBFC5555555555549 ++ 8.33333333332248946124e-03, // 0x3F8111111110F8A6 ++ -1.98412698298579493134e-04, // 0xBF2A01A019C161D5 ++ 2.75573137070700676789e-06, // 0x3EC71DE357B1FE7D ++ -2.50507602534068634195e-08, // 0xBE5AE5E68A2B9CEB ++ 1.58969099521155010221e-10 // 0x3DE5D93A5ACFD57C ++}; ++ ++// Coefficients for cos(x) polynomial approximation: C1..C6. 
++// See kernel_cos comments in macroAssembler_loongarch64_trig.cpp for details ++ATTRIBUTE_ALIGNED(64) jdouble StubRoutines::la::_dcos_coef[] = { ++ 4.16666666666666019037e-02, // c0x3FA555555555554C ++ -1.38888888888741095749e-03, // 0xBF56C16C16C15177 ++ 2.48015872894767294178e-05, // 0x3EFA01A019CB1590 ++ -2.75573143513906633035e-07, // 0xBE927E4F809C52AD ++ 2.08757232129817482790e-09, // 0x3E21EE9EBDB4B1C4 ++ -1.13596475577881948265e-11 // 0xBDA8FAE9BE8838D4 ++}; ++ ++// Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi. ++// Used in cases of very large argument. 396 hex digits is enough to support ++// required precision. ++// Converted to double to avoid unnecessary conversion in code ++// NOTE: table looks like original int table: {0xA2F983, 0x6E4E44,...} with ++// only (double) conversion added ++ATTRIBUTE_ALIGNED(64) jdouble StubRoutines::la::_two_over_pi[] = { ++ (double)0xA2F983, (double)0x6E4E44, (double)0x1529FC, (double)0x2757D1, (double)0xF534DD, (double)0xC0DB62, ++ (double)0x95993C, (double)0x439041, (double)0xFE5163, (double)0xABDEBB, (double)0xC561B7, (double)0x246E3A, ++ (double)0x424DD2, (double)0xE00649, (double)0x2EEA09, (double)0xD1921C, (double)0xFE1DEB, (double)0x1CB129, ++ (double)0xA73EE8, (double)0x8235F5, (double)0x2EBB44, (double)0x84E99C, (double)0x7026B4, (double)0x5F7E41, ++ (double)0x3991D6, (double)0x398353, (double)0x39F49C, (double)0x845F8B, (double)0xBDF928, (double)0x3B1FF8, ++ (double)0x97FFDE, (double)0x05980F, (double)0xEF2F11, (double)0x8B5A0A, (double)0x6D1F6D, (double)0x367ECF, ++ (double)0x27CB09, (double)0xB74F46, (double)0x3F669E, (double)0x5FEA2D, (double)0x7527BA, (double)0xC7EBE5, ++ (double)0xF17B3D, (double)0x0739F7, (double)0x8A5292, (double)0xEA6BFB, (double)0x5FB11F, (double)0x8D5D08, ++ (double)0x560330, (double)0x46FC7B, (double)0x6BABF0, (double)0xCFBC20, (double)0x9AF436, (double)0x1DA9E3, ++ (double)0x91615E, (double)0xE61B08, (double)0x659985, (double)0x5F14A0, (double)0x68408D, (double)0xFFD880, ++ (double)0x4D7327, (double)0x310606, (double)0x1556CA, (double)0x73A8C9, (double)0x60E27B, (double)0xC08C6B, ++}; ++ ++// Pi over 2 value ++ATTRIBUTE_ALIGNED(64) jdouble StubRoutines::la::_pio2[] = { ++ 1.57079625129699707031e+00, // 0x3FF921FB40000000 ++ 7.54978941586159635335e-08, // 0x3E74442D00000000 ++ 5.39030252995776476554e-15, // 0x3CF8469880000000 ++ 3.28200341580791294123e-22, // 0x3B78CC5160000000 ++ 1.27065575308067607349e-29, // 0x39F01B8380000000 ++ 1.22933308981111328932e-36, // 0x387A252040000000 ++ 2.73370053816464559624e-44, // 0x36E3822280000000 ++ 2.16741683877804819444e-51, // 0x3569F31D00000000 ++}; +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/stubRoutines_loongarch.hpp b/src/hotspot/cpu/loongarch/stubRoutines_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/stubRoutines_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/stubRoutines_loongarch.hpp 2024-01-30 10:00:11.841431732 +0800 +@@ -0,0 +1,67 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_STUBROUTINES_LOONGARCH_64_HPP ++#define CPU_LOONGARCH_STUBROUTINES_LOONGARCH_64_HPP ++ ++// This file holds the platform specific parts of the StubRoutines ++// definition. See stubRoutines.hpp for a description on how to ++// extend it. ++ ++static bool returns_to_call_stub(address return_pc){ ++ return return_pc == _call_stub_return_address||return_pc == la::get_call_stub_compiled_return(); ++} ++ ++enum platform_dependent_constants { ++ code_size1 = 20000, // simply increase if too small (assembler will crash if too small) ++ code_size2 = 60000 // simply increase if too small (assembler will crash if too small) ++}; ++ ++class la { ++ friend class StubGenerator; ++ friend class VMStructs; ++ private: ++ // If we call compiled code directly from the call stub we will ++ // need to adjust the return back to the call stub to a specialized ++ // piece of code that can handle compiled results and cleaning the fpu ++ // stack. The variable holds that location. ++ static address _call_stub_compiled_return; ++ static juint _crc_table[]; ++ // begin trigonometric tables block. See comments in .cpp file ++ static juint _npio2_hw[]; ++ static jdouble _two_over_pi[]; ++ static jdouble _pio2[]; ++ static jdouble _dsin_coef[]; ++ static jdouble _dcos_coef[]; ++ // end trigonometric tables block ++ ++public: ++ // Call back points for traps in compiled code ++ static address get_call_stub_compiled_return() { return _call_stub_compiled_return; } ++ static void set_call_stub_compiled_return(address ret){ _call_stub_compiled_return = ret; } ++ ++}; ++ ++#endif // CPU_LOONGARCH_STUBROUTINES_LOONGARCH_64_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/templateInterpreterGenerator_loongarch.cpp b/src/hotspot/cpu/loongarch/templateInterpreterGenerator_loongarch.cpp +--- a/src/hotspot/cpu/loongarch/templateInterpreterGenerator_loongarch.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/templateInterpreterGenerator_loongarch.cpp 2024-01-30 10:00:11.841431732 +0800 +@@ -0,0 +1,2269 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "interpreter/bytecodeHistogram.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "interpreter/templateInterpreterGenerator.hpp" ++#include "interpreter/templateTable.hpp" ++#include "oops/arrayOop.hpp" ++#include "oops/methodData.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/jvmtiThreadState.hpp" ++#include "runtime/arguments.hpp" ++#include "runtime/deoptimization.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/synchronizer.hpp" ++#include "runtime/timer.hpp" ++#include "runtime/vframeArray.hpp" ++#include "utilities/debug.hpp" ++ ++#define __ _masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++int TemplateInterpreter::InterpreterCodeSize = 500 * K; ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#else ++#define BLOCK_COMMENT(str) __ block_comment(str) ++#endif ++ ++address TemplateInterpreterGenerator::generate_slow_signature_handler() { ++ address entry = __ pc(); ++ // Rmethod: method ++ // LVP: pointer to locals ++ // A3: first stack arg ++ __ move(A3, SP); ++ __ addi_d(SP, SP, -18 * wordSize); ++ __ st_d(RA, SP, 0); ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::slow_signature_handler), ++ Rmethod, LVP, A3); ++ ++ // V0: result handler ++ ++ // Stack layout: ++ // ... ++ // 18 stack arg0 <--- old sp ++ // 17 floatReg arg7 ++ // ... ++ // 10 floatReg arg0 ++ // 9 float/double identifiers ++ // 8 IntReg arg7 ++ // ... ++ // 2 IntReg arg1 ++ // 1 aligned slot ++ // SP: 0 return address ++ ++ // Do FP first so we can use A3 as temp ++ __ ld_d(A3, Address(SP, 9 * wordSize)); // float/double identifiers ++ ++ for (int i= 0; i < Argument::n_float_register_parameters; i++) { ++ FloatRegister floatreg = as_FloatRegister(i + FA0->encoding()); ++ Label isdouble, done; ++ ++ __ andi(AT, A3, 1 << i); ++ __ bnez(AT, isdouble); ++ __ fld_s(floatreg, SP, (10 + i) * wordSize); ++ __ b(done); ++ __ bind(isdouble); ++ __ fld_d(floatreg, SP, (10 + i) * wordSize); ++ __ bind(done); ++ } ++ ++ // A0 is for env. ++ // If the mothed is not static, A1 will be corrected in generate_native_entry. ++ for (int i= 1; i < Argument::n_register_parameters; i++) { ++ Register reg = as_Register(i + A0->encoding()); ++ __ ld_d(reg, SP, (1 + i) * wordSize); ++ } ++ ++ // A0/V0 contains the result from the call of ++ // InterpreterRuntime::slow_signature_handler so we don't touch it ++ // here. It will be loaded with the JNIEnv* later. 
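++  // Epilogue: reload the return address saved at SP + 0, release the 18-word
++  // scratch area allocated on entry and return to the caller.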
++  __ ld_d(RA, SP, 0);
++  __ addi_d(SP, SP, 18 * wordSize);
++  __ jr(RA);
++  return entry;
++}
++
++/**
++ * Method entry for static native methods:
++ *   int java.util.zip.CRC32.update(int crc, int b)
++ */
++address TemplateInterpreterGenerator::generate_CRC32_update_entry() {
++  if (UseCRC32Intrinsics) {
++    address entry = __ pc();
++
++    // Rmethod: Method*
++    // Rsender: senderSP must be preserved for slow path
++    // SP: args
++
++    Label slow_path;
++    // If we need a safepoint check, generate full interpreter entry.
++    __ li(AT, SafepointSynchronize::_not_synchronized);
++    __ li(T8, (long)SafepointSynchronize::address_of_state());
++    __ bne(T8, AT, slow_path);
++
++    // We don't generate local frame and don't align stack because
++    // we call stub code and there is no safepoint on this path.
++
++    const Register crc = A0; // crc
++    const Register val = A1; // source java byte value
++    const Register tbl = A2; // scratch
++
++    // Arguments are reversed on java expression stack
++    __ ld_w(val, SP, 0); // byte value
++    __ ld_w(crc, SP, wordSize); // Initial CRC
++
++    __ li(tbl, (long)StubRoutines::crc_table_addr());
++
++    __ nor(crc, crc, R0); // ~crc
++    __ update_byte_crc32(crc, val, tbl);
++    __ nor(crc, crc, R0); // ~crc
++
++    // restore caller SP
++    __ move(SP, Rsender);
++    __ jr(RA);
++
++    // generate a vanilla native entry as the slow path
++    __ bind(slow_path);
++    __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native));
++    return entry;
++  }
++  return NULL;
++}
++
++/**
++ * Method entry for static native methods:
++ *   int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len)
++ *   int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len)
++ */
++address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) {
++  if (UseCRC32Intrinsics) {
++    address entry = __ pc();
++
++    // Rmethod: Method*
++    // Rsender: senderSP must be preserved for slow path
++    // SP: args
++
++    Label slow_path;
++    // If we need a safepoint check, generate full interpreter entry.
++    __ li(AT, SafepointSynchronize::_not_synchronized);
++    __ li(T8, (long)SafepointSynchronize::address_of_state());
++    __ bne(T8, AT, slow_path);
++
++    // We don't generate local frame and don't align stack because
++    // we call stub code and there is no safepoint on this path.
++ ++ const Register crc = A0; // crc ++ const Register buf = A1; // source java byte array address ++ const Register len = A2; // length ++ const Register tmp = A3; ++ ++ const Register off = len; // offset (never overlaps with 'len') ++ ++ // Arguments are reversed on java expression stack ++ // Calculate address of start element ++ __ ld_w(off, SP, wordSize); // int offset ++ __ ld_d(buf, SP, 2 * wordSize); // byte[] buf | long buf ++ __ add_d(buf, buf, off); // + offset ++ if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) { ++ __ ld_w(crc, SP, 4 * wordSize); // long crc ++ } else { ++ __ addi_d(buf, buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size ++ __ ld_w(crc, SP, 3 * wordSize); // long crc ++ } ++ ++ // Can now load 'len' since we're finished with 'off' ++ __ ld_w(len, SP, 0); // length ++ ++ __ kernel_crc32(crc, buf, len, tmp); ++ ++ // restore caller SP ++ __ move(SP, Rsender); ++ __ jr(RA); ++ ++ // generate a vanilla native entry as the slow path ++ __ bind(slow_path); ++ __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native)); ++ return entry; ++ } ++ return NULL; ++} ++ ++/** ++ * Method entry for intrinsic-candidate (non-native) methods: ++ * int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end) ++ * int java.util.zip.CRC32C.updateDirectByteBuffer(int crc, long buf, int off, int end) ++ * Unlike CRC32, CRC32C does not have any methods marked as native ++ * CRC32C also uses an "end" variable instead of the length variable CRC32 uses ++ */ ++address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { ++ if (UseCRC32CIntrinsics) { ++ address entry = __ pc(); ++ ++ const Register crc = A0; // initial crc ++ const Register buf = A1; // source java byte array address ++ const Register len = A2; // len argument to the kernel ++ const Register tmp = A3; ++ ++ const Register end = len; // index of last element to process ++ const Register off = crc; // offset ++ ++ __ ld_w(end, SP, 0); // int end ++ __ ld_w(off, SP, wordSize); // int offset ++ __ sub_w(len, end, off); // calculate length ++ __ ld_d(buf, SP, 2 * wordSize); // byte[] buf | long buf ++ __ add_d(buf, buf, off); // + offset ++ if (kind == Interpreter::java_util_zip_CRC32C_updateDirectByteBuffer) { ++ __ ld_w(crc, SP, 4 * wordSize); // int crc ++ } else { ++ __ addi_d(buf, buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size ++ __ ld_w(crc, SP, 3 * wordSize); // int crc ++ } ++ ++ __ kernel_crc32c(crc, buf, len, tmp); ++ ++ // restore caller SP ++ __ move(SP, Rsender); ++ __ jr(RA); ++ ++ return entry; ++ } ++ return NULL; ++} ++ ++// ++// Various method entries ++// ++ ++address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) { ++ if (!InlineIntrinsics) return NULL; // Generate a vanilla entry ++ ++ // These don't need a safepoint check because they aren't virtually ++ // callable. We won't enter these intrinsics from compiled code. ++ // If in the future we added an intrinsic which was virtually callable ++ // we'd have to worry about how to safepoint so that this code is used. 
++ ++ // mathematical functions inlined by compiler ++ // (interpreter must provide identical implementation ++ // in order to avoid monotonicity bugs when switching ++ // from interpreter to compiler in the middle of some ++ // computation) ++ // ++ // stack: ++ // [ arg ] <-- sp ++ // [ arg ] ++ // retaddr in ra ++ ++ address entry_point = NULL; ++ switch (kind) { ++ case Interpreter::java_lang_math_abs: ++ entry_point = __ pc(); ++ __ fld_d(FA0, SP, 0); ++ __ fabs_d(F0, FA0); ++ __ move(SP, Rsender); ++ break; ++ case Interpreter::java_lang_math_sqrt: ++ entry_point = __ pc(); ++ __ fld_d(FA0, SP, 0); ++ __ fsqrt_d(F0, FA0); ++ __ move(SP, Rsender); ++ break; ++ case Interpreter::java_lang_math_sin : ++ case Interpreter::java_lang_math_cos : ++ case Interpreter::java_lang_math_tan : ++ case Interpreter::java_lang_math_log : ++ case Interpreter::java_lang_math_log10 : ++ case Interpreter::java_lang_math_exp : ++ entry_point = __ pc(); ++ __ fld_d(FA0, SP, 0); ++ __ move(SP, Rsender); ++ __ movgr2fr_d(FS0, RA); ++ __ movgr2fr_d(FS1, SP); ++ __ bstrins_d(SP, R0, exact_log2(StackAlignmentInBytes) - 1, 0); ++ generate_transcendental_entry(kind, 1); ++ __ movfr2gr_d(SP, FS1); ++ __ movfr2gr_d(RA, FS0); ++ break; ++ case Interpreter::java_lang_math_pow : ++ entry_point = __ pc(); ++ __ fld_d(FA0, SP, 2 * Interpreter::stackElementSize); ++ __ fld_d(FA1, SP, 0); ++ __ move(SP, Rsender); ++ __ movgr2fr_d(FS0, RA); ++ __ movgr2fr_d(FS1, SP); ++ __ bstrins_d(SP, R0, exact_log2(StackAlignmentInBytes) - 1, 0); ++ generate_transcendental_entry(kind, 2); ++ __ movfr2gr_d(SP, FS1); ++ __ movfr2gr_d(RA, FS0); ++ break; ++ case Interpreter::java_lang_math_fmaD : ++ if (UseFMA) { ++ entry_point = __ pc(); ++ __ fld_d(FA0, SP, 4 * Interpreter::stackElementSize); ++ __ fld_d(FA1, SP, 2 * Interpreter::stackElementSize); ++ __ fld_d(FA2, SP, 0); ++ __ fmadd_d(F0, FA0, FA1, FA2); ++ __ move(SP, Rsender); ++ } ++ break; ++ case Interpreter::java_lang_math_fmaF : ++ if (UseFMA) { ++ entry_point = __ pc(); ++ __ fld_s(FA0, SP, 2 * Interpreter::stackElementSize); ++ __ fld_s(FA1, SP, Interpreter::stackElementSize); ++ __ fld_s(FA2, SP, 0); ++ __ fmadd_s(F0, FA0, FA1, FA2); ++ __ move(SP, Rsender); ++ } ++ break; ++ default: ++ ; ++ } ++ if (entry_point) { ++ __ jr(RA); ++ } ++ ++ return entry_point; ++} ++ ++ // double trigonometrics and transcendentals ++ // static jdouble dsin(jdouble x); ++ // static jdouble dcos(jdouble x); ++ // static jdouble dtan(jdouble x); ++ // static jdouble dlog(jdouble x); ++ // static jdouble dlog10(jdouble x); ++ // static jdouble dexp(jdouble x); ++ // static jdouble dpow(jdouble x, jdouble y); ++ ++void TemplateInterpreterGenerator::generate_transcendental_entry(AbstractInterpreter::MethodKind kind, int fpargs) { ++ address fn; ++ switch (kind) { ++ case Interpreter::java_lang_math_sin : ++ if (StubRoutines::dsin() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dsin); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dsin()); ++ } ++ break; ++ case Interpreter::java_lang_math_cos : ++ if (StubRoutines::dcos() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dcos); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dcos()); ++ } ++ break; ++ case Interpreter::java_lang_math_tan : ++ if (StubRoutines::dtan() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dtan); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dtan()); ++ } ++ break; ++ case Interpreter::java_lang_math_log : ++ if (StubRoutines::dlog() == NULL) { ++ fn 
= CAST_FROM_FN_PTR(address, SharedRuntime::dlog); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dlog()); ++ } ++ break; ++ case Interpreter::java_lang_math_log10 : ++ if (StubRoutines::dlog10() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dlog10); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dlog10()); ++ } ++ break; ++ case Interpreter::java_lang_math_exp : ++ if (StubRoutines::dexp() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dexp); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dexp()); ++ } ++ break; ++ case Interpreter::java_lang_math_pow : ++ if (StubRoutines::dpow() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dpow); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dpow()); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ fn = NULL; // unreachable ++ } ++ __ li(T4, fn); ++ __ jalr(T4); ++} ++ ++// Abstract method entry ++// Attempt to execute abstract method. Throw exception ++address TemplateInterpreterGenerator::generate_abstract_entry(void) { ++ ++ // Rmethod: methodOop ++ // V0: receiver (unused) ++ // Rsender : sender 's sp ++ address entry_point = __ pc(); ++ ++ // abstract method entry ++ // throw exception ++ // adjust stack to what a normal return would do ++ __ empty_expression_stack(); ++ __ restore_bcp(); ++ __ restore_locals(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodErrorWithMethod), Rmethod); ++ // the call_VM checks for exception, so we should never return here. ++ __ should_not_reach_here(); ++ ++ return entry_point; ++} ++ ++ ++const int method_offset = frame::interpreter_frame_method_offset * wordSize; ++const int bci_offset = frame::interpreter_frame_bcp_offset * wordSize; ++const int locals_offset = frame::interpreter_frame_locals_offset * wordSize; ++ ++//----------------------------------------------------------------------------- ++ ++address TemplateInterpreterGenerator::generate_StackOverflowError_handler() { ++ address entry = __ pc(); ++ ++#ifdef ASSERT ++ { ++ Label L; ++ __ addi_d(T1, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ sub_d(T1, T1, SP); // T1 = maximal sp for current fp ++ __ bge(T1, R0, L); // check if frame is complete ++ __ stop("interpreter frame not set up"); ++ __ bind(L); ++ } ++#endif // ASSERT ++ // Restore bcp under the assumption that the current frame is still ++ // interpreted ++ __ restore_bcp(); ++ ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ // throw exception ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_StackOverflowError)); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_ArrayIndexOutOfBounds_handler() { ++ address entry = __ pc(); ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ // ??? 
convention: expect array in register A1 ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_ArrayIndexOutOfBoundsException), A1, A2); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_ClassCastException_handler() { ++ address entry = __ pc(); ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ __ empty_FPU_stack(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_ClassCastException), FSR); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_exception_handler_common( ++ const char* name, const char* message, bool pass_oop) { ++ assert(!pass_oop || message == NULL, "either oop or message but not both"); ++ address entry = __ pc(); ++ ++ // expression stack must be empty before entering the VM if an exception happened ++ __ empty_expression_stack(); ++ // setup parameters ++ __ li(A1, (long)name); ++ if (pass_oop) { ++ __ call_VM(V0, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::create_klass_exception), A1, FSR); ++ } else { ++ __ li(A2, (long)message); ++ __ call_VM(V0, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::create_exception), A1, A2); ++ } ++ // throw exception ++ __ jmp(Interpreter::throw_exception_entry(), relocInfo::none); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, int step, size_t index_size) { ++ ++ address entry = __ pc(); ++ // S8 be used in C2 ++ __ li(S8, (long)Interpreter::dispatch_table(itos)); ++ // Restore stack bottom in case i2c adjusted stack ++ __ ld_d(SP, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize)); ++ // and NULL it as marker that sp is now tos until next java call ++ __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ __ restore_bcp(); ++ __ restore_locals(); ++ ++ // mdp: T8 ++ // ret: FSR ++ // tmp: T4 ++ if (state == atos) { ++ Register mdp = T8; ++ Register tmp = T4; ++ __ profile_return_type(mdp, FSR, tmp); ++ } ++ ++ ++ const Register cache = T4; ++ const Register index = T3; ++ __ get_cache_and_index_at_bcp(cache, index, 1, index_size); ++ ++ const Register flags = cache; ++ __ alsl_d(AT, index, cache, Address::times_ptr - 1); ++ __ ld_w(flags, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); ++ __ andi(flags, flags, ConstantPoolCacheEntry::parameter_size_mask); ++ __ alsl_d(SP, flags, SP, Interpreter::logStackElementSize - 1); ++ ++ Register java_thread; ++#ifndef OPT_THREAD ++ java_thread = T4; ++ __ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ ++ __ check_and_handle_popframe(java_thread); ++ __ check_and_handle_earlyret(java_thread); ++ ++ __ dispatch_next(state, step); ++ ++ return entry; ++} ++ ++ ++address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state, ++ int step, ++ address continuation) { ++ address entry = __ pc(); ++ // S8 be used in C2 ++ __ li(S8, (long)Interpreter::dispatch_table(itos)); ++ // NULL last_sp until next java call ++ __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ restore_bcp(); ++ __ restore_locals(); ++ ++#if INCLUDE_JVMCI ++ // Check if we need to take lock at entry of synchronized method. This can ++ // only occur on method entry so emit it only for vtos with step 0. ++ if (EnableJVMCI && state == vtos && step == 0) { ++ Label L; ++ __ ld_b(AT, Address(TREG, JavaThread::pending_monitorenter_offset())); ++ __ beqz(AT, L); ++ // Clear flag. 
++ __ st_b(R0, Address(TREG, JavaThread::pending_monitorenter_offset())); ++ // Take lock. ++ lock_method(); ++ __ bind(L); ++ } else { ++#ifdef ASSERT ++ if (EnableJVMCI) { ++ Label L; ++ __ ld_b(AT, Address(TREG, JavaThread::pending_monitorenter_offset())); ++ __ beqz(AT, L); ++ __ stop("unexpected pending monitor in deopt entry"); ++ __ bind(L); ++ } ++#endif ++ } ++#endif ++ ++ // handle exceptions ++ { ++ Label L; ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ ld_d(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_pending_exception)); ++ __ should_not_reach_here(); ++ __ bind(L); ++ } ++ if (continuation == NULL) { ++ __ dispatch_next(state, step); ++ } else { ++ __ jump_to_entry(continuation); ++ } ++ return entry; ++} ++ ++int AbstractInterpreter::BasicType_as_index(BasicType type) { ++ int i = 0; ++ switch (type) { ++ case T_BOOLEAN: i = 0; break; ++ case T_CHAR : i = 1; break; ++ case T_BYTE : i = 2; break; ++ case T_SHORT : i = 3; break; ++ case T_INT : // fall through ++ case T_LONG : // fall through ++ case T_VOID : i = 4; break; ++ case T_FLOAT : i = 5; break; ++ case T_DOUBLE : i = 6; break; ++ case T_OBJECT : // fall through ++ case T_ARRAY : i = 7; break; ++ default : ShouldNotReachHere(); ++ } ++ assert(0 <= i && i < AbstractInterpreter::number_of_result_handlers, ++ "index out of bounds"); ++ return i; ++} ++ ++ ++address TemplateInterpreterGenerator::generate_result_handler_for( ++ BasicType type) { ++ address entry = __ pc(); ++ switch (type) { ++ case T_BOOLEAN: __ c2bool(V0); break; ++ case T_CHAR : __ bstrpick_d(V0, V0, 15, 0); break; ++ case T_BYTE : __ sign_extend_byte (V0); break; ++ case T_SHORT : __ sign_extend_short(V0); break; ++ case T_INT : /* nothing to do */ break; ++ case T_FLOAT : /* nothing to do */ break; ++ case T_DOUBLE : /* nothing to do */ break; ++ case T_OBJECT : ++ { ++ __ ld_d(V0, FP, frame::interpreter_frame_oop_temp_offset * wordSize); ++ __ verify_oop(V0); // and verify it ++ } ++ break; ++ default : ShouldNotReachHere(); ++ } ++ __ jr(RA); // return from result handler ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_safept_entry_for( ++ TosState state, ++ address runtime_entry) { ++ address entry = __ pc(); ++ __ push(state); ++ __ call_VM(noreg, runtime_entry); ++ __ dispatch_via(vtos, Interpreter::_normal_table.table_for(vtos)); ++ return entry; ++} ++ ++ ++ ++// Helpers for commoning out cases in the various type of method entries. ++// ++ ++ ++// increment invocation count & check for overflow ++// ++// Note: checking for negative value instead of overflow ++// so we have a 'sticky' overflow test ++// ++// prerequisites : method in T0, invocation counter in T3 ++void TemplateInterpreterGenerator::generate_counter_incr( ++ Label* overflow, ++ Label* profile_method, ++ Label* profile_method_continue) { ++ Label done; ++ // Note: In tiered we increment either counters in Method* or in MDO depending if we're profiling or not. ++ if (TieredCompilation) { ++ int increment = InvocationCounter::count_increment; ++ int mask = ((1 << Tier0InvokeNotifyFreqLog) - 1) << InvocationCounter::count_shift; ++ Label no_mdo; ++ if (ProfileInterpreter) { ++ // Are we profiling? 
++ __ ld_d(FSR, Address(Rmethod, Method::method_data_offset())); ++ __ beqz(FSR, no_mdo); ++ // Increment counter in the MDO ++ const Address mdo_invocation_counter(FSR, in_bytes(MethodData::invocation_counter_offset()) + ++ in_bytes(InvocationCounter::counter_offset())); ++ __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, T3, false, Assembler::zero, overflow); ++ __ b(done); ++ } ++ __ bind(no_mdo); ++ // Increment counter in MethodCounters ++ const Address invocation_counter(FSR, ++ MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset()); ++ __ get_method_counters(Rmethod, FSR, done); ++ __ increment_mask_and_jump(invocation_counter, increment, mask, T3, false, Assembler::zero, overflow); ++ __ bind(done); ++ } else { // not TieredCompilation ++ const Address invocation_counter(FSR, in_bytes(MethodCounters::invocation_counter_offset()) ++ + in_bytes(InvocationCounter::counter_offset())); ++ const Address backedge_counter (FSR, in_bytes(MethodCounters::backedge_counter_offset()) ++ + in_bytes(InvocationCounter::counter_offset())); ++ ++ __ get_method_counters(Rmethod, FSR, done); ++ ++ if (ProfileInterpreter) { // %%% Merge this into methodDataOop ++ __ ld_w(T4, FSR, in_bytes(MethodCounters::interpreter_invocation_counter_offset())); ++ __ addi_d(T4, T4, 1); ++ __ st_w(T4, FSR, in_bytes(MethodCounters::interpreter_invocation_counter_offset())); ++ } ++ // Update standard invocation counters ++ __ ld_w(T3, invocation_counter); ++ __ increment(T3, InvocationCounter::count_increment); ++ __ st_w(T3, invocation_counter); // save invocation count ++ ++ __ ld_w(FSR, backedge_counter); // load backedge counter ++ __ li(AT, InvocationCounter::count_mask_value); // mask out the status bits ++ __ andr(FSR, FSR, AT); ++ ++ __ add_d(T3, T3, FSR); // add both counters ++ ++ if (ProfileInterpreter && profile_method != NULL) { ++ // Test to see if we should create a method data oop ++ if (Assembler::is_simm(InvocationCounter::InterpreterProfileLimit, 12)) { ++ __ slti(AT, T3, InvocationCounter::InterpreterProfileLimit); ++ __ bne_far(AT, R0, *profile_method_continue); ++ } else { ++ __ li(AT, (long)&InvocationCounter::InterpreterProfileLimit); ++ __ ld_w(AT, AT, 0); ++ __ blt_far(T3, AT, *profile_method_continue, true /* signed */); ++ } ++ ++ // if no method data exists, go to profile_method ++ __ test_method_data_pointer(FSR, *profile_method); ++ } ++ ++ if (Assembler::is_simm(CompileThreshold, 12)) { ++ __ srli_w(AT, T3, InvocationCounter::count_shift); ++ __ slti(AT, AT, CompileThreshold); ++ __ beq_far(AT, R0, *overflow); ++ } else { ++ __ li(AT, (long)&InvocationCounter::InterpreterInvocationLimit); ++ __ ld_w(AT, AT, 0); ++ __ bge_far(T3, AT, *overflow, true /* signed */); ++ } ++ ++ __ bind(done); ++ } ++} ++ ++void TemplateInterpreterGenerator::generate_counter_overflow(Label& do_continue) { ++ ++ // Asm interpreter on entry ++ // S7 - locals ++ // S0 - bcp ++ // Rmethod - method ++ // FP - interpreter frame ++ ++ // On return (i.e. 
jump to entry_point)
++  // Rmethod - method
++  // RA - return address of interpreter caller
++  // tos - the last parameter to Java method
++  // SP - sender_sp
++
++  // the bcp is valid if and only if it's not null
++  __ call_VM(NOREG, CAST_FROM_FN_PTR(address,
++             InterpreterRuntime::frequency_counter_overflow), R0);
++  __ ld_d(Rmethod, FP, method_offset);
++  // Preserve invariant that S0/S7 contain bcp/locals of sender frame
++  __ b_far(do_continue);
++}
++
++// See if we've got enough room on the stack for locals plus overhead.
++// The expression stack grows down incrementally, so the normal guard
++// page mechanism will work for that.
++//
++// NOTE: Since the additional locals are also always pushed (this wasn't
++// obvious in generate_method_entry), the guard should work for them
++// too.
++//
++// Args:
++// T2: number of additional locals this frame needs (what we must check)
++// T0: Method*
++//
++void TemplateInterpreterGenerator::generate_stack_overflow_check(void) {
++  // see if we've got enough room on the stack for locals plus overhead.
++  // the expression stack grows down incrementally, so the normal guard
++  // page mechanism will work for that.
++  //
++  // Registers live on entry:
++  //
++  // T0: Method*
++  // T2: number of additional locals this frame needs (what we must check)
++
++  // NOTE: since the additional locals are also always pushed (this wasn't obvious in
++  // generate_method_entry), the guard should work for them too.
++  //
++
++  const int entry_size = frame::interpreter_frame_monitor_size() * wordSize;
++
++  // total overhead size: entry_size + (saved fp thru expr stack bottom).
++  // be sure to change this if you add/subtract anything to/from the overhead area
++  const int overhead_size = -(frame::interpreter_frame_initial_sp_offset*wordSize)
++                            + entry_size;
++
++  const int page_size = os::vm_page_size();
++  Label after_frame_check;
++
++  // see if the frame is greater than one page in size. If so,
++  // then we need to verify there is enough stack space remaining
++  // for the additional locals.
++  __ li(AT, (page_size - overhead_size) / Interpreter::stackElementSize);
++  __ bge(AT, T2, after_frame_check);
++
++  // compute sp as if this were going to be the last frame on
++  // the stack before the red zone
++#ifndef OPT_THREAD
++  Register thread = T1;
++  __ get_thread(thread);
++#else
++  Register thread = TREG;
++#endif
++
++  // locals + overhead, in bytes
++  __ slli_d(T3, T2, Interpreter::logStackElementSize);
++  __ addi_d(T3, T3, overhead_size); // locals * 4 + overhead_size --> T3
++
++#ifdef ASSERT
++  Label stack_base_okay, stack_size_okay;
++  // verify that thread stack base is non-zero
++  __ ld_d(AT, thread, in_bytes(Thread::stack_base_offset()));
++  __ bne(AT, R0, stack_base_okay);
++  __ stop("stack base is zero");
++  __ bind(stack_base_okay);
++  // verify that thread stack size is non-zero
++  __ ld_d(AT, thread, in_bytes(Thread::stack_size_offset()));
++  __ bne(AT, R0, stack_size_okay);
++  __ stop("stack size is zero");
++  __ bind(stack_size_okay);
++#endif
++
++  // Add stack base to locals and subtract stack size
++  __ ld_d(AT, thread, in_bytes(Thread::stack_base_offset())); // stack_base --> AT
++  __ add_d(T3, T3, AT); // locals * 4 + overhead_size + stack_base --> T3
++  __ ld_d(AT, thread, in_bytes(Thread::stack_size_offset())); // stack_size --> AT
++  __ sub_d(T3, T3, AT); // locals * 4 + overhead_size + stack_base - stack_size --> T3
++
++  // Use the bigger size for banging.
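++  // (i.e. the larger of the stack shadow zone and the guard zone, so the
++  // headroom check is conservative for either one)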
++ const int max_bang_size = (int)MAX2(JavaThread::stack_shadow_zone_size(), JavaThread::stack_guard_zone_size()); ++ ++ // add in the redzone and yellow size ++ __ li(AT, max_bang_size); ++ __ add_d(T3, T3, AT); ++ ++ // check against the current stack bottom ++ __ blt(T3, SP, after_frame_check); ++ ++ // Note: the restored frame is not necessarily interpreted. ++ // Use the shared runtime version of the StackOverflowError. ++ __ move(SP, Rsender); ++ assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "stub not yet generated"); ++ __ jmp(StubRoutines::throw_StackOverflowError_entry(), relocInfo::runtime_call_type); ++ ++ // all done with frame size check ++ __ bind(after_frame_check); ++} ++ ++// Allocate monitor and lock method (asm interpreter) ++// Rmethod - Method* ++void TemplateInterpreterGenerator::lock_method(void) { ++ // synchronize method ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ ++#ifdef ASSERT ++ { Label L; ++ __ ld_w(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(T0, T0, JVM_ACC_SYNCHRONIZED); ++ __ bne(T0, R0, L); ++ __ stop("method doesn't need synchronization"); ++ __ bind(L); ++ } ++#endif // ASSERT ++ // get synchronization object ++ { ++ Label done; ++ __ ld_w(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(T2, T0, JVM_ACC_STATIC); ++ __ ld_d(T0, LVP, Interpreter::local_offset_in_bytes(0)); ++ __ beq(T2, R0, done); ++ __ load_mirror(T0, Rmethod, T4); ++ __ bind(done); ++ } ++ // add space for monitor & lock ++ __ addi_d(SP, SP, (-1) * entry_size); // add space for a monitor entry ++ __ st_d(SP, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ // set new monitor block top ++ __ st_d(T0, SP, BasicObjectLock::obj_offset_in_bytes()); // store object ++ // FIXME: I do not know what lock_object will do and what it will need ++ __ move(c_rarg0, SP); // object address ++ __ lock_object(c_rarg0); ++} ++ ++// Generate a fixed interpreter frame. This is identical setup for ++// interpreted methods and for native methods hence the shared code. ++void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { ++ ++ // [ local var m-1 ] <--- sp ++ // ... ++ // [ local var 0 ] ++ // [ argumnet word n-1 ] <--- T0(sender's sp) ++ // ... 
++ // [ argument word 0 ] <--- S7 ++ ++ // initialize fixed part of activation frame ++ // sender's sp in Rsender ++ int i = 0; ++ int frame_size = 10; ++#ifndef CORE ++ ++frame_size; ++#endif ++ __ addi_d(SP, SP, (-frame_size) * wordSize); ++ __ st_d(RA, SP, (frame_size - 1) * wordSize); // save return address ++ __ st_d(FP, SP, (frame_size - 2) * wordSize); // save sender's fp ++ __ addi_d(FP, SP, (frame_size - 2) * wordSize); ++ __ st_d(Rsender, FP, (-++i) * wordSize); // save sender's sp ++ __ st_d(R0, FP,(-++i) * wordSize); //save last_sp as null ++ __ st_d(LVP, FP, (-++i) * wordSize); // save locals offset ++ __ ld_d(BCP, Rmethod, in_bytes(Method::const_offset())); // get constMethodOop ++ __ addi_d(BCP, BCP, in_bytes(ConstMethod::codes_offset())); // get codebase ++ __ st_d(Rmethod, FP, (-++i) * wordSize); // save Method* ++ // Get mirror and store it in the frame as GC root for this Method* ++ __ load_mirror(T2, Rmethod, T4); ++ __ st_d(T2, FP, (-++i) * wordSize); // Mirror ++#ifndef CORE ++ if (ProfileInterpreter) { ++ Label method_data_continue; ++ __ ld_d(AT, Rmethod, in_bytes(Method::method_data_offset())); ++ __ beq(AT, R0, method_data_continue); ++ __ addi_d(AT, AT, in_bytes(MethodData::data_offset())); ++ __ bind(method_data_continue); ++ __ st_d(AT, FP, (-++i) * wordSize); ++ } else { ++ __ st_d(R0, FP, (-++i) * wordSize); ++ } ++#endif // !CORE ++ ++ __ ld_d(T2, Rmethod, in_bytes(Method::const_offset())); ++ __ ld_d(T2, T2, in_bytes(ConstMethod::constants_offset())); ++ __ ld_d(T2, T2, ConstantPool::cache_offset_in_bytes()); ++ __ st_d(T2, FP, (-++i) * wordSize); // set constant pool cache ++ if (native_call) { ++ __ st_d(R0, FP, (-++i) * wordSize); // no bcp ++ } else { ++ __ st_d(BCP, FP, (-++i) * wordSize); // set bcp ++ } ++ __ st_d(SP, FP, (-++i) * wordSize); // reserve word for pointer to expression stack bottom ++ assert(i + 2 == frame_size, "i + 2 should be equal to frame_size"); ++} ++ ++// End of helpers ++ ++// Various method entries ++//------------------------------------------------------------------------------------------------------------------------ ++// ++// ++ ++// Method entry for java.lang.ref.Reference.get. ++address TemplateInterpreterGenerator::generate_Reference_get_entry(void) { ++ // Code: _aload_0, _getfield, _areturn ++ // parameter size = 1 ++ // ++ // The code that gets generated by this routine is split into 2 parts: ++ // 1. The "intrinsified" code for G1 (or any SATB based GC), ++ // 2. The slow path - which is an expansion of the regular method entry. ++ // ++ // Notes:- ++ // * In the G1 code we do not check whether we need to block for ++ // a safepoint. If G1 is enabled then we must execute the specialized ++ // code for Reference.get (except when the Reference object is null) ++ // so that we can log the value in the referent field with an SATB ++ // update buffer. ++ // If the code for the getfield template is modified so that the ++ // G1 pre-barrier code is executed when the current method is ++ // Reference.get() then going through the normal method entry ++ // will be fine. ++ // * The G1 code can, however, check the receiver object (the instance ++ // of java.lang.Reference) and jump to the slow path if null. If the ++ // Reference object is null then we obviously cannot fetch the referent ++ // and so we don't need to call the G1 pre-barrier. Thus we can use the ++ // regular method entry code to generate the NPE. ++ // ++ // This code is based on generate_accessor_entry. 
++ // ++ // Rmethod: Method* ++ // Rsender: senderSP must preserve for slow path, set SP to it on fast path ++ // RA is live. It must be saved around calls. ++ ++ address entry = __ pc(); ++ ++ const int referent_offset = java_lang_ref_Reference::referent_offset; ++ ++ Label slow_path; ++ const Register local_0 = A0; ++ // Check if local 0 != NULL ++ // If the receiver is null then it is OK to jump to the slow path. ++ __ ld_d(local_0, Address(SP, 0)); ++ __ beqz(local_0, slow_path); ++ ++ // Load the value of the referent field. ++ const Address field_address(local_0, referent_offset); ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->load_at(_masm, IN_HEAP | ON_WEAK_OOP_REF, T_OBJECT, local_0, field_address, /*tmp1*/ T4, /*tmp2*/ noreg); ++ ++ // areturn ++ __ move(SP, Rsender); ++ __ jr(RA); ++ ++ // generate a vanilla interpreter entry as the slow path ++ __ bind(slow_path); ++ __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals)); ++ return entry; ++} ++ ++// Interpreter stub for calling a native method. (asm interpreter) ++// This sets up a somewhat different looking stack for calling the ++// native method than the typical interpreter frame setup. ++address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { ++ // determine code generation flags ++ bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; ++ // Rsender: sender's sp ++ // Rmethod: Method* ++ address entry_point = __ pc(); ++ ++#ifndef CORE ++ const Address invocation_counter(Rmethod,in_bytes(MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset())); ++#endif ++ // get parameter size (always needed) ++ // the size in the java stack ++ __ ld_d(V0, Rmethod, in_bytes(Method::const_offset())); ++ __ ld_hu(V0, V0, in_bytes(ConstMethod::size_of_parameters_offset())); ++ ++ // native calls don't need the stack size check since they have no expression stack ++ // and the arguments are already on the stack and we only add a handful of words ++ // to the stack ++ ++ // Rmethod: Method* ++ // V0: size of parameters ++ // Layout of frame at this point ++ // ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ ++ // for natives the size of locals is zero ++ ++ // compute beginning of parameters (S7) ++ __ slli_d(LVP, V0, Address::times_8); ++ __ addi_d(LVP, LVP, (-1) * wordSize); ++ __ add_d(LVP, LVP, SP); ++ ++ ++ // add 2 zero-initialized slots for native calls ++ // 1 slot for native oop temp offset (setup via runtime) ++ // 1 slot for static native result handler3 (setup via runtime) ++ __ push2(R0, R0); ++ ++ // Layout of frame at this point ++ // [ method holder mirror ] <--- sp ++ // [ result type info ] ++ // [ argument word n-1 ] <--- T0 ++ // ... ++ // [ argument word 0 ] <--- LVP ++ ++ ++#ifndef CORE ++ if (inc_counter) __ ld_w(T3, invocation_counter); // (pre-)fetch invocation count ++#endif ++ ++ // initialize fixed part of activation frame ++ generate_fixed_frame(true); ++ // after this function, the layout of frame is as following ++ // ++ // [ monitor block top ] <--- sp ( the top monitor entry ) ++ // [ byte code pointer (0) ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Mirror ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ method holder mirror ] ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- sender's sp ++ // ... 
++ // [ argument word 0 ] <--- S7 ++ ++ ++ // make sure method is native & not abstract ++#ifdef ASSERT ++ __ ld_w(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ { ++ Label L; ++ __ andi(AT, T0, JVM_ACC_NATIVE); ++ __ bne(AT, R0, L); ++ __ stop("tried to execute native method as non-native"); ++ __ bind(L); ++ } ++ { ++ Label L; ++ __ andi(AT, T0, JVM_ACC_ABSTRACT); ++ __ beq(AT, R0, L); ++ __ stop("tried to execute abstract method in interpreter"); ++ __ bind(L); ++ } ++#endif ++ ++ // Since at this point in the method invocation the exception handler ++ // would try to exit the monitor of synchronized methods which hasn't ++ // been entered yet, we set the thread local variable ++ // _do_not_unlock_if_synchronized to true. The remove_activation will ++ // check this flag. ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ li(AT, (int)true); ++ __ st_b(AT, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++#ifndef CORE ++ // increment invocation count & check for overflow ++ Label invocation_counter_overflow; ++ if (inc_counter) { ++ generate_counter_incr(&invocation_counter_overflow, NULL, NULL); ++ } ++ ++ Label continue_after_compile; ++ __ bind(continue_after_compile); ++#endif // CORE ++ ++ bang_stack_shadow_pages(true); ++ ++ // reset the _do_not_unlock_if_synchronized flag ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ st_b(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++ // check for synchronized methods ++ // Must happen AFTER invocation_counter check and stack overflow check, ++ // so method is not locked if overflows. ++ if (synchronized) { ++ lock_method(); ++ } else { ++ // no synchronization necessary ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld_w(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(AT, T0, JVM_ACC_SYNCHRONIZED); ++ __ beq(AT, R0, L); ++ __ stop("method needs synchronization"); ++ __ bind(L); ++ } ++#endif ++ } ++ ++ // after method_lock, the layout of frame is as following ++ // ++ // [ monitor entry ] <--- sp ++ // ... ++ // [ monitor entry ] ++ // [ monitor block top ] ( the top monitor entry ) ++ // [ byte code pointer (0) ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Mirror ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ method holder mirror ] ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... ++ // [ argument word 0 ] <--- S7 ++ ++ // start execution ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld_d(AT, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ beq(AT, SP, L); ++ __ stop("broken stack frame setup in interpreter in asm"); ++ __ bind(L); ++ } ++#endif ++ ++ // jvmti/jvmpi support ++ __ notify_method_entry(); ++ ++ // work registers ++ const Register method = Rmethod; ++ const Register t = T8; ++ ++ __ get_method(method); ++ { ++ Label L, Lstatic; ++ __ ld_d(t,method,in_bytes(Method::const_offset())); ++ __ ld_hu(t, t, in_bytes(ConstMethod::size_of_parameters_offset())); ++ // LoongArch ABI: caller does not reserve space for the register auguments. 
++ // A0 and A1(if needed) ++ __ ld_w(AT, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(AT, AT, JVM_ACC_STATIC); ++ __ beq(AT, R0, Lstatic); ++ __ addi_d(t, t, 1); ++ __ bind(Lstatic); ++ __ addi_d(t, t, -7); ++ __ bge(R0, t, L); ++ __ slli_d(t, t, Address::times_8); ++ __ sub_d(SP, SP, t); ++ __ bind(L); ++ } ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ __ move(AT, SP); ++ // [ ] <--- sp ++ // ... (size of parameters - 8 ) ++ // [ monitor entry ] ++ // ... ++ // [ monitor entry ] ++ // [ monitor block top ] ( the top monitor entry ) ++ // [ byte code pointer (0) ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Mirror ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ method holder mirror ] ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... ++ // [ argument word 0 ] <--- LVP ++ ++ // get signature handler ++ { ++ Label L; ++ __ ld_d(T4, method, in_bytes(Method::signature_handler_offset())); ++ __ bne(T4, R0, L); ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::prepare_native_call), method); ++ __ get_method(method); ++ __ ld_d(T4, method, in_bytes(Method::signature_handler_offset())); ++ __ bind(L); ++ } ++ ++ // call signature handler ++ // FIXME: when change codes in InterpreterRuntime, note this point ++ // from: begin of parameters ++ assert(InterpreterRuntime::SignatureHandlerGenerator::from() == LVP, "adjust this code"); ++ // to: current sp ++ assert(InterpreterRuntime::SignatureHandlerGenerator::to () == SP, "adjust this code"); ++ // temp: T3 ++ assert(InterpreterRuntime::SignatureHandlerGenerator::temp() == t , "adjust this code"); ++ ++ __ jalr(T4); ++ __ get_method(method); ++ ++ // ++ // if native function is static, and its second parameter has type length of double word, ++ // and first parameter has type length of word, we have to reserve one word ++ // for the first parameter, according to LoongArch abi. ++ // if native function is not static, and its third parameter has type length of double word, ++ // and second parameter has type length of word, we have to reserve one word for the second ++ // parameter. ++ // ++ ++ ++ // result handler is in V0 ++ // set result handler ++ __ st_d(V0, FP, (frame::interpreter_frame_result_handler_offset)*wordSize); ++ ++#define FIRSTPARA_SHIFT_COUNT 5 ++#define SECONDPARA_SHIFT_COUNT 9 ++#define THIRDPARA_SHIFT_COUNT 13 ++#define PARA_MASK 0xf ++ ++ // pass mirror handle if static call ++ { ++ Label L; ++ __ ld_w(t, method, in_bytes(Method::access_flags_offset())); ++ __ andi(AT, t, JVM_ACC_STATIC); ++ __ beq(AT, R0, L); ++ ++ // get mirror ++ __ load_mirror(t, method, T4); ++ // copy mirror into activation frame ++ __ st_d(t, FP, frame::interpreter_frame_oop_temp_offset * wordSize); ++ // pass handle to mirror ++ __ addi_d(t, FP, frame::interpreter_frame_oop_temp_offset * wordSize); ++ __ move(A1, t); ++ __ bind(L); ++ } ++ ++ // [ mthd holder mirror ptr ] <--- sp --------------------| (only for static method) ++ // [ ] | ++ // ... size of parameters(or +1) | ++ // [ monitor entry ] | ++ // ... 
| ++ // [ monitor entry ] | ++ // [ monitor block top ] ( the top monitor entry ) | ++ // [ byte code pointer (0) ] (if native, bcp = 0) | ++ // [ constant pool cache ] | ++ // [ Mirror ] | ++ // [ Method* ] | ++ // [ locals offset ] | ++ // [ sender's sp ] | ++ // [ sender's fp ] | ++ // [ return address ] <--- fp | ++ // [ method holder mirror ] <----------------------------| ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... ++ // [ argument word 0 ] <--- S7 ++ ++ // get native function entry point ++ { Label L; ++ __ ld_d(T4, method, in_bytes(Method::native_function_offset())); ++ __ li(T6, SharedRuntime::native_method_throw_unsatisfied_link_error_entry()); ++ __ bne(T6, T4, L); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::prepare_native_call), method); ++ __ get_method(method); ++ __ ld_d(T4, method, in_bytes(Method::native_function_offset())); ++ __ bind(L); ++ } ++ ++ // pass JNIEnv ++ // native function in T4 ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ addi_d(t, thread, in_bytes(JavaThread::jni_environment_offset())); ++ __ move(A0, t); ++ // [ jni environment ] <--- sp ++ // [ mthd holder mirror ptr ] ---------------------------->| (only for static method) ++ // [ ] | ++ // ... size of parameters | ++ // [ monitor entry ] | ++ // ... | ++ // [ monitor entry ] | ++ // [ monitor block top ] ( the top monitor entry ) | ++ // [ byte code pointer (0) ] (if native, bcp = 0) | ++ // [ constant pool cache ] | ++ // [ Mirror ] | ++ // [ Method* ] | ++ // [ locals offset ] | ++ // [ sender's sp ] | ++ // [ sender's fp ] | ++ // [ return address ] <--- fp | ++ // [ method holder mirror ] <----------------------------| ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... ++ // [ argument word 0 ] <--- S7 ++ ++ // Set the last Java PC in the frame anchor to be the return address from ++ // the call to the native method: this will allow the debugger to ++ // generate an accurate stack trace. ++ Label native_return; ++ __ set_last_Java_frame(thread, SP, FP, native_return); ++ ++ // change thread state ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld_w(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ __ addi_d(t, t, (-1) * _thread_in_Java); ++ __ beq(t, R0, L); ++ __ stop("Wrong thread state in native stub"); ++ __ bind(L); ++ } ++#endif ++ ++ __ li(t, _thread_in_native); ++ if (os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); // store release ++ } ++ __ st_w(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ ++ // call native method ++ __ jalr(T4); ++ __ bind(native_return); ++ // result potentially in V0 or F0 ++ ++ ++ // via _last_native_pc and not via _last_jave_sp ++ // NOTE: the order of theses push(es) is known to frame::interpreter_frame_result. ++ // If the order changes or anything else is added to the stack the code in ++ // interpreter_frame_result will have to be changed. 
++ //FIXME, should modify here ++ // save return value to keep the value from being destroyed by other calls ++ __ push(dtos); ++ __ push(ltos); ++ ++ // change thread state ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ li(t, _thread_in_native_trans); ++ if (os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); // store release ++ } ++ __ st_w(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ ++ if(os::is_MP()) { ++ if (UseMembar) { ++ // Force this write out before the read below ++ __ membar(__ AnyAny); ++ } else { ++ // Write serialization page so VM thread can do a pseudo remote membar. ++ // We use the current thread pointer to calculate a thread specific ++ // offset to write to within the page. This minimizes bus traffic ++ // due to cache line collision. ++ __ serialize_memory(thread, A0); ++ } ++ } ++ ++ // check for safepoint operation in progress and/or pending suspend requests ++ { Label Continue; ++ ++ // Don't use call_VM as it will see a possible pending exception and forward it ++ // and never return here preventing us from clearing _last_native_pc down below. ++ // Also can't use call_VM_leaf either as it will check to see if BCP & LVP are ++ // preserved and correspond to the bcp/locals pointers. So we do a runtime call ++ // by hand. ++ // ++ Label slow_path; ++ ++ __ safepoint_poll_acquire(slow_path, thread); ++ __ ld_w(AT, thread, in_bytes(JavaThread::suspend_flags_offset())); ++ __ beq(AT, R0, Continue); ++ __ bind(slow_path); ++ __ move(A0, thread); ++ __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), ++ relocInfo::runtime_call_type); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ //add for compressedoops ++ __ reinit_heapbase(); ++ __ bind(Continue); ++ } ++ ++ // change thread state ++ __ li(t, _thread_in_Java); ++ if (os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); // store release ++ } ++ __ st_w(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ __ reset_last_Java_frame(thread, true); ++ ++ if (CheckJNICalls) { ++ // clear_pending_jni_exception_check ++ __ st_d(R0, thread, in_bytes(JavaThread::pending_jni_exception_check_fn_offset())); ++ } ++ ++ // reset handle block ++ __ ld_d(t, thread, in_bytes(JavaThread::active_handles_offset())); ++ __ st_w(R0, t, JNIHandleBlock::top_offset_in_bytes()); ++ ++ // If result was an oop then unbox and save it in the frame ++ { ++ Label no_oop; ++ __ ld_d(AT, FP, frame::interpreter_frame_result_handler_offset*wordSize); ++ __ li(T0, AbstractInterpreter::result_handler(T_OBJECT)); ++ __ bne(AT, T0, no_oop); ++ __ pop(ltos); ++ // Unbox oop result, e.g. JNIHandles::resolve value. 
++ __ resolve_jobject(V0, thread, T4); ++ __ st_d(V0, FP, (frame::interpreter_frame_oop_temp_offset)*wordSize); ++ // keep stack depth as expected by pushing oop which will eventually be discarded ++ __ push(ltos); ++ __ bind(no_oop); ++ } ++ { ++ Label no_reguard; ++ __ ld_w(t, thread, in_bytes(JavaThread::stack_guard_state_offset())); ++ __ li(AT, (u1)JavaThread::stack_guard_yellow_reserved_disabled); ++ __ bne(t, AT, no_reguard); ++ __ pushad(); ++ __ move(S5_heapbase, SP); ++ __ li(AT, -StackAlignmentInBytes); ++ __ andr(SP, SP, AT); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), relocInfo::runtime_call_type); ++ __ move(SP, S5_heapbase); ++ __ popad(); ++ //add for compressedoops ++ __ reinit_heapbase(); ++ __ bind(no_reguard); ++ } ++ // restore BCP to have legal interpreter frame, ++ // i.e., bci == 0 <=> BCP == code_base() ++ // Can't call_VM until bcp is within reasonable. ++ __ get_method(method); // method is junk from thread_in_native to now. ++ __ ld_d(BCP, method, in_bytes(Method::const_offset())); ++ __ lea(BCP, Address(BCP, in_bytes(ConstMethod::codes_offset()))); ++ // handle exceptions (exception handling will handle unlocking!) ++ { ++ Label L; ++ __ ld_d(t, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(t, R0, L); ++ // Note: At some point we may want to unify this with the code used in ++ // call_VM_base(); ++ // i.e., we should use the StubRoutines::forward_exception code. For now this ++ // doesn't work here because the sp is not correctly set at this point. ++ __ MacroAssembler::call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_pending_exception)); ++ __ should_not_reach_here(); ++ __ bind(L); ++ } ++ ++ // do unlocking if necessary ++ { ++ Label L; ++ __ ld_w(t, method, in_bytes(Method::access_flags_offset())); ++ __ andi(t, t, JVM_ACC_SYNCHRONIZED); ++ __ addi_d(c_rarg0, FP, frame::interpreter_frame_initial_sp_offset * wordSize - (int)sizeof(BasicObjectLock)); ++ __ beq(t, R0, L); ++ // the code below should be shared with interpreter macro assembler implementation ++ { ++ Label unlock; ++ // BasicObjectLock will be first in list, ++ // since this is a synchronized method. However, need ++ // to check that the object has not been unlocked by ++ // an explicit monitorexit bytecode. ++ // address of first monitor ++ ++ __ ld_d(t, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ __ bne(t, R0, unlock); ++ ++ // Entry already unlocked, need to throw exception ++ __ MacroAssembler::call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ __ should_not_reach_here(); ++ ++ __ bind(unlock); ++ __ unlock_object(c_rarg0); ++ } ++ __ bind(L); ++ } ++ ++ // jvmti/jvmpi support ++ // Note: This must happen _after_ handling/throwing any exceptions since ++ // the exception handler code notifies the runtime of method exits ++ // too. If this happens before, method entry/exit notifications are ++ // not properly paired (was bug - gri 11/22/99). 
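The native-call epilogue above walks the thread through _thread_in_Java, _thread_in_native, and _thread_in_native_trans, issuing a store-release before each state store and a full fence (or a serialization-page write) before polling the safepoint and suspend flags. A minimal standalone model of that ordering, not the VM's actual types; the enum values and flag names are illustrative:

    #include <atomic>
    #include <cstdio>

    // Illustrative thread states; the real values live in the JavaThreadState enum.
    enum ThreadState { thread_in_Java, thread_in_native, thread_in_native_trans };

    std::atomic<ThreadState> state{thread_in_Java};
    std::atomic<bool> safepoint_pending{false};   // stands in for the safepoint poll word
    std::atomic<bool> suspend_requested{false};   // stands in for suspend_flags

    void call_native(void (*fn)()) {
      // store-release: earlier frame-anchor stores must be visible before the state change
      state.store(thread_in_native, std::memory_order_release);
      fn();                                        // the actual JNI call
      state.store(thread_in_native_trans, std::memory_order_release);
      // full fence: the state store must be visible before the poll flags are read
      std::atomic_thread_fence(std::memory_order_seq_cst);
      if (safepoint_pending.load() || suspend_requested.load()) {
        // slow path: cooperate with the VM thread (blocking logic elided in this sketch)
        std::printf("slow path: check_special_condition_for_native_trans\n");
      }
      state.store(thread_in_Java, std::memory_order_release);
    }

    int main() { call_native([]{ std::printf("native body\n"); }); }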
++ __ notify_method_exit(vtos, InterpreterMacroAssembler::NotifyJVMTI); ++ ++ // restore potential result in V0, ++ // call result handler to restore potential result in ST0 & handle result ++ ++ __ pop(ltos); ++ __ pop(dtos); ++ ++ __ ld_d(t, FP, (frame::interpreter_frame_result_handler_offset) * wordSize); ++ __ jalr(t); ++ ++ ++ // remove activation ++ __ ld_d(SP, FP, frame::interpreter_frame_sender_sp_offset * wordSize); // get sender sp ++ __ ld_d(RA, FP, frame::java_frame_return_addr_offset * wordSize); // get return address ++ __ ld_d(FP, FP, frame::interpreter_frame_sender_fp_offset * wordSize); // restore sender's fp ++ __ jr(RA); ++ ++#ifndef CORE ++ if (inc_counter) { ++ // Handle overflow of counter and compile method ++ __ bind(invocation_counter_overflow); ++ generate_counter_overflow(continue_after_compile); ++ // entry_point is the beginning of this ++ // function and checks again for compiled code ++ } ++#endif ++ return entry_point; ++} ++ ++void TemplateInterpreterGenerator::bang_stack_shadow_pages(bool native_call) { ++ // Quick & dirty stack overflow checking: bang the stack & handle trap. ++ // Note that we do the banging after the frame is setup, since the exception ++ // handling code expects to find a valid interpreter frame on the stack. ++ // Doing the banging earlier fails if the caller frame is not an interpreter ++ // frame. ++ // (Also, the exception throwing code expects to unlock any synchronized ++ // method receiever, so do the banging after locking the receiver.) ++ ++ // Bang each page in the shadow zone. We can't assume it's been done for ++ // an interpreter frame with greater than a page of locals, so each page ++ // needs to be checked. Only true for non-native. ++ if (UseStackBanging) { ++ const int page_size = os::vm_page_size(); ++ const int n_shadow_pages = ((int)JavaThread::stack_shadow_zone_size()) / page_size; ++ const int start_page = native_call ? n_shadow_pages : 1; ++ BLOCK_COMMENT("bang_stack_shadow_pages:"); ++ for (int pages = start_page; pages <= n_shadow_pages; pages++) { ++ __ bang_stack_with_offset(pages*page_size); ++ } ++ } ++} ++ ++// ++// Generic interpreted method entry to (asm) interpreter ++// ++// Layout of frame just at the entry ++// ++// [ argument word n-1 ] <--- sp ++// ... ++// [ argument word 0 ] ++// assume Method* in Rmethod before call this method. ++// prerequisites to the generated stub : the callee Method* in Rmethod ++// note you must save the caller bcp before call the generated stub ++// ++address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) { ++ // determine code generation flags ++ bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; ++ ++ // Rmethod: Method* ++ // Rsender: sender 's sp ++ address entry_point = __ pc(); ++ // S8 be used in C2 ++ __ li(S8, (long)Interpreter::dispatch_table(itos)); ++ const Address invocation_counter(Rmethod, ++ in_bytes(MethodCounters::invocation_counter_offset() + InvocationCounter::counter_offset())); ++ ++ // get parameter size (always needed) ++ __ ld_d(T3, Rmethod, in_bytes(Method::const_offset())); //T3 --> Rmethod._constMethod ++ __ ld_hu(V0, T3, in_bytes(ConstMethod::size_of_parameters_offset())); ++ ++ // Rmethod: Method* ++ // V0: size of parameters ++ // Rsender: sender 's sp ,could be different frome sp+ wordSize if we call via c2i ++ // get size of locals in words to T2 ++ __ ld_hu(T2, T3, in_bytes(ConstMethod::size_of_locals_offset())); ++ // T2 = no. 
of additional locals, locals include parameters ++ __ sub_d(T2, T2, V0); ++ ++ // see if we've got enough room on the stack for locals plus overhead. ++ // Layout of frame at this point ++ // ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ generate_stack_overflow_check(); ++ // after this function, the layout of frame does not change ++ ++ // compute beginning of parameters (LVP) ++ __ slli_d(LVP, V0, LogBytesPerWord); ++ __ addi_d(LVP, LVP, (-1) * wordSize); ++ __ add_d(LVP, LVP, SP); ++ ++ // T2 - # of additional locals ++ // allocate space for locals ++ // explicitly initialize locals ++ { ++ Label exit, loop; ++ __ beq(T2, R0, exit); ++ ++ __ bind(loop); ++ __ addi_d(SP, SP, (-1) * wordSize); ++ __ addi_d(T2, T2, -1); // until everything initialized ++ __ st_d(R0, SP, 0); // initialize local variables ++ __ bne(T2, R0, loop); ++ ++ __ bind(exit); ++ } ++ ++ // ++ // [ local var m-1 ] <--- sp ++ // ... ++ // [ local var 0 ] ++ // [ argument word n-1 ] <--- T0? ++ // ... ++ // [ argument word 0 ] <--- LVP ++ ++ // initialize fixed part of activation frame ++ ++ generate_fixed_frame(false); ++ ++ ++ // after this function, the layout of frame is as following ++ // ++ // [ monitor block top ] <--- sp ( the top monitor entry ) ++ // [ byte code pointer ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] <--- fp ++ // [ return address ] ++ // [ local var m-1 ] ++ // ... ++ // [ local var 0 ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... ++ // [ argument word 0 ] <--- LVP ++ ++ ++ // make sure method is not native & not abstract ++#ifdef ASSERT ++ __ ld_d(AT, Rmethod, in_bytes(Method::access_flags_offset())); ++ { ++ Label L; ++ __ andi(T2, AT, JVM_ACC_NATIVE); ++ __ beq(T2, R0, L); ++ __ stop("tried to execute native method as non-native"); ++ __ bind(L); ++ } ++ { ++ Label L; ++ __ andi(T2, AT, JVM_ACC_ABSTRACT); ++ __ beq(T2, R0, L); ++ __ stop("tried to execute abstract method in interpreter"); ++ __ bind(L); ++ } ++#endif ++ ++ // Since at this point in the method invocation the exception handler ++ // would try to exit the monitor of synchronized methods which hasn't ++ // been entered yet, we set the thread local variable ++ // _do_not_unlock_if_synchronized to true. The remove_activation will ++ // check this flag. 
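The locals setup above derives LVP from the parameter count (LVP = SP + params*wordSize - wordSize) and then pushes and zeroes (max_locals - params) extra slots below the arguments. A standalone sketch of that arithmetic on an ordinary array standing in for the Java expression stack; the variable names are illustrative:

    #include <cstdint>
    #include <cstdio>

    // Model: sp points at argument word n-1 (lowest address); argument word 0 sits highest.
    int main() {
      const int params = 3, max_locals = 5;        // sizes read from ConstMethod
      intptr_t stack[16] = {0};
      intptr_t* sp  = stack + 8;                   // argument word n-1
      sp[0] = 30; sp[1] = 20; sp[2] = 10;          // args: word 0 (=10) at the highest address
      intptr_t* lvp = sp + params - 1;             // LVP = SP + params*wordSize - wordSize

      // additional locals = max_locals - params, pushed below the args and zero-initialized
      int extra = max_locals - params;
      for (int i = 0; i < extra; i++) { *--sp = 0; }

      // local k is addressed relative to LVP at decreasing addresses
      for (int k = 0; k < max_locals; k++)
        std::printf("local %d = %ld\n", k, (long)*(lvp - k));
    }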
++ ++#ifndef OPT_THREAD ++ Register thread = T8; ++ __ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ __ li(AT, (int)true); ++ __ st_b(AT, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++#ifndef CORE ++ ++ // mdp : T8 ++ // tmp1: T4 ++ // tmp2: T2 ++ __ profile_parameters_type(T8, T4, T2); ++ ++ // increment invocation count & check for overflow ++ Label invocation_counter_overflow; ++ Label profile_method; ++ Label profile_method_continue; ++ if (inc_counter) { ++ generate_counter_incr(&invocation_counter_overflow, ++ &profile_method, ++ &profile_method_continue); ++ if (ProfileInterpreter) { ++ __ bind(profile_method_continue); ++ } ++ } ++ ++ Label continue_after_compile; ++ __ bind(continue_after_compile); ++ ++#endif // CORE ++ ++ bang_stack_shadow_pages(false); ++ ++ // reset the _do_not_unlock_if_synchronized flag ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ st_b(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++ // check for synchronized methods ++ // Must happen AFTER invocation_counter check and stack overflow check, ++ // so method is not locked if overflows. ++ // ++ if (synchronized) { ++ // Allocate monitor and lock method ++ lock_method(); ++ } else { ++ // no synchronization necessary ++#ifdef ASSERT ++ { Label L; ++ __ ld_w(AT, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(T2, AT, JVM_ACC_SYNCHRONIZED); ++ __ beq(T2, R0, L); ++ __ stop("method needs synchronization"); ++ __ bind(L); ++ } ++#endif ++ } ++ ++ // layout of frame after lock_method ++ // [ monitor entry ] <--- sp ++ // ... ++ // [ monitor entry ] ++ // [ monitor block top ] ( the top monitor entry ) ++ // [ byte code pointer ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ local var m-1 ] ++ // ... ++ // [ local var 0 ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... ++ // [ argument word 0 ] <--- LVP ++ ++ ++ // start execution ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld_d(AT, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ beq(AT, SP, L); ++ __ stop("broken stack frame setup in interpreter in native"); ++ __ bind(L); ++ } ++#endif ++ ++ // jvmti/jvmpi support ++ __ notify_method_entry(); ++ ++ __ dispatch_next(vtos); ++ ++ // invocation counter overflow ++ if (inc_counter) { ++ if (ProfileInterpreter) { ++ // We have decided to profile this method in the interpreter ++ __ bind(profile_method); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::profile_method)); ++ __ set_method_data_pointer_for_bcp(); ++ __ get_method(Rmethod); ++ __ b(profile_method_continue); ++ } ++ // Handle overflow of counter and compile method ++ __ bind(invocation_counter_overflow); ++ generate_counter_overflow(continue_after_compile); ++ } ++ ++ return entry_point; ++} ++ ++//----------------------------------------------------------------------------- ++// Exceptions ++ ++void TemplateInterpreterGenerator::generate_throw_exception() { ++ // Entry point in previous activation (i.e., if the caller was ++ // interpreted) ++ Interpreter::_rethrow_exception_entry = __ pc(); ++ // Restore sp to interpreter_frame_last_sp even though we are going ++ // to empty the expression stack for the exception processing. 
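generate_counter_incr above bumps the invocation counter in the MethodCounters and branches to invocation_counter_overflow once a threshold is crossed, at which point generate_counter_overflow asks the runtime to compile the method. A conceptual sketch of that check; the limit name and value are illustrative, not the real compilation thresholds:

    #include <cstdio>

    struct MethodCounters { unsigned invocation_counter = 0; };

    // Illustrative limit; the VM derives its own thresholds from the compiler policy flags.
    const unsigned kInterpreterInvocationLimit = 10000;

    // Returns true when the interpreter should call into the runtime to request compilation.
    bool increment_and_check(MethodCounters& mc) {
      mc.invocation_counter += 1;
      return mc.invocation_counter >= kInterpreterInvocationLimit;
    }

    int main() {
      MethodCounters mc;
      for (;;) {
        if (increment_and_check(mc)) {
          std::printf("overflow at %u -> generate_counter_overflow\n", mc.invocation_counter);
          break;
        }
      }
    }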
++ __ st_d(R0,FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ // V0: exception ++ // V1: return address/pc that threw exception ++ __ restore_bcp(); // BCP points to call/send ++ __ restore_locals(); ++ ++ //add for compressedoops ++ __ reinit_heapbase(); ++ // S8 be used in C2 ++ __ li(S8, (long)Interpreter::dispatch_table(itos)); ++ // Entry point for exceptions thrown within interpreter code ++ Interpreter::_throw_exception_entry = __ pc(); ++ // expression stack is undefined here ++ // V0: exception ++ // BCP: exception bcp ++ __ verify_oop(V0); ++ ++ // expression stack must be empty before entering the VM in case of an exception ++ __ empty_expression_stack(); ++ // find exception handler address and preserve exception oop ++ __ move(A1, V0); ++ __ call_VM(V1, CAST_FROM_FN_PTR(address, InterpreterRuntime::exception_handler_for_exception), A1); ++ // V0: exception handler entry point ++ // V1: preserved exception oop ++ // S0: bcp for exception handler ++ __ push(V1); // push exception which is now the only value on the stack ++ __ jr(V0); // jump to exception handler (may be _remove_activation_entry!) ++ ++ // If the exception is not handled in the current frame the frame is removed and ++ // the exception is rethrown (i.e. exception continuation is _rethrow_exception). ++ // ++ // Note: At this point the bci is still the bxi for the instruction which caused ++ // the exception and the expression stack is empty. Thus, for any VM calls ++ // at this point, GC will find a legal oop map (with empty expression stack). ++ ++ // In current activation ++ // V0: exception ++ // BCP: exception bcp ++ ++ // ++ // JVMTI PopFrame support ++ // ++ ++ Interpreter::_remove_activation_preserving_args_entry = __ pc(); ++ __ empty_expression_stack(); ++ // Set the popframe_processing bit in pending_popframe_condition indicating that we are ++ // currently handling popframe, so that call_VMs that may happen later do not trigger new ++ // popframe handling cycles. ++#ifndef OPT_THREAD ++ Register thread = T2; ++ __ get_thread(T2); ++#else ++ Register thread = TREG; ++#endif ++ __ ld_w(T3, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ __ ori(T3, T3, JavaThread::popframe_processing_bit); ++ __ st_w(T3, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ ++#ifndef CORE ++ { ++ // Check to see whether we are returning to a deoptimized frame. ++ // (The PopFrame call ensures that the caller of the popped frame is ++ // either interpreted or compiled and deoptimizes it if compiled.) ++ // In this case, we can't call dispatch_next() after the frame is ++ // popped, but instead must save the incoming arguments and restore ++ // them after deoptimization has occurred. ++ // ++ // Note that we don't compare the return PC against the ++ // deoptimization blob's unpack entry because of the presence of ++ // adapter frames in C2. 
++ Label caller_not_deoptimized; ++ __ ld_d(A0, FP, frame::java_frame_return_addr_offset * wordSize); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::interpreter_contains), A0); ++ __ bne(V0, R0, caller_not_deoptimized); ++ ++ // Compute size of arguments for saving when returning to deoptimized caller ++ __ get_method(A1); ++ __ verify_oop(A1); ++ __ ld_d(A1, A1, in_bytes(Method::const_offset())); ++ __ ld_hu(A1, A1, in_bytes(ConstMethod::size_of_parameters_offset())); ++ __ shl(A1, Interpreter::logStackElementSize); ++ __ restore_locals(); ++ __ sub_d(A2, LVP, A1); ++ __ addi_d(A2, A2, wordSize); ++ // Save these arguments ++#ifndef OPT_THREAD ++ __ get_thread(A0); ++#else ++ __ move(A0, TREG); ++#endif ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::popframe_preserve_args), A0, A1, A2); ++ ++ __ remove_activation(vtos, T4, false, false, false); ++ ++ // Inform deoptimization that it is responsible for restoring these arguments ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ li(AT, JavaThread::popframe_force_deopt_reexecution_bit); ++ __ st_w(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ // Continue in deoptimization handler ++ __ jr(T4); ++ ++ __ bind(caller_not_deoptimized); ++ } ++#endif /* !CORE */ ++ ++ __ remove_activation(vtos, T3, ++ /* throw_monitor_exception */ false, ++ /* install_monitor_exception */ false, ++ /* notify_jvmdi */ false); ++ ++ // Clear the popframe condition flag ++ // Finish with popframe handling ++ // A previous I2C followed by a deoptimization might have moved the ++ // outgoing arguments further up the stack. PopFrame expects the ++ // mutations to those outgoing arguments to be preserved and other ++ // constraints basically require this frame to look exactly as ++ // though it had previously invoked an interpreted activation with ++ // no space between the top of the expression stack (current ++ // last_sp) and the top of stack. Rather than force deopt to ++ // maintain this kind of invariant all the time we call a small ++ // fixup routine to move the mutated arguments onto the top of our ++ // expression stack if necessary. ++ __ move(T8, SP); ++ __ ld_d(A2, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // PC must point into interpreter here ++ Label L; ++ __ bind(L); ++ __ set_last_Java_frame(thread, noreg, FP, L); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::popframe_move_outgoing_args), thread, T8, A2); ++ __ reset_last_Java_frame(thread, true); ++ // Restore the last_sp and null it out ++ __ ld_d(SP, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ ++ ++ __ li(AT, JavaThread::popframe_inactive); ++ __ st_w(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ ++ // Finish with popframe handling ++ __ restore_bcp(); ++ __ restore_locals(); ++ // S8 be used in C2 ++ __ li(S8, (long)Interpreter::dispatch_table(itos)); ++#ifndef CORE ++ // The method data pointer was incremented already during ++ // call profiling. We have to restore the mdp for the current bcp. 
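The PopFrame handling above talks to the runtime through the thread's popframe_condition word: a processing bit is set while the interpreter is mid-PopFrame, a force-deopt-reexecution bit tells deoptimization to restore the preserved arguments, and the word is reset to popframe_inactive once the fixup is done. A bit-flag sketch of that protocol; the numeric encodings below are illustrative only:

    #include <cstdio>

    // Illustrative encoding of the popframe condition word (the real values differ).
    enum PopframeCondition : unsigned {
      popframe_inactive                    = 0,
      popframe_pending_bit                 = 1u << 0,
      popframe_processing_bit              = 1u << 1,
      popframe_force_deopt_reexecution_bit = 1u << 2,
    };

    int main() {
      unsigned cond = popframe_pending_bit;            // JVMTI requested a PopFrame
      cond |= popframe_processing_bit;                 // interpreter starts handling it

      bool caller_is_interpreted = false;              // outcome of interpreter_contains()
      if (!caller_is_interpreted)
        cond = popframe_force_deopt_reexecution_bit;   // deopt must restore the saved args

      // ... after the frame is popped and the outgoing arguments are fixed up ...
      cond = popframe_inactive;                        // clear the condition flag
      std::printf("final condition = %u\n", cond);
    }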
++ if (ProfileInterpreter) { ++ __ set_method_data_pointer_for_bcp(); ++ } ++#endif // !CORE ++ // Clear the popframe condition flag ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ li(AT, JavaThread::popframe_inactive); ++ __ st_w(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ ++#if INCLUDE_JVMTI ++ { ++ Label L_done; ++ ++ __ ld_bu(AT, BCP, 0); ++ __ addi_d(AT, AT, -1 * Bytecodes::_invokestatic); ++ __ bne(AT, R0, L_done); ++ ++ // The member name argument must be restored if _invokestatic is re-executed after a PopFrame call. ++ // Detect such a case in the InterpreterRuntime function and return the member name argument, or NULL. ++ ++ __ get_method(T4); ++ __ ld_d(T8, LVP, 0); ++ __ call_VM(T8, CAST_FROM_FN_PTR(address, InterpreterRuntime::member_name_arg_or_null), T8, T4, BCP); ++ ++ __ beq(T8, R0, L_done); ++ ++ __ st_d(T8, SP, 0); ++ __ bind(L_done); ++ } ++#endif // INCLUDE_JVMTI ++ ++ __ dispatch_next(vtos); ++ // end of PopFrame support ++ ++ Interpreter::_remove_activation_entry = __ pc(); ++ ++ // preserve exception over this code sequence ++ __ pop(T0); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ st_d(T0, thread, in_bytes(JavaThread::vm_result_offset())); ++ // remove the activation (without doing throws on illegalMonitorExceptions) ++ __ remove_activation(vtos, T3, false, true, false); ++ // restore exception ++ __ get_vm_result(T0, thread); ++ __ verify_oop(T0); ++ ++ // In between activations - previous activation type unknown yet ++ // compute continuation point - the continuation point expects ++ // the following registers set up: ++ // ++ // T0: exception ++ // T1: return address/pc that threw exception ++ // SP: expression stack of caller ++ // FP: fp of caller ++ __ push2(T0, T3); // save exception and return address ++ __ move(A1, T3); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, A1); ++ __ move(T4, V0); // save exception handler ++ __ pop2(V0, V1); // restore return address and exception ++ ++ // Note that an "issuing PC" is actually the next PC after the call ++ __ jr(T4); // jump to exception handler of caller ++} ++ ++ ++// ++// JVMTI ForceEarlyReturn support ++// ++address TemplateInterpreterGenerator::generate_earlyret_entry_for(TosState state) { ++ address entry = __ pc(); ++ __ restore_bcp(); ++ __ restore_locals(); ++ __ empty_expression_stack(); ++ __ empty_FPU_stack(); ++ __ load_earlyret_value(state); ++ ++#ifndef OPT_THREAD ++ __ get_thread(TREG); ++#endif ++ __ ld_ptr(T4, TREG, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ const Address cond_addr(T4, in_bytes(JvmtiThreadState::earlyret_state_offset())); ++ // Clear the earlyret state ++ __ li(AT, JvmtiThreadState::earlyret_inactive); ++ __ st_w(AT, cond_addr); ++ __ membar(__ AnyAny);//no membar here for aarch64 ++ ++ ++ __ remove_activation(state, T0, ++ false, /* throw_monitor_exception */ ++ false, /* install_monitor_exception */ ++ true); /* notify_jvmdi */ ++ __ membar(__ AnyAny); ++ __ jr(T0); ++ ++ return entry; ++} // end of ForceEarlyReturn support ++ ++ ++//----------------------------------------------------------------------------- ++// Helper for vtos entry point generation ++ ++void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t, ++ address& bep, ++ address& cep, ++ address& sep, ++ address& aep, ++ address& iep, ++ address& lep, ++ address& fep, ++ address& dep, ++ address& vep) { ++ assert(t->is_valid() && t->tos_in() == vtos, "illegal 
template"); ++ Label L; ++ fep = __ pc(); __ push(ftos); __ b(L); ++ dep = __ pc(); __ push(dtos); __ b(L); ++ lep = __ pc(); __ push(ltos); __ b(L); ++ aep =__ pc(); __ push(atos); __ b(L); ++ bep = cep = sep = ++ iep = __ pc(); __ push(itos); ++ vep = __ pc(); ++ __ bind(L); ++ generate_and_dispatch(t); ++} ++ ++//----------------------------------------------------------------------------- ++ ++// Non-product code ++#ifndef PRODUCT ++address TemplateInterpreterGenerator::generate_trace_code(TosState state) { ++ address entry = __ pc(); ++ ++ // prepare expression stack ++ __ push(state); // save tosca ++ ++ // tos & tos2 ++ // trace_bytecode need actually 4 args, the last two is tos&tos2 ++ // this work fine for x86. but LA ABI calling convention will store A2-A3 ++ // to the stack position it think is the tos&tos2 ++ // when the expression stack have no more than 2 data, error occur. ++ __ ld_d(A2, SP, 0); ++ __ ld_d(A3, SP, 1 * wordSize); ++ ++ // pass arguments & call tracer ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::trace_bytecode), RA, A2, A3); ++ __ move(RA, V0); // make sure return address is not destroyed by pop(state) ++ ++ // restore expression stack ++ __ pop(state); // restore tosca ++ ++ // return ++ __ jr(RA); ++ return entry; ++} ++ ++void TemplateInterpreterGenerator::count_bytecode() { ++ __ li(T8, (long)&BytecodeCounter::_counter_value); ++ __ ld_w(AT, T8, 0); ++ __ addi_d(AT, AT, 1); ++ __ st_w(AT, T8, 0); ++} ++ ++void TemplateInterpreterGenerator::histogram_bytecode(Template* t) { ++ __ li(T8, (long)&BytecodeHistogram::_counters[t->bytecode()]); ++ __ ld_w(AT, T8, 0); ++ __ addi_d(AT, AT, 1); ++ __ st_w(AT, T8, 0); ++} ++ ++void TemplateInterpreterGenerator::histogram_bytecode_pair(Template* t) { ++ __ li(T8, (long)&BytecodePairHistogram::_index); ++ __ ld_w(T4, T8, 0); ++ __ srli_d(T4, T4, BytecodePairHistogram::log2_number_of_codes); ++ __ li(T8, ((long)t->bytecode()) << BytecodePairHistogram::log2_number_of_codes); ++ __ orr(T4, T4, T8); ++ __ li(T8, (long)&BytecodePairHistogram::_index); ++ __ st_w(T4, T8, 0); ++ __ slli_d(T4, T4, 2); ++ __ li(T8, (long)BytecodePairHistogram::_counters); ++ __ add_d(T8, T8, T4); ++ __ ld_w(AT, T8, 0); ++ __ addi_d(AT, AT, 1); ++ __ st_w(AT, T8, 0); ++} ++ ++ ++void TemplateInterpreterGenerator::trace_bytecode(Template* t) { ++ // Call a little run-time stub to avoid blow-up for each bytecode. ++ // The run-time runtime saves the right registers, depending on ++ // the tosca in-state for the given template. ++ address entry = Interpreter::trace_code(t->tos_in()); ++ assert(entry != NULL, "entry must have been generated"); ++ __ call(entry, relocInfo::none); ++ //add for compressedoops ++ __ reinit_heapbase(); ++} ++ ++ ++void TemplateInterpreterGenerator::stop_interpreter_at() { ++ Label L; ++ __ li(T8, long(&BytecodeCounter::_counter_value)); ++ __ ld_w(T8, T8, 0); ++ __ li(AT, StopInterpreterAt); ++ __ bne(T8, AT, L); ++ __ brk(5); ++ __ bind(L); ++} ++#endif // !PRODUCT +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/templateTable_loongarch_64.cpp b/src/hotspot/cpu/loongarch/templateTable_loongarch_64.cpp +--- a/src/hotspot/cpu/loongarch/templateTable_loongarch_64.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/templateTable_loongarch_64.cpp 2024-01-30 10:00:11.841431732 +0800 +@@ -0,0 +1,4115 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. 
++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "interpreter/templateTable.hpp" ++#include "memory/universe.hpp" ++#include "oops/methodData.hpp" ++#include "oops/objArrayKlass.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/synchronizer.hpp" ++#include "utilities/macros.hpp" ++ ++ ++#ifndef CC_INTERP ++ ++#define __ _masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T6 RT6 ++#define T8 RT8 ++ ++// Platform-dependent initialization ++ ++void TemplateTable::pd_initialize() { ++ // No LoongArch specific initialization ++} ++ ++// Address computation: local variables ++ ++static inline Address iaddress(int n) { ++ return Address(LVP, Interpreter::local_offset_in_bytes(n)); ++} ++ ++static inline Address laddress(int n) { ++ return iaddress(n + 1); ++} ++ ++static inline Address faddress(int n) { ++ return iaddress(n); ++} ++ ++static inline Address daddress(int n) { ++ return laddress(n); ++} ++ ++static inline Address aaddress(int n) { ++ return iaddress(n); ++} ++static inline Address haddress(int n) { return iaddress(n + 0); } ++ ++ ++static inline Address at_sp() { return Address(SP, 0); } ++static inline Address at_sp_p1() { return Address(SP, 1 * wordSize); } ++static inline Address at_sp_p2() { return Address(SP, 2 * wordSize); } ++ ++// At top of Java expression stack which may be different than sp(). ++// It isn't for category 1 objects. 
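The addressing helpers that follow (iaddress, laddress, at_sp, at_tos) encode the frame's two conventions: locals are reached from LVP at decreasing addresses, with a two-word value occupying the slot of local n+1, while expression-stack elements are reached from SP at increasing addresses with the tos at offset 0. A small model, assuming Interpreter::local_offset_in_bytes(n) and expr_offset_in_bytes(i) resolve to -n*wordSize and i*wordSize on this port:

    #include <cstdint>
    #include <cstdio>

    // Pointer arithmetic below is in words; one word per stack element.
    intptr_t* iaddress(intptr_t* lvp, int n) { return lvp - n; }        // local variable n
    intptr_t* laddress(intptr_t* lvp, int n) { return iaddress(lvp, n + 1); } // 64-bit local
    intptr_t* at_tos(intptr_t* sp, int i)    { return sp + i; }         // expression stack slot i

    int main() {
      intptr_t frame[8] = {0};
      intptr_t* lvp = frame + 7;   // local 0 at the highest address
      intptr_t* sp  = frame + 2;   // pretend two values are on the expression stack
      *iaddress(lvp, 0) = 42;      // istore_0
      *at_tos(sp, 0)    = 7;       // value on top of the stack
      std::printf("local0=%ld tos=%ld laddress(0) is %ld word(s) below LVP\n",
                  (long)*iaddress(lvp, 0), (long)*at_tos(sp, 0),
                  (long)(lvp - laddress(lvp, 0)));
    }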
++static inline Address at_tos () { ++ Address tos = Address(SP, Interpreter::expr_offset_in_bytes(0)); ++ return tos; ++} ++ ++static inline Address at_tos_p1() { ++ return Address(SP, Interpreter::expr_offset_in_bytes(1)); ++} ++ ++static inline Address at_tos_p2() { ++ return Address(SP, Interpreter::expr_offset_in_bytes(2)); ++} ++ ++static inline Address at_tos_p3() { ++ return Address(SP, Interpreter::expr_offset_in_bytes(3)); ++} ++ ++// we use S0 as bcp, be sure you have bcp in S0 before you call any of the Template generator ++Address TemplateTable::at_bcp(int offset) { ++ assert(_desc->uses_bcp(), "inconsistent uses_bcp information"); ++ return Address(BCP, offset); ++} ++ ++// Miscelaneous helper routines ++// Store an oop (or NULL) at the address described by obj. ++// If val == noreg this means store a NULL ++static void do_oop_store(InterpreterMacroAssembler* _masm, ++ Address dst, ++ Register val, ++ DecoratorSet decorators = 0) { ++ assert(val == noreg || val == V0, "parameter is just for looks"); ++ __ store_heap_oop(dst, val, T4, T1, decorators); ++} ++ ++static void do_oop_load(InterpreterMacroAssembler* _masm, ++ Address src, ++ Register dst, ++ DecoratorSet decorators = 0) { ++ __ load_heap_oop(dst, src, T4, T1, decorators); ++} ++ ++// bytecode folding ++void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg, ++ Register tmp_reg, bool load_bc_into_bc_reg/*=true*/, ++ int byte_no) { ++ if (!RewriteBytecodes) return; ++ Label L_patch_done; ++ ++ switch (bc) { ++ case Bytecodes::_fast_aputfield: ++ case Bytecodes::_fast_bputfield: ++ case Bytecodes::_fast_zputfield: ++ case Bytecodes::_fast_cputfield: ++ case Bytecodes::_fast_dputfield: ++ case Bytecodes::_fast_fputfield: ++ case Bytecodes::_fast_iputfield: ++ case Bytecodes::_fast_lputfield: ++ case Bytecodes::_fast_sputfield: ++ { ++ // We skip bytecode quickening for putfield instructions when ++ // the put_code written to the constant pool cache is zero. ++ // This is required so that every execution of this instruction ++ // calls out to InterpreterRuntime::resolve_get_put to do ++ // additional, required work. ++ assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); ++ assert(load_bc_into_bc_reg, "we use bc_reg as temp"); ++ __ get_cache_and_index_and_bytecode_at_bcp(tmp_reg, bc_reg, tmp_reg, byte_no, 1); ++ __ addi_d(bc_reg, R0, bc); ++ __ beq(tmp_reg, R0, L_patch_done); ++ } ++ break; ++ default: ++ assert(byte_no == -1, "sanity"); ++ // the pair bytecodes have already done the load. 
++ if (load_bc_into_bc_reg) { ++ __ li(bc_reg, bc); ++ } ++ } ++ ++ if (JvmtiExport::can_post_breakpoint()) { ++ Label L_fast_patch; ++ // if a breakpoint is present we can't rewrite the stream directly ++ __ ld_bu(tmp_reg, at_bcp(0)); ++ __ li(AT, Bytecodes::_breakpoint); ++ __ bne(tmp_reg, AT, L_fast_patch); ++ ++ __ get_method(tmp_reg); ++ // Let breakpoint table handling rewrite to quicker bytecode ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::set_original_bytecode_at), tmp_reg, BCP, bc_reg); ++ ++ __ b(L_patch_done); ++ __ bind(L_fast_patch); ++ } ++ ++#ifdef ASSERT ++ Label L_okay; ++ __ ld_bu(tmp_reg, at_bcp(0)); ++ __ li(AT, (int)Bytecodes::java_code(bc)); ++ __ beq(tmp_reg, AT, L_okay); ++ __ beq(tmp_reg, bc_reg, L_patch_done); ++ __ stop("patching the wrong bytecode"); ++ __ bind(L_okay); ++#endif ++ ++ // patch bytecode ++ __ st_b(bc_reg, at_bcp(0)); ++ __ bind(L_patch_done); ++} ++ ++ ++// Individual instructions ++ ++void TemplateTable::nop() { ++ transition(vtos, vtos); ++ // nothing to do ++} ++ ++void TemplateTable::shouldnotreachhere() { ++ transition(vtos, vtos); ++ __ stop("shouldnotreachhere bytecode"); ++} ++ ++void TemplateTable::aconst_null() { ++ transition(vtos, atos); ++ __ move(FSR, R0); ++} ++ ++void TemplateTable::iconst(int value) { ++ transition(vtos, itos); ++ if (value == 0) { ++ __ move(FSR, R0); ++ } else { ++ __ li(FSR, value); ++ } ++} ++ ++void TemplateTable::lconst(int value) { ++ transition(vtos, ltos); ++ if (value == 0) { ++ __ move(FSR, R0); ++ } else { ++ __ li(FSR, value); ++ } ++} ++ ++void TemplateTable::fconst(int value) { ++ transition(vtos, ftos); ++ switch( value ) { ++ case 0: __ movgr2fr_w(FSF, R0); return; ++ case 1: __ addi_d(AT, R0, 1); break; ++ case 2: __ addi_d(AT, R0, 2); break; ++ default: ShouldNotReachHere(); ++ } ++ __ movgr2fr_w(FSF, AT); ++ __ ffint_s_w(FSF, FSF); ++} ++ ++void TemplateTable::dconst(int value) { ++ transition(vtos, dtos); ++ switch( value ) { ++ case 0: __ movgr2fr_d(FSF, R0); ++ return; ++ case 1: __ addi_d(AT, R0, 1); ++ __ movgr2fr_d(FSF, AT); ++ __ ffint_d_w(FSF, FSF); ++ break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++void TemplateTable::bipush() { ++ transition(vtos, itos); ++ __ ld_b(FSR, at_bcp(1)); ++} ++ ++void TemplateTable::sipush() { ++ transition(vtos, itos); ++ __ ld_b(FSR, BCP, 1); ++ __ ld_bu(AT, BCP, 2); ++ __ slli_d(FSR, FSR, 8); ++ __ orr(FSR, FSR, AT); ++} ++ ++// T1 : tags ++// T2 : index ++// T3 : cpool ++// T8 : tag ++void TemplateTable::ldc(bool wide) { ++ transition(vtos, vtos); ++ Label call_ldc, notFloat, notClass, notInt, Done; ++ // get index in cpool ++ if (wide) { ++ __ get_unsigned_2_byte_index_at_bcp(T2, 1); ++ } else { ++ __ ld_bu(T2, at_bcp(1)); ++ } ++ ++ __ get_cpool_and_tags(T3, T1); ++ ++ const int base_offset = ConstantPool::header_size() * wordSize; ++ const int tags_offset = Array::base_offset_in_bytes(); ++ ++ // get type ++ __ add_d(AT, T1, T2); ++ __ ld_b(T1, AT, tags_offset); ++ if(os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad|__ LoadStore)); ++ } ++ //now T1 is the tag ++ ++ // unresolved class - get the resolved class ++ __ addi_d(AT, T1, - JVM_CONSTANT_UnresolvedClass); ++ __ beq(AT, R0, call_ldc); ++ ++ // unresolved class in error (resolution failed) - call into runtime ++ // so that the same error from first resolution attempt is thrown. 
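sipush above assembles its 16-bit operand from the two bytes after the opcode: the first byte is loaded signed (ld_b), shifted left by 8, and OR-ed with the second byte loaded unsigned (ld_bu), which yields a sign-extended short. A direct C++ rendering of that decode:

    #include <cstdint>
    #include <cstdio>

    // Decode the sipush immediate the same way the stub does: signed high byte, unsigned low byte.
    int32_t sipush_operand(const uint8_t* bcp) {
      int32_t hi = (int8_t)bcp[1];     // ld_b  : sign-extended
      int32_t lo = bcp[2];             // ld_bu : zero-extended
      return (hi << 8) | lo;
    }

    int main() {
      const uint8_t bc_pos[] = {0x11, 0x01, 0x2c};  // sipush 300
      const uint8_t bc_neg[] = {0x11, 0xff, 0xff};  // sipush -1
      std::printf("%d %d\n", sipush_operand(bc_pos), sipush_operand(bc_neg));
    }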
++ __ addi_d(AT, T1, -JVM_CONSTANT_UnresolvedClassInError); ++ __ beq(AT, R0, call_ldc); ++ ++ // resolved class - need to call vm to get java mirror of the class ++ __ addi_d(AT, T1, - JVM_CONSTANT_Class); ++ __ slli_d(T2, T2, Address::times_8); ++ __ bne(AT, R0, notClass); ++ ++ __ bind(call_ldc); ++ __ li(A1, wide); ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), A1); ++ //__ push(atos); ++ __ addi_d(SP, SP, - Interpreter::stackElementSize); ++ __ st_d(FSR, SP, 0); ++ __ b(Done); ++ ++ __ bind(notClass); ++ __ addi_d(AT, T1, -JVM_CONSTANT_Float); ++ __ bne(AT, R0, notFloat); ++ // ftos ++ __ add_d(AT, T3, T2); ++ __ fld_s(FSF, AT, base_offset); ++ //__ push_f(); ++ __ addi_d(SP, SP, - Interpreter::stackElementSize); ++ __ fst_s(FSF, SP, 0); ++ __ b(Done); ++ ++ __ bind(notFloat); ++ __ addi_d(AT, T1, -JVM_CONSTANT_Integer); ++ __ bne(AT, R0, notInt); ++ // itos ++ __ add_d(T0, T3, T2); ++ __ ld_w(FSR, T0, base_offset); ++ __ push(itos); ++ __ b(Done); ++ ++ // assume the tag is for condy; if not, the VM runtime will tell us ++ __ bind(notInt); ++ condy_helper(Done); ++ ++ __ bind(Done); ++} ++ ++void TemplateTable::condy_helper(Label& Done) { ++ const Register obj = FSR; ++ const Register off = SSR; ++ const Register flags = T3; ++ const Register rarg = A1; ++ __ li(rarg, (int)bytecode()); ++ __ call_VM(obj, CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc), rarg); ++ __ get_vm_result_2(flags, TREG); ++ // VMr = obj = base address to find primitive value to push ++ // VMr2 = flags = (tos, off) using format of CPCE::_flags ++ __ li(AT, ConstantPoolCacheEntry::field_index_mask); ++ __ andr(off, flags, AT); ++ __ add_d(obj, off, obj); ++ const Address field(obj, 0 * wordSize); ++ ++ // What sort of thing are we loading? ++ __ srli_d(flags, flags, ConstantPoolCacheEntry::tos_state_shift); ++ ConstantPoolCacheEntry::verify_tos_state_shift(); ++ ++ switch (bytecode()) { ++ case Bytecodes::_ldc: ++ case Bytecodes::_ldc_w: ++ { ++ // tos in (itos, ftos, stos, btos, ctos, ztos) ++ Label notInt, notFloat, notShort, notByte, notChar, notBool; ++ __ addi_d(AT, flags, -itos); ++ __ bne(AT, R0, notInt); ++ // itos ++ __ ld_d(obj, field); ++ __ push(itos); ++ __ b(Done); ++ ++ __ bind(notInt); ++ __ addi_d(AT, flags, -ftos); ++ __ bne(AT, R0, notFloat); ++ // ftos ++ __ fld_s(FSF, field); ++ __ push(ftos); ++ __ b(Done); ++ ++ __ bind(notFloat); ++ __ addi_d(AT, flags, -stos); ++ __ bne(AT, R0, notShort); ++ // stos ++ __ ld_h(obj, field); ++ __ push(stos); ++ __ b(Done); ++ ++ __ bind(notShort); ++ __ addi_d(AT, flags, -btos); ++ __ bne(AT, R0, notByte); ++ // btos ++ __ ld_b(obj, field); ++ __ push(btos); ++ __ b(Done); ++ ++ __ bind(notByte); ++ __ addi_d(AT, flags, -ctos); ++ __ bne(AT, R0, notChar); ++ // ctos ++ __ ld_hu(obj, field); ++ __ push(ctos); ++ __ b(Done); ++ ++ __ bind(notChar); ++ __ addi_d(AT, flags, -ztos); ++ __ bne(AT, R0, notBool); ++ // ztos ++ __ ld_bu(obj, field); ++ __ push(ztos); ++ __ b(Done); ++ ++ __ bind(notBool); ++ break; ++ } ++ ++ case Bytecodes::_ldc2_w: ++ { ++ Label notLong, notDouble; ++ __ addi_d(AT, flags, -ltos); ++ __ bne(AT, R0, notLong); ++ // ltos ++ __ ld_d(obj, field); ++ __ push(ltos); ++ __ b(Done); ++ ++ __ bind(notLong); ++ __ addi_d(AT, flags, -dtos); ++ __ bne(AT, R0, notDouble); ++ // dtos ++ __ fld_d(FSF, field); ++ __ push(dtos); ++ __ b(Done); ++ ++ __ bind(notDouble); ++ break; ++ } ++ ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ __ stop("bad ldc/condy"); ++} ++ ++// Fast path for caching oop constants. 
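ldc and condy_helper above dispatch on the constant-pool tag (or, for a dynamic constant, on the tos state packed into the flags word returned by resolve_ldc) and push the value with the matching type. A plain C++ switch mirroring that control flow; the enum is a stand-in, not the real tag encoding:

    #include <cstdio>

    // Stand-ins for the constant-pool tags the stub compares against.
    enum Tag { UnresolvedClass, UnresolvedClassInError, Class, Float, Integer, Other };

    // Class-like tags go through the VM call, Float/Integer are loaded straight from the
    // pool, and anything else is assumed to be a dynamic constant handled by condy_helper.
    const char* ldc_dispatch(Tag tag) {
      switch (tag) {
        case UnresolvedClass:
        case UnresolvedClassInError:
        case Class:   return "call_ldc -> push atos";
        case Float:   return "load cpool[base + index*8] -> push ftos";
        case Integer: return "load cpool[base + index*8] -> push itos";
        default:      return "condy_helper -> resolve_ldc decides the tos state";
      }
    }

    int main() {
      std::printf("%s\n%s\n%s\n", ldc_dispatch(Integer), ldc_dispatch(Class), ldc_dispatch(Other));
    }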
++void TemplateTable::fast_aldc(bool wide) { ++ transition(vtos, atos); ++ ++ Register result = FSR; ++ Register tmp = SSR; ++ Register rarg = A1; ++ int index_size = wide ? sizeof(u2) : sizeof(u1); ++ ++ Label resolved; ++ ++ // We are resolved if the resolved reference cache entry contains a ++ // non-null object (String, MethodType, etc.) ++ assert_different_registers(result, tmp); ++ __ get_cache_index_at_bcp(tmp, 1, index_size); ++ __ load_resolved_reference_at_index(result, tmp, T4); ++ __ bne(result, R0, resolved); ++ ++ address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc); ++ // first time invocation - must resolve first ++ int i = (int)bytecode(); ++ __ li(rarg, i); ++ __ call_VM(result, entry, rarg); ++ ++ __ bind(resolved); ++ ++ { // Check for the null sentinel. ++ // If we just called the VM, it already did the mapping for us, ++ // but it's harmless to retry. ++ Label notNull; ++ __ li(rarg, (long)Universe::the_null_sentinel_addr()); ++ __ ld_ptr(tmp, Address(rarg)); ++ __ bne(tmp, result, notNull); ++ __ xorr(result, result, result); // NULL object reference ++ __ bind(notNull); ++ } ++ ++ if (VerifyOops) { ++ __ verify_oop(result); ++ } ++} ++ ++// used register: T2, T3, T1 ++// T2 : index ++// T3 : cpool ++// T1 : tag ++void TemplateTable::ldc2_w() { ++ transition(vtos, vtos); ++ Label notDouble, notLong, Done; ++ ++ // get index in cpool ++ __ get_unsigned_2_byte_index_at_bcp(T2, 1); ++ ++ __ get_cpool_and_tags(T3, T1); ++ ++ const int base_offset = ConstantPool::header_size() * wordSize; ++ const int tags_offset = Array::base_offset_in_bytes(); ++ ++ // get type in T1 ++ __ add_d(AT, T1, T2); ++ __ ld_b(T1, AT, tags_offset); ++ ++ __ addi_d(AT, T1, -JVM_CONSTANT_Double); ++ __ bne(AT, R0, notDouble); ++ ++ // dtos ++ __ alsl_d(AT, T2, T3, Address::times_8 - 1); ++ __ fld_d(FSF, AT, base_offset); ++ __ push(dtos); ++ __ b(Done); ++ ++ __ bind(notDouble); ++ __ addi_d(AT, T1, -JVM_CONSTANT_Long); ++ __ bne(AT, R0, notLong); ++ ++ // ltos ++ __ slli_d(T2, T2, Address::times_8); ++ __ add_d(AT, T3, T2); ++ __ ld_d(FSR, AT, base_offset); ++ __ push(ltos); ++ __ b(Done); ++ ++ __ bind(notLong); ++ condy_helper(Done); ++ ++ __ bind(Done); ++} ++ ++// we compute the actual local variable address here ++void TemplateTable::locals_index(Register reg, int offset) { ++ __ ld_bu(reg, at_bcp(offset)); ++ __ slli_d(reg, reg, Address::times_8); ++ __ sub_d(reg, LVP, reg); ++} ++ ++void TemplateTable::iload() { ++ iload_internal(); ++} ++ ++void TemplateTable::nofast_iload() { ++ iload_internal(may_not_rewrite); ++} ++ ++// this method will do bytecode folding of the two form: ++// iload iload iload caload ++// used register : T2, T3 ++// T2 : bytecode ++// T3 : folded code ++void TemplateTable::iload_internal(RewriteControl rc) { ++ transition(vtos, itos); ++ if (RewriteFrequentPairs && rc == may_rewrite) { ++ Label rewrite, done; ++ // get the next bytecode in T2 ++ __ ld_bu(T2, at_bcp(Bytecodes::length_for(Bytecodes::_iload))); ++ // if _iload, wait to rewrite to iload2. We only want to rewrite the ++ // last two iloads in a pair. Comparing against fast_iload means that ++ // the next bytecode is neither an iload or a caload, and therefore ++ // an iload pair. 
++ __ li(AT, Bytecodes::_iload); ++ __ beq(AT, T2, done); ++ ++ __ li(T3, Bytecodes::_fast_iload2); ++ __ li(AT, Bytecodes::_fast_iload); ++ __ beq(AT, T2, rewrite); ++ ++ // if _caload, rewrite to fast_icaload ++ __ li(T3, Bytecodes::_fast_icaload); ++ __ li(AT, Bytecodes::_caload); ++ __ beq(AT, T2, rewrite); ++ ++ // rewrite so iload doesn't check again. ++ __ li(T3, Bytecodes::_fast_iload); ++ ++ // rewrite ++ // T3 : fast bytecode ++ __ bind(rewrite); ++ patch_bytecode(Bytecodes::_iload, T3, T2, false); ++ __ bind(done); ++ } ++ ++ // Get the local value into tos ++ locals_index(T2); ++ __ ld_w(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::fast_iload2() { ++ transition(vtos, itos); ++ locals_index(T2); ++ __ ld_w(FSR, T2, 0); ++ __ push(itos); ++ locals_index(T2, 3); ++ __ ld_w(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::fast_iload() { ++ transition(vtos, itos); ++ locals_index(T2); ++ __ ld_w(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::lload() { ++ transition(vtos, ltos); ++ locals_index(T2); ++ __ ld_d(FSR, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::fload() { ++ transition(vtos, ftos); ++ locals_index(T2); ++ __ fld_s(FSF, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::dload() { ++ transition(vtos, dtos); ++ locals_index(T2); ++ __ fld_d(FSF, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::aload() { ++ transition(vtos, atos); ++ locals_index(T2); ++ __ ld_d(FSR, T2, 0); ++} ++ ++void TemplateTable::locals_index_wide(Register reg) { ++ __ get_unsigned_2_byte_index_at_bcp(reg, 2); ++ __ slli_d(reg, reg, Address::times_8); ++ __ sub_d(reg, LVP, reg); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_iload() { ++ transition(vtos, itos); ++ locals_index_wide(T2); ++ __ ld_d(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_lload() { ++ transition(vtos, ltos); ++ locals_index_wide(T2); ++ __ ld_d(FSR, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_fload() { ++ transition(vtos, ftos); ++ locals_index_wide(T2); ++ __ fld_s(FSF, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_dload() { ++ transition(vtos, dtos); ++ locals_index_wide(T2); ++ __ fld_d(FSF, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_aload() { ++ transition(vtos, atos); ++ locals_index_wide(T2); ++ __ ld_d(FSR, T2, 0); ++} ++ ++// we use A2 as the regiser for index, BE CAREFUL! 
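iload_internal above implements the pair rewriting described in its comments: depending on the bytecode that follows an _iload, the current bytecode is patched to _fast_iload2, _fast_icaload, or _fast_iload, and it is left alone when the next bytecode is another _iload so that only the last iload of a run gets rewritten. The same decision table as a standalone function; the enum names are illustrative stand-ins for the Bytecodes constants:

    #include <cstdio>

    enum Bytecode { _iload, _caload, _fast_iload, _fast_iload2, _fast_icaload, _other };

    // Returns the bytecode the current _iload should be patched to, or _iload itself
    // when the rewrite is deferred to the last iload of the run.
    Bytecode rewrite_iload(Bytecode next) {
      if (next == _iload)      return _iload;         // wait: another iload follows
      if (next == _fast_iload) return _fast_iload2;   // iload + iload pair
      if (next == _caload)     return _fast_icaload;  // iload + caload pair
      return _fast_iload;                             // plain iload, skip the check next time
    }

    int main() {
      std::printf("%d %d %d %d\n",
                  rewrite_iload(_iload), rewrite_iload(_fast_iload),
                  rewrite_iload(_caload), rewrite_iload(_other));
    }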
++// we dont use our tge 29 now, for later optimization ++void TemplateTable::index_check(Register array, Register index) { ++ // Pop ptr into array ++ __ pop_ptr(array); ++ index_check_without_pop(array, index); ++} ++ ++void TemplateTable::index_check_without_pop(Register array, Register index) { ++ // destroys A2 ++ // check array ++ __ null_check(array, arrayOopDesc::length_offset_in_bytes()); ++ ++ // sign extend since tos (index) might contain garbage in upper bits ++ __ slli_w(index, index, 0); ++ ++ // check index ++ Label ok; ++ __ ld_w(AT, array, arrayOopDesc::length_offset_in_bytes()); ++ __ bltu(index, AT, ok); ++ ++ //throw_ArrayIndexOutOfBoundsException assume abberrant index in A2 ++ if (A1 != array) __ move(A1, array); ++ if (A2 != index) __ move(A2, index); ++ __ jmp(Interpreter::_throw_ArrayIndexOutOfBoundsException_entry); ++ __ bind(ok); ++} ++ ++void TemplateTable::iaload() { ++ transition(itos, itos); ++ index_check(SSR, FSR); ++ __ alsl_d(FSR, FSR, SSR, 1); ++ __ access_load_at(T_INT, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_INT)), noreg, noreg); ++} ++ ++void TemplateTable::laload() { ++ transition(itos, ltos); ++ index_check(SSR, FSR); ++ __ alsl_d(T4, FSR, SSR, Address::times_8 - 1); ++ __ access_load_at(T_LONG, IN_HEAP | IS_ARRAY, FSR, Address(T4, arrayOopDesc::base_offset_in_bytes(T_LONG)), noreg, noreg); ++} ++ ++void TemplateTable::faload() { ++ transition(itos, ftos); ++ index_check(SSR, FSR); ++ __ alsl_d(FSR, FSR, SSR, Address::times_4 - 1); ++ __ access_load_at(T_FLOAT, IN_HEAP | IS_ARRAY, noreg, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT)), noreg, noreg); ++} ++ ++void TemplateTable::daload() { ++ transition(itos, dtos); ++ index_check(SSR, FSR); ++ __ alsl_d(T4, FSR, SSR, 2); ++ __ access_load_at(T_DOUBLE, IN_HEAP | IS_ARRAY, noreg, Address(T4, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)), noreg, noreg); ++} ++ ++void TemplateTable::aaload() { ++ transition(itos, atos); ++ index_check(SSR, FSR); ++ __ alsl_d(FSR, FSR, SSR, (UseCompressedOops ? 
Address::times_4 : Address::times_8) - 1); ++ //add for compressedoops ++ do_oop_load(_masm, ++ Address(FSR, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), ++ FSR, ++ IS_ARRAY); ++} ++ ++void TemplateTable::baload() { ++ transition(itos, itos); ++ index_check(SSR, FSR); ++ __ add_d(FSR, SSR, FSR); ++ __ access_load_at(T_BYTE, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)), noreg, noreg); ++} ++ ++void TemplateTable::caload() { ++ transition(itos, itos); ++ index_check(SSR, FSR); ++ __ alsl_d(FSR, FSR, SSR, Address::times_2 - 1); ++ __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)), noreg, noreg); ++} ++ ++// iload followed by caload frequent pair ++// used register : T2 ++// T2 : index ++void TemplateTable::fast_icaload() { ++ transition(vtos, itos); ++ // load index out of locals ++ locals_index(T2); ++ __ ld_w(FSR, T2, 0); ++ index_check(SSR, FSR); ++ __ alsl_d(FSR, FSR, SSR, 0); ++ __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)), noreg, noreg); ++} ++ ++void TemplateTable::saload() { ++ transition(itos, itos); ++ index_check(SSR, FSR); ++ __ alsl_d(FSR, FSR, SSR, Address::times_2 - 1); ++ __ access_load_at(T_SHORT, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_SHORT)), noreg, noreg); ++} ++ ++void TemplateTable::iload(int n) { ++ transition(vtos, itos); ++ __ ld_w(FSR, iaddress(n)); ++} ++ ++void TemplateTable::lload(int n) { ++ transition(vtos, ltos); ++ __ ld_d(FSR, laddress(n)); ++} ++ ++void TemplateTable::fload(int n) { ++ transition(vtos, ftos); ++ __ fld_s(FSF, faddress(n)); ++} ++ ++void TemplateTable::dload(int n) { ++ transition(vtos, dtos); ++ __ fld_d(FSF, laddress(n)); ++} ++ ++void TemplateTable::aload(int n) { ++ transition(vtos, atos); ++ __ ld_d(FSR, aaddress(n)); ++} ++ ++void TemplateTable::aload_0() { ++ aload_0_internal(); ++} ++ ++void TemplateTable::nofast_aload_0() { ++ aload_0_internal(may_not_rewrite); ++} ++ ++// used register : T2, T3 ++// T2 : bytecode ++// T3 : folded code ++void TemplateTable::aload_0_internal(RewriteControl rc) { ++ transition(vtos, atos); ++ // According to bytecode histograms, the pairs: ++ // ++ // _aload_0, _fast_igetfield ++ // _aload_0, _fast_agetfield ++ // _aload_0, _fast_fgetfield ++ // ++ // occur frequently. If RewriteFrequentPairs is set, the (slow) ++ // _aload_0 bytecode checks if the next bytecode is either ++ // _fast_igetfield, _fast_agetfield or _fast_fgetfield and then ++ // rewrites the current bytecode into a pair bytecode; otherwise it ++ // rewrites the current bytecode into _fast_aload_0 that doesn't do ++ // the pair check anymore. ++ // ++ // Note: If the next bytecode is _getfield, the rewrite must be ++ // delayed, otherwise we may miss an opportunity for a pair. 
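index_check_without_pop above performs the two checks every array bytecode needs: an implicit null check on the array and a single unsigned compare (bltu) of the sign-extended index against the length field, so a negative index fails the same branch as an index that is too large. A C++ sketch of the same logic with plain stand-in types:

    #include <cstdint>
    #include <cstdio>
    #include <stdexcept>

    struct ArrayOop { int32_t length; int32_t data[8]; };

    // Mirrors index_check_without_pop: null check, then one unsigned compare covers
    // both index < 0 and index >= length.
    int32_t iaload(const ArrayOop* array, int32_t index) {
      if (array == nullptr) throw std::runtime_error("NullPointerException");
      if ((uint32_t)index >= (uint32_t)array->length)
        throw std::out_of_range("ArrayIndexOutOfBoundsException");
      return array->data[index];
    }

    int main() {
      ArrayOop a{3, {10, 20, 30}};
      std::printf("%d\n", iaload(&a, 2));
      try { iaload(&a, -1); } catch (const std::exception& e) { std::printf("%s\n", e.what()); }
    }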
++ // ++ // Also rewrite frequent pairs ++ // aload_0, aload_1 ++ // aload_0, iload_1 ++ // These bytecodes with a small amount of code are most profitable ++ // to rewrite ++ if (RewriteFrequentPairs && rc == may_rewrite) { ++ Label rewrite, done; ++ // get the next bytecode in T2 ++ __ ld_bu(T2, at_bcp(Bytecodes::length_for(Bytecodes::_aload_0))); ++ ++ // do actual aload_0 ++ aload(0); ++ ++ // if _getfield then wait with rewrite ++ __ li(AT, Bytecodes::_getfield); ++ __ beq(AT, T2, done); ++ ++ // if _igetfield then reqrite to _fast_iaccess_0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) == ++ Bytecodes::_aload_0, ++ "fix bytecode definition"); ++ __ li(T3, Bytecodes::_fast_iaccess_0); ++ __ li(AT, Bytecodes::_fast_igetfield); ++ __ beq(AT, T2, rewrite); ++ ++ // if _agetfield then reqrite to _fast_aaccess_0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) == ++ Bytecodes::_aload_0, ++ "fix bytecode definition"); ++ __ li(T3, Bytecodes::_fast_aaccess_0); ++ __ li(AT, Bytecodes::_fast_agetfield); ++ __ beq(AT, T2, rewrite); ++ ++ // if _fgetfield then reqrite to _fast_faccess_0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) == ++ Bytecodes::_aload_0, ++ "fix bytecode definition"); ++ __ li(T3, Bytecodes::_fast_faccess_0); ++ __ li(AT, Bytecodes::_fast_fgetfield); ++ __ beq(AT, T2, rewrite); ++ ++ // else rewrite to _fast_aload0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) == ++ Bytecodes::_aload_0, ++ "fix bytecode definition"); ++ __ li(T3, Bytecodes::_fast_aload_0); ++ ++ // rewrite ++ __ bind(rewrite); ++ patch_bytecode(Bytecodes::_aload_0, T3, T2, false); ++ ++ __ bind(done); ++ } else { ++ aload(0); ++ } ++} ++ ++void TemplateTable::istore() { ++ transition(itos, vtos); ++ locals_index(T2); ++ __ st_w(FSR, T2, 0); ++} ++ ++void TemplateTable::lstore() { ++ transition(ltos, vtos); ++ locals_index(T2); ++ __ st_d(FSR, T2, -wordSize); ++} ++ ++void TemplateTable::fstore() { ++ transition(ftos, vtos); ++ locals_index(T2); ++ __ fst_s(FSF, T2, 0); ++} ++ ++void TemplateTable::dstore() { ++ transition(dtos, vtos); ++ locals_index(T2); ++ __ fst_d(FSF, T2, -wordSize); ++} ++ ++void TemplateTable::astore() { ++ transition(vtos, vtos); ++ __ pop_ptr(FSR); ++ locals_index(T2); ++ __ st_d(FSR, T2, 0); ++} ++ ++void TemplateTable::wide_istore() { ++ transition(vtos, vtos); ++ __ pop_i(FSR); ++ locals_index_wide(T2); ++ __ st_d(FSR, T2, 0); ++} ++ ++void TemplateTable::wide_lstore() { ++ transition(vtos, vtos); ++ __ pop_l(FSR); ++ locals_index_wide(T2); ++ __ st_d(FSR, T2, -wordSize); ++} ++ ++void TemplateTable::wide_fstore() { ++ wide_istore(); ++} ++ ++void TemplateTable::wide_dstore() { ++ wide_lstore(); ++} ++ ++void TemplateTable::wide_astore() { ++ transition(vtos, vtos); ++ __ pop_ptr(FSR); ++ locals_index_wide(T2); ++ __ st_d(FSR, T2, 0); ++} ++ ++// used register : T2 ++void TemplateTable::iastore() { ++ transition(itos, vtos); ++ __ pop_i(SSR); // T2: array SSR: index ++ index_check(T2, SSR); // prefer index in SSR ++ __ alsl_d(T2, SSR, T2, Address::times_4 - 1); ++ __ access_store_at(T_INT, IN_HEAP | IS_ARRAY, Address(T2, arrayOopDesc::base_offset_in_bytes(T_INT)), FSR, noreg, noreg); ++} ++ ++// used register T2, T3 ++void TemplateTable::lastore() { ++ transition(ltos, vtos); ++ __ pop_i (T2); ++ index_check(T3, T2); ++ __ alsl_d(T3, T2, T3, Address::times_8 - 1); ++ __ access_store_at(T_LONG, IN_HEAP | IS_ARRAY, Address(T3, arrayOopDesc::base_offset_in_bytes(T_LONG)), FSR, noreg, noreg); ++} ++ ++// used register T2 ++void 
TemplateTable::fastore() { ++ transition(ftos, vtos); ++ __ pop_i(SSR); ++ index_check(T2, SSR); ++ __ alsl_d(T2, SSR, T2, Address::times_4 - 1); ++ __ access_store_at(T_FLOAT, IN_HEAP | IS_ARRAY, Address(T2, arrayOopDesc::base_offset_in_bytes(T_FLOAT)), noreg, noreg, noreg); ++} ++ ++// used register T2, T3 ++void TemplateTable::dastore() { ++ transition(dtos, vtos); ++ __ pop_i (T2); ++ index_check(T3, T2); ++ __ alsl_d(T3, T2, T3, Address::times_8 - 1); ++ __ access_store_at(T_DOUBLE, IN_HEAP | IS_ARRAY, Address(T3, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)), noreg, noreg, noreg); ++} ++ ++// used register : T2, T3, T8 ++// T2 : array ++// T3 : subklass ++// T8 : supklass ++void TemplateTable::aastore() { ++ Label is_null, ok_is_subtype, done; ++ transition(vtos, vtos); ++ // stack: ..., array, index, value ++ __ ld_d(FSR, at_tos()); // Value ++ __ ld_w(SSR, at_tos_p1()); // Index ++ __ ld_d(T2, at_tos_p2()); // Array ++ ++ // index_check(T2, SSR); ++ index_check_without_pop(T2, SSR); ++ // do array store check - check for NULL value first ++ __ beq(FSR, R0, is_null); ++ ++ // Move subklass into T3 ++ //add for compressedoops ++ __ load_klass(T3, FSR); ++ // Move superklass into T8 ++ //add for compressedoops ++ __ load_klass(T8, T2); ++ __ ld_d(T8, Address(T8, ObjArrayKlass::element_klass_offset())); ++ // Compress array+index*4+12 into a single register. T2 ++ __ alsl_d(T2, SSR, T2, (UseCompressedOops? Address::times_4 : Address::times_8) - 1); ++ __ addi_d(T2, T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); ++ ++ // Generate subtype check. ++ // Superklass in T8. Subklass in T3. ++ __ gen_subtype_check(T8, T3, ok_is_subtype); ++ // Come here on failure ++ // object is at FSR ++ __ jmp(Interpreter::_throw_ArrayStoreException_entry); ++ // Come here on success ++ __ bind(ok_is_subtype); ++ do_oop_store(_masm, Address(T2, 0), FSR, IS_ARRAY); ++ __ b(done); ++ ++ // Have a NULL in FSR, T2=array, SSR=index. Store NULL at ary[idx] ++ __ bind(is_null); ++ __ profile_null_seen(T4); ++ __ alsl_d(T2, SSR, T2, (UseCompressedOops? Address::times_4 : Address::times_8) - 1); ++ do_oop_store(_masm, Address(T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), noreg, IS_ARRAY); ++ ++ __ bind(done); ++ __ addi_d(SP, SP, 3 * Interpreter::stackElementSize); ++} ++ ++void TemplateTable::bastore() { ++ transition(itos, vtos); ++ __ pop_i(SSR); ++ index_check(T2, SSR); ++ ++ // Need to check whether array is boolean or byte ++ // since both types share the bastore bytecode. 
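++  // The array klass' layout helper carries a bit (layout_helper_boolean_diffbit) that
++  // distinguishes T_BOOLEAN arrays from T_BYTE arrays; when it is set, the value is
++  // masked down to 0/1 before the store (the andi below).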
++ __ load_klass(T4, T2); ++ __ ld_w(T4, T4, in_bytes(Klass::layout_helper_offset())); ++ ++ int diffbit = Klass::layout_helper_boolean_diffbit(); ++ __ li(AT, diffbit); ++ ++ Label L_skip; ++ __ andr(AT, T4, AT); ++ __ beq(AT, R0, L_skip); ++ __ andi(FSR, FSR, 0x1); ++ __ bind(L_skip); ++ ++ __ add_d(SSR, T2, SSR); ++ __ access_store_at(T_BYTE, IN_HEAP | IS_ARRAY, Address(SSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)), FSR, noreg, noreg); ++} ++ ++void TemplateTable::castore() { ++ transition(itos, vtos); ++ __ pop_i(SSR); ++ index_check(T2, SSR); ++ __ alsl_d(SSR, SSR, T2, Address::times_2 - 1); ++ __ access_store_at(T_CHAR, IN_HEAP | IS_ARRAY, Address(SSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)), FSR, noreg, noreg); ++} ++ ++void TemplateTable::sastore() { ++ castore(); ++} ++ ++void TemplateTable::istore(int n) { ++ transition(itos, vtos); ++ __ st_w(FSR, iaddress(n)); ++} ++ ++void TemplateTable::lstore(int n) { ++ transition(ltos, vtos); ++ __ st_d(FSR, laddress(n)); ++} ++ ++void TemplateTable::fstore(int n) { ++ transition(ftos, vtos); ++ __ fst_s(FSF, faddress(n)); ++} ++ ++void TemplateTable::dstore(int n) { ++ transition(dtos, vtos); ++ __ fst_d(FSF, laddress(n)); ++} ++ ++void TemplateTable::astore(int n) { ++ transition(vtos, vtos); ++ __ pop_ptr(FSR); ++ __ st_d(FSR, aaddress(n)); ++} ++ ++void TemplateTable::pop() { ++ transition(vtos, vtos); ++ __ addi_d(SP, SP, Interpreter::stackElementSize); ++} ++ ++void TemplateTable::pop2() { ++ transition(vtos, vtos); ++ __ addi_d(SP, SP, 2 * Interpreter::stackElementSize); ++} ++ ++void TemplateTable::dup() { ++ transition(vtos, vtos); ++ // stack: ..., a ++ __ load_ptr(0, FSR); ++ __ push_ptr(FSR); ++ // stack: ..., a, a ++} ++ ++// blows FSR ++void TemplateTable::dup_x1() { ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ __ load_ptr(0, FSR); // load b ++ __ load_ptr(1, A5); // load a ++ __ store_ptr(1, FSR); // store b ++ __ store_ptr(0, A5); // store a ++ __ push_ptr(FSR); // push b ++ // stack: ..., b, a, b ++} ++ ++// blows FSR ++void TemplateTable::dup_x2() { ++ transition(vtos, vtos); ++ // stack: ..., a, b, c ++ __ load_ptr(0, FSR); // load c ++ __ load_ptr(2, A5); // load a ++ __ store_ptr(2, FSR); // store c in a ++ __ push_ptr(FSR); // push c ++ // stack: ..., c, b, c, c ++ __ load_ptr(2, FSR); // load b ++ __ store_ptr(2, A5); // store a in b ++ // stack: ..., c, a, c, c ++ __ store_ptr(1, FSR); // store b in c ++ // stack: ..., c, a, b, c ++} ++ ++// blows FSR ++void TemplateTable::dup2() { ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ __ load_ptr(1, FSR); // load a ++ __ push_ptr(FSR); // push a ++ __ load_ptr(1, FSR); // load b ++ __ push_ptr(FSR); // push b ++ // stack: ..., a, b, a, b ++} ++ ++// blows FSR ++void TemplateTable::dup2_x1() { ++ transition(vtos, vtos); ++ // stack: ..., a, b, c ++ __ load_ptr(0, T2); // load c ++ __ load_ptr(1, FSR); // load b ++ __ push_ptr(FSR); // push b ++ __ push_ptr(T2); // push c ++ // stack: ..., a, b, c, b, c ++ __ store_ptr(3, T2); // store c in b ++ // stack: ..., a, c, c, b, c ++ __ load_ptr(4, T2); // load a ++ __ store_ptr(2, T2); // store a in 2nd c ++ // stack: ..., a, c, a, b, c ++ __ store_ptr(4, FSR); // store b in a ++ // stack: ..., b, c, a, b, c ++ ++ // stack: ..., b, c, a, b, c ++} ++ ++// blows FSR, SSR ++void TemplateTable::dup2_x2() { ++ transition(vtos, vtos); ++ // stack: ..., a, b, c, d ++ // stack: ..., a, b, c, d ++ __ load_ptr(0, T2); // load d ++ __ load_ptr(1, FSR); // load c ++ __ push_ptr(FSR); // push c ++ __ push_ptr(T2); // push 
d
++  // stack: ..., a, b, c, d, c, d
++  __ load_ptr(4, FSR);   // load b
++  __ store_ptr(2, FSR);  // store b in d
++  __ store_ptr(4, T2);   // store d in b
++  // stack: ..., a, d, c, b, c, d
++  __ load_ptr(5, T2);    // load a
++  __ load_ptr(3, FSR);   // load c
++  __ store_ptr(3, T2);   // store a in c
++  __ store_ptr(5, FSR);  // store c in a
++  // stack: ..., c, d, a, b, c, d
++
++  // stack: ..., c, d, a, b, c, d
++}
++
++// blows FSR
++void TemplateTable::swap() {
++  transition(vtos, vtos);
++  // stack: ..., a, b
++
++  __ load_ptr(1, A5);   // load a
++  __ load_ptr(0, FSR);  // load b
++  __ store_ptr(0, A5);  // store a in b
++  __ store_ptr(1, FSR); // store b in a
++
++  // stack: ..., b, a
++}
++
++void TemplateTable::iop2(Operation op) {
++  transition(itos, itos);
++
++  __ pop_i(SSR);
++  switch (op) {
++    case add  : __ add_w(FSR, SSR, FSR); break;
++    case sub  : __ sub_w(FSR, SSR, FSR); break;
++    case mul  : __ mul_w(FSR, SSR, FSR); break;
++    case _and : __ andr(FSR, SSR, FSR);  break;
++    case _or  : __ orr(FSR, SSR, FSR);   break;
++    case _xor : __ xorr(FSR, SSR, FSR);  break;
++    case shl  : __ sll_w(FSR, SSR, FSR); break;
++    case shr  : __ sra_w(FSR, SSR, FSR); break;
++    case ushr : __ srl_w(FSR, SSR, FSR); break;
++    default   : ShouldNotReachHere();
++  }
++}
++
++// the result is stored in FSR
++// used registers : T2
++void TemplateTable::lop2(Operation op) {
++  transition(ltos, ltos);
++  __ pop_l(T2);
++
++  switch (op) {
++    case add : __ add_d(FSR, T2, FSR); break;
++    case sub : __ sub_d(FSR, T2, FSR); break;
++    case _and: __ andr(FSR, T2, FSR);  break;
++    case _or : __ orr(FSR, T2, FSR);   break;
++    case _xor: __ xorr(FSR, T2, FSR);  break;
++    default  : ShouldNotReachHere();
++  }
++}
++
++// Java requires that idiv handle 0x80000000 / -1 without raising an overflow exception;
++// the result must be 0x80000000.
++// The Godson-2 CPU behaves the same way, so unlike x86 no special handling is needed here.
++void TemplateTable::idiv() {
++  transition(itos, itos);
++  Label not_zero;
++
++  __ bne(FSR, R0, not_zero);
++  __ jmp(Interpreter::_throw_ArithmeticException_entry);
++  __ bind(not_zero);
++
++  __ pop_i(SSR);
++  __ div_w(FSR, SSR, FSR);
++}
++
++void TemplateTable::irem() {
++  transition(itos, itos);
++  Label not_zero;
++  __ pop_i(SSR);
++
++  __ bne(FSR, R0, not_zero);
++  //__ brk(7);
++  __ jmp(Interpreter::_throw_ArithmeticException_entry);
++
++  __ bind(not_zero);
++  __ mod_w(FSR, SSR, FSR);
++}
++
++void TemplateTable::lmul() {
++  transition(ltos, ltos);
++  __ pop_l(T2);
++  __ mul_d(FSR, T2, FSR);
++}
++
++// NOTE: division by zero is checked explicitly and jumps to Interpreter::_throw_ArithmeticException_entry
++void TemplateTable::ldiv() {
++  transition(ltos, ltos);
++  Label normal;
++
++  __ bne(FSR, R0, normal);
++
++  //__ brk(7); //generate FPE
++  __ jmp(Interpreter::_throw_ArithmeticException_entry);
++
++  __ bind(normal);
++  __ pop_l(A2);
++  __ div_d(FSR, A2, FSR);
++}
++
++// NOTE: division by zero is checked explicitly and jumps to Interpreter::_throw_ArithmeticException_entry
++void TemplateTable::lrem() {
++  transition(ltos, ltos);
++  Label normal;
++
++  __ bne(FSR, R0, normal);
++
++  __ jmp(Interpreter::_throw_ArithmeticException_entry);
++
++  __ bind(normal);
++  __ pop_l(A2);
++
++  __ mod_d(FSR, A2, FSR);
++}
++
++// result in FSR
++// used registers : T0
++void TemplateTable::lshl() {
++  transition(itos, ltos);
++  __ pop_l(T0);
++  __ sll_d(FSR, T0, FSR);
++}
++
++// used registers : T0
++void TemplateTable::lshr() {
++  transition(itos, ltos);
++  __ pop_l(T0);
++  __ sra_d(FSR, T0, FSR);
++}
++
++// used registers : T0
++void TemplateTable::lushr() {
++  transition(itos, ltos);
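++  // The int shift count is in FSR; the long value is popped into T0 below. srl_d only uses
++  // the low six bits of the shift register, which matches lushr's (s & 0x3f) semantics.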
++ __ pop_l(T0); ++ __ srl_d(FSR, T0, FSR); ++} ++ ++// result in FSF ++void TemplateTable::fop2(Operation op) { ++ transition(ftos, ftos); ++ switch (op) { ++ case add: ++ __ fld_s(fscratch, at_sp()); ++ __ fadd_s(FSF, fscratch, FSF); ++ break; ++ case sub: ++ __ fld_s(fscratch, at_sp()); ++ __ fsub_s(FSF, fscratch, FSF); ++ break; ++ case mul: ++ __ fld_s(fscratch, at_sp()); ++ __ fmul_s(FSF, fscratch, FSF); ++ break; ++ case div: ++ __ fld_s(fscratch, at_sp()); ++ __ fdiv_s(FSF, fscratch, FSF); ++ break; ++ case rem: ++ __ fmov_s(FA1, FSF); ++ __ fld_s(FA0, at_sp()); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem), 2); ++ break; ++ default : ShouldNotReachHere(); ++ } ++ ++ __ addi_d(SP, SP, 1 * wordSize); ++} ++ ++// result in SSF||FSF ++// i dont handle the strict flags ++void TemplateTable::dop2(Operation op) { ++ transition(dtos, dtos); ++ switch (op) { ++ case add: ++ __ fld_d(fscratch, at_sp()); ++ __ fadd_d(FSF, fscratch, FSF); ++ break; ++ case sub: ++ __ fld_d(fscratch, at_sp()); ++ __ fsub_d(FSF, fscratch, FSF); ++ break; ++ case mul: ++ __ fld_d(fscratch, at_sp()); ++ __ fmul_d(FSF, fscratch, FSF); ++ break; ++ case div: ++ __ fld_d(fscratch, at_sp()); ++ __ fdiv_d(FSF, fscratch, FSF); ++ break; ++ case rem: ++ __ fmov_d(FA1, FSF); ++ __ fld_d(FA0, at_sp()); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem), 2); ++ break; ++ default : ShouldNotReachHere(); ++ } ++ ++ __ addi_d(SP, SP, 2 * wordSize); ++} ++ ++void TemplateTable::ineg() { ++ transition(itos, itos); ++ __ sub_w(FSR, R0, FSR); ++} ++ ++void TemplateTable::lneg() { ++ transition(ltos, ltos); ++ __ sub_d(FSR, R0, FSR); ++} ++ ++void TemplateTable::fneg() { ++ transition(ftos, ftos); ++ __ fneg_s(FSF, FSF); ++} ++ ++void TemplateTable::dneg() { ++ transition(dtos, dtos); ++ __ fneg_d(FSF, FSF); ++} ++ ++// used registers : T2 ++void TemplateTable::iinc() { ++ transition(vtos, vtos); ++ locals_index(T2); ++ __ ld_w(FSR, T2, 0); ++ __ ld_b(AT, at_bcp(2)); // get constant ++ __ add_d(FSR, FSR, AT); ++ __ st_w(FSR, T2, 0); ++} ++ ++// used register : T2 ++void TemplateTable::wide_iinc() { ++ transition(vtos, vtos); ++ locals_index_wide(T2); ++ __ get_2_byte_integer_at_bcp(FSR, AT, 4); ++ __ hswap(FSR); ++ __ ld_w(AT, T2, 0); ++ __ add_d(FSR, AT, FSR); ++ __ st_w(FSR, T2, 0); ++} ++ ++void TemplateTable::convert() { ++ // Checking ++#ifdef ASSERT ++ { ++ TosState tos_in = ilgl; ++ TosState tos_out = ilgl; ++ switch (bytecode()) { ++ case Bytecodes::_i2l: // fall through ++ case Bytecodes::_i2f: // fall through ++ case Bytecodes::_i2d: // fall through ++ case Bytecodes::_i2b: // fall through ++ case Bytecodes::_i2c: // fall through ++ case Bytecodes::_i2s: tos_in = itos; break; ++ case Bytecodes::_l2i: // fall through ++ case Bytecodes::_l2f: // fall through ++ case Bytecodes::_l2d: tos_in = ltos; break; ++ case Bytecodes::_f2i: // fall through ++ case Bytecodes::_f2l: // fall through ++ case Bytecodes::_f2d: tos_in = ftos; break; ++ case Bytecodes::_d2i: // fall through ++ case Bytecodes::_d2l: // fall through ++ case Bytecodes::_d2f: tos_in = dtos; break; ++ default : ShouldNotReachHere(); ++ } ++ switch (bytecode()) { ++ case Bytecodes::_l2i: // fall through ++ case Bytecodes::_f2i: // fall through ++ case Bytecodes::_d2i: // fall through ++ case Bytecodes::_i2b: // fall through ++ case Bytecodes::_i2c: // fall through ++ case Bytecodes::_i2s: tos_out = itos; break; ++ case Bytecodes::_i2l: // fall through ++ case Bytecodes::_f2l: // fall through ++ case Bytecodes::_d2l: tos_out = 
ltos; break; ++ case Bytecodes::_i2f: // fall through ++ case Bytecodes::_l2f: // fall through ++ case Bytecodes::_d2f: tos_out = ftos; break; ++ case Bytecodes::_i2d: // fall through ++ case Bytecodes::_l2d: // fall through ++ case Bytecodes::_f2d: tos_out = dtos; break; ++ default : ShouldNotReachHere(); ++ } ++ transition(tos_in, tos_out); ++ } ++#endif // ASSERT ++ // Conversion ++ switch (bytecode()) { ++ case Bytecodes::_i2l: ++ __ slli_w(FSR, FSR, 0); ++ break; ++ case Bytecodes::_i2f: ++ __ movgr2fr_w(FSF, FSR); ++ __ ffint_s_w(FSF, FSF); ++ break; ++ case Bytecodes::_i2d: ++ __ movgr2fr_w(FSF, FSR); ++ __ ffint_d_w(FSF, FSF); ++ break; ++ case Bytecodes::_i2b: ++ __ ext_w_b(FSR, FSR); ++ break; ++ case Bytecodes::_i2c: ++ __ bstrpick_d(FSR, FSR, 15, 0); // truncate upper 56 bits ++ break; ++ case Bytecodes::_i2s: ++ __ ext_w_h(FSR, FSR); ++ break; ++ case Bytecodes::_l2i: ++ __ slli_w(FSR, FSR, 0); ++ break; ++ case Bytecodes::_l2f: ++ __ movgr2fr_d(FSF, FSR); ++ __ ffint_s_l(FSF, FSF); ++ break; ++ case Bytecodes::_l2d: ++ __ movgr2fr_d(FSF, FSR); ++ __ ffint_d_l(FSF, FSF); ++ break; ++ case Bytecodes::_f2i: ++ __ ftintrz_w_s(fscratch, FSF); ++ __ movfr2gr_s(FSR, fscratch); ++ break; ++ case Bytecodes::_f2l: ++ __ ftintrz_l_s(fscratch, FSF); ++ __ movfr2gr_d(FSR, fscratch); ++ break; ++ case Bytecodes::_f2d: ++ __ fcvt_d_s(FSF, FSF); ++ break; ++ case Bytecodes::_d2i: ++ __ ftintrz_w_d(fscratch, FSF); ++ __ movfr2gr_s(FSR, fscratch); ++ break; ++ case Bytecodes::_d2l: ++ __ ftintrz_l_d(fscratch, FSF); ++ __ movfr2gr_d(FSR, fscratch); ++ break; ++ case Bytecodes::_d2f: ++ __ fcvt_s_d(FSF, FSF); ++ break; ++ default : ++ ShouldNotReachHere(); ++ } ++} ++ ++void TemplateTable::lcmp() { ++ transition(ltos, itos); ++ ++ __ pop(T0); ++ __ pop(R0); ++ ++ __ slt(AT, T0, FSR); ++ __ slt(FSR, FSR, T0); ++ __ sub_d(FSR, FSR, AT); ++} ++ ++void TemplateTable::float_cmp(bool is_float, int unordered_result) { ++ if (is_float) { ++ __ fld_s(fscratch, at_sp()); ++ __ addi_d(SP, SP, 1 * wordSize); ++ ++ if (unordered_result < 0) { ++ __ fcmp_clt_s(FCC0, FSF, fscratch); ++ __ fcmp_cult_s(FCC1, fscratch, FSF); ++ } else { ++ __ fcmp_cult_s(FCC0, FSF, fscratch); ++ __ fcmp_clt_s(FCC1, fscratch, FSF); ++ } ++ } else { ++ __ fld_d(fscratch, at_sp()); ++ __ addi_d(SP, SP, 2 * wordSize); ++ ++ if (unordered_result < 0) { ++ __ fcmp_clt_d(FCC0, FSF, fscratch); ++ __ fcmp_cult_d(FCC1, fscratch, FSF); ++ } else { ++ __ fcmp_cult_d(FCC0, FSF, fscratch); ++ __ fcmp_clt_d(FCC1, fscratch, FSF); ++ } ++ } ++ ++ __ movcf2gr(FSR, FCC0); ++ __ movcf2gr(AT, FCC1); ++ __ sub_d(FSR, FSR, AT); ++} ++ ++// used registers : T3, A7, Rnext ++// FSR : return bci, this is defined by the vm specification ++// T2 : MDO taken count ++// T3 : method ++// A7 : offset ++// Rnext : next bytecode, this is required by dispatch_base ++void TemplateTable::branch(bool is_jsr, bool is_wide) { ++ __ get_method(T3); ++ __ profile_taken_branch(A7, T2); // only C2 meaningful ++ ++ const ByteSize be_offset = MethodCounters::backedge_counter_offset() + ++ InvocationCounter::counter_offset(); ++ const ByteSize inv_offset = MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset(); ++ ++ // Load up T4 with the branch displacement ++ if (!is_wide) { ++ __ ld_b(A7, BCP, 1); ++ __ ld_bu(AT, BCP, 2); ++ __ slli_d(A7, A7, 8); ++ __ orr(A7, A7, AT); ++ } else { ++ __ get_4_byte_integer_at_bcp(A7, 1); ++ __ swap(A7); ++ } ++ ++ // Handle all the JSR stuff here, then exit. 
++ // It's much shorter and cleaner than intermingling with the non-JSR ++ // normal-branch stuff occuring below. ++ if (is_jsr) { ++ // Pre-load the next target bytecode into Rnext ++ __ ldx_bu(Rnext, BCP, A7); ++ ++ // compute return address as bci in FSR ++ __ addi_d(FSR, BCP, (is_wide?5:3) - in_bytes(ConstMethod::codes_offset())); ++ __ ld_d(AT, T3, in_bytes(Method::const_offset())); ++ __ sub_d(FSR, FSR, AT); ++ // Adjust the bcp in BCP by the displacement in A7 ++ __ add_d(BCP, BCP, A7); ++ // jsr returns atos that is not an oop ++ // Push return address ++ __ push_i(FSR); ++ // jsr returns vtos ++ __ dispatch_only_noverify(vtos); ++ ++ return; ++ } ++ ++ // Normal (non-jsr) branch handling ++ ++ // Adjust the bcp in S0 by the displacement in T4 ++ __ add_d(BCP, BCP, A7); ++ ++ assert(UseLoopCounter || !UseOnStackReplacement, "on-stack-replacement requires loop counters"); ++ Label backedge_counter_overflow; ++ Label profile_method; ++ Label dispatch; ++ if (UseLoopCounter) { ++ // increment backedge counter for backward branches ++ // T3: method ++ // T4: target offset ++ // BCP: target bcp ++ // LVP: locals pointer ++ __ blt(R0, A7, dispatch); // check if forward or backward branch ++ ++ // check if MethodCounters exists ++ Label has_counters; ++ __ ld_d(AT, T3, in_bytes(Method::method_counters_offset())); // use AT as MDO, TEMP ++ __ bne(AT, R0, has_counters); ++ __ push2(T3, A7); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::build_method_counters), ++ T3); ++ __ pop2(T3, A7); ++ __ ld_d(AT, T3, in_bytes(Method::method_counters_offset())); // use AT as MDO, TEMP ++ __ beq(AT, R0, dispatch); ++ __ bind(has_counters); ++ ++ if (TieredCompilation) { ++ Label no_mdo; ++ int increment = InvocationCounter::count_increment; ++ int mask = ((1 << Tier0BackedgeNotifyFreqLog) - 1) << InvocationCounter::count_shift; ++ if (ProfileInterpreter) { ++ // Are we profiling? 
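++        // Method::method_data_offset() yields the MethodData* (MDO); it is NULL until the
++        // profiler allocates it, in which case we fall through and bump the backedge counter
++        // in the MethodCounters instead.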
++ __ ld_d(T0, Address(T3, in_bytes(Method::method_data_offset()))); ++ __ beq(T0, R0, no_mdo); ++ // Increment the MDO backedge counter ++ const Address mdo_backedge_counter(T0, in_bytes(MethodData::backedge_counter_offset()) + ++ in_bytes(InvocationCounter::counter_offset())); ++ __ increment_mask_and_jump(mdo_backedge_counter, increment, mask, ++ T1, false, Assembler::zero, &backedge_counter_overflow); ++ __ beq(R0, R0, dispatch); ++ } ++ __ bind(no_mdo); ++ // Increment backedge counter in MethodCounters* ++ __ ld_d(T0, Address(T3, Method::method_counters_offset())); ++ __ increment_mask_and_jump(Address(T0, be_offset), increment, mask, ++ T1, false, Assembler::zero, &backedge_counter_overflow); ++ if (!UseOnStackReplacement) { ++ __ bind(backedge_counter_overflow); ++ } ++ } else { ++ // increment back edge counter ++ __ ld_d(T1, T3, in_bytes(Method::method_counters_offset())); ++ __ ld_w(T0, T1, in_bytes(be_offset)); ++ __ increment(T0, InvocationCounter::count_increment); ++ __ st_w(T0, T1, in_bytes(be_offset)); ++ ++ // load invocation counter ++ __ ld_w(T1, T1, in_bytes(inv_offset)); ++ // buffer bit added, mask no needed ++ ++ // dadd backedge counter & invocation counter ++ __ add_d(T1, T1, T0); ++ ++ if (ProfileInterpreter) { ++ // Test to see if we should create a method data oop ++ // T1 : backedge counter & invocation counter ++ if (Assembler::is_simm(InvocationCounter::InterpreterProfileLimit, 12)) { ++ __ slti(AT, T1, InvocationCounter::InterpreterProfileLimit); ++ __ bne(AT, R0, dispatch); ++ } else { ++ __ li(AT, (long)&InvocationCounter::InterpreterProfileLimit); ++ __ ld_w(AT, AT, 0); ++ __ blt(T1, AT, dispatch); ++ } ++ ++ // if no method data exists, go to profile method ++ __ test_method_data_pointer(T1, profile_method); ++ ++ if (UseOnStackReplacement) { ++ if (Assembler::is_simm(InvocationCounter::InterpreterBackwardBranchLimit, 12)) { ++ __ slti(AT, T2, InvocationCounter::InterpreterBackwardBranchLimit); ++ __ bne(AT, R0, dispatch); ++ } else { ++ __ li(AT, (long)&InvocationCounter::InterpreterBackwardBranchLimit); ++ __ ld_w(AT, AT, 0); ++ __ blt(T2, AT, dispatch); ++ } ++ ++ // When ProfileInterpreter is on, the backedge_count comes ++ // from the methodDataOop, which value does not get reset on ++ // the call to frequency_counter_overflow(). ++ // To avoid excessive calls to the overflow routine while ++ // the method is being compiled, dadd a second test to make ++ // sure the overflow function is called only once every ++ // overflow_frequency. ++ const int overflow_frequency = 1024; ++ __ andi(AT, T2, overflow_frequency-1); ++ __ beq(AT, R0, backedge_counter_overflow); ++ } ++ } else { ++ if (UseOnStackReplacement) { ++ // check for overflow against AT, which is the sum of the counters ++ __ li(AT, (long)&InvocationCounter::InterpreterBackwardBranchLimit); ++ __ ld_w(AT, AT, 0); ++ __ bge(T1, AT, backedge_counter_overflow); ++ } ++ } ++ } ++ __ bind(dispatch); ++ } ++ ++ // Pre-load the next target bytecode into Rnext ++ __ ld_bu(Rnext, BCP, 0); ++ ++ // continue with the bytecode @ target ++ // FSR: return bci for jsr's, unused otherwise ++ // Rnext: target bytecode ++ // BCP: target bcp ++ __ dispatch_only(vtos, true); ++ ++ if (UseLoopCounter) { ++ if (ProfileInterpreter) { ++ // Out-of-line code to allocate method data oop. 
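++      // This slow path calls InterpreterRuntime::profile_method to allocate the MethodData,
++      // then recomputes the method data pointer for the current bcp before re-dispatching.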
++ __ bind(profile_method); ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method)); ++ __ set_method_data_pointer_for_bcp(); ++ __ b(dispatch); ++ } ++ ++ if (UseOnStackReplacement) { ++ // invocation counter overflow ++ __ bind(backedge_counter_overflow); ++ __ sub_d(A7, BCP, A7); // branch bcp ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::frequency_counter_overflow), A7); ++ ++ // V0: osr nmethod (osr ok) or NULL (osr not possible) ++ // V1: osr adapter frame return address ++ // LVP: locals pointer ++ // BCP: bcp ++ __ beq(V0, R0, dispatch); ++ // nmethod may have been invalidated (VM may block upon call_VM return) ++ __ ld_b(T3, V0, nmethod::state_offset()); ++ __ li(AT, nmethod::in_use); ++ __ bne(AT, T3, dispatch); ++ ++ // We have the address of an on stack replacement routine in rax. ++ // In preparation of invoking it, first we must migrate the locals ++ // and monitors from off the interpreter frame on the stack. ++ // Ensure to save the osr nmethod over the migration call, ++ // it will be preserved in Rnext. ++ __ move(Rnext, V0); ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin)); ++ ++ // V0 is OSR buffer, move it to expected parameter location ++ // refer to osrBufferPointer in c1_LIRAssembler_loongarch.cpp ++ __ move(T0, V0); ++ ++ // pop the interpreter frame ++ __ ld_d(A7, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize)); ++ // remove frame anchor ++ __ leave(); ++ __ move(LVP, RA); ++ __ move(SP, A7); ++ ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ andr(SP , SP , AT); ++ ++ // push the (possibly adjusted) return address ++ // refer to osr_entry in c1_LIRAssembler_loongarch.cpp ++ __ ld_d(AT, Rnext, nmethod::osr_entry_point_offset()); ++ __ jr(AT); ++ } ++ } ++} ++ ++void TemplateTable::if_0cmp(Condition cc) { ++ transition(itos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ switch(cc) { ++ case not_equal: ++ __ beq(FSR, R0, not_taken); ++ break; ++ case equal: ++ __ bne(FSR, R0, not_taken); ++ break; ++ case less: ++ __ bge(FSR, R0, not_taken); ++ break; ++ case less_equal: ++ __ blt(R0, FSR, not_taken); ++ break; ++ case greater: ++ __ bge(R0, FSR, not_taken); ++ break; ++ case greater_equal: ++ __ blt(FSR, R0, not_taken); ++ break; ++ } ++ ++ branch(false, false); ++ ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++void TemplateTable::if_icmp(Condition cc) { ++ transition(itos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ ++ __ pop_i(SSR); ++ switch(cc) { ++ case not_equal: ++ __ beq(SSR, FSR, not_taken); ++ break; ++ case equal: ++ __ bne(SSR, FSR, not_taken); ++ break; ++ case less: ++ __ bge(SSR, FSR, not_taken); ++ break; ++ case less_equal: ++ __ blt(FSR, SSR, not_taken); ++ break; ++ case greater: ++ __ bge(FSR, SSR, not_taken); ++ break; ++ case greater_equal: ++ __ blt(SSR, FSR, not_taken); ++ break; ++ } ++ ++ branch(false, false); ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++void TemplateTable::if_nullcmp(Condition cc) { ++ transition(atos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ switch(cc) { ++ case not_equal: ++ __ beq(FSR, R0, not_taken); ++ break; ++ case equal: ++ __ bne(FSR, R0, not_taken); ++ break; ++ default: ++ 
ShouldNotReachHere(); ++ } ++ ++ branch(false, false); ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++ ++void TemplateTable::if_acmp(Condition cc) { ++ transition(atos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ // __ ld_w(SSR, SP, 0); ++ __ pop_ptr(SSR); ++ switch(cc) { ++ case not_equal: ++ __ beq(SSR, FSR, not_taken); ++ break; ++ case equal: ++ __ bne(SSR, FSR, not_taken); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ branch(false, false); ++ ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++// used registers : T1, T2, T3 ++// T1 : method ++// T2 : returb bci ++void TemplateTable::ret() { ++ transition(vtos, vtos); ++ ++ locals_index(T2); ++ __ ld_d(T2, T2, 0); ++ __ profile_ret(T2, T3); ++ ++ __ get_method(T1); ++ __ ld_d(BCP, T1, in_bytes(Method::const_offset())); ++ __ add_d(BCP, BCP, T2); ++ __ addi_d(BCP, BCP, in_bytes(ConstMethod::codes_offset())); ++ ++ __ dispatch_next(vtos, 0, true); ++} ++ ++// used registers : T1, T2, T3 ++// T1 : method ++// T2 : returb bci ++void TemplateTable::wide_ret() { ++ transition(vtos, vtos); ++ ++ locals_index_wide(T2); ++ __ ld_d(T2, T2, 0); // get return bci, compute return bcp ++ __ profile_ret(T2, T3); ++ ++ __ get_method(T1); ++ __ ld_d(BCP, T1, in_bytes(Method::const_offset())); ++ __ add_d(BCP, BCP, T2); ++ __ addi_d(BCP, BCP, in_bytes(ConstMethod::codes_offset())); ++ ++ __ dispatch_next(vtos, 0, true); ++} ++ ++// used register T2, T3, A7, Rnext ++// T2 : bytecode pointer ++// T3 : low ++// A7 : high ++// Rnext : dest bytecode, required by dispatch_base ++void TemplateTable::tableswitch() { ++ Label default_case, continue_execution; ++ transition(itos, vtos); ++ ++ // align BCP ++ __ addi_d(T2, BCP, BytesPerInt); ++ __ li(AT, -BytesPerInt); ++ __ andr(T2, T2, AT); ++ ++ // load lo & hi ++ __ ld_w(T3, T2, 1 * BytesPerInt); ++ __ swap(T3); ++ __ ld_w(A7, T2, 2 * BytesPerInt); ++ __ swap(A7); ++ ++ // check against lo & hi ++ __ blt(FSR, T3, default_case); ++ __ blt(A7, FSR, default_case); ++ ++ // lookup dispatch offset, in A7 big endian ++ __ sub_d(FSR, FSR, T3); ++ __ alsl_d(AT, FSR, T2, Address::times_4 - 1); ++ __ ld_w(A7, AT, 3 * BytesPerInt); ++ __ profile_switch_case(FSR, T4, T3); ++ ++ __ bind(continue_execution); ++ __ swap(A7); ++ __ add_d(BCP, BCP, A7); ++ __ ld_bu(Rnext, BCP, 0); ++ __ dispatch_only(vtos, true); ++ ++ // handle default ++ __ bind(default_case); ++ __ profile_switch_default(FSR); ++ __ ld_w(A7, T2, 0); ++ __ b(continue_execution); ++} ++ ++void TemplateTable::lookupswitch() { ++ transition(itos, itos); ++ __ stop("lookupswitch bytecode should have been rewritten"); ++} ++ ++// used registers : T2, T3, A7, Rnext ++// T2 : bytecode pointer ++// T3 : pair index ++// A7 : offset ++// Rnext : dest bytecode ++// the data after the opcode is the same as lookupswitch ++// see Rewriter::rewrite_method for more information ++void TemplateTable::fast_linearswitch() { ++ transition(itos, vtos); ++ Label loop_entry, loop, found, continue_execution; ++ ++ // swap FSR so we can avoid swapping the table entries ++ __ swap(FSR); ++ ++ // align BCP ++ __ addi_d(T2, BCP, BytesPerInt); ++ __ li(AT, -BytesPerInt); ++ __ andr(T2, T2, AT); ++ ++ // set counter ++ __ ld_w(T3, T2, BytesPerInt); ++ __ swap(T3); ++ __ b(loop_entry); ++ ++ // table search ++ __ bind(loop); ++ // get the entry value ++ __ alsl_d(AT, T3, T2, Address::times_8 - 1); ++ __ ld_w(AT, AT, 2 * BytesPerInt); ++ ++ // found? 
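++  // (The key in FSR was byte-swapped once up front, so it can be compared directly against
++  //  the big-endian match word just loaded.)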
++ __ beq(FSR, AT, found); ++ ++ __ bind(loop_entry); ++ Label L1; ++ __ bge(R0, T3, L1); ++ __ addi_d(T3, T3, -1); ++ __ b(loop); ++ __ bind(L1); ++ __ addi_d(T3, T3, -1); ++ ++ // default case ++ __ profile_switch_default(FSR); ++ __ ld_w(A7, T2, 0); ++ __ b(continue_execution); ++ ++ // entry found -> get offset ++ __ bind(found); ++ __ alsl_d(AT, T3, T2, Address::times_8 - 1); ++ __ ld_w(A7, AT, 3 * BytesPerInt); ++ __ profile_switch_case(T3, FSR, T2); ++ ++ // continue execution ++ __ bind(continue_execution); ++ __ swap(A7); ++ __ add_d(BCP, BCP, A7); ++ __ ld_bu(Rnext, BCP, 0); ++ __ dispatch_only(vtos, true); ++} ++ ++// used registers : T0, T1, T2, T3, A7, Rnext ++// T2 : pairs address(array) ++// Rnext : dest bytecode ++// the data after the opcode is the same as lookupswitch ++// see Rewriter::rewrite_method for more information ++void TemplateTable::fast_binaryswitch() { ++ transition(itos, vtos); ++ // Implementation using the following core algorithm: ++ // ++ // int binary_search(int key, LookupswitchPair* array, int n) { ++ // // Binary search according to "Methodik des Programmierens" by ++ // // Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985. ++ // int i = 0; ++ // int j = n; ++ // while (i+1 < j) { ++ // // invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q) ++ // // with Q: for all i: 0 <= i < n: key < a[i] ++ // // where a stands for the array and assuming that the (inexisting) ++ // // element a[n] is infinitely big. ++ // int h = (i + j) >> 1; ++ // // i < h < j ++ // if (key < array[h].fast_match()) { ++ // j = h; ++ // } else { ++ // i = h; ++ // } ++ // } ++ // // R: a[i] <= key < a[i+1] or Q ++ // // (i.e., if key is within array, i is the correct index) ++ // return i; ++ // } ++ ++ // register allocation ++ const Register array = T2; ++ const Register i = T3, j = A7; ++ const Register h = T1; ++ const Register temp = T0; ++ const Register key = FSR; ++ ++ // setup array ++ __ addi_d(array, BCP, 3*BytesPerInt); ++ __ li(AT, -BytesPerInt); ++ __ andr(array, array, AT); ++ ++ // initialize i & j ++ __ move(i, R0); ++ __ ld_w(j, array, - 1 * BytesPerInt); ++ // Convert j into native byteordering ++ __ swap(j); ++ ++ // and start ++ Label entry; ++ __ b(entry); ++ ++ // binary search loop ++ { ++ Label loop; ++ __ bind(loop); ++ // int h = (i + j) >> 1; ++ __ add_d(h, i, j); ++ __ srli_d(h, h, 1); ++ // if (key < array[h].fast_match()) { ++ // j = h; ++ // } else { ++ // i = h; ++ // } ++ // Convert array[h].match to native byte-ordering before compare ++ __ alsl_d(AT, h, array, Address::times_8 - 1); ++ __ ld_w(temp, AT, 0 * BytesPerInt); ++ __ swap(temp); ++ ++ __ slt(AT, key, temp); ++ __ maskeqz(i, i, AT); ++ __ masknez(temp, h, AT); ++ __ OR(i, i, temp); ++ __ masknez(j, j, AT); ++ __ maskeqz(temp, h, AT); ++ __ OR(j, j, temp); ++ ++ // while (i+1 < j) ++ __ bind(entry); ++ __ addi_d(h, i, 1); ++ __ blt(h, j, loop); ++ } ++ ++ // end of binary search, result index is i (must check again!) 
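++  // The loop invariant only guarantees a[i] <= key < a[i+1] (or that key precedes the whole
++  // table), so the match word at index i is reloaded and compared once more; on a mismatch
++  // we fall through to the default offset.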
++ Label default_case; ++ // Convert array[i].match to native byte-ordering before compare ++ __ alsl_d(AT, i, array, Address::times_8 - 1); ++ __ ld_w(temp, AT, 0 * BytesPerInt); ++ __ swap(temp); ++ __ bne(key, temp, default_case); ++ ++ // entry found -> j = offset ++ __ alsl_d(AT, i, array, Address::times_8 - 1); ++ __ ld_w(j, AT, 1 * BytesPerInt); ++ __ profile_switch_case(i, key, array); ++ __ swap(j); ++ ++ __ add_d(BCP, BCP, j); ++ __ ld_bu(Rnext, BCP, 0); ++ __ dispatch_only(vtos, true); ++ ++ // default case -> j = default offset ++ __ bind(default_case); ++ __ profile_switch_default(i); ++ __ ld_w(j, array, - 2 * BytesPerInt); ++ __ swap(j); ++ __ add_d(BCP, BCP, j); ++ __ ld_bu(Rnext, BCP, 0); ++ __ dispatch_only(vtos, true); ++} ++ ++void TemplateTable::_return(TosState state) { ++ transition(state, state); ++ assert(_desc->calls_vm(), ++ "inconsistent calls_vm information"); // call in remove_activation ++ ++ if (_desc->bytecode() == Bytecodes::_return_register_finalizer) { ++ assert(state == vtos, "only valid state"); ++ __ ld_d(T1, aaddress(0)); ++ __ load_klass(LVP, T1); ++ __ ld_w(LVP, LVP, in_bytes(Klass::access_flags_offset())); ++ __ li(AT, JVM_ACC_HAS_FINALIZER); ++ __ andr(AT, AT, LVP); ++ Label skip_register_finalizer; ++ __ beq(AT, R0, skip_register_finalizer); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::register_finalizer), T1); ++ __ bind(skip_register_finalizer); ++ } ++ ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ if (SafepointMechanism::uses_thread_local_poll() && _desc->bytecode() != Bytecodes::_return_register_finalizer) { ++ Label no_safepoint; ++ NOT_PRODUCT(__ block_comment("Thread-local Safepoint poll")); ++ __ ld_b(AT, thread, in_bytes(Thread::polling_page_offset())); ++ __ andi(AT, AT, SafepointMechanism::poll_bit()); ++ __ beq(AT, R0, no_safepoint); ++ __ push(state); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::at_safepoint)); ++ __ pop(state); ++ __ bind(no_safepoint); ++ } ++ ++ // Narrow result if state is itos but result type is smaller. ++ // Need to narrow in the return bytecode rather than in generate_return_entry ++ // since compiled code callers expect the result to already be narrowed. ++ if (state == itos) { ++ __ narrow(FSR); ++ } ++ ++ __ remove_activation(state, T4); ++ __ membar(__ StoreStore); ++ ++ __ jr(T4); ++} ++ ++// we dont shift left 2 bits in get_cache_and_index_at_bcp ++// for we always need shift the index we use it. the ConstantPoolCacheEntry ++// is 16-byte long, index is the index in ++// ConstantPoolCache, so cache + base_offset() + index * 16 is ++// the corresponding ConstantPoolCacheEntry ++// used registers : T2 ++// NOTE : the returned index need also shift left 4 to get the address! ++void TemplateTable::resolve_cache_and_index(int byte_no, ++ Register Rcache, ++ Register index, ++ size_t index_size) { ++ assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); ++ const Register temp = A1; ++ assert_different_registers(Rcache, index); ++ ++ Label resolved; ++ ++ Bytecodes::Code code = bytecode(); ++ switch (code) { ++ case Bytecodes::_nofast_getfield: code = Bytecodes::_getfield; break; ++ case Bytecodes::_nofast_putfield: code = Bytecodes::_putfield; break; ++ default: break; ++ } ++ ++ __ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, temp, byte_no, 1, index_size); ++ // is resolved? 
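++  // get_cache_and_index_and_bytecode_at_bcp loaded the bytecode recorded in the cache entry
++  // into temp; if it already equals the current (non-nofast) bytecode, the entry has been
++  // resolved and the runtime call below is skipped.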
++ int i = (int)code; ++ __ addi_d(temp, temp, -i); ++ __ beq(temp, R0, resolved); ++ ++ // resolve first time through ++ address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache); ++ ++ __ li(temp, i); ++ __ call_VM(NOREG, entry, temp); ++ ++ // Update registers with resolved info ++ __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size); ++ __ bind(resolved); ++} ++//END: LA ++ ++// The Rcache and index registers must be set before call ++void TemplateTable::load_field_cp_cache_entry(Register obj, ++ Register cache, ++ Register index, ++ Register off, ++ Register flags, ++ bool is_static = false) { ++ assert_different_registers(cache, index, flags, off); ++ ++ ByteSize cp_base_offset = ConstantPoolCache::base_offset(); ++ // Field offset ++ __ alsl_d(AT, index, cache, Address::times_ptr - 1); ++ __ ld_d(off, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::f2_offset())); ++ // Flags ++ __ ld_d(flags, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::flags_offset())); ++ ++ // klass overwrite register ++ if (is_static) { ++ __ ld_d(obj, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::f1_offset())); ++ const int mirror_offset = in_bytes(Klass::java_mirror_offset()); ++ __ ld_d(obj, Address(obj, mirror_offset)); ++ ++ __ resolve_oop_handle(obj, T4); ++ } ++} ++ ++// get the method, itable_index and flags of the current invoke ++void TemplateTable::load_invoke_cp_cache_entry(int byte_no, ++ Register method, ++ Register itable_index, ++ Register flags, ++ bool is_invokevirtual, ++ bool is_invokevfinal, /*unused*/ ++ bool is_invokedynamic) { ++ // setup registers ++ const Register cache = T3; ++ const Register index = T1; ++ assert_different_registers(method, flags); ++ assert_different_registers(method, cache, index); ++ assert_different_registers(itable_index, flags); ++ assert_different_registers(itable_index, cache, index); ++ assert(is_invokevirtual == (byte_no == f2_byte), "is invokevirtual flag redundant"); ++ // determine constant pool cache field offsets ++ const int method_offset = in_bytes( ++ ConstantPoolCache::base_offset() + ++ ((byte_no == f2_byte) ++ ? ConstantPoolCacheEntry::f2_offset() ++ : ConstantPoolCacheEntry::f1_offset())); ++ const int flags_offset = in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::flags_offset()); ++ // access constant pool cache fields ++ const int index_offset = in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::f2_offset()); ++ ++ size_t index_size = (is_invokedynamic ? sizeof(u4): sizeof(u2)); ++ resolve_cache_and_index(byte_no, cache, index, index_size); ++ ++ __ alsl_d(AT, index, cache, Address::times_ptr - 1); ++ __ ld_d(method, AT, method_offset); ++ ++ if (itable_index != NOREG) { ++ __ ld_d(itable_index, AT, index_offset); ++ } ++ __ ld_d(flags, AT, flags_offset); ++} ++ ++// The registers cache and index expected to be set before call. ++// Correct values of the cache and index registers are preserved. ++void TemplateTable::jvmti_post_field_access(Register cache, Register index, ++ bool is_static, bool has_tos) { ++ // do the JVMTI work here to avoid disturbing the register state below ++ // We use c_rarg registers here because we want to use the register used in ++ // the call to the VM ++ if (JvmtiExport::can_post_field_access()) { ++ // Check to see if a field access watch has been set before we ++ // take the time to call into the VM. 
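++    // JvmtiExport::get_field_access_count_addr() points at a global count of installed
++    // field access watches; a zero count lets us skip the notification path entirely.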
++ Label L1; ++ // kill FSR ++ Register tmp1 = T2; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ assert_different_registers(cache, index, AT); ++ __ li(AT, (intptr_t)JvmtiExport::get_field_access_count_addr()); ++ __ ld_w(AT, AT, 0); ++ __ beq(AT, R0, L1); ++ ++ __ get_cache_and_index_at_bcp(tmp2, tmp3, 1); ++ ++ // cache entry pointer ++ __ addi_d(tmp2, tmp2, in_bytes(ConstantPoolCache::base_offset())); ++ __ alsl_d(tmp2, tmp3, tmp2, LogBytesPerWord - 1); ++ ++ if (is_static) { ++ __ move(tmp1, R0); ++ } else { ++ __ ld_d(tmp1, SP, 0); ++ __ verify_oop(tmp1); ++ } ++ // tmp1: object pointer or NULL ++ // tmp2: cache entry pointer ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_access), ++ tmp1, tmp2); ++ __ get_cache_and_index_at_bcp(cache, index, 1); ++ __ bind(L1); ++ } ++} ++ ++void TemplateTable::pop_and_check_object(Register r) { ++ __ pop_ptr(r); ++ __ null_check(r); // for field access must check obj. ++ __ verify_oop(r); ++} ++ ++// used registers : T1, T2, T3, T1 ++// T1 : flags ++// T2 : off ++// T3 : obj ++// T1 : field address ++// The flags 31, 30, 29, 28 together build a 4 bit number 0 to 8 with the ++// following mapping to the TosState states: ++// btos: 0 ++// ctos: 1 ++// stos: 2 ++// itos: 3 ++// ltos: 4 ++// ftos: 5 ++// dtos: 6 ++// atos: 7 ++// vtos: 8 ++// see ConstantPoolCacheEntry::set_field for more info ++void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteControl rc) { ++ transition(vtos, vtos); ++ ++ const Register cache = T3; ++ const Register index = T0; ++ ++ const Register obj = T3; ++ const Register off = T2; ++ const Register flags = T1; ++ ++ const Register scratch = T8; ++ ++ resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); ++ jvmti_post_field_access(cache, index, is_static, false); ++ load_field_cp_cache_entry(obj, cache, index, off, flags, is_static); ++ ++ { ++ __ li(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, flags); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(MacroAssembler::AnyAny); ++ __ bind(notVolatile); ++ } ++ ++ if (!is_static) pop_and_check_object(obj); ++ __ add_d(index, obj, off); ++ ++ const Address field(index, 0); ++ ++ Label Done, notByte, notBool, notInt, notShort, notChar, ++ notLong, notFloat, notObj, notDouble; ++ ++ assert(btos == 0, "change code, btos != 0"); ++ __ srli_d(flags, flags, ConstantPoolCacheEntry::tos_state_shift); ++ __ andi(flags, flags, ConstantPoolCacheEntry::tos_state_mask); ++ __ bne(flags, R0, notByte); ++ ++ // btos ++ __ access_load_at(T_BYTE, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(btos); ++ ++ // Rewrite bytecode to be faster ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_bgetfield, T3, T2); ++ } ++ __ b(Done); ++ ++ ++ __ bind(notByte); ++ __ li(AT, ztos); ++ __ bne(flags, AT, notBool); ++ ++ // ztos ++ __ access_load_at(T_BOOLEAN, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(ztos); ++ ++ // Rewrite bytecode to be faster ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_bgetfield, T3, T2); ++ } ++ __ b(Done); ++ ++ ++ __ bind(notBool); ++ __ li(AT, itos); ++ __ bne(flags, AT, notInt); ++ ++ // itos ++ __ access_load_at(T_INT, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(itos); ++ ++ // Rewrite bytecode to be faster ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_igetfield, T3, T2); ++ } ++ __ b(Done); ++ ++ __ bind(notInt); ++ __ li(AT, atos); ++ __ bne(flags, AT, 
notObj); ++ ++ // atos ++ //add for compressedoops ++ do_oop_load(_masm, Address(index, 0), FSR, IN_HEAP); ++ __ push(atos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_agetfield, T3, T2); ++ } ++ __ b(Done); ++ ++ __ bind(notObj); ++ __ li(AT, ctos); ++ __ bne(flags, AT, notChar); ++ ++ // ctos ++ __ access_load_at(T_CHAR, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(ctos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_cgetfield, T3, T2); ++ } ++ __ b(Done); ++ ++ __ bind(notChar); ++ __ li(AT, stos); ++ __ bne(flags, AT, notShort); ++ ++ // stos ++ __ access_load_at(T_SHORT, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(stos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_sgetfield, T3, T2); ++ } ++ __ b(Done); ++ ++ __ bind(notShort); ++ __ li(AT, ltos); ++ __ bne(flags, AT, notLong); ++ ++ // ltos ++ __ access_load_at(T_LONG, IN_HEAP | MO_RELAXED, FSR, field, noreg, noreg); ++ __ push(ltos); ++ ++ // Don't rewrite to _fast_lgetfield for potential volatile case. ++ __ b(Done); ++ ++ __ bind(notLong); ++ __ li(AT, ftos); ++ __ bne(flags, AT, notFloat); ++ ++ // ftos ++ __ access_load_at(T_FLOAT, IN_HEAP, noreg /* ftos */, field, noreg, noreg); ++ __ push(ftos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_fgetfield, T3, T2); ++ } ++ __ b(Done); ++ ++ __ bind(notFloat); ++ __ li(AT, dtos); ++#ifdef ASSERT ++ __ bne(flags, AT, notDouble); ++#endif ++ ++ // dtos ++ __ access_load_at(T_DOUBLE, IN_HEAP, noreg /* dtos */, field, noreg, noreg); ++ __ push(dtos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_dgetfield, T3, T2); ++ } ++ ++#ifdef ASSERT ++ __ b(Done); ++ __ bind(notDouble); ++ __ stop("Bad state"); ++#endif ++ ++ __ bind(Done); ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad | __ LoadStore)); ++ __ bind(notVolatile); ++ } ++} ++ ++void TemplateTable::getfield(int byte_no) { ++ getfield_or_static(byte_no, false); ++} ++ ++void TemplateTable::nofast_getfield(int byte_no) { ++ getfield_or_static(byte_no, false, may_not_rewrite); ++} ++ ++void TemplateTable::getstatic(int byte_no) { ++ getfield_or_static(byte_no, true); ++} ++ ++// The registers cache and index expected to be set before call. ++// The function may destroy various registers, just not the cache and index registers. ++void TemplateTable::jvmti_post_field_mod(Register cache, Register index, bool is_static) { ++ transition(vtos, vtos); ++ ++ ByteSize cp_base_offset = ConstantPoolCache::base_offset(); ++ ++ if (JvmtiExport::can_post_field_modification()) { ++ // Check to see if a field modification watch has been set before ++ // we take the time to call into the VM. ++ Label L1; ++ //kill AT, T1, T2, T3, T4 ++ Register tmp1 = T2; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ Register tmp4 = T4; ++ assert_different_registers(cache, index, tmp4); ++ ++ __ li(AT, JvmtiExport::get_field_modification_count_addr()); ++ __ ld_w(AT, AT, 0); ++ __ beq(AT, R0, L1); ++ ++ __ get_cache_and_index_at_bcp(tmp2, tmp4, 1); ++ ++ if (is_static) { ++ __ move(tmp1, R0); ++ } else { ++ // Life is harder. The stack holds the value on top, followed by ++ // the object. We don't know the size of the value, though; it ++ // could be one or two words depending on its type. As a result, ++ // we must find the type to determine where the object is. 
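++      // The tos state is read from the cache entry flags below: ltos and dtos values occupy
++      // two expression-stack slots, so the object reference sits one slot further from SP
++      // than in the one-slot cases.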
++ Label two_word, valsize_known; ++ __ alsl_d(AT, tmp4, tmp2, Address::times_8 - 1); ++ __ ld_d(tmp3, AT, in_bytes(cp_base_offset + ++ ConstantPoolCacheEntry::flags_offset())); ++ __ shr(tmp3, ConstantPoolCacheEntry::tos_state_shift); ++ ++ ConstantPoolCacheEntry::verify_tos_state_shift(); ++ __ move(tmp1, SP); ++ __ li(AT, ltos); ++ __ beq(tmp3, AT, two_word); ++ __ li(AT, dtos); ++ __ beq(tmp3, AT, two_word); ++ __ addi_d(tmp1, tmp1, Interpreter::expr_offset_in_bytes(1) ); ++ __ b(valsize_known); ++ ++ __ bind(two_word); ++ __ addi_d(tmp1, tmp1, Interpreter::expr_offset_in_bytes(2)); ++ ++ __ bind(valsize_known); ++ // setup object pointer ++ __ ld_d(tmp1, tmp1, 0 * wordSize); ++ } ++ // cache entry pointer ++ __ addi_d(tmp2, tmp2, in_bytes(cp_base_offset)); ++ __ alsl_d(tmp2, tmp4, tmp2, LogBytesPerWord - 1); ++ // object (tos) ++ __ move(tmp3, SP); ++ // tmp1: object pointer set up above (NULL if static) ++ // tmp2: cache entry pointer ++ // tmp3: jvalue object on the stack ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_modification), ++ tmp1, tmp2, tmp3); ++ __ get_cache_and_index_at_bcp(cache, index, 1); ++ __ bind(L1); ++ } ++} ++ ++// used registers : T0, T1, T2, T3, T8 ++// T1 : flags ++// T2 : off ++// T3 : obj ++// T8 : volatile bit ++// see ConstantPoolCacheEntry::set_field for more info ++void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteControl rc) { ++ transition(vtos, vtos); ++ ++ const Register cache = T3; ++ const Register index = T0; ++ const Register obj = T3; ++ const Register off = T2; ++ const Register flags = T1; ++ const Register bc = T3; ++ ++ const Register scratch = T8; ++ ++ resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); ++ jvmti_post_field_mod(cache, index, is_static); ++ load_field_cp_cache_entry(obj, cache, index, off, flags, is_static); ++ ++ Label Done; ++ { ++ __ li(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, flags); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(Assembler::Membar_mask_bits(__ StoreStore | __ LoadStore)); ++ __ bind(notVolatile); ++ } ++ ++ ++ Label notByte, notBool, notInt, notShort, notChar, notLong, notFloat, notObj, notDouble; ++ ++ assert(btos == 0, "change code, btos != 0"); ++ ++ // btos ++ __ srli_d(flags, flags, ConstantPoolCacheEntry::tos_state_shift); ++ __ andi(flags, flags, ConstantPoolCacheEntry::tos_state_mask); ++ __ bne(flags, R0, notByte); ++ ++ __ pop(btos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add_d(T4, obj, off); ++ __ access_store_at(T_BYTE, IN_HEAP, Address(T4), FSR, noreg, noreg); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_bputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ ++ // ztos ++ __ bind(notByte); ++ __ li(AT, ztos); ++ __ bne(flags, AT, notBool); ++ ++ __ pop(ztos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add_d(T4, obj, off); ++ __ andi(FSR, FSR, 0x1); ++ __ access_store_at(T_BOOLEAN, IN_HEAP, Address(T4), FSR, noreg, noreg); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_zputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ ++ // itos ++ __ bind(notBool); ++ __ li(AT, itos); ++ __ bne(flags, AT, notInt); ++ ++ __ pop(itos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add_d(T4, obj, off); ++ __ access_store_at(T_INT, IN_HEAP, Address(T4), FSR, noreg, noreg); ++ ++ if (!is_static && rc == may_rewrite) { ++ 
patch_bytecode(Bytecodes::_fast_iputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ ++ // atos ++ __ bind(notInt); ++ __ li(AT, atos); ++ __ bne(flags, AT, notObj); ++ ++ __ pop(atos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ ++ do_oop_store(_masm, Address(obj, off, Address::times_1, 0), FSR); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_aputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ ++ // ctos ++ __ bind(notObj); ++ __ li(AT, ctos); ++ __ bne(flags, AT, notChar); ++ ++ __ pop(ctos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add_d(T4, obj, off); ++ __ access_store_at(T_CHAR, IN_HEAP, Address(T4), FSR, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_cputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ ++ // stos ++ __ bind(notChar); ++ __ li(AT, stos); ++ __ bne(flags, AT, notShort); ++ ++ __ pop(stos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add_d(T4, obj, off); ++ __ access_store_at(T_SHORT, IN_HEAP, Address(T4), FSR, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_sputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ ++ // ltos ++ __ bind(notShort); ++ __ li(AT, ltos); ++ __ bne(flags, AT, notLong); ++ ++ __ pop(ltos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add_d(T4, obj, off); ++ __ access_store_at(T_LONG, IN_HEAP, Address(T4), FSR, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_lputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ ++ // ftos ++ __ bind(notLong); ++ __ li(AT, ftos); ++ __ bne(flags, AT, notFloat); ++ ++ __ pop(ftos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add_d(T4, obj, off); ++ __ access_store_at(T_FLOAT, IN_HEAP, Address(T4), noreg, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_fputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ ++ ++ // dtos ++ __ bind(notFloat); ++ __ li(AT, dtos); ++#ifdef ASSERT ++ __ bne(flags, AT, notDouble); ++#endif ++ ++ __ pop(dtos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add_d(T4, obj, off); ++ __ access_store_at(T_DOUBLE, IN_HEAP, Address(T4), noreg, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_dputfield, bc, off, true, byte_no); ++ } ++ ++#ifdef ASSERT ++ __ b(Done); ++ ++ __ bind(notDouble); ++ __ stop("Bad state"); ++#endif ++ ++ __ bind(Done); ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(Assembler::Membar_mask_bits(__ StoreLoad | __ StoreStore)); ++ __ bind(notVolatile); ++ } ++} ++ ++void TemplateTable::putfield(int byte_no) { ++ putfield_or_static(byte_no, false); ++} ++ ++void TemplateTable::nofast_putfield(int byte_no) { ++ putfield_or_static(byte_no, false, may_not_rewrite); ++} ++ ++void TemplateTable::putstatic(int byte_no) { ++ putfield_or_static(byte_no, true); ++} ++ ++// used registers : T1, T2, T3 ++// T1 : cp_entry ++// T2 : obj ++// T3 : value pointer ++void TemplateTable::jvmti_post_fast_field_mod() { ++ if (JvmtiExport::can_post_field_modification()) { ++ // Check to see if a field modification watch has been set before ++ // we take the time to call into the VM. 
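++    // As with field access watches, a zero modification count short-circuits the JVMTI call;
++    // otherwise the new value is materialized as a jvalue on the stack (the switch below)
++    // so post_field_modification can read it.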
++ Label L2; ++ //kill AT, T1, T2, T3, T4 ++ Register tmp1 = T2; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ Register tmp4 = T4; ++ __ li(AT, JvmtiExport::get_field_modification_count_addr()); ++ __ ld_w(tmp3, AT, 0); ++ __ beq(tmp3, R0, L2); ++ __ pop_ptr(tmp1); ++ __ verify_oop(tmp1); ++ __ push_ptr(tmp1); ++ switch (bytecode()) { // load values into the jvalue object ++ case Bytecodes::_fast_aputfield: __ push_ptr(FSR); break; ++ case Bytecodes::_fast_bputfield: // fall through ++ case Bytecodes::_fast_zputfield: // fall through ++ case Bytecodes::_fast_sputfield: // fall through ++ case Bytecodes::_fast_cputfield: // fall through ++ case Bytecodes::_fast_iputfield: __ push_i(FSR); break; ++ case Bytecodes::_fast_dputfield: __ push_d(FSF); break; ++ case Bytecodes::_fast_fputfield: __ push_f(); break; ++ case Bytecodes::_fast_lputfield: __ push_l(FSR); break; ++ default: ShouldNotReachHere(); ++ } ++ __ move(tmp3, SP); ++ // access constant pool cache entry ++ __ get_cache_entry_pointer_at_bcp(tmp2, FSR, 1); ++ __ verify_oop(tmp1); ++ // tmp1: object pointer copied above ++ // tmp2: cache entry pointer ++ // tmp3: jvalue object on the stack ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_modification), ++ tmp1, tmp2, tmp3); ++ ++ switch (bytecode()) { // restore tos values ++ case Bytecodes::_fast_aputfield: __ pop_ptr(FSR); break; ++ case Bytecodes::_fast_bputfield: // fall through ++ case Bytecodes::_fast_zputfield: // fall through ++ case Bytecodes::_fast_sputfield: // fall through ++ case Bytecodes::_fast_cputfield: // fall through ++ case Bytecodes::_fast_iputfield: __ pop_i(FSR); break; ++ case Bytecodes::_fast_dputfield: __ pop_d(); break; ++ case Bytecodes::_fast_fputfield: __ pop_f(); break; ++ case Bytecodes::_fast_lputfield: __ pop_l(FSR); break; ++ } ++ __ bind(L2); ++ } ++} ++ ++// used registers : T2, T3, T1 ++// T2 : index & off & field address ++// T3 : cache & obj ++// T1 : flags ++void TemplateTable::fast_storefield(TosState state) { ++ transition(state, vtos); ++ ++ const Register scratch = T8; ++ ++ ByteSize base = ConstantPoolCache::base_offset(); ++ ++ jvmti_post_fast_field_mod(); ++ ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(T3, T2, 1); ++ ++ // Must prevent reordering of the following cp cache loads with bytecode load ++ __ membar(__ LoadLoad); ++ ++ // test for volatile with T1 ++ __ alsl_d(AT, T2, T3, Address::times_8 - 1); ++ __ ld_d(T1, AT, in_bytes(base + ConstantPoolCacheEntry::flags_offset())); ++ ++ // replace index with field offset from cache entry ++ __ ld_d(T2, AT, in_bytes(base + ConstantPoolCacheEntry::f2_offset())); ++ ++ Label Done; ++ { ++ __ li(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, T1); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(Assembler::Membar_mask_bits(__ StoreStore | __ LoadStore)); ++ __ bind(notVolatile); ++ } ++ ++ // Get object from stack ++ pop_and_check_object(T3); ++ ++ if (bytecode() != Bytecodes::_fast_aputfield) { ++ // field address ++ __ add_d(T2, T3, T2); ++ } ++ ++ // access field ++ switch (bytecode()) { ++ case Bytecodes::_fast_zputfield: ++ __ andi(FSR, FSR, 0x1); // boolean is true if LSB is 1 ++ __ access_store_at(T_BOOLEAN, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_bputfield: ++ __ access_store_at(T_BYTE, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_sputfield: ++ __ access_store_at(T_SHORT, IN_HEAP, Address(T2), 
FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_cputfield: ++ __ access_store_at(T_CHAR, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_iputfield: ++ __ access_store_at(T_INT, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_lputfield: ++ __ access_store_at(T_LONG, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_fputfield: ++ __ access_store_at(T_FLOAT, IN_HEAP, Address(T2), noreg, noreg, noreg); ++ break; ++ case Bytecodes::_fast_dputfield: ++ __ access_store_at(T_DOUBLE, IN_HEAP, Address(T2), noreg, noreg, noreg); ++ break; ++ case Bytecodes::_fast_aputfield: ++ do_oop_store(_masm, Address(T3, T2, Address::times_1, 0), FSR); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(Assembler::Membar_mask_bits(__ StoreLoad | __ StoreStore)); ++ __ bind(notVolatile); ++ } ++} ++ ++// used registers : T2, T3, T1 ++// T3 : cp_entry & cache ++// T2 : index & offset ++void TemplateTable::fast_accessfield(TosState state) { ++ transition(atos, state); ++ ++ const Register scratch = T8; ++ ++ // do the JVMTI work here to avoid disturbing the register state below ++ if (JvmtiExport::can_post_field_access()) { ++ // Check to see if a field access watch has been set before we take ++ // the time to call into the VM. ++ Label L1; ++ __ li(AT, (intptr_t)JvmtiExport::get_field_access_count_addr()); ++ __ ld_w(T3, AT, 0); ++ __ beq(T3, R0, L1); ++ // access constant pool cache entry ++ __ get_cache_entry_pointer_at_bcp(T3, T1, 1); ++ __ move(TSR, FSR); ++ __ verify_oop(FSR); ++ // FSR: object pointer copied above ++ // T3: cache entry pointer ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access), ++ FSR, T3); ++ __ move(FSR, TSR); ++ __ bind(L1); ++ } ++ ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(T3, T2, 1); ++ ++ // Must prevent reordering of the following cp cache loads with bytecode load ++ __ membar(__ LoadLoad); ++ ++ // replace index with field offset from cache entry ++ __ alsl_d(AT, T2, T3, Address::times_8 - 1); ++ __ ld_d(T2, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset())); ++ ++ { ++ __ ld_d(AT, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); ++ __ li(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, AT); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(MacroAssembler::AnyAny); ++ __ bind(notVolatile); ++ } ++ ++ // FSR: object ++ __ verify_oop(FSR); ++ __ null_check(FSR); ++ // field addresses ++ __ add_d(FSR, FSR, T2); ++ ++ // access field ++ switch (bytecode()) { ++ case Bytecodes::_fast_bgetfield: ++ __ access_load_at(T_BYTE, IN_HEAP, FSR, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_sgetfield: ++ __ access_load_at(T_SHORT, IN_HEAP, FSR, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_cgetfield: ++ __ access_load_at(T_CHAR, IN_HEAP, FSR, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_igetfield: ++ __ access_load_at(T_INT, IN_HEAP, FSR, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_lgetfield: ++ __ stop("should not be rewritten"); ++ break; ++ case Bytecodes::_fast_fgetfield: ++ __ access_load_at(T_FLOAT, IN_HEAP, noreg, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_dgetfield: ++ __ access_load_at(T_DOUBLE, IN_HEAP, noreg, Address(FSR), 
noreg, noreg); ++ break; ++ case Bytecodes::_fast_agetfield: ++ do_oop_load(_masm, Address(FSR, 0), FSR, IN_HEAP); ++ __ verify_oop(FSR); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad | __ LoadStore)); ++ __ bind(notVolatile); ++ } ++} ++ ++// generator for _fast_iaccess_0, _fast_aaccess_0, _fast_faccess_0 ++// used registers : T1, T2, T3, T1 ++// T1 : obj & field address ++// T2 : off ++// T3 : cache ++// T1 : index ++void TemplateTable::fast_xaccess(TosState state) { ++ transition(vtos, state); ++ ++ const Register scratch = T8; ++ ++ // get receiver ++ __ ld_d(T1, aaddress(0)); ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(T3, T2, 2); ++ __ alsl_d(AT, T2, T3, Address::times_8 - 1); ++ __ ld_d(T2, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset())); ++ ++ { ++ __ ld_d(AT, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); ++ __ li(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, AT); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(MacroAssembler::AnyAny); ++ __ bind(notVolatile); ++ } ++ ++ // make sure exception is reported in correct bcp range (getfield is ++ // next instruction) ++ __ addi_d(BCP, BCP, 1); ++ __ null_check(T1); ++ __ add_d(T1, T1, T2); ++ ++ if (state == itos) { ++ __ access_load_at(T_INT, IN_HEAP, FSR, Address(T1), noreg, noreg); ++ } else if (state == atos) { ++ do_oop_load(_masm, Address(T1, 0), FSR, IN_HEAP); ++ __ verify_oop(FSR); ++ } else if (state == ftos) { ++ __ access_load_at(T_FLOAT, IN_HEAP, noreg, Address(T1), noreg, noreg); ++ } else { ++ ShouldNotReachHere(); ++ } ++ __ addi_d(BCP, BCP, -1); ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad | __ LoadStore)); ++ __ bind(notVolatile); ++ } ++} ++ ++ ++//----------------------------------------------------------------------------- ++// Calls ++ ++void TemplateTable::count_calls(Register method, Register temp) { ++ // implemented elsewhere ++ ShouldNotReachHere(); ++} ++ ++// method, index, recv, flags: T1, T2, T3, T1 ++// byte_no = 2 for _invokevirtual, 1 else ++// T0 : return address ++// get the method & index of the invoke, and push the return address of ++// the invoke(first word in the frame) ++// this address is where the return code jmp to. ++// NOTE : this method will set T3&T1 as recv&flags ++void TemplateTable::prepare_invoke(int byte_no, ++ Register method, // linked method (or i-klass) ++ Register index, // itable index, MethodType, etc. 
++ Register recv, // if caller wants to see it ++ Register flags // if caller wants to test it ++ ) { ++ // determine flags ++ const Bytecodes::Code code = bytecode(); ++ const bool is_invokeinterface = code == Bytecodes::_invokeinterface; ++ const bool is_invokedynamic = code == Bytecodes::_invokedynamic; ++ const bool is_invokehandle = code == Bytecodes::_invokehandle; ++ const bool is_invokevirtual = code == Bytecodes::_invokevirtual; ++ const bool is_invokespecial = code == Bytecodes::_invokespecial; ++ const bool load_receiver = (recv != noreg); ++ const bool save_flags = (flags != noreg); ++ assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic),""); ++ assert(save_flags == (is_invokeinterface || is_invokevirtual), "need flags for vfinal"); ++ assert(flags == noreg || flags == T1, "error flags reg."); ++ assert(recv == noreg || recv == T3, "error recv reg."); ++ ++ // setup registers & access constant pool cache ++ if(recv == noreg) recv = T3; ++ if(flags == noreg) flags = T1; ++ assert_different_registers(method, index, recv, flags); ++ ++ // save 'interpreter return address' ++ __ save_bcp(); ++ ++ load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic); ++ ++ if (is_invokedynamic || is_invokehandle) { ++ Label L_no_push; ++ __ li(AT, (1 << ConstantPoolCacheEntry::has_appendix_shift)); ++ __ andr(AT, AT, flags); ++ __ beq(AT, R0, L_no_push); ++ // Push the appendix as a trailing parameter. ++ // This must be done before we get the receiver, ++ // since the parameter_size includes it. ++ Register tmp = SSR; ++ __ push(tmp); ++ __ move(tmp, index); ++ assert(ConstantPoolCacheEntry::_indy_resolved_references_appendix_offset == 0, "appendix expected at index+0"); ++ __ load_resolved_reference_at_index(index, tmp, recv); ++ __ pop(tmp); ++ __ push(index); // push appendix (MethodType, CallSite, etc.) ++ __ bind(L_no_push); ++ } ++ ++ // load receiver if needed (after appendix is pushed so parameter size is correct) ++ // Note: no return address pushed yet ++ if (load_receiver) { ++ __ li(AT, ConstantPoolCacheEntry::parameter_size_mask); ++ __ andr(recv, flags, AT); ++ // Since we won't push RA on stack, no_return_pc_pushed_yet should be 0. 
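++    // recv currently holds the parameter size in stack slots (masked from flags above);
++    // argument_address() converts that slot count into the expression-stack address of the
++    // receiver, which is the bottom-most (first-pushed) outgoing argument.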
++ const int no_return_pc_pushed_yet = 0; // argument slot correction before we push return address ++ const int receiver_is_at_end = -1; // back off one slot to get receiver ++ Address recv_addr = __ argument_address(recv, no_return_pc_pushed_yet + receiver_is_at_end); ++ __ ld_d(recv, recv_addr); ++ __ verify_oop(recv); ++ } ++ if(save_flags) { ++ __ move(BCP, flags); ++ } ++ ++ // compute return type ++ __ srli_d(flags, flags, ConstantPoolCacheEntry::tos_state_shift); ++ __ andi(flags, flags, 0xf); ++ ++ // Make sure we don't need to mask flags for tos_state_shift after the above shift ++ ConstantPoolCacheEntry::verify_tos_state_shift(); ++ // load return address ++ { ++ const address table = (address) Interpreter::invoke_return_entry_table_for(code); ++ __ li(AT, (long)table); ++ __ alsl_d(AT, flags, AT, LogBytesPerWord - 1); ++ __ ld_d(RA, AT, 0); ++ } ++ ++ if (save_flags) { ++ __ move(flags, BCP); ++ __ restore_bcp(); ++ } ++} ++ ++// used registers : T0, T3, T1, T2 ++// T3 : recv, this two register using convention is by prepare_invoke ++// T1 : flags, klass ++// Rmethod : method, index must be Rmethod ++void TemplateTable::invokevirtual_helper(Register index, ++ Register recv, ++ Register flags) { ++ ++ assert_different_registers(index, recv, flags, T2); ++ ++ // Test for an invoke of a final method ++ Label notFinal; ++ __ li(AT, (1 << ConstantPoolCacheEntry::is_vfinal_shift)); ++ __ andr(AT, flags, AT); ++ __ beq(AT, R0, notFinal); ++ ++ Register method = index; // method must be Rmethod ++ assert(method == Rmethod, "methodOop must be Rmethod for interpreter calling convention"); ++ ++ // do the call - the index is actually the method to call ++ // the index is indeed methodOop, for this is vfinal, ++ // see ConstantPoolCacheEntry::set_method for more info ++ ++ // It's final, need a null check here! 
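++  // The receiver still needs a null check; after that the MethodData is updated
++  // (final-call and argument-type profiles) and control tail-jumps into the callee via
++  // jump_from_interpreted, so execution never falls through to notFinal from this path.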
++ __ null_check(recv); ++ ++ // profile this call ++ __ profile_final_call(T2); ++ ++ // T2: tmp, used for mdp ++ // method: callee ++ // T4: tmp ++ // is_virtual: true ++ __ profile_arguments_type(T2, method, T4, true); ++ ++ __ jump_from_interpreted(method, T2); ++ ++ __ bind(notFinal); ++ ++ // get receiver klass ++ __ null_check(recv, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(T2, recv); ++ ++ // profile this call ++ __ profile_virtual_call(T2, T0, T1); ++ ++ // get target methodOop & entry point ++ __ lookup_virtual_method(T2, index, method); ++ __ profile_arguments_type(T2, method, T4, true); ++ __ jump_from_interpreted(method, T2); ++} ++ ++void TemplateTable::invokevirtual(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f2_byte, "use this argument"); ++ prepare_invoke(byte_no, Rmethod, NOREG, T3, T1); ++ // now recv & flags in T3, T1 ++ invokevirtual_helper(Rmethod, T3, T1); ++} ++ ++// T4 : entry ++// Rmethod : method ++void TemplateTable::invokespecial(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ prepare_invoke(byte_no, Rmethod, NOREG, T3); ++ // now recv & flags in T3, T1 ++ __ verify_oop(T3); ++ __ null_check(T3); ++ __ profile_call(T4); ++ ++ // T8: tmp, used for mdp ++ // Rmethod: callee ++ // T4: tmp ++ // is_virtual: false ++ __ profile_arguments_type(T8, Rmethod, T4, false); ++ ++ __ jump_from_interpreted(Rmethod, T4); ++ __ move(T0, T3); ++} ++ ++void TemplateTable::invokestatic(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ prepare_invoke(byte_no, Rmethod, NOREG); ++ ++ __ profile_call(T4); ++ ++ // T8: tmp, used for mdp ++ // Rmethod: callee ++ // T4: tmp ++ // is_virtual: false ++ __ profile_arguments_type(T8, Rmethod, T4, false); ++ ++ __ jump_from_interpreted(Rmethod, T4); ++} ++ ++// i have no idea what to do here, now. for future change. FIXME. ++void TemplateTable::fast_invokevfinal(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f2_byte, "use this argument"); ++ __ stop("fast_invokevfinal not used on LoongArch64"); ++} ++ ++// used registers : T0, T1, T2, T3, T1, A7 ++// T0 : itable, vtable, entry ++// T1 : interface ++// T3 : receiver ++// T1 : flags, klass ++// Rmethod : index, method, this is required by interpreter_entry ++void TemplateTable::invokeinterface(int byte_no) { ++ transition(vtos, vtos); ++ //this method will use T1-T4 and T0 ++ assert(byte_no == f1_byte, "use this argument"); ++ prepare_invoke(byte_no, T2, Rmethod, T3, T1); ++ // T2: reference klass (from f1) if interface method ++ // Rmethod: method (from f2) ++ // T3: receiver ++ // T1: flags ++ ++ // First check for Object case, then private interface method, ++ // then regular interface method. ++ ++ // Special case of invokeinterface called for virtual method of ++ // java.lang.Object. See cpCache.cpp for details. 
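++  // Such calls are tagged with the is_forced_virtual bit in the cache entry flags;
++  // if the bit is clear we fall through to the regular interface dispatch below.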
++ Label notObjectMethod; ++ __ li(AT, (1 << ConstantPoolCacheEntry::is_forced_virtual_shift)); ++ __ andr(AT, T1, AT); ++ __ beq(AT, R0, notObjectMethod); ++ ++ invokevirtual_helper(Rmethod, T3, T1); ++ // no return from above ++ __ bind(notObjectMethod); ++ ++ Label no_such_interface; // for receiver subtype check ++ Register recvKlass; // used for exception processing ++ ++ // Check for private method invocation - indicated by vfinal ++ Label notVFinal; ++ __ li(AT, (1 << ConstantPoolCacheEntry::is_vfinal_shift)); ++ __ andr(AT, T1, AT); ++ __ beq(AT, R0, notVFinal); ++ ++ // Get receiver klass into FSR - also a null check ++ __ null_check(T3, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(FSR, T3); ++ ++ Label subtype; ++ __ check_klass_subtype(FSR, T2, T0, subtype); ++ // If we get here the typecheck failed ++ recvKlass = T1; ++ __ move(recvKlass, FSR); ++ __ b(no_such_interface); ++ ++ __ bind(subtype); ++ ++ // do the call - rbx is actually the method to call ++ ++ __ profile_final_call(T1); ++ __ profile_arguments_type(T1, Rmethod, T0, true); ++ ++ __ jump_from_interpreted(Rmethod, T1); ++ // no return from above ++ __ bind(notVFinal); ++ ++ // Get receiver klass into T1 - also a null check ++ __ restore_locals(); ++ __ null_check(T3, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(T1, T3); ++ ++ Label no_such_method; ++ ++ // Preserve method for throw_AbstractMethodErrorVerbose. ++ __ move(T3, Rmethod); ++ // Receiver subtype check against REFC. ++ // Superklass in T2. Subklass in T1. ++ __ lookup_interface_method(// inputs: rec. class, interface, itable index ++ T1, T2, noreg, ++ // outputs: scan temp. reg, scan temp. reg ++ T0, FSR, ++ no_such_interface, ++ /*return_method=*/false); ++ ++ ++ // profile this call ++ __ restore_bcp(); ++ __ profile_virtual_call(T1, T0, FSR); ++ ++ // Get declaring interface class from method, and itable index ++ __ ld_ptr(T2, Rmethod, in_bytes(Method::const_offset())); ++ __ ld_ptr(T2, T2, in_bytes(ConstMethod::constants_offset())); ++ __ ld_ptr(T2, T2, ConstantPool::pool_holder_offset_in_bytes()); ++ __ ld_w(Rmethod, Rmethod, in_bytes(Method::itable_index_offset())); ++ __ addi_d(Rmethod, Rmethod, (-1) * Method::itable_index_max); ++ __ sub_w(Rmethod, R0, Rmethod); ++ ++ // Preserve recvKlass for throw_AbstractMethodErrorVerbose. ++ __ move(FSR, T1); ++ __ lookup_interface_method(// inputs: rec. class, interface, itable index ++ FSR, T2, Rmethod, ++ // outputs: method, scan temp. reg ++ Rmethod, T0, ++ no_such_interface); ++ ++ // Rmethod: Method* to call ++ // T3: receiver ++ // Check for abstract method error ++ // Note: This should be done more efficiently via a throw_abstract_method_error ++ // interpreter entry point and a conditional jump to it in case of a null ++ // method. ++ __ beq(Rmethod, R0, no_such_method); ++ ++ __ profile_called_method(Rmethod, T0, T1); ++ __ profile_arguments_type(T1, Rmethod, T0, true); ++ ++ // do the call ++ // T3: receiver ++ // Rmethod: Method* ++ __ jump_from_interpreted(Rmethod, T1); ++ __ should_not_reach_here(); ++ ++ // exception handling code follows... ++ // note: must restore interpreter registers to canonical ++ // state for exception handling to work correctly! ++ ++ __ bind(no_such_method); ++ // throw exception ++ __ pop(Rmethod); // pop return address (pushed by prepare_invoke) ++ __ restore_bcp(); ++ __ restore_locals(); ++ // Pass arguments for generating a verbose error message. 
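++  // throw_AbstractMethodErrorVerbose expects the receiver klass and the missing Method*
++  // as its two arguments, so they are placed in A1 and A2 before the call.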
++ recvKlass = A1; ++ Register method = A2; ++ if (recvKlass != T1) { __ move(recvKlass, T1); } ++ if (method != T3) { __ move(method, T3); } ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodErrorVerbose), recvKlass, method); ++ // the call_VM checks for exception, so we should never return here. ++ __ should_not_reach_here(); ++ ++ __ bind(no_such_interface); ++ // throw exception ++ __ pop(Rmethod); // pop return address (pushed by prepare_invoke) ++ __ restore_bcp(); ++ __ restore_locals(); ++ // Pass arguments for generating a verbose error message. ++ if (recvKlass != T1) { __ move(recvKlass, T1); } ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_IncompatibleClassChangeErrorVerbose), recvKlass, T2); ++ // the call_VM checks for exception, so we should never return here. ++ __ should_not_reach_here(); ++} ++ ++ ++void TemplateTable::invokehandle(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ const Register T2_method = Rmethod; ++ const Register FSR_mtype = FSR; ++ const Register T3_recv = T3; ++ ++ prepare_invoke(byte_no, T2_method, FSR_mtype, T3_recv); ++ //??__ verify_method_ptr(T2_method); ++ __ verify_oop(T3_recv); ++ __ null_check(T3_recv); ++ ++ // T4: MethodType object (from cpool->resolved_references[f1], if necessary) ++ // T2_method: MH.invokeExact_MT method (from f2) ++ ++ // Note: T4 is already pushed (if necessary) by prepare_invoke ++ ++ // FIXME: profile the LambdaForm also ++ __ profile_final_call(T4); ++ ++ // T8: tmp, used for mdp ++ // T2_method: callee ++ // T4: tmp ++ // is_virtual: true ++ __ profile_arguments_type(T8, T2_method, T4, true); ++ ++ __ jump_from_interpreted(T2_method, T4); ++} ++ ++ void TemplateTable::invokedynamic(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ ++ const Register T2_callsite = T2; ++ ++ prepare_invoke(byte_no, Rmethod, T2_callsite); ++ ++ // T2: CallSite object (from cpool->resolved_references[f1]) ++ // Rmethod: MH.linkToCallSite method (from f2) ++ ++ // Note: T2_callsite is already pushed by prepare_invoke ++ // %%% should make a type profile for any invokedynamic that takes a ref argument ++ // profile this call ++ __ profile_call(T4); ++ ++ // T8: tmp, used for mdp ++ // Rmethod: callee ++ // T4: tmp ++ // is_virtual: false ++ __ profile_arguments_type(T8, Rmethod, T4, false); ++ ++ __ verify_oop(T2_callsite); ++ ++ __ jump_from_interpreted(Rmethod, T4); ++ } ++ ++//----------------------------------------------------------------------------- ++// Allocation ++// T1 : tags & buffer end & thread ++// T2 : object end ++// T3 : klass ++// T1 : object size ++// A1 : cpool ++// A2 : cp index ++// return object in FSR ++void TemplateTable::_new() { ++ transition(vtos, atos); ++ __ get_unsigned_2_byte_index_at_bcp(A2, 1); ++ ++ Label slow_case; ++ Label done; ++ Label initialize_header; ++ Label initialize_object; // including clearing the fields ++ Label allocate_shared; ++ ++ __ get_cpool_and_tags(A1, T1); ++ ++ // make sure the class we're about to instantiate has been resolved. 
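++  // A resolved class entry is marked JVM_CONSTANT_Class in the constant pool tags array;
++  // any other tag value sends us to the slow path so the runtime can resolve it first.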
++ // Note: slow_case does a pop of stack, which is why we loaded class/pushed above ++ const int tags_offset = Array::base_offset_in_bytes(); ++ __ add_d(T1, T1, A2); ++ __ ld_b(AT, T1, tags_offset); ++ if(os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad|__ LoadStore)); ++ } ++ __ addi_d(AT, AT, -(int)JVM_CONSTANT_Class); ++ __ bne(AT, R0, slow_case); ++ ++ // get InstanceKlass ++ __ load_resolved_klass_at_index(A1, A2, T3); ++ ++ // make sure klass is initialized & doesn't have finalizer ++ // make sure klass is fully initialized ++ __ ld_hu(T1, T3, in_bytes(InstanceKlass::init_state_offset())); ++ __ addi_d(AT, T1, - (int)InstanceKlass::fully_initialized); ++ __ bne(AT, R0, slow_case); ++ ++ // has_finalizer ++ __ ld_w(T0, T3, in_bytes(Klass::layout_helper_offset()) ); ++ __ andi(AT, T0, Klass::_lh_instance_slow_path_bit); ++ __ bne(AT, R0, slow_case); ++ ++ // Allocate the instance ++ // 1) Try to allocate in the TLAB ++ // 2) if fail and the object is large allocate in the shared Eden ++ // 3) if the above fails (or is not applicable), go to a slow case ++ // (creates a new TLAB, etc.) ++ ++ const bool allow_shared_alloc = ++ Universe::heap()->supports_inline_contig_alloc(); ++ ++#ifndef OPT_THREAD ++ const Register thread = T8; ++ if (UseTLAB || allow_shared_alloc) { ++ __ get_thread(thread); ++ } ++#else ++ const Register thread = TREG; ++#endif ++ ++ if (UseTLAB) { ++ // get tlab_top ++ __ ld_d(FSR, thread, in_bytes(JavaThread::tlab_top_offset())); ++ // get tlab_end ++ __ ld_d(AT, thread, in_bytes(JavaThread::tlab_end_offset())); ++ __ add_d(T2, FSR, T0); ++ __ blt(AT, T2, allow_shared_alloc ? allocate_shared : slow_case); ++ __ st_d(T2, thread, in_bytes(JavaThread::tlab_top_offset())); ++ ++ if (ZeroTLAB) { ++ // the fields have been already cleared ++ __ beq(R0, R0, initialize_header); ++ } else { ++ // initialize both the header and fields ++ __ beq(R0, R0, initialize_object); ++ } ++ } ++ ++ // Allocation in the shared Eden , if allowed ++ // T0 : instance size in words ++ if(allow_shared_alloc){ ++ __ bind(allocate_shared); ++ ++ Label done, retry; ++ Address heap_top(T1); ++ __ li(T1, (long)Universe::heap()->top_addr()); ++ __ ld_d(FSR, heap_top); ++ ++ __ bind(retry); ++ __ li(AT, (long)Universe::heap()->end_addr()); ++ __ ld_d(AT, AT, 0); ++ __ add_d(T2, FSR, T0); ++ __ blt(AT, T2, slow_case); ++ ++ // Compare FSR with the top addr, and if still equal, store the new ++ // top addr in T2 at the address of the top addr pointer. Sets AT if was ++ // equal, and clears it otherwise. Use lock prefix for atomicity on MPs. ++ // ++ // FSR: object begin ++ // T2: object end ++ // T0: instance size in words ++ ++ // if someone beat us on the allocation, try again, otherwise continue ++ __ cmpxchg(heap_top, FSR, T2, AT, true, true, done, &retry); ++ ++ __ bind(done); ++ __ incr_allocated_bytes(thread, T0, 0); ++ } ++ ++ if (UseTLAB || Universe::heap()->supports_inline_contig_alloc()) { ++ // The object is initialized before the header. If the object size is ++ // zero, go directly to the header initialization. ++ __ bind(initialize_object); ++ __ li(AT, - sizeof(oopDesc)); ++ __ add_d(T0, T0, AT); ++ __ beq(T0, R0, initialize_header); ++ ++ // initialize remaining object fields: T0 is a multiple of 2 ++ { ++ Label loop; ++ __ add_d(T1, FSR, T0); ++ ++ __ bind(loop); ++ __ addi_d(T1, T1, -oopSize); ++ __ st_d(R0, T1, sizeof(oopDesc)); ++ __ bne(T1, FSR, loop); // dont clear header ++ } ++ ++ // klass in T3, ++ // initialize object header only. 
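++    // The header consists of the mark word plus the (compressed) klass and klass gap;
++    // with biased locking the prototype mark word comes from the klass, otherwise the
++    // static markOopDesc::prototype() value is stored.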
++ __ bind(initialize_header); ++ if (UseBiasedLocking) { ++ __ ld_d(AT, T3, in_bytes(Klass::prototype_header_offset())); ++ __ st_d(AT, FSR, oopDesc::mark_offset_in_bytes ()); ++ } else { ++ __ li(AT, (long)markOopDesc::prototype()); ++ __ st_d(AT, FSR, oopDesc::mark_offset_in_bytes()); ++ } ++ ++ __ store_klass_gap(FSR, R0); ++ __ store_klass(FSR, T3); ++ ++ { ++ SkipIfEqual skip_if(_masm, &DTraceAllocProbes, 0); ++ // Trigger dtrace event for fastpath ++ __ push(atos); ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), FSR); ++ __ pop(atos); ++ ++ } ++ __ b(done); ++ } ++ ++ // slow case ++ __ bind(slow_case); ++ __ get_constant_pool(A1); ++ __ get_unsigned_2_byte_index_at_bcp(A2, 1); ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), A1, A2); ++ ++ // continue ++ __ bind(done); ++ __ membar(__ StoreStore); ++} ++ ++void TemplateTable::newarray() { ++ transition(itos, atos); ++ __ ld_bu(A1, at_bcp(1)); ++ // type, count ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray), A1, FSR); ++ __ membar(__ StoreStore); ++} ++ ++void TemplateTable::anewarray() { ++ transition(itos, atos); ++ __ get_2_byte_integer_at_bcp(A2, AT, 1); ++ __ huswap(A2); ++ __ get_constant_pool(A1); ++ // cp, index, count ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray), A1, A2, FSR); ++ __ membar(__ StoreStore); ++} ++ ++void TemplateTable::arraylength() { ++ transition(atos, itos); ++ __ null_check(FSR, arrayOopDesc::length_offset_in_bytes()); ++ __ ld_w(FSR, FSR, arrayOopDesc::length_offset_in_bytes()); ++} ++ ++// when invoke gen_subtype_check, super in T3, sub in T2, object in FSR(it's always) ++// T2 : sub klass ++// T3 : cpool ++// T3 : super klass ++void TemplateTable::checkcast() { ++ transition(atos, atos); ++ Label done, is_null, ok_is_subtype, quicked, resolved; ++ __ beq(FSR, R0, is_null); ++ ++ // Get cpool & tags index ++ __ get_cpool_and_tags(T3, T1); ++ __ get_2_byte_integer_at_bcp(T2, AT, 1); ++ __ huswap(T2); ++ ++ // See if bytecode has already been quicked ++ __ add_d(AT, T1, T2); ++ __ ld_b(AT, AT, Array::base_offset_in_bytes()); ++ if(os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad|__ LoadStore)); ++ } ++ __ addi_d(AT, AT, - (int)JVM_CONSTANT_Class); ++ __ beq(AT, R0, quicked); ++ ++ // In InterpreterRuntime::quicken_io_cc, lots of new classes may be loaded. ++ // Then, GC will move the object in V0 to another places in heap. ++ // Therefore, We should never save such an object in register. ++ // Instead, we should save it in the stack. It can be modified automatically by the GC thread. ++ // After GC, the object address in FSR is changed to a new place. ++ // ++ __ push(atos); ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); ++ __ get_vm_result_2(T3, thread); ++ __ pop_ptr(FSR); ++ __ b(resolved); ++ ++ // klass already in cp, get superklass in T3 ++ __ bind(quicked); ++ __ load_resolved_klass_at_index(T3, T2, T3); ++ ++ __ bind(resolved); ++ ++ // get subklass in T2 ++ __ load_klass(T2, FSR); ++ // Superklass in T3. Subklass in T2. ++ __ gen_subtype_check(T3, T2, ok_is_subtype); ++ ++ // Come here on failure ++ // object is at FSR ++ __ jmp(Interpreter::_throw_ClassCastException_entry); ++ ++ // Come here on success ++ __ bind(ok_is_subtype); ++ ++ // Collect counts on whether this check-cast sees NULLs a lot or not. 
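++  // With the interpreter profiler enabled, a null operand records the null_seen flag in
++  // the MethodData, so the JIT later knows this checkcast has encountered nulls.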
++ if (ProfileInterpreter) { ++ __ b(done); ++ __ bind(is_null); ++ __ profile_null_seen(T3); ++ } else { ++ __ bind(is_null); ++ } ++ __ bind(done); ++} ++ ++// T3 as cpool, T1 as tags, T2 as index ++// object always in FSR, superklass in T3, subklass in T2 ++void TemplateTable::instanceof() { ++ transition(atos, itos); ++ Label done, is_null, ok_is_subtype, quicked, resolved; ++ ++ __ beq(FSR, R0, is_null); ++ ++ // Get cpool & tags index ++ __ get_cpool_and_tags(T3, T1); ++ // get index ++ __ get_2_byte_integer_at_bcp(T2, AT, 1); ++ __ huswap(T2); ++ ++ // See if bytecode has already been quicked ++ // quicked ++ __ add_d(AT, T1, T2); ++ __ ld_b(AT, AT, Array::base_offset_in_bytes()); ++ if(os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad|__ LoadStore)); ++ } ++ __ addi_d(AT, AT, - (int)JVM_CONSTANT_Class); ++ __ beq(AT, R0, quicked); ++ ++ __ push(atos); ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); ++ __ get_vm_result_2(T3, thread); ++ __ pop_ptr(FSR); ++ __ b(resolved); ++ ++ // get superklass in T3, subklass in T2 ++ __ bind(quicked); ++ __ load_resolved_klass_at_index(T3, T2, T3); ++ ++ __ bind(resolved); ++ // get subklass in T2 ++ __ load_klass(T2, FSR); ++ ++ // Superklass in T3. Subklass in T2. ++ __ gen_subtype_check(T3, T2, ok_is_subtype); ++ __ move(FSR, R0); ++ // Come here on failure ++ __ b(done); ++ ++ // Come here on success ++ __ bind(ok_is_subtype); ++ __ li(FSR, 1); ++ ++ // Collect counts on whether this test sees NULLs a lot or not. ++ if (ProfileInterpreter) { ++ __ beq(R0, R0, done); ++ __ bind(is_null); ++ __ profile_null_seen(T3); ++ } else { ++ __ bind(is_null); // same as 'done' ++ } ++ __ bind(done); ++ // FSR = 0: obj == NULL or obj is not an instanceof the specified klass ++ // FSR = 1: obj != NULL and obj is an instanceof the specified klass ++} ++ ++//-------------------------------------------------------- ++//-------------------------------------------- ++// Breakpoints ++void TemplateTable::_breakpoint() { ++ // Note: We get here even if we are single stepping.. ++ // jbug inists on setting breakpoints at every bytecode ++ // even if we are in single step mode. ++ ++ transition(vtos, vtos); ++ ++ // get the unpatched byte code ++ __ get_method(A1); ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::get_original_bytecode_at), ++ A1, BCP); ++ __ move(Rnext, V0); // Rnext will be used in dispatch_only_normal ++ ++ // post the breakpoint event ++ __ get_method(A1); ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint), A1, BCP); ++ ++ // complete the execution of original bytecode ++ __ dispatch_only_normal(vtos); ++} ++ ++//----------------------------------------------------------------------------- ++// Exceptions ++ ++void TemplateTable::athrow() { ++ transition(atos, vtos); ++ __ null_check(FSR); ++ __ jmp(Interpreter::throw_exception_entry()); ++} ++ ++//----------------------------------------------------------------------------- ++// Synchronization ++// ++// Note: monitorenter & exit are symmetric routines; which is reflected ++// in the assembly code structure as well ++// ++// Stack layout: ++// ++// [expressions ] <--- SP = expression stack top ++// .. ++// [expressions ] ++// [monitor entry] <--- monitor block top = expression stack bot ++// .. ++// [monitor entry] ++// [frame data ] <--- monitor block bot ++// ... 
++// [return addr ] <--- FP ++ ++// we use T2 as monitor entry pointer, T3 as monitor top pointer, c_rarg0 as free slot pointer ++// object always in FSR ++void TemplateTable::monitorenter() { ++ transition(atos, vtos); ++ ++ // check for NULL object ++ __ null_check(FSR); ++ ++ const Address monitor_block_top(FP, frame::interpreter_frame_monitor_block_top_offset ++ * wordSize); ++ const int entry_size = (frame::interpreter_frame_monitor_size()* wordSize); ++ Label allocated; ++ ++ // initialize entry pointer ++ __ move(c_rarg0, R0); ++ ++ // find a free slot in the monitor block (result in c_rarg0) ++ { ++ Label entry, loop, exit, next; ++ __ ld_d(T2, monitor_block_top); ++ __ addi_d(T3, FP, frame::interpreter_frame_initial_sp_offset * wordSize); ++ __ b(entry); ++ ++ // free slot? ++ __ bind(loop); ++ __ ld_d(AT, T2, BasicObjectLock::obj_offset_in_bytes()); ++ __ bne(AT, R0, next); ++ __ move(c_rarg0, T2); ++ ++ __ bind(next); ++ __ beq(FSR, AT, exit); ++ __ addi_d(T2, T2, entry_size); ++ ++ __ bind(entry); ++ __ bne(T3, T2, loop); ++ __ bind(exit); ++ } ++ ++ __ bne(c_rarg0, R0, allocated); ++ ++ // allocate one if there's no free slot ++ { ++ Label entry, loop; ++ // 1. compute new pointers // SP: old expression stack top ++ __ ld_d(c_rarg0, monitor_block_top); ++ __ addi_d(SP, SP, -entry_size); ++ __ addi_d(c_rarg0, c_rarg0, -entry_size); ++ __ st_d(c_rarg0, monitor_block_top); ++ __ move(T3, SP); ++ __ b(entry); ++ ++ // 2. move expression stack contents ++ __ bind(loop); ++ __ ld_d(AT, T3, entry_size); ++ __ st_d(AT, T3, 0); ++ __ addi_d(T3, T3, wordSize); ++ __ bind(entry); ++ __ bne(T3, c_rarg0, loop); ++ } ++ ++ __ bind(allocated); ++ // Increment bcp to point to the next bytecode, ++ // so exception handling for async. exceptions work correctly. ++ // The object has already been poped from the stack, so the ++ // expression stack looks correct. ++ __ addi_d(BCP, BCP, 1); ++ __ st_d(FSR, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ __ lock_object(c_rarg0); ++ // check to make sure this monitor doesn't cause stack overflow after locking ++ __ save_bcp(); // in case of exception ++ __ generate_stack_overflow_check(0); ++ // The bcp has already been incremented. Just need to dispatch to next instruction. ++ ++ __ dispatch_next(vtos); ++} ++ ++// T2 : top ++// c_rarg0 : entry ++void TemplateTable::monitorexit() { ++ transition(atos, vtos); ++ ++ __ null_check(FSR); ++ ++ const int entry_size =(frame::interpreter_frame_monitor_size()* wordSize); ++ Label found; ++ ++ // find matching slot ++ { ++ Label entry, loop; ++ __ ld_d(c_rarg0, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ addi_d(T2, FP, frame::interpreter_frame_initial_sp_offset * wordSize); ++ __ b(entry); ++ ++ __ bind(loop); ++ __ ld_d(AT, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ __ beq(FSR, AT, found); ++ __ addi_d(c_rarg0, c_rarg0, entry_size); ++ __ bind(entry); ++ __ bne(T2, c_rarg0, loop); ++ } ++ ++ // error handling. 
Unlocking was not block-structured ++ Label end; ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ __ should_not_reach_here(); ++ ++ // call run-time routine ++ // c_rarg0: points to monitor entry ++ __ bind(found); ++ __ move(TSR, FSR); ++ __ unlock_object(c_rarg0); ++ __ move(FSR, TSR); ++ __ bind(end); ++} ++ ++ ++// Wide instructions ++void TemplateTable::wide() { ++ transition(vtos, vtos); ++ __ ld_bu(Rnext, at_bcp(1)); ++ __ slli_d(T4, Rnext, Address::times_8); ++ __ li(AT, (long)Interpreter::_wentry_point); ++ __ add_d(AT, T4, AT); ++ __ ld_d(T4, AT, 0); ++ __ jr(T4); ++} ++ ++ ++void TemplateTable::multianewarray() { ++ transition(vtos, atos); ++ // last dim is on top of stack; we want address of first one: ++ // first_addr = last_addr + (ndims - 1) * wordSize ++ __ ld_bu(A1, at_bcp(3)); // dimension ++ __ addi_d(A1, A1, -1); ++ __ alsl_d(A1, A1, SP, Address::times_8 - 1); // now A1 pointer to the count array on the stack ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray), A1); ++ __ ld_bu(AT, at_bcp(3)); ++ __ alsl_d(SP, AT, SP, Address::times_8 - 1); ++ __ membar(__ AnyAny);//no membar here for aarch64 ++} ++#endif // !CC_INTERP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/templateTable_loongarch.hpp b/src/hotspot/cpu/loongarch/templateTable_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/templateTable_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/templateTable_loongarch.hpp 2024-01-30 10:00:11.841431732 +0800 +@@ -0,0 +1,43 @@ ++/* ++ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_TEMPLATETABLE_LOONGARCH_64_HPP ++#define CPU_LOONGARCH_TEMPLATETABLE_LOONGARCH_64_HPP ++ ++ static void prepare_invoke(int byte_no, ++ Register method, // linked method (or i-klass) ++ Register index = noreg, // itable index, MethodType, etc. 
++ Register recv = noreg, // if caller wants to see it ++ Register flags = noreg // if caller wants to test it ++ ); ++ static void invokevirtual_helper(Register index, Register recv, ++ Register flags); ++ static void volatile_barrier(); ++ ++ // Helpers ++ static void index_check(Register array, Register index); ++ static void index_check_without_pop(Register array, Register index); ++ ++#endif // CPU_LOONGARCH_TEMPLATETABLE_LOONGARCH_64_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/vmreg_loongarch.cpp b/src/hotspot/cpu/loongarch/vmreg_loongarch.cpp +--- a/src/hotspot/cpu/loongarch/vmreg_loongarch.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/vmreg_loongarch.cpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,53 @@ ++/* ++ * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "code/vmreg.hpp" ++ ++ ++ ++void VMRegImpl::set_regName() { ++ Register reg = ::as_Register(0); ++ int i; ++ for (i = 0; i < ConcreteRegisterImpl::max_gpr ; ) { ++ for (int j = 0 ; j < RegisterImpl::max_slots_per_register ; j++) { ++ regName[i++] = reg->name(); ++ } ++ reg = reg->successor(); ++ } ++ ++ FloatRegister freg = ::as_FloatRegister(0); ++ for ( ; i < ConcreteRegisterImpl::max_fpr ; ) { ++ for (int j = 0 ; j < FloatRegisterImpl::max_slots_per_register ; j++) { ++ regName[i++] = freg->name(); ++ } ++ freg = freg->successor(); ++ } ++ ++ for ( ; i < ConcreteRegisterImpl::number_of_registers ; i ++ ) { ++ regName[i] = "NON-GPR-FPR"; ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/vmreg_loongarch.hpp b/src/hotspot/cpu/loongarch/vmreg_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/vmreg_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/vmreg_loongarch.hpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,58 @@ ++/* ++ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VMREG_LOONGARCH_HPP ++#define CPU_LOONGARCH_VMREG_LOONGARCH_HPP ++ ++inline bool is_Register() { ++ return (unsigned int) value() < (unsigned int) ConcreteRegisterImpl::max_gpr; ++} ++ ++inline Register as_Register() { ++ assert( is_Register(), "must be"); ++ return ::as_Register(value() / RegisterImpl::max_slots_per_register); ++} ++ ++inline bool is_FloatRegister() { ++ return value() >= ConcreteRegisterImpl::max_gpr && value() < ConcreteRegisterImpl::max_fpr; ++} ++ ++inline FloatRegister as_FloatRegister() { ++ assert( is_FloatRegister() && is_even(value()), "must be" ); ++ return ::as_FloatRegister((value() - ConcreteRegisterImpl::max_gpr) / ++ FloatRegisterImpl::max_slots_per_register); ++} ++ ++inline bool is_concrete() { ++ assert(is_reg(), "must be"); ++ if (is_FloatRegister()) { ++ int base = value() - ConcreteRegisterImpl::max_gpr; ++ return base % FloatRegisterImpl::max_slots_per_register == 0; ++ } else { ++ return is_even(value()); ++ } ++} ++ ++#endif // CPU_LOONGARCH_VMREG_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/vmreg_loongarch.inline.hpp b/src/hotspot/cpu/loongarch/vmreg_loongarch.inline.hpp +--- a/src/hotspot/cpu/loongarch/vmreg_loongarch.inline.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/vmreg_loongarch.inline.hpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,39 @@ ++/* ++ * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VMREG_LOONGARCH_INLINE_HPP ++#define CPU_LOONGARCH_VMREG_LOONGARCH_INLINE_HPP ++ ++inline VMReg RegisterImpl::as_VMReg() { ++ if( this==noreg ) return VMRegImpl::Bad(); ++ return VMRegImpl::as_VMReg(encoding() * RegisterImpl::max_slots_per_register); ++} ++ ++inline VMReg FloatRegisterImpl::as_VMReg() { ++ return VMRegImpl::as_VMReg((encoding() * FloatRegisterImpl::max_slots_per_register) + ++ ConcreteRegisterImpl::max_gpr); ++} ++ ++#endif // CPU_LOONGARCH_VMREG_LOONGARCH_INLINE_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/vmStructs_loongarch.hpp b/src/hotspot/cpu/loongarch/vmStructs_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/vmStructs_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/vmStructs_loongarch.hpp 2024-01-30 10:00:11.841431732 +0800 +@@ -0,0 +1,61 @@ ++/* ++ * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VMSTRUCTS_LOONGARCH_HPP ++#define CPU_LOONGARCH_VMSTRUCTS_LOONGARCH_HPP ++ ++// These are the CPU-specific fields, types and integer ++// constants required by the Serviceability Agent. This file is ++// referenced by vmStructs.cpp. 
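++// Only the CPU-specific JavaFrameAnchor::_last_Java_fp field is exported below; the
++// type and constant tables are left empty for this port.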
++ ++#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ ++ volatile_nonstatic_field(JavaFrameAnchor, _last_Java_fp, intptr_t*) \ ++ \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_STRUCTS_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++ ++#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_TYPES_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++ ++#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_INT_CONSTANTS_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_LONG_CONSTANTS_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++#endif // CPU_LOONGARCH_VMSTRUCTS_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/vm_version_ext_loongarch.cpp b/src/hotspot/cpu/loongarch/vm_version_ext_loongarch.cpp +--- a/src/hotspot/cpu/loongarch/vm_version_ext_loongarch.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/vm_version_ext_loongarch.cpp 2024-01-30 10:00:11.841431732 +0800 +@@ -0,0 +1,85 @@ ++/* ++ * Copyright (c) 2013, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "memory/allocation.inline.hpp" ++#include "runtime/os.inline.hpp" ++#include "vm_version_ext_loongarch.hpp" ++ ++// VM_Version_Ext statics ++int VM_Version_Ext::_no_of_threads = 0; ++int VM_Version_Ext::_no_of_cores = 0; ++int VM_Version_Ext::_no_of_sockets = 0; ++bool VM_Version_Ext::_initialized = false; ++char VM_Version_Ext::_cpu_name[CPU_TYPE_DESC_BUF_SIZE] = {0}; ++char VM_Version_Ext::_cpu_desc[CPU_DETAILED_DESC_BUF_SIZE] = {0}; ++ ++void VM_Version_Ext::initialize_cpu_information(void) { ++ // do nothing if cpu info has been initialized ++ if (_initialized) { ++ return; ++ } ++ ++ _no_of_cores = os::processor_count(); ++ _no_of_threads = _no_of_cores; ++ _no_of_sockets = _no_of_cores; ++ snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "LoongArch"); ++ snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "LoongArch %s", cpu_features()); ++ _initialized = true; ++} ++ ++int VM_Version_Ext::number_of_threads(void) { ++ initialize_cpu_information(); ++ return _no_of_threads; ++} ++ ++int VM_Version_Ext::number_of_cores(void) { ++ initialize_cpu_information(); ++ return _no_of_cores; ++} ++ ++int VM_Version_Ext::number_of_sockets(void) { ++ initialize_cpu_information(); ++ return _no_of_sockets; ++} ++ ++const char* VM_Version_Ext::cpu_name(void) { ++ initialize_cpu_information(); ++ char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_TYPE_DESC_BUF_SIZE, mtTracing); ++ if (NULL == tmp) { ++ return NULL; ++ } ++ strncpy(tmp, _cpu_name, CPU_TYPE_DESC_BUF_SIZE); ++ return tmp; ++} ++ ++const char* VM_Version_Ext::cpu_description(void) { ++ initialize_cpu_information(); ++ char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_DETAILED_DESC_BUF_SIZE, mtTracing); ++ if (NULL == tmp) { ++ return NULL; ++ } ++ strncpy(tmp, _cpu_desc, CPU_DETAILED_DESC_BUF_SIZE); ++ return tmp; ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/vm_version_ext_loongarch.hpp b/src/hotspot/cpu/loongarch/vm_version_ext_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/vm_version_ext_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/vm_version_ext_loongarch.hpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,54 @@ ++/* ++ * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_VERSION_EXT_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_VERSION_EXT_LOONGARCH_HPP ++ ++#include "runtime/vm_version.hpp" ++#include "utilities/macros.hpp" ++ ++class VM_Version_Ext : public VM_Version { ++ private: ++ static const size_t CPU_TYPE_DESC_BUF_SIZE = 256; ++ static const size_t CPU_DETAILED_DESC_BUF_SIZE = 4096; ++ ++ static int _no_of_threads; ++ static int _no_of_cores; ++ static int _no_of_sockets; ++ static bool _initialized; ++ static char _cpu_name[CPU_TYPE_DESC_BUF_SIZE]; ++ static char _cpu_desc[CPU_DETAILED_DESC_BUF_SIZE]; ++ ++ public: ++ static int number_of_threads(void); ++ static int number_of_cores(void); ++ static int number_of_sockets(void); ++ ++ static const char* cpu_name(void); ++ static const char* cpu_description(void); ++ static void initialize_cpu_information(void); ++}; ++ ++#endif // CPU_LOONGARCH_VM_VERSION_EXT_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/vm_version_loongarch.cpp b/src/hotspot/cpu/loongarch/vm_version_loongarch.cpp +--- a/src/hotspot/cpu/loongarch/vm_version_loongarch.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/vm_version_loongarch.cpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,397 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "memory/resourceArea.hpp" ++#include "runtime/java.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/vm_version.hpp" ++#ifdef TARGET_OS_FAMILY_linux ++# include "os_linux.inline.hpp" ++#endif ++ ++#define T5 RT5 ++ ++const char* VM_Version::_features_str = ""; ++VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, }; ++bool VM_Version::_cpu_info_is_initialized = false; ++ ++static BufferBlob* stub_blob; ++static const int stub_size = 600; ++ ++extern "C" { ++ typedef void (*get_cpu_info_stub_t)(void*); ++} ++static get_cpu_info_stub_t get_cpu_info_stub = NULL; ++ ++ ++class VM_Version_StubGenerator: public StubCodeGenerator { ++ public: ++ ++ VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {} ++ ++ address generate_get_cpu_info() { ++ assert(!VM_Version::cpu_info_is_initialized(), "VM_Version should not be initialized"); ++ StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub"); ++# define __ _masm-> ++ ++ address start = __ pc(); ++ ++ __ enter(); ++ __ push(AT); ++ __ push(T5); ++ ++ __ li(AT, (long)0); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id0_offset())); ++ ++ __ li(AT, 1); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id1_offset())); ++ ++ __ li(AT, 2); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id2_offset())); ++ ++ __ li(AT, 3); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id3_offset())); ++ ++ __ li(AT, 4); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id4_offset())); ++ ++ __ li(AT, 5); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id5_offset())); ++ ++ __ li(AT, 6); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id6_offset())); ++ ++ __ li(AT, 10); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id10_offset())); ++ ++ __ li(AT, 11); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id11_offset())); ++ ++ __ li(AT, 12); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id12_offset())); ++ ++ __ li(AT, 13); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id13_offset())); ++ ++ __ li(AT, 14); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id14_offset())); ++ ++ __ pop(T5); ++ __ pop(AT); ++ __ leave(); ++ __ jr(RA); ++# undef __ ++ return start; ++ }; ++}; ++ ++uint32_t VM_Version::get_feature_flags_by_cpucfg() { ++ uint32_t result = 0; ++ if (_cpuid_info.cpucfg_info_id1.bits.ARCH == 0b00 || _cpuid_info.cpucfg_info_id1.bits.ARCH == 0b01 ) { ++ result |= CPU_LA32; ++ } else if (_cpuid_info.cpucfg_info_id1.bits.ARCH == 0b10 ) { ++ result |= CPU_LA64; ++ } ++ ++ if (_cpuid_info.cpucfg_info_id2.bits.FP_CFG != 0) ++ result |= CPU_FP; ++ ++ if (_cpuid_info.cpucfg_info_id3.bits.CCDMA != 0) ++ result |= CPU_CCDMA; ++ if (_cpuid_info.cpucfg_info_id3.bits.LLDBAR != 0) ++ result |= CPU_LLDBAR; ++ if (_cpuid_info.cpucfg_info_id3.bits.SCDLY != 0) ++ result |= CPU_SCDLY; ++ if (_cpuid_info.cpucfg_info_id3.bits.LLEXC != 0) ++ result |= CPU_LLEXC; ++ ++ result |= CPU_ULSYNC; ++ ++ return result; ++} ++ ++void VM_Version::get_processor_features() { ++ ++ clean_cpuFeatures(); ++ ++ get_os_cpu_info(); ++ ++ get_cpu_info_stub(&_cpuid_info); ++ _features |= 
get_feature_flags_by_cpucfg(); ++ ++ _supports_cx8 = true; ++ ++ if (UseG1GC && FLAG_IS_DEFAULT(MaxGCPauseMillis)) { ++ FLAG_SET_DEFAULT(MaxGCPauseMillis, 150); ++ } ++ ++ if (supports_lsx()) { ++ if (FLAG_IS_DEFAULT(UseLSX)) { ++ FLAG_SET_DEFAULT(UseLSX, true); ++ } ++ } else if (UseLSX) { ++ warning("LSX instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseLSX, false); ++ } ++ ++ if (supports_lasx()) { ++ if (FLAG_IS_DEFAULT(UseLASX)) { ++ FLAG_SET_DEFAULT(UseLASX, true); ++ } ++ } else if (UseLASX) { ++ warning("LASX instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseLASX, false); ++ } ++ ++ if (UseLASX && !UseLSX) { ++ warning("LASX instructions depends on LSX, setting UseLASX to false"); ++ FLAG_SET_DEFAULT(UseLASX, false); ++ } ++ ++#ifdef COMPILER2 ++ int max_vector_size = 0; ++ int min_vector_size = 0; ++ if (UseLASX) { ++ max_vector_size = 32; ++ min_vector_size = 16; ++ } ++ else if (UseLSX) { ++ max_vector_size = 16; ++ min_vector_size = 16; ++ } ++ ++ if (!FLAG_IS_DEFAULT(MaxVectorSize)) { ++ if (MaxVectorSize == 0) { ++ // do nothing ++ } else if (MaxVectorSize > max_vector_size) { ++ warning("MaxVectorSize must be at most %i on this platform", max_vector_size); ++ FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size); ++ } else if (MaxVectorSize < min_vector_size) { ++ warning("MaxVectorSize must be at least %i or 0 on this platform, setting to: %i", min_vector_size, min_vector_size); ++ FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size); ++ } else if (!is_power_of_2(MaxVectorSize)) { ++ warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size); ++ FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size); ++ } ++ } else { ++ // If default, use highest supported configuration ++ FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size); ++ } ++#endif ++ ++ char buf[256]; ++ ++ // A note on the _features_string format: ++ // There are jtreg tests checking the _features_string for various properties. ++ // For some strange reason, these tests require the string to contain ++ // only _lowercase_ characters. Keep that in mind when being surprised ++ // about the unusual notation of features - and when adding new ones. ++ // Features may have one comma at the end. ++ // Furthermore, use one, and only one, separator space between features. ++ // Multiple spaces are considered separate tokens, messing up everything. ++ jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s, " ++ "0x%lx, fp_ver: %d, lvz_ver: %d, ", ++ (is_la64() ? "la64" : ""), ++ (is_la32() ? "la32" : ""), ++ (supports_lsx() ? ", lsx" : ""), ++ (supports_lasx() ? ", lasx" : ""), ++ (supports_crypto() ? ", crypto" : ""), ++ (supports_lam() ? ", am" : ""), ++ (supports_ual() ? ", ual" : ""), ++ (supports_lldbar() ? ", lldbar" : ""), ++ (supports_scdly() ? ", scdly" : ""), ++ (supports_llexc() ? ", llexc" : ""), ++ (supports_lbt_x86() ? ", lbt_x86" : ""), ++ (supports_lbt_arm() ? ", lbt_arm" : ""), ++ (supports_lbt_mips() ? ", lbt_mips" : ""), ++ (needs_llsync() ? ", needs_llsync" : ""), ++ (needs_tgtsync() ? ", needs_tgtsync": ""), ++ (needs_ulsync() ? 
", needs_ulsync": ""), ++ _cpuid_info.cpucfg_info_id0.bits.PRID, ++ _cpuid_info.cpucfg_info_id2.bits.FP_VER, ++ _cpuid_info.cpucfg_info_id2.bits.LVZ_VER); ++ _features_str = strdup(buf); ++ ++ assert(!is_la32(), "Should Not Reach Here, what is the cpu type?"); ++ assert( is_la64(), "Should be LoongArch64"); ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchStyle, 1); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchLines, 3); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchStepSize, 64); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchDistance, 192); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocateInstancePrefetchLines)) { ++ FLAG_SET_DEFAULT(AllocateInstancePrefetchLines, 1); ++ } ++ ++ // Basic instructions are used to implement SHA Intrinsics on LA, so sha ++ // instructions support is not needed. ++ if (/*supports_crypto()*/ 1) { ++ if (FLAG_IS_DEFAULT(UseSHA)) { ++ FLAG_SET_DEFAULT(UseSHA, true); ++ } ++ } else if (UseSHA) { ++ warning("SHA instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseSHA, false); ++ } ++ ++ if (UseSHA/* && supports_crypto()*/) { ++ if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) { ++ FLAG_SET_DEFAULT(UseSHA1Intrinsics, true); ++ } ++ } else if (UseSHA1Intrinsics) { ++ warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU."); ++ FLAG_SET_DEFAULT(UseSHA1Intrinsics, false); ++ } ++ ++ if (UseSHA/* && supports_crypto()*/) { ++ if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) { ++ FLAG_SET_DEFAULT(UseSHA256Intrinsics, true); ++ } ++ } else if (UseSHA256Intrinsics) { ++ warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU."); ++ FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); ++ } ++ ++ if (UseSHA512Intrinsics) { ++ warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU."); ++ FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); ++ } ++ ++ if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) { ++ FLAG_SET_DEFAULT(UseSHA, false); ++ } ++ ++ // Basic instructions are used to implement AES Intrinsics on LA, so AES ++ // instructions support is not needed. 
++ if (/*supports_crypto()*/ 1) { ++ if (FLAG_IS_DEFAULT(UseAES)) { ++ FLAG_SET_DEFAULT(UseAES, true); ++ } ++ } else if (UseAES) { ++ if (!FLAG_IS_DEFAULT(UseAES)) ++ warning("AES instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseAES, false); ++ } ++ ++ if (UseAES/* && supports_crypto()*/) { ++ if (FLAG_IS_DEFAULT(UseAESIntrinsics)) { ++ FLAG_SET_DEFAULT(UseAESIntrinsics, true); ++ } ++ } else if (UseAESIntrinsics) { ++ if (!FLAG_IS_DEFAULT(UseAESIntrinsics)) ++ warning("AES intrinsics are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseAESIntrinsics, false); ++ } ++ ++ if (UseAESCTRIntrinsics) { ++ warning("AES/CTR intrinsics are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false); ++ } ++ ++ if (FLAG_IS_DEFAULT(UseCRC32)) { ++ FLAG_SET_DEFAULT(UseCRC32, true); ++ } ++ ++ if (UseCRC32) { ++ if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) { ++ UseCRC32Intrinsics = true; ++ } ++ ++ if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) { ++ UseCRC32CIntrinsics = true; ++ } ++ } ++ ++#ifdef COMPILER2 ++ if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) { ++ FLAG_SET_DEFAULT(UseMulAddIntrinsic, true); ++ } ++ ++ if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { ++ UseMontgomeryMultiplyIntrinsic = true; ++ } ++ if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { ++ UseMontgomerySquareIntrinsic = true; ++ } ++#endif ++ ++ // This machine allows unaligned memory accesses ++ if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) { ++ FLAG_SET_DEFAULT(UseUnalignedAccesses, true); ++ } ++ ++ if (FLAG_IS_DEFAULT(UseFMA)) { ++ FLAG_SET_DEFAULT(UseFMA, true); ++ } ++ ++ if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) { ++ FLAG_SET_DEFAULT(UseCopySignIntrinsic, true); ++ } ++ ++ UNSUPPORTED_OPTION(CriticalJNINatives); ++} ++ ++void VM_Version::initialize() { ++ ResourceMark rm; ++ // Making this stub must be FIRST use of assembler ++ ++ stub_blob = BufferBlob::create("get_cpu_info_stub", stub_size); ++ if (stub_blob == NULL) { ++ vm_exit_during_initialization("Unable to allocate get_cpu_info_stub"); ++ } ++ CodeBuffer c(stub_blob); ++ VM_Version_StubGenerator g(&c); ++ get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t, ++ g.generate_get_cpu_info()); ++ ++ get_processor_features(); ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/vm_version_loongarch.hpp b/src/hotspot/cpu/loongarch/vm_version_loongarch.hpp +--- a/src/hotspot/cpu/loongarch/vm_version_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/vm_version_loongarch.hpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,292 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_VERSION_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_VERSION_LOONGARCH_HPP ++ ++#include "runtime/abstract_vm_version.hpp" ++#include "runtime/globals_extension.hpp" ++#include "utilities/sizes.hpp" ++ ++class VM_Version: public Abstract_VM_Version { ++ friend class JVMCIVMStructs; ++ ++public: ++ ++ union LoongArch_Cpucfg_Id0 { ++ uint32_t value; ++ struct { ++ uint32_t PRID : 32; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id1 { ++ uint32_t value; ++ struct { ++ uint32_t ARCH : 2, ++ PGMMU : 1, ++ IOCSR : 1, ++ PALEN : 8, ++ VALEN : 8, ++ UAL : 1, // unaligned access ++ RI : 1, ++ EP : 1, ++ RPLV : 1, ++ HP : 1, ++ IOCSR_BRD : 1, ++ MSG_INT : 1, ++ : 5; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id2 { ++ uint32_t value; ++ struct { ++ uint32_t FP_CFG : 1, // FP is used, use FP_CFG instead ++ FP_SP : 1, ++ FP_DP : 1, ++ FP_VER : 3, ++ LSX : 1, ++ LASX : 1, ++ COMPLEX : 1, ++ CRYPTO : 1, ++ LVZ : 1, ++ LVZ_VER : 3, ++ LLFTP : 1, ++ LLFTP_VER : 3, ++ LBT_X86 : 1, ++ LBT_ARM : 1, ++ LBT_MIPS : 1, ++ LSPW : 1, ++ LAM : 1, ++ : 9; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id3 { ++ uint32_t value; ++ struct { ++ uint32_t CCDMA : 1, ++ SFB : 1, ++ UCACC : 1, ++ LLEXC : 1, ++ SCDLY : 1, ++ LLDBAR : 1, ++ ITLBHMC : 1, ++ ICHMC : 1, ++ SPW_LVL : 3, ++ SPW_HP_HF : 1, ++ RVA : 1, ++ RVAMAXM1 : 4, ++ : 15; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id4 { ++ uint32_t value; ++ struct { ++ uint32_t CC_FREQ : 32; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id5 { ++ uint32_t value; ++ struct { ++ uint32_t CC_MUL : 16, ++ CC_DIV : 16; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id6 { ++ uint32_t value; ++ struct { ++ uint32_t PMP : 1, ++ PMVER : 3, ++ PMNUM : 4, ++ PMBITS : 6, ++ UPM : 1, ++ : 17; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id10 { ++ uint32_t value; ++ struct { ++ uint32_t L1IU_PRESENT : 1, ++ L1IU_UNIFY : 1, ++ L1D_PRESENT : 1, ++ L2IU_PRESENT : 1, ++ L2IU_UNIFY : 1, ++ L2IU_PRIVATE : 1, ++ L2IU_INCLUSIVE : 1, ++ L2D_PRESENT : 1, ++ L2D_PRIVATE : 1, ++ L2D_INCLUSIVE : 1, ++ L3IU_PRESENT : 1, ++ L3IU_UNIFY : 1, ++ L3IU_PRIVATE : 1, ++ L3IU_INCLUSIVE : 1, ++ L3D_PRESENT : 1, ++ L3D_PRIVATE : 1, ++ L3D_INCLUSIVE : 1, ++ : 15; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id11 { ++ uint32_t value; ++ struct { ++ uint32_t WAYM1 : 16, ++ INDEXMLOG2 : 8, ++ LINESIZELOG2 : 7, ++ : 1; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id12 { ++ uint32_t value; ++ struct { ++ uint32_t WAYM1 : 16, ++ INDEXMLOG2 : 8, ++ LINESIZELOG2 : 7, ++ : 1; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id13 { ++ uint32_t value; ++ struct { ++ uint32_t WAYM1 : 16, ++ INDEXMLOG2 : 8, ++ LINESIZELOG2 : 7, ++ : 1; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id14 { ++ uint32_t value; ++ struct { ++ uint32_t WAYM1 : 16, ++ INDEXMLOG2 : 8, ++ LINESIZELOG2 : 7, ++ : 1; ++ } bits; ++ }; ++ ++protected: ++ ++ enum { ++ CPU_LAM = (1 << 1), ++ CPU_UAL = (1 << 2), ++ CPU_LSX = (1 << 4), ++ CPU_LASX = (1 << 5), ++ CPU_COMPLEX = (1 << 7), ++ CPU_CRYPTO = (1 << 8), ++ CPU_LBT_X86 = (1 << 10), ++ CPU_LBT_ARM = (1 << 11), ++ CPU_LBT_MIPS = (1 << 12), ++ // flags above must follow Linux 
HWCAP ++ CPU_LA32 = (1 << 13), ++ CPU_LA64 = (1 << 14), ++ CPU_FP = (1 << 15), ++ CPU_LLEXC = (1 << 16), ++ CPU_SCDLY = (1 << 17), ++ CPU_LLDBAR = (1 << 18), ++ CPU_CCDMA = (1 << 19), ++ CPU_LLSYNC = (1 << 20), ++ CPU_TGTSYNC = (1 << 21), ++ CPU_ULSYNC = (1 << 22), ++ ++ //////////////////////add some other feature here////////////////// ++ } cpuFeatureFlags; ++ ++ static const char* _features_str; ++ static bool _cpu_info_is_initialized; ++ ++ struct CpuidInfo { ++ LoongArch_Cpucfg_Id0 cpucfg_info_id0; ++ LoongArch_Cpucfg_Id1 cpucfg_info_id1; ++ LoongArch_Cpucfg_Id2 cpucfg_info_id2; ++ LoongArch_Cpucfg_Id3 cpucfg_info_id3; ++ LoongArch_Cpucfg_Id4 cpucfg_info_id4; ++ LoongArch_Cpucfg_Id5 cpucfg_info_id5; ++ LoongArch_Cpucfg_Id6 cpucfg_info_id6; ++ LoongArch_Cpucfg_Id10 cpucfg_info_id10; ++ LoongArch_Cpucfg_Id11 cpucfg_info_id11; ++ LoongArch_Cpucfg_Id12 cpucfg_info_id12; ++ LoongArch_Cpucfg_Id13 cpucfg_info_id13; ++ LoongArch_Cpucfg_Id14 cpucfg_info_id14; ++ }; ++ ++ // The actual cpuid info block ++ static CpuidInfo _cpuid_info; ++ ++ static uint32_t get_feature_flags_by_cpucfg(); ++ static void get_processor_features(); ++ static void get_os_cpu_info(); ++ ++public: ++ // Offsets for cpuid asm stub ++ static ByteSize Loongson_Cpucfg_id0_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id0); } ++ static ByteSize Loongson_Cpucfg_id1_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id1); } ++ static ByteSize Loongson_Cpucfg_id2_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id2); } ++ static ByteSize Loongson_Cpucfg_id3_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id3); } ++ static ByteSize Loongson_Cpucfg_id4_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id4); } ++ static ByteSize Loongson_Cpucfg_id5_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id5); } ++ static ByteSize Loongson_Cpucfg_id6_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id6); } ++ static ByteSize Loongson_Cpucfg_id10_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id10); } ++ static ByteSize Loongson_Cpucfg_id11_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id11); } ++ static ByteSize Loongson_Cpucfg_id12_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id12); } ++ static ByteSize Loongson_Cpucfg_id13_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id13); } ++ static ByteSize Loongson_Cpucfg_id14_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id14); } ++ ++ static void clean_cpuFeatures() { _features = 0; } ++ ++ // Initialization ++ static void initialize(); ++ ++ static bool cpu_info_is_initialized() { return _cpu_info_is_initialized; } ++ ++ static bool is_la32() { return _features & CPU_LA32; } ++ static bool is_la64() { return _features & CPU_LA64; } ++ static bool supports_crypto() { return _features & CPU_CRYPTO; } ++ static bool supports_lsx() { return _features & CPU_LSX; } ++ static bool supports_lasx() { return _features & CPU_LASX; } ++ static bool supports_lam() { return _features & CPU_LAM; } ++ static bool supports_llexc() { return _features & CPU_LLEXC; } ++ static bool supports_scdly() { return _features & CPU_SCDLY; } ++ static bool supports_lldbar() { return _features & CPU_LLDBAR; } ++ static bool supports_ual() { return _features & CPU_UAL; } ++ static bool supports_lbt_x86() { return _features & CPU_LBT_X86; } ++ static bool supports_lbt_arm() { return _features & CPU_LBT_ARM; } ++ static bool supports_lbt_mips() { return _features & CPU_LBT_MIPS; } ++ static bool needs_llsync() { return 
!supports_lldbar(); } ++ static bool needs_tgtsync() { return 1; } ++ static bool needs_ulsync() { return 1; } ++ ++ static const char* cpu_features() { return _features_str; } ++}; ++ ++#endif // CPU_LOONGARCH_VM_VERSION_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/loongarch/vtableStubs_loongarch_64.cpp b/src/hotspot/cpu/loongarch/vtableStubs_loongarch_64.cpp +--- a/src/hotspot/cpu/loongarch/vtableStubs_loongarch_64.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/loongarch/vtableStubs_loongarch_64.cpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,322 @@ ++/* ++ * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/vtableStubs.hpp" ++#include "interp_masm_loongarch.hpp" ++#include "memory/resourceArea.hpp" ++#include "oops/compiledICHolder.hpp" ++#include "oops/klassVtable.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "vmreg_loongarch.inline.hpp" ++#ifdef COMPILER2 ++#include "opto/runtime.hpp" ++#endif ++ ++ ++// machine-dependent part of VtableStubs: create VtableStub of correct size and ++// initialize its code ++ ++#define __ masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++#ifndef PRODUCT ++extern "C" void bad_compiled_vtable_index(JavaThread* thread, oop receiver, int index); ++#endif ++ ++// used by compiler only; reciever in T0. ++// used registers : ++// Rmethod : receiver klass & method ++// NOTE: If this code is used by the C1, the receiver_location is always 0. ++// when reach here, receiver in T0, klass in T8 ++VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { ++ // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. ++ const int stub_code_length = code_size_limit(true); ++ VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index); ++ // Can be NULL if there is no free space in the code cache. ++ if (s == NULL) { ++ return NULL; ++ } ++ ++ // Count unused bytes in instruction sequences of variable size. ++ // We add them to the computed buffer size in order to avoid ++ // overflow in subsequently generated stubs. 
++ address start_pc; ++ int slop_bytes = 0; ++ int slop_delta = 0; ++ int load_const_maxLen = 4*BytesPerInstWord; // load_const generates 4 instructions. Assume that as max size for li ++ // No variance was detected in vtable stub sizes. Setting index_dependent_slop == 0 will unveil any deviation from this observation. ++ const int index_dependent_slop = 0; ++ ++ ResourceMark rm; ++ CodeBuffer cb(s->entry_point(), stub_code_length); ++ MacroAssembler* masm = new MacroAssembler(&cb); ++ Register t1 = T8, t2 = Rmethod; ++#if (!defined(PRODUCT) && defined(COMPILER2)) ++ if (CountCompiledCalls) { ++ start_pc = __ pc(); ++ __ li(AT, SharedRuntime::nof_megamorphic_calls_addr()); ++ slop_delta = load_const_maxLen - (__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ __ ld_w(t1, AT , 0); ++ __ addi_w(t1, t1, 1); ++ __ st_w(t1, AT,0); ++ } ++#endif ++ ++ // get receiver (need to skip return address on top of stack) ++ //assert(receiver_location == T0->as_VMReg(), "receiver expected in T0"); ++ ++ // get receiver klass ++ address npe_addr = __ pc(); ++ __ load_klass(t1, T0); ++ ++#ifndef PRODUCT ++ if (DebugVtables) { ++ Label L; ++ // check offset vs vtable length ++ __ ld_w(t2, t1, in_bytes(Klass::vtable_length_offset())); ++ assert(Assembler::is_simm16(vtable_index*vtableEntry::size()), "change this code"); ++ __ li(AT, vtable_index*vtableEntry::size()); ++ __ blt(AT, t2, L); ++ __ li(A2, vtable_index); ++ __ move(A1, A0); ++ ++ // VTABLE TODO: find upper bound for call_VM length. ++ start_pc = __ pc(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), A1, A2); ++ const ptrdiff_t estimate = 512; ++ const ptrdiff_t codesize = __ pc() - start_pc; ++ slop_delta = estimate - codesize; // call_VM varies in length, depending on data ++ assert(slop_delta >= 0, "vtable #%d: Code size estimate (%d) for DebugVtables too small, required: %d", vtable_index, (int)estimate, (int)codesize); ++ __ bind(L); ++ } ++#endif // PRODUCT ++ const Register method = Rmethod; ++ ++ // load methodOop and target address ++ start_pc = __ pc(); ++ // lookup_virtual_method generates 6 instructions (worst case) ++ __ lookup_virtual_method(t1, vtable_index, method); ++ slop_delta = 6*BytesPerInstWord - (int)(__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ ++#ifndef PRODUCT ++ if (DebugVtables) { ++ Label L; ++ __ beq(method, R0, L); ++ __ ld_d(AT, method,in_bytes(Method::from_compiled_offset())); ++ __ bne(AT, R0, L); ++ __ stop("Vtable entry is NULL"); ++ __ bind(L); ++ } ++#endif // PRODUCT ++ ++ // T8: receiver klass ++ // T0: receiver ++ // Rmethod: methodOop ++ // T4: entry ++ address ame_addr = __ pc(); ++ __ ld_ptr(T4, method,in_bytes(Method::from_compiled_offset())); ++ __ jr(T4); ++ masm->flush(); ++ slop_bytes += index_dependent_slop; // add'l slop for size variance due to large itable offsets ++ bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, index_dependent_slop); ++ ++ return s; ++} ++ ++ ++// used registers : ++// T1 T2 ++// when reach here, the receiver in T0, klass in T1 ++VtableStub* VtableStubs::create_itable_stub(int itable_index) { ++ // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. 
++ const int stub_code_length = code_size_limit(false); ++ VtableStub* s = new(stub_code_length) VtableStub(false, itable_index); ++ // Can be NULL if there is no free space in the code cache. ++ if (s == NULL) { ++ return NULL; ++ } ++ // Count unused bytes in instruction sequences of variable size. ++ // We add them to the computed buffer size in order to avoid ++ // overflow in subsequently generated stubs. ++ address start_pc; ++ int slop_bytes = 0; ++ int slop_delta = 0; ++ int load_const_maxLen = 4*BytesPerInstWord; // load_const generates 4 instructions. Assume that as max size for li ++ ++ ResourceMark rm; ++ CodeBuffer cb(s->entry_point(), stub_code_length); ++ MacroAssembler *masm = new MacroAssembler(&cb); ++ ++ // we use T8, T4, T2 as temparary register, they are free from register allocator ++ Register t1 = T8, t2 = T2, t3 = T4; ++ // Entry arguments: ++ // T1: Interface ++ // T0: Receiver ++ ++#if (!defined(PRODUCT) && defined(COMPILER2)) ++ if (CountCompiledCalls) { ++ start_pc = __ pc(); ++ __ li(AT, SharedRuntime::nof_megamorphic_calls_addr()); ++ slop_delta = load_const_maxLen - (__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ __ ld_w(T8, AT, 0); ++ __ addi_w(T8, T8, 1); ++ __ st_w(T8, AT, 0); ++ } ++#endif // PRODUCT ++ ++ const Register holder_klass_reg = T1; // declaring interface klass (DECC) ++ const Register resolved_klass_reg = Rmethod; // resolved interface klass (REFC) ++ const Register icholder_reg = T1; ++ ++ Label L_no_such_interface; ++ ++ __ ld_ptr(resolved_klass_reg, icholder_reg, CompiledICHolder::holder_klass_offset()); ++ __ ld_ptr(holder_klass_reg, icholder_reg, CompiledICHolder::holder_metadata_offset()); ++ ++ // get receiver klass (also an implicit null-check) ++ address npe_addr = __ pc(); ++ __ load_klass(t1, T0); ++ ++ // x86 use lookup_interface_method, but lookup_interface_method makes more instructions. ++ // No dynamic code size variance here, so slop_bytes is not needed. ++ const int base = in_bytes(Klass::vtable_start_offset()); ++ assert(vtableEntry::size() * wordSize == 8, "adjust the scaling in the code below"); ++ assert(Assembler::is_simm16(base), "change this code"); ++ __ addi_d(t2, t1, base); ++ __ ld_w(AT, t1, in_bytes(Klass::vtable_length_offset())); ++ __ alsl_d(t2, AT, t2, Address::times_8 - 1); ++ ++ __ move(t3, t2); ++ { ++ Label hit, entry; ++ ++ __ ld_ptr(AT, t3, itableOffsetEntry::interface_offset_in_bytes()); ++ __ beq(AT, resolved_klass_reg, hit); ++ ++ __ bind(entry); ++ // Check that the entry is non-null. A null entry means that ++ // the receiver class doesn't implement the interface, and wasn't the ++ // same as when the caller was compiled. ++ __ beqz(AT, L_no_such_interface); ++ ++ __ addi_d(t3, t3, itableOffsetEntry::size() * wordSize); ++ __ ld_ptr(AT, t3, itableOffsetEntry::interface_offset_in_bytes()); ++ __ bne(AT, resolved_klass_reg, entry); ++ ++ __ bind(hit); ++ } ++ ++ { ++ Label hit, entry; ++ ++ __ ld_ptr(AT, t2, itableOffsetEntry::interface_offset_in_bytes()); ++ __ beq(AT, holder_klass_reg, hit); ++ ++ __ bind(entry); ++ // Check that the entry is non-null. A null entry means that ++ // the receiver class doesn't implement the interface, and wasn't the ++ // same as when the caller was compiled. 
++ __ beqz(AT, L_no_such_interface); ++ ++ __ addi_d(t2, t2, itableOffsetEntry::size() * wordSize); ++ __ ld_ptr(AT, t2, itableOffsetEntry::interface_offset_in_bytes()); ++ __ bne(AT, holder_klass_reg, entry); ++ ++ __ bind(hit); ++ } ++ ++ // We found a hit, move offset into T4 ++ __ ld_wu(t2, t2, itableOffsetEntry::offset_offset_in_bytes()); ++ ++ // Compute itableMethodEntry. ++ const int method_offset = (itableMethodEntry::size() * wordSize * itable_index) + ++ itableMethodEntry::method_offset_in_bytes(); ++ ++ // Get methodOop and entrypoint for compiler ++ const Register method = Rmethod; ++ ++ start_pc = __ pc(); ++ __ li(AT, method_offset); ++ slop_delta = load_const_maxLen - (__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ __ add_d(AT, AT, t2); ++ __ ldx_d(method, t1, AT); ++ ++#ifdef ASSERT ++ if (DebugVtables) { ++ Label L1; ++ __ beq(method, R0, L1); ++ __ ld_d(AT, method,in_bytes(Method::from_compiled_offset())); ++ __ bne(AT, R0, L1); ++ __ stop("methodOop is null"); ++ __ bind(L1); ++ } ++#endif // ASSERT ++ ++ // Rmethod: methodOop ++ // T0: receiver ++ // T4: entry point ++ address ame_addr = __ pc(); ++ __ ld_ptr(T4, method, in_bytes(Method::from_compiled_offset())); ++ __ jr(T4); ++ ++ __ bind(L_no_such_interface); ++ // Handle IncompatibleClassChangeError in itable stubs. ++ // More detailed error message. ++ // We force resolving of the call site by jumping to the "handle ++ // wrong method" stub, and so let the interpreter runtime do all the ++ // dirty work. ++ assert(SharedRuntime::get_handle_wrong_method_stub() != NULL, "check initialization order"); ++ __ jmp((address)SharedRuntime::get_handle_wrong_method_stub(), relocInfo::runtime_call_type); ++ ++ masm->flush(); ++ bookkeeping(masm, tty, s, npe_addr, ame_addr, false, itable_index, slop_bytes, 0); ++ ++ return s; ++} ++ ++// NOTE : whenever you change the code above, dont forget to change the const here ++int VtableStub::pd_code_alignment() { ++ const unsigned int icache_line_size = wordSize; ++ return icache_line_size; ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/abstractInterpreter_mips.cpp b/src/hotspot/cpu/mips/abstractInterpreter_mips.cpp +--- a/src/hotspot/cpu/mips/abstractInterpreter_mips.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/abstractInterpreter_mips.cpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,132 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "ci/ciMethod.hpp" ++#include "interpreter/interpreter.hpp" ++#include "runtime/frame.inline.hpp" ++ ++// asm based interpreter deoptimization helpers ++int AbstractInterpreter::size_activation(int max_stack, ++ int temps, ++ int extra_args, ++ int monitors, ++ int callee_params, ++ int callee_locals, ++ bool is_top_frame) { ++ // Note: This calculation must exactly parallel the frame setup ++ // in AbstractInterpreterGenerator::generate_method_entry. ++ ++ // fixed size of an interpreter frame: ++ int overhead = frame::sender_sp_offset - ++ frame::interpreter_frame_initial_sp_offset; ++ // Our locals were accounted for by the caller (or last_frame_adjust ++ // on the transistion) Since the callee parameters already account ++ // for the callee's params we only need to account for the extra ++ // locals. ++ int size = overhead + ++ (callee_locals - callee_params)*Interpreter::stackElementWords + ++ monitors * frame::interpreter_frame_monitor_size() + ++ temps* Interpreter::stackElementWords + extra_args; ++ ++ return size; ++} ++ ++// How much stack a method activation needs in words. ++int AbstractInterpreter::size_top_interpreter_activation(Method* method) { ++ ++ const int entry_size = frame::interpreter_frame_monitor_size(); ++ ++ // total overhead size: entry_size + (saved ebp thru expr stack bottom). ++ // be sure to change this if you add/subtract anything to/from the overhead area ++ const int overhead_size = -(frame::interpreter_frame_initial_sp_offset) + entry_size; ++ ++ const int stub_code = 6; // see generate_call_stub ++ // return overhead_size + method->max_locals() + method->max_stack() + stub_code; ++ const int method_stack = (method->max_locals() + method->max_stack()) * ++ Interpreter::stackElementWords; ++ return overhead_size + method_stack + stub_code; ++} ++ ++void AbstractInterpreter::layout_activation(Method* method, ++ int tempcount, ++ int popframe_extra_args, ++ int moncount, ++ int caller_actual_parameters, ++ int callee_param_count, ++ int callee_locals, ++ frame* caller, ++ frame* interpreter_frame, ++ bool is_top_frame, ++ bool is_bottom_frame) { ++ // Note: This calculation must exactly parallel the frame setup ++ // in AbstractInterpreterGenerator::generate_method_entry. ++ // If interpreter_frame!=NULL, set up the method, locals, and monitors. ++ // The frame interpreter_frame, if not NULL, is guaranteed to be the ++ // right size, as determined by a previous call to this method. 
++ // It is also guaranteed to be walkable even though it is in a skeletal state ++ ++ // fixed size of an interpreter frame: ++ ++ int max_locals = method->max_locals() * Interpreter::stackElementWords; ++ int extra_locals = (method->max_locals() - method->size_of_parameters()) * Interpreter::stackElementWords; ++ ++#ifdef ASSERT ++ assert(caller->sp() == interpreter_frame->sender_sp(), "Frame not properly walkable(2)"); ++#endif ++ ++ interpreter_frame->interpreter_frame_set_method(method); ++ // NOTE the difference in using sender_sp and interpreter_frame_sender_sp ++ // interpreter_frame_sender_sp is the original sp of the caller (the unextended_sp) ++ // and sender_sp is fp+8 ++ intptr_t* locals = interpreter_frame->sender_sp() + max_locals - 1; ++ ++#ifdef ASSERT ++ if (caller->is_interpreted_frame()) { ++ assert(locals < caller->fp() + frame::interpreter_frame_initial_sp_offset, "bad placement"); ++ } ++#endif ++ ++ interpreter_frame->interpreter_frame_set_locals(locals); ++ BasicObjectLock* montop = interpreter_frame->interpreter_frame_monitor_begin(); ++ BasicObjectLock* monbot = montop - moncount; ++ interpreter_frame->interpreter_frame_set_monitor_end(montop - moncount); ++ ++ //set last sp; ++ intptr_t* esp = (intptr_t*) monbot - tempcount*Interpreter::stackElementWords - ++ popframe_extra_args; ++ interpreter_frame->interpreter_frame_set_last_sp(esp); ++ // All frames but the initial interpreter frame we fill in have a ++ // value for sender_sp that allows walking the stack but isn't ++ // truly correct. Correct the value here. ++ // ++ if (extra_locals != 0 && ++ interpreter_frame->sender_sp() == interpreter_frame->interpreter_frame_sender_sp() ) { ++ interpreter_frame->set_interpreter_frame_sender_sp(caller->sp() + extra_locals); ++ } ++ *interpreter_frame->interpreter_frame_cache_addr() = method->constants()->cache(); ++ *interpreter_frame->interpreter_frame_mirror_addr() = method->method_holder()->java_mirror(); ++} ++ +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/assembler_mips.cpp b/src/hotspot/cpu/mips/assembler_mips.cpp +--- a/src/hotspot/cpu/mips/assembler_mips.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/assembler_mips.cpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,759 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/assembler.inline.hpp" ++#include "gc/shared/cardTableBarrierSet.hpp" ++#include "gc/shared/collectedHeap.inline.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/resourceArea.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/objectMonitor.hpp" ++#include "runtime/os.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "utilities/macros.hpp" ++#ifndef PRODUCT ++#include "compiler/disassembler.hpp" ++#endif ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#define STOP(error) stop(error) ++#else ++#define BLOCK_COMMENT(str) block_comment(str) ++#define STOP(error) block_comment(error); stop(error) ++#endif ++ ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++// Implementation of AddressLiteral ++ ++AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) { ++ _is_lval = false; ++ _target = target; ++ _rspec = rspec_from_rtype(rtype, target); ++} ++ ++// Implementation of Address ++ ++Address Address::make_array(ArrayAddress adr) { ++ AddressLiteral base = adr.base(); ++ Address index = adr.index(); ++ assert(index._disp == 0, "must not have disp"); // maybe it can? ++ Address array(index._base, index._index, index._scale, (intptr_t) base.target()); ++ array._rspec = base._rspec; ++ return array; ++} ++ ++// exceedingly dangerous constructor ++Address::Address(address loc, RelocationHolder spec) { ++ _base = noreg; ++ _index = noreg; ++ _scale = no_scale; ++ _disp = (intptr_t) loc; ++ _rspec = spec; ++} ++ ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++// Implementation of Assembler ++const char *Assembler::ops_name[] = { ++ "special", "regimm", "j", "jal", "beq", "bne", "blez", "bgtz", ++ "addi", "addiu", "slti", "sltiu", "andi", "ori", "xori", "lui", ++ "cop0", "cop1", "cop2", "cop3", "beql", "bnel", "bleql", "bgtzl", ++ "daddi", "daddiu", "ldl", "ldr", "", "", "", "", ++ "lb", "lh", "lwl", "lw", "lbu", "lhu", "lwr", "lwu", ++ "sb", "sh", "swl", "sw", "sdl", "sdr", "swr", "cache", ++ "ll", "lwc1", "", "", "lld", "ldc1", "", "ld", ++ "sc", "swc1", "", "", "scd", "sdc1", "", "sd" ++}; ++ ++const char* Assembler::special_name[] = { ++ "sll", "", "srl", "sra", "sllv", "", "srlv", "srav", ++ "jr", "jalr", "movz", "movn", "syscall", "break", "", "sync", ++ "mfhi", "mthi", "mflo", "mtlo", "dsll", "", "dsrl", "dsra", ++ "mult", "multu", "div", "divu", "dmult", "dmultu", "ddiv", "ddivu", ++ "add", "addu", "sub", "subu", "and", "or", "xor", "nor", ++ "", "", "slt", "sltu", "dadd", "daddu", "dsub", "dsubu", ++ "tge", "tgeu", "tlt", "tltu", "teq", "", "tne", "", ++ "dsll", "", "dsrl", "dsra", "dsll32", "", "dsrl32", "dsra32" ++}; ++ ++const char* Assembler::cop1_name[] = { ++ "add", "sub", "mul", "div", "sqrt", "abs", "mov", "neg", ++ "round.l", "trunc.l", "ceil.l", "floor.l", "round.w", "trunc.w", "ceil.w", "floor.w", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "c.f", "c.un", "c.eq", "c.ueq", "c.olt", "c.ult", "c.ole", "c.ule", ++ "c.sf", "c.ngle", "c.seq", "c.ngl", "c.lt", "c.nge", "c.le", "c.ngt" ++}; ++ ++const char* Assembler::cop1x_name[] = { ++ "lwxc1", "ldxc1", "", "", "", "luxc1", "", "", ++ "swxc1", "sdxc1", "", "", "", "suxc1", "", "prefx", ++ "", "", "", "", "", "", "alnv.ps", "", ++ "", "", 
"", "", "", "", "", "", ++ "madd.s", "madd.d", "", "", "", "", "madd.ps", "", ++ "msub.s", "msub.d", "", "", "", "", "msub.ps", "", ++ "nmadd.s", "nmadd.d", "", "", "", "", "nmadd.ps", "", ++ "nmsub.s", "nmsub.d", "", "", "", "", "nmsub.ps", "" ++}; ++ ++const char* Assembler::special2_name[] = { ++ "madd", "", "mul", "", "msub", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "gsdmult", "", "", "gsdiv", "gsddiv", "", "", ++ "", "", "", "", "gsmod", "gsdmod", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "" ++}; ++ ++const char* Assembler::special3_name[] = { ++ "ext", "", "", "", "ins", "dinsm", "dinsu", "dins", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "bshfl", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++}; ++ ++const char* Assembler::regimm_name[] = { ++ "bltz", "bgez", "bltzl", "bgezl", "", "", "", "", ++ "tgei", "tgeiu", "tlti", "tltiu", "teqi", "", "tnei", "", ++ "bltzal", "bgezal", "bltzall", "bgezall" ++}; ++ ++const char* Assembler::gs_ldc2_name[] = { ++ "gslbx", "gslhx", "gslwx", "gsldx", "", "", "gslwxc1", "gsldxc1" ++}; ++ ++ ++const char* Assembler::gs_lwc2_name[] = { ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "gslble", "gslbgt", "gslhle", "gslhgt", "gslwle", "gslwgt", "gsldle", "gsldgt", ++ "", "", "", "gslwlec1", "gslwgtc1", "gsldlec1", "gsldgtc1", "",/*LWDIR, LWPTE, LDDIR and LDPTE have the same low 6 bits.*/ ++ "gslq", "" ++}; ++ ++const char* Assembler::gs_sdc2_name[] = { ++ "gssbx", "gsshx", "gsswx", "gssdx", "", "", "gsswxc1", "gssdxc1" ++}; ++ ++const char* Assembler::gs_swc2_name[] = { ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "gssble", "gssbgt", "gsshle", "gsshgt", "gsswle", "gsswgt", "gssdle", "gssdgt", ++ "", "", "", "", "gsswlec1", "gsswgtc1", "gssdlec1", "gssdgtc1", ++ "gssq", "" ++}; ++ ++//misleading name, print only branch/jump instruction ++void Assembler::print_instruction(int inst) { ++ const char *s; ++ switch( opcode(inst) ) { ++ default: ++ s = ops_name[opcode(inst)]; ++ break; ++ case special_op: ++ s = special_name[special(inst)]; ++ break; ++ case regimm_op: ++ s = special_name[rt(inst)]; ++ break; ++ } ++ ++ ::tty->print("%s", s); ++} ++ ++int Assembler::is_int_mask(int x) { ++ int xx = x; ++ int count = 0; ++ ++ while (x != 0) { ++ x &= (x - 1); ++ count++; ++ } ++ ++ if ((1<>2; ++ switch(opcode(inst)) { ++ case j_op: ++ case jal_op: ++ case lui_op: ++ case ori_op: ++ case daddiu_op: ++ ShouldNotReachHere(); ++ break; ++ default: ++ assert(is_simm16(v), "must be simm16"); ++#ifndef PRODUCT ++ if (!is_simm16(v)) { ++ tty->print_cr("must be simm16"); ++ tty->print_cr("Inst: %x", inst); ++ } ++#endif ++ ++ v = low16(v); ++ inst &= 0xffff0000; ++ break; ++ } ++ ++ return inst | v; ++} ++ ++int Assembler::branch_destination(int inst, int pos) { ++ int off = 0; ++ ++ switch(opcode(inst)) { ++ case j_op: ++ case jal_op: ++ assert(false, "should not use j/jal here"); ++ break; ++ default: ++ off = expand(low16(inst), 15); ++ break; ++ } ++ ++ return off ? 
pos + 4 + (off<<2) : 0; ++} ++ ++int AbstractAssembler::code_fill_byte() { ++ return 0x00; // illegal instruction 0x00000000 ++} ++ ++// Now the Assembler instruction (identical for 32/64 bits) ++ ++void Assembler::lb(Register rt, Address src) { ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lb(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lbu(Register rt, Address src) { ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lbu(rt, src.base(), src.disp()); ++} ++ ++void Assembler::ld(Register rt, Address dst){ ++ Register src = rt; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ if (Assembler::is_simm16(disp)) { ++ if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { ++ if (scale == 0) { ++ gsldx(src, base, index, disp); ++ } else { ++ dsll(AT, index, scale); ++ gsldx(src, base, AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ daddu(AT, base, index); ++ } else { ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ } ++ ld(src, AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ daddu(AT, AT, base); ++ if (UseLEXT1) { ++ gsldx(src, AT, index, 0); ++ } else { ++ daddu(AT, AT, index); ++ ld(src, AT, 0); ++ } ++ } else { ++ assert_different_registers(src, AT); ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ lui(src, split_low(disp >> 16)); ++ if (split_low(disp)) ori(src, src, split_low(disp)); ++ if (UseLEXT1) { ++ gsldx(src, AT, src, 0); ++ } else { ++ daddu(AT, AT, src); ++ ld(src, AT, 0); ++ } ++ } ++ } ++ } else { ++ if (Assembler::is_simm16(disp)) { ++ ld(src, base, disp); ++ } else { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ ++ if (UseLEXT1) { ++ gsldx(src, base, AT, 0); ++ } else { ++ daddu(AT, base, AT); ++ ld(src, AT, 0); ++ } ++ } ++ } ++} ++ ++void Assembler::ldl(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ ldl(rt, src.base(), src.disp()); ++} ++ ++void Assembler::ldr(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ ldr(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lh(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lh(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lhu(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lhu(rt, src.base(), src.disp()); ++} ++ ++void Assembler::ll(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ ll(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lld(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lld(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lw(Register rt, Address dst){ ++ Register src = rt; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ if (Assembler::is_simm16(disp)) { ++ if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { ++ if (scale == 0) { ++ gslwx(src, base, index, disp); ++ } else { ++ dsll(AT, index, scale); ++ gslwx(src, base, AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ daddu(AT, base, index); ++ } else { ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ } ++ lw(src, AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, 
AT, split_low(disp)); ++ daddu(AT, AT, base); ++ if (UseLEXT1) { ++ gslwx(src, AT, index, 0); ++ } else { ++ daddu(AT, AT, index); ++ lw(src, AT, 0); ++ } ++ } else { ++ assert_different_registers(src, AT); ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ lui(src, split_low(disp >> 16)); ++ if (split_low(disp)) ori(src, src, split_low(disp)); ++ if (UseLEXT1) { ++ gslwx(src, AT, src, 0); ++ } else { ++ daddu(AT, AT, src); ++ lw(src, AT, 0); ++ } ++ } ++ } ++ } else { ++ if (Assembler::is_simm16(disp)) { ++ lw(src, base, disp); ++ } else { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ ++ if (UseLEXT1) { ++ gslwx(src, base, AT, 0); ++ } else { ++ daddu(AT, base, AT); ++ lw(src, AT, 0); ++ } ++ } ++ } ++} ++ ++void Assembler::lea(Register rt, Address src) { ++ Register dst = rt; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index == noreg) { ++ if (is_simm16(disp)) { ++ daddiu(dst, base, disp); ++ } else { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ daddu(dst, base, AT); ++ } ++ } else { ++ if (scale == 0) { ++ if (is_simm16(disp)) { ++ daddu(AT, base, index); ++ daddiu(dst, AT, disp); ++ } else { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ daddu(AT, base, AT); ++ daddu(dst, AT, index); ++ } ++ } else { ++ if (is_simm16(disp)) { ++ dsll(AT, index, scale); ++ daddu(AT, AT, base); ++ daddiu(dst, AT, disp); ++ } else { ++ assert_different_registers(dst, AT); ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ daddu(AT, AT, base); ++ dsll(dst, index, scale); ++ daddu(dst, dst, AT); ++ } ++ } ++ } ++} ++ ++void Assembler::lwl(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lwl(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lwr(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lwr(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lwu(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lwu(rt, src.base(), src.disp()); ++} ++ ++void Assembler::sb(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sb(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::sc(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sc(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::scd(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ scd(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::sd(Register rt, Address dst) { ++ Register src = rt; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ if (is_simm16(disp)) { ++ if ( UseLEXT1 && is_simm(disp, 8)) { ++ if (scale == 0) { ++ gssdx(src, base, index, disp); ++ } else { ++ assert_different_registers(rt, AT); ++ dsll(AT, index, scale); ++ gssdx(src, base, AT, disp); ++ } ++ } else { ++ assert_different_registers(rt, AT); ++ if (scale == 0) { ++ daddu(AT, base, index); ++ } else { ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ } ++ sd(src, AT, disp); ++ } ++ } else { ++ assert_different_registers(rt, AT); ++ if (scale == 0) { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ daddu(AT, AT, base); ++ if (UseLEXT1) { ++ 
gssdx(src, AT, index, 0); ++ } else { ++ daddu(AT, AT, index); ++ sd(src, AT, 0); ++ } ++ } else { ++ daddiu(SP, SP, -wordSize); ++ sd(T9, SP, 0); ++ ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ lui(T9, split_low(disp >> 16)); ++ if (split_low(disp)) ori(T9, T9, split_low(disp)); ++ daddu(AT, AT, T9); ++ ld(T9, SP, 0); ++ daddiu(SP, SP, wordSize); ++ sd(src, AT, 0); ++ } ++ } ++ } else { ++ if (is_simm16(disp)) { ++ sd(src, base, disp); ++ } else { ++ assert_different_registers(rt, AT); ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ ++ if (UseLEXT1) { ++ gssdx(src, base, AT, 0); ++ } else { ++ daddu(AT, base, AT); ++ sd(src, AT, 0); ++ } ++ } ++ } ++} ++ ++void Assembler::sdl(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sdl(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::sdr(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sdr(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::sh(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sh(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::sw(Register rt, Address dst) { ++ Register src = rt; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ if ( Assembler::is_simm16(disp) ) { ++ if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { ++ if (scale == 0) { ++ gsswx(src, base, index, disp); ++ } else { ++ assert_different_registers(rt, AT); ++ dsll(AT, index, scale); ++ gsswx(src, base, AT, disp); ++ } ++ } else { ++ assert_different_registers(rt, AT); ++ if (scale == 0) { ++ daddu(AT, base, index); ++ } else { ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ } ++ sw(src, AT, disp); ++ } ++ } else { ++ assert_different_registers(rt, AT); ++ if (scale == 0) { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ daddu(AT, AT, base); ++ if (UseLEXT1) { ++ gsswx(src, AT, index, 0); ++ } else { ++ daddu(AT, AT, index); ++ sw(src, AT, 0); ++ } ++ } else { ++ daddiu(SP, SP, -wordSize); ++ sd(T9, SP, 0); ++ ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ lui(T9, split_low(disp >> 16)); ++ if (split_low(disp)) ori(T9, T9, split_low(disp)); ++ daddu(AT, AT, T9); ++ ld(T9, SP, 0); ++ daddiu(SP, SP, wordSize); ++ sw(src, AT, 0); ++ } ++ } ++ } else { ++ if (Assembler::is_simm16(disp)) { ++ sw(src, base, disp); ++ } else { ++ assert_different_registers(rt, AT); ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ ++ if (UseLEXT1) { ++ gsswx(src, base, AT, 0); ++ } else { ++ daddu(AT, base, AT); ++ sw(src, AT, 0); ++ } ++ } ++ } ++} ++ ++void Assembler::swl(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ swl(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::swr(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ swr(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::lwc1(FloatRegister rt, Address src) { ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lwc1(rt, src.base(), src.disp()); ++} ++ ++void Assembler::ldc1(FloatRegister rt, Address src) { ++ assert(src.index() == NOREG, "index is unimplemented"); ++ ldc1(rt, src.base(), src.disp()); ++} ++ ++void Assembler::swc1(FloatRegister rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ swc1(rt, dst.base(), dst.disp()); 
++} ++ ++void Assembler::sdc1(FloatRegister rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sdc1(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::j(address entry) { ++ int dest = ((intptr_t)entry & (intptr_t)0xfffffff)>>2; ++ emit_long((j_op<<26) | dest); ++ has_delay_slot(); ++} ++ ++void Assembler::jal(address entry) { ++ int dest = ((intptr_t)entry & (intptr_t)0xfffffff)>>2; ++ emit_long((jal_op<<26) | dest); ++ has_delay_slot(); ++} ++ ++void Assembler::emit_long(int x) { // shadows AbstractAssembler::emit_long ++ check_delay(); ++ AbstractAssembler::emit_int32(x); ++} ++ ++inline void Assembler::emit_data(int x) { emit_long(x); } ++inline void Assembler::emit_data(int x, relocInfo::relocType rtype) { ++ relocate(rtype); ++ emit_long(x); ++} ++ ++inline void Assembler::emit_data(int x, RelocationHolder const& rspec) { ++ relocate(rspec); ++ emit_long(x); ++} ++ ++inline void Assembler::check_delay() { ++#ifdef CHECK_DELAY ++ guarantee(delay_state != at_delay_slot, "must say delayed() when filling delay slot"); ++ delay_state = no_delay; ++#endif ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/assembler_mips.hpp b/src/hotspot/cpu/mips/assembler_mips.hpp +--- a/src/hotspot/cpu/mips/assembler_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/assembler_mips.hpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,1789 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_ASSEMBLER_MIPS_HPP ++#define CPU_MIPS_VM_ASSEMBLER_MIPS_HPP ++ ++#include "asm/register.hpp" ++#include "runtime/vm_version.hpp" ++ ++class BiasedLockingCounters; ++ ++ ++// Note: A register location is represented via a Register, not ++// via an address for efficiency & simplicity reasons. 
++ ++class ArrayAddress; ++ ++class Address { ++ public: ++ enum ScaleFactor { ++ no_scale = -1, ++ times_1 = 0, ++ times_2 = 1, ++ times_4 = 2, ++ times_8 = 3, ++ times_ptr = times_8 ++ }; ++ static ScaleFactor times(int size) { ++ assert(size >= 1 && size <= 8 && is_power_of_2(size), "bad scale size"); ++ if (size == 8) return times_8; ++ if (size == 4) return times_4; ++ if (size == 2) return times_2; ++ return times_1; ++ } ++ ++ private: ++ Register _base; ++ Register _index; ++ ScaleFactor _scale; ++ int _disp; ++ RelocationHolder _rspec; ++ ++ // Easily misused constructors make them private ++ Address(address loc, RelocationHolder spec); ++ Address(int disp, address loc, relocInfo::relocType rtype); ++ Address(int disp, address loc, RelocationHolder spec); ++ ++ public: ++ ++ // creation ++ Address() ++ : _base(noreg), ++ _index(noreg), ++ _scale(no_scale), ++ _disp(0) { ++ } ++ ++ // No default displacement otherwise Register can be implicitly ++ // converted to 0(Register) which is quite a different animal. ++ ++ Address(Register base, int disp = 0) ++ : _base(base), ++ _index(noreg), ++ _scale(no_scale), ++ _disp(disp) { ++ assert_different_registers(_base, AT); ++ } ++ ++ Address(Register base, Register index, ScaleFactor scale, int disp = 0) ++ : _base (base), ++ _index(index), ++ _scale(scale), ++ _disp (disp) { ++ assert(!index->is_valid() == (scale == Address::no_scale), "inconsistent address"); ++ assert_different_registers(_base, _index, AT); ++ } ++ ++ // The following two overloads are used in connection with the ++ // ByteSize type (see sizes.hpp). They simplify the use of ++ // ByteSize'd arguments in assembly code. Note that their equivalent ++ // for the optimized build are the member functions with int disp ++ // argument since ByteSize is mapped to an int type in that case. ++ // ++ // Note: DO NOT introduce similar overloaded functions for WordSize ++ // arguments as in the optimized mode, both ByteSize and WordSize ++ // are mapped to the same type and thus the compiler cannot make a ++ // distinction anymore (=> compiler errors). 
++ ++#ifdef ASSERT ++ Address(Register base, ByteSize disp) ++ : _base(base), ++ _index(noreg), ++ _scale(no_scale), ++ _disp(in_bytes(disp)) { ++ assert_different_registers(_base, AT); ++ } ++ ++ Address(Register base, Register index, ScaleFactor scale, ByteSize disp) ++ : _base(base), ++ _index(index), ++ _scale(scale), ++ _disp(in_bytes(disp)) { ++ assert(!index->is_valid() == (scale == Address::no_scale), "inconsistent address"); ++ assert_different_registers(_base, _index, AT); ++ } ++#endif // ASSERT ++ ++ // accessors ++ bool uses(Register reg) const { return _base == reg || _index == reg; } ++ Register base() const { return _base; } ++ Register index() const { return _index; } ++ ScaleFactor scale() const { return _scale; } ++ int disp() const { return _disp; } ++ ++ static Address make_array(ArrayAddress); ++ ++ friend class Assembler; ++ friend class MacroAssembler; ++ friend class LIR_Assembler; // base/index/scale/disp ++}; ++ ++// Calling convention ++class Argument { ++ private: ++ int _number; ++ public: ++ enum { ++ n_register_parameters = 8, // 8 integer registers used to pass parameters ++ n_float_register_parameters = 8 // 8 float registers used to pass parameters ++ }; ++ ++ Argument(int number):_number(number){ } ++ Argument successor() {return Argument(number() + 1);} ++ ++ int number()const {return _number;} ++ bool is_Register()const {return _number < n_register_parameters;} ++ bool is_FloatRegister()const {return _number < n_float_register_parameters;} ++ ++ Register as_Register()const { ++ assert(is_Register(), "must be a register argument"); ++ return ::as_Register(A0->encoding() + _number); ++ } ++ FloatRegister as_FloatRegister()const { ++ assert(is_FloatRegister(), "must be a float register argument"); ++ return ::as_FloatRegister(F12->encoding() + _number); ++ } ++ ++ Address as_caller_address()const {return Address(SP, (number() - n_register_parameters) * wordSize);} ++}; ++ ++// ++// AddressLiteral has been split out from Address because operands of this type ++// need to be treated specially on 32bit vs. 64bit platforms. By splitting it out ++// the few instructions that need to deal with address literals are unique and the ++// MacroAssembler does not have to implement every instruction in the Assembler ++// in order to search for address literals that may need special handling depending ++// on the instruction and the platform. As small step on the way to merging i486/amd64 ++// directories. ++// ++class AddressLiteral { ++ friend class ArrayAddress; ++ RelocationHolder _rspec; ++ // Typically we use AddressLiterals we want to use their rval ++ // However in some situations we want the lval (effect address) of the item. ++ // We provide a special factory for making those lvals. ++ bool _is_lval; ++ ++ // If the target is far we'll need to load the ea of this to ++ // a register to reach it. Otherwise if near we can do rip ++ // relative addressing. 
++ ++ address _target; ++ ++ protected: ++ // creation ++ AddressLiteral() ++ : _is_lval(false), ++ _target(NULL) ++ {} ++ ++ public: ++ ++ ++ AddressLiteral(address target, relocInfo::relocType rtype); ++ ++ AddressLiteral(address target, RelocationHolder const& rspec) ++ : _rspec(rspec), ++ _is_lval(false), ++ _target(target) ++ {} ++ ++ AddressLiteral addr() { ++ AddressLiteral ret = *this; ++ ret._is_lval = true; ++ return ret; ++ } ++ ++ ++ private: ++ ++ address target() { return _target; } ++ bool is_lval() { return _is_lval; } ++ ++ relocInfo::relocType reloc() const { return _rspec.type(); } ++ const RelocationHolder& rspec() const { return _rspec; } ++ ++ friend class Assembler; ++ friend class MacroAssembler; ++ friend class Address; ++ friend class LIR_Assembler; ++ RelocationHolder rspec_from_rtype(relocInfo::relocType rtype, address addr) { ++ switch (rtype) { ++ case relocInfo::external_word_type: ++ return external_word_Relocation::spec(addr); ++ case relocInfo::internal_word_type: ++ return internal_word_Relocation::spec(addr); ++ case relocInfo::opt_virtual_call_type: ++ return opt_virtual_call_Relocation::spec(); ++ case relocInfo::static_call_type: ++ return static_call_Relocation::spec(); ++ case relocInfo::runtime_call_type: ++ return runtime_call_Relocation::spec(); ++ case relocInfo::poll_type: ++ case relocInfo::poll_return_type: ++ return Relocation::spec_simple(rtype); ++ case relocInfo::none: ++ case relocInfo::oop_type: ++ // Oops are a special case. Normally they would be their own section ++ // but in cases like icBuffer they are literals in the code stream that ++ // we don't have a section for. We use none so that we get a literal address ++ // which is always patchable. ++ return RelocationHolder(); ++ default: ++ ShouldNotReachHere(); ++ return RelocationHolder(); ++ } ++ } ++ ++}; ++ ++// Convience classes ++class RuntimeAddress: public AddressLiteral { ++ ++ public: ++ ++ RuntimeAddress(address target) : AddressLiteral(target, relocInfo::runtime_call_type) {} ++ ++}; ++ ++class OopAddress: public AddressLiteral { ++ ++ public: ++ ++ OopAddress(address target) : AddressLiteral(target, relocInfo::oop_type){} ++ ++}; ++ ++class ExternalAddress: public AddressLiteral { ++ ++ public: ++ ++ ExternalAddress(address target) : AddressLiteral(target, relocInfo::external_word_type){} ++ ++}; ++ ++class InternalAddress: public AddressLiteral { ++ ++ public: ++ ++ InternalAddress(address target) : AddressLiteral(target, relocInfo::internal_word_type) {} ++ ++}; ++ ++// x86 can do array addressing as a single operation since disp can be an absolute ++// address amd64 can't. We create a class that expresses the concept but does extra ++// magic on amd64 to get the final result ++ ++class ArrayAddress { ++ private: ++ ++ AddressLiteral _base; ++ Address _index; ++ ++ public: ++ ++ ArrayAddress() {}; ++ ArrayAddress(AddressLiteral base, Address index): _base(base), _index(index) {}; ++ AddressLiteral base() { return _base; } ++ Address index() { return _index; } ++ ++}; ++ ++const int FPUStateSizeInWords = 512 / wordSize; ++ ++// The MIPS LOONGSON Assembler: Pure assembler doing NO optimizations on the instruction ++// level ; i.e., what you write is what you get. The Assembler is generating code into ++// a CodeBuffer. 
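++// Instructions emitted by this assembler are fixed-width 32-bit words (see
++// InstructionSize below).  Branch offsets are signed 16-bit counts of
++// instruction words relative to the delay slot, which is why offset() computes
++// (entry - pc() - 4) / 4; the CHECK_DELAY machinery further down asserts that a
++// delay slot is filled explicitly via delayed().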
++ ++class Assembler : public AbstractAssembler { ++ friend class AbstractAssembler; // for the non-virtual hack ++ friend class LIR_Assembler; // as_Address() ++ friend class StubGenerator; ++ ++ public: ++ enum Condition { ++ zero , ++ notZero , ++ equal , ++ notEqual , ++ less , ++ lessEqual , ++ greater , ++ greaterEqual , ++ below , ++ belowEqual , ++ above , ++ aboveEqual ++ }; ++ ++ static const int LogInstructionSize = 2; ++ static const int InstructionSize = 1 << LogInstructionSize; ++ ++ // opcode, highest 6 bits: bits[31...26] ++ enum ops { ++ special_op = 0x00, // special_ops ++ regimm_op = 0x01, // regimm_ops ++ j_op = 0x02, ++ jal_op = 0x03, ++ beq_op = 0x04, ++ bne_op = 0x05, ++ blez_op = 0x06, ++ bgtz_op = 0x07, ++ addiu_op = 0x09, ++ slti_op = 0x0a, ++ sltiu_op = 0x0b, ++ andi_op = 0x0c, ++ ori_op = 0x0d, ++ xori_op = 0x0e, ++ lui_op = 0x0f, ++ cop0_op = 0x10, // cop0_ops ++ cop1_op = 0x11, // cop1_ops ++ gs_cop2_op = 0x12, // gs_cop2_ops ++ cop1x_op = 0x13, // cop1x_ops ++ beql_op = 0x14, ++ bnel_op = 0x15, ++ blezl_op = 0x16, ++ bgtzl_op = 0x17, ++ daddiu_op = 0x19, ++ ldl_op = 0x1a, ++ ldr_op = 0x1b, ++ special2_op = 0x1c, // special2_ops ++ msa_op = 0x1e, // msa_ops ++ special3_op = 0x1f, // special3_ops ++ lb_op = 0x20, ++ lh_op = 0x21, ++ lwl_op = 0x22, ++ lw_op = 0x23, ++ lbu_op = 0x24, ++ lhu_op = 0x25, ++ lwr_op = 0x26, ++ lwu_op = 0x27, ++ sb_op = 0x28, ++ sh_op = 0x29, ++ swl_op = 0x2a, ++ sw_op = 0x2b, ++ sdl_op = 0x2c, ++ sdr_op = 0x2d, ++ swr_op = 0x2e, ++ cache_op = 0x2f, ++ ll_op = 0x30, ++ lwc1_op = 0x31, ++ gs_lwc2_op = 0x32, //gs_lwc2_ops ++ pref_op = 0x33, ++ lld_op = 0x34, ++ ldc1_op = 0x35, ++ gs_ldc2_op = 0x36, //gs_ldc2_ops ++ ld_op = 0x37, ++ sc_op = 0x38, ++ swc1_op = 0x39, ++ gs_swc2_op = 0x3a, //gs_swc2_ops ++ scd_op = 0x3c, ++ sdc1_op = 0x3d, ++ gs_sdc2_op = 0x3e, //gs_sdc2_ops ++ sd_op = 0x3f ++ }; ++ ++ static const char *ops_name[]; ++ ++ //special family, the opcode is in low 6 bits. 
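++  // These SPECIAL (R-type) instructions carry special_op (0) in the primary opcode
++  // field and the value below in the function field, bits [5..0]; e.g. daddu(rd, rs, rt)
++  // emits (rs << 21) | (rt << 16) | (rd << 11) | daddu_op via insn_RRRO.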
++ enum special_ops { ++ sll_op = 0x00, ++ movci_op = 0x01, ++ srl_op = 0x02, ++ sra_op = 0x03, ++ sllv_op = 0x04, ++ srlv_op = 0x06, ++ srav_op = 0x07, ++ jr_op = 0x08, ++ jalr_op = 0x09, ++ movz_op = 0x0a, ++ movn_op = 0x0b, ++ syscall_op = 0x0c, ++ break_op = 0x0d, ++ sync_op = 0x0f, ++ mfhi_op = 0x10, ++ mthi_op = 0x11, ++ mflo_op = 0x12, ++ mtlo_op = 0x13, ++ dsllv_op = 0x14, ++ dsrlv_op = 0x16, ++ dsrav_op = 0x17, ++ mult_op = 0x18, ++ multu_op = 0x19, ++ div_op = 0x1a, ++ divu_op = 0x1b, ++ dmult_op = 0x1c, ++ dmultu_op = 0x1d, ++ ddiv_op = 0x1e, ++ ddivu_op = 0x1f, ++ addu_op = 0x21, ++ subu_op = 0x23, ++ and_op = 0x24, ++ or_op = 0x25, ++ xor_op = 0x26, ++ nor_op = 0x27, ++ slt_op = 0x2a, ++ sltu_op = 0x2b, ++ daddu_op = 0x2d, ++ dsubu_op = 0x2f, ++ tge_op = 0x30, ++ tgeu_op = 0x31, ++ tlt_op = 0x32, ++ tltu_op = 0x33, ++ teq_op = 0x34, ++ tne_op = 0x36, ++ dsll_op = 0x38, ++ dsrl_op = 0x3a, ++ dsra_op = 0x3b, ++ dsll32_op = 0x3c, ++ dsrl32_op = 0x3e, ++ dsra32_op = 0x3f ++ }; ++ ++ static const char* special_name[]; ++ ++ //regimm family, the opcode is in rt[16...20], 5 bits ++ enum regimm_ops { ++ bltz_op = 0x00, ++ bgez_op = 0x01, ++ bltzl_op = 0x02, ++ bgezl_op = 0x03, ++ tgei_op = 0x08, ++ tgeiu_op = 0x09, ++ tlti_op = 0x0a, ++ tltiu_op = 0x0b, ++ teqi_op = 0x0c, ++ tnei_op = 0x0e, ++ bltzal_op = 0x10, ++ bgezal_op = 0x11, ++ bltzall_op = 0x12, ++ bgezall_op = 0x13, ++ bposge32_op = 0x1c, ++ bposge64_op = 0x1d, ++ synci_op = 0x1f, ++ }; ++ ++ static const char* regimm_name[]; ++ ++ //cop0 family, the ops is in bits[25...21], 5 bits ++ enum cop0_ops { ++ mfc0_op = 0x00, ++ dmfc0_op = 0x01, ++ // ++ mxgc0_op = 0x03, //MFGC0, DMFGC0, MTGC0 ++ mtc0_op = 0x04, ++ dmtc0_op = 0x05, ++ rdpgpr_op = 0x0a, ++ inter_op = 0x0b, ++ wrpgpr_op = 0x0c ++ }; ++ ++ //cop1 family, the ops is in bits[25...21], 5 bits ++ enum cop1_ops { ++ mfc1_op = 0x00, ++ dmfc1_op = 0x01, ++ cfc1_op = 0x02, ++ mfhc1_op = 0x03, ++ mtc1_op = 0x04, ++ dmtc1_op = 0x05, ++ ctc1_op = 0x06, ++ mthc1_op = 0x07, ++ bc1f_op = 0x08, ++ single_fmt = 0x10, ++ double_fmt = 0x11, ++ word_fmt = 0x14, ++ long_fmt = 0x15, ++ ps_fmt = 0x16 ++ }; ++ ++ ++ //2 bist (bits[17...16]) of bc1x instructions (cop1) ++ enum bc_ops { ++ bcf_op = 0x0, ++ bct_op = 0x1, ++ bcfl_op = 0x2, ++ bctl_op = 0x3, ++ }; ++ ++ // low 6 bits of c_x_fmt instructions (cop1) ++ enum c_conds { ++ f_cond = 0x30, ++ un_cond = 0x31, ++ eq_cond = 0x32, ++ ueq_cond = 0x33, ++ olt_cond = 0x34, ++ ult_cond = 0x35, ++ ole_cond = 0x36, ++ ule_cond = 0x37, ++ sf_cond = 0x38, ++ ngle_cond = 0x39, ++ seq_cond = 0x3a, ++ ngl_cond = 0x3b, ++ lt_cond = 0x3c, ++ nge_cond = 0x3d, ++ le_cond = 0x3e, ++ ngt_cond = 0x3f ++ }; ++ ++ // low 6 bits of cop1 instructions ++ enum float_ops { ++ fadd_op = 0x00, ++ fsub_op = 0x01, ++ fmul_op = 0x02, ++ fdiv_op = 0x03, ++ fsqrt_op = 0x04, ++ fabs_op = 0x05, ++ fmov_op = 0x06, ++ fneg_op = 0x07, ++ froundl_op = 0x08, ++ ftruncl_op = 0x09, ++ fceill_op = 0x0a, ++ ffloorl_op = 0x0b, ++ froundw_op = 0x0c, ++ ftruncw_op = 0x0d, ++ fceilw_op = 0x0e, ++ ffloorw_op = 0x0f, ++ movf_f_op = 0x11, ++ movt_f_op = 0x11, ++ movz_f_op = 0x12, ++ movn_f_op = 0x13, ++ frecip_op = 0x15, ++ frsqrt_op = 0x16, ++ fcvts_op = 0x20, ++ fcvtd_op = 0x21, ++ fcvtw_op = 0x24, ++ fcvtl_op = 0x25, ++ fcvtps_op = 0x26, ++ fcvtspl_op = 0x28, ++ fpll_op = 0x2c, ++ fplu_op = 0x2d, ++ fpul_op = 0x2e, ++ fpuu_op = 0x2f ++ }; ++ ++ static const char* cop1_name[]; ++ ++ //cop1x family, the opcode is in low 6 bits. 
++ enum cop1x_ops { ++ lwxc1_op = 0x00, ++ ldxc1_op = 0x01, ++ luxc1_op = 0x05, ++ swxc1_op = 0x08, ++ sdxc1_op = 0x09, ++ suxc1_op = 0x0d, ++ prefx_op = 0x0f, ++ ++ alnv_ps_op = 0x1e, ++ madd_s_op = 0x20, ++ madd_d_op = 0x21, ++ madd_ps_op = 0x26, ++ msub_s_op = 0x28, ++ msub_d_op = 0x29, ++ msub_ps_op = 0x2e, ++ nmadd_s_op = 0x30, ++ nmadd_d_op = 0x31, ++ nmadd_ps_op = 0x36, ++ nmsub_s_op = 0x38, ++ nmsub_d_op = 0x39, ++ nmsub_ps_op = 0x3e ++ }; ++ ++ static const char* cop1x_name[]; ++ ++ //special2 family, the opcode is in low 6 bits. ++ enum special2_ops { ++ madd_op = 0x00, ++ maddu_op = 0x01, ++ mul_op = 0x02, ++ gs0x03_op = 0x03, ++ msub_op = 0x04, ++ msubu_op = 0x05, ++ gs0x06_op = 0x06, ++ gsemul2_op = 0x07, ++ gsemul3_op = 0x08, ++ gsemul4_op = 0x09, ++ gsemul5_op = 0x0a, ++ gsemul6_op = 0x0b, ++ gsemul7_op = 0x0c, ++ gsemul8_op = 0x0d, ++ gsemul9_op = 0x0e, ++ gsemul10_op = 0x0f, ++ gsmult_op = 0x10, ++ gsdmult_op = 0x11, ++ gsmultu_op = 0x12, ++ gsdmultu_op = 0x13, ++ gsdiv_op = 0x14, ++ gsddiv_op = 0x15, ++ gsdivu_op = 0x16, ++ gsddivu_op = 0x17, ++ gsmod_op = 0x1c, ++ gsdmod_op = 0x1d, ++ gsmodu_op = 0x1e, ++ gsdmodu_op = 0x1f, ++ clz_op = 0x20, ++ clo_op = 0x21, ++ xctx_op = 0x22, //ctz, cto, dctz, dcto, gsX ++ gsrxr_x_op = 0x23, //gsX ++ dclz_op = 0x24, ++ dclo_op = 0x25, ++ gsle_op = 0x26, ++ gsgt_op = 0x27, ++ gs86j_op = 0x28, ++ gsloop_op = 0x29, ++ gsaj_op = 0x2a, ++ gsldpc_op = 0x2b, ++ gs86set_op = 0x30, ++ gstm_op = 0x31, ++ gscvt_ld_op = 0x32, ++ gscvt_ud_op = 0x33, ++ gseflag_op = 0x34, ++ gscam_op = 0x35, ++ gstop_op = 0x36, ++ gssettag_op = 0x37, ++ gssdbbp_op = 0x38 ++ }; ++ ++ static const char* special2_name[]; ++ ++ // special3 family, the opcode is in low 6 bits. ++ enum special3_ops { ++ ext_op = 0x00, ++ dextm_op = 0x01, ++ dextu_op = 0x02, ++ dext_op = 0x03, ++ ins_op = 0x04, ++ dinsm_op = 0x05, ++ dinsu_op = 0x06, ++ dins_op = 0x07, ++ lxx_op = 0x0a, //lwx, lhx, lbux, ldx ++ insv_op = 0x0c, ++ dinsv_op = 0x0d, ++ ar1_op = 0x10, //MIPS DSP ++ cmp1_op = 0x11, //MIPS DSP ++ re1_op = 0x12, //MIPS DSP, re1_ops ++ sh1_op = 0x13, //MIPS DSP ++ ar2_op = 0x14, //MIPS DSP ++ cmp2_op = 0x15, //MIPS DSP ++ re2_op = 0x16, //MIPS DSP, re2_ops ++ sh2_op = 0x17, //MIPS DSP ++ ar3_op = 0x18, //MIPS DSP ++ bshfl_op = 0x20 //seb, seh ++ }; ++ ++ // re1_ops ++ enum re1_ops { ++ absq_s_qb_op = 0x01, ++ repl_qb_op = 0x02, ++ replv_qb_op = 0x03, ++ absq_s_ph_op = 0x09, ++ repl_ph_op = 0x0a, ++ replv_ph_op = 0x0b, ++ absq_s_w_op = 0x11, ++ bitrev_op = 0x1b ++ }; ++ ++ // re2_ops ++ enum re2_ops { ++ repl_ob_op = 0x02, ++ replv_ob_op = 0x03, ++ absq_s_qh_op = 0x09, ++ repl_qh_op = 0x0a, ++ replv_qh_op = 0x0b, ++ absq_s_pw_op = 0x11, ++ repl_pw_op = 0x12, ++ replv_pw_op = 0x13 ++ }; ++ ++ static const char* special3_name[]; ++ ++ // lwc2/gs_lwc2 family, the opcode is in low 6 bits. ++ enum gs_lwc2_ops { ++ gslble_op = 0x10, ++ gslbgt_op = 0x11, ++ gslhle_op = 0x12, ++ gslhgt_op = 0x13, ++ gslwle_op = 0x14, ++ gslwgt_op = 0x15, ++ gsldle_op = 0x16, ++ gsldgt_op = 0x17, ++ gslwlec1_op = 0x1c, ++ gslwgtc1_op = 0x1d, ++ gsldlec1_op = 0x1e, ++ gsldgtc1_op = 0x1f, ++ gslq_op = 0x20 ++ }; ++ ++ static const char* gs_lwc2_name[]; ++ ++ // ldc2/gs_ldc2 family, the opcode is in low 3 bits. ++ enum gs_ldc2_ops { ++ gslbx_op = 0x0, ++ gslhx_op = 0x1, ++ gslwx_op = 0x2, ++ gsldx_op = 0x3, ++ gslwxc1_op = 0x6, ++ gsldxc1_op = 0x7 ++ }; ++ ++ static const char* gs_ldc2_name[]; ++ ++ // swc2/gs_swc2 family, the opcode is in low 6 bits. 
++ enum gs_swc2_ops { ++ gssble_op = 0x10, ++ gssbgt_op = 0x11, ++ gsshle_op = 0x12, ++ gsshgt_op = 0x13, ++ gsswle_op = 0x14, ++ gsswgt_op = 0x15, ++ gssdle_op = 0x16, ++ gssdgt_op = 0x17, ++ gsswlec1_op = 0x1c, ++ gsswgtc1_op = 0x1d, ++ gssdlec1_op = 0x1e, ++ gssdgtc1_op = 0x1f, ++ gssq_op = 0x20 ++ }; ++ ++ static const char* gs_swc2_name[]; ++ ++ // sdc2/gs_sdc2 family, the opcode is in low 3 bits. ++ enum gs_sdc2_ops { ++ gssbx_op = 0x0, ++ gsshx_op = 0x1, ++ gsswx_op = 0x2, ++ gssdx_op = 0x3, ++ gsswxc1_op = 0x6, ++ gssdxc1_op = 0x7 ++ }; ++ ++ static const char* gs_sdc2_name[]; ++ ++ enum WhichOperand { ++ // input to locate_operand, and format code for relocations ++ imm_operand = 0, // embedded 32-bit|64-bit immediate operand ++ disp32_operand = 1, // embedded 32-bit displacement or address ++ call32_operand = 2, // embedded 32-bit self-relative displacement ++ narrow_oop_operand = 3, // embedded 32-bit immediate narrow oop ++ _WhichOperand_limit = 4 ++ }; ++ ++ static int opcode(int insn) { return (insn>>26)&0x3f; } ++ static int rs(int insn) { return (insn>>21)&0x1f; } ++ static int rt(int insn) { return (insn>>16)&0x1f; } ++ static int rd(int insn) { return (insn>>11)&0x1f; } ++ static int sa(int insn) { return (insn>>6)&0x1f; } ++ static int special(int insn) { return insn&0x3f; } ++ static int imm_off(int insn) { return (short)low16(insn); } ++ ++ static int low (int x, int l) { return bitfield(x, 0, l); } ++ static int low16(int x) { return low(x, 16); } ++ static int low26(int x) { return low(x, 26); } ++ ++ protected: ++ //help methods for instruction ejection ++ ++ // I-Type (Immediate) ++ // 31 26 25 21 20 16 15 0 ++ //| opcode | rs | rt | immediat | ++ //| | | | | ++ // 6 5 5 16 ++ static int insn_ORRI(int op, int rs, int rt, int imm) { assert(is_simm16(imm), "not a signed 16-bit int"); return (op<<26) | (rs<<21) | (rt<<16) | low16(imm); } ++ ++ // R-Type (Register) ++ // 31 26 25 21 20 16 15 11 10 6 5 0 ++ //| special | rs | rt | rd | 0 | opcode | ++ //| 0 0 0 0 0 0 | | | | 0 0 0 0 0 | | ++ // 6 5 5 5 5 6 ++ static int insn_RRRO(int rs, int rt, int rd, int op) { return (rs<<21) | (rt<<16) | (rd<<11) | op; } ++ static int insn_RRSO(int rt, int rd, int sa, int op) { return (rt<<16) | (rd<<11) | (sa<<6) | op; } ++ static int insn_RRCO(int rs, int rt, int code, int op) { return (rs<<21) | (rt<<16) | (code<<6) | op; } ++ ++ static int insn_COP0(int op, int rt, int rd) { return (cop0_op<<26) | (op<<21) | (rt<<16) | (rd<<11); } ++ static int insn_COP1(int op, int rt, int fs) { return (cop1_op<<26) | (op<<21) | (rt<<16) | (fs<<11); } ++ ++ static int insn_F3RO(int fmt, int ft, int fs, int fd, int func) { ++ return (cop1_op<<26) | (fmt<<21) | (ft<<16) | (fs<<11) | (fd<<6) | func; ++ } ++ static int insn_F3ROX(int fmt, int ft, int fs, int fd, int func) { ++ return (cop1x_op<<26) | (fmt<<21) | (ft<<16) | (fs<<11) | (fd<<6) | func; ++ } ++ ++ static int high (int x, int l) { return bitfield(x, 32-l, l); } ++ static int high16(int x) { return high(x, 16); } ++ static int high6 (int x) { return high(x, 6); } ++ ++ //get the offset field of jump/branch instruction ++ int offset(address entry) { ++ assert(is_simm16((entry - pc() - 4) / 4), "change this code"); ++ if (!is_simm16((entry - pc() - 4) / 4)) { ++ tty->print_cr("!!! 
is_simm16: %lx", (entry - pc() - 4) / 4); ++ } ++ return (entry - pc() - 4) / 4; ++ } ++ ++ ++public: ++ using AbstractAssembler::offset; ++ ++ //sign expand with the sign bit is h ++ static int expand(int x, int h) { return -(x & (1<> 16; ++ } ++ ++ static int split_high(int x) { ++ return ( (x >> 16) + ((x & 0x8000) != 0) ) & 0xffff; ++ } ++ ++ static int merge(int low, int high) { ++ return expand(low, 15) + (high<<16); ++ } ++ ++ static intptr_t merge(intptr_t x0, intptr_t x16, intptr_t x32, intptr_t x48) { ++ return (x48 << 48) | (x32 << 32) | (x16 << 16) | x0; ++ } ++ ++ // Test if x is within signed immediate range for nbits. ++ static bool is_simm (int x, int nbits) { ++ assert(0 < nbits && nbits < 32, "out of bounds"); ++ const int min = -( ((int)1) << nbits-1 ); ++ const int maxplus1 = ( ((int)1) << nbits-1 ); ++ return min <= x && x < maxplus1; ++ } ++ ++ static bool is_simm(jlong x, unsigned int nbits) { ++ assert(0 < nbits && nbits < 64, "out of bounds"); ++ const jlong min = -( ((jlong)1) << nbits-1 ); ++ const jlong maxplus1 = ( ((jlong)1) << nbits-1 ); ++ return min <= x && x < maxplus1; ++ } ++ ++ // Test if x is within unsigned immediate range for nbits ++ static bool is_uimm(int x, unsigned int nbits) { ++ assert(0 < nbits && nbits < 32, "out of bounds"); ++ const int maxplus1 = ( ((int)1) << nbits ); ++ return 0 <= x && x < maxplus1; ++ } ++ ++ static bool is_uimm(jlong x, unsigned int nbits) { ++ assert(0 < nbits && nbits < 64, "out of bounds"); ++ const jlong maxplus1 = ( ((jlong)1) << nbits ); ++ return 0 <= x && x < maxplus1; ++ } ++ ++ static bool is_simm16(int x) { return is_simm(x, 16); } ++ static bool is_simm16(long x) { return is_simm((jlong)x, (unsigned int)16); } ++ ++ static bool fit_in_jal(address target, address pc) { ++ intptr_t mask = 0xfffffffff0000000; ++ return ((intptr_t)(pc + 4) & mask) == ((intptr_t)target & mask); ++ } ++ ++ bool fit_int_branch(address entry) { ++ return is_simm16(offset(entry)); ++ } ++ ++protected: ++#ifdef ASSERT ++ #define CHECK_DELAY ++#endif ++#ifdef CHECK_DELAY ++ enum Delay_state { no_delay, at_delay_slot, filling_delay_slot } delay_state; ++#endif ++ ++public: ++ void assert_not_delayed() { ++#ifdef CHECK_DELAY ++ assert(delay_state == no_delay, "next instruction should not be a delay slot"); ++#endif ++ } ++ ++protected: ++ // Delay slot helpers ++ // cti is called when emitting control-transfer instruction, ++ // BEFORE doing the emitting. ++ // Only effective when assertion-checking is enabled. ++ ++ // called when emitting cti with a delay slot, AFTER emitting ++ void has_delay_slot() { ++#ifdef CHECK_DELAY ++ assert(delay_state == no_delay, "just checking"); ++ delay_state = at_delay_slot; ++#endif ++ } ++ ++public: ++ Assembler* delayed() { ++#ifdef CHECK_DELAY ++ guarantee( delay_state == at_delay_slot, "delayed instructition is not in delay slot"); ++ delay_state = filling_delay_slot; ++#endif ++ return this; ++ } ++ ++ void flush() { ++#ifdef CHECK_DELAY ++ guarantee( delay_state == no_delay, "ending code with a delay slot"); ++#endif ++ AbstractAssembler::flush(); ++ } ++ ++ void emit_long(int); // shadows AbstractAssembler::emit_long ++ void emit_data(int); ++ void emit_data(int, RelocationHolder const&); ++ void emit_data(int, relocInfo::relocType rtype); ++ void check_delay(); ++ ++ ++ // Generic instructions ++ // Does 32bit or 64bit as needed for the platform. 
In some sense these ++ // belong in macro assembler but there is no need for both varieties to exist ++ ++ void addu32(Register rd, Register rs, Register rt){ emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), addu_op)); } ++ void addiu32(Register rt, Register rs, int imm) { emit_long(insn_ORRI(addiu_op, (int)rs->encoding(), (int)rt->encoding(), imm)); } ++ void addiu(Register rt, Register rs, int imm) { daddiu (rt, rs, imm);} ++ void addu(Register rd, Register rs, Register rt) { daddu (rd, rs, rt); } ++ ++ void andr(Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), and_op)); } ++ void andi(Register rt, Register rs, int imm) { emit_long(insn_ORRI(andi_op, (int)rs->encoding(), (int)rt->encoding(), simm16(imm))); } ++ ++ void beq (Register rs, Register rt, int off) { emit_long(insn_ORRI(beq_op, (int)rs->encoding(), (int)rt->encoding(), off)); has_delay_slot(); } ++ void beql (Register rs, Register rt, int off) { emit_long(insn_ORRI(beql_op, (int)rs->encoding(), (int)rt->encoding(), off)); has_delay_slot(); } ++ void bgez (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bgez_op, off)); has_delay_slot(); } ++ void bgezal (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bgezal_op, off)); has_delay_slot(); } ++ void bgezall(Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bgezall_op, off)); has_delay_slot(); } ++ void bgezl (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bgezl_op, off)); has_delay_slot(); } ++ void bgtz (Register rs, int off) { emit_long(insn_ORRI(bgtz_op, (int)rs->encoding(), 0, off)); has_delay_slot(); } ++ void bgtzl (Register rs, int off) { emit_long(insn_ORRI(bgtzl_op, (int)rs->encoding(), 0, off)); has_delay_slot(); } ++ void blez (Register rs, int off) { emit_long(insn_ORRI(blez_op, (int)rs->encoding(), 0, off)); has_delay_slot(); } ++ void blezl (Register rs, int off) { emit_long(insn_ORRI(blezl_op, (int)rs->encoding(), 0, off)); has_delay_slot(); } ++ void bltz (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bltz_op, off)); has_delay_slot(); } ++ void bltzal (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bltzal_op, off)); has_delay_slot(); } ++ void bltzall(Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bltzall_op, off)); has_delay_slot(); } ++ void bltzl (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bltzl_op, off)); has_delay_slot(); } ++ void bne (Register rs, Register rt, int off) { emit_long(insn_ORRI(bne_op, (int)rs->encoding(), (int)rt->encoding(), off)); has_delay_slot(); } ++ void bnel (Register rs, Register rt, int off) { emit_long(insn_ORRI(bnel_op, (int)rs->encoding(), (int)rt->encoding(), off)); has_delay_slot(); } ++ // two versions of brk: ++ // the brk(code) version is according to MIPS64 Architecture For Programmers Volume II: The MIPS64 Instruction Set ++ // the brk(code1, code2) is according to disassembler of hsdis (binutils-2.27) ++ // both versions work ++ void brk (int code) { assert(is_uimm(code, 20), "code is 20 bits"); emit_long( (low(code, 20)<<6) | break_op ); } ++ void brk (int code1, int code2) { assert(is_uimm(code1, 10) && is_uimm(code2, 10), "code is 20 bits"); emit_long( (low(code1, 10)<<16) | (low(code2, 10)<<6) | break_op ); } ++ ++ void beq (Register rs, Register rt, address entry) { beq(rs, rt, 
offset(entry)); } ++ void beql (Register rs, Register rt, address entry) { beql(rs, rt, offset(entry));} ++ void bgez (Register rs, address entry) { bgez (rs, offset(entry)); } ++ void bgezal (Register rs, address entry) { bgezal (rs, offset(entry)); } ++ void bgezall(Register rs, address entry) { bgezall(rs, offset(entry)); } ++ void bgezl (Register rs, address entry) { bgezl (rs, offset(entry)); } ++ void bgtz (Register rs, address entry) { bgtz (rs, offset(entry)); } ++ void bgtzl (Register rs, address entry) { bgtzl (rs, offset(entry)); } ++ void blez (Register rs, address entry) { blez (rs, offset(entry)); } ++ void blezl (Register rs, address entry) { blezl (rs, offset(entry)); } ++ void bltz (Register rs, address entry) { bltz (rs, offset(entry)); } ++ void bltzal (Register rs, address entry) { bltzal (rs, offset(entry)); } ++ void bltzall(Register rs, address entry) { bltzall(rs, offset(entry)); } ++ void bltzl (Register rs, address entry) { bltzl (rs, offset(entry)); } ++ void bne (Register rs, Register rt, address entry) { bne(rs, rt, offset(entry)); } ++ void bnel (Register rs, Register rt, address entry) { bnel(rs, rt, offset(entry)); } ++ ++ void beq (Register rs, Register rt, Label& L) { beq(rs, rt, target(L)); } ++ void beql (Register rs, Register rt, Label& L) { beql(rs, rt, target(L)); } ++ void bgez (Register rs, Label& L){ bgez (rs, target(L)); } ++ void bgezal (Register rs, Label& L){ bgezal (rs, target(L)); } ++ void bgezall(Register rs, Label& L){ bgezall(rs, target(L)); } ++ void bgezl (Register rs, Label& L){ bgezl (rs, target(L)); } ++ void bgtz (Register rs, Label& L){ bgtz (rs, target(L)); } ++ void bgtzl (Register rs, Label& L){ bgtzl (rs, target(L)); } ++ void blez (Register rs, Label& L){ blez (rs, target(L)); } ++ void blezl (Register rs, Label& L){ blezl (rs, target(L)); } ++ void bltz (Register rs, Label& L){ bltz (rs, target(L)); } ++ void bltzal (Register rs, Label& L){ bltzal (rs, target(L)); } ++ void bltzall(Register rs, Label& L){ bltzall(rs, target(L)); } ++ void bltzl (Register rs, Label& L){ bltzl (rs, target(L)); } ++ void bne (Register rs, Register rt, Label& L){ bne(rs, rt, target(L)); } ++ void bnel (Register rs, Register rt, Label& L){ bnel(rs, rt, target(L)); } ++ ++ void daddiu(Register rt, Register rs, int imm) { emit_long(insn_ORRI(daddiu_op, (int)rs->encoding(), (int)rt->encoding(), imm)); } ++ void daddu (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), daddu_op)); } ++ void ddiv (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, ddiv_op)); } ++ void ddivu (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, ddivu_op)); } ++ ++ void movz (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), movz_op)); } ++ void movn (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), movn_op)); } ++ ++ void movt (Register rd, Register rs) { emit_long(((int)rs->encoding() << 21) | (1 << 16) | ((int)rd->encoding() << 11) | movci_op); } ++ void movf (Register rd, Register rs) { emit_long(((int)rs->encoding() << 21) | ((int)rd->encoding() << 11) | movci_op); } ++ ++ enum bshfl_ops { ++ seb_op = 0x10, ++ seh_op = 0x18 ++ }; ++ void seb (Register rd, Register rt) { emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (seb_op 
<< 6) | bshfl_op); } ++ void seh (Register rd, Register rt) { emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (seh_op << 6) | bshfl_op); } ++ ++ void ext (Register rt, Register rs, int pos, int size) { ++ guarantee((0 <= pos) && (pos < 32), "pos must be in [0, 32)"); ++ guarantee((0 < size) && (size <= 32), "size must be in (0, 32]"); ++ guarantee((0 < pos + size) && (pos + size <= 32), "pos + size must be in (0, 32]"); ++ ++ int lsb = pos; ++ int msbd = size - 1; ++ ++ emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (msbd << 11) | (lsb << 6) | ext_op); ++ } ++ ++ void dext (Register rt, Register rs, int pos, int size) { ++ guarantee((0 <= pos) && (pos < 32), "pos must be in [0, 32)"); ++ guarantee((0 < size) && (size <= 32), "size must be in (0, 32]"); ++ guarantee((0 < pos + size) && (pos + size <= 63), "pos + size must be in (0, 63]"); ++ ++ int lsb = pos; ++ int msbd = size - 1; ++ ++ emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (msbd << 11) | (lsb << 6) | dext_op); ++ } ++ ++ void dextm (Register rt, Register rs, int pos, int size) { ++ guarantee((0 <= pos) && (pos < 32), "pos must be in [0, 32)"); ++ guarantee((32 < size) && (size <= 64), "size must be in (32, 64]"); ++ guarantee((32 < pos + size) && (pos + size <= 64), "pos + size must be in (32, 64]"); ++ ++ int lsb = pos; ++ int msbd = size - 1 - 32; ++ ++ emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (msbd << 11) | (lsb << 6) | dextm_op); ++ } ++ ++ void rotr (Register rd, Register rt, int sa) { ++ emit_long((special_op << 26) | (1 << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (low(sa, 5) << 6) | srl_op); ++ } ++ ++ void drotr (Register rd, Register rt, int sa) { ++ emit_long((special_op << 26) | (1 << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (low(sa, 5) << 6) | dsrl_op); ++ } ++ ++ void drotr32 (Register rd, Register rt, int sa) { ++ emit_long((special_op << 26) | (1 << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (low(sa, 5) << 6) | dsrl32_op); ++ } ++ ++ void rotrv (Register rd, Register rt, Register rs) { ++ emit_long((special_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (1 << 6) | srlv_op); ++ } ++ ++ void drotrv (Register rd, Register rt, Register rs) { ++ emit_long((special_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (1 << 6) | dsrlv_op); ++ } ++ ++ void div (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, div_op)); } ++ void divu (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, divu_op)); } ++ void dmult (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, dmult_op)); } ++ void dmultu(Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, dmultu_op)); } ++ void dsll (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsll_op)); } ++ void dsllv (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), dsllv_op)); } ++ void dsll32(Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsll32_op)); } ++ void dsra 
(Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsra_op)); } ++ void dsrav (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), dsrav_op)); } ++ void dsra32(Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsra32_op)); } ++ void dsrl (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsrl_op)); } ++ void dsrlv (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), dsrlv_op)); } ++ void dsrl32(Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsrl32_op)); } ++ void dsubu (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), dsubu_op)); } ++ ++ void b(int off) { beq(R0, R0, off); } ++ void b(address entry) { b(offset(entry)); } ++ void b(Label& L) { b(target(L)); } ++ ++ void j(address entry); ++ void jal(address entry); ++ ++ void jalr(Register rd, Register rs) { emit_long( ((int)rs->encoding()<<21) | ((int)rd->encoding()<<11) | jalr_op); has_delay_slot(); } ++ void jalr(Register rs) { jalr(RA, rs); } ++ void jalr() { jalr(RT9); } ++ ++ void jr(Register rs) { emit_long(((int)rs->encoding()<<21) | jr_op); has_delay_slot(); } ++ void jr_hb(Register rs) { emit_long(((int)rs->encoding()<<21) | (1 << 10) | jr_op); has_delay_slot(); } ++ ++ void lb (Register rt, Register base, int off) { emit_long(insn_ORRI(lb_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lbu(Register rt, Register base, int off) { emit_long(insn_ORRI(lbu_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void ld (Register rt, Register base, int off) { emit_long(insn_ORRI(ld_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void ldl(Register rt, Register base, int off) { emit_long(insn_ORRI(ldl_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void ldr(Register rt, Register base, int off) { emit_long(insn_ORRI(ldr_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lh (Register rt, Register base, int off) { emit_long(insn_ORRI(lh_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lhu(Register rt, Register base, int off) { emit_long(insn_ORRI(lhu_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void ll (Register rt, Register base, int off) { emit_long(insn_ORRI(ll_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lld(Register rt, Register base, int off) { emit_long(insn_ORRI(lld_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lui(Register rt, int imm) { emit_long(insn_ORRI(lui_op, 0, (int)rt->encoding(), simm16(imm))); } ++ void lw (Register rt, Register base, int off) { emit_long(insn_ORRI(lw_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lwl(Register rt, Register base, int off) { emit_long(insn_ORRI(lwl_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lwr(Register rt, Register base, int off) { emit_long(insn_ORRI(lwr_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lwu(Register rt, Register base, int off) { emit_long(insn_ORRI(lwu_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ ++ void lb (Register rt, Address src); ++ void lbu(Register rt, Address src); ++ void ld (Register rt, Address src); ++ void 
ldl(Register rt, Address src); ++ void ldr(Register rt, Address src); ++ void lh (Register rt, Address src); ++ void lhu(Register rt, Address src); ++ void ll (Register rt, Address src); ++ void lld(Register rt, Address src); ++ void lw (Register rt, Address src); ++ void lwl(Register rt, Address src); ++ void lwr(Register rt, Address src); ++ void lwu(Register rt, Address src); ++ void lea(Register rt, Address src); ++ void pref(int hint, Register base, int off) { emit_long(insn_ORRI(pref_op, (int)base->encoding(), low(hint, 5), low(off, 16))); } ++ ++ void mfhi (Register rd) { emit_long( ((int)rd->encoding()<<11) | mfhi_op ); } ++ void mflo (Register rd) { emit_long( ((int)rd->encoding()<<11) | mflo_op ); } ++ void mthi (Register rs) { emit_long( ((int)rs->encoding()<<21) | mthi_op ); } ++ void mtlo (Register rs) { emit_long( ((int)rs->encoding()<<21) | mtlo_op ); } ++ ++ void mult (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, mult_op)); } ++ void multu(Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, multu_op)); } ++ ++ void nor(Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), nor_op)); } ++ ++ void orr(Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), or_op)); } ++ void ori(Register rt, Register rs, int imm) { emit_long(insn_ORRI(ori_op, (int)rs->encoding(), (int)rt->encoding(), simm16(imm))); } ++ ++ void sb (Register rt, Register base, int off) { emit_long(insn_ORRI(sb_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void sc (Register rt, Register base, int off) { emit_long(insn_ORRI(sc_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void scd (Register rt, Register base, int off) { emit_long(insn_ORRI(scd_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void sd (Register rt, Register base, int off) { emit_long(insn_ORRI(sd_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void sdl (Register rt, Register base, int off) { emit_long(insn_ORRI(sdl_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void sdr (Register rt, Register base, int off) { emit_long(insn_ORRI(sdr_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void sh (Register rt, Register base, int off) { emit_long(insn_ORRI(sh_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void sll (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), sll_op)); } ++ void sllv (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), sllv_op)); } ++ void slt (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), slt_op)); } ++ void slti (Register rt, Register rs, int imm) { emit_long(insn_ORRI(slti_op, (int)rs->encoding(), (int)rt->encoding(), imm)); } ++ void sltiu(Register rt, Register rs, int imm) { emit_long(insn_ORRI(sltiu_op, (int)rs->encoding(), (int)rt->encoding(), imm)); } ++ void sltu (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), sltu_op)); } ++ void sra (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), sra_op)); } ++ void srav (Register rd, Register rt, Register rs) { 
emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), srav_op)); } ++ void srl (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), srl_op)); } ++ void srlv (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), srlv_op)); } ++ ++ void subu (Register rd, Register rs, Register rt) { dsubu (rd, rs, rt); } ++ void subu32 (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), subu_op)); } ++ void sw (Register rt, Register base, int off) { emit_long(insn_ORRI(sw_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void swl (Register rt, Register base, int off) { emit_long(insn_ORRI(swl_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void swr (Register rt, Register base, int off) { emit_long(insn_ORRI(swr_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void synci(Register base, int off) { emit_long(insn_ORRI(regimm_op, (int)base->encoding(), synci_op, off)); } ++ void sync () { ++ if (os::is_ActiveCoresMP()) ++ emit_long(0); ++ else ++ emit_long(sync_op); ++ } ++ void syscall(int code) { emit_long( (code<<6) | syscall_op ); } ++ ++ void sb(Register rt, Address dst); ++ void sc(Register rt, Address dst); ++ void scd(Register rt, Address dst); ++ void sd(Register rt, Address dst); ++ void sdl(Register rt, Address dst); ++ void sdr(Register rt, Address dst); ++ void sh(Register rt, Address dst); ++ void sw(Register rt, Address dst); ++ void swl(Register rt, Address dst); ++ void swr(Register rt, Address dst); ++ ++ void teq (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, teq_op)); } ++ void teqi (Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), teqi_op, imm)); } ++ void tge (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, tge_op)); } ++ void tgei (Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), tgei_op, imm)); } ++ void tgeiu(Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), tgeiu_op, imm)); } ++ void tgeu (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, tgeu_op)); } ++ void tlt (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, tlt_op)); } ++ void tlti (Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), tlti_op, imm)); } ++ void tltiu(Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), tltiu_op, imm)); } ++ void tltu (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, tltu_op)); } ++ void tne (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, tne_op)); } ++ void tnei (Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), tnei_op, imm)); } ++ ++ void xorr(Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), xor_op)); } ++ void xori(Register rt, Register rs, int imm) { emit_long(insn_ORRI(xori_op, (int)rs->encoding(), (int)rt->encoding(), simm16(imm))); } ++ ++ void nop() { emit_long(0); } ++ ++ ++ ++ void ldc1(FloatRegister ft, Register base, int off) { emit_long(insn_ORRI(ldc1_op, 
(int)base->encoding(), (int)ft->encoding(), off)); } ++ void lwc1(FloatRegister ft, Register base, int off) { emit_long(insn_ORRI(lwc1_op, (int)base->encoding(), (int)ft->encoding(), off)); } ++ void ldc1(FloatRegister ft, Address src); ++ void lwc1(FloatRegister ft, Address src); ++ ++ //COP0 ++ void mfc0 (Register rt, Register rd) { emit_long(insn_COP0( mfc0_op, (int)rt->encoding(), (int)rd->encoding())); } ++ void dmfc0 (Register rt, FloatRegister rd) { emit_long(insn_COP0(dmfc0_op, (int)rt->encoding(), (int)rd->encoding())); } ++ // MFGC0, DMFGC0, MTGC0, DMTGC0 not implemented yet ++ void mtc0 (Register rt, Register rd) { emit_long(insn_COP0( mtc0_op, (int)rt->encoding(), (int)rd->encoding())); } ++ void dmtc0 (Register rt, FloatRegister rd) { emit_long(insn_COP0(dmtc0_op, (int)rt->encoding(), (int)rd->encoding())); } ++ //COP0 end ++ ++ ++ //COP1 ++ void mfc1 (Register rt, FloatRegister fs) { emit_long(insn_COP1 (mfc1_op, (int)rt->encoding(), (int)fs->encoding())); } ++ void dmfc1(Register rt, FloatRegister fs) { emit_long(insn_COP1(dmfc1_op, (int)rt->encoding(), (int)fs->encoding())); } ++ void cfc1 (Register rt, int fs) { emit_long(insn_COP1( cfc1_op, (int)rt->encoding(), fs)); } ++ void mfhc1(Register rt, int fs) { emit_long(insn_COP1(mfhc1_op, (int)rt->encoding(), fs)); } ++ void mtc1 (Register rt, FloatRegister fs) { emit_long(insn_COP1( mtc1_op, (int)rt->encoding(), (int)fs->encoding())); } ++ void dmtc1(Register rt, FloatRegister fs) { emit_long(insn_COP1(dmtc1_op, (int)rt->encoding(), (int)fs->encoding())); } ++ void ctc1 (Register rt, FloatRegister fs) { emit_long(insn_COP1( ctc1_op, (int)rt->encoding(), (int)fs->encoding())); } ++ void ctc1 (Register rt, int fs) { emit_long(insn_COP1(ctc1_op, (int)rt->encoding(), fs)); } ++ void mthc1(Register rt, int fs) { emit_long(insn_COP1(mthc1_op, (int)rt->encoding(), fs)); } ++ ++ void bc1f (int off) { emit_long(insn_ORRI(cop1_op, bc1f_op, bcf_op, off)); has_delay_slot(); } ++ void bc1fl(int off) { emit_long(insn_ORRI(cop1_op, bc1f_op, bcfl_op, off)); has_delay_slot(); } ++ void bc1t (int off) { emit_long(insn_ORRI(cop1_op, bc1f_op, bct_op, off)); has_delay_slot(); } ++ void bc1tl(int off) { emit_long(insn_ORRI(cop1_op, bc1f_op, bctl_op, off)); has_delay_slot(); } ++ ++ void bc1f (address entry) { bc1f(offset(entry)); } ++ void bc1fl(address entry) { bc1fl(offset(entry)); } ++ void bc1t (address entry) { bc1t(offset(entry)); } ++ void bc1tl(address entry) { bc1tl(offset(entry)); } ++ ++ void bc1f (Label& L) { bc1f(target(L)); } ++ void bc1fl(Label& L) { bc1fl(target(L)); } ++ void bc1t (Label& L) { bc1t(target(L)); } ++ void bc1tl(Label& L) { bc1tl(target(L)); } ++ ++//R0->encoding() is 0; INSN_SINGLE is enclosed by {} for ctags. 
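++// For example, add_s(fd, fs, ft) below expands to
++// emit_long((cop1_op << 26) | (single_fmt << 21) | (ft << 16) | (fs << 11) | (fd << 6) | fadd_op),
++// with the register encodings filled into the ft/fs/fd fields.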
++#define INSN_SINGLE(r1, r2, r3, op) \ ++ { emit_long(insn_F3RO(single_fmt, (int)r1->encoding(), (int)r2->encoding(), (int)r3->encoding(), op));} ++ void add_s (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, fd, fadd_op)} ++ void sub_s (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, fd, fsub_op)} ++ void mul_s (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, fd, fmul_op)} ++ void div_s (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, fd, fdiv_op)} ++ void sqrt_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fsqrt_op)} ++ void abs_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fabs_op)} ++ void mov_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fmov_op)} ++ void neg_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fneg_op)} ++ void round_l_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, froundl_op)} ++ void trunc_l_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, ftruncl_op)} ++ void ceil_l_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fceill_op)} ++ void floor_l_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, ffloorl_op)} ++ void round_w_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, froundw_op)} ++ void trunc_w_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, ftruncw_op)} ++ void ceil_w_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fceilw_op)} ++ void floor_w_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, ffloorw_op)} ++ //null ++ void movf_s(FloatRegister fs, FloatRegister fd, int cc = 0) { ++ assert(cc >= 0 && cc <= 7, "cc is 3 bits"); ++ emit_long((cop1_op<<26) | (single_fmt<<21) | (cc<<18) | ((int)fs->encoding()<<11) | ((int)fd->encoding()<<6) | movf_f_op );} ++ void movt_s(FloatRegister fs, FloatRegister fd, int cc = 0) { ++ assert(cc >= 0 && cc <= 7, "cc is 3 bits"); ++ emit_long((cop1_op<<26) | (single_fmt<<21) | (cc<<18) | 1<<16 | ((int)fs->encoding()<<11) | ((int)fd->encoding()<<6) | movf_f_op );} ++ void movz_s (FloatRegister fd, FloatRegister fs, Register rt) {INSN_SINGLE(rt, fs, fd, movz_f_op)} ++ void movn_s (FloatRegister fd, FloatRegister fs, Register rt) {INSN_SINGLE(rt, fs, fd, movn_f_op)} ++ //null ++ void recip_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, frecip_op)} ++ void rsqrt_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, frsqrt_op)} ++ //null ++ void cvt_d_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fcvtd_op)} ++ //null ++ void cvt_w_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fcvtw_op)} ++ void cvt_l_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fcvtl_op)} ++ void cvt_ps_s(FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, fd, fcvtps_op)} ++ //null ++ void c_f_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, f_cond)} ++ void c_un_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, un_cond)} ++ void c_eq_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, eq_cond)} ++ void c_ueq_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ueq_cond)} ++ void c_olt_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, olt_cond)} ++ void c_ult_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ult_cond)} ++ void c_ole_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ole_cond)} 
++ void c_ule_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ule_cond)} ++ void c_sf_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, sf_cond)} ++ void c_ngle_s(FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ngle_cond)} ++ void c_seq_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, seq_cond)} ++ void c_ngl_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ngl_cond)} ++ void c_lt_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, lt_cond)} ++ void c_nge_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, nge_cond)} ++ void c_le_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, le_cond)} ++ void c_ngt_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ngt_cond)} ++ ++#undef INSN_SINGLE ++ ++ ++//R0->encoding() is 0; INSN_DOUBLE is enclosed by {} for ctags. ++#define INSN_DOUBLE(r1, r2, r3, op) \ ++ { emit_long(insn_F3RO(double_fmt, (int)r1->encoding(), (int)r2->encoding(), (int)r3->encoding(), op));} ++ ++ void add_d (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, fd, fadd_op)} ++ void sub_d (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, fd, fsub_op)} ++ void mul_d (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, fd, fmul_op)} ++ void div_d (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, fd, fdiv_op)} ++ void sqrt_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fsqrt_op)} ++ void abs_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fabs_op)} ++ void mov_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fmov_op)} ++ void neg_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fneg_op)} ++ void round_l_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, froundl_op)} ++ void trunc_l_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, ftruncl_op)} ++ void ceil_l_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fceill_op)} ++ void floor_l_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, ffloorl_op)} ++ void round_w_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, froundw_op)} ++ void trunc_w_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, ftruncw_op)} ++ void ceil_w_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fceilw_op)} ++ void floor_w_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, ffloorw_op)} ++ //null ++ void movf_d(FloatRegister fs, FloatRegister fd, int cc = 0) { ++ assert(cc >= 0 && cc <= 7, "cc is 3 bits"); ++ emit_long((cop1_op<<26) | (double_fmt<<21) | (cc<<18) | ((int)fs->encoding()<<11) | ((int)fd->encoding()<<6) | movf_f_op );} ++ void movt_d(FloatRegister fs, FloatRegister fd, int cc = 0) { ++ assert(cc >= 0 && cc <= 7, "cc is 3 bits"); ++ emit_long((cop1_op<<26) | (double_fmt<<21) | (cc<<18) | 1<<16 | ((int)fs->encoding()<<11) | ((int)fd->encoding()<<6) | movf_f_op );} ++ void movz_d (FloatRegister fd, FloatRegister fs, Register rt) {INSN_DOUBLE(rt, fs, fd, movz_f_op)} ++ void movn_d (FloatRegister fd, FloatRegister fs, Register rt) {INSN_DOUBLE(rt, fs, fd, movn_f_op)} ++ //null ++ void recip_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, frecip_op)} ++ void rsqrt_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, frsqrt_op)} ++ //null ++ void cvt_s_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fcvts_op)} ++ void cvt_l_d 
(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fcvtl_op)} ++ //null ++ void cvt_w_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fcvtw_op)} ++ //null ++ void c_f_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, f_cond)} ++ void c_un_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, un_cond)} ++ void c_eq_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, eq_cond)} ++ void c_ueq_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ueq_cond)} ++ void c_olt_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, olt_cond)} ++ void c_ult_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ult_cond)} ++ void c_ole_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ole_cond)} ++ void c_ule_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ule_cond)} ++ void c_sf_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, sf_cond)} ++ void c_ngle_d(FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ngle_cond)} ++ void c_seq_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, seq_cond)} ++ void c_ngl_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ngl_cond)} ++ void c_lt_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, lt_cond)} ++ void c_nge_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, nge_cond)} ++ void c_le_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, le_cond)} ++ void c_ngt_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ngt_cond)} ++ ++#undef INSN_DOUBLE ++ ++ ++ //null ++ void cvt_s_w(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(word_fmt, 0, (int)fs->encoding(), (int)fd->encoding(), fcvts_op)); } ++ void cvt_d_w(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(word_fmt, 0, (int)fs->encoding(), (int)fd->encoding(), fcvtd_op)); } ++ //null ++ void cvt_s_l(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(long_fmt, 0, (int)fs->encoding(), (int)fd->encoding(), fcvts_op)); } ++ void cvt_d_l(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(long_fmt, 0, (int)fs->encoding(), (int)fd->encoding(), fcvtd_op)); } ++ //null ++ ++ ++//R0->encoding() is 0; INSN_PS is enclosed by {} for ctags. 
++#define INSN_PS(r1, r2, r3, op) \ ++ { emit_long(insn_F3RO(ps_fmt, (int)r1->encoding(), (int)r2->encoding(), (int)r3->encoding(), op));} ++ ++ void add_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fadd_op)} ++ void sub_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fsub_op)} ++ void mul_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fmul_op)} ++ //null ++ void abs_ps (FloatRegister fd, FloatRegister fs) {INSN_PS(R0, fs, fd, fabs_op)} ++ void mov_ps (FloatRegister fd, FloatRegister fs) {INSN_PS(R0, fs, fd, fmov_op)} ++ void neg_ps (FloatRegister fd, FloatRegister fs) {INSN_PS(R0, fs, fd, fneg_op)} ++ //null ++ //void movf_ps(FloatRegister rd, FloatRegister rs, FPConditionCode cc) { unimplemented(" movf_ps")} ++ //void movt_ps(FloatRegister rd, FloatRegister rs, FPConditionCode cc) { unimplemented(" movt_ps") } ++ void movz_ps (FloatRegister fd, FloatRegister fs, Register rt) {INSN_PS(rt, fs, fd, movz_f_op)} ++ void movn_ps (FloatRegister fd, FloatRegister fs, Register rt) {INSN_PS(rt, fs, fd, movn_f_op)} ++ //null ++ void cvt_s_pu (FloatRegister fd, FloatRegister fs) {INSN_PS(R0, fs, fd, fcvts_op)} ++ //null ++ void cvt_s_pl (FloatRegister fd, FloatRegister fs) {INSN_PS(R0, fs, fd, fcvtspl_op)} ++ //null ++ void pll_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fpll_op)} ++ void plu_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fplu_op)} ++ void pul_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fpul_op)} ++ void puu_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fpuu_op)} ++ void c_f_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, f_cond)} ++ void c_un_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, un_cond)} ++ void c_eq_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, eq_cond)} ++ void c_ueq_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ueq_cond)} ++ void c_olt_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, olt_cond)} ++ void c_ult_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ult_cond)} ++ void c_ole_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ole_cond)} ++ void c_ule_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ule_cond)} ++ void c_sf_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, sf_cond)} ++ void c_ngle_ps(FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ngle_cond)} ++ void c_seq_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, seq_cond)} ++ void c_ngl_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ngl_cond)} ++ void c_lt_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, lt_cond)} ++ void c_nge_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, nge_cond)} ++ void c_le_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, le_cond)} ++ void c_ngt_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ngt_cond)} ++ //null ++#undef INSN_PS ++ //COP1 end ++ ++ ++ //COP1X ++//R0->encoding() is 0; INSN_SINGLE is enclosed by {} for ctags. 
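++// These are the multiply-add/subtract forms; e.g. madd_s(fd, fr, fs, ft) expands to
++// emit_long((cop1x_op << 26) | (fr << 21) | (ft << 16) | (fs << 11) | (fd << 6) | madd_s_op).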
++#define INSN_COP1X(r0, r1, r2, r3, op) \
++  { emit_long(insn_F3ROX((int)r0->encoding(), (int)r1->encoding(), (int)r2->encoding(), (int)r3->encoding(), op));}
++  void madd_s(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, madd_s_op) }
++  void madd_d(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, madd_d_op) }
++  void madd_ps(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft){INSN_COP1X(fr, ft, fs, fd, madd_ps_op) }
++  void msub_s(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, msub_s_op) }
++  void msub_d(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, msub_d_op) }
++  void msub_ps(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft){INSN_COP1X(fr, ft, fs, fd, msub_ps_op) }
++  void nmadd_s(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, nmadd_s_op) }
++  void nmadd_d(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, nmadd_d_op) }
++  void nmadd_ps(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft){INSN_COP1X(fr, ft, fs, fd, nmadd_ps_op) }
++  void nmsub_s(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, nmsub_s_op) }
++  void nmsub_d(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, nmsub_d_op) }
++  void nmsub_ps(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft){INSN_COP1X(fr, ft, fs, fd, nmsub_ps_op) }
++#undef INSN_COP1X
++  //COP1X end
++
++  //SPECIAL2
++//R0->encoding() is 0; INSN_S2 is enclosed by {} for ctags.
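The SPECIAL2 helpers that follow all pack a 32-bit MIPS R-type word from the same five fields: major opcode, rs, rt, rd and a function code. A minimal standalone illustration of that packing, with a hypothetical helper name and example register numbers (0x1c and 0x02 are the MIPS32 SPECIAL2 opcode and mul function code):

#include <cstdint>
#include <cstdio>

// Illustrative sketch only -- packs a word the same way INSN_S2 below does:
// op<<26 | rs<<21 | rt<<16 | rd<<11 | funct.
static uint32_t pack_special2(uint32_t rs, uint32_t rt, uint32_t rd, uint32_t funct) {
  const uint32_t special2_op = 0x1c;   // MIPS32 SPECIAL2 major opcode
  return (special2_op << 26) | (rs << 21) | (rt << 16) | (rd << 11) | funct;
}

int main() {
  // mul rd, rs, rt with rs=8, rt=9, rd=10 (register encodings chosen for the example)
  printf("0x%08x\n", (unsigned) pack_special2(8, 9, 10, 0x02));
  return 0;
}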
++#define INSN_S2(op) \ ++ { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | op);} ++ ++ void madd (Register rs, Register rt) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | madd_op); } ++ void maddu (Register rs, Register rt) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | maddu_op); } ++ void mul (Register rd, Register rs, Register rt) { INSN_S2(mul_op) } ++ void gsandn (Register rd, Register rs, Register rt) { INSN_S2((0x12 << 6) | gs0x03_op) } ++ void msub (Register rs, Register rt) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | msub_op); } ++ void msubu (Register rs, Register rt) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | msubu_op); } ++ void gsorn (Register rd, Register rs, Register rt) { INSN_S2((0x12 << 6) | gs0x06_op) } ++ ++ void gsmult (Register rd, Register rs, Register rt) { INSN_S2(gsmult_op) } ++ void gsdmult (Register rd, Register rs, Register rt) { INSN_S2(gsdmult_op) } ++ void gsmultu (Register rd, Register rs, Register rt) { INSN_S2(gsmultu_op) } ++ void gsdmultu(Register rd, Register rs, Register rt) { INSN_S2(gsdmultu_op)} ++ void gsdiv (Register rd, Register rs, Register rt) { INSN_S2(gsdiv_op) } ++ void gsddiv (Register rd, Register rs, Register rt) { INSN_S2(gsddiv_op) } ++ void gsdivu (Register rd, Register rs, Register rt) { INSN_S2(gsdivu_op) } ++ void gsddivu (Register rd, Register rs, Register rt) { INSN_S2(gsddivu_op) } ++ void gsmod (Register rd, Register rs, Register rt) { INSN_S2(gsmod_op) } ++ void gsdmod (Register rd, Register rs, Register rt) { INSN_S2(gsdmod_op) } ++ void gsmodu (Register rd, Register rs, Register rt) { INSN_S2(gsmodu_op) } ++ void gsdmodu (Register rd, Register rs, Register rt) { INSN_S2(gsdmodu_op) } ++ void clz (Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | clz_op); } ++ void clo (Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | clo_op); } ++ void ctz (Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | 0 << 6| xctx_op); } ++ void cto (Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | 1 << 6| xctx_op); } ++ void dctz(Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | 2 << 6| xctx_op); } ++ void dcto(Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | 3 << 6| xctx_op); } ++ void dclz(Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | dclz_op); } ++ void dclo(Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | dclo_op); } ++ ++#undef INSN_S2 ++ ++ //SPECIAL3 ++/* ++// FIXME ++#define is_0_to_32(a, b) \ ++ assert (a >= 0, " just a check"); \ ++ assert (a <= 0, " just a check"); \ ++ assert (b >= 
0, " just a check"); \ ++ assert (b <= 0, " just a check"); \ ++ assert (a+b >= 0, " just a check"); \ ++ assert (a+b <= 0, " just a check"); ++ */ ++#define is_0_to_32(a, b) ++ ++ void ins (Register rt, Register rs, int pos, int size) { is_0_to_32(pos, size); emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (low(pos+size-1, 5) << 11) | (low(pos, 5) << 6) | ins_op); } ++ void dinsm(Register rt, Register rs, int pos, int size) { is_0_to_32(pos, size); emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (low(pos+size-33, 5) << 11) | (low(pos, 5) << 6) | dinsm_op); } ++ void dinsu(Register rt, Register rs, int pos, int size) { is_0_to_32(pos, size); emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (low(pos+size-33, 5) << 11) | (low(pos-32, 5) << 6) | dinsu_op); } ++ void dins (Register rt, Register rs, int pos, int size) { ++ guarantee((0 <= pos) && (pos < 32), "pos must be in [0, 32)"); ++ guarantee((0 < size) && (size <= 32), "size must be in (0, 32]"); ++ guarantee((0 < pos + size) && (pos + size <= 32), "pos + size must be in (0, 32]"); ++ ++ emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (low(pos+size-1, 5) << 11) | (low(pos, 5) << 6) | dins_op); ++ } ++ ++ void repl_qb (Register rd, int const8) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | (low(const8, 8) << 16) | ((int)rd->encoding() << 11) | repl_qb_op << 6 | re1_op); } ++ void replv_qb(Register rd, Register rt) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | replv_qb_op << 6 | re1_op ); } ++ void repl_ph (Register rd, int const10) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | (low(const10, 10) << 16) | ((int)rd->encoding() << 11) | repl_ph_op << 6 | re1_op); } ++ void replv_ph(Register rd, Register rt) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | replv_ph_op << 6 | re1_op ); } ++ ++ void repl_ob (Register rd, int const8) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | (low(const8, 8) << 16) | ((int)rd->encoding() << 11) | repl_ob_op << 6 | re2_op); } ++ void replv_ob(Register rd, Register rt) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | replv_ob_op << 6 | re2_op ); } ++ void repl_qh (Register rd, int const10) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | (low(const10, 10) << 16) | ((int)rd->encoding() << 11) | repl_qh_op << 6 | re2_op); } ++ void replv_qh(Register rd, Register rt) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | replv_qh_op << 6 | re2_op ); } ++ void repl_pw (Register rd, int const10) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | (low(const10, 10) << 16) | ((int)rd->encoding() << 11) | repl_pw_op << 6 | re2_op); } ++ void replv_pw(Register rd, Register rt) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | replv_pw_op << 6 | re2_op ); } ++ ++ void sdc1(FloatRegister ft, Register base, int off) { emit_long(insn_ORRI(sdc1_op, (int)base->encoding(), (int)ft->encoding(), off)); } ++ void 
sdc1(FloatRegister ft, Address dst); ++ void swc1(FloatRegister ft, Register base, int off) { emit_long(insn_ORRI(swc1_op, (int)base->encoding(), (int)ft->encoding(), off)); } ++ void swc1(FloatRegister ft, Address dst); ++ ++ ++ static void print_instruction(int); ++ int patched_branch(int dest_pos, int inst, int inst_pos); ++ int branch_destination(int inst, int pos); ++ ++ // Loongson extension ++ ++ // gssq/gslq/gssqc1/gslqc1: vAddr = sign_extend(offset << 4 ) + GPR[base]. Therefore, the off should be ">> 4". ++ void gslble(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslble_op); ++ } ++ ++ void gslbgt(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslbgt_op); ++ } ++ ++ void gslhle(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslhle_op); ++ } ++ ++ void gslhgt(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslhgt_op); ++ } ++ ++ void gslwle(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslwle_op); ++ } ++ ++ void gslwgt(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslwgt_op); ++ } ++ ++ void gsldle(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsldle_op); ++ } ++ ++ void gsldgt(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsldgt_op); ++ } ++ ++ void gslwlec1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslwlec1_op); ++ } ++ ++ void gslwgtc1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslwgtc1_op); ++ } ++ ++ void gsldlec1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsldlec1_op); ++ } ++ ++ void gsldgtc1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsldgtc1_op); ++ } ++ ++ void gslq(Register rq, Register rt, Register base, int off) { ++ assert(!(off & 0xF), "gslq: the low 4 bits of off must be 0"); ++ off = off >> 4; ++ assert(is_simm(off, 9),"gslq: off exceeds 9 bits"); ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | 0 << 15 | (low(off, 9) << 6) | gslq_op | (int)rq->encoding() ); ++ } 
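As the comment above notes, gslq/gssq compute vAddr = sign_extend(offset << 4) + GPR[base], so a byte offset is only encodable when its low 4 bits are zero and the scaled value fits the signed 9-bit field. A minimal sketch of that check (hypothetical helper name, mirroring the asserts in gslq/gssq):

#include <cassert>
#include <cstdint>

// Sketch only: the same pre-encoding checks gslq()/gssq() perform.
// Returns the 9-bit field value that lands in the instruction word.
static int32_t gslq_offset_field(int32_t byte_off) {
  assert((byte_off & 0xF) == 0 && "low 4 bits of the byte offset must be 0");
  int32_t scaled = byte_off >> 4;                  // hardware re-scales by << 4
  assert(scaled >= -256 && scaled <= 255 && "scaled offset must be a signed 9-bit value");
  return scaled & 0x1FF;                           // low(off, 9)
}

For example, a byte offset of 32 encodes as 2, while an offset of 24 fails the alignment check.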
++ ++ void gslqc1(FloatRegister rq, FloatRegister rt, Register base, int off) { ++ assert(!(off & 0xF), "gslqc1: the low 4 bits of off must be 0"); ++ off = off >> 4; ++ assert(is_simm(off, 9),"gslqc1: off exceeds 9 bits"); ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | 1 << 15 | (low(off, 9) << 6) | gslq_op | (int)rq->encoding() ); ++ } ++ ++ void gssble(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssble_op); ++ } ++ ++ void gssbgt(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssbgt_op); ++ } ++ ++ void gsshle(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsshle_op); ++ } ++ ++ void gsshgt(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsshgt_op); ++ } ++ ++ void gsswle(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsswle_op); ++ } ++ ++ void gsswgt(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsswgt_op); ++ } ++ ++ void gssdle(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssdle_op); ++ } ++ ++ void gssdgt(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssdgt_op); ++ } ++ ++ void gsswlec1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsswlec1_op); ++ } ++ ++ void gsswgtc1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsswgtc1_op); ++ } ++ ++ void gssdlec1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssdlec1_op); ++ } ++ ++ void gssdgtc1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssdgtc1_op); ++ } ++ ++ void gssq(Register rq, Register rt, Register base, int off) { ++ assert(!(off & 0xF), "gssq: the low 4 bits of off must be 0"); ++ off = off >> 4; ++ assert(is_simm(off, 9),"gssq: off exceeds 9 bits"); ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | 0 << 15 | (low(off, 9) << 6) | gssq_op | (int)rq->encoding() ); ++ } ++ ++ void gssqc1(FloatRegister rq, FloatRegister rt, Register base, int off) { ++ assert(!(off & 0xF), "gssqc1: the low 4 bits 
of off must be 0"); ++ off = off >> 4; ++ assert(is_simm(off, 9),"gssqc1: off exceeds 9 bits"); ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | 1 << 15 | (low(off, 9) << 6) | gssq_op | (int)rq->encoding() ); ++ } ++ ++ //LDC2 & SDC2 ++#define INSN(OPS, OP) \ ++ assert(is_simm(off, 8), "NAME: off exceeds 8 bits"); \ ++ assert(UseLEXT1, "check UseLEXT1"); \ ++ emit_long( (OPS << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | \ ++ ((int)index->encoding() << 11) | (low(off, 8) << 3) | OP); ++ ++#define INSN_LDC2(NAME, op) \ ++ void NAME(Register rt, Register base, Register index, int off) { \ ++ INSN(gs_ldc2_op, op) \ ++ } ++ ++#define INSN_LDC2_F(NAME, op) \ ++ void NAME(FloatRegister rt, Register base, Register index, int off) { \ ++ INSN(gs_ldc2_op, op) \ ++ } ++ ++#define INSN_SDC2(NAME, op) \ ++ void NAME(Register rt, Register base, Register index, int off) { \ ++ INSN(gs_sdc2_op, op) \ ++ } ++ ++#define INSN_SDC2_F(NAME, op) \ ++ void NAME(FloatRegister rt, Register base, Register index, int off) { \ ++ INSN(gs_sdc2_op, op) \ ++ } ++ ++/* ++ void gslbx(Register rt, Register base, Register index, int off) { ++ assert(is_simm(off, 8), "gslbx: off exceeds 8 bits"); ++ assert(UseLEXT1, "check UseLEXT1"); ++ emit_long( (gs_ldc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ++ ((int)index->encoding() << 11) | (low(off, 8) << 3) | gslbx_op); ++ void gslbx(Register rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gslbx_op);} ++ ++ INSN_LDC2(gslbx, gslbx_op) ++ INSN_LDC2(gslhx, gslhx_op) ++ INSN_LDC2(gslwx, gslwx_op) ++ INSN_LDC2(gsldx, gsldx_op) ++ INSN_LDC2_F(gslwxc1, gslwxc1_op) ++ INSN_LDC2_F(gsldxc1, gsldxc1_op) ++ ++ INSN_SDC2(gssbx, gssbx_op) ++ INSN_SDC2(gsshx, gsshx_op) ++ INSN_SDC2(gsswx, gsswx_op) ++ INSN_SDC2(gssdx, gssdx_op) ++ INSN_SDC2_F(gsswxc1, gsswxc1_op) ++ INSN_SDC2_F(gssdxc1, gssdxc1_op) ++*/ ++ void gslbx(Register rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gslbx_op) } ++ void gslhx(Register rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gslhx_op) } ++ void gslwx(Register rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gslwx_op) } ++ void gsldx(Register rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gsldx_op) } ++ void gslwxc1(FloatRegister rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gslwxc1_op) } ++ void gsldxc1(FloatRegister rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gsldxc1_op) } ++ ++ void gssbx(Register rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gssbx_op) } ++ void gsshx(Register rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gsshx_op) } ++ void gsswx(Register rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gsswx_op) } ++ void gssdx(Register rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gssdx_op) } ++ void gsswxc1(FloatRegister rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gsswxc1_op) } ++ void gssdxc1(FloatRegister rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gssdxc1_op) } ++ ++#undef INSN ++#undef INSN_LDC2 ++#undef INSN_LDC2_F ++#undef INSN_SDC2 ++#undef INSN_SDC2_F ++ ++ // cpucfg on Loongson CPUs above 3A4000 ++ void cpucfg(Register rd, Register rs) { emit_long((gs_lwc2_op << 26) | ((int)rs->encoding() << 21) | (0b01000 << 16) | ((int)rd->encoding() << 11) | ( 0b00100 << 6) | 0b011000);} ++ ++ enum Membar_mask_bits { ++ StoreStore = 1 << 3, ++ LoadStore = 1 << 2, 
++ StoreLoad = 1 << 1, ++ LoadLoad = 1 << 0 ++ }; ++ ++ // Serializes memory and blows flags ++ void membar(Membar_mask_bits order_constraint) { ++ sync(); ++ } ++ ++public: ++ // Creation ++ Assembler(CodeBuffer* code) : AbstractAssembler(code) { ++#ifdef CHECK_DELAY ++ delay_state = no_delay; ++#endif ++ } ++ ++ // Decoding ++ static address locate_operand(address inst, WhichOperand which); ++ static address locate_next_instruction(address inst); ++}; ++ ++#endif // CPU_MIPS_VM_ASSEMBLER_MIPS_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/assembler_mips.inline.hpp b/src/hotspot/cpu/mips/assembler_mips.inline.hpp +--- a/src/hotspot/cpu/mips/assembler_mips.inline.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/assembler_mips.inline.hpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,33 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_ASSEMBLER_MIPS_INLINE_HPP ++#define CPU_MIPS_VM_ASSEMBLER_MIPS_INLINE_HPP ++ ++#include "asm/assembler.inline.hpp" ++#include "asm/codeBuffer.hpp" ++#include "code/codeCache.hpp" ++ ++#endif // CPU_MIPS_VM_ASSEMBLER_MIPS_INLINE_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/bytes_mips.hpp b/src/hotspot/cpu/mips/bytes_mips.hpp +--- a/src/hotspot/cpu/mips/bytes_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/bytes_mips.hpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,181 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2019, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_BYTES_MIPS_HPP ++#define CPU_MIPS_VM_BYTES_MIPS_HPP ++ ++#include "memory/allocation.hpp" ++ ++class Bytes: AllStatic { ++ public: ++ // Returns true if the byte ordering used by Java is different from the native byte ordering ++ // of the underlying machine. For example, this is true for Intel x86, but false for Solaris ++ // on Sparc. ++ // we use mipsel, so return true ++ static inline bool is_Java_byte_ordering_different(){ return true; } ++ ++ ++ // Efficient reading and writing of unaligned unsigned data in platform-specific byte ordering ++ // (no special code is needed since x86 CPUs can access unaligned data) ++ static inline u2 get_native_u2(address p) { ++ if ((intptr_t)p & 0x1) { ++ return ((u2)p[1] << 8) | (u2)p[0]; ++ } else { ++ return *(u2*)p; ++ } ++ } ++ ++ static inline u4 get_native_u4(address p) { ++ if ((intptr_t)p & 3) { ++ u4 res; ++ __asm__ __volatile__ ( ++ " .set push\n" ++ " .set mips64\n" ++ " .set noreorder\n" ++ ++ " lwr %[res], 0(%[addr]) \n" ++ " lwl %[res], 3(%[addr]) \n" ++ ++ " .set pop" ++ : [res] "=&r" (res) ++ : [addr] "r" (p) ++ : "memory" ++ ); ++ return res; ++ } else { ++ return *(u4*)p; ++ } ++ } ++ ++ static inline u8 get_native_u8(address p) { ++ u8 res; ++ u8 temp = 0; ++ // u4 tp;//tmp register ++ __asm__ __volatile__ ( ++ " .set push\n" ++ " .set mips64\n" ++ " .set noreorder\n" ++ " .set noat\n" ++ " andi $1,%[addr],0x7 \n" ++ " beqz $1,1f \n" ++ " nop \n" ++ " ldr %[temp], 0(%[addr]) \n" ++ " ldl %[temp], 7(%[addr]) \n" ++ " b 2f \n" ++ " nop \n" ++ " 1:\t ld %[temp],0(%[addr]) \n" ++ " 2:\t sd %[temp], %[res] \n" ++ ++ " .set at\n" ++ " .set pop\n" ++ : [addr]"=r"(p), [temp]"=r" (temp) ++ : "[addr]"(p), "[temp]" (temp), [res]"m" (*(volatile jint*)&res) ++ : "memory" ++ ); ++ ++ return res; ++ } ++ ++ //use mips unaligned load instructions ++ static inline void put_native_u2(address p, u2 x) { ++ if((intptr_t)p & 0x1) { ++ p[0] = (u_char)(x); ++ p[1] = (u_char)(x>>8); ++ } else { ++ *(u2*)p = x; ++ } ++ } ++ ++ static inline void put_native_u4(address p, u4 x) { ++ // refer to sparc implementation. ++ // Note that sparc is big-endian, while mips is little-endian ++ switch ( intptr_t(p) & 3 ) { ++ case 0: *(u4*)p = x; ++ break; ++ ++ case 2: ((u2*)p)[1] = x >> 16; ++ ((u2*)p)[0] = x; ++ break; ++ ++ default: ((u1*)p)[3] = x >> 24; ++ ((u1*)p)[2] = x >> 16; ++ ((u1*)p)[1] = x >> 8; ++ ((u1*)p)[0] = x; ++ break; ++ } ++ } ++ ++ static inline void put_native_u8(address p, u8 x) { ++ // refer to sparc implementation. 
++ // Note that sparc is big-endian, while mips is little-endian ++ switch ( intptr_t(p) & 7 ) { ++ case 0: *(u8*)p = x; ++ break; ++ ++ case 4: ((u4*)p)[1] = x >> 32; ++ ((u4*)p)[0] = x; ++ break; ++ ++ case 2: ((u2*)p)[3] = x >> 48; ++ ((u2*)p)[2] = x >> 32; ++ ((u2*)p)[1] = x >> 16; ++ ((u2*)p)[0] = x; ++ break; ++ ++ default: ((u1*)p)[7] = x >> 56; ++ ((u1*)p)[6] = x >> 48; ++ ((u1*)p)[5] = x >> 40; ++ ((u1*)p)[4] = x >> 32; ++ ((u1*)p)[3] = x >> 24; ++ ((u1*)p)[2] = x >> 16; ++ ((u1*)p)[1] = x >> 8; ++ ((u1*)p)[0] = x; ++ } ++ } ++ ++ ++ // Efficient reading and writing of unaligned unsigned data in Java ++ // byte ordering (i.e. big-endian ordering). Byte-order reversal is ++ // needed since MIPS64EL CPUs use little-endian format. ++ static inline u2 get_Java_u2(address p) { return swap_u2(get_native_u2(p)); } ++ static inline u4 get_Java_u4(address p) { return swap_u4(get_native_u4(p)); } ++ static inline u8 get_Java_u8(address p) { return swap_u8(get_native_u8(p)); } ++ ++ static inline void put_Java_u2(address p, u2 x) { put_native_u2(p, swap_u2(x)); } ++ static inline void put_Java_u4(address p, u4 x) { put_native_u4(p, swap_u4(x)); } ++ static inline void put_Java_u8(address p, u8 x) { put_native_u8(p, swap_u8(x)); } ++ ++ ++ // Efficient swapping of byte ordering ++ static inline u2 swap_u2(u2 x); // compiler-dependent implementation ++ static inline u4 swap_u4(u4 x); // compiler-dependent implementation ++ static inline u8 swap_u8(u8 x); ++}; ++ ++ ++// The following header contains the implementations of swap_u2, swap_u4, and swap_u8[_base] ++#include OS_CPU_HEADER_INLINE(bytes) ++ ++#endif // CPU_MIPS_VM_BYTES_MIPS_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/c2_globals_mips.hpp b/src/hotspot/cpu/mips/c2_globals_mips.hpp +--- a/src/hotspot/cpu/mips/c2_globals_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/c2_globals_mips.hpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,95 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_C2_GLOBALS_MIPS_HPP ++#define CPU_MIPS_VM_C2_GLOBALS_MIPS_HPP ++ ++#include "utilities/globalDefinitions.hpp" ++#include "utilities/macros.hpp" ++ ++// Sets the default values for platform dependent flags used by the server compiler. ++// (see c2_globals.hpp). Alpha-sorted. 
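Returning to the Bytes helpers above: they read in native (MIPS64EL little-endian) order and byte-swap to obtain Java's big-endian values. A minimal portable sketch of the u4 case, assuming a GCC-style __builtin_bswap32 (the surrounding code already relies on GCC inline assembly):

#include <cstdint>
#include <cstdio>
#include <cstring>

// Sketch only: what get_Java_u4() amounts to on a little-endian host --
// an unaligned-safe native load followed by a byte swap.
static uint32_t sketch_get_Java_u4(const unsigned char* p) {
  uint32_t native;
  memcpy(&native, p, sizeof(native));   // safe for any alignment
  return __builtin_bswap32(native);     // little-endian bytes -> big-endian value
}

int main() {
  const unsigned char classfile_magic[] = { 0xCA, 0xFE, 0xBA, 0xBE };
  printf("0x%08x\n", (unsigned) sketch_get_Java_u4(classfile_magic));  // 0xcafebabe
  return 0;
}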
++define_pd_global(bool, BackgroundCompilation, true); ++define_pd_global(bool, UseTLAB, true); ++define_pd_global(bool, ResizeTLAB, true); ++define_pd_global(bool, CICompileOSR, true); ++define_pd_global(bool, InlineIntrinsics, true); ++define_pd_global(bool, PreferInterpreterNativeStubs, false); ++define_pd_global(bool, ProfileTraps, true); ++define_pd_global(bool, UseOnStackReplacement, true); ++#ifdef CC_INTERP ++define_pd_global(bool, ProfileInterpreter, false); ++#else ++define_pd_global(bool, ProfileInterpreter, true); ++#endif // CC_INTERP ++// Disable C1 in server JIT ++define_pd_global(bool, TieredCompilation, false); ++define_pd_global(intx, CompileThreshold, 10000); ++define_pd_global(intx, BackEdgeThreshold, 100000); ++ ++define_pd_global(intx, OnStackReplacePercentage, 140); ++define_pd_global(intx, ConditionalMoveLimit, 3); ++define_pd_global(intx, FLOATPRESSURE, 6); ++define_pd_global(intx, FreqInlineSize, 325); ++define_pd_global(intx, MinJumpTableSize, 10); ++define_pd_global(intx, INTPRESSURE, 13); ++define_pd_global(intx, InteriorEntryAlignment, 16); ++define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K)); ++define_pd_global(intx, LoopUnrollLimit, 60); ++define_pd_global(intx, LoopPercentProfileLimit, 10); ++// InitialCodeCacheSize derived from specjbb2000 run. ++define_pd_global(intx, InitialCodeCacheSize, 2496*K); // Integral multiple of CodeCacheExpansionSize ++define_pd_global(intx, CodeCacheExpansionSize, 64*K); ++ ++// Ergonomics related flags ++define_pd_global(uint64_t,MaxRAM, 128ULL*G); ++define_pd_global(intx, RegisterCostAreaRatio, 16000); ++ ++// Peephole and CISC spilling both break the graph, and so makes the ++// scheduler sick. ++define_pd_global(bool, OptoPeephole, false); ++define_pd_global(bool, UseCISCSpill, false); ++define_pd_global(bool, OptoScheduling, false); ++define_pd_global(bool, OptoBundling, false); ++define_pd_global(bool, OptoRegScheduling, false); ++define_pd_global(bool, SuperWordLoopUnrollAnalysis, true); ++define_pd_global(bool, IdealizeClearArrayNode, true); ++ ++define_pd_global(intx, ReservedCodeCacheSize, 120*M); ++define_pd_global(intx, NonProfiledCodeHeapSize, 57*M); ++define_pd_global(intx, ProfiledCodeHeapSize, 58*M); ++define_pd_global(intx, NonNMethodCodeHeapSize, 5*M ); ++define_pd_global(uintx, CodeCacheMinBlockLength, 4); ++define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); ++ ++define_pd_global(bool, TrapBasedRangeChecks, false); ++ ++// Heap related flags ++define_pd_global(uintx,MetaspaceSize, ScaleForWordSize(16*M)); ++ ++// Ergonomics related flags ++define_pd_global(bool, NeverActAsServerClassMachine, false); ++ ++#endif // CPU_MIPS_VM_C2_GLOBALS_MIPS_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/c2_init_mips.cpp b/src/hotspot/cpu/mips/c2_init_mips.cpp +--- a/src/hotspot/cpu/mips/c2_init_mips.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/c2_init_mips.cpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,34 @@ ++/* ++ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2019, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "opto/compile.hpp" ++#include "opto/node.hpp" ++ ++// processor dependent initialization for mips ++ ++void Compile::pd_compiler2_init() { ++ guarantee(CodeEntryAlignment >= InteriorEntryAlignment, "" ); ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/codeBuffer_mips.hpp b/src/hotspot/cpu/mips/codeBuffer_mips.hpp +--- a/src/hotspot/cpu/mips/codeBuffer_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/codeBuffer_mips.hpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,35 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_CODEBUFFER_MIPS_HPP ++#define CPU_MIPS_VM_CODEBUFFER_MIPS_HPP ++ ++private: ++ void pd_initialize() {} ++ ++public: ++ void flush_bundle(bool start_new_bundle) {} ++ ++#endif // CPU_MIPS_VM_CODEBUFFER_MIPS_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/compiledIC_mips.cpp b/src/hotspot/cpu/mips/compiledIC_mips.cpp +--- a/src/hotspot/cpu/mips/compiledIC_mips.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/compiledIC_mips.cpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,151 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/compiledIC.hpp" ++#include "code/icBuffer.hpp" ++#include "code/nmethod.hpp" ++#include "memory/resourceArea.hpp" ++#include "runtime/mutexLocker.hpp" ++#include "runtime/safepoint.hpp" ++ ++// ---------------------------------------------------------------------------- ++ ++#define __ _masm. ++address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark) { ++ ++ if (mark == NULL) { ++ mark = cbuf.insts_mark(); // get mark within main instrs section ++ } ++ ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a stub. ++ MacroAssembler _masm(&cbuf); ++ ++ address base = __ start_a_stub(CompiledStaticCall::to_interp_stub_size()); ++ if (base == NULL) return NULL; // CodeBuffer::expand failed ++ // static stub relocation stores the instruction address of the call ++ ++ __ relocate(static_stub_Relocation::spec(mark), 0); ++ ++ // Code stream for loading method may be changed. ++ __ synci(R0, 0); ++ ++ // Rmethod contains methodOop, it should be relocated for GC ++ // static stub relocation also tags the methodOop in the code-stream. ++ __ mov_metadata(Rmethod, NULL); ++ // This is recognized as unresolved by relocs/nativeInst/ic code ++ ++ __ relocate(relocInfo::runtime_call_type); ++ ++ cbuf.set_insts_mark(); ++ address call_pc = (address)-1; ++ __ patchable_jump(call_pc); ++ __ align(16); ++ // Update current stubs pointer and restore code_end. ++ __ end_a_stub(); ++ return base; ++} ++#undef __ ++ ++int CompiledStaticCall::to_interp_stub_size() { ++ int size = NativeInstruction::nop_instruction_size + NativeMovConstReg::instruction_size + NativeCall::instruction_size; ++ return round_to(size, 16); ++} ++ ++int CompiledStaticCall::to_trampoline_stub_size() { ++ return NativeInstruction::nop_instruction_size + NativeCallTrampolineStub::instruction_size; ++} ++ ++// Relocation entries for call stub, compiled java to interpreter. ++int CompiledStaticCall::reloc_to_interp_stub() { ++ return 16; ++} ++ ++void CompiledDirectStaticCall::set_to_interpreted(const methodHandle& callee, address entry) { ++ address stub = find_stub(false /* is_aot */); ++ guarantee(stub != NULL, "stub not found"); ++ ++ if (TraceICs) { ++ ResourceMark rm; ++ tty->print_cr("CompiledDirectStaticCall@" INTPTR_FORMAT ": set_to_interpreted %s", ++ p2i(instruction_address()), ++ callee->name_and_sig_as_C_string()); ++ } ++ ++ // Creation also verifies the object. 
++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); ++ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); ++ ++ assert(method_holder->data() == 0 || method_holder->data() == (intptr_t)callee(), ++ "a) MT-unsafe modification of inline cache"); ++ assert(jump->jump_destination() == (address)-1 || jump->jump_destination() == entry, ++ "b) MT-unsafe modification of inline cache"); ++ ++ // Update stub. ++ method_holder->set_data((intptr_t)callee()); ++ jump->set_jump_destination(entry); ++ ++ // Update jump to call. ++ set_destination_mt_safe(stub); ++} ++ ++void CompiledDirectStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub) { ++ assert (CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "mt unsafe call"); ++ // Reset stub. ++ address stub = static_stub->addr(); ++ assert(stub != NULL, "stub not found"); ++ // Creation also verifies the object. ++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); ++ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); ++ method_holder->set_data(0); ++ jump->set_jump_destination((address)-1); ++} ++ ++//----------------------------------------------------------------------------- ++// Non-product mode code ++#ifndef PRODUCT ++ ++void CompiledDirectStaticCall::verify() { ++ // Verify call. ++ _call->verify(); ++ if (os::is_MP()) { ++ _call->verify_alignment(); ++ } ++ ++ // Verify stub. ++ address stub = find_stub(false /* is_aot */); ++ assert(stub != NULL, "no stub found for static call"); ++ // Creation also verifies the object. ++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); ++ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); ++ ++ ++ // Verify state. ++ assert(is_clean() || is_call_to_compiled() || is_call_to_interpreted(), "sanity check"); ++} ++ ++#endif // !PRODUCT +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/copy_mips.hpp b/src/hotspot/cpu/mips/copy_mips.hpp +--- a/src/hotspot/cpu/mips/copy_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/copy_mips.hpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,77 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
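As read from emit_to_interp_stub() and set_to_interpreted() above, the to-interpreter stub is a fixed-size island: one instruction-sized slot carrying the static-stub relocation, a NativeMovConstReg that will receive the Method*, and a patchable jump, padded to a 16-byte boundary. The patching order (metadata word first, then the jump target, and only then the call site via set_destination_mt_safe) is what the MT-safety asserts check. The padding itself is a plain power-of-two align-up; a minimal sketch with a hypothetical name standing in for round_to():

// Sketch only: align-up as used for to_interp_stub_size(), where
// alignment is a power of two (16 here).
static int round_to_sketch(int size, int alignment) {
  return (size + alignment - 1) & ~(alignment - 1);
}
// e.g. round_to_sketch(36, 16) == 48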
++ *
++ */
++
++#ifndef CPU_MIPS_VM_COPY_MIPS_HPP
++#define CPU_MIPS_VM_COPY_MIPS_HPP
++
++// Inline functions for memory copy and fill.
++
++// Contains inline asm implementations
++#include OS_CPU_HEADER_INLINE(copy)
++
++// Template for atomic, element-wise copy.
++template <class T>
++static void copy_conjoint_atomic(const T* from, T* to, size_t count) {
++  if (from > to) {
++    while (count-- > 0) {
++      // Copy forwards
++      *to++ = *from++;
++    }
++  } else {
++    from += count - 1;
++    to   += count - 1;
++    while (count-- > 0) {
++      // Copy backwards
++      *to-- = *from--;
++    }
++  }
++}
++
++
++static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) {
++  julong* to = (julong*) tohw;
++  julong v = ((julong) value << 32) | value;
++  while (count-- > 0) {
++    *to++ = v;
++  }
++}
++
++static void pd_fill_to_aligned_words(HeapWord* tohw, size_t count, juint value) {
++  pd_fill_to_words(tohw, count, value);
++}
++
++static void pd_fill_to_bytes(void* to, size_t count, jubyte value) {
++  (void)memset(to, value, count);
++}
++
++static void pd_zero_to_words(HeapWord* tohw, size_t count) {
++  pd_fill_to_words(tohw, count, 0);
++}
++
++static void pd_zero_to_bytes(void* to, size_t count) {
++  (void)memset(to, 0, count);
++}
++
++#endif //CPU_MIPS_VM_COPY_MIPS_HPP
+diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/depChecker_mips.cpp b/src/hotspot/cpu/mips/depChecker_mips.cpp
+--- a/src/hotspot/cpu/mips/depChecker_mips.cpp 1970-01-01 08:00:00.000000000 +0800
++++ b/src/hotspot/cpu/mips/depChecker_mips.cpp 2024-01-30 10:00:11.844765024 +0800
+@@ -0,0 +1,30 @@
++/*
++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#include "precompiled.hpp"
++#include "compiler/disassembler.hpp"
++#include "depChecker_mips.hpp"
++
++// Nothing to do on mips
+diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/depChecker_mips.hpp b/src/hotspot/cpu/mips/depChecker_mips.hpp
+--- a/src/hotspot/cpu/mips/depChecker_mips.hpp 1970-01-01 08:00:00.000000000 +0800
++++ b/src/hotspot/cpu/mips/depChecker_mips.hpp 2024-01-30 10:00:11.844765024 +0800
+@@ -0,0 +1,31 @@
++/*
++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
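copy_conjoint_atomic() above chooses the copy direction from the relative position of the two ranges so an overlapping copy never reads an element it has already overwritten. The same rule on plain ints, as a standalone illustrative sketch:

#include <cstddef>
#include <cstdio>

// Sketch only: copy forwards when the source lies above the destination,
// backwards otherwise, so overlapping ranges are handled correctly.
static void conjoint_copy_sketch(const int* from, int* to, size_t count) {
  if (from > to) {
    for (size_t i = 0; i < count; i++) to[i] = from[i];   // forwards
  } else {
    for (size_t i = count; i-- > 0; )  to[i] = from[i];   // backwards
  }
}

int main() {
  int a[5] = {1, 2, 3, 4, 5};
  conjoint_copy_sketch(a, a + 1, 4);    // overlapping shift right by one
  for (int v : a) printf("%d ", v);     // 1 1 2 3 4
  return 0;
}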
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_DEPCHECKER_MIPS_HPP ++#define CPU_MIPS_VM_DEPCHECKER_MIPS_HPP ++ ++// Nothing to do on MIPS ++ ++#endif // CPU_MIPS_VM_DEPCHECKER_MIPS_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/disassembler_mips.hpp b/src/hotspot/cpu/mips/disassembler_mips.hpp +--- a/src/hotspot/cpu/mips/disassembler_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/disassembler_mips.hpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,37 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_DISASSEMBLER_MIPS_HPP ++#define CPU_MIPS_VM_DISASSEMBLER_MIPS_HPP ++ ++ static int pd_instruction_alignment() { ++ return sizeof(int); ++ } ++ ++ static const char* pd_cpu_opts() { ++ return "gpr-names=64"; ++ } ++ ++#endif // CPU_MIPS_VM_DISASSEMBLER_MIPS_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/frame_mips.cpp b/src/hotspot/cpu/mips/frame_mips.cpp +--- a/src/hotspot/cpu/mips/frame_mips.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/frame_mips.cpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,690 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/resourceArea.hpp" ++#include "oops/markOop.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/javaCalls.hpp" ++#include "runtime/monitorChunk.hpp" ++#include "runtime/signature.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "vmreg_mips.inline.hpp" ++ ++#ifdef ASSERT ++void RegisterMap::check_location_valid() { ++} ++#endif ++ ++ ++// Profiling/safepoint support ++// for Profiling - acting on another frame. walks sender frames ++// if valid. ++// frame profile_find_Java_sender_frame(JavaThread *thread); ++ ++bool frame::safe_for_sender(JavaThread *thread) { ++ address sp = (address)_sp; ++ address fp = (address)_fp; ++ address unextended_sp = (address)_unextended_sp; ++ ++ // consider stack guards when trying to determine "safe" stack pointers ++ static size_t stack_guard_size = os::uses_stack_guard_pages() ? ++ JavaThread::stack_red_zone_size() + JavaThread::stack_yellow_zone_size() : 0; ++ size_t usable_stack_size = thread->stack_size() - stack_guard_size; ++ ++ // sp must be within the usable part of the stack (not in guards) ++ bool sp_safe = (sp < thread->stack_base()) && ++ (sp >= thread->stack_base() - usable_stack_size); ++ ++ ++ if (!sp_safe) { ++ return false; ++ } ++ ++ // unextended sp must be within the stack and above or equal sp ++ bool unextended_sp_safe = (unextended_sp < thread->stack_base()) && ++ (unextended_sp >= sp); ++ ++ if (!unextended_sp_safe) { ++ return false; ++ } ++ ++ // an fp must be within the stack and above (but not equal) sp ++ // second evaluation on fp+ is added to handle situation where fp is -1 ++ bool fp_safe = (fp < thread->stack_base() && (fp > sp) && (((fp + (return_addr_offset * sizeof(void*))) < thread->stack_base()))); ++ ++ // We know sp/unextended_sp are safe only fp is questionable here ++ ++ // If the current frame is known to the code cache then we can attempt to ++ // construct the sender and do some validation of it. This goes a long way ++ // toward eliminating issues when we get in frame construction code ++ ++ if (_cb != NULL ) { ++ ++ // First check if frame is complete and tester is reliable ++ // Unfortunately we can only check frame complete for runtime stubs and nmethod ++ // other generic buffer blobs are more problematic so we just assume they are ++ // ok. 
adapter blobs never have a frame complete and are never ok. ++ ++ if (!_cb->is_frame_complete_at(_pc)) { ++ if (_cb->is_compiled() || _cb->is_adapter_blob() || _cb->is_runtime_stub()) { ++ return false; ++ } ++ } ++ ++ // Could just be some random pointer within the codeBlob ++ if (!_cb->code_contains(_pc)) { ++ return false; ++ } ++ ++ // Entry frame checks ++ if (is_entry_frame()) { ++ // an entry frame must have a valid fp. ++ return fp_safe && is_entry_frame_valid(thread); ++ } ++ ++ intptr_t* sender_sp = NULL; ++ intptr_t* sender_unextended_sp = NULL; ++ address sender_pc = NULL; ++ intptr_t* saved_fp = NULL; ++ ++ if (is_interpreted_frame()) { ++ // fp must be safe ++ if (!fp_safe) { ++ return false; ++ } ++ ++ sender_pc = (address) this->fp()[return_addr_offset]; ++ // for interpreted frames, the value below is the sender "raw" sp, ++ // which can be different from the sender unextended sp (the sp seen ++ // by the sender) because of current frame local variables ++ sender_sp = (intptr_t*) addr_at(sender_sp_offset); ++ sender_unextended_sp = (intptr_t*) this->fp()[interpreter_frame_sender_sp_offset]; ++ saved_fp = (intptr_t*) this->fp()[link_offset]; ++ ++ } else { ++ // must be some sort of compiled/runtime frame ++ // fp does not have to be safe (although it could be check for c1?) ++ ++ // check for a valid frame_size, otherwise we are unlikely to get a valid sender_pc ++ if (_cb->frame_size() <= 0) { ++ return false; ++ } ++ ++ sender_sp = _unextended_sp + _cb->frame_size(); ++ // Is sender_sp safe? ++ if ((address)sender_sp >= thread->stack_base()) { ++ return false; ++ } ++ sender_unextended_sp = sender_sp; ++ // On MIPS the return_address is always the word on the stack ++ sender_pc = (address) *(sender_sp-1); ++ // Note: frame::sender_sp_offset is only valid for compiled frame ++ saved_fp = (intptr_t*) *(sender_sp - frame::sender_sp_offset); ++ } ++ ++ ++ // If the potential sender is the interpreter then we can do some more checking ++ if (Interpreter::contains(sender_pc)) { ++ ++ // FP is always saved in a recognizable place in any code we generate. However ++ // only if the sender is interpreted/call_stub (c1 too?) are we certain that the saved FP ++ // is really a frame pointer. 
++ ++ bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); ++ ++ if (!saved_fp_safe) { ++ return false; ++ } ++ ++ // construct the potential sender ++ ++ frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); ++ ++ return sender.is_interpreted_frame_valid(thread); ++ ++ } ++ ++ // We must always be able to find a recognizable pc ++ CodeBlob* sender_blob = CodeCache::find_blob_unsafe(sender_pc); ++ if (sender_pc == NULL || sender_blob == NULL) { ++ return false; ++ } ++ ++ // Could be a zombie method ++ if (sender_blob->is_zombie() || sender_blob->is_unloaded()) { ++ return false; ++ } ++ ++ // Could just be some random pointer within the codeBlob ++ if (!sender_blob->code_contains(sender_pc)) { ++ return false; ++ } ++ ++ // We should never be able to see an adapter if the current frame is something from code cache ++ if (sender_blob->is_adapter_blob()) { ++ return false; ++ } ++ ++ // Could be the call_stub ++ if (StubRoutines::returns_to_call_stub(sender_pc)) { ++ bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); ++ ++ if (!saved_fp_safe) { ++ return false; ++ } ++ ++ // construct the potential sender ++ ++ frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); ++ ++ // Validate the JavaCallWrapper an entry frame must have ++ address jcw = (address)sender.entry_frame_call_wrapper(); ++ ++ bool jcw_safe = (jcw < thread->stack_base()) && ( jcw > (address)sender.fp()); ++ ++ return jcw_safe; ++ } ++ ++ CompiledMethod* nm = sender_blob->as_compiled_method_or_null(); ++ if (nm != NULL) { ++ if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc) || ++ nm->method()->is_method_handle_intrinsic()) { ++ return false; ++ } ++ } ++ ++ // If the frame size is 0 something (or less) is bad because every nmethod has a non-zero frame size ++ // because the return address counts against the callee's frame. ++ ++ if (sender_blob->frame_size() <= 0) { ++ assert(!sender_blob->is_compiled(), "should count return address at least"); ++ return false; ++ } ++ ++ // We should never be able to see anything here except an nmethod. If something in the ++ // code cache (current frame) is called by an entity within the code cache that entity ++ // should not be anything but the call stub (already covered), the interpreter (already covered) ++ // or an nmethod. ++ ++ if (!sender_blob->is_compiled()) { ++ return false; ++ } ++ ++ // Could put some more validation for the potential non-interpreted sender ++ // frame we'd create by calling sender if I could think of any. Wait for next crash in forte... ++ ++ // One idea is seeing if the sender_pc we have is one that we'd expect to call to current cb ++ ++ // We've validated the potential sender that would be created ++ return true; ++ } ++ ++ // Must be native-compiled frame. Since sender will try and use fp to find ++ // linkages it must be safe ++ ++ if (!fp_safe) { ++ return false; ++ } ++ ++ // Will the pc we fetch be non-zero (which we'll find at the oldest frame) ++ ++ if ( (address) this->fp()[return_addr_offset] == NULL) return false; ++ ++ ++ // could try and do some more potential verification of native frame if we could think of some... 
++ ++ return true; ++ ++} ++ ++void frame::patch_pc(Thread* thread, address pc) { ++ address* pc_addr = &(((address*) sp())[-1]); ++ if (TracePcPatching) { ++ tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "]", ++ p2i(pc_addr), p2i(*pc_addr), p2i(pc)); ++ } ++ // Either the return address is the original one or we are going to ++ // patch in the same address that's already there. ++ assert(_pc == *pc_addr || pc == *pc_addr, "must be"); ++ *pc_addr = pc; ++ _cb = CodeCache::find_blob(pc); ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ assert(original_pc == _pc, "expected original PC to be stored before patching"); ++ _deopt_state = is_deoptimized; ++ // leave _pc as is ++ } else { ++ _deopt_state = not_deoptimized; ++ _pc = pc; ++ } ++} ++ ++bool frame::is_interpreted_frame() const { ++ return Interpreter::contains(pc()); ++} ++ ++int frame::frame_size(RegisterMap* map) const { ++ frame sender = this->sender(map); ++ return sender.sp() - sp(); ++} ++ ++intptr_t* frame::entry_frame_argument_at(int offset) const { ++ // convert offset to index to deal with tsi ++ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); ++ // Entry frame's arguments are always in relation to unextended_sp() ++ return &unextended_sp()[index]; ++} ++ ++// sender_sp ++#ifdef CC_INTERP ++intptr_t* frame::interpreter_frame_sender_sp() const { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ // QQQ why does this specialize method exist if frame::sender_sp() does same thing? ++ // seems odd and if we always know interpreted vs. non then sender_sp() is really ++ // doing too much work. ++ return get_interpreterState()->sender_sp(); ++} ++ ++// monitor elements ++ ++BasicObjectLock* frame::interpreter_frame_monitor_begin() const { ++ return get_interpreterState()->monitor_base(); ++} ++ ++BasicObjectLock* frame::interpreter_frame_monitor_end() const { ++ return (BasicObjectLock*) get_interpreterState()->stack_base(); ++} ++ ++#else // CC_INTERP ++ ++intptr_t* frame::interpreter_frame_sender_sp() const { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ return (intptr_t*) at(interpreter_frame_sender_sp_offset); ++} ++ ++void frame::set_interpreter_frame_sender_sp(intptr_t* sender_sp) { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ int_at_put(interpreter_frame_sender_sp_offset, (intptr_t) sender_sp); ++} ++ ++ ++// monitor elements ++ ++BasicObjectLock* frame::interpreter_frame_monitor_begin() const { ++ return (BasicObjectLock*) addr_at(interpreter_frame_monitor_block_bottom_offset); ++} ++ ++BasicObjectLock* frame::interpreter_frame_monitor_end() const { ++ BasicObjectLock* result = (BasicObjectLock*) *addr_at(interpreter_frame_monitor_block_top_offset); ++ // make sure the pointer points inside the frame ++ assert((intptr_t) fp() > (intptr_t) result, "result must < than frame pointer"); ++ assert((intptr_t) sp() <= (intptr_t) result, "result must >= than stack pointer"); ++ return result; ++} ++ ++void frame::interpreter_frame_set_monitor_end(BasicObjectLock* value) { ++ *((BasicObjectLock**)addr_at(interpreter_frame_monitor_block_top_offset)) = value; ++} ++ ++// Used by template based interpreter deoptimization ++void frame::interpreter_frame_set_last_sp(intptr_t* sp) { ++ *((intptr_t**)addr_at(interpreter_frame_last_sp_offset)) = sp; ++} ++#endif // CC_INTERP ++ ++frame frame::sender_for_entry_frame(RegisterMap* map) const { ++ assert(map != NULL, 
"map must be set"); ++ // Java frame called from C; skip all C frames and return top C ++ // frame of that chunk as the sender ++ JavaFrameAnchor* jfa = entry_frame_call_wrapper()->anchor(); ++ assert(!entry_frame_is_first(), "next Java fp must be non zero"); ++ assert(jfa->last_Java_sp() > sp(), "must be above this frame on stack"); ++ map->clear(); ++ assert(map->include_argument_oops(), "should be set by clear"); ++ if (jfa->last_Java_pc() != NULL ) { ++ frame fr(jfa->last_Java_sp(), jfa->last_Java_fp(), jfa->last_Java_pc()); ++ return fr; ++ } ++ frame fr(jfa->last_Java_sp(), jfa->last_Java_fp()); ++ return fr; ++} ++ ++frame frame::sender_for_interpreter_frame(RegisterMap* map) const { ++ // sp is the raw sp from the sender after adapter or interpreter extension ++ intptr_t* sender_sp = this->sender_sp(); ++ ++ // This is the sp before any possible extension (adapter/locals). ++ intptr_t* unextended_sp = interpreter_frame_sender_sp(); ++ ++ // The interpreter and compiler(s) always save FP in a known ++ // location on entry. We must record where that location is ++ // so this if FP was live on callout from c2 we can find ++ // the saved copy no matter what it called. ++ ++ // Since the interpreter always saves FP if we record where it is then ++ // we don't have to always save FP on entry and exit to c2 compiled ++ // code, on entry will be enough. ++#ifdef COMPILER2 ++ if (map->update_map()) { ++ update_map_with_saved_link(map, (intptr_t**) addr_at(link_offset)); ++ } ++#endif /* COMPILER2 */ ++ return frame(sender_sp, unextended_sp, link(), sender_pc()); ++} ++ ++ ++//------------------------------------------------------------------------------ ++// frame::verify_deopt_original_pc ++// ++// Verifies the calculated original PC of a deoptimization PC for the ++// given unextended SP. The unextended SP might also be the saved SP ++// for MethodHandle call sites. ++#ifdef ASSERT ++void frame::verify_deopt_original_pc(CompiledMethod* nm, intptr_t* unextended_sp) { ++ frame fr; ++ ++ // This is ugly but it's better than to change {get,set}_original_pc ++ // to take an SP value as argument. And it's only a debugging ++ // method anyway. ++ fr._unextended_sp = unextended_sp; ++ ++ address original_pc = nm->get_original_pc(&fr); ++ assert(nm->insts_contains(original_pc), ++ "original PC must be in the main code section of the the compiled method (or must be immediately following it)"); ++} ++#endif ++ ++ ++//------------------------------------------------------------------------------ ++// frame::adjust_unextended_sp ++void frame::adjust_unextended_sp() { ++ // On MIPS, sites calling method handle intrinsics and lambda forms are treated ++ // as any other call site. Therefore, no special action is needed when we are ++ // returning to any of these call sites. ++ ++ if (_cb != NULL) { ++ CompiledMethod* sender_cm = _cb->as_compiled_method_or_null(); ++ if (sender_cm != NULL) { ++ // If the sender PC is a deoptimization point, get the original PC. ++ if (sender_cm->is_deopt_entry(_pc) || ++ sender_cm->is_deopt_mh_entry(_pc)) { ++ DEBUG_ONLY(verify_deopt_original_pc(sender_cm, _unextended_sp)); ++ } ++ } ++ } ++} ++ ++//------------------------------------------------------------------------------ ++// frame::update_map_with_saved_link ++void frame::update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr) { ++ // The interpreter and compiler(s) always save fp in a known ++ // location on entry. 
We must record where that location is
++  // so that if fp was live on callout from c2 we can find
++  // the saved copy no matter what it called.
++
++  // Since the interpreter always saves fp if we record where it is then
++  // we don't have to always save fp on entry and exit to c2 compiled
++  // code, on entry will be enough.
++  map->set_location(FP->as_VMReg(), (address) link_addr);
++  // this is weird "H" ought to be at a higher address however the
++  // oopMaps seems to have the "H" regs at the same address and the
++  // vanilla register.
++  // XXXX make this go away
++  if (true) {
++    map->set_location(FP->as_VMReg()->next(), (address) link_addr);
++  }
++}
++
++//------------------------------sender_for_compiled_frame-----------------------
++frame frame::sender_for_compiled_frame(RegisterMap* map) const {
++  assert(map != NULL, "map must be set");
++
++  // frame owned by optimizing compiler
++  assert(_cb->frame_size() >= 0, "must have non-zero frame size");
++
++  intptr_t* sender_sp = unextended_sp() + _cb->frame_size();
++  intptr_t* unextended_sp = sender_sp;
++
++  // On Loongson the return_address is always the word on the stack.
++  // In compiled code fp points to the sender's fp, while in the interpreter fp points to the return address,
++  // so getting the sender of a compiled frame is not the same as for an interpreter frame.
++  // we hard code here temporarily
++  // spark
++  address sender_pc = (address) *(sender_sp-1);
++
++  intptr_t** saved_fp_addr = (intptr_t**) (sender_sp - frame::sender_sp_offset);
++
++  if (map->update_map()) {
++    // Tell GC to use argument oopmaps for some runtime stubs that need it.
++    // For C1, the runtime stub might not have oop maps, so set this flag
++    // outside of update_register_map.
++    map->set_include_argument_oops(_cb->caller_must_gc_arguments(map->thread()));
++    if (_cb->oop_maps() != NULL) {
++      OopMapSet::update_register_map(this, map);
++    }
++
++    // Since the prolog does the save and restore of fp there is no oopmap
++    // for it so we must fill in its location as if there was an oopmap entry
++    // since if our caller was compiled code there could be live jvm state in it.
++    update_map_with_saved_link(map, saved_fp_addr);
++  }
++  assert(sender_sp != sp(), "must have changed");
++  return frame(sender_sp, unextended_sp, *saved_fp_addr, sender_pc);
++}
++
++frame frame::sender(RegisterMap* map) const {
++  // Default is we don't have to follow them. The sender_for_xxx will
++  // update it accordingly
++  map->set_include_argument_oops(false);
++
++  if (is_entry_frame()) return sender_for_entry_frame(map);
++  if (is_interpreted_frame()) return sender_for_interpreter_frame(map);
++  assert(_cb == CodeCache::find_blob(pc()),"Must be the same");
++
++  if (_cb != NULL) {
++    return sender_for_compiled_frame(map);
++  }
++  // Must be native-compiled frame, i.e. the marshaling code for native
++  // methods that exists in the core system.
++  return frame(sender_sp(), link(), sender_pc());
++}
++
++bool frame::is_interpreted_frame_valid(JavaThread* thread) const {
++// QQQ
++#ifdef CC_INTERP
++#else
++  assert(is_interpreted_frame(), "Not an interpreted frame");
++  // These are reasonable sanity checks
++  if (fp() == 0 || (intptr_t(fp()) & (wordSize-1)) != 0) {
++    return false;
++  }
++  if (sp() == 0 || (intptr_t(sp()) & (wordSize-1)) != 0) {
++    return false;
++  }
++  if (fp() + interpreter_frame_initial_sp_offset < sp()) {
++    return false;
++  }
++  // These are hacks to keep us out of trouble.
++ // The problem with these is that they mask other problems ++ if (fp() <= sp()) { // this attempts to deal with unsigned comparison above ++ return false; ++ } ++ ++ // do some validation of frame elements ++ ++ // first the method ++ ++ Method* m = *interpreter_frame_method_addr(); ++ ++ // validate the method we'd find in this potential sender ++ if (!Method::is_valid_method(m)) return false; ++ ++ // stack frames shouldn't be much larger than max_stack elements ++ ++ //if (fp() - sp() > 1024 + m->max_stack()*Interpreter::stackElementSize()) { ++ if (fp() - sp() > 4096) { // stack frames shouldn't be large. ++ return false; ++ } ++ ++ // validate bci/bcp ++ ++ address bcp = interpreter_frame_bcp(); ++ if (m->validate_bci_from_bcp(bcp) < 0) { ++ return false; ++ } ++ ++ // validate ConstantPoolCache* ++ ++ ConstantPoolCache* cp = *interpreter_frame_cache_addr(); ++ ++ if (MetaspaceObj::is_valid(cp) == false) return false; ++ ++ // validate locals ++ ++ address locals = (address) *interpreter_frame_locals_addr(); ++ ++ if (locals > thread->stack_base() || locals < (address) fp()) return false; ++ ++ // We'd have to be pretty unlucky to be mislead at this point ++ ++#endif // CC_INTERP ++ return true; ++} ++ ++BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) { ++#ifdef CC_INTERP ++ // Needed for JVMTI. The result should always be in the interpreterState object ++ assert(false, "NYI"); ++ interpreterState istate = get_interpreterState(); ++#endif // CC_INTERP ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ Method* method = interpreter_frame_method(); ++ BasicType type = method->result_type(); ++ ++ intptr_t* tos_addr; ++ if (method->is_native()) { ++ // Prior to calling into the runtime to report the method_exit the possible ++ // return value is pushed to the native stack. If the result is a jfloat/jdouble ++ // then ST0 is saved. See the note in generate_native_result ++ tos_addr = (intptr_t*)sp(); ++ if (type == T_FLOAT || type == T_DOUBLE) { ++ tos_addr += 2; ++ } ++ } else { ++ tos_addr = (intptr_t*)interpreter_frame_tos_address(); ++ } ++ ++ switch (type) { ++ case T_OBJECT : ++ case T_ARRAY : { ++ oop obj; ++ if (method->is_native()) { ++#ifdef CC_INTERP ++ obj = istate->_oop_temp; ++#else ++ obj = cast_to_oop(at(interpreter_frame_oop_temp_offset)); ++#endif // CC_INTERP ++ } else { ++ oop* obj_p = (oop*)tos_addr; ++ obj = (obj_p == NULL) ? 
(oop)NULL : *obj_p; ++ } ++ assert(obj == NULL || Universe::heap()->is_in(obj), "sanity check"); ++ *oop_result = obj; ++ break; ++ } ++ case T_BOOLEAN : value_result->z = *(jboolean*)tos_addr; break; ++ case T_BYTE : value_result->b = *(jbyte*)tos_addr; break; ++ case T_CHAR : value_result->c = *(jchar*)tos_addr; break; ++ case T_SHORT : value_result->s = *(jshort*)tos_addr; break; ++ case T_INT : value_result->i = *(jint*)tos_addr; break; ++ case T_LONG : value_result->j = *(jlong*)tos_addr; break; ++ case T_FLOAT : value_result->f = *(jfloat*)tos_addr; break; ++ case T_DOUBLE : value_result->d = *(jdouble*)tos_addr; break; ++ case T_VOID : /* Nothing to do */ break; ++ default : ShouldNotReachHere(); ++ } ++ ++ return type; ++} ++ ++ ++intptr_t* frame::interpreter_frame_tos_at(jint offset) const { ++ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); ++ return &interpreter_frame_tos_address()[index]; ++} ++ ++#ifndef PRODUCT ++ ++#define DESCRIBE_FP_OFFSET(name) \ ++ values.describe(frame_no, fp() + frame::name##_offset, #name) ++ ++void frame::describe_pd(FrameValues& values, int frame_no) { ++ if (is_interpreted_frame()) { ++ DESCRIBE_FP_OFFSET(interpreter_frame_sender_sp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_last_sp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_method); ++ DESCRIBE_FP_OFFSET(interpreter_frame_mirror); ++ DESCRIBE_FP_OFFSET(interpreter_frame_mdp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_cache); ++ DESCRIBE_FP_OFFSET(interpreter_frame_locals); ++ DESCRIBE_FP_OFFSET(interpreter_frame_bcp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_initial_sp); ++ } ++} ++#endif ++ ++intptr_t *frame::initial_deoptimization_info() { ++ // used to reset the saved FP ++ return fp(); ++} ++ ++intptr_t* frame::real_fp() const { ++ if (_cb != NULL) { ++ // use the frame size if valid ++ int size = _cb->frame_size(); ++ if (size > 0) { ++ return unextended_sp() + size; ++ } ++ } ++ // else rely on fp() ++ assert(! is_compiled_frame(), "unknown compiled frame size"); ++ return fp(); ++} ++ ++#ifndef PRODUCT ++// This is a generic constructor which is only used by pns() in debug.cpp. ++frame::frame(void* sp, void* fp, void* pc) { ++ init((intptr_t*)sp, (intptr_t*)fp, (address)pc); ++} ++ ++void frame::pd_ps() {} ++#endif +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/frame_mips.hpp b/src/hotspot/cpu/mips/frame_mips.hpp +--- a/src/hotspot/cpu/mips/frame_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/frame_mips.hpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,215 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef CPU_MIPS_VM_FRAME_MIPS_HPP
++#define CPU_MIPS_VM_FRAME_MIPS_HPP
++
++#include "runtime/synchronizer.hpp"
++
++// A frame represents a physical stack frame (an activation). Frames can be
++// C or Java frames, and the Java frames can be interpreted or compiled.
++// In contrast, vframes represent source-level activations, so that one physical frame
++// can correspond to multiple source level frames because of inlining.
++// A frame is comprised of {pc, fp, sp}
++// ------------------------------ Asm interpreter ----------------------------------------
++// Layout of asm interpreter frame:
++// [expression stack ] * <- sp
++// [monitors ] \
++// ... | monitor block size
++// [monitors ] /
++// [monitor block size ]
++// [byte code index/pointer] = bcx() bcx_offset
++// [pointer to locals ] = locals() locals_offset
++// [constant pool cache ] = cache() cache_offset
++// [methodData ] = mdp() mdx_offset
++// [methodOop ] = method() method_offset
++// [last sp ] = last_sp() last_sp_offset
++// [old stack pointer ] (sender_sp) sender_sp_offset
++// [old frame pointer ] <- fp = link()
++// [return pc ]
++// [oop temp ] (only for native calls)
++// [locals and parameters ]
++// <- sender sp
++// ------------------------------ Asm interpreter ----------------------------------------
++
++// ------------------------------ C++ interpreter ----------------------------------------
++//
++// Layout of C++ interpreter frame: (While executing in BytecodeInterpreter::run)
++//
++// <- SP (current sp)
++// [local variables ] BytecodeInterpreter::run local variables
++// ... BytecodeInterpreter::run local variables
++// [local variables ] BytecodeInterpreter::run local variables
++// [old frame pointer ] fp [ BytecodeInterpreter::run's fp ]
++// [return pc ] (return to frame manager)
++// [interpreter_state* ] (arg to BytecodeInterpreter::run) --------------
++// [expression stack ] <- last_Java_sp |
++// [... ] * <- interpreter_state.stack |
++// [expression stack ] * <- interpreter_state.stack_base |
++// [monitors ] \ |
++// ... | monitor block size |
++// [monitors ] / <- interpreter_state.monitor_base |
++// [struct interpretState ] <-----------------------------------------|
++// [return pc ] (return to callee of frame manager [1]
++// [locals and parameters ]
++// <- sender sp
++
++// [1] When the c++ interpreter calls a new method it returns to the frame
++// manager which allocates a new frame on the stack. In that case there
++// is no real callee of this newly allocated frame. The frame manager is
++// aware of the additional frame(s) and will pop them as nested calls
++// complete. However, to make it look good in the debugger the frame
++// manager actually installs a dummy pc pointing to RecursiveInterpreterActivation
++// with a fake interpreter_state* parameter to make it easy to debug
++// nested calls.
++
++// Note that contrary to the layout for the assembly interpreter the
++// expression stack allocated for the C++ interpreter is full sized.
++// However this is not as bad as it seems as the interpreter frame_manager
++// will truncate the unused space on successive method calls.
++//
++// ------------------------------ C++ interpreter ----------------------------------------
++
++// Layout of interpreter frame:
++//
++// [ monitor entry ] <--- sp
++// ...
++// [ monitor entry ]
++// -9 [ monitor block top ] ( the top monitor entry )
++// -8 [ byte code pointer ] (if native, bcp = 0)
++// -7 [ constant pool cache ]
++// -6 [ methodData ] mdx_offset(not core only)
++// -5 [ mirror ]
++// -4 [ methodOop ]
++// -3 [ locals offset ]
++// -2 [ last_sp ]
++// -1 [ sender's sp ]
++// 0 [ sender's fp ] <--- fp
++// 1 [ return address ]
++// 2 [ oop temp offset ] (only for native calls)
++// 3 [ result handler offset ] (only for native calls)
++// 4 [ result type info ] (only for native calls)
++// [ local var m-1 ]
++// ...
++// [ local var 0 ]
++// [ argument word n-1 ] <--- ( sender's sp )
++// ...
++// [ argument word 0 ] <--- S7
++
++ public:
++  enum {
++    pc_return_offset = 0,
++    // All frames
++    link_offset = 0,
++    return_addr_offset = 1,
++    // non-interpreter frames
++    sender_sp_offset = 2,
++
++    // Interpreter frames
++    interpreter_frame_return_addr_offset = 1,
++    interpreter_frame_result_handler_offset = 3, // for native calls only
++    interpreter_frame_oop_temp_offset = 2, // for native calls only
++
++    interpreter_frame_sender_fp_offset = 0,
++    interpreter_frame_sender_sp_offset = -1,
++    // outgoing sp before a call to an invoked method
++    interpreter_frame_last_sp_offset = interpreter_frame_sender_sp_offset - 1,
++    interpreter_frame_locals_offset = interpreter_frame_last_sp_offset - 1,
++    interpreter_frame_method_offset = interpreter_frame_locals_offset - 1,
++    interpreter_frame_mirror_offset = interpreter_frame_method_offset - 1,
++    interpreter_frame_mdp_offset = interpreter_frame_mirror_offset - 1,
++    interpreter_frame_cache_offset = interpreter_frame_mdp_offset - 1,
++    interpreter_frame_bcp_offset = interpreter_frame_cache_offset - 1,
++    interpreter_frame_initial_sp_offset = interpreter_frame_bcp_offset - 1,
++
++    interpreter_frame_monitor_block_top_offset = interpreter_frame_initial_sp_offset,
++    interpreter_frame_monitor_block_bottom_offset = interpreter_frame_initial_sp_offset,
++
++    // Entry frames
++    entry_frame_call_wrapper_offset = -9,
++
++    // Native frames
++
++    native_frame_initial_param_offset = 2
++
++  };
++
++  intptr_t ptr_at(int offset) const {
++    return *ptr_at_addr(offset);
++  }
++
++  void ptr_at_put(int offset, intptr_t value) {
++    *ptr_at_addr(offset) = value;
++  }
++
++ private:
++  // an additional field beyond _sp and _pc:
++  intptr_t* _fp; // frame pointer
++  // The interpreter and adapters will extend the frame of the caller.
++  // Since oopMaps are based on the sp of the caller before extension
++  // we need to know that value. However in order to compute the address
++  // of the return address we need the real "raw" sp. Since sparc already
++  // uses sp() to mean "raw" sp and unextended_sp() to mean the caller's
++  // original sp we use that convention.
++ ++ intptr_t* _unextended_sp; ++ void adjust_unextended_sp(); ++ ++ intptr_t* ptr_at_addr(int offset) const { ++ return (intptr_t*) addr_at(offset); ++ } ++#ifdef ASSERT ++ // Used in frame::sender_for_{interpreter,compiled}_frame ++ static void verify_deopt_original_pc(CompiledMethod* nm, intptr_t* unextended_sp); ++#endif ++ ++ public: ++ // Constructors ++ ++ frame(intptr_t* sp, intptr_t* fp, address pc); ++ ++ frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc); ++ ++ frame(intptr_t* sp, intptr_t* fp); ++ ++ void init(intptr_t* sp, intptr_t* fp, address pc); ++ ++ // accessors for the instance variables ++ intptr_t* fp() const { return _fp; } ++ ++ inline address* sender_pc_addr() const; ++ ++ // expression stack tos if we are nested in a java call ++ intptr_t* interpreter_frame_last_sp() const; ++ ++ // helper to update a map with callee-saved FP ++ static void update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr); ++ ++ // deoptimization support ++ void interpreter_frame_set_last_sp(intptr_t* sp); ++ ++ static jint interpreter_frame_expression_stack_direction() { return -1; } ++ ++#endif // CPU_MIPS_VM_FRAME_MIPS_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/frame_mips.inline.hpp b/src/hotspot/cpu/mips/frame_mips.inline.hpp +--- a/src/hotspot/cpu/mips/frame_mips.inline.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/frame_mips.inline.hpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,238 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_FRAME_MIPS_INLINE_HPP ++#define CPU_MIPS_VM_FRAME_MIPS_INLINE_HPP ++ ++#include "code/codeCache.hpp" ++#include "code/vmreg.inline.hpp" ++ ++// Inline functions for Loongson frames: ++ ++// Constructors: ++ ++inline frame::frame() { ++ _pc = NULL; ++ _sp = NULL; ++ _unextended_sp = NULL; ++ _fp = NULL; ++ _cb = NULL; ++ _deopt_state = unknown; ++} ++ ++inline void frame::init(intptr_t* sp, intptr_t* fp, address pc) { ++ _sp = sp; ++ _unextended_sp = sp; ++ _fp = fp; ++ _pc = pc; ++ assert(pc != NULL, "no pc?"); ++ _cb = CodeCache::find_blob(pc); ++ adjust_unextended_sp(); ++ ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} ++ ++inline frame::frame(intptr_t* sp, intptr_t* fp, address pc) { ++ init(sp, fp, pc); ++} ++ ++inline frame::frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc) { ++ _sp = sp; ++ _unextended_sp = unextended_sp; ++ _fp = fp; ++ _pc = pc; ++ assert(pc != NULL, "no pc?"); ++ _cb = CodeCache::find_blob(pc); ++ adjust_unextended_sp(); ++ ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} ++ ++inline frame::frame(intptr_t* sp, intptr_t* fp) { ++ _sp = sp; ++ _unextended_sp = sp; ++ _fp = fp; ++ _pc = (address)(sp[-1]); ++ ++ // Here's a sticky one. This constructor can be called via AsyncGetCallTrace ++ // when last_Java_sp is non-null but the pc fetched is junk. If we are truly ++ // unlucky the junk value could be to a zombied method and we'll die on the ++ // find_blob call. This is also why we can have no asserts on the validity ++ // of the pc we find here. AsyncGetCallTrace -> pd_get_top_frame_for_signal_handler ++ // -> pd_last_frame should use a specialized version of pd_last_frame which could ++ // call a specilaized frame constructor instead of this one. ++ // Then we could use the assert below. However this assert is of somewhat dubious ++ // value. ++ // assert(_pc != NULL, "no pc?"); ++ ++ _cb = CodeCache::find_blob(_pc); ++ adjust_unextended_sp(); ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} ++ ++// Accessors ++ ++inline bool frame::equal(frame other) const { ++ bool ret = sp() == other.sp() ++ && unextended_sp() == other.unextended_sp() ++ && fp() == other.fp() ++ && pc() == other.pc(); ++ assert(!ret || ret && cb() == other.cb() && _deopt_state == other._deopt_state, "inconsistent construction"); ++ return ret; ++} ++ ++// Return unique id for this frame. The id must have a value where we can distinguish ++// identity and younger/older relationship. NULL represents an invalid (incomparable) ++// frame. 
++inline intptr_t* frame::id(void) const { return unextended_sp(); } ++ ++// Relationals on frames based ++// Return true if the frame is younger (more recent activation) than the frame represented by id ++inline bool frame::is_younger(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); ++ return this->id() < id ; } ++ ++// Return true if the frame is older (less recent activation) than the frame represented by id ++inline bool frame::is_older(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); ++ return this->id() > id ; } ++ ++inline intptr_t* frame::link() const { ++ return (intptr_t*) *(intptr_t **)addr_at(link_offset); ++} ++ ++inline intptr_t* frame::link_or_null() const { ++ intptr_t** ptr = (intptr_t **)addr_at(link_offset); ++ return os::is_readable_pointer(ptr) ? *ptr : NULL; ++} ++ ++inline intptr_t* frame::unextended_sp() const { return _unextended_sp; } ++ ++// Return address: ++ ++inline address* frame::sender_pc_addr() const { return (address*) addr_at( return_addr_offset); } ++inline address frame::sender_pc() const { return *sender_pc_addr(); } ++ ++inline intptr_t* frame::sender_sp() const { return addr_at( sender_sp_offset); } ++ ++inline intptr_t** frame::interpreter_frame_locals_addr() const { ++ return (intptr_t**)addr_at(interpreter_frame_locals_offset); ++} ++ ++inline intptr_t* frame::interpreter_frame_last_sp() const { ++ return *(intptr_t**)addr_at(interpreter_frame_last_sp_offset); ++} ++ ++inline intptr_t* frame::interpreter_frame_bcp_addr() const { ++ return (intptr_t*)addr_at(interpreter_frame_bcp_offset); ++} ++ ++ ++inline intptr_t* frame::interpreter_frame_mdp_addr() const { ++ return (intptr_t*)addr_at(interpreter_frame_mdp_offset); ++} ++ ++ ++ ++// Constant pool cache ++ ++inline ConstantPoolCache** frame::interpreter_frame_cache_addr() const { ++ return (ConstantPoolCache**)addr_at(interpreter_frame_cache_offset); ++} ++ ++// Method ++ ++inline Method** frame::interpreter_frame_method_addr() const { ++ return (Method**)addr_at(interpreter_frame_method_offset); ++} ++ ++// Mirror ++ ++inline oop* frame::interpreter_frame_mirror_addr() const { ++ return (oop*)addr_at(interpreter_frame_mirror_offset); ++} ++ ++// top of expression stack ++inline intptr_t* frame::interpreter_frame_tos_address() const { ++ intptr_t* last_sp = interpreter_frame_last_sp(); ++ if (last_sp == NULL ) { ++ return sp(); ++ } else { ++ // sp() may have been extended by an adapter ++ assert(last_sp <= (intptr_t*)interpreter_frame_monitor_end(), "bad tos"); ++ return last_sp; ++ } ++} ++ ++inline oop* frame::interpreter_frame_temp_oop_addr() const { ++ return (oop *)(fp() + interpreter_frame_oop_temp_offset); ++} ++ ++inline int frame::interpreter_frame_monitor_size() { ++ return BasicObjectLock::size(); ++} ++ ++ ++// expression stack ++// (the max_stack arguments are used by the GC; see class FrameClosure) ++ ++inline intptr_t* frame::interpreter_frame_expression_stack() const { ++ intptr_t* monitor_end = (intptr_t*) interpreter_frame_monitor_end(); ++ return monitor_end-1; ++} ++ ++// Entry frames ++ ++inline JavaCallWrapper** frame::entry_frame_call_wrapper_addr() const { ++ return (JavaCallWrapper**)addr_at(entry_frame_call_wrapper_offset); ++} ++ ++// Compiled frames ++ ++inline oop frame::saved_oop_result(RegisterMap* map) const { ++ return *((oop*) map->location(V0->as_VMReg())); ++} ++ ++inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) { ++ *((oop*) map->location(V0->as_VMReg())) = obj; ++} ++ 
++#endif // CPU_MIPS_VM_FRAME_MIPS_INLINE_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/gc/g1/g1BarrierSetAssembler_mips.cpp b/src/hotspot/cpu/mips/gc/g1/g1BarrierSetAssembler_mips.cpp +--- a/src/hotspot/cpu/mips/gc/g1/g1BarrierSetAssembler_mips.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/gc/g1/g1BarrierSetAssembler_mips.cpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,364 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/g1/g1BarrierSet.hpp" ++#include "gc/g1/g1BarrierSetAssembler.hpp" ++#include "gc/g1/g1BarrierSetRuntime.hpp" ++#include "gc/g1/g1CardTable.hpp" ++#include "gc/g1/g1ThreadLocalData.hpp" ++#include "gc/g1/heapRegion.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "utilities/macros.hpp" ++ ++#define __ masm-> ++ ++void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count) { ++ bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; ++ ++ if (!dest_uninitialized) { ++#ifndef OPT_THREAD ++ Register thread = T9; ++ __ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ ++ Label filtered; ++ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); ++ // Is marking active? 
++ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { ++ __ lw(AT, in_progress); ++ } else { ++ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); ++ __ lb(AT, in_progress); ++ } ++ ++ __ beq(AT, R0, filtered); ++ __ delayed()->nop(); ++ ++ __ pushad(); // push registers ++ if (count == A0) { ++ if (addr == A1) { ++ __ move(AT, A0); ++ __ move(A0, A1); ++ __ move(A1, AT); ++ } else { ++ __ move(A1, count); ++ __ move(A0, addr); ++ } ++ } else { ++ __ move(A0, addr); ++ __ move(A1, count); ++ } ++ if (UseCompressedOops) { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_narrow_oop_entry), 2); ++ } else { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_oop_entry), 2); ++ } ++ __ popad(); ++ ++ __ bind(filtered); ++ } ++} ++ ++void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, Register tmp) { ++ __ pushad(); // push registers (overkill) ++ if (count == A0) { ++ assert_different_registers(A1, addr); ++ __ move(A1, count); ++ __ move(A0, addr); ++ } else { ++ assert_different_registers(A0, count); ++ __ move(A0, addr); ++ __ move(A1, count); ++ } ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_post_entry), 2); ++ __ popad(); ++} ++ ++void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread) { ++ bool on_oop = type == T_OBJECT || type == T_ARRAY; ++ bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0; ++ bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0; ++ bool on_reference = on_weak || on_phantom; ++ ModRefBarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); ++ if (on_oop && on_reference) { ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // Generate the G1 pre-barrier code to log the value of ++ // the referent field in an SATB buffer. ++ g1_write_barrier_pre(masm /* masm */, ++ noreg /* obj */, ++ dst /* pre_val */, ++ thread /* thread */, ++ tmp1 /* tmp */, ++ true /* tosca_live */, ++ true /* expand_call */); ++ } ++} ++ ++void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call) { ++ // If expand_call is true then we expand the call_VM_leaf macro ++ // directly to skip generating the check by ++ // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. ++ ++ assert(thread == TREG, "must be"); ++ ++ Label done; ++ Label runtime; ++ ++ assert(pre_val != noreg, "check this code"); ++ ++ if (obj != noreg) { ++ assert_different_registers(obj, pre_val, tmp); ++ assert(pre_val != V0, "check this code"); ++ } ++ ++ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); ++ Address index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); ++ ++ // Is marking active? ++ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { ++ __ lw(AT, in_progress); ++ } else { ++ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); ++ __ lb(AT, in_progress); ++ } ++ __ beq(AT, R0, done); ++ __ delayed()->nop(); ++ ++ // Do we need to load the previous value? 
++ if (obj != noreg) { ++ __ load_heap_oop(pre_val, Address(obj, 0)); ++ } ++ ++ // Is the previous value null? ++ __ beq(pre_val, R0, done); ++ __ delayed()->nop(); ++ ++ // Can we store original value in the thread's buffer? ++ // Is index == 0? ++ // (The index field is typed as size_t.) ++ ++ __ ld(tmp, index); ++ __ beq(tmp, R0, runtime); ++ __ delayed()->nop(); ++ ++ __ daddiu(tmp, tmp, -1 * wordSize); ++ __ sd(tmp, index); ++ __ ld(AT, buffer); ++ __ daddu(tmp, tmp, AT); ++ ++ // Record the previous value ++ __ sd(pre_val, tmp, 0); ++ __ beq(R0, R0, done); ++ __ delayed()->nop(); ++ ++ __ bind(runtime); ++ // save the live input values ++ if (tosca_live) __ push(V0); ++ ++ if (obj != noreg && obj != V0) __ push(obj); ++ ++ if (pre_val != V0) __ push(pre_val); ++ ++ // Calling the runtime using the regular call_VM_leaf mechanism generates ++ // code (generated by InterpreterMacroAssember::call_VM_leaf_base) ++ // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL. ++ // ++ // If we care generating the pre-barrier without a frame (e.g. in the ++ // intrinsified Reference.get() routine) then ebp might be pointing to ++ // the caller frame and so this check will most likely fail at runtime. ++ // ++ // Expanding the call directly bypasses the generation of the check. ++ // So when we do not have have a full interpreter frame on the stack ++ // expand_call should be passed true. ++ ++ if (expand_call) { ++ assert(pre_val != A1, "smashed arg"); ++ if (thread != A1) __ move(A1, thread); ++ if (pre_val != A0) __ move(A0, pre_val); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); ++ } else { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); ++ } ++ ++ // save the live input values ++ if (pre_val != V0) ++ __ pop(pre_val); ++ ++ if (obj != noreg && obj != V0) ++ __ pop(obj); ++ ++ if (tosca_live) __ pop(V0); ++ ++ __ bind(done); ++} ++ ++void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, ++ Register store_addr, ++ Register new_val, ++ Register thread, ++ Register tmp, ++ Register tmp2) { ++ assert_different_registers(tmp, tmp2, AT); ++ assert(thread == TREG, "must be"); ++ ++ Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); ++ ++ CardTableBarrierSet* ct = barrier_set_cast(BarrierSet::barrier_set()); ++ assert(sizeof(*ct->card_table()->byte_map_base()) == sizeof(jbyte), "adjust this code"); ++ ++ Label done; ++ Label runtime; ++ ++ // Does store cross heap regions? ++ __ xorr(AT, store_addr, new_val); ++ __ dsrl(AT, AT, HeapRegion::LogOfHRGrainBytes); ++ __ beq(AT, R0, done); ++ __ delayed()->nop(); ++ ++ // crosses regions, storing NULL? ++ __ beq(new_val, R0, done); ++ __ delayed()->nop(); ++ ++ // storing region crossing non-NULL, is card already dirty? ++ const Register card_addr = tmp; ++ const Register cardtable = tmp2; ++ ++ __ move(card_addr, store_addr); ++ __ dsrl(card_addr, card_addr, CardTable::card_shift); ++ // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT ++ // a valid address and therefore is not properly handled by the relocation code. 
++ __ set64(cardtable, (intptr_t)ct->card_table()->byte_map_base()); ++ __ daddu(card_addr, card_addr, cardtable); ++ ++ __ lb(AT, card_addr, 0); ++ __ daddiu(AT, AT, -1 * (int)G1CardTable::g1_young_card_val()); ++ __ beq(AT, R0, done); ++ __ delayed()->nop(); ++ ++ __ sync(); ++ __ lb(AT, card_addr, 0); ++ __ daddiu(AT, AT, -1 * (int)G1CardTable::dirty_card_val()); ++ __ beq(AT, R0, done); ++ __ delayed()->nop(); ++ ++ // storing a region crossing, non-NULL oop, card is clean. ++ // dirty card and log. ++ __ move(AT, (int)G1CardTable::dirty_card_val()); ++ __ sb(AT, card_addr, 0); ++ ++ __ lw(AT, queue_index); ++ __ beq(AT, R0, runtime); ++ __ delayed()->nop(); ++ __ daddiu(AT, AT, -1 * wordSize); ++ __ sw(AT, queue_index); ++ __ ld(tmp2, buffer); ++ __ ld(AT, queue_index); ++ __ daddu(tmp2, tmp2, AT); ++ __ sd(card_addr, tmp2, 0); ++ __ beq(R0, R0, done); ++ __ delayed()->nop(); ++ ++ __ bind(runtime); ++ // save the live input values ++ __ push(store_addr); ++ __ push(new_val); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, TREG); ++ __ pop(new_val); ++ __ pop(store_addr); ++ ++ __ bind(done); ++} ++ ++void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ bool as_normal = (decorators & AS_NORMAL) != 0; ++ assert((decorators & IS_DEST_UNINITIALIZED) == 0, "unsupported"); ++ ++ bool needs_pre_barrier = as_normal; ++ bool needs_post_barrier = val != noreg && in_heap; ++ ++ Register tmp3 = RT3; ++ Register rthread = TREG; ++ // flatten object address if needed ++ // We do it regardless of precise because we need the registers ++ if (dst.index() == noreg && dst.disp() == 0) { ++ if (dst.base() != tmp3) { ++ __ move(tmp3, dst.base()); ++ } ++ } else { ++ __ lea(tmp3, dst); ++ } ++ ++ if (needs_pre_barrier) { ++ g1_write_barrier_pre(masm /*masm*/, ++ tmp3 /* obj */, ++ tmp2 /* pre_val */, ++ rthread /* thread */, ++ tmp1 /* tmp */, ++ val != noreg /* tosca_live */, ++ false /* expand_call */); ++ } ++ if (val == noreg) { ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), val, noreg, noreg); ++ } else { ++ Register new_val = val; ++ if (needs_post_barrier) { ++ // G1 barrier needs uncompressed oop for region cross check. ++ if (UseCompressedOops) { ++ new_val = tmp2; ++ __ move(new_val, val); ++ } ++ } ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), val, noreg, noreg); ++ if (needs_post_barrier) { ++ g1_write_barrier_post(masm /*masm*/, ++ tmp3 /* store_adr */, ++ new_val /* new_val */, ++ rthread /* thread */, ++ tmp1 /* tmp */, ++ tmp2 /* tmp2 */); ++ } ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/gc/g1/g1BarrierSetAssembler_mips.hpp b/src/hotspot/cpu/mips/gc/g1/g1BarrierSetAssembler_mips.hpp +--- a/src/hotspot/cpu/mips/gc/g1/g1BarrierSetAssembler_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/gc/g1/g1BarrierSetAssembler_mips.hpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,71 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_GC_G1_G1BARRIERSETASSEMBLER_MIPS_HPP ++#define CPU_MIPS_GC_G1_G1BARRIERSETASSEMBLER_MIPS_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/modRefBarrierSetAssembler.hpp" ++ ++class LIR_Assembler; ++class StubAssembler; ++class G1PreBarrierStub; ++class G1PostBarrierStub; ++ ++class G1BarrierSetAssembler: public ModRefBarrierSetAssembler { ++ protected: ++ virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count); ++ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count, Register tmp); ++ ++ void g1_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call); ++ ++ void g1_write_barrier_post(MacroAssembler* masm, ++ Register store_addr, ++ Register new_val, ++ Register thread, ++ Register tmp, ++ Register tmp2); ++ ++ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++ ++ public: ++ void gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub); ++ void gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub); ++ ++ void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); ++ void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm); ++ ++ virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread); ++}; ++ ++#endif // CPU_MIPS_GC_G1_G1BARRIERSETASSEMBLER_MIPS_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/gc/shared/barrierSetAssembler_mips.cpp b/src/hotspot/cpu/mips/gc/shared/barrierSetAssembler_mips.cpp +--- a/src/hotspot/cpu/mips/gc/shared/barrierSetAssembler_mips.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/gc/shared/barrierSetAssembler_mips.cpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,194 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "runtime/jniHandles.hpp" ++#include "runtime/thread.hpp" ++ ++#define __ masm-> ++ ++void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread) { ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ bool in_native = (decorators & IN_NATIVE) != 0; ++ bool is_not_null = (decorators & IS_NOT_NULL) != 0; ++ ++ switch (type) { ++ case T_OBJECT: ++ case T_ARRAY: { ++ if (in_heap) { ++ if (UseCompressedOops) { ++ __ lwu(dst, src); ++ if (is_not_null) { ++ __ decode_heap_oop_not_null(dst); ++ } else { ++ __ decode_heap_oop(dst); ++ } ++ } else ++ { ++ __ ld_ptr(dst, src); ++ } ++ } else { ++ assert(in_native, "why else?"); ++ __ ld_ptr(dst, src); ++ } ++ break; ++ } ++ case T_BOOLEAN: __ lbu (dst, src); break; ++ case T_BYTE: __ lb (dst, src); break; ++ case T_CHAR: __ lhu (dst, src); break; ++ case T_SHORT: __ lh (dst, src); break; ++ case T_INT: __ lw (dst, src); break; ++ case T_LONG: __ ld (dst, src); break; ++ case T_ADDRESS: __ ld_ptr(dst, src); break; ++ case T_FLOAT: ++ assert(dst == noreg, "only to ftos"); ++ __ lwc1(FSF, src); ++ break; ++ case T_DOUBLE: ++ assert(dst == noreg, "only to dtos"); ++ __ ldc1(FSF, src); ++ break; ++ default: Unimplemented(); ++ } ++} ++ ++void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ bool in_native = (decorators & IN_NATIVE) != 0; ++ bool is_not_null = (decorators & IS_NOT_NULL) != 0; ++ ++ switch (type) { ++ case T_OBJECT: ++ case T_ARRAY: { ++ if (in_heap) { ++ if (val == noreg) { ++ assert(!is_not_null, "inconsistent access"); ++ if (UseCompressedOops) { ++ __ sw(R0, dst); ++ } else { ++ __ sd(R0, dst); ++ } ++ } else { ++ if (UseCompressedOops) { ++ assert(!dst.uses(val), "not enough registers"); ++ if (is_not_null) { ++ __ encode_heap_oop_not_null(val); ++ } else { ++ __ encode_heap_oop(val); ++ } ++ __ sw(val, dst); ++ } else ++ { ++ __ st_ptr(val, dst); ++ } ++ } ++ } else { ++ assert(in_native, "why else?"); ++ assert(val != noreg, "not supported"); ++ __ st_ptr(val, dst); ++ } ++ break; ++ } ++ case T_BOOLEAN: ++ __ andi(val, val, 0x1); // boolean is true if LSB is 1 ++ __ sb(val, dst); ++ break; ++ case T_BYTE: ++ __ sb(val, dst); ++ break; ++ case T_SHORT: ++ __ sh(val, dst); ++ break; ++ case T_CHAR: ++ __ sh(val, dst); ++ break; ++ case T_INT: ++ __ sw(val, dst); ++ break; ++ case T_LONG: ++ __ sd(val, dst); ++ break; ++ case T_FLOAT: ++ assert(val == noreg, "only tos"); ++ __ 
swc1(FSF, dst); ++ break; ++ case T_DOUBLE: ++ assert(val == noreg, "only tos"); ++ __ sdc1(FSF, dst); ++ break; ++ case T_ADDRESS: ++ __ st_ptr(val, dst); ++ break; ++ default: Unimplemented(); ++ } ++} ++ ++void BarrierSetAssembler::obj_equals(MacroAssembler* masm, ++ Register obj1, Address obj2) { ++ Unimplemented(); ++} ++ ++void BarrierSetAssembler::obj_equals(MacroAssembler* masm, ++ Register obj1, Register obj2) { ++ Unimplemented(); ++} ++ ++void BarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, ++ Register obj, Register tmp, Label& slowpath) { ++ __ clear_jweak_tag(obj); ++ __ ld_ptr(obj, Address(obj, 0)); ++} ++ ++void BarrierSetAssembler::tlab_allocate(MacroAssembler* masm, ++ Register thread, Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1, ++ Register t2, ++ Label& slow_case) { ++ Unimplemented(); ++} ++ ++// Defines obj, preserves var_size_in_bytes ++void BarrierSetAssembler::eden_allocate(MacroAssembler* masm, ++ Register thread, Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1, ++ Label& slow_case) { ++ Unimplemented(); ++} ++ ++void BarrierSetAssembler::incr_allocated_bytes(MacroAssembler* masm, Register thread, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1) { ++ Unimplemented(); ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/gc/shared/barrierSetAssembler_mips.hpp b/src/hotspot/cpu/mips/gc/shared/barrierSetAssembler_mips.hpp +--- a/src/hotspot/cpu/mips/gc/shared/barrierSetAssembler_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/gc/shared/barrierSetAssembler_mips.hpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,83 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_GC_SHARED_BARRIERSETASSEMBLER_MIPS_HPP ++#define CPU_MIPS_GC_SHARED_BARRIERSETASSEMBLER_MIPS_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "memory/allocation.hpp" ++#include "oops/access.hpp" ++ ++class InterpreterMacroAssembler; ++ ++class BarrierSetAssembler: public CHeapObj { ++private: ++ void incr_allocated_bytes(MacroAssembler* masm, Register thread, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1); ++ ++public: ++ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, Register scratch = NOREG) {} ++ virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, Register scratch = NOREG) {} ++ ++ virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread); ++ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++ ++ virtual void obj_equals(MacroAssembler* masm, ++ Register obj1, Register obj2); ++ virtual void obj_equals(MacroAssembler* masm, ++ Register obj1, Address obj2); ++ ++ virtual void resolve(MacroAssembler* masm, DecoratorSet decorators, Register obj) { ++ // Default implementation does not need to do anything. ++ } ++ ++ // Support for jniFastGetField to try resolving a jobject/jweak in native ++ virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, ++ Register obj, Register tmp, Label& slowpath); ++ ++ virtual void tlab_allocate(MacroAssembler* masm, ++ Register thread, Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1, Register t2, ++ Label& slow_case); ++ virtual void eden_allocate(MacroAssembler* masm, ++ Register thread, Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1, ++ Label& slow_case); ++ ++ virtual void barrier_stubs_init() {} ++}; ++ ++#endif // CPU_MIPS_GC_SHARED_BARRIERSETASSEMBLER_MIPS_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.cpp b/src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.cpp +--- a/src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.cpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,147 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/cardTable.hpp" ++#include "gc/shared/cardTableBarrierSet.hpp" ++#include "gc/shared/cardTableBarrierSetAssembler.hpp" ++ ++#define __ masm-> ++ ++#define T9 RT9 ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#else ++#define BLOCK_COMMENT(str) __ block_comment(str) ++#endif ++ ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++ ++#define TIMES_OOP (UseCompressedOops ? Address::times_4 : Address::times_8) ++ ++void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, Register tmp) { ++ BarrierSet *bs = BarrierSet::barrier_set(); ++ CardTableBarrierSet* ctbs = barrier_set_cast(bs); ++ CardTable* ct = ctbs->card_table(); ++ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); ++ intptr_t disp = (intptr_t) ct->byte_map_base(); ++ ++ Label L_loop, L_done; ++ const Register end = count; ++ assert_different_registers(addr, end); ++ ++ __ beq(count, R0, L_done); // zero count - nothing to do ++ __ delayed()->nop(); ++ ++ if (ct->scanned_concurrently()) __ membar(Assembler::StoreStore); ++ ++ __ set64(tmp, disp); ++ ++ __ lea(end, Address(addr, count, TIMES_OOP, 0)); // end == addr+count*oop_size ++ __ daddiu(end, end, -BytesPerHeapOop); // end - 1 to make inclusive ++ __ shr(addr, CardTable::card_shift); ++ __ shr(end, CardTable::card_shift); ++ __ dsubu(end, end, addr); // end --> cards count ++ ++ __ daddu(addr, addr, tmp); ++ ++ __ BIND(L_loop); ++ if (UseLEXT1) { ++ __ gssbx(R0, addr, count, 0); ++ } else { ++ __ daddu(AT, addr, count); ++ __ sb(R0, AT, 0); ++ } ++ __ daddiu(count, count, -1); ++ __ bgez(count, L_loop); ++ __ delayed()->nop(); ++ ++ __ BIND(L_done); ++} ++ ++void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register obj, Address dst) { ++ // Does a store check for the oop in register obj. The content of ++ // register obj is destroyed afterwards. 
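Both the array post-barrier loop above and the per-store check below come down to the same card-table arithmetic: shift the heap address right by CardTable::card_shift, index the pre-biased byte_map_base, and store dirty_card_val() (zero, as asserted below). A self-contained sketch of that math, assuming a typical 512-byte card size and using hypothetical helper names:

  #include <cstdint>

  static const unsigned kCardShift = 9;   // 512-byte cards; mirrors CardTable::card_shift
  static const uint8_t  kDirty     = 0;   // mirrors CardTable::dirty_card_val()

  // Dirty the single card covering one updated oop field.
  static inline void dirty_card(volatile uint8_t* byte_map_base, uintptr_t field_addr) {
    byte_map_base[field_addr >> kCardShift] = kDirty;   // base is pre-biased by the heap start
  }

  // Dirty every card covering [start, last], as the L_loop above does for array copies.
  static inline void dirty_cards(volatile uint8_t* byte_map_base,
                                 uintptr_t start, uintptr_t last) {
    for (uintptr_t c = (start >> kCardShift); c <= (last >> kCardShift); c++) {
      byte_map_base[c] = kDirty;
    }
  }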
++ BarrierSet* bs = BarrierSet::barrier_set(); ++ ++ CardTableBarrierSet* ctbs = barrier_set_cast(bs); ++ CardTable* ct = ctbs->card_table(); ++ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); ++ ++ __ shr(obj, CardTable::card_shift); ++ ++ Address card_addr; ++ ++ intptr_t byte_map_base = (intptr_t)ct->byte_map_base(); ++ Register tmp = T9; ++ assert_different_registers(tmp, obj); ++ __ li(tmp, byte_map_base); ++ __ addu(tmp, tmp, obj); ++ ++ assert(CardTable::dirty_card_val() == 0, "must be"); ++ ++ jbyte dirty = CardTable::dirty_card_val(); ++ if (UseCondCardMark) { ++ Label L_already_dirty; ++ __ membar(Assembler::StoreLoad); ++ __ lb(AT, tmp, 0); ++ __ addiu(AT, AT, -1 * dirty); ++ __ beq(AT, R0, L_already_dirty); ++ __ delayed()->nop(); ++ __ sb(R0, tmp, 0); ++ __ bind(L_already_dirty); ++ } else { ++ if (ct->scanned_concurrently()) { ++ __ membar(Assembler::StoreStore); ++ } ++ __ sb(R0, tmp, 0); ++ } ++} ++ ++void CardTableBarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ ++ bool is_array = (decorators & IS_ARRAY) != 0; ++ bool on_anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0; ++ bool precise = is_array || on_anonymous; ++ ++ bool needs_post_barrier = val != noreg && in_heap; ++ ++ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, noreg, noreg); ++ if (needs_post_barrier) { ++ // flatten object address if needed ++ if (!precise || (dst.index() == noreg && dst.disp() == 0)) { ++ store_check(masm, dst.base(), dst); ++ } else { ++ __ lea(tmp1, dst); ++ store_check(masm, tmp1, dst); ++ } ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.hpp b/src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.hpp +--- a/src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.hpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,42 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_MIPS_HPP ++#define CPU_MIPS_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_MIPS_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/modRefBarrierSetAssembler.hpp" ++ ++class CardTableBarrierSetAssembler: public ModRefBarrierSetAssembler { ++protected: ++ void store_check(MacroAssembler* masm, Register obj, Address dst); ++ ++ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count, Register tmp); ++ ++ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++}; ++ ++#endif // CPU_MIPS_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_MIPS_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/gc/shared/modRefBarrierSetAssembler_mips.cpp b/src/hotspot/cpu/mips/gc/shared/modRefBarrierSetAssembler_mips.cpp +--- a/src/hotspot/cpu/mips/gc/shared/modRefBarrierSetAssembler_mips.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/gc/shared/modRefBarrierSetAssembler_mips.cpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,53 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/shared/modRefBarrierSetAssembler.hpp" ++ ++#define __ masm-> ++ ++void ModRefBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, Register scratch) { ++ if (is_oop) { ++ gen_write_ref_array_pre_barrier(masm, decorators, dst, count); ++ } ++} ++ ++void ModRefBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, Register scratch) { ++ if (is_oop) { ++ gen_write_ref_array_post_barrier(masm, decorators, dst, count, scratch); ++ } ++} ++ ++void ModRefBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ if (type == T_OBJECT || type == T_ARRAY) { ++ oop_store_at(masm, decorators, type, dst, val, tmp1, tmp2); ++ } else { ++ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/gc/shared/modRefBarrierSetAssembler_mips.hpp b/src/hotspot/cpu/mips/gc/shared/modRefBarrierSetAssembler_mips.hpp +--- a/src/hotspot/cpu/mips/gc/shared/modRefBarrierSetAssembler_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/gc/shared/modRefBarrierSetAssembler_mips.hpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,54 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_GC_SHARED_MODREFBARRIERSETASSEMBLER_MIPS_HPP ++#define CPU_MIPS_GC_SHARED_MODREFBARRIERSETASSEMBLER_MIPS_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++ ++// The ModRefBarrierSetAssembler filters away accesses on BasicTypes other ++// than T_OBJECT/T_ARRAY (oops). The oop accesses call one of the protected ++// accesses, which are overridden in the concrete BarrierSetAssembler. 
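The class comment above describes a simple template-method split: the shared store_at() filters by BasicType and routes only oop stores to a protected virtual hook that each concrete barrier set overrides. Reduced to a standalone sketch (simplified stand-in names, not the HotSpot classes themselves):

  #include <cstdio>

  enum BasicTypeSketch { kInt, kObject, kArray };

  struct BarrierSetAsmSketch {
    virtual ~BarrierSetAsmSketch() {}
    virtual void store_at(BasicTypeSketch t) { std::printf("plain store\n"); }
  };

  struct ModRefBarrierSetAsmSketch : BarrierSetAsmSketch {
    void store_at(BasicTypeSketch t) override {
      if (t == kObject || t == kArray) {
        oop_store_at(t);                       // barrier-specific path
      } else {
        BarrierSetAsmSketch::store_at(t);      // primitives bypass the barrier
      }
    }
  protected:
    virtual void oop_store_at(BasicTypeSketch t) = 0;   // e.g. store plus card mark
  };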
++ ++class ModRefBarrierSetAssembler: public BarrierSetAssembler { ++protected: ++ virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count) {} ++ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, Register tmp) {} ++ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) = 0; ++public: ++ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, Register scratch = NOREG); ++ virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, Register scratch = NOREG); ++ ++ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++}; ++ ++#endif // CPU_MIPS_GC_SHARED_MODREFBARRIERSETASSEMBLER_MIPS_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/globalDefinitions_mips.hpp b/src/hotspot/cpu/mips/globalDefinitions_mips.hpp +--- a/src/hotspot/cpu/mips/globalDefinitions_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/globalDefinitions_mips.hpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,45 @@ ++/* ++ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_GLOBALDEFINITIONS_MIPS_HPP ++#define CPU_MIPS_VM_GLOBALDEFINITIONS_MIPS_HPP ++// Size of MIPS Instructions ++const int BytesPerInstWord = 4; ++ ++const int StackAlignmentInBytes = (2*wordSize); ++ ++// Indicates whether the C calling conventions require that ++// 32-bit integer argument values are properly extended to 64 bits. ++// If set, SharedRuntime::c_calling_convention() must adapt ++// signatures accordingly. 
++const bool CCallingConventionRequiresIntsAsLongs = false; ++ ++#define SUPPORTS_NATIVE_CX8 ++ ++#define SUPPORT_RESERVED_STACK_AREA ++ ++#define THREAD_LOCAL_POLL ++ ++#endif // CPU_MIPS_VM_GLOBALDEFINITIONS_MIPS_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/globals_mips.hpp b/src/hotspot/cpu/mips/globals_mips.hpp +--- a/src/hotspot/cpu/mips/globals_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/globals_mips.hpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,137 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_GLOBALS_MIPS_HPP ++#define CPU_MIPS_VM_GLOBALS_MIPS_HPP ++ ++#include "utilities/globalDefinitions.hpp" ++#include "utilities/macros.hpp" ++ ++// Sets the default values for platform dependent flags used by the runtime system. ++// (see globals.hpp) ++ ++define_pd_global(bool, ShareVtableStubs, true); ++define_pd_global(bool, NeedsDeoptSuspend, false); // only register window machines need this ++ ++define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks ++define_pd_global(bool, TrapBasedNullChecks, false); // Not needed on x86. 
++define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs passed to check cast ++ ++define_pd_global(uintx, CodeCacheSegmentSize, 64); ++define_pd_global(intx, CodeEntryAlignment, 16); ++define_pd_global(intx, OptoLoopAlignment, 16); ++define_pd_global(intx, InlineFrequencyCount, 100); ++// MIPS generates 3x instructions than X86 ++define_pd_global(intx, InlineSmallCode, 4000); ++ ++#define DEFAULT_STACK_YELLOW_PAGES (2) ++#define DEFAULT_STACK_RED_PAGES (1) ++#define DEFAULT_STACK_SHADOW_PAGES (20 DEBUG_ONLY(+4)) ++#define DEFAULT_STACK_RESERVED_PAGES (1) ++define_pd_global(uintx, TLABSize, 0); ++define_pd_global(uintx, NewSize, 1024 * K); ++define_pd_global(intx, PreInflateSpin, 10); ++ ++define_pd_global(intx, PrefetchCopyIntervalInBytes, -1); ++define_pd_global(intx, PrefetchScanIntervalInBytes, -1); ++define_pd_global(intx, PrefetchFieldsAhead, -1); ++ ++#define MIN_STACK_YELLOW_PAGES DEFAULT_STACK_YELLOW_PAGES ++#define MIN_STACK_RED_PAGES DEFAULT_STACK_RED_PAGES ++#define MIN_STACK_SHADOW_PAGES DEFAULT_STACK_SHADOW_PAGES ++#define MIN_STACK_RESERVED_PAGES (0) ++define_pd_global(intx, StackReservedPages, DEFAULT_STACK_RESERVED_PAGES); ++ ++define_pd_global(intx, StackYellowPages, 2); ++define_pd_global(intx, StackRedPages, 1); ++define_pd_global(intx, StackShadowPages, DEFAULT_STACK_SHADOW_PAGES); ++ ++define_pd_global(bool, RewriteBytecodes, true); ++define_pd_global(bool, RewriteFrequentPairs, true); ++define_pd_global(bool, UseMembar, true); ++// GC Ergo Flags ++define_pd_global(intx, CMSYoungGenPerWorker, 64*M); // default max size of CMS young gen, per GC worker thread ++ ++define_pd_global(uintx, TypeProfileLevel, 111); ++ ++define_pd_global(bool, CompactStrings, true); ++ ++define_pd_global(bool, PreserveFramePointer, false); ++ ++define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong); ++ ++define_pd_global(bool, ThreadLocalHandshakes, true); ++// Only c2 cares about this at the moment ++define_pd_global(intx, AllocatePrefetchStyle, 2); ++define_pd_global(intx, AllocatePrefetchDistance, -1); ++ ++#define ARCH_FLAGS(develop, \ ++ product, \ ++ diagnostic, \ ++ experimental, \ ++ notproduct, \ ++ range, \ ++ constraint, \ ++ writeable) \ ++ \ ++ product(bool, UseLEXT1, false, \ ++ "Use LoongISA general EXTensions 1") \ ++ \ ++ product(bool, UseLEXT2, false, \ ++ "Use LoongISA general EXTensions 2") \ ++ \ ++ product(bool, UseLEXT3, false, \ ++ "Use LoongISA general EXTensions 3") \ ++ \ ++ product(bool, UseCodeCacheAllocOpt, true, \ ++ "Allocate code cache within 32-bit memory address space") \ ++ \ ++ product(intx, UseSyncLevel, 10000, \ ++ "The sync level on Loongson CPUs" \ ++ "UseSyncLevel == 10000, 111, for all Loongson CPUs, " \ ++ "UseSyncLevel == 4000, 101, maybe for GS464V" \ ++ "UseSyncLevel == 3000, 001, maybe for GS464V" \ ++ "UseSyncLevel == 2000, 011, maybe for GS464E/GS264" \ ++ "UseSyncLevel == 1000, 110, maybe for GS464") \ ++ \ ++ develop(bool, UseBoundCheckInstruction, false, \ ++ "Use bound check instruction") \ ++ \ ++ product(intx, SetFSFOFN, 999, \ ++ "Set the FS/FO/FN bits in FCSR" \ ++ "999 means FS/FO/FN will not be changed" \ ++ "=XYZ, with X:FS, Y:FO, Z:FN, X, Y and Z in 0=off, 1=on") \ ++ \ ++ /* assembler */ \ ++ product(bool, UseCountLeadingZerosInstructionMIPS64, true, \ ++ "Use count leading zeros instruction") \ ++ \ ++ product(bool, UseCountTrailingZerosInstructionMIPS64, false, \ ++ "Use count trailing zeros instruction") \ ++ \ ++ product(bool, UseActiveCoresMP, false, \ ++ "Eliminate barriers for single active cpu") ++ 
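ARCH_FLAGS above follows HotSpot's usual X-macro style flag list: the port defines one macro that is expanded elsewhere with different definitions of product/develop/etc. to generate flag declarations, defaults, and documentation from a single source. A generic, self-contained illustration of that idiom (not HotSpot's actual expansion machinery; the flag names here are made up):

  #include <cstdio>

  // One list of flags, expanded several times with different callback macros.
  #define MY_ARCH_FLAGS(product)                                        \
    product(bool, UseFeatureX, false, "Use hypothetical feature X")     \
    product(int,  TuneLevel,   2,     "Hypothetical tuning level")

  // Expansion 1: define a global with its default value for each flag.
  #define DEFINE_FLAG(type, name, value, doc) type name = value;
  MY_ARCH_FLAGS(DEFINE_FLAG)
  #undef DEFINE_FLAG

  // Expansion 2: print each flag's name and documentation string.
  static void print_flag_docs() {
  #define PRINT_FLAG(type, name, value, doc) std::printf("%-12s %s\n", #name, doc);
    MY_ARCH_FLAGS(PRINT_FLAG)
  #undef PRINT_FLAG
  }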
++#endif // CPU_MIPS_VM_GLOBALS_MIPS_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/icache_mips.cpp b/src/hotspot/cpu/mips/icache_mips.cpp +--- a/src/hotspot/cpu/mips/icache_mips.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/icache_mips.cpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,41 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "runtime/icache.hpp" ++ ++void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) ++{ ++#define __ _masm-> ++ StubCodeMark mark(this, "ICache", "flush_icache_stub"); ++ address start = __ pc(); ++ ++ __ jr_hb(RA); ++ __ delayed()->ori(V0, A2, 0); ++ ++ *flush_icache_stub = (ICache::flush_icache_stub_t)start; ++#undef __ ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/icache_mips.hpp b/src/hotspot/cpu/mips/icache_mips.hpp +--- a/src/hotspot/cpu/mips/icache_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/icache_mips.hpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,41 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_ICACHE_MIPS_HPP ++#define CPU_MIPS_VM_ICACHE_MIPS_HPP ++ ++// Interface for updating the instruction cache. Whenever the VM modifies ++// code, part of the processor instruction cache potentially has to be flushed. ++ ++class ICache : public AbstractICache { ++ public: ++ enum { ++ stub_size = 2 * BytesPerInstWord, // Size of the icache flush stub in bytes ++ line_size = 32, // flush instruction affects a dword ++ log2_line_size = 5 // log2(line_size) ++ }; ++}; ++ ++#endif // CPU_MIPS_VM_ICACHE_MIPS_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/icBuffer_mips.cpp b/src/hotspot/cpu/mips/icBuffer_mips.cpp +--- a/src/hotspot/cpu/mips/icBuffer_mips.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/icBuffer_mips.cpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,88 @@ ++/* ++ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/icBuffer.hpp" ++#include "gc/shared/collectedHeap.inline.hpp" ++#include "interpreter/bytecodes.hpp" ++#include "memory/resourceArea.hpp" ++#include "nativeInst_mips.hpp" ++#include "oops/oop.inline.hpp" ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++int InlineCacheBuffer::ic_stub_code_size() { ++ return NativeMovConstReg::instruction_size + ++ NativeGeneralJump::instruction_size + ++ 1; ++ // so that code_end can be set in CodeBuffer ++ // 64bit 15 = 6 + 8 bytes + 1 byte ++ // 32bit 7 = 2 + 4 bytes + 1 byte ++} ++ ++ ++// we use T1 as cached oop(klass) now. 
this is the target of virtual call, ++// when reach here, the receiver in T0 ++// refer to shareRuntime_mips.cpp,gen_i2c2i_adapters ++void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached_value, address entry_point) { ++ ResourceMark rm; ++ CodeBuffer code(code_begin, ic_stub_code_size()); ++ MacroAssembler* masm = new MacroAssembler(&code); ++ // note: even though the code contains an embedded oop, we do not need reloc info ++ // because ++ // (1) the oop is old (i.e., doesn't matter for scavenges) ++ // (2) these ICStubs are removed *before* a GC happens, so the roots disappear ++// assert(cached_oop == NULL || cached_oop->is_perm(), "must be perm oop"); ++#define __ masm-> ++ __ patchable_set48(T1, (long)cached_value); ++ ++ __ patchable_jump(entry_point); ++ __ flush(); ++#undef __ ++} ++ ++ ++address InlineCacheBuffer::ic_buffer_entry_point(address code_begin) { ++ NativeMovConstReg* move = nativeMovConstReg_at(code_begin); // creation also verifies the object ++ NativeGeneralJump* jump = nativeGeneralJump_at(move->next_instruction_address()); ++ return jump->jump_destination(); ++} ++ ++ ++void* InlineCacheBuffer::ic_buffer_cached_value(address code_begin) { ++ // creation also verifies the object ++ NativeMovConstReg* move = nativeMovConstReg_at(code_begin); ++ // Verifies the jump ++ NativeGeneralJump* jump = nativeGeneralJump_at(move->next_instruction_address()); ++ void* o= (void*)move->data(); ++ return o; ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/interp_masm_mips_64.cpp b/src/hotspot/cpu/mips/interp_masm_mips_64.cpp +--- a/src/hotspot/cpu/mips/interp_masm_mips_64.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/interp_masm_mips_64.cpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,2126 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "interp_masm_mips.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "oops/arrayOop.hpp" ++#include "oops/markOop.hpp" ++#include "oops/methodData.hpp" ++#include "oops/method.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/jvmtiThreadState.hpp" ++#include "runtime/basicLock.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/safepointMechanism.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/thread.inline.hpp" ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++// Implementation of InterpreterMacroAssembler ++ ++#ifdef CC_INTERP ++void InterpreterMacroAssembler::get_method(Register reg) { ++} ++#endif // CC_INTERP ++ ++void InterpreterMacroAssembler::get_2_byte_integer_at_bcp(Register reg, Register tmp, int offset) { ++ // The runtime address of BCP may be unaligned. ++ // Refer to the SPARC implementation. ++ lbu(reg, BCP, offset+1); ++ lbu(tmp, BCP, offset); ++ dsll(reg, reg, 8); ++ daddu(reg, tmp, reg); ++} ++ ++void InterpreterMacroAssembler::get_4_byte_integer_at_bcp(Register reg, Register tmp, int offset) { ++ assert(reg != tmp, "need separate temp register"); ++ if (offset & 3) { // Offset unaligned? ++ lbu(reg, BCP, offset+3); ++ lbu(tmp, BCP, offset+2); ++ dsll(reg, reg, 8); ++ daddu(reg, tmp, reg); ++ lbu(tmp, BCP, offset+1); ++ dsll(reg, reg, 8); ++ daddu(reg, tmp, reg); ++ lbu(tmp, BCP, offset); ++ dsll(reg, reg, 8); ++ daddu(reg, tmp, reg); ++ } else { ++ lwu(reg, BCP, offset); ++ } ++} ++ ++void InterpreterMacroAssembler::jump_to_entry(address entry) { ++ assert(entry, "Entry must have been generated by now"); ++ jmp(entry); ++} ++ ++#ifndef CC_INTERP ++ ++void InterpreterMacroAssembler::call_VM_leaf_base(address entry_point, ++ int number_of_arguments) { ++ // interpreter specific ++ // ++ // Note: No need to save/restore bcp & locals (r13 & r14) pointer ++ // since these are callee saved registers and no blocking/ ++ // GC can happen in leaf calls. ++ // Further Note: DO NOT save/restore bcp/locals. If a caller has ++ // already saved them so that it can use BCP/LVP as temporaries ++ // then a save/restore here will DESTROY the copy the caller ++ // saved! There used to be a save_bcp() that only happened in ++ // the ASSERT path (no restore_bcp). Which caused bizarre failures ++ // when jvm built with ASSERTs. ++#ifdef ASSERT ++ save_bcp(); ++ { ++ Label L; ++ ld(AT,FP,frame::interpreter_frame_last_sp_offset * wordSize); ++ beq(AT,R0,L); ++ delayed()->nop(); ++ stop("InterpreterMacroAssembler::call_VM_leaf_base: last_sp != NULL"); ++ bind(L); ++ } ++#endif ++ // super call ++ MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments); ++ // interpreter specific ++ // Used to ASSERT that BCP/LVP were equal to frame's bcp/locals ++ // but since they may not have been saved (and we don't want to ++ // save them here (see note above) the assert is invalid. 
++} ++ ++void InterpreterMacroAssembler::call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ // interpreter specific ++ // ++ // Note: Could avoid restoring locals ptr (callee saved) - however doesn't ++ // really make a difference for these runtime calls, since they are ++ // slow anyway. Btw., bcp must be saved/restored since it may change ++ // due to GC. ++ assert(java_thread == noreg , "not expecting a precomputed java thread"); ++ save_bcp(); ++#ifdef ASSERT ++ { ++ Label L; ++ ld(AT, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ beq(AT, R0, L); ++ delayed()->nop(); ++ stop("InterpreterMacroAssembler::call_VM_base: last_sp != NULL"); ++ bind(L); ++ } ++#endif /* ASSERT */ ++ // super call ++ MacroAssembler::call_VM_base(oop_result, java_thread, last_java_sp, ++ entry_point, number_of_arguments, ++ check_exceptions); ++ // interpreter specific ++ restore_bcp(); ++ restore_locals(); ++} ++ ++ ++void InterpreterMacroAssembler::check_and_handle_popframe(Register java_thread) { ++ if (JvmtiExport::can_pop_frame()) { ++ Label L; ++ // Initiate popframe handling only if it is not already being ++ // processed. If the flag has the popframe_processing bit set, it ++ // means that this code is called *during* popframe handling - we ++ // don't want to reenter. ++ // This method is only called just after the call into the vm in ++ // call_VM_base, so the arg registers are available. ++ // Not clear if any other register is available, so load AT twice ++ assert(AT != java_thread, "check"); ++ lw(AT, java_thread, in_bytes(JavaThread::popframe_condition_offset())); ++ andi(AT, AT, JavaThread::popframe_pending_bit); ++ beq(AT, R0, L); ++ delayed()->nop(); ++ ++ lw(AT, java_thread, in_bytes(JavaThread::popframe_condition_offset())); ++ andi(AT, AT, JavaThread::popframe_processing_bit); ++ bne(AT, R0, L); ++ delayed()->nop(); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_preserving_args_entry)); ++ jr(V0); ++ delayed()->nop(); ++ bind(L); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::load_earlyret_value(TosState state) { ++#ifndef OPT_THREAD ++ Register thread = T8; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ ld_ptr(T8, thread, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ const Address tos_addr (T8, in_bytes(JvmtiThreadState::earlyret_tos_offset())); ++ const Address oop_addr (T8, in_bytes(JvmtiThreadState::earlyret_oop_offset())); ++ const Address val_addr (T8, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++ //V0, oop_addr,V1,val_addr ++ switch (state) { ++ case atos: ++ ld_ptr(V0, oop_addr); ++ st_ptr(R0, oop_addr); ++ verify_oop(V0, state); ++ break; ++ case ltos: ++ ld_ptr(V0, val_addr); // fall through ++ break; ++ case btos: // fall through ++ case ztos: // fall through ++ case ctos: // fall through ++ case stos: // fall through ++ case itos: ++ lw(V0, val_addr); ++ break; ++ case ftos: ++ lwc1(F0, T8, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++ break; ++ case dtos: ++ ldc1(F0, T8, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++ break; ++ case vtos: /* nothing to do */ break; ++ default : ShouldNotReachHere(); ++ } ++ // Clean up tos value in the thread object ++ move(AT, (int)ilgl); ++ sw(AT, tos_addr); ++ sw(R0, T8, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++} ++ ++ ++void InterpreterMacroAssembler::check_and_handle_earlyret(Register java_thread) { ++ if 
(JvmtiExport::can_force_early_return()) { ++ Label L; ++ Register tmp = T9; ++ ++ assert(java_thread != AT, "check"); ++ assert(java_thread != tmp, "check"); ++ ld_ptr(AT, java_thread, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ beq(AT, R0, L); ++ delayed()->nop(); ++ ++ // Initiate earlyret handling only if it is not already being processed. ++ // If the flag has the earlyret_processing bit set, it means that this code ++ // is called *during* earlyret handling - we don't want to reenter. ++ lw(AT, AT, in_bytes(JvmtiThreadState::earlyret_state_offset())); ++ move(tmp, JvmtiThreadState::earlyret_pending); ++ bne(tmp, AT, L); ++ delayed()->nop(); ++ ++ // Call Interpreter::remove_activation_early_entry() to get the address of the ++ // same-named entrypoint in the generated interpreter code. ++ ld_ptr(tmp, java_thread, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ lw(AT, tmp, in_bytes(JvmtiThreadState::earlyret_tos_offset())); ++ move(A0, AT); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_early_entry), A0); ++ jr(V0); ++ delayed()->nop(); ++ bind(L); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::get_unsigned_2_byte_index_at_bcp(Register reg, ++ int bcp_offset) { ++ assert(bcp_offset >= 0, "bcp is still pointing to start of bytecode"); ++ lbu(AT, BCP, bcp_offset); ++ lbu(reg, BCP, bcp_offset + 1); ++ ins(reg, AT, 8, 8); ++} ++ ++ ++void InterpreterMacroAssembler::get_cache_index_at_bcp(Register index, ++ int bcp_offset, ++ size_t index_size) { ++ assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); ++ if (index_size == sizeof(u2)) { ++ get_2_byte_integer_at_bcp(index, AT, bcp_offset); ++ } else if (index_size == sizeof(u4)) { ++ get_4_byte_integer_at_bcp(index, AT, bcp_offset); ++ // Check if the secondary index definition is still ~x, otherwise ++ // we have to change the following assembler code to calculate the ++ // plain index. ++ assert(ConstantPool::decode_invokedynamic_index(~123) == 123, "else change next line"); ++ nor(index, index, R0); ++ sll(index, index, 0); ++ } else if (index_size == sizeof(u1)) { ++ lbu(index, BCP, bcp_offset); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache, ++ Register index, ++ int bcp_offset, ++ size_t index_size) { ++ assert_different_registers(cache, index); ++ get_cache_index_at_bcp(index, bcp_offset, index_size); ++ ld(cache, FP, frame::interpreter_frame_cache_offset * wordSize); ++ assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); ++ assert(exact_log2(in_words(ConstantPoolCacheEntry::size())) == 2, "else change next line"); ++ shl(index, 2); ++} ++ ++ ++void InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register cache, ++ Register index, ++ Register bytecode, ++ int byte_no, ++ int bcp_offset, ++ size_t index_size) { ++ get_cache_and_index_at_bcp(cache, index, bcp_offset, index_size); ++ // We use a 32-bit load here since the layout of 64-bit words on ++ // little-endian machines allow us that. 
++ dsll(AT, index, Address::times_ptr); ++ daddu(AT, cache, AT); ++ lw(bytecode, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset())); ++ if(os::is_MP()) { ++ sync(); // load acquire ++ } ++ ++ const int shift_count = (1 + byte_no) * BitsPerByte; ++ assert((byte_no == TemplateTable::f1_byte && shift_count == ConstantPoolCacheEntry::bytecode_1_shift) || ++ (byte_no == TemplateTable::f2_byte && shift_count == ConstantPoolCacheEntry::bytecode_2_shift), ++ "correct shift count"); ++ dsrl(bytecode, bytecode, shift_count); ++ assert(ConstantPoolCacheEntry::bytecode_1_mask == ConstantPoolCacheEntry::bytecode_2_mask, "common mask"); ++ move(AT, ConstantPoolCacheEntry::bytecode_1_mask); ++ andr(bytecode, bytecode, AT); ++} ++ ++void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache, ++ Register tmp, ++ int bcp_offset, ++ size_t index_size) { ++ assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); ++ assert(cache != tmp, "must use different register"); ++ get_cache_index_at_bcp(tmp, bcp_offset, index_size); ++ assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); ++ // convert from field index to ConstantPoolCacheEntry index ++ // and from word offset to byte offset ++ assert(exact_log2(in_bytes(ConstantPoolCacheEntry::size_in_bytes())) == 2 + LogBytesPerWord, "else change next line"); ++ shl(tmp, 2 + LogBytesPerWord); ++ ld(cache, FP, frame::interpreter_frame_cache_offset * wordSize); ++ // skip past the header ++ daddiu(cache, cache, in_bytes(ConstantPoolCache::base_offset())); ++ daddu(cache, cache, tmp); ++} ++ ++void InterpreterMacroAssembler::get_method_counters(Register method, ++ Register mcs, Label& skip) { ++ Label has_counters; ++ ld(mcs, method, in_bytes(Method::method_counters_offset())); ++ bne(mcs, R0, has_counters); ++ delayed()->nop(); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::build_method_counters), method); ++ ld(mcs, method, in_bytes(Method::method_counters_offset())); ++ beq(mcs, R0, skip); // No MethodCounters allocated, OutOfMemory ++ delayed()->nop(); ++ bind(has_counters); ++} ++ ++// Load object from cpool->resolved_references(index) ++void InterpreterMacroAssembler::load_resolved_reference_at_index( ++ Register result, Register index, Register tmp) { ++ assert_different_registers(result, index); ++ // convert from field index to resolved_references() index and from ++ // word index to byte offset. 
Since this is a java object, it can be compressed ++ shl(index, LogBytesPerHeapOop); ++ ++ get_constant_pool(result); ++ // load pointer for resolved_references[] objArray ++ ld(result, result, ConstantPool::cache_offset_in_bytes()); ++ ld(result, result, ConstantPoolCache::resolved_references_offset_in_bytes()); ++ resolve_oop_handle(result, tmp); ++ // Add in the index ++ daddu(result, result, index); ++ load_heap_oop(result, Address(result, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), tmp); ++} ++ ++// load cpool->resolved_klass_at(index) ++void InterpreterMacroAssembler::load_resolved_klass_at_index(Register cpool, ++ Register index, Register klass) { ++ dsll(AT, index, Address::times_ptr); ++ if (UseLEXT1 && Assembler::is_simm(sizeof(ConstantPool), 8)) { ++ gslhx(index, cpool, AT, sizeof(ConstantPool)); ++ } else { ++ daddu(AT, cpool, AT); ++ lh(index, AT, sizeof(ConstantPool)); ++ } ++ Register resolved_klasses = cpool; ++ ld_ptr(resolved_klasses, Address(cpool, ConstantPool::resolved_klasses_offset_in_bytes())); ++ dsll(AT, index, Address::times_ptr); ++ daddu(AT, resolved_klasses, AT); ++ ld(klass, AT, Array::base_offset_in_bytes()); ++} ++ ++// Resets LVP to locals. Register sub_klass cannot be any of the above. ++void InterpreterMacroAssembler::gen_subtype_check( Register Rsup_klass, Register Rsub_klass, Label &ok_is_subtype ) { ++ assert( Rsub_klass != Rsup_klass, "Rsup_klass holds superklass" ); ++ assert( Rsub_klass != T1, "T1 holds 2ndary super array length" ); ++ assert( Rsub_klass != T0, "T0 holds 2ndary super array scan ptr" ); ++ // Profile the not-null value's klass. ++ // Here T9 and T1 are used as temporary registers. ++ profile_typecheck(T9, Rsub_klass, T1); // blows T9, reloads T1 ++ ++ // Do the check. ++ check_klass_subtype(Rsub_klass, Rsup_klass, T1, ok_is_subtype); // blows T1 ++ ++ // Profile the failure of the check. ++ profile_typecheck_failed(T9); // blows T9 ++} ++ ++ ++ ++// Java Expression Stack ++ ++void InterpreterMacroAssembler::pop_ptr(Register r) { ++ ld(r, SP, 0); ++ daddiu(SP, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_i(Register r) { ++ lw(r, SP, 0); ++ daddiu(SP, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_l(Register r) { ++ ld(r, SP, 0); ++ daddiu(SP, SP, 2 * Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_f(FloatRegister r) { ++ lwc1(r, SP, 0); ++ daddiu(SP, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_d(FloatRegister r) { ++ ldc1(r, SP, 0); ++ daddiu(SP, SP, 2 * Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::push_ptr(Register r) { ++ daddiu(SP, SP, - Interpreter::stackElementSize); ++ sd(r, SP, 0); ++} ++ ++void InterpreterMacroAssembler::push_i(Register r) { ++ // For compatibility reason, don't change to sw. 
++ daddiu(SP, SP, - Interpreter::stackElementSize); ++ sd(r, SP, 0); ++} ++ ++void InterpreterMacroAssembler::push_l(Register r) { ++ daddiu(SP, SP, -2 * Interpreter::stackElementSize); ++ sd(r, SP, 0); ++ sd(R0, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::push_f(FloatRegister r) { ++ daddiu(SP, SP, - Interpreter::stackElementSize); ++ swc1(r, SP, 0); ++} ++ ++void InterpreterMacroAssembler::push_d(FloatRegister r) { ++ daddiu(SP, SP, -2 * Interpreter::stackElementSize); ++ sdc1(r, SP, 0); ++ sd(R0, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop(TosState state) { ++ switch (state) { ++ case atos: pop_ptr(); break; ++ case btos: ++ case ztos: ++ case ctos: ++ case stos: ++ case itos: pop_i(); break; ++ case ltos: pop_l(); break; ++ case ftos: pop_f(); break; ++ case dtos: pop_d(); break; ++ case vtos: /* nothing to do */ break; ++ default: ShouldNotReachHere(); ++ } ++ verify_oop(FSR, state); ++} ++ ++//FSR=V0,SSR=V1 ++void InterpreterMacroAssembler::push(TosState state) { ++ verify_oop(FSR, state); ++ switch (state) { ++ case atos: push_ptr(); break; ++ case btos: ++ case ztos: ++ case ctos: ++ case stos: ++ case itos: push_i(); break; ++ case ltos: push_l(); break; ++ case ftos: push_f(); break; ++ case dtos: push_d(); break; ++ case vtos: /* nothing to do */ break; ++ default : ShouldNotReachHere(); ++ } ++} ++ ++ ++ ++void InterpreterMacroAssembler::load_ptr(int n, Register val) { ++ ld(val, SP, Interpreter::expr_offset_in_bytes(n)); ++} ++ ++void InterpreterMacroAssembler::store_ptr(int n, Register val) { ++ sd(val, SP, Interpreter::expr_offset_in_bytes(n)); ++} ++ ++// Jump to from_interpreted entry of a call unless single stepping is possible ++// in this thread in which case we must call the i2i entry ++void InterpreterMacroAssembler::jump_from_interpreted(Register method, Register temp) { ++ // record last_sp ++ move(Rsender, SP); ++ sd(SP, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ if (JvmtiExport::can_post_interpreter_events()) { ++ Label run_compiled_code; ++ // JVMTI events, such as single-stepping, are implemented partly by avoiding running ++ // compiled code in threads for which the event is enabled. Check here for ++ // interp_only_mode if these events CAN be enabled. ++#ifndef OPT_THREAD ++ Register thread = temp; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ // interp_only is an int, on little endian it is sufficient to test the byte only ++ // Is a cmpl faster? ++ lw(AT, thread, in_bytes(JavaThread::interp_only_mode_offset())); ++ beq(AT, R0, run_compiled_code); ++ delayed()->nop(); ++ ld(AT, method, in_bytes(Method::interpreter_entry_offset())); ++ jr(AT); ++ delayed()->nop(); ++ bind(run_compiled_code); ++ } ++ ++ ld(AT, method, in_bytes(Method::from_interpreted_offset())); ++ jr(AT); ++ delayed()->nop(); ++} ++ ++ ++// The following two routines provide a hook so that an implementation ++// can schedule the dispatch in two parts. mips64 does not do this. ++void InterpreterMacroAssembler::dispatch_prolog(TosState state, int step) { ++ // Nothing mips64 specific to be done here ++} ++ ++void InterpreterMacroAssembler::dispatch_epilog(TosState state, int step) { ++ dispatch_next(state, step); ++} ++ ++// assume the next bytecode in T8. 
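dispatch_base() below implements token-threaded dispatch: the bytecode already loaded into Rnext is scaled by the word size and used to index a per-TosState table of generated entry points, with a detour through the safepoint table when a thread-local poll is armed. The core mechanism, reduced to a function-pointer table in plain C++ (illustrative names only; 0x60 is the standard iadd opcode):

  #include <cstdint>
  #include <cstdio>

  typedef void (*bytecode_handler_t)();

  static void do_nop()  { std::printf("nop\n");  }
  static void do_iadd() { std::printf("iadd\n"); }

  // The interpreter keeps one such table per TosState; a single table is enough here.
  static bytecode_handler_t dispatch_table[256];

  static void init_dispatch_table() {
    for (int i = 0; i < 256; i++) dispatch_table[i] = do_nop;
    dispatch_table[0x60] = do_iadd;            // Bytecodes::_iadd
  }

  // Equivalent of: load the next bytecode, scale by wordSize, indexed jump through the table.
  static void dispatch(const uint8_t* bcp) {
    dispatch_table[*bcp]();
  }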
++void InterpreterMacroAssembler::dispatch_base(TosState state, ++ address* table, ++ bool verifyoop, ++ bool generate_poll) { ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ get_thread(thread); ++#endif ++ ++ if (VerifyActivationFrameSize) { ++ Label L; ++ ++ dsubu(T2, FP, SP); ++ int min_frame_size = (frame::link_offset - ++ frame::interpreter_frame_initial_sp_offset) * wordSize; ++ daddiu(T2, T2, -min_frame_size); ++ bgez(T2, L); ++ delayed()->nop(); ++ stop("broken stack frame"); ++ bind(L); ++ } ++ // FIXME: I do not know which register should pass to verify_oop ++ if (verifyoop) verify_oop(FSR, state); ++ dsll(T2, Rnext, LogBytesPerWord); ++ ++ Label safepoint; ++ address* const safepoint_table = Interpreter::safept_table(state); ++ bool needs_thread_local_poll = generate_poll && ++ SafepointMechanism::uses_thread_local_poll() && table != safepoint_table; ++ ++ if (needs_thread_local_poll) { ++ NOT_PRODUCT(block_comment("Thread-local Safepoint poll")); ++ ld(T3, thread, in_bytes(Thread::polling_page_offset())); ++ andi(T3, T3, SafepointMechanism::poll_bit()); ++ bne(T3, R0, safepoint); ++ delayed()->nop(); ++ } ++ ++ if((long)table >= (long)Interpreter::dispatch_table(btos) && ++ (long)table <= (long)Interpreter::dispatch_table(vtos) ++ ) { ++ int table_size = (long)Interpreter::dispatch_table(itos) - (long)Interpreter::dispatch_table(stos); ++ int table_offset = ((int)state - (int)itos) * table_size; ++ ++ // GP points to the starting address of Interpreter::dispatch_table(itos). ++ // See StubGenerator::generate_call_stub(address& return_address) for the initialization of GP. ++ if(table_offset != 0) { ++ daddiu(T3, GP, table_offset); ++ if (UseLEXT1) { ++ gsldx(T3, T2, T3, 0); ++ } else { ++ daddu(T3, T2, T3); ++ ld(T3, T3, 0); ++ } ++ } else { ++ if (UseLEXT1) { ++ gsldx(T3, T2, GP, 0); ++ } else { ++ daddu(T3, T2, GP); ++ ld(T3, T3, 0); ++ } ++ } ++ } else { ++ li(T3, (long)table); ++ if (UseLEXT1) { ++ gsldx(T3, T2, T3, 0); ++ } else { ++ daddu(T3, T2, T3); ++ ld(T3, T3, 0); ++ } ++ } ++ jr(T3); ++ delayed()->nop(); ++ ++ if (needs_thread_local_poll) { ++ bind(safepoint); ++ li(T3, (long)safepoint_table); ++ if (UseLEXT1) { ++ gsldx(T3, T2, T3, 0); ++ } else { ++ daddu(T3, T2, T3); ++ ld(T3, T3, 0); ++ } ++ jr(T3); ++ delayed()->nop(); ++ } ++} ++ ++void InterpreterMacroAssembler::dispatch_only(TosState state, bool generate_poll) { ++ dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll); ++} ++ ++void InterpreterMacroAssembler::dispatch_only_normal(TosState state) { ++ dispatch_base(state, Interpreter::normal_table(state)); ++} ++ ++void InterpreterMacroAssembler::dispatch_only_noverify(TosState state) { ++ dispatch_base(state, Interpreter::normal_table(state), false); ++} ++ ++ ++void InterpreterMacroAssembler::dispatch_next(TosState state, int step, bool generate_poll) { ++ // load next bytecode (load before advancing r13 to prevent AGI) ++ lbu(Rnext, BCP, step); ++ increment(BCP, step); ++ dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll); ++} ++ ++void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) { ++ // load current bytecode ++ lbu(Rnext, BCP, 0); ++ dispatch_base(state, table); ++} ++ ++// remove activation ++// ++// Unlock the receiver if this is a synchronized method. ++// Unlock any Java monitors from syncronized blocks. ++// Remove the activation from the stack. 
++// ++// If there are locked Java monitors ++// If throw_monitor_exception ++// throws IllegalMonitorStateException ++// Else if install_monitor_exception ++// installs IllegalMonitorStateException ++// Else ++// no error processing ++// used registers : T1, T2, T3, T8 ++// T1 : thread, method access flags ++// T2 : monitor entry pointer ++// T3 : method, monitor top ++// T8 : unlock flag ++void InterpreterMacroAssembler::remove_activation( ++ TosState state, ++ Register ret_addr, ++ bool throw_monitor_exception, ++ bool install_monitor_exception, ++ bool notify_jvmdi) { ++ // Note: Registers V0, V1 and F0, F1 may be in use for the result ++ // check if synchronized method ++ Label unlocked, unlock, no_unlock; ++ ++ // get the value of _do_not_unlock_if_synchronized into T8 ++#ifndef OPT_THREAD ++ Register thread = T1; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ lb(T8, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ // reset the flag ++ sb(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ // get method access flags ++ ld(T3, FP, frame::interpreter_frame_method_offset * wordSize); ++ lw(T1, T3, in_bytes(Method::access_flags_offset())); ++ andi(T1, T1, JVM_ACC_SYNCHRONIZED); ++ beq(T1, R0, unlocked); ++ delayed()->nop(); ++ ++ // Don't unlock anything if the _do_not_unlock_if_synchronized flag is set. ++ bne(T8, R0, no_unlock); ++ delayed()->nop(); ++ // unlock monitor ++ push(state); // save result ++ ++ // BasicObjectLock will be first in list, since this is a ++ // synchronized method. However, need to check that the object has ++ // not been unlocked by an explicit monitorexit bytecode. ++ daddiu(c_rarg0, FP, frame::interpreter_frame_initial_sp_offset * wordSize ++ - (int)sizeof(BasicObjectLock)); ++ // address of first monitor ++ ld(T1, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ bne(T1, R0, unlock); ++ delayed()->nop(); ++ pop(state); ++ if (throw_monitor_exception) { ++ // Entry already unlocked, need to throw exception ++ // I think mips do not need empty_FPU_stack ++ // remove possible return value from FPU-stack, otherwise stack could overflow ++ empty_FPU_stack(); ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ should_not_reach_here(); ++ } else { ++ // Monitor already unlocked during a stack unroll. If requested, ++ // install an illegal_monitor_state_exception. Continue with ++ // stack unrolling. 
++ if (install_monitor_exception) { ++ // remove possible return value from FPU-stack, ++ // otherwise stack could overflow ++ empty_FPU_stack(); ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::new_illegal_monitor_state_exception)); ++ ++ } ++ ++ b(unlocked); ++ delayed()->nop(); ++ } ++ ++ bind(unlock); ++ unlock_object(c_rarg0); ++ pop(state); ++ ++ // Check that for block-structured locking (i.e., that all locked ++ // objects has been unlocked) ++ bind(unlocked); ++ ++ // V0, V1: Might contain return value ++ ++ // Check that all monitors are unlocked ++ { ++ Label loop, exception, entry, restart; ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ const Address monitor_block_top(FP, ++ frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ ++ bind(restart); ++ // points to current entry, starting with top-most entry ++ ld(c_rarg0, monitor_block_top); ++ // points to word before bottom of monitor block ++ daddiu(T3, FP, frame::interpreter_frame_initial_sp_offset * wordSize); ++ b(entry); ++ delayed()->nop(); ++ ++ // Entry already locked, need to throw exception ++ bind(exception); ++ ++ if (throw_monitor_exception) { ++ // Throw exception ++ // remove possible return value from FPU-stack, ++ // otherwise stack could overflow ++ empty_FPU_stack(); ++ MacroAssembler::call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ should_not_reach_here(); ++ } else { ++ // Stack unrolling. Unlock object and install illegal_monitor_exception ++ // Unlock does not block, so don't have to worry about the frame ++ // We don't have to preserve c_rarg0, since we are going to ++ // throw an exception ++ ++ push(state); ++ unlock_object(c_rarg0); ++ pop(state); ++ ++ if (install_monitor_exception) { ++ empty_FPU_stack(); ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::new_illegal_monitor_state_exception)); ++ } ++ ++ b(restart); ++ delayed()->nop(); ++ } ++ ++ bind(loop); ++ ld(T1, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ bne(T1, R0, exception);// check if current entry is used ++ delayed()->nop(); ++ ++ daddiu(c_rarg0, c_rarg0, entry_size);// otherwise advance to next entry ++ bind(entry); ++ bne(c_rarg0, T3, loop); // check if bottom reached ++ delayed()->nop(); // if not at bottom then check this entry ++ } ++ ++ bind(no_unlock); ++ ++ // jvmpi support (jvmdi does not generate MethodExit on exception / popFrame) ++ if (notify_jvmdi) { ++ notify_method_exit(state, NotifyJVMTI); // preserve TOSCA ++ } else { ++ notify_method_exit(state, SkipNotifyJVMTI); // preserve TOSCA ++ } ++ ++ // remove activation ++ ld(TSR, FP, frame::interpreter_frame_sender_sp_offset * wordSize); ++ if (StackReservedPages > 0) { ++ // testing if reserved zone needs to be re-enabled ++ Label no_reserved_zone_enabling; ++ ++ ld(AT, Address(thread, JavaThread::reserved_stack_activation_offset())); ++ dsubu(AT, TSR, AT); ++ blez(AT, no_reserved_zone_enabling); ++ delayed()->nop(); ++ ++ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), thread); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_delayed_StackOverflowError)); ++ should_not_reach_here(); ++ ++ bind(no_reserved_zone_enabling); ++ } ++ ld(ret_addr, FP, frame::interpreter_frame_return_addr_offset * wordSize); ++ ld(FP, FP, frame::interpreter_frame_sender_fp_offset * wordSize); ++ move(SP, TSR); // set sp to sender sp ++} ++ ++#endif // CC_INTERP ++ ++// Lock object ++// 
++// Args: ++// c_rarg0: BasicObjectLock to be used for locking ++// ++// Kills: ++// T1 ++// T2 ++void InterpreterMacroAssembler::lock_object(Register lock_reg) { ++ assert(lock_reg == c_rarg0, "The argument is only for looks. It must be c_rarg0"); ++ ++ if (UseHeavyMonitors) { ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), lock_reg); ++ } else { ++ Label done, slow_case; ++ const Register tmp_reg = T2; ++ const Register scr_reg = T1; ++ const int obj_offset = BasicObjectLock::obj_offset_in_bytes(); ++ const int lock_offset = BasicObjectLock::lock_offset_in_bytes (); ++ const int mark_offset = lock_offset + BasicLock::displaced_header_offset_in_bytes(); ++ ++ // Load object pointer into scr_reg ++ ld(scr_reg, lock_reg, obj_offset); ++ ++ if (UseBiasedLocking) { ++ // Note: we use noreg for the temporary register since it's hard ++ // to come up with a free register on all incoming code paths ++ biased_locking_enter(lock_reg, scr_reg, tmp_reg, noreg, false, done, &slow_case); ++ } ++ ++ // Load (object->mark() | 1) into tmp_reg ++ ld(AT, scr_reg, 0); ++ ori(tmp_reg, AT, 1); ++ ++ // Save (object->mark() | 1) into BasicLock's displaced header ++ sd(tmp_reg, lock_reg, mark_offset); ++ ++ assert(lock_offset == 0, "displached header must be first word in BasicObjectLock"); ++ ++ if (PrintBiasedLockingStatistics) { ++ Label succ, fail; ++ cmpxchg(Address(scr_reg, 0), tmp_reg, lock_reg, AT, true, false, succ, &fail); ++ bind(succ); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scr_reg); ++ b(done); ++ delayed()->nop(); ++ bind(fail); ++ } else { ++ cmpxchg(Address(scr_reg, 0), tmp_reg, lock_reg, AT, true, false, done); ++ } ++ ++ // Test if the oopMark is an obvious stack pointer, i.e., ++ // 1) (mark & 3) == 0, and ++ // 2) SP <= mark < SP + os::pagesize() ++ // ++ // These 3 tests can be done by evaluating the following ++ // expression: ((mark - sp) & (3 - os::vm_page_size())), ++ // assuming both stack pointer and pagesize have their ++ // least significant 2 bits clear. ++ // NOTE: the oopMark is in tmp_reg as the result of cmpxchg ++ ++ dsubu(tmp_reg, tmp_reg, SP); ++ move(AT, 7 - os::vm_page_size()); ++ andr(tmp_reg, tmp_reg, AT); ++ // Save the test result, for recursive case, the result is zero ++ sd(tmp_reg, lock_reg, mark_offset); ++ if (PrintBiasedLockingStatistics) { ++ bne(tmp_reg, R0, slow_case); ++ delayed()->nop(); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scr_reg); ++ } ++ beq(tmp_reg, R0, done); ++ delayed()->nop(); ++ ++ bind(slow_case); ++ // Call the runtime routine for slow case ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), lock_reg); ++ ++ bind(done); ++ } ++} ++ ++ ++// Unlocks an object. Used in monitorexit bytecode and ++// remove_activation. Throws an IllegalMonitorException if object is ++// not locked by current thread. ++// ++// Args: ++// c_rarg0: BasicObjectLock for lock ++// ++// Kills: ++// T1 ++// T2 ++// T3 ++// Throw an IllegalMonitorException if object is not locked by current thread ++void InterpreterMacroAssembler::unlock_object(Register lock_reg) { ++ assert(lock_reg == c_rarg0, "The argument is only for looks. 
It must be c_rarg0"); ++ ++ if (UseHeavyMonitors) { ++ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); ++ } else { ++ Label done; ++ ++ const Register tmp_reg = T1; ++ const Register scr_reg = T2; ++ const Register hdr_reg = T3; ++ ++ save_bcp(); // Save in case of exception ++ ++ // Convert from BasicObjectLock structure to object and BasicLock structure ++ // Store the BasicLock address into %T2 ++ daddiu(tmp_reg, lock_reg, BasicObjectLock::lock_offset_in_bytes()); ++ ++ // Load oop into scr_reg(%T1) ++ ld(scr_reg, lock_reg, BasicObjectLock::obj_offset_in_bytes()); ++ // free entry ++ sd(R0, lock_reg, BasicObjectLock::obj_offset_in_bytes()); ++ if (UseBiasedLocking) { ++ biased_locking_exit(scr_reg, hdr_reg, done); ++ } ++ ++ // Load the old header from BasicLock structure ++ ld(hdr_reg, tmp_reg, BasicLock::displaced_header_offset_in_bytes()); ++ // zero for recursive case ++ beq(hdr_reg, R0, done); ++ delayed()->nop(); ++ ++ // Atomic swap back the old header ++ cmpxchg(Address(scr_reg, 0), tmp_reg, hdr_reg, AT, false, false, done); ++ ++ // Call the runtime routine for slow case. ++ sd(scr_reg, lock_reg, BasicObjectLock::obj_offset_in_bytes()); // restore obj ++ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), ++ lock_reg); ++ ++ bind(done); ++ ++ restore_bcp(); ++ } ++} ++ ++#ifndef CC_INTERP ++ ++void InterpreterMacroAssembler::test_method_data_pointer(Register mdp, ++ Label& zero_continue) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ ld(mdp, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); ++ beq(mdp, R0, zero_continue); ++ delayed()->nop(); ++} ++ ++ ++// Set the method data pointer for the current bcp. ++void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ Label set_mdp; ++ ++ // V0 and T0 will be used as two temporary registers. ++ push2(V0, T0); ++ ++ get_method(T0); ++ // Test MDO to avoid the call if it is NULL. ++ ld(V0, T0, in_bytes(Method::method_data_offset())); ++ beq(V0, R0, set_mdp); ++ delayed()->nop(); ++ ++ // method: T0 ++ // bcp: BCP --> S0 ++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::bcp_to_di), T0, BCP); ++ // mdi: V0 ++ // mdo is guaranteed to be non-zero here, we checked for it before the call. ++ get_method(T0); ++ ld(T0, T0, in_bytes(Method::method_data_offset())); ++ daddiu(T0, T0, in_bytes(MethodData::data_offset())); ++ daddu(V0, T0, V0); ++ bind(set_mdp); ++ sd(V0, FP, frame::interpreter_frame_mdp_offset * wordSize); ++ pop2(V0, T0); ++} ++ ++void InterpreterMacroAssembler::verify_method_data_pointer() { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++#ifdef ASSERT ++ Label verify_continue; ++ Register method = V0; ++ Register mdp = V1; ++ Register tmp = A0; ++ push(method); ++ push(mdp); ++ push(tmp); ++ test_method_data_pointer(mdp, verify_continue); // If mdp is zero, continue ++ get_method(method); ++ ++ // If the mdp is valid, it will point to a DataLayout header which is ++ // consistent with the bcp. The converse is highly probable also. 
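++ // Roughly: check  method->constMethod() + ConstMethod::codes_offset() + mdp->bci()  == BCP,
++ // and let InterpreterRuntime::verify_mdp() report the details on a mismatch.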
++ lhu(tmp, mdp, in_bytes(DataLayout::bci_offset())); ++ ld(AT, method, in_bytes(Method::const_offset())); ++ daddu(tmp, tmp, AT); ++ daddiu(tmp, tmp, in_bytes(ConstMethod::codes_offset())); ++ beq(tmp, BCP, verify_continue); ++ delayed()->nop(); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::verify_mdp), method, BCP, mdp); ++ bind(verify_continue); ++ pop(tmp); ++ pop(mdp); ++ pop(method); ++#endif // ASSERT ++} ++ ++ ++void InterpreterMacroAssembler::set_mdp_data_at(Register mdp_in, ++ int constant, ++ Register value) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ Address data(mdp_in, constant); ++ sd(value, data); ++} ++ ++ ++void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, ++ int constant, ++ bool decrement) { ++ // Counter address ++ Address data(mdp_in, constant); ++ ++ increment_mdp_data_at(data, decrement); ++} ++ ++void InterpreterMacroAssembler::increment_mdp_data_at(Address data, ++ bool decrement) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ // %%% this does 64bit counters at best it is wasting space ++ // at worst it is a rare bug when counters overflow ++ Register tmp = S0; ++ push(tmp); ++ if (decrement) { ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ // Decrement the register. ++ ld(AT, data); ++ sltu(tmp, R0, AT); ++ dsubu(AT, AT, tmp); ++ sd(AT, data); ++ } else { ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ // Increment the register. ++ ld(AT, data); ++ daddiu(tmp, AT, DataLayout::counter_increment); ++ sltu(tmp, R0, tmp); ++ daddu(AT, AT, tmp); ++ sd(AT, data); ++ } ++ pop(tmp); ++} ++ ++ ++void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, ++ Register reg, ++ int constant, ++ bool decrement) { ++ Register tmp = S0; ++ push(tmp); ++ if (decrement) { ++ assert(Assembler::is_simm16(constant), "constant is not a simm16 !"); ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ // Decrement the register. ++ daddu(tmp, mdp_in, reg); ++ ld(AT, tmp, constant); ++ sltu(tmp, R0, AT); ++ dsubu(AT, AT, tmp); ++ daddu(tmp, mdp_in, reg); ++ sd(AT, tmp, constant); ++ } else { ++ assert(Assembler::is_simm16(constant), "constant is not a simm16 !"); ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ // Increment the register. ++ daddu(tmp, mdp_in, reg); ++ ld(AT, tmp, constant); ++ daddiu(tmp, AT, DataLayout::counter_increment); ++ sltu(tmp, R0, tmp); ++ daddu(AT, AT, tmp); ++ daddu(tmp, mdp_in, reg); ++ sd(AT, tmp, constant); ++ } ++ pop(tmp); ++} ++ ++void InterpreterMacroAssembler::set_mdp_flag_at(Register mdp_in, ++ int flag_byte_constant) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ int header_offset = in_bytes(DataLayout::header_offset()); ++ int header_bits = DataLayout::flag_mask_to_header_mask(flag_byte_constant); ++ // Set the flag ++ lw(AT, Address(mdp_in, header_offset)); ++ if(Assembler::is_simm16(header_bits)) { ++ ori(AT, AT, header_bits); ++ } else { ++ push(T8); ++ // T8 is used as a temporary register. 
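++ // header_bits does not fit in a 16-bit immediate here, so materialize it in T8
++ // and OR it into the header word.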
++ move(T8, header_bits); ++ orr(AT, AT, T8); ++ pop(T8); ++ } ++ sw(AT, Address(mdp_in, header_offset)); ++} ++ ++ ++ ++void InterpreterMacroAssembler::test_mdp_data_at(Register mdp_in, ++ int offset, ++ Register value, ++ Register test_value_out, ++ Label& not_equal_continue) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ if (test_value_out == noreg) { ++ ld(AT, Address(mdp_in, offset)); ++ bne(AT, value, not_equal_continue); ++ delayed()->nop(); ++ } else { ++ // Put the test value into a register, so caller can use it: ++ ld(test_value_out, Address(mdp_in, offset)); ++ bne(value, test_value_out, not_equal_continue); ++ delayed()->nop(); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, ++ int offset_of_disp) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ assert(Assembler::is_simm16(offset_of_disp), "offset is not an simm16"); ++ ld(AT, mdp_in, offset_of_disp); ++ daddu(mdp_in, mdp_in, AT); ++ sd(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, ++ Register reg, ++ int offset_of_disp) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ daddu(AT, reg, mdp_in); ++ assert(Assembler::is_simm16(offset_of_disp), "offset is not an simm16"); ++ ld(AT, AT, offset_of_disp); ++ daddu(mdp_in, mdp_in, AT); ++ sd(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_by_constant(Register mdp_in, ++ int constant) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ if(Assembler::is_simm16(constant)) { ++ daddiu(mdp_in, mdp_in, constant); ++ } else { ++ move(AT, constant); ++ daddu(mdp_in, mdp_in, AT); ++ } ++ sd(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_for_ret(Register return_bci) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ push(return_bci); // save/restore across call_VM ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::update_mdp_for_ret), ++ return_bci); ++ pop(return_bci); ++} ++ ++ ++void InterpreterMacroAssembler::profile_taken_branch(Register mdp, ++ Register bumped_count) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ // Otherwise, assign to mdp ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are taking a branch. Increment the taken count. ++ // We inline increment_mdp_data_at to return bumped_count in a register ++ //increment_mdp_data_at(mdp, in_bytes(JumpData::taken_offset())); ++ ld(bumped_count, mdp, in_bytes(JumpData::taken_offset())); ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ daddiu(AT, bumped_count, DataLayout::counter_increment); ++ sltu(AT, R0, AT); ++ daddu(bumped_count, bumped_count, AT); ++ sd(bumped_count, mdp, in_bytes(JumpData::taken_offset())); // Store back out ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_offset(mdp, in_bytes(JumpData::displacement_offset())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are taking a branch. 
Increment the not taken count. ++ increment_mdp_data_at(mdp, in_bytes(BranchData::not_taken_offset())); ++ ++ // The method data pointer needs to be updated to correspond to ++ // the next bytecode ++ update_mdp_by_constant(mdp, in_bytes(BranchData::branch_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_call(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are making a call. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_constant(mdp, in_bytes(CounterData::counter_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_final_call(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are making a call. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_constant(mdp, ++ in_bytes(VirtualCallData:: ++ virtual_call_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_virtual_call(Register receiver, ++ Register mdp, ++ Register reg2, ++ bool receiver_can_be_null) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ Label skip_receiver_profile; ++ if (receiver_can_be_null) { ++ Label not_null; ++ bne(receiver, R0, not_null); ++ delayed()->nop(); ++ // We are making a call. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ beq(R0, R0, skip_receiver_profile); ++ delayed()->nop(); ++ bind(not_null); ++ } ++ ++ // Record the receiver type. ++ record_klass_in_profile(receiver, mdp, reg2, true); ++ bind(skip_receiver_profile); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_constant(mdp, ++ in_bytes(VirtualCallData:: ++ virtual_call_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++#if INCLUDE_JVMCI ++void InterpreterMacroAssembler::profile_called_method(Register method, Register mdp, Register reg2) { ++ assert_different_registers(method, mdp, reg2); ++ if (ProfileInterpreter && MethodProfileWidth > 0) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ Label done; ++ record_item_in_profile_helper(method, mdp, reg2, 0, done, MethodProfileWidth, ++ &VirtualCallData::method_offset, &VirtualCallData::method_count_offset, in_bytes(VirtualCallData::nonprofiled_receiver_count_offset())); ++ bind(done); ++ ++ update_mdp_by_constant(mdp, in_bytes(VirtualCallData::virtual_call_data_size())); ++ bind(profile_continue); ++ } ++} ++#endif // INCLUDE_JVMCI ++ ++// This routine creates a state machine for updating the multi-row ++// type profile at a virtual call site (or other type-sensitive bytecode). ++// The machine visits each row (of receiver/count) until the receiver type ++// is found, or until it runs out of rows. At the same time, it remembers ++// the location of the first empty row. 
(An empty row records null for its ++// receiver, and can be allocated for a newly-observed receiver type.) ++// Because there are two degrees of freedom in the state, a simple linear ++// search will not work; it must be a decision tree. Hence this helper ++// function is recursive, to generate the required tree structured code. ++// It's the interpreter, so we are trading off code space for speed. ++// See below for example code. ++void InterpreterMacroAssembler::record_klass_in_profile_helper( ++ Register receiver, Register mdp, ++ Register reg2, int start_row, ++ Label& done, bool is_virtual_call) { ++ if (TypeProfileWidth == 0) { ++ if (is_virtual_call) { ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ } ++ return; ++ } ++ ++ int last_row = VirtualCallData::row_limit() - 1; ++ assert(start_row <= last_row, "must be work left to do"); ++ // Test this row for both the receiver and for null. ++ // Take any of three different outcomes: ++ // 1. found receiver => increment count and goto done ++ // 2. found null => keep looking for case 1, maybe allocate this cell ++ // 3. found something else => keep looking for cases 1 and 2 ++ // Case 3 is handled by a recursive call. ++ for (int row = start_row; row <= last_row; row++) { ++ Label next_test; ++ bool test_for_null_also = (row == start_row); ++ ++ // See if the receiver is receiver[n]. ++ int recvr_offset = in_bytes(VirtualCallData::receiver_offset(row)); ++ test_mdp_data_at(mdp, recvr_offset, receiver, ++ (test_for_null_also ? reg2 : noreg), ++ next_test); ++ // (Reg2 now contains the receiver from the CallData.) ++ ++ // The receiver is receiver[n]. Increment count[n]. ++ int count_offset = in_bytes(VirtualCallData::receiver_count_offset(row)); ++ increment_mdp_data_at(mdp, count_offset); ++ beq(R0, R0, done); ++ delayed()->nop(); ++ bind(next_test); ++ ++ if (test_for_null_also) { ++ Label found_null; ++ // Failed the equality check on receiver[n]... Test for null. ++ if (start_row == last_row) { ++ // The only thing left to do is handle the null case. ++ if (is_virtual_call) { ++ beq(reg2, R0, found_null); ++ delayed()->nop(); ++ // Receiver did not match any saved receiver and there is no empty row for it. ++ // Increment total counter to indicate polymorphic case. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ beq(R0, R0, done); ++ delayed()->nop(); ++ bind(found_null); ++ } else { ++ bne(reg2, R0, done); ++ delayed()->nop(); ++ } ++ break; ++ } ++ // Since null is rare, make it be the branch-taken case. ++ beq(reg2, R0, found_null); ++ delayed()->nop(); ++ ++ // Put all the "Case 3" tests here. ++ record_klass_in_profile_helper(receiver, mdp, reg2, start_row + 1, done, is_virtual_call); ++ ++ // Found a null. Keep searching for a matching receiver, ++ // but remember that this is an empty (unused) slot. ++ bind(found_null); ++ } ++ } ++ ++ // In the fall-through case, we found no matching receiver, but we ++ // observed the receiver[start_row] is NULL. ++ ++ // Fill in the receiver field and increment the count. 
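++ // Roughly:  row[start_row].receiver = receiver;  row[start_row].count = DataLayout::counter_increment;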
++ int recvr_offset = in_bytes(VirtualCallData::receiver_offset(start_row)); ++ set_mdp_data_at(mdp, recvr_offset, receiver); ++ int count_offset = in_bytes(VirtualCallData::receiver_count_offset(start_row)); ++ move(reg2, DataLayout::counter_increment); ++ set_mdp_data_at(mdp, count_offset, reg2); ++ if (start_row > 0) { ++ beq(R0, R0, done); ++ delayed()->nop(); ++ } ++} ++ ++// Example state machine code for three profile rows: ++// // main copy of decision tree, rooted at row[1] ++// if (row[0].rec == rec) { row[0].incr(); goto done; } ++// if (row[0].rec != NULL) { ++// // inner copy of decision tree, rooted at row[1] ++// if (row[1].rec == rec) { row[1].incr(); goto done; } ++// if (row[1].rec != NULL) { ++// // degenerate decision tree, rooted at row[2] ++// if (row[2].rec == rec) { row[2].incr(); goto done; } ++// if (row[2].rec != NULL) { goto done; } // overflow ++// row[2].init(rec); goto done; ++// } else { ++// // remember row[1] is empty ++// if (row[2].rec == rec) { row[2].incr(); goto done; } ++// row[1].init(rec); goto done; ++// } ++// } else { ++// // remember row[0] is empty ++// if (row[1].rec == rec) { row[1].incr(); goto done; } ++// if (row[2].rec == rec) { row[2].incr(); goto done; } ++// row[0].init(rec); goto done; ++// } ++// done: ++ ++void InterpreterMacroAssembler::record_klass_in_profile(Register receiver, ++ Register mdp, Register reg2, ++ bool is_virtual_call) { ++ assert(ProfileInterpreter, "must be profiling"); ++ Label done; ++ ++ record_klass_in_profile_helper(receiver, mdp, reg2, 0, done, is_virtual_call); ++ ++ bind (done); ++} ++ ++void InterpreterMacroAssembler::profile_ret(Register return_bci, ++ Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ uint row; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Update the total ret count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ ++ for (row = 0; row < RetData::row_limit(); row++) { ++ Label next_test; ++ ++ // See if return_bci is equal to bci[n]: ++ test_mdp_data_at(mdp, ++ in_bytes(RetData::bci_offset(row)), ++ return_bci, noreg, ++ next_test); ++ ++ // return_bci is equal to bci[n]. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(RetData::bci_count_offset(row))); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_offset(mdp, ++ in_bytes(RetData::bci_displacement_offset(row))); ++ beq(R0, R0, profile_continue); ++ delayed()->nop(); ++ bind(next_test); ++ } ++ ++ update_mdp_for_ret(return_bci); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_null_seen(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ set_mdp_flag_at(mdp, BitData::null_seen_byte_constant()); ++ ++ // The method data pointer needs to be updated. ++ int mdp_delta = in_bytes(BitData::bit_data_size()); ++ if (TypeProfileCasts) { ++ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); ++ } ++ update_mdp_by_constant(mdp, mdp_delta); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_typecheck_failed(Register mdp) { ++ if (ProfileInterpreter && TypeProfileCasts) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. 
++ test_method_data_pointer(mdp, profile_continue); ++ ++ int count_offset = in_bytes(CounterData::count_offset()); ++ // Back up the address, since we have already bumped the mdp. ++ count_offset -= in_bytes(VirtualCallData::virtual_call_data_size()); ++ ++ // *Decrement* the counter. We expect to see zero or small negatives. ++ increment_mdp_data_at(mdp, count_offset, true); ++ ++ bind (profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass, Register reg2) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // The method data pointer needs to be updated. ++ int mdp_delta = in_bytes(BitData::bit_data_size()); ++ if (TypeProfileCasts) { ++ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); ++ ++ // Record the object type. ++ record_klass_in_profile(klass, mdp, reg2, false); ++ } ++ update_mdp_by_constant(mdp, mdp_delta); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_switch_default(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Update the default case count ++ increment_mdp_data_at(mdp, ++ in_bytes(MultiBranchData::default_count_offset())); ++ ++ // The method data pointer needs to be updated. ++ update_mdp_by_offset(mdp, ++ in_bytes(MultiBranchData:: ++ default_displacement_offset())); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_switch_case(Register index, ++ Register mdp, ++ Register reg2) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Build the base (index * per_case_size_in_bytes()) + ++ // case_array_offset_in_bytes() ++ move(reg2, in_bytes(MultiBranchData::per_case_size())); ++ if (UseLEXT1) { ++ gsdmult(index, index, reg2); ++ } else { ++ dmult(index, reg2); ++ mflo(index); ++ } ++ daddiu(index, index, in_bytes(MultiBranchData::case_array_offset())); ++ ++ // Update the case count ++ increment_mdp_data_at(mdp, ++ index, ++ in_bytes(MultiBranchData::relative_count_offset())); ++ ++ // The method data pointer needs to be updated. ++ update_mdp_by_offset(mdp, ++ index, ++ in_bytes(MultiBranchData:: ++ relative_displacement_offset())); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::narrow(Register result) { ++ ++ // Get method->_constMethod->_result_type ++ ld(T9, FP, frame::interpreter_frame_method_offset * wordSize); ++ ld(T9, T9, in_bytes(Method::const_offset())); ++ lbu(T9, T9, in_bytes(ConstMethod::result_type_offset())); ++ ++ Label done, notBool, notByte, notChar; ++ ++ // common case first ++ addiu(AT, T9, -T_INT); ++ beq(AT, R0, done); ++ delayed()->nop(); ++ ++ // mask integer result to narrower return type. 
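++ // Roughly (C view of the cases below):
++ //   T_BOOLEAN: result &= 0x1;     T_BYTE:  result = (int8_t)  result;
++ //   T_CHAR:    result &= 0xFFFF;  T_SHORT: result = (int16_t) result;  T_INT: unchanged.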
++ addiu(AT, T9, -T_BOOLEAN); ++ bne(AT, R0, notBool); ++ delayed()->nop(); ++ andi(result, result, 0x1); ++ beq(R0, R0, done); ++ delayed()->nop(); ++ ++ bind(notBool); ++ addiu(AT, T9, -T_BYTE); ++ bne(AT, R0, notByte); ++ delayed()->nop(); ++ seb(result, result); ++ beq(R0, R0, done); ++ delayed()->nop(); ++ ++ bind(notByte); ++ addiu(AT, T9, -T_CHAR); ++ bne(AT, R0, notChar); ++ delayed()->nop(); ++ andi(result, result, 0xFFFF); ++ beq(R0, R0, done); ++ delayed()->nop(); ++ ++ bind(notChar); ++ seh(result, result); ++ ++ // Nothing to do for T_INT ++ bind(done); ++} ++ ++ ++void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& mdo_addr) { ++ Label update, next, none; ++ ++ verify_oop(obj); ++ ++ if (mdo_addr.index() != noreg) { ++ guarantee(T0 != mdo_addr.base(), "The base register will be corrupted !"); ++ guarantee(T0 != mdo_addr.index(), "The index register will be corrupted !"); ++ push(T0); ++ dsll(T0, mdo_addr.index(), mdo_addr.scale()); ++ daddu(T0, T0, mdo_addr.base()); ++ } ++ ++ bne(obj, R0, update); ++ delayed()->nop(); ++ ++ if (mdo_addr.index() == noreg) { ++ ld(AT, mdo_addr); ++ } else { ++ ld(AT, T0, mdo_addr.disp()); ++ } ++ ori(AT, AT, TypeEntries::null_seen); ++ if (mdo_addr.index() == noreg) { ++ sd(AT, mdo_addr); ++ } else { ++ sd(AT, T0, mdo_addr.disp()); ++ } ++ ++ beq(R0, R0, next); ++ delayed()->nop(); ++ ++ bind(update); ++ load_klass(obj, obj); ++ ++ if (mdo_addr.index() == noreg) { ++ ld(AT, mdo_addr); ++ } else { ++ ld(AT, T0, mdo_addr.disp()); ++ } ++ xorr(obj, obj, AT); ++ ++ assert(TypeEntries::type_klass_mask == -4, "must be"); ++ dextm(AT, obj, 2, 62); ++ beq(AT, R0, next); ++ delayed()->nop(); ++ ++ andi(AT, obj, TypeEntries::type_unknown); ++ bne(AT, R0, next); ++ delayed()->nop(); ++ ++ if (mdo_addr.index() == noreg) { ++ ld(AT, mdo_addr); ++ } else { ++ ld(AT, T0, mdo_addr.disp()); ++ } ++ beq(AT, R0, none); ++ delayed()->nop(); ++ ++ daddiu(AT, AT, -(TypeEntries::null_seen)); ++ beq(AT, R0, none); ++ delayed()->nop(); ++ ++ // There is a chance that the checks above (re-reading profiling ++ // data from memory) fail if another thread has just set the ++ // profiling to this obj's klass ++ if (mdo_addr.index() == noreg) { ++ ld(AT, mdo_addr); ++ } else { ++ ld(AT, T0, mdo_addr.disp()); ++ } ++ xorr(obj, obj, AT); ++ assert(TypeEntries::type_klass_mask == -4, "must be"); ++ dextm(AT, obj, 2, 62); ++ beq(AT, R0, next); ++ delayed()->nop(); ++ ++ // different than before. Cannot keep accurate profile. ++ if (mdo_addr.index() == noreg) { ++ ld(AT, mdo_addr); ++ } else { ++ ld(AT, T0, mdo_addr.disp()); ++ } ++ ori(AT, AT, TypeEntries::type_unknown); ++ if (mdo_addr.index() == noreg) { ++ sd(AT, mdo_addr); ++ } else { ++ sd(AT, T0, mdo_addr.disp()); ++ } ++ beq(R0, R0, next); ++ delayed()->nop(); ++ ++ bind(none); ++ // first time here. Set profile type. ++ if (mdo_addr.index() == noreg) { ++ sd(obj, mdo_addr); ++ } else { ++ sd(obj, T0, mdo_addr.disp()); ++ } ++ ++ bind(next); ++ if (mdo_addr.index() != noreg) { ++ pop(T0); ++ } ++} ++ ++void InterpreterMacroAssembler::profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual) { ++ if (!ProfileInterpreter) { ++ return; ++ } ++ ++ if (MethodData::profile_arguments() || MethodData::profile_return()) { ++ Label profile_continue; ++ ++ test_method_data_pointer(mdp, profile_continue); ++ ++ int off_to_start = is_virtual ? 
in_bytes(VirtualCallData::virtual_call_data_size()) : in_bytes(CounterData::counter_data_size()); ++ ++ lb(AT, mdp, in_bytes(DataLayout::tag_offset()) - off_to_start); ++ li(tmp, is_virtual ? DataLayout::virtual_call_type_data_tag : DataLayout::call_type_data_tag); ++ bne(tmp, AT, profile_continue); ++ delayed()->nop(); ++ ++ ++ if (MethodData::profile_arguments()) { ++ Label done; ++ int off_to_args = in_bytes(TypeEntriesAtCall::args_data_offset()); ++ if (Assembler::is_simm16(off_to_args)) { ++ daddiu(mdp, mdp, off_to_args); ++ } else { ++ move(AT, off_to_args); ++ daddu(mdp, mdp, AT); ++ } ++ ++ ++ for (int i = 0; i < TypeProfileArgsLimit; i++) { ++ if (i > 0 || MethodData::profile_return()) { ++ // If return value type is profiled we may have no argument to profile ++ ld(tmp, mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args); ++ ++ if (Assembler::is_simm16(-1 * i * TypeStackSlotEntries::per_arg_count())) { ++ addiu32(tmp, tmp, -1 * i * TypeStackSlotEntries::per_arg_count()); ++ } else { ++ li(AT, i*TypeStackSlotEntries::per_arg_count()); ++ subu32(tmp, tmp, AT); ++ } ++ ++ li(AT, TypeStackSlotEntries::per_arg_count()); ++ slt(AT, tmp, AT); ++ bne(AT, R0, done); ++ delayed()->nop(); ++ } ++ ld(tmp, callee, in_bytes(Method::const_offset())); ++ ++ lhu(tmp, tmp, in_bytes(ConstMethod::size_of_parameters_offset())); ++ ++ // stack offset o (zero based) from the start of the argument ++ // list, for n arguments translates into offset n - o - 1 from ++ // the end of the argument list ++ ld(AT, mdp, in_bytes(TypeEntriesAtCall::stack_slot_offset(i))-off_to_args); ++ subu(tmp, tmp, AT); ++ ++ addiu32(tmp, tmp, -1); ++ ++ Address arg_addr = argument_address(tmp); ++ ld(tmp, arg_addr); ++ ++ Address mdo_arg_addr(mdp, in_bytes(TypeEntriesAtCall::argument_type_offset(i))-off_to_args); ++ profile_obj_type(tmp, mdo_arg_addr); ++ ++ int to_add = in_bytes(TypeStackSlotEntries::per_arg_size()); ++ if (Assembler::is_simm16(to_add)) { ++ daddiu(mdp, mdp, to_add); ++ } else { ++ move(AT, to_add); ++ daddu(mdp, mdp, AT); ++ } ++ ++ off_to_args += to_add; ++ } ++ ++ if (MethodData::profile_return()) { ++ ld(tmp, mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args); ++ ++ int tmp_arg_counts = TypeProfileArgsLimit*TypeStackSlotEntries::per_arg_count(); ++ if (Assembler::is_simm16(-1 * tmp_arg_counts)) { ++ addiu32(tmp, tmp, -1 * tmp_arg_counts); ++ } else { ++ move(AT, tmp_arg_counts); ++ subu32(mdp, mdp, AT); ++ } ++ } ++ ++ bind(done); ++ ++ if (MethodData::profile_return()) { ++ // We're right after the type profile for the last ++ // argument. tmp is the number of cells left in the ++ // CallTypeData/VirtualCallTypeData to reach its end. Non null ++ // if there's a return to profile. 
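++ // Roughly:  mdp += tmp * DataLayout::cell_size;  // step over the remaining cells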
++ assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type"); ++ sll(tmp, tmp, exact_log2(DataLayout::cell_size)); ++ daddu(mdp, mdp, tmp); ++ } ++ sd(mdp, FP, frame::interpreter_frame_mdp_offset * wordSize); ++ } else { ++ assert(MethodData::profile_return(), "either profile call args or call ret"); ++ update_mdp_by_constant(mdp, in_bytes(TypeEntriesAtCall::return_only_size())); ++ } ++ ++ // mdp points right after the end of the ++ // CallTypeData/VirtualCallTypeData, right after the cells for the ++ // return value type if there's one ++ ++ bind(profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::profile_return_type(Register mdp, Register ret, Register tmp) { ++ assert_different_registers(mdp, ret, tmp, _bcp_register); ++ if (ProfileInterpreter && MethodData::profile_return()) { ++ Label profile_continue, done; ++ ++ test_method_data_pointer(mdp, profile_continue); ++ ++ if (MethodData::profile_return_jsr292_only()) { ++ assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2"); ++ ++ // If we don't profile all invoke bytecodes we must make sure ++ // it's a bytecode we indeed profile. We can't go back to the ++ // begining of the ProfileData we intend to update to check its ++ // type because we're right after it and we don't known its ++ // length ++ Label do_profile; ++ lb(tmp, _bcp_register, 0); ++ daddiu(AT, tmp, -1 * Bytecodes::_invokedynamic); ++ beq(AT, R0, do_profile); ++ delayed()->daddiu(AT, tmp, -1 * Bytecodes::_invokehandle); ++ beq(AT, R0, do_profile); ++ delayed()->nop(); ++ ++ get_method(tmp); ++ lhu(tmp, tmp, Method::intrinsic_id_offset_in_bytes()); ++ li(AT, vmIntrinsics::_compiledLambdaForm); ++ bne(tmp, AT, profile_continue); ++ delayed()->nop(); ++ ++ bind(do_profile); ++ } ++ ++ Address mdo_ret_addr(mdp, -in_bytes(ReturnTypeEntry::size())); ++ daddu(tmp, ret, R0); ++ profile_obj_type(tmp, mdo_ret_addr); ++ ++ bind(profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::profile_parameters_type(Register mdp, Register tmp1, Register tmp2) { ++ guarantee(T9 == tmp1, "You are reqired to use T9 as the index register for MIPS !"); ++ ++ if (ProfileInterpreter && MethodData::profile_parameters()) { ++ Label profile_continue, done; ++ ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Load the offset of the area within the MDO used for ++ // parameters. If it's negative we're not profiling any parameters ++ lw(tmp1, mdp, in_bytes(MethodData::parameters_type_data_di_offset()) - in_bytes(MethodData::data_offset())); ++ bltz(tmp1, profile_continue); ++ delayed()->nop(); ++ ++ // Compute a pointer to the area for parameters from the offset ++ // and move the pointer to the slot for the last ++ // parameters. Collect profiling from last parameter down. 
++ // mdo start + parameters offset + array length - 1 ++ daddu(mdp, mdp, tmp1); ++ ld(tmp1, mdp, in_bytes(ArrayData::array_len_offset())); ++ decrement(tmp1, TypeStackSlotEntries::per_arg_count()); ++ ++ ++ Label loop; ++ bind(loop); ++ ++ int off_base = in_bytes(ParametersTypeData::stack_slot_offset(0)); ++ int type_base = in_bytes(ParametersTypeData::type_offset(0)); ++ Address::ScaleFactor per_arg_scale = Address::times(DataLayout::cell_size); ++ Address arg_type(mdp, tmp1, per_arg_scale, type_base); ++ ++ // load offset on the stack from the slot for this parameter ++ dsll(AT, tmp1, per_arg_scale); ++ daddu(AT, AT, mdp); ++ ld(tmp2, AT, off_base); ++ ++ subu(tmp2, R0, tmp2); ++ ++ // read the parameter from the local area ++ dsll(AT, tmp2, Interpreter::logStackElementSize); ++ daddu(AT, AT, _locals_register); ++ ld(tmp2, AT, 0); ++ ++ // profile the parameter ++ profile_obj_type(tmp2, arg_type); ++ ++ // go to next parameter ++ decrement(tmp1, TypeStackSlotEntries::per_arg_count()); ++ bgtz(tmp1, loop); ++ delayed()->nop(); ++ ++ bind(profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::verify_oop(Register reg, TosState state) { ++ if (state == atos) { ++ MacroAssembler::verify_oop(reg); ++ } ++} ++ ++void InterpreterMacroAssembler::verify_FPU(int stack_depth, TosState state) { ++} ++#endif // !CC_INTERP ++ ++ ++void InterpreterMacroAssembler::notify_method_entry() { ++ // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to ++ // track stack depth. If it is possible to enter interp_only_mode we add ++ // the code to check if the event should be sent. ++ Register tempreg = T0; ++#ifndef OPT_THREAD ++ Register thread = T8; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ if (JvmtiExport::can_post_interpreter_events()) { ++ Label L; ++ lw(tempreg, thread, in_bytes(JavaThread::interp_only_mode_offset())); ++ beq(tempreg, R0, L); ++ delayed()->nop(); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_method_entry)); ++ bind(L); ++ } ++ ++ { ++ SkipIfEqual skip_if(this, &DTraceMethodProbes, 0); ++ get_method(S3); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), ++ //Rthread, ++ thread, ++ //Rmethod); ++ S3); ++ } ++ ++} ++ ++void InterpreterMacroAssembler::notify_method_exit( ++ TosState state, NotifyMethodExitMode mode) { ++ Register tempreg = T0; ++#ifndef OPT_THREAD ++ Register thread = T8; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to ++ // track stack depth. If it is possible to enter interp_only_mode we add ++ // the code to check if the event should be sent. ++ if (mode == NotifyJVMTI && JvmtiExport::can_post_interpreter_events()) { ++ Label skip; ++ // Note: frame::interpreter_frame_result has a dependency on how the ++ // method result is saved across the call to post_method_exit. If this ++ // is changed then the interpreter_frame_result implementation will ++ // need to be updated too. ++ ++ // template interpreter will leave it on the top of the stack. 
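++ // Preserve the TOS value across the JVMTI upcall: push(state) saves it and
++ // pop(state) restores it once post_method_exit returns.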
++ push(state); ++ lw(tempreg, thread, in_bytes(JavaThread::interp_only_mode_offset())); ++ beq(tempreg, R0, skip); ++ delayed()->nop(); ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit)); ++ bind(skip); ++ pop(state); ++ } ++ ++ { ++ // Dtrace notification ++ SkipIfEqual skip_if(this, &DTraceMethodProbes, 0); ++ push(state); ++ get_method(S3); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), ++ //Rthread, Rmethod); ++ thread, S3); ++ pop(state); ++ } ++} ++ ++// Jump if ((*counter_addr += increment) & mask) satisfies the condition. ++void InterpreterMacroAssembler::increment_mask_and_jump(Address counter_addr, ++ int increment, int mask, ++ Register scratch, bool preloaded, ++ Condition cond, Label* where) { ++ assert_different_registers(scratch, AT); ++ ++ if (!preloaded) { ++ lw(scratch, counter_addr); ++ } ++ addiu32(scratch, scratch, increment); ++ sw(scratch, counter_addr); ++ ++ move(AT, mask); ++ andr(scratch, scratch, AT); ++ ++ if (cond == Assembler::zero) { ++ beq(scratch, R0, *where); ++ delayed()->nop(); ++ } else { ++ unimplemented(); ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/interp_masm_mips.hpp b/src/hotspot/cpu/mips/interp_masm_mips.hpp +--- a/src/hotspot/cpu/mips/interp_masm_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/interp_masm_mips.hpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,276 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_INTERP_MASM_MIPS_64_HPP ++#define CPU_MIPS_VM_INTERP_MASM_MIPS_64_HPP ++ ++#include "asm/assembler.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "interpreter/invocationCounter.hpp" ++#include "runtime/frame.hpp" ++ ++// This file specializes the assember with interpreter-specific macros ++ ++ ++class InterpreterMacroAssembler: public MacroAssembler { ++#ifndef CC_INTERP ++ private: ++ ++ Register _locals_register; // register that contains the pointer to the locals ++ Register _bcp_register; // register that contains the bcp ++ ++ protected: ++ // Interpreter specific version of call_VM_base ++ virtual void call_VM_leaf_base(address entry_point, ++ int number_of_arguments); ++ ++ virtual void call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions); ++ ++ // base routine for all dispatches ++ void dispatch_base(TosState state, address* table, bool verifyoop = true, bool generate_poll = false); ++#endif // CC_INTERP ++ ++ public: ++ void jump_to_entry(address entry); ++ // narrow int return value ++ void narrow(Register result); ++ ++ InterpreterMacroAssembler(CodeBuffer* code) : MacroAssembler(code), _locals_register(LVP), _bcp_register(BCP) {} ++ ++ void get_2_byte_integer_at_bcp(Register reg, Register tmp, int offset); ++ void get_4_byte_integer_at_bcp(Register reg, Register tmp, int offset); ++ ++ virtual void check_and_handle_popframe(Register java_thread); ++ virtual void check_and_handle_earlyret(Register java_thread); ++ ++ void load_earlyret_value(TosState state); ++ ++#ifdef CC_INTERP ++ void save_bcp() { /* not needed in c++ interpreter and harmless */ } ++ void restore_bcp() { /* not needed in c++ interpreter and harmless */ } ++ ++ // Helpers for runtime call arguments/results ++ void get_method(Register reg); ++ ++#else ++ ++ // Interpreter-specific registers ++ void save_bcp() { ++ sd(BCP, FP, frame::interpreter_frame_bcp_offset * wordSize); ++ } ++ ++ void restore_bcp() { ++ ld(BCP, FP, frame::interpreter_frame_bcp_offset * wordSize); ++ } ++ ++ void restore_locals() { ++ ld(LVP, FP, frame::interpreter_frame_locals_offset * wordSize); ++ } ++ ++ // Helpers for runtime call arguments/results ++ void get_method(Register reg) { ++ ld(reg, FP, frame::interpreter_frame_method_offset * wordSize); ++ } ++ ++ void get_const(Register reg){ ++ get_method(reg); ++ ld(reg, reg, in_bytes(Method::const_offset())); ++ } ++ ++ void get_constant_pool(Register reg) { ++ get_const(reg); ++ ld(reg, reg, in_bytes(ConstMethod::constants_offset())); ++ } ++ ++ void get_constant_pool_cache(Register reg) { ++ get_constant_pool(reg); ++ ld(reg, reg, ConstantPool::cache_offset_in_bytes()); ++ } ++ ++ void get_cpool_and_tags(Register cpool, Register tags) { ++ get_constant_pool(cpool); ++ ld(tags, cpool, ConstantPool::tags_offset_in_bytes()); ++ } ++ ++ void get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset); ++ void get_cache_and_index_at_bcp(Register cache, Register index, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_and_index_and_bytecode_at_bcp(Register cache, Register index, Register bytecode, int byte_no, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_entry_pointer_at_bcp(Register cache, Register tmp, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_index_at_bcp(Register index, int bcp_offset, size_t index_size = sizeof(u2)); ++ void 
get_method_counters(Register method, Register mcs, Label& skip); ++ ++ // load cpool->resolved_references(index); ++ void load_resolved_reference_at_index(Register result, Register index, Register tmp); ++ ++ // load cpool->resolved_klass_at(index) ++ void load_resolved_klass_at_index(Register cpool, // the constant pool (corrupted on return) ++ Register index, // the constant pool index (corrupted on return) ++ Register klass); // contains the Klass on return ++ ++ void pop_ptr( Register r = FSR); ++ void pop_i( Register r = FSR); ++ void pop_l( Register r = FSR); ++ void pop_f(FloatRegister r = FSF); ++ void pop_d(FloatRegister r = FSF); ++ ++ void push_ptr( Register r = FSR); ++ void push_i( Register r = FSR); ++ void push_l( Register r = FSR); ++ void push_f(FloatRegister r = FSF); ++ void push_d(FloatRegister r = FSF); ++ ++ void pop(Register r ) { ((MacroAssembler*)this)->pop(r); } ++ ++ void push(Register r ) { ((MacroAssembler*)this)->push(r); } ++ ++ void pop(TosState state); // transition vtos -> state ++ void push(TosState state); // transition state -> vtos ++ ++ void empty_expression_stack() { ++ ld(SP, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ // NULL last_sp until next java call ++ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ } ++ ++ // Super call_VM calls - correspond to MacroAssembler::call_VM(_leaf) calls ++ void load_ptr(int n, Register val); ++ void store_ptr(int n, Register val); ++ ++ // Generate a subtype check: branch to ok_is_subtype if sub_klass is ++ // a subtype of super_klass. ++ //void gen_subtype_check( Register sub_klass, Label &ok_is_subtype ); ++ void gen_subtype_check( Register Rsup_klass, Register sub_klass, Label &ok_is_subtype ); ++ ++ // Dispatching ++ void dispatch_prolog(TosState state, int step = 0); ++ void dispatch_epilog(TosState state, int step = 0); ++ void dispatch_only(TosState state, bool generate_poll = false); ++ void dispatch_only_normal(TosState state); ++ void dispatch_only_noverify(TosState state); ++ void dispatch_next(TosState state, int step = 0, bool generate_poll = false); ++ void dispatch_via (TosState state, address* table); ++ ++ // jump to an invoked target ++ void prepare_to_jump_from_interpreted(); ++ void jump_from_interpreted(Register method, Register temp); ++ ++ ++ // Returning from interpreted functions ++ // ++ // Removes the current activation (incl. unlocking of monitors) ++ // and sets up the return address. This code is also used for ++ // exception unwindwing. In that case, we do not want to throw ++ // IllegalMonitorStateExceptions, since that might get us into an ++ // infinite rethrow exception loop. ++ // Additionally this code is used for popFrame and earlyReturn. ++ // In popFrame case we want to skip throwing an exception, ++ // installing an exception, and notifying jvmdi. ++ // In earlyReturn case we only want to skip throwing an exception ++ // and installing an exception. 
++ void remove_activation(TosState state, Register ret_addr, ++ bool throw_monitor_exception = true, ++ bool install_monitor_exception = true, ++ bool notify_jvmdi = true); ++#endif // CC_INTERP ++ ++ // Object locking ++ void lock_object (Register lock_reg); ++ void unlock_object(Register lock_reg); ++ ++#ifndef CC_INTERP ++ ++ // Interpreter profiling operations ++ void set_method_data_pointer_for_bcp(); ++ void test_method_data_pointer(Register mdp, Label& zero_continue); ++ void verify_method_data_pointer(); ++ ++ void set_mdp_data_at(Register mdp_in, int constant, Register value); ++ void increment_mdp_data_at(Address data, bool decrement = false); ++ void increment_mdp_data_at(Register mdp_in, int constant, ++ bool decrement = false); ++ void increment_mdp_data_at(Register mdp_in, Register reg, int constant, ++ bool decrement = false); ++ void increment_mask_and_jump(Address counter_addr, ++ int increment, int mask, ++ Register scratch, bool preloaded, ++ Condition cond, Label* where); ++ void set_mdp_flag_at(Register mdp_in, int flag_constant); ++ void test_mdp_data_at(Register mdp_in, int offset, Register value, ++ Register test_value_out, ++ Label& not_equal_continue); ++ ++ void record_klass_in_profile(Register receiver, Register mdp, ++ Register reg2, bool is_virtual_call); ++ void record_klass_in_profile_helper(Register receiver, Register mdp, ++ Register reg2, int start_row, ++ Label& done, bool is_virtual_call); ++ ++ void update_mdp_by_offset(Register mdp_in, int offset_of_offset); ++ void update_mdp_by_offset(Register mdp_in, Register reg, int offset_of_disp); ++ void update_mdp_by_constant(Register mdp_in, int constant); ++ void update_mdp_for_ret(Register return_bci); ++ ++ void profile_taken_branch(Register mdp, Register bumped_count); ++ void profile_not_taken_branch(Register mdp); ++ void profile_call(Register mdp); ++ void profile_final_call(Register mdp); ++ void profile_virtual_call(Register receiver, Register mdp, ++ Register scratch2, ++ bool receiver_can_be_null = false); ++ void profile_called_method(Register method, Register mdp, Register reg2) NOT_JVMCI_RETURN; ++ void profile_ret(Register return_bci, Register mdp); ++ void profile_null_seen(Register mdp); ++ void profile_typecheck(Register mdp, Register klass, Register scratch); ++ void profile_typecheck_failed(Register mdp); ++ void profile_switch_default(Register mdp); ++ void profile_switch_case(Register index_in_scratch, Register mdp, ++ Register scratch2); ++ ++ // Debugging ++ // only if +VerifyOops && state == atos ++ void verify_oop(Register reg, TosState state = atos); ++ // only if +VerifyFPU && (state == ftos || state == dtos) ++ void verify_FPU(int stack_depth, TosState state = ftos); ++ ++ void profile_obj_type(Register obj, const Address& mdo_addr); ++ void profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual); ++ void profile_return_type(Register mdp, Register ret, Register tmp); ++ void profile_parameters_type(Register mdp, Register tmp1, Register tmp2); ++#endif // !CC_INTERP ++ ++ typedef enum { NotifyJVMTI, SkipNotifyJVMTI } NotifyMethodExitMode; ++ ++ // support for jvmti/dtrace ++ void notify_method_entry(); ++ void notify_method_exit(TosState state, NotifyMethodExitMode mode); ++}; ++ ++#endif // CPU_MIPS_VM_INTERP_MASM_MIPS_64_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/interpreterRT_mips_64.cpp b/src/hotspot/cpu/mips/interpreterRT_mips_64.cpp +--- a/src/hotspot/cpu/mips/interpreterRT_mips_64.cpp 
1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/interpreterRT_mips_64.cpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,252 @@ ++/* ++ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "memory/allocation.inline.hpp" ++#include "memory/universe.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/icache.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/signature.hpp" ++ ++#define __ _masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++// Implementation of SignatureHandlerGenerator ++InterpreterRuntime::SignatureHandlerGenerator::SignatureHandlerGenerator( ++ const methodHandle& method, CodeBuffer* buffer) : NativeSignatureIterator(method) { ++ _masm = new MacroAssembler(buffer); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::move(int from_offset, int to_offset) { ++ __ ld(temp(), from(), Interpreter::local_offset_in_bytes(from_offset)); ++ __ sd(temp(), to(), to_offset * longSize); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::box(int from_offset, int to_offset) { ++ __ addiu(temp(), from(),Interpreter::local_offset_in_bytes(from_offset) ); ++ __ lw(AT, from(), Interpreter::local_offset_in_bytes(from_offset) ); ++ ++ __ movz(temp(), R0, AT); ++ __ sw(temp(), to(), to_offset * wordSize); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::generate(uint64_t fingerprint) { ++ // generate code to handle arguments ++ iterate(fingerprint); ++ // return result handler ++ __ li(V0, AbstractInterpreter::result_handler(method()->result_type())); ++ // return ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ __ flush(); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_int() { ++ Argument jni_arg(jni_offset()); ++ if(jni_arg.is_Register()) { ++ __ lw(jni_arg.as_Register(), from(), Interpreter::local_offset_in_bytes(offset())); ++ } else { ++ __ lw(temp(), from(), Interpreter::local_offset_in_bytes(offset())); ++ __ sw(temp(), jni_arg.as_caller_address()); ++ } ++} ++ ++// the jvm specifies that long type takes 2 stack spaces, so in do_long(), _offset += 2. 
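++// Hence pass_long() and pass_double() below address the value via
++// Interpreter::local_offset_in_bytes(offset() + 1) rather than offset().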
++void InterpreterRuntime::SignatureHandlerGenerator::pass_long() { ++ Argument jni_arg(jni_offset()); ++ if(jni_arg.is_Register()) { ++ __ ld(jni_arg.as_Register(), from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ } else { ++ __ ld(temp(), from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ __ sd(temp(), jni_arg.as_caller_address()); ++ } ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_object() { ++ Argument jni_arg(jni_offset()); ++ ++ // the handle for a receiver will never be null ++ bool do_NULL_check = offset() != 0 || is_static(); ++ if (do_NULL_check) { ++ __ ld(AT, from(), Interpreter::local_offset_in_bytes(offset())); ++ __ daddiu((jni_arg.is_Register() ? jni_arg.as_Register() : temp()), from(), Interpreter::local_offset_in_bytes(offset())); ++ __ movz((jni_arg.is_Register() ? jni_arg.as_Register() : temp()), R0, AT); ++ } else { ++ __ daddiu(jni_arg.as_Register(), from(), Interpreter::local_offset_in_bytes(offset())); ++ } ++ ++ if (!jni_arg.is_Register()) ++ __ sd(temp(), jni_arg.as_caller_address()); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_float() { ++ Argument jni_arg(jni_offset()); ++ if(jni_arg.is_Register()) { ++ __ lwc1(jni_arg.as_FloatRegister(), from(), Interpreter::local_offset_in_bytes(offset())); ++ } else { ++ __ lw(temp(), from(), Interpreter::local_offset_in_bytes(offset())); ++ __ sw(temp(), jni_arg.as_caller_address()); ++ } ++} ++ ++// the jvm specifies that double type takes 2 stack spaces, so in do_double(), _offset += 2. ++void InterpreterRuntime::SignatureHandlerGenerator::pass_double() { ++ Argument jni_arg(jni_offset()); ++ if(jni_arg.is_Register()) { ++ __ ldc1(jni_arg.as_FloatRegister(), from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ } else { ++ __ ld(temp(), from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ __ sd(temp(), jni_arg.as_caller_address()); ++ } ++} ++ ++ ++Register InterpreterRuntime::SignatureHandlerGenerator::from() { return LVP; } ++Register InterpreterRuntime::SignatureHandlerGenerator::to() { return SP; } ++Register InterpreterRuntime::SignatureHandlerGenerator::temp() { return T8; } ++ ++// Implementation of SignatureHandlerLibrary ++ ++void SignatureHandlerLibrary::pd_set_handler(address handler) {} ++ ++ ++class SlowSignatureHandler ++ : public NativeSignatureIterator { ++ private: ++ address _from; ++ intptr_t* _to; ++ intptr_t* _reg_args; ++ intptr_t* _fp_identifiers; ++ unsigned int _num_args; ++ ++ virtual void pass_int() ++ { ++ jint from_obj = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); ++ _from -= Interpreter::stackElementSize; ++ ++ if (_num_args < Argument::n_register_parameters) { ++ *_reg_args++ = from_obj; ++ _num_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ virtual void pass_long() ++ { ++ intptr_t from_obj = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); ++ _from -= 2 * Interpreter::stackElementSize; ++ ++ if (_num_args < Argument::n_register_parameters) { ++ *_reg_args++ = from_obj; ++ _num_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ virtual void pass_object() ++ { ++ intptr_t *from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(0)); ++ _from -= Interpreter::stackElementSize; ++ if (_num_args < Argument::n_register_parameters) { ++ *_reg_args++ = (*from_addr == 0) ? NULL : (intptr_t) from_addr; ++ _num_args++; ++ } else { ++ *_to++ = (*from_addr == 0) ? 
NULL : (intptr_t) from_addr; ++ } ++ } ++ ++ virtual void pass_float() ++ { ++ jint from_obj = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); ++ _from -= Interpreter::stackElementSize; ++ ++ if (_num_args < Argument::n_float_register_parameters) { ++ *_reg_args++ = from_obj; ++ *_fp_identifiers |= (0x01 << (_num_args*2)); // mark as float ++ _num_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ virtual void pass_double() ++ { ++ intptr_t from_obj = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); ++ _from -= 2*Interpreter::stackElementSize; ++ ++ if (_num_args < Argument::n_float_register_parameters) { ++ *_reg_args++ = from_obj; ++ *_fp_identifiers |= (0x3 << (_num_args*2)); // mark as double ++ _num_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ public: ++ SlowSignatureHandler(methodHandle method, address from, intptr_t* to) ++ : NativeSignatureIterator(method) ++ { ++ _from = from; ++ _to = to; ++ ++ // see TemplateInterpreterGenerator::generate_slow_signature_handler() ++ _reg_args = to - Argument::n_register_parameters + jni_offset() - 1; ++ _fp_identifiers = to - 1; ++ *(int*) _fp_identifiers = 0; ++ _num_args = jni_offset(); ++ } ++}; ++ ++ ++IRT_ENTRY(address, ++ InterpreterRuntime::slow_signature_handler(JavaThread* thread, ++ Method* method, ++ intptr_t* from, ++ intptr_t* to)) ++ methodHandle m(thread, (Method*)method); ++ assert(m->is_native(), "sanity check"); ++ ++ // handle arguments ++ SlowSignatureHandler(m, (address)from, to).iterate(UCONST64(-1)); ++ ++ // return result handler ++ return Interpreter::result_handler(m->result_type()); ++IRT_END +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/interpreterRT_mips.hpp b/src/hotspot/cpu/mips/interpreterRT_mips.hpp +--- a/src/hotspot/cpu/mips/interpreterRT_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/interpreterRT_mips.hpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,60 @@ ++/* ++ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_INTERPRETERRT_MIPS_HPP ++#define CPU_MIPS_VM_INTERPRETERRT_MIPS_HPP ++ ++// This is included in the middle of class Interpreter. ++// Do not include files here. 
++ ++// native method calls ++ ++class SignatureHandlerGenerator: public NativeSignatureIterator { ++ private: ++ MacroAssembler* _masm; ++ ++ void move(int from_offset, int to_offset); ++ ++ void box(int from_offset, int to_offset); ++ void pass_int(); ++ void pass_long(); ++ void pass_object(); ++ void pass_float(); ++ void pass_double(); ++ ++ public: ++ // Creation ++ SignatureHandlerGenerator(const methodHandle& method, CodeBuffer* buffer); ++ ++ // Code generation ++ void generate(uint64_t fingerprint); ++ ++ // Code generation support ++ static Register from(); ++ static Register to(); ++ static Register temp(); ++}; ++ ++#endif // CPU_MIPS_VM_INTERPRETERRT_MIPS_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/javaFrameAnchor_mips.hpp b/src/hotspot/cpu/mips/javaFrameAnchor_mips.hpp +--- a/src/hotspot/cpu/mips/javaFrameAnchor_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/javaFrameAnchor_mips.hpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,87 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_JAVAFRAMEANCHOR_MIPS_HPP ++#define CPU_MIPS_VM_JAVAFRAMEANCHOR_MIPS_HPP ++ ++private: ++ ++ // FP value associated with _last_Java_sp: ++ intptr_t* volatile _last_Java_fp; // pointer is volatile not what it points to ++ ++public: ++ // Each arch must define reset, save, restore ++ // These are used by objects that only care about: ++ // 1 - initializing a new state (thread creation, javaCalls) ++ // 2 - saving a current state (javaCalls) ++ // 3 - restoring an old state (javaCalls) ++ ++ void clear(void) { ++ // clearing _last_Java_sp must be first ++ _last_Java_sp = NULL; ++ // fence? 
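++    // (A NULL _last_Java_sp marks the whole anchor as invalid: stack walkers and
++    // profilers ignore the remaining fields until sp is set again, which is why it
++    // is cleared first here and written back last in copy() below.)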
++ _last_Java_fp = NULL; ++ _last_Java_pc = NULL; ++ } ++ ++ void copy(JavaFrameAnchor* src) { ++ // In order to make sure the transition state is valid for "this" ++ // We must clear _last_Java_sp before copying the rest of the new data ++ // ++ // Hack Alert: Temporary bugfix for 4717480/4721647 ++ // To act like previous version (pd_cache_state) don't NULL _last_Java_sp ++ // unless the value is changing ++ // ++ if (_last_Java_sp != src->_last_Java_sp) ++ _last_Java_sp = NULL; ++ ++ _last_Java_fp = src->_last_Java_fp; ++ _last_Java_pc = src->_last_Java_pc; ++ // Must be last so profiler will always see valid frame if has_last_frame() is true ++ _last_Java_sp = src->_last_Java_sp; ++ } ++ ++ // Always walkable ++ bool walkable(void) { return true; } ++ // Never any thing to do since we are always walkable and can find address of return addresses ++ void make_walkable(JavaThread* thread) { } ++ ++ intptr_t* last_Java_sp(void) const { return _last_Java_sp; } ++ ++ address last_Java_pc(void) { return _last_Java_pc; } ++ ++private: ++ ++ static ByteSize last_Java_fp_offset() { return byte_offset_of(JavaFrameAnchor, _last_Java_fp); } ++ ++public: ++ ++ void set_last_Java_sp(intptr_t* sp) { _last_Java_sp = sp; } ++ ++ intptr_t* last_Java_fp(void) { return _last_Java_fp; } ++ // Assert (last_Java_sp == NULL || fp == NULL) ++ void set_last_Java_fp(intptr_t* fp) { _last_Java_fp = fp; } ++ ++#endif // CPU_MIPS_VM_JAVAFRAMEANCHOR_MIPS_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/jniFastGetField_mips_64.cpp b/src/hotspot/cpu/mips/jniFastGetField_mips_64.cpp +--- a/src/hotspot/cpu/mips/jniFastGetField_mips_64.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/jniFastGetField_mips_64.cpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,167 @@ ++/* ++ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/codeBlob.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "memory/resourceArea.hpp" ++#include "prims/jniFastGetField.hpp" ++#include "prims/jvm_misc.hpp" ++#include "runtime/safepoint.hpp" ++ ++#define __ masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++#define BUFFER_SIZE 30*wordSize ++ ++// Instead of issuing lfence for LoadLoad barrier, we create data dependency ++// between loads, which is more efficient than lfence. ++ ++address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { ++ const char *name = NULL; ++ switch (type) { ++ case T_BOOLEAN: name = "jni_fast_GetBooleanField"; break; ++ case T_BYTE: name = "jni_fast_GetByteField"; break; ++ case T_CHAR: name = "jni_fast_GetCharField"; break; ++ case T_SHORT: name = "jni_fast_GetShortField"; break; ++ case T_INT: name = "jni_fast_GetIntField"; break; ++ case T_LONG: name = "jni_fast_GetLongField"; break; ++ case T_FLOAT: name = "jni_fast_GetFloatField"; break; ++ case T_DOUBLE: name = "jni_fast_GetDoubleField"; break; ++ default: ShouldNotReachHere(); ++ } ++ ResourceMark rm; ++ BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE); ++ CodeBuffer cbuf(blob); ++ MacroAssembler* masm = new MacroAssembler(&cbuf); ++ address fast_entry = __ pc(); ++ ++ Label slow; ++ ++ // return pc RA ++ // jni env A0 ++ // obj A1 ++ // jfieldID A2 ++ ++ address counter_addr = SafepointSynchronize::safepoint_counter_addr(); ++ __ set64(AT, (long)counter_addr); ++ __ lw(T1, AT, 0); ++ ++ // Parameters(A0~A3) should not be modified, since they will be used in slow path ++ __ andi(AT, T1, 1); ++ __ bne(AT, R0, slow); ++ __ delayed()->nop(); ++ ++ __ move(T0, A1); ++ // Both T0 and T9 are clobbered by try_resolve_jobject_in_native. 
++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->try_resolve_jobject_in_native(masm, /* jni_env */ A0, T0, T9, slow); ++ ++ __ dsrl(T2, A2, 2); // offset ++ __ daddu(T0, T0, T2); ++ ++ assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); ++ speculative_load_pclist[count] = __ pc(); ++ switch (type) { ++ case T_BOOLEAN: __ lbu (V0, T0, 0); break; ++ case T_BYTE: __ lb (V0, T0, 0); break; ++ case T_CHAR: __ lhu (V0, T0, 0); break; ++ case T_SHORT: __ lh (V0, T0, 0); break; ++ case T_INT: __ lw (V0, T0, 0); break; ++ case T_LONG: __ ld (V0, T0, 0); break; ++ case T_FLOAT: __ lwc1(F0, T0, 0); break; ++ case T_DOUBLE: __ ldc1(F0, T0, 0); break; ++ default: ShouldNotReachHere(); ++ } ++ ++ __ set64(AT, (long)counter_addr); ++ __ lw(AT, AT, 0); ++ __ bne(T1, AT, slow); ++ __ delayed()->nop(); ++ ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ slowcase_entry_pclist[count++] = __ pc(); ++ __ bind (slow); ++ address slow_case_addr = NULL; ++ switch (type) { ++ case T_BOOLEAN: slow_case_addr = jni_GetBooleanField_addr(); break; ++ case T_BYTE: slow_case_addr = jni_GetByteField_addr(); break; ++ case T_CHAR: slow_case_addr = jni_GetCharField_addr(); break; ++ case T_SHORT: slow_case_addr = jni_GetShortField_addr(); break; ++ case T_INT: slow_case_addr = jni_GetIntField_addr(); break; ++ case T_LONG: slow_case_addr = jni_GetLongField_addr(); break; ++ case T_FLOAT: slow_case_addr = jni_GetFloatField_addr(); break; ++ case T_DOUBLE: slow_case_addr = jni_GetDoubleField_addr(); break; ++ default: ShouldNotReachHere(); ++ } ++ __ jmp(slow_case_addr); ++ __ delayed()->nop(); ++ ++ __ flush (); ++ ++ return fast_entry; ++} ++ ++address JNI_FastGetField::generate_fast_get_boolean_field() { ++ return generate_fast_get_int_field0(T_BOOLEAN); ++} ++ ++address JNI_FastGetField::generate_fast_get_byte_field() { ++ return generate_fast_get_int_field0(T_BYTE); ++} ++ ++address JNI_FastGetField::generate_fast_get_char_field() { ++ return generate_fast_get_int_field0(T_CHAR); ++} ++ ++address JNI_FastGetField::generate_fast_get_short_field() { ++ return generate_fast_get_int_field0(T_SHORT); ++} ++ ++address JNI_FastGetField::generate_fast_get_int_field() { ++ return generate_fast_get_int_field0(T_INT); ++} ++ ++address JNI_FastGetField::generate_fast_get_long_field() { ++ return generate_fast_get_int_field0(T_LONG); ++} ++ ++address JNI_FastGetField::generate_fast_get_float_field() { ++ return generate_fast_get_int_field0(T_FLOAT); ++} ++ ++address JNI_FastGetField::generate_fast_get_double_field() { ++ return generate_fast_get_int_field0(T_DOUBLE); ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/jniTypes_mips.hpp b/src/hotspot/cpu/mips/jniTypes_mips.hpp +--- a/src/hotspot/cpu/mips/jniTypes_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/jniTypes_mips.hpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,144 @@ ++/* ++ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_JNITYPES_MIPS_HPP ++#define CPU_MIPS_VM_JNITYPES_MIPS_HPP ++ ++#include "jni.h" ++#include "memory/allocation.hpp" ++#include "oops/oop.hpp" ++ ++// This file holds platform-dependent routines used to write primitive jni ++// types to the array of arguments passed into JavaCalls::call ++ ++class JNITypes : AllStatic { ++ // These functions write a java primitive type (in native format) ++ // to a java stack slot array to be passed as an argument to JavaCalls:calls. ++ // I.e., they are functionally 'push' operations if they have a 'pos' ++ // formal parameter. Note that jlong's and jdouble's are written ++ // _in reverse_ of the order in which they appear in the interpreter ++ // stack. This is because call stubs (see stubGenerator_sparc.cpp) ++ // reverse the argument list constructed by JavaCallArguments (see ++ // javaCalls.hpp). ++ ++private: ++ ++ // 32bit Helper routines. ++ static inline void put_int2r(jint *from, intptr_t *to) { *(jint *)(to++) = from[1]; ++ *(jint *)(to ) = from[0]; } ++ static inline void put_int2r(jint *from, intptr_t *to, int& pos) { put_int2r(from, to + pos); pos += 2; } ++ ++public: ++ // In MIPS64, the sizeof intptr_t is 8 bytes, and each unit in JavaCallArguments::_value_buffer[] ++ // is 8 bytes. ++ // If we only write the low 4 bytes with (jint *), the high 4-bits will be left with uncertain values. ++ // Then, in JavaCallArguments::parameters(), the whole 8 bytes of a T_INT parameter is loaded. ++ // This error occurs in ReflectInvoke.java ++ // The parameter of DD(int) should be 4 instead of 0x550000004. ++ // ++ // See: [runtime/javaCalls.hpp] ++ ++ static inline void put_int(jint from, intptr_t *to) { *(intptr_t *)(to + 0 ) = from; } ++ static inline void put_int(jint from, intptr_t *to, int& pos) { *(intptr_t *)(to + pos++) = from; } ++ static inline void put_int(jint *from, intptr_t *to, int& pos) { *(intptr_t *)(to + pos++) = *from; } ++ ++ // Longs are stored in native format in one JavaCallArgument slot at ++ // *(to). ++ // In theory, *(to + 1) is an empty slot. But, for several Java2D testing programs (TestBorderLayout, SwingTest), ++ // *(to + 1) must contains a copy of the long value. Otherwise it will corrupts. ++ static inline void put_long(jlong from, intptr_t *to) { ++ *(jlong*) (to + 1) = from; ++ *(jlong*) (to) = from; ++ } ++ ++ // A long parameter occupies two slot. ++ // It must fit the layout rule in methodHandle. 
++ // ++ // See: [runtime/reflection.cpp] Reflection::invoke() ++ // assert(java_args.size_of_parameters() == method->size_of_parameters(), "just checking"); ++ ++ static inline void put_long(jlong from, intptr_t *to, int& pos) { ++ *(jlong*) (to + 1 + pos) = from; ++ *(jlong*) (to + pos) = from; ++ pos += 2; ++ } ++ ++ static inline void put_long(jlong *from, intptr_t *to, int& pos) { ++ *(jlong*) (to + 1 + pos) = *from; ++ *(jlong*) (to + pos) = *from; ++ pos += 2; ++ } ++ ++ // Oops are stored in native format in one JavaCallArgument slot at *to. ++ static inline void put_obj(oop from, intptr_t *to) { *(oop *)(to + 0 ) = from; } ++ static inline void put_obj(oop from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = from; } ++ static inline void put_obj(oop *from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = *from; } ++ ++ // Floats are stored in native format in one JavaCallArgument slot at *to. ++ static inline void put_float(jfloat from, intptr_t *to) { *(jfloat *)(to + 0 ) = from; } ++ static inline void put_float(jfloat from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = from; } ++ static inline void put_float(jfloat *from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = *from; } ++ ++#undef _JNI_SLOT_OFFSET ++#define _JNI_SLOT_OFFSET 0 ++ ++ // Longs are stored in native format in one JavaCallArgument slot at ++ // *(to). ++ // In theory, *(to + 1) is an empty slot. But, for several Java2D testing programs (TestBorderLayout, SwingTest), ++ // *(to + 1) must contains a copy of the long value. Otherwise it will corrupts. ++ static inline void put_double(jdouble from, intptr_t *to) { ++ *(jdouble*) (to + 1) = from; ++ *(jdouble*) (to) = from; ++ } ++ ++ // A long parameter occupies two slot. ++ // It must fit the layout rule in methodHandle. ++ // ++ // See: [runtime/reflection.cpp] Reflection::invoke() ++ // assert(java_args.size_of_parameters() == method->size_of_parameters(), "just checking"); ++ ++ static inline void put_double(jdouble from, intptr_t *to, int& pos) { ++ *(jdouble*) (to + 1 + pos) = from; ++ *(jdouble*) (to + pos) = from; ++ pos += 2; ++ } ++ ++ static inline void put_double(jdouble *from, intptr_t *to, int& pos) { ++ *(jdouble*) (to + 1 + pos) = *from; ++ *(jdouble*) (to + pos) = *from; ++ pos += 2; ++ } ++ ++ // The get_xxx routines, on the other hand, actually _do_ fetch ++ // java primitive types from the interpreter stack. ++ static inline jint get_int (intptr_t *from) { return *(jint *) from; } ++ static inline jlong get_long (intptr_t *from) { return *(jlong *) (from + _JNI_SLOT_OFFSET); } ++ static inline oop get_obj (intptr_t *from) { return *(oop *) from; } ++ static inline jfloat get_float (intptr_t *from) { return *(jfloat *) from; } ++ static inline jdouble get_double(intptr_t *from) { return *(jdouble *)(from + _JNI_SLOT_OFFSET); } ++#undef _JNI_SLOT_OFFSET ++}; ++ ++#endif // CPU_MIPS_VM_JNITYPES_MIPS_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/macroAssembler_mips.cpp b/src/hotspot/cpu/mips/macroAssembler_mips.cpp +--- a/src/hotspot/cpu/mips/macroAssembler_mips.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/macroAssembler_mips.cpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,4257 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2017, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#include "precompiled.hpp"
++#include "jvm.h"
++#include "asm/assembler.hpp"
++#include "asm/assembler.inline.hpp"
++#include "asm/macroAssembler.inline.hpp"
++#include "compiler/disassembler.hpp"
++#include "gc/shared/barrierSet.hpp"
++#include "gc/shared/barrierSetAssembler.hpp"
++#include "gc/shared/collectedHeap.inline.hpp"
++#include "interpreter/interpreter.hpp"
++#include "memory/resourceArea.hpp"
++#include "memory/universe.hpp"
++#include "nativeInst_mips.hpp"
++#include "prims/methodHandles.hpp"
++#include "runtime/biasedLocking.hpp"
++#include "runtime/interfaceSupport.inline.hpp"
++#include "runtime/objectMonitor.hpp"
++#include "runtime/os.hpp"
++#include "runtime/safepoint.hpp"
++#include "runtime/safepointMechanism.hpp"
++#include "runtime/sharedRuntime.hpp"
++#include "runtime/stubRoutines.hpp"
++#include "utilities/macros.hpp"
++
++#ifdef COMPILER2
++#include "opto/intrinsicnode.hpp"
++#endif
++
++#define T0 RT0
++#define T1 RT1
++#define T2 RT2
++#define T3 RT3
++#define T8 RT8
++#define T9 RT9
++
++// Implementation of MacroAssembler
++
++intptr_t MacroAssembler::i[32] = {0};
++float MacroAssembler::f[32] = {0.0};
++
++void MacroAssembler::print(outputStream *s) {
++  unsigned int k;
++  for(k=0; k<32; k++) {
++    s->print_cr("i%d = 0x%.16lx", k, i[k]);
++  }
++  s->cr();
++
++  for(k=0; k<32; k++) {
++    s->print_cr("f%d = %f", k, f[k]);
++  }
++  s->cr();
++}
++
++int MacroAssembler::i_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->i[k]; }
++int MacroAssembler::f_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->f[k]; }
++
++void MacroAssembler::save_registers(MacroAssembler *masm) {
++#define __ masm->
++  for(int k=0; k<32; k++) {
++    __ sw (as_Register(k), A0, i_offset(k));
++  }
++
++  for(int k=0; k<32; k++) {
++    __ swc1 (as_FloatRegister(k), A0, f_offset(k));
++  }
++#undef __
++}
++
++void MacroAssembler::restore_registers(MacroAssembler *masm) {
++#define __ masm->
++  for(int k=0; k<32; k++) {
++    __ lw (as_Register(k), A0, i_offset(k));
++  }
++
++  for(int k=0; k<32; k++) {
++    __ lwc1 (as_FloatRegister(k), A0, f_offset(k));
++  }
++#undef __
++}
++
++
++void MacroAssembler::pd_patch_instruction(address branch, address target) {
++  jint& stub_inst = *(jint*) branch;
++  jint *pc = (jint *)branch;
++
++  if((opcode(stub_inst) == special_op) && (special(stub_inst) == daddu_op)) {
++    //b_far:
++    //  move(AT, RA); // daddu
++    //  emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1));
++    //  nop();
++    //  lui(T9, 0); // to be patched
++    //  ori(T9, 0);
++    //  daddu(T9, T9, RA);
++    //  move(RA, AT);
++    //  jr(T9);
++
++    assert(opcode(pc[3]) == lui_op
++        &&
opcode(pc[4]) == ori_op ++ && special(pc[5]) == daddu_op, "Not a branch label patch"); ++ if(!(opcode(pc[3]) == lui_op ++ && opcode(pc[4]) == ori_op ++ && special(pc[5]) == daddu_op)) { tty->print_cr("Not a branch label patch"); } ++ ++ int offset = target - branch; ++ if (!is_simm16(offset)) { ++ pc[3] = (pc[3] & 0xffff0000) | high16(offset - 12); ++ pc[4] = (pc[4] & 0xffff0000) | low16(offset - 12); ++ } else { ++ // revert to "beq + nop" ++ CodeBuffer cb(branch, 4 * 10); ++ MacroAssembler masm(&cb); ++#define __ masm. ++ __ b(target); ++ __ delayed()->nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ } ++ return; ++ } else if (special(pc[4]) == jr_op ++ && opcode(pc[4]) == special_op ++ && (((opcode(pc[0]) == lui_op) || opcode(pc[0]) == daddiu_op) || (opcode(pc[0]) == ori_op))) { ++ //jmp_far: ++ // patchable_set48(T9, target); ++ // jr(T9); ++ // nop(); ++ ++ CodeBuffer cb(branch, 4 * 4); ++ MacroAssembler masm(&cb); ++ masm.patchable_set48(T9, (long)(target)); ++ return; ++ } ++ ++#ifndef PRODUCT ++ if (!is_simm16((target - branch - 4) >> 2)) { ++ tty->print_cr("Illegal patching: branch = " INTPTR_FORMAT ", target = " INTPTR_FORMAT, p2i(branch), p2i(target)); ++ tty->print_cr("======= Start decoding at branch = " INTPTR_FORMAT " =======", p2i(branch)); ++ Disassembler::decode(branch - 4 * 16, branch + 4 * 16, tty); ++ tty->print_cr("======= End of decoding ======="); ++ } ++#endif ++ ++ stub_inst = patched_branch(target - branch, stub_inst, 0); ++} ++ ++static inline address first_cache_address() { ++ return CodeCache::low_bound() + sizeof(HeapBlock::Header); ++} ++ ++static inline address last_cache_address() { ++ return CodeCache::high_bound() - Assembler::InstructionSize; ++} ++ ++int MacroAssembler::call_size(address target, bool far, bool patchable) { ++ if (patchable) return 6 << Assembler::LogInstructionSize; ++ if (!far) return 2 << Assembler::LogInstructionSize; // jal + nop ++ return (insts_for_set64((jlong)target) + 2) << Assembler::LogInstructionSize; ++} ++ ++// Can we reach target using jal/j from anywhere ++// in the code cache (because code can be relocated)? 
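++// A MIPS j/jal instruction encodes a 26-bit instruction index, so a direct jump
++// can only land inside the current 256 MB (2^28-byte) aligned segment of the
++// address space. fit_in_jal(cl, ch) is therefore expected to check that both
++// ends of the code cache fall into one such segment, in which case j/jal is
++// always safe regardless of where the code gets relocated.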
++bool MacroAssembler::reachable_from_cache(address target) { ++ address cl = first_cache_address(); ++ address ch = last_cache_address(); ++ ++ return (cl <= target) && (target <= ch) && fit_in_jal(cl, ch); ++} ++ ++bool MacroAssembler::reachable_from_cache() { ++ if (ForceUnreachable) { ++ return false; ++ } else { ++ address cl = first_cache_address(); ++ address ch = last_cache_address(); ++ ++ return fit_in_jal(cl, ch); ++ } ++} ++ ++void MacroAssembler::general_jump(address target) { ++ if (reachable_from_cache(target)) { ++ j(target); ++ delayed()->nop(); ++ } else { ++ set64(T9, (long)target); ++ jr(T9); ++ delayed()->nop(); ++ } ++} ++ ++int MacroAssembler::insts_for_general_jump(address target) { ++ if (reachable_from_cache(target)) { ++ //j(target); ++ //nop(); ++ return 2; ++ } else { ++ //set64(T9, (long)target); ++ //jr(T9); ++ //nop(); ++ return insts_for_set64((jlong)target) + 2; ++ } ++} ++ ++void MacroAssembler::patchable_jump(address target) { ++ if (reachable_from_cache(target)) { ++ nop(); ++ nop(); ++ nop(); ++ nop(); ++ j(target); ++ delayed()->nop(); ++ } else { ++ patchable_set48(T9, (long)target); ++ jr(T9); ++ delayed()->nop(); ++ } ++} ++ ++int MacroAssembler::insts_for_patchable_jump(address target) { ++ return 6; ++} ++ ++void MacroAssembler::general_call(address target) { ++ if (reachable_from_cache(target)) { ++ jal(target); ++ delayed()->nop(); ++ } else { ++ set64(T9, (long)target); ++ jalr(T9); ++ delayed()->nop(); ++ } ++} ++ ++int MacroAssembler::insts_for_general_call(address target) { ++ if (reachable_from_cache(target)) { ++ //jal(target); ++ //nop(); ++ return 2; ++ } else { ++ //set64(T9, (long)target); ++ //jalr(T9); ++ //nop(); ++ return insts_for_set64((jlong)target) + 2; ++ } ++} ++ ++void MacroAssembler::patchable_call(address target) { ++ if (reachable_from_cache(target)) { ++ nop(); ++ nop(); ++ nop(); ++ nop(); ++ jal(target); ++ delayed()->nop(); ++ } else { ++ patchable_set48(T9, (long)target); ++ jalr(T9); ++ delayed()->nop(); ++ } ++} ++ ++int MacroAssembler::insts_for_patchable_call(address target) { ++ return 6; ++} ++ ++// Maybe emit a call via a trampoline. If the code cache is small ++// trampolines won't be emitted. ++ ++address MacroAssembler::trampoline_call(AddressLiteral entry, CodeBuffer *cbuf) { ++ assert(JavaThread::current()->is_Compiler_thread(), "just checking"); ++ assert(entry.rspec().type() == relocInfo::runtime_call_type ++ || entry.rspec().type() == relocInfo::opt_virtual_call_type ++ || entry.rspec().type() == relocInfo::static_call_type ++ || entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type"); ++ ++ address target = entry.target(); ++ if (!reachable_from_cache()) { ++ address stub = emit_trampoline_stub(offset(), target); ++ if (stub == NULL) { ++ return NULL; // CodeCache is full ++ } ++ } ++ ++ if (cbuf) cbuf->set_insts_mark(); ++ relocate(entry.rspec()); ++ ++ if (reachable_from_cache()) { ++ nop(); ++ nop(); ++ nop(); ++ nop(); ++ jal(target); ++ delayed()->nop(); ++ } else { ++ // load the call target from the trampoline stub ++ // branch ++ long dest = (long)pc(); ++ dest += (dest & 0x8000) << 1; ++ lui(T9, dest >> 32); ++ ori(T9, T9, split_low(dest >> 16)); ++ dsll(T9, T9, 16); ++ ld(T9, T9, simm16(split_low(dest))); ++ jalr(T9); ++ delayed()->nop(); ++ } ++ return pc(); ++} ++ ++// Emit a trampoline stub for a call to a target which is too far away. 
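++// The stub itself is just a word-aligned 64-bit slot in the stub section that
++// holds the destination address: trampoline_call() above materializes the slot's
++// address into T9 (lui/ori/dsll/ld) and calls through jalr(T9), while the
++// trampoline_stub_Relocation records which call instruction the slot belongs to.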
++address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset, ++ address dest) { ++ // Max stub size: alignment nop, TrampolineStub. ++ address stub = start_a_stub(NativeInstruction::nop_instruction_size ++ + NativeCallTrampolineStub::instruction_size); ++ if (stub == NULL) { ++ return NULL; // CodeBuffer::expand failed ++ } ++ ++ // Create a trampoline stub relocation which relates this trampoline stub ++ // with the call instruction at insts_call_instruction_offset in the ++ // instructions code-section. ++ align(wordSize); ++ relocate(trampoline_stub_Relocation::spec(code()->insts()->start() ++ + insts_call_instruction_offset)); ++ emit_int64((int64_t)dest); ++ end_a_stub(); ++ return stub; ++} ++ ++void MacroAssembler::beq_far(Register rs, Register rt, address entry) { ++ u_char * cur_pc = pc(); ++ ++ // Near/Far jump ++ if(is_simm16((entry - pc() - 4) / 4)) { ++ Assembler::beq(rs, rt, offset(entry)); ++ } else { ++ Label not_jump; ++ bne(rs, rt, not_jump); ++ delayed()->nop(); ++ ++ b_far(entry); ++ delayed()->nop(); ++ ++ bind(not_jump); ++ has_delay_slot(); ++ } ++} ++ ++void MacroAssembler::beq_far(Register rs, Register rt, Label& L) { ++ if (L.is_bound()) { ++ beq_far(rs, rt, target(L)); ++ } else { ++ u_char * cur_pc = pc(); ++ Label not_jump; ++ bne(rs, rt, not_jump); ++ delayed()->nop(); ++ ++ b_far(L); ++ delayed()->nop(); ++ ++ bind(not_jump); ++ has_delay_slot(); ++ } ++} ++ ++void MacroAssembler::bne_far(Register rs, Register rt, address entry) { ++ u_char * cur_pc = pc(); ++ ++ //Near/Far jump ++ if(is_simm16((entry - pc() - 4) / 4)) { ++ Assembler::bne(rs, rt, offset(entry)); ++ } else { ++ Label not_jump; ++ beq(rs, rt, not_jump); ++ delayed()->nop(); ++ ++ b_far(entry); ++ delayed()->nop(); ++ ++ bind(not_jump); ++ has_delay_slot(); ++ } ++} ++ ++void MacroAssembler::bne_far(Register rs, Register rt, Label& L) { ++ if (L.is_bound()) { ++ bne_far(rs, rt, target(L)); ++ } else { ++ u_char * cur_pc = pc(); ++ Label not_jump; ++ beq(rs, rt, not_jump); ++ delayed()->nop(); ++ ++ b_far(L); ++ delayed()->nop(); ++ ++ bind(not_jump); ++ has_delay_slot(); ++ } ++} ++ ++void MacroAssembler::beq_long(Register rs, Register rt, Label& L) { ++ Label not_taken; ++ ++ bne(rs, rt, not_taken); ++ delayed()->nop(); ++ ++ jmp_far(L); ++ ++ bind(not_taken); ++} ++ ++void MacroAssembler::bne_long(Register rs, Register rt, Label& L) { ++ Label not_taken; ++ ++ beq(rs, rt, not_taken); ++ delayed()->nop(); ++ ++ jmp_far(L); ++ ++ bind(not_taken); ++} ++ ++void MacroAssembler::bc1t_long(Label& L) { ++ Label not_taken; ++ ++ bc1f(not_taken); ++ delayed()->nop(); ++ ++ jmp_far(L); ++ ++ bind(not_taken); ++} ++ ++void MacroAssembler::bc1f_long(Label& L) { ++ Label not_taken; ++ ++ bc1t(not_taken); ++ delayed()->nop(); ++ ++ jmp_far(L); ++ ++ bind(not_taken); ++} ++ ++void MacroAssembler::b_far(Label& L) { ++ if (L.is_bound()) { ++ b_far(target(L)); ++ } else { ++ volatile address dest = target(L); ++// ++// MacroAssembler::pd_patch_instruction branch=55651ed514, target=55651ef6d8 ++// 0x00000055651ed514: daddu at, ra, zero ++// 0x00000055651ed518: [4110001]bgezal zero, 0x00000055651ed520 ++// ++// 0x00000055651ed51c: sll zero, zero, 0 ++// 0x00000055651ed520: lui t9, 0x0 ++// 0x00000055651ed524: ori t9, t9, 0x21b8 ++// 0x00000055651ed528: daddu t9, t9, ra ++// 0x00000055651ed52c: daddu ra, at, zero ++// 0x00000055651ed530: jr t9 ++// 0x00000055651ed534: sll zero, zero, 0 ++// ++ move(AT, RA); ++ emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1)); ++ nop(); ++ lui(T9, 0); // to be 
patched ++ ori(T9, T9, 0); ++ daddu(T9, T9, RA); ++ move(RA, AT); ++ jr(T9); ++ } ++} ++ ++void MacroAssembler::b_far(address entry) { ++ u_char * cur_pc = pc(); ++ ++ // Near/Far jump ++ if(is_simm16((entry - pc() - 4) / 4)) { ++ b(offset(entry)); ++ } else { ++ // address must be bounded ++ move(AT, RA); ++ emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1)); ++ nop(); ++ li32(T9, entry - pc()); ++ daddu(T9, T9, RA); ++ move(RA, AT); ++ jr(T9); ++ } ++} ++ ++void MacroAssembler::ld_ptr(Register rt, Register base, Register offset) { ++ addu_long(AT, base, offset); ++ ld_ptr(rt, AT, 0); ++} ++ ++void MacroAssembler::st_ptr(Register rt, Register base, Register offset) { ++ guarantee(AT != rt, "AT must not equal rt"); ++ addu_long(AT, base, offset); ++ st_ptr(rt, AT, 0); ++} ++ ++Address MacroAssembler::as_Address(AddressLiteral adr) { ++ return Address(adr.target(), adr.rspec()); ++} ++ ++Address MacroAssembler::as_Address(ArrayAddress adr) { ++ return Address::make_array(adr); ++} ++ ++// tmp_reg1 and tmp_reg2 should be saved outside of atomic_inc32 (caller saved). ++void MacroAssembler::atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2) { ++ Label again; ++ ++ li(tmp_reg1, counter_addr); ++ bind(again); ++ if (UseSyncLevel >= 10000 || UseSyncLevel == 1000 || UseSyncLevel == 4000) sync(); ++ ll(tmp_reg2, tmp_reg1, 0); ++ addiu(tmp_reg2, tmp_reg2, inc); ++ sc(tmp_reg2, tmp_reg1, 0); ++ beq(tmp_reg2, R0, again); ++ delayed()->nop(); ++} ++ ++void MacroAssembler::reserved_stack_check() { ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ get_thread(thread); ++#endif ++ // testing if reserved zone needs to be enabled ++ Label no_reserved_zone_enabling; ++ ++ ld(AT, Address(thread, JavaThread::reserved_stack_activation_offset())); ++ dsubu(AT, SP, AT); ++ bltz(AT, no_reserved_zone_enabling); ++ delayed()->nop(); ++ ++ enter(); // RA and FP are live. ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), thread); ++ leave(); ++ ++ // We have already removed our own frame. ++ // throw_delayed_StackOverflowError will think that it's been ++ // called by our caller. 
++ li(AT, (long)StubRoutines::throw_delayed_StackOverflowError_entry()); ++ jr(AT); ++ delayed()->nop(); ++ should_not_reach_here(); ++ ++ bind(no_reserved_zone_enabling); ++} ++ ++int MacroAssembler::biased_locking_enter(Register lock_reg, ++ Register obj_reg, ++ Register swap_reg, ++ Register tmp_reg, ++ bool swap_reg_contains_mark, ++ Label& done, ++ Label* slow_case, ++ BiasedLockingCounters* counters) { ++ assert(UseBiasedLocking, "why call this otherwise?"); ++ bool need_tmp_reg = false; ++ if (tmp_reg == noreg) { ++ need_tmp_reg = true; ++ tmp_reg = T9; ++ } ++ assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, AT); ++ assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); ++ Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); ++ Address saved_mark_addr(lock_reg, 0); ++ ++ // Biased locking ++ // See whether the lock is currently biased toward our thread and ++ // whether the epoch is still valid ++ // Note that the runtime guarantees sufficient alignment of JavaThread ++ // pointers to allow age to be placed into low bits ++ // First check to see whether biasing is even enabled for this object ++ Label cas_label; ++ int null_check_offset = -1; ++ if (!swap_reg_contains_mark) { ++ null_check_offset = offset(); ++ ld_ptr(swap_reg, mark_addr); ++ } ++ ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ move(tmp_reg, swap_reg); ++ andi(tmp_reg, tmp_reg, markOopDesc::biased_lock_mask_in_place); ++ daddiu(AT, R0, markOopDesc::biased_lock_pattern); ++ dsubu(AT, AT, tmp_reg); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ ++ bne(AT, R0, cas_label); ++ delayed()->nop(); ++ ++ ++ // The bias pattern is present in the object's header. Need to check ++ // whether the bias owner and the epoch are both still current. ++ // Note that because there is no current thread register on MIPS we ++ // need to store off the mark word we read out of the object to ++ // avoid reloading it and needing to recheck invariants below. This ++ // store is unfortunate but it makes the overall code shorter and ++ // simpler. ++ st_ptr(swap_reg, saved_mark_addr); ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ if (swap_reg_contains_mark) { ++ null_check_offset = offset(); ++ } ++ load_prototype_header(tmp_reg, obj_reg); ++ xorr(tmp_reg, tmp_reg, swap_reg); ++#ifndef OPT_THREAD ++ get_thread(swap_reg); ++ xorr(swap_reg, swap_reg, tmp_reg); ++#else ++ xorr(swap_reg, TREG, tmp_reg); ++#endif ++ ++ move(AT, ~((int) markOopDesc::age_mask_in_place)); ++ andr(swap_reg, swap_reg, AT); ++ ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(swap_reg, R0, L); ++ delayed()->nop(); ++ push(tmp_reg); ++ push(A0); ++ atomic_inc32((address)BiasedLocking::biased_lock_entry_count_addr(), 1, A0, tmp_reg); ++ pop(A0); ++ pop(tmp_reg); ++ bind(L); ++ } ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ beq(swap_reg, R0, done); ++ delayed()->nop(); ++ Label try_revoke_bias; ++ Label try_rebias; ++ ++ // At this point we know that the header has the bias pattern and ++ // that we are not the bias owner in the current epoch. We need to ++ // figure out more details about the state of the header in order to ++ // know what operations can be legally performed on the object's ++ // header. ++ ++ // If the low three bits in the xor result aren't clear, that means ++ // the prototype header is no longer biased and we have to revoke ++ // the bias on this object. 
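++  // (At this point swap_reg holds mark ^ prototype_header ^ thread with the age
++  // bits masked off, so the tests below can inspect the lock and epoch fields of
++  // that xor result individually.)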
++ ++ move(AT, markOopDesc::biased_lock_mask_in_place); ++ andr(AT, swap_reg, AT); ++ bne(AT, R0, try_revoke_bias); ++ delayed()->nop(); ++ // Biasing is still enabled for this data type. See whether the ++ // epoch of the current bias is still valid, meaning that the epoch ++ // bits of the mark word are equal to the epoch bits of the ++ // prototype header. (Note that the prototype header's epoch bits ++ // only change at a safepoint.) If not, attempt to rebias the object ++ // toward the current thread. Note that we must be absolutely sure ++ // that the current epoch is invalid in order to do this because ++ // otherwise the manipulations it performs on the mark word are ++ // illegal. ++ ++ move(AT, markOopDesc::epoch_mask_in_place); ++ andr(AT,swap_reg, AT); ++ bne(AT, R0, try_rebias); ++ delayed()->nop(); ++ // The epoch of the current bias is still valid but we know nothing ++ // about the owner; it might be set or it might be clear. Try to ++ // acquire the bias of the object using an atomic operation. If this ++ // fails we will go in to the runtime to revoke the object's bias. ++ // Note that we first construct the presumed unbiased header so we ++ // don't accidentally blow away another thread's valid bias. ++ ++ ld_ptr(swap_reg, saved_mark_addr); ++ ++ move(AT, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); ++ andr(swap_reg, swap_reg, AT); ++ ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++#ifndef OPT_THREAD ++ get_thread(tmp_reg); ++ orr(tmp_reg, tmp_reg, swap_reg); ++#else ++ orr(tmp_reg, TREG, swap_reg); ++#endif ++ cmpxchg(Address(obj_reg, 0), swap_reg, tmp_reg, AT, false, false); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ // If the biasing toward our thread failed, this means that ++ // another thread succeeded in biasing it toward itself and we ++ // need to revoke that bias. The revocation will occur in the ++ // interpreter runtime in the slow case. ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(AT, R0, L); ++ delayed()->nop(); ++ push(tmp_reg); ++ push(A0); ++ atomic_inc32((address)BiasedLocking::anonymously_biased_lock_entry_count_addr(), 1, A0, tmp_reg); ++ pop(A0); ++ pop(tmp_reg); ++ bind(L); ++ } ++ if (slow_case != NULL) { ++ beq_far(AT, R0, *slow_case); ++ delayed()->nop(); ++ } ++ b(done); ++ delayed()->nop(); ++ ++ bind(try_rebias); ++ // At this point we know the epoch has expired, meaning that the ++ // current "bias owner", if any, is actually invalid. Under these ++ // circumstances _only_, we are allowed to use the current header's ++ // value as the comparison value when doing the cas to acquire the ++ // bias in the current epoch. In other words, we allow transfer of ++ // the bias from one thread to another directly in this situation. ++ // ++ // FIXME: due to a lack of registers we currently blow away the age ++ // bits in this situation. Should attempt to preserve them. ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ load_prototype_header(tmp_reg, obj_reg); ++#ifndef OPT_THREAD ++ get_thread(swap_reg); ++ orr(tmp_reg, tmp_reg, swap_reg); ++#else ++ orr(tmp_reg, tmp_reg, TREG); ++#endif ++ ld_ptr(swap_reg, saved_mark_addr); ++ ++ cmpxchg(Address(obj_reg, 0), swap_reg, tmp_reg, AT, false, false); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ // If the biasing toward our thread failed, then another thread ++ // succeeded in biasing it toward itself and we need to revoke that ++ // bias. The revocation will occur in the runtime in the slow case. 
++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(AT, R0, L); ++ delayed()->nop(); ++ push(AT); ++ push(tmp_reg); ++ atomic_inc32((address)BiasedLocking::rebiased_lock_entry_count_addr(), 1, AT, tmp_reg); ++ pop(tmp_reg); ++ pop(AT); ++ bind(L); ++ } ++ if (slow_case != NULL) { ++ beq_far(AT, R0, *slow_case); ++ delayed()->nop(); ++ } ++ ++ b(done); ++ delayed()->nop(); ++ bind(try_revoke_bias); ++ // The prototype mark in the klass doesn't have the bias bit set any ++ // more, indicating that objects of this data type are not supposed ++ // to be biased any more. We are going to try to reset the mark of ++ // this object to the prototype value and fall through to the ++ // CAS-based locking scheme. Note that if our CAS fails, it means ++ // that another thread raced us for the privilege of revoking the ++ // bias of this particular object, so it's okay to continue in the ++ // normal locking code. ++ // ++ // FIXME: due to a lack of registers we currently blow away the age ++ // bits in this situation. Should attempt to preserve them. ++ ld_ptr(swap_reg, saved_mark_addr); ++ ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ load_prototype_header(tmp_reg, obj_reg); ++ cmpxchg(Address(obj_reg, 0), swap_reg, tmp_reg, AT, false, false); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ // Fall through to the normal CAS-based lock, because no matter what ++ // the result of the above CAS, some thread must have succeeded in ++ // removing the bias bit from the object's header. ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(AT, R0, L); ++ delayed()->nop(); ++ push(AT); ++ push(tmp_reg); ++ atomic_inc32((address)BiasedLocking::revoked_lock_entry_count_addr(), 1, AT, tmp_reg); ++ pop(tmp_reg); ++ pop(AT); ++ bind(L); ++ } ++ ++ bind(cas_label); ++ return null_check_offset; ++} ++ ++void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) { ++ assert(UseBiasedLocking, "why call this otherwise?"); ++ ++ // Check for biased locking unlock case, which is a no-op ++ // Note: we do not have to check the thread ID for two reasons. ++ // First, the interpreter checks for IllegalMonitorStateException at ++ // a higher level. Second, if the bias was revoked while we held the ++ // lock, the object could not be rebiased toward another thread, so ++ // the bias bit would be clear. ++ ld(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); ++ andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place); ++ daddiu(AT, R0, markOopDesc::biased_lock_pattern); ++ ++ beq(AT, temp_reg, done); ++ delayed()->nop(); ++} ++ ++// the stack pointer adjustment is needed. 
see InterpreterMacroAssembler::super_call_VM_leaf ++// this method will handle the stack problem, you need not to preserve the stack space for the argument now ++void MacroAssembler::call_VM_leaf_base(address entry_point, int number_of_arguments) { ++ Label L, E; ++ ++ assert(number_of_arguments <= 4, "just check"); ++ ++ andi(AT, SP, 0xf); ++ beq(AT, R0, L); ++ delayed()->nop(); ++ daddiu(SP, SP, -8); ++ call(entry_point, relocInfo::runtime_call_type); ++ delayed()->nop(); ++ daddiu(SP, SP, 8); ++ b(E); ++ delayed()->nop(); ++ ++ bind(L); ++ call(entry_point, relocInfo::runtime_call_type); ++ delayed()->nop(); ++ bind(E); ++} ++ ++ ++void MacroAssembler::jmp(address entry) { ++ patchable_set48(T9, (long)entry); ++ jr(T9); ++} ++ ++void MacroAssembler::jmp(address entry, relocInfo::relocType rtype) { ++ switch (rtype) { ++ case relocInfo::runtime_call_type: ++ case relocInfo::none: ++ jmp(entry); ++ break; ++ default: ++ { ++ InstructionMark im(this); ++ relocate(rtype); ++ patchable_set48(T9, (long)entry); ++ jr(T9); ++ } ++ break; ++ } ++} ++ ++void MacroAssembler::jmp_far(Label& L) { ++ if (L.is_bound()) { ++ address entry = target(L); ++ assert(entry != NULL, "jmp most probably wrong"); ++ InstructionMark im(this); ++ ++ relocate(relocInfo::internal_word_type); ++ patchable_set48(T9, (long)entry); ++ } else { ++ InstructionMark im(this); ++ L.add_patch_at(code(), locator()); ++ ++ relocate(relocInfo::internal_word_type); ++ patchable_set48(T9, (long)pc()); ++ } ++ ++ jr(T9); ++ delayed()->nop(); ++} ++void MacroAssembler::mov_metadata(Address dst, Metadata* obj) { ++ int oop_index; ++ if (obj) { ++ oop_index = oop_recorder()->find_index(obj); ++ } else { ++ oop_index = oop_recorder()->allocate_metadata_index(obj); ++ } ++ relocate(metadata_Relocation::spec(oop_index)); ++ patchable_set48(AT, (long)obj); ++ sd(AT, dst); ++} ++ ++void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { ++ int oop_index; ++ if (obj) { ++ oop_index = oop_recorder()->find_index(obj); ++ } else { ++ oop_index = oop_recorder()->allocate_metadata_index(obj); ++ } ++ relocate(metadata_Relocation::spec(oop_index)); ++ patchable_set48(dst, (long)obj); ++} ++ ++void MacroAssembler::call(address entry) { ++// c/c++ code assume T9 is entry point, so we just always move entry to t9 ++// maybe there is some more graceful method to handle this. FIXME ++// For more info, see class NativeCall. 
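++// (Position-independent MIPS callees recompute gp from t9 in their prologue, so
++// the ABI expects the callee's own entry address in T9 at the time of the call;
++// hence the target is always materialized into T9 and invoked via jalr rather
++// than a direct jal.)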
++ patchable_set48(T9, (long)entry); ++ jalr(T9); ++} ++ ++void MacroAssembler::call(address entry, relocInfo::relocType rtype) { ++ switch (rtype) { ++ case relocInfo::runtime_call_type: ++ case relocInfo::none: ++ call(entry); ++ break; ++ default: ++ { ++ InstructionMark im(this); ++ relocate(rtype); ++ call(entry); ++ } ++ break; ++ } ++} ++ ++void MacroAssembler::call(address entry, RelocationHolder& rh) ++{ ++ switch (rh.type()) { ++ case relocInfo::runtime_call_type: ++ case relocInfo::none: ++ call(entry); ++ break; ++ default: ++ { ++ InstructionMark im(this); ++ relocate(rh); ++ call(entry); ++ } ++ break; ++ } ++} ++ ++void MacroAssembler::ic_call(address entry, jint method_index) { ++ RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index); ++ patchable_set48(IC_Klass, (long)Universe::non_oop_word()); ++ assert(entry != NULL, "call most probably wrong"); ++ InstructionMark im(this); ++ trampoline_call(AddressLiteral(entry, rh)); ++} ++ ++void MacroAssembler::c2bool(Register r) { ++ sltu(r, R0, r); ++} ++ ++#ifndef PRODUCT ++extern "C" void findpc(intptr_t x); ++#endif ++ ++void MacroAssembler::debug(char* msg/*, RegistersForDebugging* regs*/) { ++ if ( ShowMessageBoxOnError ) { ++ JavaThreadState saved_state = JavaThread::current()->thread_state(); ++ JavaThread::current()->set_thread_state(_thread_in_vm); ++ { ++ // In order to get locks work, we need to fake a in_VM state ++ ttyLocker ttyl; ++ ::tty->print_cr("EXECUTION STOPPED: %s\n", msg); ++ if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { ++ BytecodeCounter::print(); ++ } ++ ++ } ++ ThreadStateTransition::transition(JavaThread::current(), _thread_in_vm, saved_state); ++ } ++ else ++ ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); ++} ++ ++ ++void MacroAssembler::stop(const char* msg) { ++ li(A0, (long)msg); ++ call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); ++ delayed()->nop(); ++ brk(17); ++} ++ ++void MacroAssembler::warn(const char* msg) { ++ pushad(); ++ li(A0, (long)msg); ++ push(S2); ++ move(AT, -(StackAlignmentInBytes)); ++ move(S2, SP); // use S2 as a sender SP holder ++ andr(SP, SP, AT); // align stack as required by ABI ++ call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); ++ delayed()->nop(); ++ move(SP, S2); // use S2 as a sender SP holder ++ pop(S2); ++ popad(); ++} ++ ++void MacroAssembler::increment(Register reg, int imm) { ++ if (!imm) return; ++ if (is_simm16(imm)) { ++ daddiu(reg, reg, imm); ++ } else { ++ move(AT, imm); ++ daddu(reg, reg, AT); ++ } ++} ++ ++void MacroAssembler::decrement(Register reg, int imm) { ++ increment(reg, -imm); ++} ++ ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ bool check_exceptions) { ++ call_VM_helper(oop_result, entry_point, 0, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions) { ++ if (arg_1!=A1) move(A1, arg_1); ++ call_VM_helper(oop_result, entry_point, 1, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ bool check_exceptions) { ++ if (arg_1!=A1) move(A1, arg_1); ++ if (arg_2!=A2) move(A2, arg_2); ++ assert(arg_2 != A1, "smashed argument"); ++ call_VM_helper(oop_result, entry_point, 2, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ Register 
arg_2, ++ Register arg_3, ++ bool check_exceptions) { ++ if (arg_1!=A1) move(A1, arg_1); ++ if (arg_2!=A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); ++ if (arg_3!=A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument"); ++ call_VM_helper(oop_result, entry_point, 3, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ call_VM_base(oop_result, NOREG, last_java_sp, entry_point, number_of_arguments, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions) { ++ if (arg_1 != A1) move(A1, arg_1); ++ call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ bool check_exceptions) { ++ if (arg_1 != A1) move(A1, arg_1); ++ if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); ++ call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ Register arg_3, ++ bool check_exceptions) { ++ if (arg_1 != A1) move(A1, arg_1); ++ if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); ++ if (arg_3 != A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument"); ++ call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); ++} ++ ++void MacroAssembler::call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ ++ address before_call_pc; ++ // determine java_thread register ++ if (!java_thread->is_valid()) { ++#ifndef OPT_THREAD ++ java_thread = T2; ++ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ } ++ // determine last_java_sp register ++ if (!last_java_sp->is_valid()) { ++ last_java_sp = SP; ++ } ++ // debugging support ++ assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); ++ assert(number_of_arguments <= 4 , "cannot have negative number of arguments"); ++ assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result"); ++ assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp"); ++ ++ assert(last_java_sp != FP, "this code doesn't work for last_java_sp == fp, which currently can't portably work anyway since C2 doesn't save fp"); ++ ++ // set last Java frame before call ++ before_call_pc = (address)pc(); ++ set_last_Java_frame(java_thread, last_java_sp, FP, before_call_pc); ++ ++ // do the call ++ move(A0, java_thread); ++ call(entry_point, relocInfo::runtime_call_type); ++ delayed()->nop(); ++ ++ // restore the thread (cannot use the pushed argument since arguments ++ // may be overwritten by C code generated by an optimizing compiler); ++ // however can use the register value directly if it is callee saved. 
++#ifndef OPT_THREAD ++ get_thread(java_thread); ++#else ++#ifdef ASSERT ++ { ++ Label L; ++ get_thread(AT); ++ beq(java_thread, AT, L); ++ delayed()->nop(); ++ stop("MacroAssembler::call_VM_base: TREG not callee saved?"); ++ bind(L); ++ } ++#endif ++#endif ++ ++ // discard thread and arguments ++ ld_ptr(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // reset last Java frame ++ reset_last_Java_frame(java_thread, false); ++ ++ check_and_handle_popframe(java_thread); ++ check_and_handle_earlyret(java_thread); ++ if (check_exceptions) { ++ // check for pending exceptions (java_thread is set upon return) ++ Label L; ++ ld(AT, java_thread, in_bytes(Thread::pending_exception_offset())); ++ beq(AT, R0, L); ++ delayed()->nop(); ++ li(AT, before_call_pc); ++ push(AT); ++ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ delayed()->nop(); ++ bind(L); ++ } ++ ++ // get oop result if there is one and reset the value in the thread ++ if (oop_result->is_valid()) { ++ ld(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset())); ++ sd(R0, java_thread, in_bytes(JavaThread::vm_result_offset())); ++ verify_oop(oop_result); ++ } ++} ++ ++void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { ++ ++ move(V0, SP); ++ //we also reserve space for java_thread here ++ move(AT, -(StackAlignmentInBytes)); ++ andr(SP, SP, AT); ++ call_VM_base(oop_result, NOREG, V0, entry_point, number_of_arguments, check_exceptions); ++ ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { ++ call_VM_leaf_base(entry_point, number_of_arguments); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { ++ if (arg_0 != A0) move(A0, arg_0); ++ call_VM_leaf(entry_point, 1); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { ++ if (arg_0 != A0) move(A0, arg_0); ++ if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument"); ++ call_VM_leaf(entry_point, 2); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { ++ if (arg_0 != A0) move(A0, arg_0); ++ if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument"); ++ if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A0 && arg_2 != A1, "smashed argument"); ++ call_VM_leaf(entry_point, 3); ++} ++void MacroAssembler::super_call_VM_leaf(address entry_point) { ++ MacroAssembler::call_VM_leaf_base(entry_point, 0); ++} ++ ++ ++void MacroAssembler::super_call_VM_leaf(address entry_point, ++ Register arg_1) { ++ if (arg_1 != A0) move(A0, arg_1); ++ MacroAssembler::call_VM_leaf_base(entry_point, 1); ++} ++ ++ ++void MacroAssembler::super_call_VM_leaf(address entry_point, ++ Register arg_1, ++ Register arg_2) { ++ if (arg_1 != A0) move(A0, arg_1); ++ if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument"); ++ MacroAssembler::call_VM_leaf_base(entry_point, 2); ++} ++void MacroAssembler::super_call_VM_leaf(address entry_point, ++ Register arg_1, ++ Register arg_2, ++ Register arg_3) { ++ if (arg_1 != A0) move(A0, arg_1); ++ if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument"); ++ if (arg_3 != A2) move(A2, arg_3); assert(arg_3 != A0 && arg_3 != A1, "smashed argument"); ++ MacroAssembler::call_VM_leaf_base(entry_point, 3); ++} ++ ++void MacroAssembler::check_and_handle_earlyret(Register java_thread) { ++} ++ ++void 
MacroAssembler::check_and_handle_popframe(Register java_thread) { ++} ++ ++void MacroAssembler::null_check(Register reg, int offset) { ++ if (needs_explicit_null_check(offset)) { ++ // provoke OS NULL exception if reg = NULL by ++ // accessing M[reg] w/o changing any (non-CC) registers ++ // NOTE: cmpl is plenty here to provoke a segv ++ lw(AT, reg, 0); ++ } else { ++ // nothing to do, (later) access of M[reg + offset] ++ // will provoke OS NULL exception if reg = NULL ++ } ++} ++ ++void MacroAssembler::enter() { ++ push2(RA, FP); ++ move(FP, SP); ++} ++ ++void MacroAssembler::leave() { ++ move(SP, FP); ++ pop2(RA, FP); ++} ++ ++void MacroAssembler::unimplemented(const char* what) { ++ const char* buf = NULL; ++ { ++ ResourceMark rm; ++ stringStream ss; ++ ss.print("unimplemented: %s", what); ++ buf = code_string(ss.as_string()); ++ } ++ stop(buf); ++} ++ ++void MacroAssembler::get_thread(Register thread) { ++#ifdef MINIMIZE_RAM_USAGE ++// ++// In MIPS64, we don't use full 64-bit address space. ++// Only a small range is actually used. ++// ++// Example: ++// $ cat /proc/13352/maps ++// 120000000-120010000 r-xp 00000000 08:01 41077 /mnt/openjdk6-mips-full/build/linux-mips64/j2sdk-image/bin/java ++// 12001c000-120020000 rw-p 0000c000 08:01 41077 /mnt/openjdk6-mips-full/build/linux-mips64/j2sdk-image/bin/java ++// 120020000-1208dc000 rwxp 00000000 00:00 0 [heap] ++// 555d574000-555d598000 r-xp 00000000 08:01 2073768 /lib/ld-2.12.so ++// 555d598000-555d59c000 rw-p 00000000 00:00 0 ++// ...... ++// 558b1f8000-558b23c000 rwxp 00000000 00:00 0 ++// 558b23c000-558b248000 ---p 00000000 00:00 0 ++// 558b248000-558b28c000 rwxp 00000000 00:00 0 ++// ffff914000-ffff94c000 rwxp 00000000 00:00 0 [stack] ++// ffffffc000-10000000000 r-xp 00000000 00:00 0 [vdso] ++// ++// All stacks are positioned at 0x55________. ++// Therefore, we can utilize the same algorithm used in 32-bit. 
++ // int index = ((uintptr_t)p >> PAGE_SHIFT) & ((1UL << (SP_BITLENGTH - PAGE_SHIFT)) - 1); ++ // Thread* thread = _sp_map[index]; ++ Register tmp; ++ ++ if (thread == AT) ++ tmp = T9; ++ else ++ tmp = AT; ++ ++ move(thread, SP); ++ shr(thread, PAGE_SHIFT); ++ ++ push(tmp); ++ li(tmp, ((1UL << (SP_BITLENGTH - PAGE_SHIFT)) - 1)); ++ andr(thread, thread, tmp); ++ shl(thread, Address::times_ptr); // sizeof(Thread *) ++ li48(tmp, (long)ThreadLocalStorage::sp_map_addr()); ++ addu(tmp, tmp, thread); ++ ld_ptr(thread, tmp, 0); ++ pop(tmp); ++#else ++ if (thread != V0) { ++ push(V0); ++ } ++ pushad_except_v0(); ++ ++ push(S5); ++ move(S5, SP); ++ move(AT, -StackAlignmentInBytes); ++ andr(SP, SP, AT); ++ call(CAST_FROM_FN_PTR(address, Thread::current)); ++ //MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, Thread::current), 0); ++ delayed()->nop(); ++ move(SP, S5); ++ pop(S5); ++ ++ popad_except_v0(); ++ if (thread != V0) { ++ move(thread, V0); ++ pop(V0); ++ } ++#endif // MINIMIZE_RAM_USAGE ++} ++ ++void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp) { ++ // determine java_thread register ++ if (!java_thread->is_valid()) { ++#ifndef OPT_THREAD ++ java_thread = T1; ++ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ } ++ // we must set sp to zero to clear frame ++ st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // must clear fp, so that compiled frames are not confused; it is possible ++ // that we need it only for debugging ++ if(clear_fp) { ++ st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_fp_offset())); ++ } ++ ++ // Always clear the pc because it could have been set by make_walkable() ++ st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_pc_offset())); ++} ++ ++void MacroAssembler::reset_last_Java_frame(bool clear_fp) { ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ get_thread(thread); ++#endif ++ // we must set sp to zero to clear frame ++ sd(R0, Address(thread, JavaThread::last_Java_sp_offset())); ++ // must clear fp, so that compiled frames are not confused; it is ++ // possible that we need it only for debugging ++ if (clear_fp) { ++ sd(R0, Address(thread, JavaThread::last_Java_fp_offset())); ++ } ++ ++ // Always clear the pc because it could have been set by make_walkable() ++ sd(R0, Address(thread, JavaThread::last_Java_pc_offset())); ++} ++ ++// Write serialization page so VM thread can do a pseudo remote membar. ++// We use the current thread pointer to calculate a thread specific ++// offset to write to within the page. This minimizes bus traffic ++// due to cache line collision. 
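++// Concretely, the code below computes
++//   offset = (thread >> get_serialize_page_shift_count()) & (vm_page_size() - sizeof(int))
++// and stores an int-sized zero at get_memory_serialize_page() + offset.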
++void MacroAssembler::serialize_memory(Register thread, Register tmp) { ++ int mask = os::vm_page_size() - sizeof(int); ++ assert_different_registers(AT, tmp); ++ assert(is_uimm(mask, 16), "Not a unsigned 16-bit"); ++ srl(AT, thread, os::get_serialize_page_shift_count()); ++ andi(AT, AT, mask); ++ li(tmp, os::get_memory_serialize_page()); ++ addu(tmp, tmp, AT); ++ sw(R0, tmp, 0); ++} ++ ++void MacroAssembler::safepoint_poll(Label& slow_path, Register thread_reg) { ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ ld(AT, thread_reg, in_bytes(Thread::polling_page_offset())); ++ andi(AT, AT, SafepointMechanism::poll_bit()); ++ bne(AT, R0, slow_path); ++ delayed()->nop(); ++ } else { ++ li(AT, SafepointSynchronize::address_of_state()); ++ lw(AT, AT, 0); ++ addiu(AT, AT, -SafepointSynchronize::_not_synchronized); ++ bne(AT, R0, slow_path); ++ delayed()->nop(); ++ } ++} ++ ++// Just like safepoint_poll, but use an acquiring load for thread- ++// local polling. ++// ++// We need an acquire here to ensure that any subsequent load of the ++// global SafepointSynchronize::_state flag is ordered after this load ++// of the local Thread::_polling page. We don't want this poll to ++// return false (i.e. not safepointing) and a later poll of the global ++// SafepointSynchronize::_state spuriously to return true. ++// ++// This is to avoid a race when we're in a native->Java transition ++// racing the code which wakes up from a safepoint. ++// ++void MacroAssembler::safepoint_poll_acquire(Label& slow_path, Register thread_reg) { ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ ld(AT, thread_reg, in_bytes(Thread::polling_page_offset())); ++ sync(); ++ andi(AT, AT, SafepointMechanism::poll_bit()); ++ bne(AT, R0, slow_path); ++ delayed()->nop(); ++ } else { ++ safepoint_poll(slow_path, thread_reg); ++ } ++} ++ ++// Calls to C land ++// ++// When entering C land, the fp, & sp of the last Java frame have to be recorded ++// in the (thread-local) JavaThread object. When leaving C land, the last Java fp ++// has to be reset to 0. This is required to allow proper stack traversal. 
++void MacroAssembler::set_last_Java_frame(Register java_thread, ++ Register last_java_sp, ++ Register last_java_fp, ++ address last_java_pc) { ++ // determine java_thread register ++ if (!java_thread->is_valid()) { ++#ifndef OPT_THREAD ++ java_thread = T2; ++ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ } ++ // determine last_java_sp register ++ if (!last_java_sp->is_valid()) { ++ last_java_sp = SP; ++ } ++ ++ // last_java_fp is optional ++ if (last_java_fp->is_valid()) { ++ st_ptr(last_java_fp, java_thread, in_bytes(JavaThread::last_Java_fp_offset())); ++ } ++ ++ // last_java_pc is optional ++ if (last_java_pc != NULL) { ++ relocate(relocInfo::internal_word_type); ++ patchable_set48(AT, (long)last_java_pc); ++ st_ptr(AT, java_thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ } ++ st_ptr(last_java_sp, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++} ++ ++void MacroAssembler::set_last_Java_frame(Register last_java_sp, ++ Register last_java_fp, ++ address last_java_pc) { ++ // determine last_java_sp register ++ if (!last_java_sp->is_valid()) { ++ last_java_sp = SP; ++ } ++ ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ get_thread(thread); ++#endif ++ // last_java_fp is optional ++ if (last_java_fp->is_valid()) { ++ sd(last_java_fp, Address(thread, JavaThread::last_Java_fp_offset())); ++ } ++ ++ // last_java_pc is optional ++ if (last_java_pc != NULL) { ++ relocate(relocInfo::internal_word_type); ++ patchable_set48(AT, (long)last_java_pc); ++ st_ptr(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ } ++ ++ sd(last_java_sp, Address(thread, JavaThread::last_Java_sp_offset())); ++} ++ ++// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes. ++void MacroAssembler::tlab_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes, ++ Register t1, Register t2, Label& slow_case) { ++ Unimplemented(); ++ //BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ //bs->tlab_allocate(this, thread, obj, var_size_in_bytes, con_size_in_bytes, t1, t2, slow_case); ++} ++ ++// Defines obj, preserves var_size_in_bytes ++void MacroAssembler::eden_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes, ++ Register t1, Register t2, Label& slow_case) { ++ Unimplemented(); ++ //assert_different_registers(obj, var_size_in_bytes, t1, AT); ++ //BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ //bs->eden_allocate(this, thread, obj, var_size_in_bytes, con_size_in_bytes, t1, slow_case); ++} ++ ++void MacroAssembler::incr_allocated_bytes(Register thread, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1) { ++ if (!thread->is_valid()) { ++#ifndef OPT_THREAD ++ assert(t1->is_valid(), "need temp reg"); ++ thread = t1; ++ get_thread(thread); ++#else ++ thread = TREG; ++#endif ++ } ++ ++ ld_ptr(AT, thread, in_bytes(JavaThread::allocated_bytes_offset())); ++ if (var_size_in_bytes->is_valid()) { ++ addu(AT, AT, var_size_in_bytes); ++ } else { ++ addiu(AT, AT, con_size_in_bytes); ++ } ++ st_ptr(AT, thread, in_bytes(JavaThread::allocated_bytes_offset())); ++} ++ ++void MacroAssembler::li(Register rd, long imm) { ++ if (imm <= max_jint && imm >= min_jint) { ++ li32(rd, (int)imm); ++ } else if (julong(imm) <= 0xFFFFFFFF) { ++ assert_not_delayed(); ++ // lui sign-extends, so we can't use that. 
++ ori(rd, R0, julong(imm) >> 16); ++ dsll(rd, rd, 16); ++ ori(rd, rd, split_low(imm)); ++ } else if ((imm > 0) && is_simm16(imm >> 32)) { ++ // A 48-bit address ++ li48(rd, imm); ++ } else { ++ li64(rd, imm); ++ } ++} ++ ++void MacroAssembler::li32(Register reg, int imm) { ++ if (is_simm16(imm)) { ++ addiu(reg, R0, imm); ++ } else { ++ lui(reg, split_low(imm >> 16)); ++ if (split_low(imm)) ++ ori(reg, reg, split_low(imm)); ++ } ++} ++ ++void MacroAssembler::set64(Register d, jlong value) { ++ assert_not_delayed(); ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ if (value == lo) { // 32-bit integer ++ if (is_simm16(value)) { ++ daddiu(d, R0, value); ++ } else { ++ lui(d, split_low(value >> 16)); ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ ori(d, R0, julong(value) >> 16); ++ dsll(d, d, 16); ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ } ++ } else if ((value> 0) && is_simm16(value >> 32)) { // li48 ++ // 4 insts ++ li48(d, value); ++ } else { // li64 ++ // 6 insts ++ li64(d, value); ++ } ++} ++ ++ ++int MacroAssembler::insts_for_set64(jlong value) { ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (is_simm16(value)) { ++ //daddiu(d, R0, value); ++ count++; ++ } else { ++ //lui(d, split_low(value >> 16)); ++ count++; ++ if (split_low(value)) { ++ //ori(d, d, split_low(value)); ++ count++; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ //ori(d, R0, julong(value) >> 16); ++ //dsll(d, d, 16); ++ count += 2; ++ if (split_low(value)) { ++ //ori(d, d, split_low(value)); ++ count++; ++ } ++ } else if ((value> 0) && is_simm16(value >> 32)) { // li48 ++ // 4 insts ++ //li48(d, value); ++ count += 4; ++ } else { // li64 ++ // 6 insts ++ //li64(d, value); ++ count += 6; ++ } ++ ++ return count; ++} ++ ++void MacroAssembler::patchable_set48(Register d, jlong value) { ++ assert_not_delayed(); ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (is_simm16(value)) { ++ daddiu(d, R0, value); ++ count += 1; ++ } else { ++ lui(d, split_low(value >> 16)); ++ count += 1; ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ ori(d, R0, julong(value) >> 16); ++ dsll(d, d, 16); ++ count += 2; ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ count += 1; ++ } ++ } else if ((value> 0) && is_simm16(value >> 32)) { // li48 ++ // 4 insts ++ li48(d, value); ++ count += 4; ++ } else { // li64 ++ tty->print_cr("value = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 4) { ++ nop(); ++ count++; ++ } ++} ++ ++void MacroAssembler::patchable_set32(Register d, jlong value) { ++ assert_not_delayed(); ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (is_simm16(value)) { ++ daddiu(d, R0, value); ++ count += 1; ++ } else { ++ lui(d, split_low(value >> 16)); ++ count += 1; ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ ori(d, R0, julong(value) >> 16); ++ dsll(d, d, 16); ++ count += 2; ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ count += 1; ++ } ++ } else { ++ 
tty->print_cr("value = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 3) { ++ nop(); ++ count++; ++ } ++} ++ ++void MacroAssembler::patchable_call32(Register d, jlong value) { ++ assert_not_delayed(); ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (is_simm16(value)) { ++ daddiu(d, R0, value); ++ count += 1; ++ } else { ++ lui(d, split_low(value >> 16)); ++ count += 1; ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ count += 1; ++ } ++ } ++ } else { ++ tty->print_cr("value = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 2) { ++ nop(); ++ count++; ++ } ++} ++ ++void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { ++ assert(UseCompressedClassPointers, "should only be used for compressed header"); ++ assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); ++ ++ int klass_index = oop_recorder()->find_index(k); ++ RelocationHolder rspec = metadata_Relocation::spec(klass_index); ++ long narrowKlass = (long)Klass::encode_klass(k); ++ ++ relocate(rspec, Assembler::narrow_oop_operand); ++ patchable_set48(dst, narrowKlass); ++} ++ ++ ++void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { ++ assert(UseCompressedOops, "should only be used for compressed header"); ++ assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); ++ ++ int oop_index = oop_recorder()->find_index(obj); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ ++ relocate(rspec, Assembler::narrow_oop_operand); ++ patchable_set48(dst, oop_index); ++} ++ ++// ((OopHandle)result).resolve(); ++void MacroAssembler::resolve_oop_handle(Register result, Register tmp) { ++ // OopHandle::resolve is an indirection. 
++ access_load_at(T_OBJECT, IN_NATIVE, result, Address(result, 0), tmp, NOREG); ++} ++ ++void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp) { ++ // get mirror ++ const int mirror_offset = in_bytes(Klass::java_mirror_offset()); ++ ld_ptr(mirror, method, in_bytes(Method::const_offset())); ++ ld_ptr(mirror, mirror, in_bytes(ConstMethod::constants_offset())); ++ ld_ptr(mirror, mirror, ConstantPool::pool_holder_offset_in_bytes()); ++ ld_ptr(mirror, mirror, mirror_offset); ++ resolve_oop_handle(mirror, tmp); ++} ++ ++void MacroAssembler::li64(Register rd, long imm) { ++ assert_not_delayed(); ++ lui(rd, split_low(imm >> 48)); ++ ori(rd, rd, split_low(imm >> 32)); ++ dsll(rd, rd, 16); ++ ori(rd, rd, split_low(imm >> 16)); ++ dsll(rd, rd, 16); ++ ori(rd, rd, split_low(imm)); ++} ++ ++void MacroAssembler::li48(Register rd, long imm) { ++ assert_not_delayed(); ++ assert(is_simm16(imm >> 32), "Not a 48-bit address"); ++ lui(rd, imm >> 32); ++ ori(rd, rd, split_low(imm >> 16)); ++ dsll(rd, rd, 16); ++ ori(rd, rd, split_low(imm)); ++} ++ ++void MacroAssembler::verify_oop(Register reg, const char* s) { ++ if (!VerifyOops) return; ++ const char * b = NULL; ++ stringStream ss; ++ ss.print("verify_oop: %s: %s", reg->name(), s); ++ b = code_string(ss.as_string()); ++ pushad(); ++ move(A1, reg); ++ li(A0, (long)b); ++ li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address()); ++ ld(T9, AT, 0); ++ jalr(T9); ++ delayed()->nop(); ++ popad(); ++} ++ ++ ++void MacroAssembler::verify_oop_addr(Address addr, const char* s) { ++ if (!VerifyOops) { ++ nop(); ++ return; ++ } ++ // Pass register number to verify_oop_subroutine ++ const char * b = NULL; ++ stringStream ss; ++ ss.print("verify_oop_addr: %s", s); ++ b = code_string(ss.as_string()); ++ ++ addiu(SP, SP, - 7 * wordSize); ++ st_ptr(T0, SP, 6 * wordSize); ++ st_ptr(T1, SP, 5 * wordSize); ++ st_ptr(RA, SP, 4 * wordSize); ++ st_ptr(A0, SP, 3 * wordSize); ++ st_ptr(A1, SP, 2 * wordSize); ++ st_ptr(AT, SP, 1 * wordSize); ++ st_ptr(T9, SP, 0); ++ ++ // addr may contain sp so we will have to adjust it based on the ++ // pushes that we just did. 
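++  // Seven words were pushed just above, so an SP-relative address is rebased
++  // by 7 * wordSize before the load.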
++ if (addr.uses(SP)) { ++ lea(A1, addr); ++ ld_ptr(A1, Address(A1, 7 * wordSize)); ++ } else { ++ ld_ptr(A1, addr); ++ } ++ li(A0, (long)b); ++ // call indirectly to solve generation ordering problem ++ li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address()); ++ ld_ptr(T9, AT, 0); ++ jalr(T9); ++ delayed()->nop(); ++ ld_ptr(T0, SP, 6* wordSize); ++ ld_ptr(T1, SP, 5* wordSize); ++ ld_ptr(RA, SP, 4* wordSize); ++ ld_ptr(A0, SP, 3* wordSize); ++ ld_ptr(A1, SP, 2* wordSize); ++ ld_ptr(AT, SP, 1* wordSize); ++ ld_ptr(T9, SP, 0* wordSize); ++ addiu(SP, SP, 7 * wordSize); ++} ++ ++// used registers : T0, T1 ++void MacroAssembler::verify_oop_subroutine() { ++ // RA: ra ++ // A0: char* error message ++ // A1: oop object to verify ++ ++ Label exit, error; ++ // increment counter ++ li(T0, (long)StubRoutines::verify_oop_count_addr()); ++ lw(AT, T0, 0); ++ daddiu(AT, AT, 1); ++ sw(AT, T0, 0); ++ ++ // make sure object is 'reasonable' ++ beq(A1, R0, exit); // if obj is NULL it is ok ++ delayed()->nop(); ++ ++ // Check if the oop is in the right area of memory ++ // const int oop_mask = Universe::verify_oop_mask(); ++ // const int oop_bits = Universe::verify_oop_bits(); ++ const uintptr_t oop_mask = Universe::verify_oop_mask(); ++ const uintptr_t oop_bits = Universe::verify_oop_bits(); ++ li(AT, oop_mask); ++ andr(T0, A1, AT); ++ li(AT, oop_bits); ++ bne(T0, AT, error); ++ delayed()->nop(); ++ ++ // make sure klass is 'reasonable' ++ // add for compressedoops ++ reinit_heapbase(); ++ // add for compressedoops ++ load_klass(T0, A1); ++ beq(T0, R0, error); // if klass is NULL it is broken ++ delayed()->nop(); ++ // return if everything seems ok ++ bind(exit); ++ ++ jr(RA); ++ delayed()->nop(); ++ ++ // handle errors ++ bind(error); ++ pushad(); ++ call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); ++ delayed()->nop(); ++ popad(); ++ jr(RA); ++ delayed()->nop(); ++} ++ ++void MacroAssembler::verify_tlab(Register t1, Register t2) { ++#ifdef ASSERT ++ assert_different_registers(t1, t2, AT); ++ if (UseTLAB && VerifyOops) { ++ Label next, ok; ++ ++ get_thread(t1); ++ ++ ld_ptr(t2, t1, in_bytes(JavaThread::tlab_top_offset())); ++ ld_ptr(AT, t1, in_bytes(JavaThread::tlab_start_offset())); ++ sltu(AT, t2, AT); ++ beq(AT, R0, next); ++ delayed()->nop(); ++ ++ stop("assert(top >= start)"); ++ ++ bind(next); ++ ld_ptr(AT, t1, in_bytes(JavaThread::tlab_end_offset())); ++ sltu(AT, AT, t2); ++ beq(AT, R0, ok); ++ delayed()->nop(); ++ ++ stop("assert(top <= end)"); ++ ++ bind(ok); ++ ++ } ++#endif ++} ++RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, ++ Register tmp, ++ int offset) { ++ intptr_t value = *delayed_value_addr; ++ if (value != 0) ++ return RegisterOrConstant(value + offset); ++ Unimplemented(); ++ //AddressLiteral a(delayed_value_addr); ++ // load indirectly to solve generation ordering problem ++ //movptr(tmp, ExternalAddress((address) delayed_value_addr)); ++ //ld(tmp, a); ++ if (offset != 0) ++ daddiu(tmp,tmp, offset); ++ ++ return RegisterOrConstant(tmp); ++} ++ ++void MacroAssembler::hswap(Register reg) { ++ //short ++ //andi(reg, reg, 0xffff); ++ srl(AT, reg, 8); ++ sll(reg, reg, 24); ++ sra(reg, reg, 16); ++ orr(reg, reg, AT); ++} ++ ++void MacroAssembler::huswap(Register reg) { ++ dsrl(AT, reg, 8); ++ dsll(reg, reg, 24); ++ dsrl(reg, reg, 16); ++ orr(reg, reg, AT); ++ andi(reg, reg, 0xffff); ++} ++ ++// something funny to do this will only one more register AT ++// 32 bits ++void MacroAssembler::swap(Register reg) { ++ 
srl(AT, reg, 8); ++ sll(reg, reg, 24); ++ orr(reg, reg, AT); ++ //reg : 4 1 2 3 ++ srl(AT, AT, 16); ++ xorr(AT, AT, reg); ++ andi(AT, AT, 0xff); ++ //AT : 0 0 0 1^3); ++ xorr(reg, reg, AT); ++ //reg : 4 1 2 1 ++ sll(AT, AT, 16); ++ xorr(reg, reg, AT); ++ //reg : 4 3 2 1 ++} ++ ++void MacroAssembler::cmpxchg(Address addr, Register oldval, Register newval, ++ Register resflag, bool retold, bool barrier) { ++ assert(oldval != resflag, "oldval != resflag"); ++ assert(newval != resflag, "newval != resflag"); ++ Label again, succ, fail; ++ bind(again); ++ lld(resflag, addr); ++ bne(resflag, oldval, fail); ++ delayed()->nop(); ++ move(resflag, newval); ++ scd(resflag, addr); ++ beq(resflag, R0, again); ++ delayed()->nop(); ++ b(succ); ++ delayed()->nop(); ++ bind(fail); ++ if (barrier) ++ sync(); ++ if (retold && oldval != R0) ++ move(oldval, resflag); ++ move(resflag, R0); ++ bind(succ); ++} ++ ++void MacroAssembler::cmpxchg(Address addr, Register oldval, Register newval, ++ Register tmp, bool retold, bool barrier, Label& succ, Label* fail) { ++ assert(oldval != tmp, "oldval != tmp"); ++ assert(newval != tmp, "newval != tmp"); ++ Label again, neq; ++ ++ bind(again); ++ lld(tmp, addr); ++ bne(tmp, oldval, neq); ++ delayed()->nop(); ++ move(tmp, newval); ++ scd(tmp, addr); ++ beq(tmp, R0, again); ++ delayed()->nop(); ++ b(succ); ++ delayed()->nop(); ++ ++ bind(neq); ++ if (barrier) ++ sync(); ++ if (retold && oldval != R0) ++ move(oldval, tmp); ++ if (fail) { ++ b(*fail); ++ delayed()->nop(); ++ } ++} ++ ++ ++void MacroAssembler::cmpxchg32(Address addr, Register oldval, Register newval, ++ Register resflag, bool sign, bool retold, bool barrier) { ++ assert(oldval != resflag, "oldval != resflag"); ++ assert(newval != resflag, "newval != resflag"); ++ Label again, succ, fail; ++ bind(again); ++ ll(resflag, addr); ++ if (!sign) ++ dinsu(resflag, R0, 32, 32); ++ bne(resflag, oldval, fail); ++ delayed()->nop(); ++ ++ move(resflag, newval); ++ sc(resflag, addr); ++ beq(resflag, R0, again); ++ delayed()->nop(); ++ b(succ); ++ delayed()->nop(); ++ ++ bind(fail); ++ if (barrier) ++ sync(); ++ if (retold && oldval != R0) ++ move(oldval, resflag); ++ move(resflag, R0); ++ bind(succ); ++} ++ ++void MacroAssembler::cmpxchg32(Address addr, Register oldval, Register newval, Register tmp, ++ bool sign, bool retold, bool barrier, Label& succ, Label* fail) { ++ assert(oldval != tmp, "oldval != tmp"); ++ assert(newval != tmp, "newval != tmp"); ++ Label again, neq; ++ ++ bind(again); ++ ll(tmp, addr); ++ if (!sign) ++ dinsu(tmp, R0, 32, 32); ++ bne(tmp, oldval, neq); ++ delayed()->nop(); ++ move(tmp, newval); ++ sc(tmp, addr); ++ beq(tmp, R0, again); ++ delayed()->nop(); ++ b(succ); ++ delayed()->nop(); ++ ++ bind(neq); ++ if (barrier) ++ sync(); ++ if (retold && oldval != R0) ++ move(oldval, tmp); ++ if (fail) { ++ b(*fail); ++ delayed()->nop(); ++ } ++} ++ ++void MacroAssembler::cmpxchg8(Register x_regLo, Register x_regHi, Address dest, Register c_regLo, Register c_regHi) { ++ Label done, again, nequal; ++ ++ Register x_reg = x_regLo; ++ dsll32(x_regHi, x_regHi, 0); ++ dsll32(x_regLo, x_regLo, 0); ++ dsrl32(x_regLo, x_regLo, 0); ++ orr(x_reg, x_regLo, x_regHi); ++ ++ Register c_reg = c_regLo; ++ dsll32(c_regHi, c_regHi, 0); ++ dsll32(c_regLo, c_regLo, 0); ++ dsrl32(c_regLo, c_regLo, 0); ++ orr(c_reg, c_regLo, c_regHi); ++ ++ bind(again); ++ ++ if (UseSyncLevel >= 10000 || UseSyncLevel == 1000 || UseSyncLevel == 4000) sync(); ++ lld(AT, dest); ++ bne(AT, c_reg, nequal); ++ delayed()->nop(); ++ ++ //move(AT, x_reg); 
++ daddu(AT, x_reg, R0); ++ scd(AT, dest); ++ beq(AT, R0, again); ++ delayed()->nop(); ++ b(done); ++ delayed()->nop(); ++ ++ // not xchged ++ bind(nequal); ++ sync(); ++ //move(c_reg, AT); ++ //move(AT, R0); ++ daddu(c_reg, AT, R0); ++ daddu(AT, R0, R0); ++ bind(done); ++} ++ ++// be sure the three register is different ++void MacroAssembler::rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) { ++ assert_different_registers(tmp, fs, ft); ++ div_s(tmp, fs, ft); ++ trunc_l_s(tmp, tmp); ++ cvt_s_l(tmp, tmp); ++ mul_s(tmp, tmp, ft); ++ sub_s(fd, fs, tmp); ++} ++ ++// be sure the three register is different ++void MacroAssembler::rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) { ++ assert_different_registers(tmp, fs, ft); ++ div_d(tmp, fs, ft); ++ trunc_l_d(tmp, tmp); ++ cvt_d_l(tmp, tmp); ++ mul_d(tmp, tmp, ft); ++ sub_d(fd, fs, tmp); ++} ++ ++#ifdef COMPILER2 ++// Fast_Lock and Fast_Unlock used by C2 ++ ++// Because the transitions from emitted code to the runtime ++// monitorenter/exit helper stubs are so slow it's critical that ++// we inline both the stack-locking fast-path and the inflated fast path. ++// ++// See also: cmpFastLock and cmpFastUnlock. ++// ++// What follows is a specialized inline transliteration of the code ++// in slow_enter() and slow_exit(). If we're concerned about I$ bloat ++// another option would be to emit TrySlowEnter and TrySlowExit methods ++// at startup-time. These methods would accept arguments as ++// (Obj, Self, box, Scratch) and return success-failure ++// indications in the icc.ZFlag. Fast_Lock and Fast_Unlock would simply ++// marshal the arguments and emit calls to TrySlowEnter and TrySlowExit. ++// In practice, however, the # of lock sites is bounded and is usually small. ++// Besides the call overhead, TrySlowEnter and TrySlowExit might suffer ++// if the processor uses simple bimodal branch predictors keyed by EIP ++// Since the helper routines would be called from multiple synchronization ++// sites. ++// ++// An even better approach would be write "MonitorEnter()" and "MonitorExit()" ++// in java - using j.u.c and unsafe - and just bind the lock and unlock sites ++// to those specialized methods. That'd give us a mostly platform-independent ++// implementation that the JITs could optimize and inline at their pleasure. ++// Done correctly, the only time we'd need to cross to native could would be ++// to park() or unpark() threads. We'd also need a few more unsafe operators ++// to (a) prevent compiler-JIT reordering of non-volatile accesses, and ++// (b) explicit barriers or fence operations. ++// ++// TODO: ++// ++// * Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr). ++// This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals. ++// Given TLAB allocation, Self is usually manifested in a register, so passing it into ++// the lock operators would typically be faster than reifying Self. ++// ++// * Ideally I'd define the primitives as: ++// fast_lock (nax Obj, nax box, res, tmp, nax scr) where tmp and scr are KILLED. ++// fast_unlock (nax Obj, box, res, nax tmp) where tmp are KILLED ++// Unfortunately ADLC bugs prevent us from expressing the ideal form. ++// Instead, we're stuck with a rather awkward and brittle register assignments below. ++// Furthermore the register assignments are overconstrained, possibly resulting in ++// sub-optimal code near the synchronization site. 
++// ++// * Eliminate the sp-proximity tests and just use "== Self" tests instead. ++// Alternately, use a better sp-proximity test. ++// ++// * Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value. ++// Either one is sufficient to uniquely identify a thread. ++// TODO: eliminate use of sp in _owner and use get_thread(tr) instead. ++// ++// * Intrinsify notify() and notifyAll() for the common cases where the ++// object is locked by the calling thread but the waitlist is empty. ++// avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll(). ++// ++// * use jccb and jmpb instead of jcc and jmp to improve code density. ++// But beware of excessive branch density on AMD Opterons. ++// ++// * Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success ++// or failure of the fast-path. If the fast-path fails then we pass ++// control to the slow-path, typically in C. In Fast_Lock and ++// Fast_Unlock we often branch to DONE_LABEL, just to find that C2 ++// will emit a conditional branch immediately after the node. ++// So we have branches to branches and lots of ICC.ZF games. ++// Instead, it might be better to have C2 pass a "FailureLabel" ++// into Fast_Lock and Fast_Unlock. In the case of success, control ++// will drop through the node. ICC.ZF is undefined at exit. ++// In the case of failure, the node will branch directly to the ++// FailureLabel ++ ++ ++// obj: object to lock ++// box: on-stack box address (displaced header location) ++// tmp: tmp -- KILLED ++// scr: tmp -- KILLED ++void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register resReg, ++ Register tmpReg, Register scrReg) { ++ Label IsInflated, DONE, DONE_SET; ++ ++ // Ensure the register assignents are disjoint ++ guarantee(objReg != boxReg, ""); ++ guarantee(objReg != tmpReg, ""); ++ guarantee(objReg != scrReg, ""); ++ guarantee(boxReg != tmpReg, ""); ++ guarantee(boxReg != scrReg, ""); ++ ++ block_comment("FastLock"); ++ ++ if (PrintBiasedLockingStatistics) { ++ atomic_inc32((address)BiasedLocking::total_entry_count_addr(), 1, tmpReg, scrReg); ++ } ++ ++ if (EmitSync & 1) { ++ move(AT, 0x0); ++ return; ++ } else ++ if (EmitSync & 2) { ++ Label DONE_LABEL ; ++ if (UseBiasedLocking) { ++ // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument. ++ biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL); ++ } ++ ++ ld(tmpReg, Address(objReg, 0)) ; // fetch markword ++ ori(tmpReg, tmpReg, 0x1); ++ sd(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS ++ ++ cmpxchg(Address(objReg, 0), tmpReg, boxReg, scrReg, true, false, DONE_LABEL); // Updates tmpReg ++ delayed()->nop(); ++ ++ // Recursive locking ++ dsubu(tmpReg, tmpReg, SP); ++ li(AT, (7 - os::vm_page_size() )); ++ andr(tmpReg, tmpReg, AT); ++ sd(tmpReg, Address(boxReg, 0)); ++ bind(DONE_LABEL) ; ++ } else { ++ // Possible cases that we'll encounter in fast_lock ++ // ------------------------------------------------ ++ // * Inflated ++ // -- unlocked ++ // -- Locked ++ // = by self ++ // = by other ++ // * biased ++ // -- by Self ++ // -- by other ++ // * neutral ++ // * stack-locked ++ // -- by self ++ // = sp-proximity test hits ++ // = sp-proximity test generates false-negative ++ // -- by other ++ // ++ ++ // TODO: optimize away redundant LDs of obj->mark and improve the markword triage ++ // order to reduce the number of conditional branches in the most common cases. 
++ // Beware -- there's a subtle invariant that fetch of the markword ++ // at [FETCH], below, will never observe a biased encoding (*101b). ++ // If this invariant is not held we risk exclusion (safety) failure. ++ if (UseBiasedLocking && !UseOptoBiasInlining) { ++ Label succ, fail; ++ biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, succ, NULL); ++ b(fail); ++ delayed()->nop(); ++ bind(succ); ++ b(DONE); ++ delayed()->ori(resReg, R0, 1); ++ bind(fail); ++ } ++ ++ ld(tmpReg, Address(objReg, 0)); //Fetch the markword of the object. ++ andi(AT, tmpReg, markOopDesc::monitor_value); ++ bne(AT, R0, IsInflated); // inflated vs stack-locked|neutral|bias ++ delayed()->nop(); ++ ++ // Attempt stack-locking ... ++ ori(tmpReg, tmpReg, markOopDesc::unlocked_value); ++ sd(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS ++ ++ if (PrintBiasedLockingStatistics) { ++ Label SUCC, FAIL; ++ cmpxchg(Address(objReg, 0), tmpReg, boxReg, scrReg, true, false, SUCC, &FAIL); // Updates tmpReg ++ bind(SUCC); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scrReg); ++ b(DONE); ++ delayed()->ori(resReg, R0, 1); ++ bind(FAIL); ++ } else { ++ // If cmpxchg is succ, then scrReg = 1 ++ cmpxchg(Address(objReg, 0), tmpReg, boxReg, scrReg, true, false, DONE_SET); // Updates tmpReg ++ } ++ ++ // Recursive locking ++ // The object is stack-locked: markword contains stack pointer to BasicLock. ++ // Locked by current thread if difference with current SP is less than one page. ++ dsubu(tmpReg, tmpReg, SP); ++ li(AT, 7 - os::vm_page_size()); ++ andr(tmpReg, tmpReg, AT); ++ sd(tmpReg, Address(boxReg, 0)); ++ ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ // tmpReg == 0 => BiasedLocking::_fast_path_entry_count++ ++ bne(tmpReg, R0, L); ++ delayed()->nop(); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scrReg); ++ bind(L); ++ } ++ b(DONE); ++ delayed()->sltiu(resReg, tmpReg, 1); // resReg = (tmpReg == 0) ? 1 : 0 ++ ++ bind(IsInflated); ++ // The object's monitor m is unlocked iff m->owner == NULL, ++ // otherwise m->owner may contain a thread or a stack address. ++ ++ // TODO: someday avoid the ST-before-CAS penalty by ++ // relocating (deferring) the following ST. ++ // We should also think about trying a CAS without having ++ // fetched _owner. If the CAS is successful we may ++ // avoid an RTO->RTS upgrade on the $line. ++ // Without cast to int32_t a movptr will destroy r10 which is typically obj ++ li(AT, (int32_t)intptr_t(markOopDesc::unused_mark())); ++ sd(AT, Address(boxReg, 0)); ++ ++ ld(AT, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2)); ++ // if (m->owner != 0) => AT = 0, goto slow path. ++ bne(AT, R0, DONE_SET); ++ delayed()->ori(scrReg, R0, 0); ++ ++#ifndef OPT_THREAD ++ get_thread(TREG); ++#endif ++ // It's inflated and appears unlocked ++ cmpxchg(Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2), R0, TREG, scrReg, false, false) ; ++ // Intentional fall-through into DONE ... ++ ++ bind(DONE_SET); ++ move(resReg, scrReg); ++ ++ // DONE is a hot target - we'd really like to place it at the ++ // start of cache line by padding with NOPs. ++ // See the AMD and Intel software optimization manuals for the ++ // most efficient "long" NOP encodings. ++ // Unfortunately none of our alignment mechanisms suffice. ++ bind(DONE); ++ // At DONE the resReg is set as follows ... ++ // Fast_Unlock uses the same protocol. 
++ // resReg == 1 -> Success ++ // resREg == 0 -> Failure - force control through the slow-path ++ ++ // Avoid branch-to-branch on AMD processors ++ // This appears to be superstition. ++ if (EmitSync & 32) nop() ; ++ ++ } ++} ++ ++// obj: object to unlock ++// box: box address (displaced header location), killed. ++// tmp: killed tmp; cannot be obj nor box. ++// ++// Some commentary on balanced locking: ++// ++// Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites. ++// Methods that don't have provably balanced locking are forced to run in the ++// interpreter - such methods won't be compiled to use fast_lock and fast_unlock. ++// The interpreter provides two properties: ++// I1: At return-time the interpreter automatically and quietly unlocks any ++// objects acquired the current activation (frame). Recall that the ++// interpreter maintains an on-stack list of locks currently held by ++// a frame. ++// I2: If a method attempts to unlock an object that is not held by the ++// the frame the interpreter throws IMSX. ++// ++// Lets say A(), which has provably balanced locking, acquires O and then calls B(). ++// B() doesn't have provably balanced locking so it runs in the interpreter. ++// Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O ++// is still locked by A(). ++// ++// The only other source of unbalanced locking would be JNI. The "Java Native Interface: ++// Programmer's Guide and Specification" claims that an object locked by jni_monitorenter ++// should not be unlocked by "normal" java-level locking and vice-versa. The specification ++// doesn't specify what will occur if a program engages in such mixed-mode locking, however. ++ ++void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register resReg, ++ Register tmpReg, Register scrReg) { ++ Label DONE, DONE_SET, Stacked, Inflated; ++ ++ guarantee(objReg != boxReg, ""); ++ guarantee(objReg != tmpReg, ""); ++ guarantee(objReg != scrReg, ""); ++ guarantee(boxReg != tmpReg, ""); ++ guarantee(boxReg != scrReg, ""); ++ ++ block_comment("FastUnlock"); ++ ++ if (EmitSync & 4) { ++ // Disable - inhibit all inlining. Force control through the slow-path ++ move(AT, 0x0); ++ return; ++ } else ++ if (EmitSync & 8) { ++ Label DONE_LABEL ; ++ if (UseBiasedLocking) { ++ biased_locking_exit(objReg, tmpReg, DONE_LABEL); ++ } ++ // classic stack-locking code ... ++ ld(tmpReg, Address(boxReg, 0)) ; ++ beq(tmpReg, R0, DONE_LABEL) ; ++ move(AT, 0x1); // delay slot ++ ++ cmpxchg(Address(objReg, 0), boxReg, tmpReg, AT, false, false); ++ bind(DONE_LABEL); ++ } else { ++ Label CheckSucc; ++ ++ // Critically, the biased locking test must have precedence over ++ // and appear before the (box->dhw == 0) recursive stack-lock test. ++ if (UseBiasedLocking && !UseOptoBiasInlining) { ++ Label succ, fail; ++ biased_locking_exit(objReg, tmpReg, succ); ++ b(fail); ++ delayed()->nop(); ++ bind(succ); ++ b(DONE); ++ delayed()->ori(resReg, R0, 1); ++ bind(fail); ++ } ++ ++ ld(tmpReg, Address(boxReg, 0)); // Examine the displaced header ++ beq(tmpReg, R0, DONE_SET); // 0 indicates recursive stack-lock ++ delayed()->sltiu(AT, tmpReg, 1); ++ ++ ld(tmpReg, Address(objReg, 0)); // Examine the object's markword ++ andi(AT, tmpReg, markOopDesc::monitor_value); ++ beq(AT, R0, Stacked); // Inflated? ++ delayed()->nop(); ++ ++ bind(Inflated); ++ // It's inflated. 
++ // Despite our balanced locking property we still check that m->_owner == Self ++ // as java routines or native JNI code called by this thread might ++ // have released the lock. ++ // Refer to the comments in synchronizer.cpp for how we might encode extra ++ // state in _succ so we can avoid fetching EntryList|cxq. ++ // ++ // I'd like to add more cases in fast_lock() and fast_unlock() -- ++ // such as recursive enter and exit -- but we have to be wary of ++ // I$ bloat, T$ effects and BP$ effects. ++ // ++ // If there's no contention try a 1-0 exit. That is, exit without ++ // a costly MEMBAR or CAS. See synchronizer.cpp for details on how ++ // we detect and recover from the race that the 1-0 exit admits. ++ // ++ // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier ++ // before it STs null into _owner, releasing the lock. Updates ++ // to data protected by the critical section must be visible before ++ // we drop the lock (and thus before any other thread could acquire ++ // the lock and observe the fields protected by the lock). ++#ifndef OPT_THREAD ++ get_thread(TREG); ++#endif ++ ++ // It's inflated ++ ld(scrReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2)) ; ++ xorr(scrReg, scrReg, TREG); ++ ++ ld(AT, Address(tmpReg, ObjectMonitor::recursions_offset_in_bytes() - 2)) ; ++ orr(scrReg, scrReg, AT); ++ ++ bne(scrReg, R0, DONE_SET); ++ delayed()->ori(AT, R0, 0); ++ ++ ld(scrReg, Address(tmpReg, ObjectMonitor::cxq_offset_in_bytes() - 2)); ++ ld(AT, Address(tmpReg, ObjectMonitor::EntryList_offset_in_bytes() - 2)); ++ orr(scrReg, scrReg, AT); ++ ++ bne(scrReg, R0, DONE_SET); ++ delayed()->ori(AT, R0, 0); ++ ++ sync(); ++ sd(R0, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2)); ++ b(DONE); ++ delayed()->ori(resReg, R0, 1); ++ ++ bind(Stacked); ++ ld(tmpReg, Address(boxReg, 0)); ++ cmpxchg(Address(objReg, 0), boxReg, tmpReg, AT, false, false); ++ ++ bind(DONE_SET); ++ move(resReg, AT); ++ ++ if (EmitSync & 65536) { ++ bind (CheckSucc); ++ } ++ ++ bind(DONE); ++ ++ // Avoid branch to branch on AMD processors ++ if (EmitSync & 32768) { nop() ; } ++ } ++} ++#endif // COMPILER2 ++ ++void MacroAssembler::align(int modulus) { ++ while (offset() % modulus != 0) nop(); ++} ++ ++ ++void MacroAssembler::verify_FPU(int stack_depth, const char* s) { ++ //Unimplemented(); ++} ++ ++Register caller_saved_registers[] = {AT, V0, V1, A0, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T9, GP, RA, FP}; ++Register caller_saved_registers_except_v0[] = {AT, V1, A0, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T9, GP, RA, FP}; ++ ++//In MIPS64, F0~23 are all caller-saved registers ++FloatRegister caller_saved_fpu_registers[] = {F0, F12, F13}; ++ ++// We preserve all caller-saved register ++void MacroAssembler::pushad(){ ++ int i; ++ ++ // Fixed-point registers ++ int len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]); ++ daddiu(SP, SP, -1 * len * wordSize); ++ for (i = 0; i < len; i++) ++ { ++ sd(caller_saved_registers[i], SP, (len - i - 1) * wordSize); ++ } ++ ++ // Floating-point registers ++ len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); ++ daddiu(SP, SP, -1 * len * wordSize); ++ for (i = 0; i < len; i++) ++ { ++ sdc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); ++ } ++}; ++ ++void MacroAssembler::popad(){ ++ int i; ++ ++ // Floating-point registers ++ int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); ++ for (i = 0; i < len; i++) ++ { ++ 
ldc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); ++ } ++ daddiu(SP, SP, len * wordSize); ++ ++ // Fixed-point registers ++ len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]); ++ for (i = 0; i < len; i++) ++ { ++ ld(caller_saved_registers[i], SP, (len - i - 1) * wordSize); ++ } ++ daddiu(SP, SP, len * wordSize); ++}; ++ ++// We preserve all caller-saved register except V0 ++void MacroAssembler::pushad_except_v0() { ++ int i; ++ ++ // Fixed-point registers ++ int len = sizeof(caller_saved_registers_except_v0) / sizeof(caller_saved_registers_except_v0[0]); ++ daddiu(SP, SP, -1 * len * wordSize); ++ for (i = 0; i < len; i++) { ++ sd(caller_saved_registers_except_v0[i], SP, (len - i - 1) * wordSize); ++ } ++ ++ // Floating-point registers ++ len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); ++ daddiu(SP, SP, -1 * len * wordSize); ++ for (i = 0; i < len; i++) { ++ sdc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); ++ } ++} ++ ++void MacroAssembler::popad_except_v0() { ++ int i; ++ ++ // Floating-point registers ++ int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); ++ for (i = 0; i < len; i++) { ++ ldc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); ++ } ++ daddiu(SP, SP, len * wordSize); ++ ++ // Fixed-point registers ++ len = sizeof(caller_saved_registers_except_v0) / sizeof(caller_saved_registers_except_v0[0]); ++ for (i = 0; i < len; i++) { ++ ld(caller_saved_registers_except_v0[i], SP, (len - i - 1) * wordSize); ++ } ++ daddiu(SP, SP, len * wordSize); ++} ++ ++void MacroAssembler::push2(Register reg1, Register reg2) { ++ daddiu(SP, SP, -16); ++ sd(reg1, SP, 8); ++ sd(reg2, SP, 0); ++} ++ ++void MacroAssembler::pop2(Register reg1, Register reg2) { ++ ld(reg1, SP, 8); ++ ld(reg2, SP, 0); ++ daddiu(SP, SP, 16); ++} ++ ++// for UseCompressedOops Option ++void MacroAssembler::load_klass(Register dst, Register src) { ++ if(UseCompressedClassPointers){ ++ lwu(dst, Address(src, oopDesc::klass_offset_in_bytes())); ++ decode_klass_not_null(dst); ++ } else ++ ld(dst, src, oopDesc::klass_offset_in_bytes()); ++} ++ ++void MacroAssembler::store_klass(Register dst, Register src) { ++ if(UseCompressedClassPointers){ ++ encode_klass_not_null(src); ++ sw(src, dst, oopDesc::klass_offset_in_bytes()); ++ } else { ++ sd(src, dst, oopDesc::klass_offset_in_bytes()); ++ } ++} ++ ++void MacroAssembler::load_prototype_header(Register dst, Register src) { ++ load_klass(dst, src); ++ ld(dst, Address(dst, Klass::prototype_header_offset())); ++} ++ ++void MacroAssembler::store_klass_gap(Register dst, Register src) { ++ if (UseCompressedClassPointers) { ++ sw(src, dst, oopDesc::klass_gap_offset_in_bytes()); ++ } ++} ++ ++void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src, ++ Register tmp1, Register thread_tmp) { ++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ decorators = AccessInternal::decorator_fixup(decorators); ++ bool as_raw = (decorators & AS_RAW) != 0; ++ if (as_raw) { ++ bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, thread_tmp); ++ } else { ++ bs->load_at(this, decorators, type, dst, src, tmp1, thread_tmp); ++ } ++} ++ ++void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register src, ++ Register tmp1, Register tmp2) { ++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ decorators = 
AccessInternal::decorator_fixup(decorators); ++ bool as_raw = (decorators & AS_RAW) != 0; ++ if (as_raw) { ++ bs->BarrierSetAssembler::store_at(this, decorators, type, dst, src, tmp1, tmp2); ++ } else { ++ bs->store_at(this, decorators, type, dst, src, tmp1, tmp2); ++ } ++} ++ ++void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1, ++ Register thread_tmp, DecoratorSet decorators) { ++ access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp); ++} ++ ++// Doesn't do verfication, generates fixed size code ++void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1, ++ Register thread_tmp, DecoratorSet decorators) { ++ access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL | decorators, dst, src, tmp1, thread_tmp); ++} ++ ++void MacroAssembler::store_heap_oop(Address dst, Register src, Register tmp1, ++ Register tmp2, DecoratorSet decorators) { ++ access_store_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, tmp2); ++} ++ ++// Used for storing NULLs. ++void MacroAssembler::store_heap_oop_null(Address dst) { ++ access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg); ++} ++ ++#ifdef ASSERT ++void MacroAssembler::verify_heapbase(const char* msg) { ++ assert (UseCompressedOops || UseCompressedClassPointers, "should be compressed"); ++ assert (Universe::heap() != NULL, "java heap should be initialized"); ++} ++#endif ++ ++ ++// Algorithm must match oop.inline.hpp encode_heap_oop. ++void MacroAssembler::encode_heap_oop(Register r) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?"); ++#endif ++ verify_oop(r, "broken oop in encode_heap_oop"); ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(r, LogMinObjAlignmentInBytes); ++ } ++ return; ++ } ++ ++ movz(r, S5_heapbase, r); ++ dsubu(r, r, S5_heapbase); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(r, LogMinObjAlignmentInBytes); ++ } ++} ++ ++void MacroAssembler::encode_heap_oop(Register dst, Register src) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?"); ++#endif ++ verify_oop(src, "broken oop in encode_heap_oop"); ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ dsrl(dst, src, LogMinObjAlignmentInBytes); ++ } else { ++ if (dst != src) move(dst, src); ++ } ++ } else { ++ if (dst == src) { ++ movz(dst, S5_heapbase, dst); ++ dsubu(dst, dst, S5_heapbase); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(dst, LogMinObjAlignmentInBytes); ++ } ++ } else { ++ dsubu(dst, src, S5_heapbase); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(dst, LogMinObjAlignmentInBytes); ++ } ++ movz(dst, R0, src); ++ } ++ } ++} ++ ++void MacroAssembler::encode_heap_oop_not_null(Register r) { ++ assert (UseCompressedOops, "should be compressed"); ++#ifdef ASSERT ++ if (CheckCompressedOops) { ++ Label ok; ++ bne(r, R0, ok); ++ delayed()->nop(); ++ stop("null oop passed to encode_heap_oop_not_null"); ++ bind(ok); ++ } ++#endif ++ verify_oop(r, "broken oop in 
encode_heap_oop_not_null"); ++ if (Universe::narrow_oop_base() != NULL) { ++ dsubu(r, r, S5_heapbase); ++ } ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(r, LogMinObjAlignmentInBytes); ++ } ++ ++} ++ ++void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) { ++ assert (UseCompressedOops, "should be compressed"); ++#ifdef ASSERT ++ if (CheckCompressedOops) { ++ Label ok; ++ bne(src, R0, ok); ++ delayed()->nop(); ++ stop("null oop passed to encode_heap_oop_not_null2"); ++ bind(ok); ++ } ++#endif ++ verify_oop(src, "broken oop in encode_heap_oop_not_null2"); ++ ++ if (Universe::narrow_oop_base() != NULL) { ++ dsubu(dst, src, S5_heapbase); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(dst, LogMinObjAlignmentInBytes); ++ } ++ } else { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ dsrl(dst, src, LogMinObjAlignmentInBytes); ++ } else { ++ if (dst != src) move(dst, src); ++ } ++ } ++} ++ ++void MacroAssembler::decode_heap_oop(Register r) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::decode_heap_oop corrupted?"); ++#endif ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shl(r, LogMinObjAlignmentInBytes); ++ } ++ } else { ++ move(AT, r); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shl(r, LogMinObjAlignmentInBytes); ++ } ++ daddu(r, r, S5_heapbase); ++ movz(r, R0, AT); ++ } ++ verify_oop(r, "broken oop in decode_heap_oop"); ++} ++ ++void MacroAssembler::decode_heap_oop(Register dst, Register src) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::decode_heap_oop corrupted?"); ++#endif ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ if (dst != src) nop(); // DON'T DELETE THIS GUY. ++ dsll(dst, src, LogMinObjAlignmentInBytes); ++ } else { ++ if (dst != src) move(dst, src); ++ } ++ } else { ++ if (dst == src) { ++ move(AT, dst); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shl(dst, LogMinObjAlignmentInBytes); ++ } ++ daddu(dst, dst, S5_heapbase); ++ movz(dst, R0, AT); ++ } else { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ dsll(dst, src, LogMinObjAlignmentInBytes); ++ daddu(dst, dst, S5_heapbase); ++ } else { ++ daddu(dst, src, S5_heapbase); ++ } ++ movz(dst, R0, src); ++ } ++ } ++ verify_oop(dst, "broken oop in decode_heap_oop"); ++} ++ ++void MacroAssembler::decode_heap_oop_not_null(Register r) { ++ // Note: it will change flags ++ assert (UseCompressedOops, "should only be used for compressed headers"); ++ assert (Universe::heap() != NULL, "java heap should be initialized"); ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. 
++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shl(r, LogMinObjAlignmentInBytes); ++ if (Universe::narrow_oop_base() != NULL) { ++ daddu(r, r, S5_heapbase); ++ } ++ } else { ++ assert (Universe::narrow_oop_base() == NULL, "sanity"); ++ } ++} ++ ++void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { ++ assert (UseCompressedOops, "should only be used for compressed headers"); ++ assert (Universe::heap() != NULL, "java heap should be initialized"); ++ ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. ++ //lea(dst, Address(S5_heapbase, src, Address::times_8, 0)); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ if (LogMinObjAlignmentInBytes == Address::times_8) { ++ dsll(dst, src, LogMinObjAlignmentInBytes); ++ daddu(dst, dst, S5_heapbase); ++ } else { ++ dsll(dst, src, LogMinObjAlignmentInBytes); ++ if (Universe::narrow_oop_base() != NULL) { ++ daddu(dst, dst, S5_heapbase); ++ } ++ } ++ } else { ++ assert (Universe::narrow_oop_base() == NULL, "sanity"); ++ if (dst != src) { ++ move(dst, src); ++ } ++ } ++} ++ ++void MacroAssembler::encode_klass_not_null(Register r) { ++ if (Universe::narrow_klass_base() != NULL) { ++ assert(r != AT, "Encoding a klass in AT"); ++ set64(AT, (int64_t)Universe::narrow_klass_base()); ++ dsubu(r, r, AT); ++ } ++ if (Universe::narrow_klass_shift() != 0) { ++ assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ shr(r, LogKlassAlignmentInBytes); ++ } ++} ++ ++void MacroAssembler::encode_klass_not_null(Register dst, Register src) { ++ if (dst == src) { ++ encode_klass_not_null(src); ++ } else { ++ if (Universe::narrow_klass_base() != NULL) { ++ set64(dst, (int64_t)Universe::narrow_klass_base()); ++ dsubu(dst, src, dst); ++ if (Universe::narrow_klass_shift() != 0) { ++ assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ shr(dst, LogKlassAlignmentInBytes); ++ } ++ } else { ++ if (Universe::narrow_klass_shift() != 0) { ++ assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ dsrl(dst, src, LogKlassAlignmentInBytes); ++ } else { ++ move(dst, src); ++ } ++ } ++ } ++} ++ ++// Function instr_size_for_decode_klass_not_null() counts the instructions ++// generated by decode_klass_not_null(register r) and reinit_heapbase(), ++// when (Universe::heap() != NULL). Hence, if the instructions they ++// generate change, then this method needs to be updated. ++int MacroAssembler::instr_size_for_decode_klass_not_null() { ++ assert (UseCompressedClassPointers, "only for compressed klass ptrs"); ++ if (Universe::narrow_klass_base() != NULL) { ++ // mov64 + addq + shlq? + mov64 (for reinit_heapbase()). ++ return (Universe::narrow_klass_shift() == 0 ? 4 * 9 : 4 * 10); ++ } else { ++ // longest load decode klass function, mov64, leaq ++ return (Universe::narrow_klass_shift() == 0 ? 
4 * 0 : 4 * 1); ++ } ++} ++ ++void MacroAssembler::decode_klass_not_null(Register r) { ++ assert (UseCompressedClassPointers, "should only be used for compressed headers"); ++ assert(r != AT, "Decoding a klass in AT"); ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. ++ if (Universe::narrow_klass_shift() != 0) { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ shl(r, LogKlassAlignmentInBytes); ++ } ++ if (Universe::narrow_klass_base() != NULL) { ++ set64(AT, (int64_t)Universe::narrow_klass_base()); ++ daddu(r, r, AT); ++ //Not neccessary for MIPS at all. ++ //reinit_heapbase(); ++ } ++} ++ ++void MacroAssembler::decode_klass_not_null(Register dst, Register src) { ++ assert (UseCompressedClassPointers, "should only be used for compressed headers"); ++ ++ if (dst == src) { ++ decode_klass_not_null(dst); ++ } else { ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. ++ set64(dst, (int64_t)Universe::narrow_klass_base()); ++ if (Universe::narrow_klass_shift() != 0) { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?"); ++ dsll(AT, src, Address::times_8); ++ daddu(dst, dst, AT); ++ } else { ++ daddu(dst, src, dst); ++ } ++ } ++} ++ ++void MacroAssembler::incrementl(Register reg, int value) { ++ if (value == min_jint) { ++ move(AT, value); ++ addu32(reg, reg, AT); ++ return; ++ } ++ if (value < 0) { decrementl(reg, -value); return; } ++ if (value == 0) { ; return; } ++ ++ move(AT, value); ++ addu32(reg, reg, AT); ++} ++ ++void MacroAssembler::decrementl(Register reg, int value) { ++ if (value == min_jint) { ++ move(AT, value); ++ subu32(reg, reg, AT); ++ return; ++ } ++ if (value < 0) { incrementl(reg, -value); return; } ++ if (value == 0) { ; return; } ++ ++ move(AT, value); ++ subu32(reg, reg, AT); ++} ++ ++void MacroAssembler::reinit_heapbase() { ++ if (UseCompressedOops || UseCompressedClassPointers) { ++ if (Universe::heap() != NULL) { ++ if (Universe::narrow_oop_base() == NULL) { ++ move(S5_heapbase, R0); ++ } else { ++ set64(S5_heapbase, (int64_t)Universe::narrow_ptrs_base()); ++ } ++ } else { ++ set64(S5_heapbase, (intptr_t)Universe::narrow_ptrs_base_addr()); ++ ld(S5_heapbase, S5_heapbase, 0); ++ } ++ } ++} ++ ++void MacroAssembler::check_klass_subtype(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label& L_success) { ++//implement ind gen_subtype_check ++ Label L_failure; ++ check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL); ++ check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL); ++ bind(L_failure); ++} ++ ++void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label* L_success, ++ Label* L_failure, ++ Label* L_slow_path, ++ RegisterOrConstant super_check_offset) { ++ assert_different_registers(sub_klass, super_klass, temp_reg); ++ bool must_load_sco = (super_check_offset.constant_or_zero() == -1); ++ if (super_check_offset.is_register()) { ++ assert_different_registers(sub_klass, super_klass, ++ super_check_offset.as_register()); ++ } else 
if (must_load_sco) { ++ assert(temp_reg != noreg, "supply either a temp or a register offset"); ++ } ++ ++ Label L_fallthrough; ++ int label_nulls = 0; ++ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } ++ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } ++ if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } ++ assert(label_nulls <= 1, "at most one NULL in the batch"); ++ ++ int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); ++ int sco_offset = in_bytes(Klass::super_check_offset_offset()); ++ // If the pointers are equal, we are done (e.g., String[] elements). ++ // This self-check enables sharing of secondary supertype arrays among ++ // non-primary types such as array-of-interface. Otherwise, each such ++ // type would need its own customized SSA. ++ // We move this check to the front of the fast path because many ++ // type checks are in fact trivially successful in this manner, ++ // so we get a nicely predicted branch right at the start of the check. ++ beq(sub_klass, super_klass, *L_success); ++ delayed()->nop(); ++ // Check the supertype display: ++ if (must_load_sco) { ++ lwu(temp_reg, super_klass, sco_offset); ++ super_check_offset = RegisterOrConstant(temp_reg); ++ } ++ daddu(AT, sub_klass, super_check_offset.register_or_noreg()); ++ ld(AT, AT, super_check_offset.constant_or_zero()); ++ ++ // This check has worked decisively for primary supers. ++ // Secondary supers are sought in the super_cache ('super_cache_addr'). ++ // (Secondary supers are interfaces and very deeply nested subtypes.) ++ // This works in the same check above because of a tricky aliasing ++ // between the super_cache and the primary super display elements. ++ // (The 'super_check_addr' can address either, as the case requires.) ++ // Note that the cache is updated below if it does not help us find ++ // what we need immediately. ++ // So if it was a primary super, we can just fail immediately. ++ // Otherwise, it's the slow path for us (no success at this point). ++ ++ if (super_check_offset.is_register()) { ++ beq(super_klass, AT, *L_success); ++ delayed()->nop(); ++ addiu(AT, super_check_offset.as_register(), -sc_offset); ++ if (L_failure == &L_fallthrough) { ++ beq(AT, R0, *L_slow_path); ++ delayed()->nop(); ++ } else { ++ bne_far(AT, R0, *L_failure); ++ delayed()->nop(); ++ b(*L_slow_path); ++ delayed()->nop(); ++ } ++ } else if (super_check_offset.as_constant() == sc_offset) { ++ // Need a slow path; fast failure is impossible. ++ if (L_slow_path == &L_fallthrough) { ++ beq(super_klass, AT, *L_success); ++ delayed()->nop(); ++ } else { ++ bne(super_klass, AT, *L_slow_path); ++ delayed()->nop(); ++ b(*L_success); ++ delayed()->nop(); ++ } ++ } else { ++ // No slow path; it's a fast decision. 
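++    // Illustrative sketch of the constant-offset fast decision below (comment only,
++    // using the names already in scope; not emitted code):
++    //   loaded = *(sub_klass + super_check_offset);   // a primary-super display slot
++    //   loaded == super_klass  ? goto L_success        // exact depth match
++    //                          : goto L_failure;       // cannot be a subtype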
++ if (L_failure == &L_fallthrough) { ++ beq(super_klass, AT, *L_success); ++ delayed()->nop(); ++ } else { ++ bne_far(super_klass, AT, *L_failure); ++ delayed()->nop(); ++ b(*L_success); ++ delayed()->nop(); ++ } ++ } ++ ++ bind(L_fallthrough); ++ ++} ++ ++ ++void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Register temp2_reg, ++ Label* L_success, ++ Label* L_failure, ++ bool set_cond_codes) { ++ if (temp2_reg == noreg) ++ temp2_reg = TSR; ++ assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg); ++#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg) ++ ++ Label L_fallthrough; ++ int label_nulls = 0; ++ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } ++ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } ++ assert(label_nulls <= 1, "at most one NULL in the batch"); ++ ++ // a couple of useful fields in sub_klass: ++ int ss_offset = in_bytes(Klass::secondary_supers_offset()); ++ int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); ++ Address secondary_supers_addr(sub_klass, ss_offset); ++ Address super_cache_addr( sub_klass, sc_offset); ++ ++ // Do a linear scan of the secondary super-klass chain. ++ // This code is rarely used, so simplicity is a virtue here. ++ // The repne_scan instruction uses fixed registers, which we must spill. ++ // Don't worry too much about pre-existing connections with the input regs. ++ ++#ifndef PRODUCT ++ int* pst_counter = &SharedRuntime::_partial_subtype_ctr; ++ ExternalAddress pst_counter_addr((address) pst_counter); ++#endif //PRODUCT ++ ++ // We will consult the secondary-super array. ++ ld(temp_reg, secondary_supers_addr); ++ // Load the array length. ++ lw(temp2_reg, Address(temp_reg, Array::length_offset_in_bytes())); ++ // Skip to start of data. ++ daddiu(temp_reg, temp_reg, Array::base_offset_in_bytes()); ++ ++ // OpenJDK8 never compresses klass pointers in secondary-super array. ++ Label Loop, subtype; ++ bind(Loop); ++ beq(temp2_reg, R0, *L_failure); ++ delayed()->nop(); ++ ld(AT, temp_reg, 0); ++ beq(AT, super_klass, subtype); ++ delayed()->daddiu(temp_reg, temp_reg, 1 * wordSize); ++ b(Loop); ++ delayed()->daddiu(temp2_reg, temp2_reg, -1); ++ ++ bind(subtype); ++ sd(super_klass, super_cache_addr); ++ if (L_success != &L_fallthrough) { ++ b(*L_success); ++ delayed()->nop(); ++ } ++ ++ // Success. Cache the super we found and proceed in triumph. ++#undef IS_A_TEMP ++ ++ bind(L_fallthrough); ++} ++ ++void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) { ++ ld(oop_result, Address(java_thread, JavaThread::vm_result_offset())); ++ sd(R0, Address(java_thread, JavaThread::vm_result_offset())); ++ verify_oop(oop_result, "broken oop in call_VM_base"); ++} ++ ++void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) { ++ ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset())); ++ sd(R0, Address(java_thread, JavaThread::vm_result_2_offset())); ++} ++ ++Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, ++ int extra_slot_offset) { ++ // cf. TemplateTable::prepare_invoke(), if (load_receiver). 
++ int stackElementSize = Interpreter::stackElementSize; ++ int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0); ++#ifdef ASSERT ++ int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1); ++ assert(offset1 - offset == stackElementSize, "correct arithmetic"); ++#endif ++ Register scale_reg = NOREG; ++ Address::ScaleFactor scale_factor = Address::no_scale; ++ if (arg_slot.is_constant()) { ++ offset += arg_slot.as_constant() * stackElementSize; ++ } else { ++ scale_reg = arg_slot.as_register(); ++ scale_factor = Address::times_8; ++ } ++ // We don't push RA on stack in prepare_invoke. ++ // offset += wordSize; // return PC is on stack ++ if(scale_reg==NOREG) return Address(SP, offset); ++ else { ++ dsll(scale_reg, scale_reg, scale_factor); ++ daddu(scale_reg, SP, scale_reg); ++ return Address(scale_reg, offset); ++ } ++} ++ ++SkipIfEqual::~SkipIfEqual() { ++ _masm->bind(_label); ++} ++ ++void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) { ++ switch (size_in_bytes) { ++ case 8: ld(dst, src); break; ++ case 4: lw(dst, src); break; ++ case 2: is_signed ? lh(dst, src) : lhu(dst, src); break; ++ case 1: is_signed ? lb( dst, src) : lbu( dst, src); break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) { ++ switch (size_in_bytes) { ++ case 8: sd(src, dst); break; ++ case 4: sw(src, dst); break; ++ case 2: sh(src, dst); break; ++ case 1: sb(src, dst); break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++// Look up the method for a megamorphic invokeinterface call. ++// The target method is determined by . ++// The receiver klass is in recv_klass. ++// On success, the result will be in method_result, and execution falls through. ++// On failure, execution transfers to the given label. ++void MacroAssembler::lookup_interface_method(Register recv_klass, ++ Register intf_klass, ++ RegisterOrConstant itable_index, ++ Register method_result, ++ Register scan_temp, ++ Label& L_no_such_interface, ++ bool return_method) { ++ assert_different_registers(recv_klass, intf_klass, scan_temp, AT); ++ assert_different_registers(method_result, intf_klass, scan_temp, AT); ++ assert(recv_klass != method_result || !return_method, ++ "recv_klass can be destroyed when method isn't needed"); ++ ++ assert(itable_index.is_constant() || itable_index.as_register() == method_result, ++ "caller must use same register for non-constant itable index as for method"); ++ ++ // Compute start of first itableOffsetEntry (which is at the end of the vtable) ++ int vtable_base = in_bytes(Klass::vtable_start_offset()); ++ int itentry_off = itableMethodEntry::method_offset_in_bytes(); ++ int scan_step = itableOffsetEntry::size() * wordSize; ++ int vte_size = vtableEntry::size() * wordSize; ++ Address::ScaleFactor times_vte_scale = Address::times_ptr; ++ assert(vte_size == wordSize, "else adjust times_vte_scale"); ++ ++ lw(scan_temp, Address(recv_klass, Klass::vtable_length_offset())); ++ ++ // %%% Could store the aligned, prescaled offset in the klassoop. ++ dsll(scan_temp, scan_temp, times_vte_scale); ++ daddu(scan_temp, recv_klass, scan_temp); ++ daddiu(scan_temp, scan_temp, vtable_base); ++ if (HeapWordsPerLong > 1) { ++ // Round up to align_object_offset boundary ++ // see code for InstanceKlass::start_of_itable! 
++ round_to(scan_temp, BytesPerLong); ++ } ++ ++ if (return_method) { ++ // Adjust recv_klass by scaled itable_index, so we can free itable_index. ++ assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); ++ if (itable_index.is_constant()) { ++ set64(AT, (int)itable_index.is_constant()); ++ dsll(AT, AT, (int)Address::times_ptr); ++ } else { ++ dsll(AT, itable_index.as_register(), (int)Address::times_ptr); ++ } ++ daddu(AT, AT, recv_klass); ++ daddiu(recv_klass, AT, itentry_off); ++ } ++ ++ Label search, found_method; ++ ++ for (int peel = 1; peel >= 0; peel--) { ++ ld(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes())); ++ ++ if (peel) { ++ beq(intf_klass, method_result, found_method); ++ delayed()->nop(); ++ } else { ++ bne(intf_klass, method_result, search); ++ delayed()->nop(); ++ // (invert the test to fall through to found_method...) ++ } ++ ++ if (!peel) break; ++ ++ bind(search); ++ ++ // Check that the previous entry is non-null. A null entry means that ++ // the receiver class doesn't implement the interface, and wasn't the ++ // same as when the caller was compiled. ++ beq(method_result, R0, L_no_such_interface); ++ delayed()->nop(); ++ daddiu(scan_temp, scan_temp, scan_step); ++ } ++ ++ bind(found_method); ++ ++ if (return_method) { ++ // Got a hit. ++ lw(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes())); ++ if (UseLEXT1) { ++ gsldx(method_result, recv_klass, scan_temp, 0); ++ } else { ++ daddu(AT, recv_klass, scan_temp); ++ ld(method_result, AT, 0); ++ } ++ } ++} ++ ++// virtual method calling ++void MacroAssembler::lookup_virtual_method(Register recv_klass, ++ RegisterOrConstant vtable_index, ++ Register method_result) { ++ Register tmp = GP; ++ push(tmp); ++ ++ if (vtable_index.is_constant()) { ++ assert_different_registers(recv_klass, method_result, tmp); ++ } else { ++ assert_different_registers(recv_klass, method_result, vtable_index.as_register(), tmp); ++ } ++ const int base = in_bytes(Klass::vtable_start_offset()); ++ assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below"); ++ if (vtable_index.is_constant()) { ++ set64(AT, vtable_index.as_constant()); ++ dsll(AT, AT, (int)Address::times_ptr); ++ } else { ++ dsll(AT, vtable_index.as_register(), (int)Address::times_ptr); ++ } ++ set64(tmp, base + vtableEntry::method_offset_in_bytes()); ++ daddu(tmp, tmp, AT); ++ daddu(tmp, tmp, recv_klass); ++ ld(method_result, tmp, 0); ++ ++ pop(tmp); ++} ++ ++void MacroAssembler::store_for_type_by_register(Register src_reg, Register tmp_reg, int disp, BasicType type, bool wide) { ++ switch (type) { ++ case T_LONG: ++ st_ptr(src_reg, tmp_reg, disp); ++ break; ++ case T_ARRAY: ++ case T_OBJECT: ++ if (UseCompressedOops && !wide) { ++ sw(src_reg, tmp_reg, disp); ++ } else { ++ st_ptr(src_reg, tmp_reg, disp); ++ } ++ break; ++ case T_ADDRESS: ++ st_ptr(src_reg, tmp_reg, disp); ++ break; ++ case T_INT: ++ sw(src_reg, tmp_reg, disp); ++ break; ++ case T_CHAR: ++ case T_SHORT: ++ sh(src_reg, tmp_reg, disp); ++ break; ++ case T_BYTE: ++ case T_BOOLEAN: ++ sb(src_reg, tmp_reg, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::store_for_type(Register src_reg, Address addr, BasicType type, bool wide) { ++ Register tmp_reg = T9; ++ Register index_reg = addr.index(); ++ if (index_reg == NOREG) { ++ tmp_reg = NOREG; ++ } ++ ++ int scale = addr.scale(); ++ if (tmp_reg != NOREG && scale >= 0) { ++ dsll(tmp_reg, index_reg, 
scale); ++ } ++ ++ int disp = addr.disp(); ++ bool disp_is_simm16 = true; ++ if (!Assembler::is_simm16(disp)) { ++ disp_is_simm16 = false; ++ } ++ ++ Register base_reg = addr.base(); ++ if (tmp_reg != NOREG) { ++ assert_different_registers(tmp_reg, base_reg, index_reg); ++ } ++ ++ if (tmp_reg != NOREG) { ++ daddu(tmp_reg, base_reg, tmp_reg); ++ if (!disp_is_simm16) { ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ store_for_type_by_register(src_reg, tmp_reg, disp_is_simm16 ? disp : 0, type, wide); ++ } else { ++ if (!disp_is_simm16) { ++ tmp_reg = T9; ++ assert_different_registers(tmp_reg, base_reg); ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ store_for_type_by_register(src_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? disp : 0, type, wide); ++ } ++} ++ ++void MacroAssembler::store_for_type_by_register(FloatRegister src_reg, Register tmp_reg, int disp, BasicType type) { ++ switch (type) { ++ case T_DOUBLE: ++ sdc1(src_reg, tmp_reg, disp); ++ break; ++ case T_FLOAT: ++ swc1(src_reg, tmp_reg, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::store_for_type(FloatRegister src_reg, Address addr, BasicType type) { ++ Register tmp_reg = T9; ++ Register index_reg = addr.index(); ++ if (index_reg == NOREG) { ++ tmp_reg = NOREG; ++ } ++ ++ int scale = addr.scale(); ++ if (tmp_reg != NOREG && scale >= 0) { ++ dsll(tmp_reg, index_reg, scale); ++ } ++ ++ int disp = addr.disp(); ++ bool disp_is_simm16 = true; ++ if (!Assembler::is_simm16(disp)) { ++ disp_is_simm16 = false; ++ } ++ ++ Register base_reg = addr.base(); ++ if (tmp_reg != NOREG) { ++ assert_different_registers(tmp_reg, base_reg, index_reg); ++ } ++ ++ if (tmp_reg != NOREG) { ++ daddu(tmp_reg, base_reg, tmp_reg); ++ if (!disp_is_simm16) { ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ store_for_type_by_register(src_reg, tmp_reg, disp_is_simm16 ? disp : 0, type); ++ } else { ++ if (!disp_is_simm16) { ++ tmp_reg = T9; ++ assert_different_registers(tmp_reg, base_reg); ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ store_for_type_by_register(src_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? 
disp : 0, type); ++ } ++} ++ ++void MacroAssembler::load_for_type_by_register(Register dst_reg, Register tmp_reg, int disp, BasicType type, bool wide) { ++ switch (type) { ++ case T_LONG: ++ ld_ptr(dst_reg, tmp_reg, disp); ++ break; ++ case T_ARRAY: ++ case T_OBJECT: ++ if (UseCompressedOops && !wide) { ++ lwu(dst_reg, tmp_reg, disp); ++ } else { ++ ld_ptr(dst_reg, tmp_reg, disp); ++ } ++ break; ++ case T_ADDRESS: ++ if (UseCompressedClassPointers && disp == oopDesc::klass_offset_in_bytes()) { ++ lwu(dst_reg, tmp_reg, disp); ++ } else { ++ ld_ptr(dst_reg, tmp_reg, disp); ++ } ++ break; ++ case T_INT: ++ lw(dst_reg, tmp_reg, disp); ++ break; ++ case T_CHAR: ++ lhu(dst_reg, tmp_reg, disp); ++ break; ++ case T_SHORT: ++ lh(dst_reg, tmp_reg, disp); ++ break; ++ case T_BYTE: ++ case T_BOOLEAN: ++ lb(dst_reg, tmp_reg, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++int MacroAssembler::load_for_type(Register dst_reg, Address addr, BasicType type, bool wide) { ++ int code_offset = 0; ++ Register tmp_reg = T9; ++ Register index_reg = addr.index(); ++ if (index_reg == NOREG) { ++ tmp_reg = NOREG; ++ } ++ ++ int scale = addr.scale(); ++ if (tmp_reg != NOREG && scale >= 0) { ++ dsll(tmp_reg, index_reg, scale); ++ } ++ ++ int disp = addr.disp(); ++ bool disp_is_simm16 = true; ++ if (!Assembler::is_simm16(disp)) { ++ disp_is_simm16 = false; ++ } ++ ++ Register base_reg = addr.base(); ++ if (tmp_reg != NOREG) { ++ assert_different_registers(tmp_reg, base_reg, index_reg); ++ } ++ ++ if (tmp_reg != NOREG) { ++ daddu(tmp_reg, base_reg, tmp_reg); ++ if (!disp_is_simm16) { ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ code_offset = offset(); ++ load_for_type_by_register(dst_reg, tmp_reg, disp_is_simm16 ? disp : 0, type, wide); ++ } else { ++ if (!disp_is_simm16) { ++ tmp_reg = T9; ++ assert_different_registers(tmp_reg, base_reg); ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ code_offset = offset(); ++ load_for_type_by_register(dst_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? disp : 0, type, wide); ++ } ++ ++ return code_offset; ++} ++ ++#ifdef COMPILER2 ++// Compare strings, used for char[] and byte[]. ++void MacroAssembler::string_compare(Register str1, Register str2, ++ Register cnt1, Register cnt2, Register result, ++ int ae) { ++ Label L, Loop, haveResult, done; ++ ++ bool isLL = ae == StrIntrinsicNode::LL; ++ bool isLU = ae == StrIntrinsicNode::LU; ++ bool isUL = ae == StrIntrinsicNode::UL; ++ ++ bool str1_isL = isLL || isLU; ++ bool str2_isL = isLL || isUL; ++ ++ if (!str1_isL) srl(cnt1, cnt1, 1); ++ if (!str2_isL) srl(cnt2, cnt2, 1); ++ ++ // compute the and difference of lengths (in result) ++ subu(result, cnt1, cnt2); // result holds the difference of two lengths ++ ++ // compute the shorter length (in cnt1) ++ slt(AT, cnt2, cnt1); ++ movn(cnt1, cnt2, AT); ++ ++ // Now the shorter length is in cnt1 and cnt2 can be used as a tmp register ++ bind(Loop); // Loop begin ++ beq(cnt1, R0, done); ++ if (str1_isL) { ++ delayed()->lbu(AT, str1, 0); ++ } else { ++ delayed()->lhu(AT, str1, 0); ++ } ++ ++ // compare current character ++ if (str2_isL) { ++ lbu(cnt2, str2, 0); ++ } else { ++ lhu(cnt2, str2, 0); ++ } ++ bne(AT, cnt2, haveResult); ++ delayed()->addiu(str1, str1, str1_isL ? 1 : 2); ++ addiu(str2, str2, str2_isL ? 1 : 2); ++ b(Loop); ++ delayed()->addiu(cnt1, cnt1, -1); // Loop end ++ ++ bind(haveResult); ++ subu(result, AT, cnt2); ++ ++ bind(done); ++} ++ ++// Compare char[] or byte[] arrays or substrings. 
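++// Result convention used below: result is 1 when the arrays are equal (same
++// reference, zero count, or all elements match) and 0 otherwise.
++// Hypothetical call from generated stub code (illustration, not part of this file):
++//   arrays_equals(a1, a2, cnt, tmp, res, /* is_char */ true);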
++void MacroAssembler::arrays_equals(Register str1, Register str2, ++ Register cnt, Register tmp, Register result, ++ bool is_char) { ++ Label Loop, True, False; ++ ++ beq(str1, str2, True); // same char[] ? ++ delayed()->daddiu(result, R0, 1); ++ ++ beq(cnt, R0, True); ++ delayed()->nop(); // count == 0 ++ ++ bind(Loop); ++ ++ // compare current character ++ if (is_char) { ++ lhu(AT, str1, 0); ++ lhu(tmp, str2, 0); ++ } else { ++ lbu(AT, str1, 0); ++ lbu(tmp, str2, 0); ++ } ++ bne(AT, tmp, False); ++ delayed()->addiu(str1, str1, is_char ? 2 : 1); ++ addiu(cnt, cnt, -1); ++ bne(cnt, R0, Loop); ++ delayed()->addiu(str2, str2, is_char ? 2 : 1); ++ ++ b(True); ++ delayed()->nop(); ++ ++ bind(False); ++ daddiu(result, R0, 0); ++ ++ bind(True); ++} ++#endif // COMPILER2 ++ ++void MacroAssembler::load_for_type_by_register(FloatRegister dst_reg, Register tmp_reg, int disp, BasicType type) { ++ switch (type) { ++ case T_DOUBLE: ++ ldc1(dst_reg, tmp_reg, disp); ++ break; ++ case T_FLOAT: ++ lwc1(dst_reg, tmp_reg, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++int MacroAssembler::load_for_type(FloatRegister dst_reg, Address addr, BasicType type) { ++ int code_offset = 0; ++ Register tmp_reg = T9; ++ Register index_reg = addr.index(); ++ if (index_reg == NOREG) { ++ tmp_reg = NOREG; ++ } ++ ++ int scale = addr.scale(); ++ if (tmp_reg != NOREG && scale >= 0) { ++ dsll(tmp_reg, index_reg, scale); ++ } ++ ++ int disp = addr.disp(); ++ bool disp_is_simm16 = true; ++ if (!Assembler::is_simm16(disp)) { ++ disp_is_simm16 = false; ++ } ++ ++ Register base_reg = addr.base(); ++ if (tmp_reg != NOREG) { ++ assert_different_registers(tmp_reg, base_reg, index_reg); ++ } ++ ++ if (tmp_reg != NOREG) { ++ daddu(tmp_reg, base_reg, tmp_reg); ++ if (!disp_is_simm16) { ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ code_offset = offset(); ++ load_for_type_by_register(dst_reg, tmp_reg, disp_is_simm16 ? disp : 0, type); ++ } else { ++ if (!disp_is_simm16) { ++ tmp_reg = T9; ++ assert_different_registers(tmp_reg, base_reg); ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ code_offset = offset(); ++ load_for_type_by_register(dst_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? disp : 0, type); ++ } ++ ++ return code_offset; ++} ++ ++void MacroAssembler::clear_jweak_tag(Register possibly_jweak) { ++ const int32_t inverted_jweak_mask = ~static_cast(JNIHandles::weak_tag_mask); ++ STATIC_ASSERT(inverted_jweak_mask == -2); // otherwise check this code ++ // The inverted mask is sign-extended ++ move(AT, inverted_jweak_mask); ++ andr(possibly_jweak, AT, possibly_jweak); ++} ++ ++void MacroAssembler::resolve_jobject(Register value, ++ Register thread, ++ Register tmp) { ++ assert_different_registers(value, thread, tmp); ++ Label done, not_weak; ++ beq(value, R0, done); // Use NULL as-is. ++ delayed()->nop(); ++ move(AT, JNIHandles::weak_tag_mask); // Test for jweak tag. ++ andr(AT, value, AT); ++ beq(AT, R0, not_weak); ++ delayed()->nop(); ++ // Resolve jweak. ++ access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, ++ value, Address(value, -JNIHandles::weak_tag_value), tmp, thread); ++ verify_oop(value); ++ b(done); ++ delayed()->nop(); ++ bind(not_weak); ++ // Resolve (untagged) jobject. 
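++  // Untagged case (sketch): a plain jobject slot holds the oop directly, so it is
++  // loaded from offset 0; the jweak branch above strips the tag by addressing the
++  // slot at -JNIHandles::weak_tag_value before the ON_PHANTOM_OOP_REF load.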
++ access_load_at(T_OBJECT, IN_NATIVE, value, Address(value, 0), tmp, thread); ++ verify_oop(value); ++ bind(done); ++} ++ ++void MacroAssembler::cmp_cmov(Register op1, ++ Register op2, ++ Register dst, ++ Register src, ++ CMCompare cmp, ++ bool is_signed) { ++ switch (cmp) { ++ case EQ: ++ subu(AT, op1, op2); ++ movz(dst, src, AT); ++ break; ++ ++ case NE: ++ subu(AT, op1, op2); ++ movn(dst, src, AT); ++ break; ++ ++ case GT: ++ if (is_signed) { ++ slt(AT, op2, op1); ++ } else { ++ sltu(AT, op2, op1); ++ } ++ movn(dst, src, AT); ++ break; ++ ++ case GE: ++ if (is_signed) { ++ slt(AT, op1, op2); ++ } else { ++ sltu(AT, op1, op2); ++ } ++ movz(dst, src, AT); ++ break; ++ ++ case LT: ++ if (is_signed) { ++ slt(AT, op1, op2); ++ } else { ++ sltu(AT, op1, op2); ++ } ++ movn(dst, src, AT); ++ break; ++ ++ case LE: ++ if (is_signed) { ++ slt(AT, op2, op1); ++ } else { ++ sltu(AT, op2, op1); ++ } ++ movz(dst, src, AT); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ Register dst, ++ Register src, ++ CMCompare cmp, ++ bool is_float) { ++ switch(cmp) { ++ case EQ: ++ if (is_float) { ++ c_eq_s(op1, op2); ++ } else { ++ c_eq_d(op1, op2); ++ } ++ movt(dst, src); ++ break; ++ ++ case NE: ++ if (is_float) { ++ c_eq_s(op1, op2); ++ } else { ++ c_eq_d(op1, op2); ++ } ++ movf(dst, src); ++ break; ++ ++ case GT: ++ if (is_float) { ++ c_ule_s(op1, op2); ++ } else { ++ c_ule_d(op1, op2); ++ } ++ movf(dst, src); ++ break; ++ ++ case GE: ++ if (is_float) { ++ c_ult_s(op1, op2); ++ } else { ++ c_ult_d(op1, op2); ++ } ++ movf(dst, src); ++ break; ++ ++ case LT: ++ if (is_float) { ++ c_ult_s(op1, op2); ++ } else { ++ c_ult_d(op1, op2); ++ } ++ movt(dst, src); ++ break; ++ ++ case LE: ++ if (is_float) { ++ c_ule_s(op1, op2); ++ } else { ++ c_ule_d(op1, op2); ++ } ++ movt(dst, src); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ FloatRegister dst, ++ FloatRegister src, ++ CMCompare cmp, ++ bool is_float) { ++ switch(cmp) { ++ case EQ: ++ if (!is_float) { ++ c_eq_d(op1, op2); ++ movt_d(dst, src); ++ } else { ++ c_eq_s(op1, op2); ++ movt_s(dst, src); ++ } ++ break; ++ ++ case NE: ++ if (!is_float) { ++ c_eq_d(op1, op2); ++ movf_d(dst, src); ++ } else { ++ c_eq_s(op1, op2); ++ movf_s(dst, src); ++ } ++ break; ++ ++ case GT: ++ if (!is_float) { ++ c_ule_d(op1, op2); ++ movf_d(dst, src); ++ } else { ++ c_ule_s(op1, op2); ++ movf_s(dst, src); ++ } ++ break; ++ ++ case GE: ++ if (!is_float) { ++ c_ult_d(op1, op2); ++ movf_d(dst, src); ++ } else { ++ c_ult_s(op1, op2); ++ movf_s(dst, src); ++ } ++ break; ++ ++ case LT: ++ if (!is_float) { ++ c_ult_d(op1, op2); ++ movt_d(dst, src); ++ } else { ++ c_ult_s(op1, op2); ++ movt_s(dst, src); ++ } ++ break; ++ ++ case LE: ++ if (!is_float) { ++ c_ule_d(op1, op2); ++ movt_d(dst, src); ++ } else { ++ c_ule_s(op1, op2); ++ movt_s(dst, src); ++ } ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::cmp_cmov(Register op1, ++ Register op2, ++ FloatRegister dst, ++ FloatRegister src, ++ CMCompare cmp, ++ bool is_float) { ++ Label L; ++ ++ switch(cmp) { ++ case EQ: ++ bne(op1, op2, L); ++ delayed()->nop(); ++ if (is_float) { ++ mov_s(dst, src); ++ } else { ++ mov_d(dst, src); ++ } ++ bind(L); ++ break; ++ ++ case NE: ++ beq(op1, op2, L); ++ delayed()->nop(); ++ if (is_float) { ++ mov_s(dst, src); ++ } else { ++ mov_d(dst, src); ++ } ++ bind(L); ++ break; ++ ++ case GT: ++ slt(AT, op2, op1); ++ 
beq(AT, R0, L); ++ delayed()->nop(); ++ if (is_float) { ++ mov_s(dst, src); ++ } else { ++ mov_d(dst, src); ++ } ++ bind(L); ++ break; ++ ++ case GE: ++ slt(AT, op1, op2); ++ bne(AT, R0, L); ++ delayed()->nop(); ++ if (is_float) { ++ mov_s(dst, src); ++ } else { ++ mov_d(dst, src); ++ } ++ bind(L); ++ break; ++ ++ case LT: ++ slt(AT, op1, op2); ++ beq(AT, R0, L); ++ delayed()->nop(); ++ if (is_float) { ++ mov_s(dst, src); ++ } else { ++ mov_d(dst, src); ++ } ++ bind(L); ++ break; ++ ++ case LE: ++ slt(AT, op2, op1); ++ bne(AT, R0, L); ++ delayed()->nop(); ++ if (is_float) { ++ mov_s(dst, src); ++ } else { ++ mov_d(dst, src); ++ } ++ bind(L); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::gs_loadstore(Register reg, Register base, Register index, int disp, int type) { ++ switch (type) { ++ case STORE_BYTE: ++ gssbx(reg, base, index, disp); ++ break; ++ case STORE_CHAR: ++ case STORE_SHORT: ++ gsshx(reg, base, index, disp); ++ break; ++ case STORE_INT: ++ gsswx(reg, base, index, disp); ++ break; ++ case STORE_LONG: ++ gssdx(reg, base, index, disp); ++ break; ++ case LOAD_BYTE: ++ gslbx(reg, base, index, disp); ++ break; ++ case LOAD_SHORT: ++ gslhx(reg, base, index, disp); ++ break; ++ case LOAD_INT: ++ gslwx(reg, base, index, disp); ++ break; ++ case LOAD_LONG: ++ gsldx(reg, base, index, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::gs_loadstore(FloatRegister reg, Register base, Register index, int disp, int type) { ++ switch (type) { ++ case STORE_FLOAT: ++ gsswxc1(reg, base, index, disp); ++ break; ++ case STORE_DOUBLE: ++ gssdxc1(reg, base, index, disp); ++ break; ++ case LOAD_FLOAT: ++ gslwxc1(reg, base, index, disp); ++ break; ++ case LOAD_DOUBLE: ++ gsldxc1(reg, base, index, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::loadstore(Register reg, Register base, int disp, int type) { ++ switch (type) { ++ case STORE_BYTE: ++ sb(reg, base, disp); ++ break; ++ case STORE_CHAR: ++ case STORE_SHORT: ++ sh(reg, base, disp); ++ break; ++ case STORE_INT: ++ sw(reg, base, disp); ++ break; ++ case STORE_LONG: ++ sd(reg, base, disp); ++ break; ++ case LOAD_BYTE: ++ lb(reg, base, disp); ++ break; ++ case LOAD_U_BYTE: ++ lbu(reg, base, disp); ++ break; ++ case LOAD_SHORT: ++ lh(reg, base, disp); ++ break; ++ case LOAD_U_SHORT: ++ lhu(reg, base, disp); ++ break; ++ case LOAD_INT: ++ lw(reg, base, disp); ++ break; ++ case LOAD_U_INT: ++ lwu(reg, base, disp); ++ break; ++ case LOAD_LONG: ++ ld(reg, base, disp); ++ break; ++ case LOAD_LINKED_LONG: ++ lld(reg, base, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::loadstore(FloatRegister reg, Register base, int disp, int type) { ++ switch (type) { ++ case STORE_FLOAT: ++ swc1(reg, base, disp); ++ break; ++ case STORE_DOUBLE: ++ sdc1(reg, base, disp); ++ break; ++ case LOAD_FLOAT: ++ lwc1(reg, base, disp); ++ break; ++ case LOAD_DOUBLE: ++ ldc1(reg, base, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/macroAssembler_mips.hpp b/src/hotspot/cpu/mips/macroAssembler_mips.hpp +--- a/src/hotspot/cpu/mips/macroAssembler_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/macroAssembler_mips.hpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,818 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. 
++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_MACROASSEMBLER_MIPS_HPP ++#define CPU_MIPS_VM_MACROASSEMBLER_MIPS_HPP ++ ++#include "asm/assembler.hpp" ++#include "runtime/rtmLocking.hpp" ++#include "utilities/macros.hpp" ++ ++// MacroAssembler extends Assembler by frequently used macros. ++// ++// Instructions for which a 'better' code sequence exists depending ++// on arguments should also go in here. ++ ++class MacroAssembler: public Assembler { ++ friend class LIR_Assembler; ++ friend class Runtime1; // as_Address() ++ ++ public: ++ // Compare code ++ typedef enum { ++ EQ = 0x01, ++ NE = 0x02, ++ GT = 0x03, ++ GE = 0x04, ++ LT = 0x05, ++ LE = 0x06 ++ } CMCompare; ++ ++ protected: ++ ++ // Support for VM calls ++ // ++ // This is the base routine called by the different versions of call_VM_leaf. The interpreter ++ // may customize this version by overriding it for its purposes (e.g., to save/restore ++ // additional registers when doing a VM call). ++ #define VIRTUAL virtual ++ ++ VIRTUAL void call_VM_leaf_base( ++ address entry_point, // the entry point ++ int number_of_arguments // the number of arguments to pop after the call ++ ); ++ ++ // This is the base routine called by the different versions of call_VM. The interpreter ++ // may customize this version by overriding it for its purposes (e.g., to save/restore ++ // additional registers when doing a VM call). ++ // ++ // If no java_thread register is specified (noreg) than TREG will be used instead. call_VM_base ++ // returns the register which contains the thread upon return. If a thread register has been ++ // specified, the return value will correspond to that register. If no last_java_sp is specified ++ // (noreg) than sp will be used instead. 
++ VIRTUAL void call_VM_base( // returns the register containing the thread upon return ++ Register oop_result, // where an oop-result ends up if any; use noreg otherwise ++ Register java_thread, // the thread if computed before ; use noreg otherwise ++ Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise ++ address entry_point, // the entry point ++ int number_of_arguments, // the number of arguments (w/o thread) to pop after the call ++ bool check_exceptions // whether to check for pending exceptions after return ++ ); ++ ++ void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions = true); ++ ++ // helpers for FPU flag access ++ // tmp is a temporary register, if none is available use noreg ++ ++ public: ++ MacroAssembler(CodeBuffer* code) : Assembler(code) {} ++ ++ // These routines should emit JVMTI PopFrame and ForceEarlyReturn handling code. ++ // The implementation is only non-empty for the InterpreterMacroAssembler, ++ // as only the interpreter handles PopFrame and ForceEarlyReturn requests. ++ virtual void check_and_handle_popframe(Register java_thread); ++ virtual void check_and_handle_earlyret(Register java_thread); ++ ++ Address as_Address(AddressLiteral adr); ++ Address as_Address(ArrayAddress adr); ++ ++ static intptr_t i[32]; ++ static float f[32]; ++ static void print(outputStream *s); ++ ++ static int i_offset(unsigned int k); ++ static int f_offset(unsigned int k); ++ ++ static void save_registers(MacroAssembler *masm); ++ static void restore_registers(MacroAssembler *masm); ++ ++ // Support for NULL-checks ++ // ++ // Generates code that causes a NULL OS exception if the content of reg is NULL. ++ // If the accessed location is M[reg + offset] and the offset is known, provide the ++ // offset. No explicit code generation is needed if the offset is within a certain ++ // range (0 <= offset <= page_size). ++ ++ void null_check(Register reg, int offset = -1); ++ static bool needs_explicit_null_check(intptr_t offset); ++ ++ // Required platform-specific helpers for Label::patch_instructions. ++ // They _shadow_ the declarations in AbstractAssembler, which are undefined. ++ void pd_patch_instruction(address branch, address target); ++ ++ address emit_trampoline_stub(int insts_call_instruction_offset, address target); ++ ++ // Support for inc/dec with optimal instruction selection depending on value ++ void incrementl(Register reg, int value = 1); ++ void decrementl(Register reg, int value = 1); ++ ++ ++ // Alignment ++ void align(int modulus); ++ ++ ++ // Stack frame creation/removal ++ void enter(); ++ void leave(); ++ ++ // Support for getting the JavaThread pointer (i.e.; a reference to thread-local information) ++ // The pointer will be loaded into the thread register. ++ void get_thread(Register thread); ++ ++ ++ // Support for VM calls ++ // ++ // It is imperative that all calls into the VM are handled via the call_VM macros. ++ // They make sure that the stack linkage is setup correctly. call_VM's correspond ++ // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points. 
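++  // Hypothetical usage (illustration only; the callee and registers are examples):
++  //   call_VM(V0, CAST_FROM_FN_PTR(address, InterpreterRuntime::build_method_counters), A1);
++  // Up to three register arguments are passed, the VM transition is performed, and
++  // an oop result (if any) is left in oop_result.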
++ ++ ++ void call_VM(Register oop_result, ++ address entry_point, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, Register arg_2, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, Register arg_2, Register arg_3, ++ bool check_exceptions = true); ++ ++ // Overloadings with last_Java_sp ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments = 0, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, bool ++ check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, Register arg_2, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, Register arg_2, Register arg_3, ++ bool check_exceptions = true); ++ ++ void get_vm_result (Register oop_result, Register thread); ++ void get_vm_result_2(Register metadata_result, Register thread); ++ void call_VM_leaf(address entry_point, ++ int number_of_arguments = 0); ++ void call_VM_leaf(address entry_point, ++ Register arg_1); ++ void call_VM_leaf(address entry_point, ++ Register arg_1, Register arg_2); ++ void call_VM_leaf(address entry_point, ++ Register arg_1, Register arg_2, Register arg_3); ++ ++ // Super call_VM calls - correspond to MacroAssembler::call_VM(_leaf) calls ++ void super_call_VM_leaf(address entry_point); ++ void super_call_VM_leaf(address entry_point, Register arg_1); ++ void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2); ++ void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3); ++ ++ // last Java Frame (fills frame anchor) ++ void set_last_Java_frame(Register thread, ++ Register last_java_sp, ++ Register last_java_fp, ++ address last_java_pc); ++ ++ // thread in the default location (S6) ++ void set_last_Java_frame(Register last_java_sp, ++ Register last_java_fp, ++ address last_java_pc); ++ ++ void reset_last_Java_frame(Register thread, bool clear_fp); ++ ++ // thread in the default location (S6) ++ void reset_last_Java_frame(bool clear_fp); ++ ++ // jobjects ++ void clear_jweak_tag(Register possibly_jweak); ++ void resolve_jobject(Register value, Register thread, Register tmp); ++ ++ // C 'boolean' to Java boolean: x == 0 ? 
0 : 1 ++ void c2bool(Register x); ++ ++ void resolve_oop_handle(Register result, Register tmp); ++ void load_mirror(Register dst, Register method, Register tmp); ++ ++ // oop manipulations ++ void load_klass(Register dst, Register src); ++ void store_klass(Register dst, Register src); ++ ++ void access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src, ++ Register tmp1, Register thread_tmp); ++ void access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register src, ++ Register tmp1, Register tmp2); ++ ++ void load_heap_oop(Register dst, Address src, Register tmp1 = noreg, ++ Register thread_tmp = noreg, DecoratorSet decorators = 0); ++ void load_heap_oop_not_null(Register dst, Address src, Register tmp1 = noreg, ++ Register thread_tmp = noreg, DecoratorSet decorators = 0); ++ void store_heap_oop(Address dst, Register src, Register tmp1 = noreg, ++ Register tmp2 = noreg, DecoratorSet decorators = 0); ++ ++ // Used for storing NULL. All other oop constants should be ++ // stored using routines that take a jobject. ++ void store_heap_oop_null(Address dst); ++ ++ void load_prototype_header(Register dst, Register src); ++ ++ void store_klass_gap(Register dst, Register src); ++ ++ void encode_heap_oop(Register r); ++ void encode_heap_oop(Register dst, Register src); ++ void decode_heap_oop(Register r); ++ void decode_heap_oop(Register dst, Register src); ++ void encode_heap_oop_not_null(Register r); ++ void decode_heap_oop_not_null(Register r); ++ void encode_heap_oop_not_null(Register dst, Register src); ++ void decode_heap_oop_not_null(Register dst, Register src); ++ ++ void encode_klass_not_null(Register r); ++ void decode_klass_not_null(Register r); ++ void encode_klass_not_null(Register dst, Register src); ++ void decode_klass_not_null(Register dst, Register src); ++ ++ // Returns the byte size of the instructions generated by decode_klass_not_null() ++ // when compressed klass pointers are being used. 
++ static int instr_size_for_decode_klass_not_null(); ++ ++ // if heap base register is used - reinit it with the correct value ++ void reinit_heapbase(); ++ ++ DEBUG_ONLY(void verify_heapbase(const char* msg);) ++ ++ void set_narrow_klass(Register dst, Klass* k); ++ void set_narrow_oop(Register dst, jobject obj); ++ ++ ++ ++ ++ // Sign extension ++ void sign_extend_short(Register reg) { /*dsll32(reg, reg, 16); dsra32(reg, reg, 16);*/ seh(reg, reg); } ++ void sign_extend_byte(Register reg) { /*dsll32(reg, reg, 24); dsra32(reg, reg, 24);*/ seb(reg, reg); } ++ void rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp); ++ void rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp); ++ ++ // allocation ++ void eden_allocate( ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Register t2, ++ Label& slow_case // continuation point if fast allocation fails ++ ); ++ void tlab_allocate( ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Register t2, // temp register ++ Label& slow_case // continuation point if fast allocation fails ++ ); ++ void incr_allocated_bytes(Register thread, ++ Register var_size_in_bytes, int con_size_in_bytes, ++ Register t1 = noreg); ++ // interface method calling ++ void lookup_interface_method(Register recv_klass, ++ Register intf_klass, ++ RegisterOrConstant itable_index, ++ Register method_result, ++ Register scan_temp, ++ Label& no_such_interface, ++ bool return_method = true); ++ ++ // virtual method calling ++ void lookup_virtual_method(Register recv_klass, ++ RegisterOrConstant vtable_index, ++ Register method_result); ++ ++ // Test sub_klass against super_klass, with fast and slow paths. ++ ++ // The fast path produces a tri-state answer: yes / no / maybe-slow. ++ // One of the three labels can be NULL, meaning take the fall-through. ++ // If super_check_offset is -1, the value is loaded up from super_klass. ++ // No registers are killed, except temp_reg. ++ void check_klass_subtype_fast_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label* L_success, ++ Label* L_failure, ++ Label* L_slow_path, ++ RegisterOrConstant super_check_offset = RegisterOrConstant(-1)); ++ ++ // The rest of the type check; must be wired to a corresponding fast path. ++ // It does not repeat the fast path logic, so don't use it standalone. ++ // The temp_reg and temp2_reg can be noreg, if no temps are available. ++ // Updates the sub's secondary super cache as necessary. ++ // If set_cond_codes, condition codes will be Z on success, NZ on failure. ++ void check_klass_subtype_slow_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Register temp2_reg, ++ Label* L_success, ++ Label* L_failure, ++ bool set_cond_codes = false); ++ ++ // Simplified, combined version, good for typical uses. ++ // Falls through on failure. 
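++  // Combined behaviour (sketch; see the matching definition in macroAssembler_mips.cpp):
++  //   fast path: may branch straight to L_success or fall to the slow path;
++  //   slow path: scans the secondary supers and updates the super cache on a hit;
++  //   neither succeeding: control falls through (the failure case).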
++ void check_klass_subtype(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label& L_success); ++ ++ ++ // Debugging ++ ++ // only if +VerifyOops ++ void verify_oop(Register reg, const char* s = "broken oop"); ++ void verify_oop_addr(Address addr, const char * s = "broken oop addr"); ++ void verify_oop_subroutine(); ++ // TODO: verify method and klass metadata (compare against vptr?) ++ void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {} ++ void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line){} ++ ++ #define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__) ++ #define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__) ++ ++ // only if +VerifyFPU ++ void verify_FPU(int stack_depth, const char* s = "illegal FPU state"); ++ ++ // prints msg, dumps registers and stops execution ++ void stop(const char* msg); ++ ++ // prints msg and continues ++ void warn(const char* msg); ++ ++ static void debug(char* msg/*, RegistersForDebugging* regs*/); ++ static void debug64(char* msg, int64_t pc, int64_t regs[]); ++ ++ void print_reg(Register reg); ++ void print_reg(FloatRegister reg); ++ ++ void untested() { stop("untested"); } ++ ++ void unimplemented(const char* what = ""); ++ ++ void should_not_reach_here() { stop("should not reach here"); } ++ ++ void print_CPU_state(); ++ ++ // Stack overflow checking ++ void bang_stack_with_offset(int offset) { ++ // stack grows down, caller passes positive offset ++ assert(offset > 0, "must bang with negative offset"); ++ if (offset <= 32768) { ++ sw(A0, SP, -offset); ++ } else { ++ li(AT, offset); ++ dsubu(AT, SP, AT); ++ sw(A0, AT, 0); ++ } ++ } ++ ++ // Writes to stack successive pages until offset reached to check for ++ // stack overflow + shadow pages. Also, clobbers tmp ++ void bang_stack_size(Register size, Register tmp); ++ ++ // Check for reserved stack access in method being exited (for JIT) ++ void reserved_stack_check(); ++ ++ virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, ++ Register tmp, ++ int offset); ++ ++ // Support for serializing memory accesses between threads ++ void serialize_memory(Register thread, Register tmp); ++ ++ void safepoint_poll(Label& slow_path, Register thread_reg); ++ void safepoint_poll_acquire(Label& slow_path, Register thread_reg); ++ ++ //void verify_tlab(); ++ void verify_tlab(Register t1, Register t2); ++ ++ // Biased locking support ++ // lock_reg and obj_reg must be loaded up with the appropriate values. ++ // tmp_reg is optional. If it is supplied (i.e., != noreg) it will ++ // be killed; if not supplied, push/pop will be used internally to ++ // allocate a temporary (inefficient, avoid if possible). ++ // Optional slow case is for implementations (interpreter and C1) which branch to ++ // slow case directly. Leaves condition codes set for C2's Fast_Lock node. ++ // Returns offset of first potentially-faulting instruction for null ++ // check info (currently consumed only by C1). If ++ // swap_reg_contains_mark is true then returns -1 as it is assumed ++ // the calling code has already passed any potential faults. 
++ int biased_locking_enter(Register lock_reg, Register obj_reg, ++ Register swap_reg, Register tmp_reg, ++ bool swap_reg_contains_mark, ++ Label& done, Label* slow_case = NULL, ++ BiasedLockingCounters* counters = NULL); ++ void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done); ++#ifdef COMPILER2 ++ void fast_lock(Register obj, Register box, Register res, Register tmp, Register scr); ++ void fast_unlock(Register obj, Register box, Register res, Register tmp, Register scr); ++#endif ++ ++ ++ // Arithmetics ++ // Regular vs. d* versions ++ inline void addu_long(Register rd, Register rs, Register rt) { ++ daddu(rd, rs, rt); ++ } ++ inline void addu_long(Register rd, Register rs, long imm32_64) { ++ daddiu(rd, rs, imm32_64); ++ } ++ ++ void round_to(Register reg, int modulus) { ++ assert_different_registers(reg, AT); ++ increment(reg, modulus - 1); ++ move(AT, - modulus); ++ andr(reg, reg, AT); ++ } ++ ++ // the follow two might use AT register, be sure you have no meanful data in AT before you call them ++ void increment(Register reg, int imm); ++ void decrement(Register reg, int imm); ++ ++ void shl(Register reg, int sa) { dsll(reg, reg, sa); } ++ void shr(Register reg, int sa) { dsrl(reg, reg, sa); } ++ void sar(Register reg, int sa) { dsra(reg, reg, sa); } ++ ++ // Helper functions for statistics gathering. ++ void atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2); ++ ++ // Calls ++ void call(address entry); ++ void call(address entry, relocInfo::relocType rtype); ++ void call(address entry, RelocationHolder& rh); ++ ++ address trampoline_call(AddressLiteral entry, CodeBuffer *cbuf = NULL); ++ ++ // Emit the CompiledIC call idiom ++ void ic_call(address entry, jint method_index = 0); ++ ++ // Jumps ++ void jmp(address entry); ++ void jmp(address entry, relocInfo::relocType rtype); ++ void jmp_far(Label& L); // always long jumps ++ ++ /* branches may exceed 16-bit offset */ ++ void b_far(address entry); ++ void b_far(Label& L); ++ ++ void bne_far (Register rs, Register rt, address entry); ++ void bne_far (Register rs, Register rt, Label& L); ++ ++ void beq_far (Register rs, Register rt, address entry); ++ void beq_far (Register rs, Register rt, Label& L); ++ ++ // For C2 to support long branches ++ void beq_long (Register rs, Register rt, Label& L); ++ void bne_long (Register rs, Register rt, Label& L); ++ void bc1t_long (Label& L); ++ void bc1f_long (Label& L); ++ ++ void patchable_call(address target); ++ void general_call(address target); ++ ++ void patchable_jump(address target); ++ void general_jump(address target); ++ ++ static int insts_for_patchable_call(address target); ++ static int insts_for_general_call(address target); ++ ++ static int insts_for_patchable_jump(address target); ++ static int insts_for_general_jump(address target); ++ ++ // Floating ++ // Data ++ ++ // Load and store values by size and signed-ness ++ void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg); ++ void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg); ++ ++ // ld_ptr will perform lw for 32 bit VMs and ld for 64 bit VMs ++ inline void ld_ptr(Register rt, Address a) { ++ ld(rt, a); ++ } ++ ++ inline void ld_ptr(Register rt, Register base, int offset16) { ++ ld(rt, base, offset16); ++ } ++ ++ // st_ptr will perform sw for 32 bit VMs and sd for 64 bit VMs ++ inline void st_ptr(Register rt, Address a) { ++ sd(rt, a); ++ } ++ ++ inline void st_ptr(Register rt, 
Register base, int offset16) { ++ sd(rt, base, offset16); ++ } ++ ++ void ld_ptr(Register rt, Register base, Register offset); ++ void st_ptr(Register rt, Register base, Register offset); ++ ++ // swap the two byte of the low 16-bit halfword ++ // this directive will use AT, be sure the high 16-bit of reg is zero ++ void hswap(Register reg); ++ void huswap(Register reg); ++ ++ // convert big endian integer to little endian integer ++ void swap(Register reg); ++ ++ // implement the x86 instruction semantic ++ // if c_reg == *dest then *dest <= x_reg ++ // else c_reg <= *dest ++ // the AT indicate if xchg occurred, 1 for xchged, else 0 ++ void cmpxchg(Address addr, Register oldval, Register newval, Register resflag, ++ bool retold, bool barrier); ++ void cmpxchg(Address addr, Register oldval, Register newval, Register tmp, ++ bool retold, bool barrier, Label& succ, Label* fail = NULL); ++ void cmpxchg32(Address addr, Register oldval, Register newval, Register resflag, ++ bool sign, bool retold, bool barrier); ++ void cmpxchg32(Address addr, Register oldval, Register newval, Register tmp, ++ bool sign, bool retold, bool barrier, Label& succ, Label* fail = NULL); ++ void cmpxchg8(Register x_regLo, Register x_regHi, Address dest, Register c_regLo, Register c_regHi); ++ ++ //pop & push ++ void extend_sign(Register rh, Register rl) { stop("extend_sign"); } ++ void neg(Register reg) { dsubu(reg, R0, reg); } ++ void push (Register reg) { daddiu(SP, SP, -8); sd (reg, SP, 0); } ++ void push (FloatRegister reg) { daddiu(SP, SP, -8); sdc1(reg, SP, 0); } ++ void pop (Register reg) { ld (reg, SP, 0); daddiu(SP, SP, 8); } ++ void pop (FloatRegister reg) { ldc1(reg, SP, 0); daddiu(SP, SP, 8); } ++ void pop () { daddiu(SP, SP, 8); } ++ void pop2 () { daddiu(SP, SP, 16); } ++ void push2(Register reg1, Register reg2); ++ void pop2 (Register reg1, Register reg2); ++ void dpush (Register reg) { daddiu(SP, SP, -8); sd (reg, SP, 0); } ++ void dpop (Register reg) { ld (reg, SP, 0); daddiu(SP, SP, 8); } ++ //we need 2 fun to save and resotre general register ++ void pushad(); ++ void popad(); ++ void pushad_except_v0(); ++ void popad_except_v0(); ++ ++ //move an 32-bit immediate to Register ++ void move(Register reg, int imm32) { li32(reg, imm32); } ++ void li (Register rd, long imm); ++ void li (Register rd, address addr) { li(rd, (long)addr); } ++ //replace move(Register reg, int imm) ++ void li32(Register rd, int imm32); // sign-extends to 64 bits on mips64 ++ void set64(Register d, jlong value); ++ static int insts_for_set64(jlong value); ++ ++ void patchable_set48(Register d, jlong value); ++ void patchable_set32(Register d, jlong value); ++ ++ void patchable_call32(Register d, jlong value); ++ ++ static int call_size(address target, bool far, bool patchable); ++ ++ static bool reachable_from_cache(address target); ++ static bool reachable_from_cache(); ++ ++ ++ void dli(Register rd, long imm) { li(rd, imm); } ++ void li64(Register rd, long imm); ++ void li48(Register rd, long imm); ++ ++ void move(Register rd, Register rs) { daddu(rd, rs, R0); } ++ void move_u32(Register rd, Register rs) { addu32(rd, rs, R0); } ++ void dmove(Register rd, Register rs) { daddu(rd, rs, R0); } ++ void mov_metadata(Register dst, Metadata* obj); ++ void mov_metadata(Address dst, Metadata* obj); ++ ++ void store_for_type_by_register(Register src_reg, Register tmp_reg, int disp, BasicType type, bool wide); ++ void store_for_type_by_register(FloatRegister src_reg, Register tmp_reg, int disp, BasicType type); ++ void 
store_for_type(Register src_reg, Address addr, BasicType type = T_INT, bool wide = false); ++ void store_for_type(FloatRegister src_reg, Address addr, BasicType type = T_INT); ++ void load_for_type_by_register(Register dst_reg, Register tmp_reg, int disp, BasicType type, bool wide); ++ void load_for_type_by_register(FloatRegister dst_reg, Register tmp_reg, int disp, BasicType type); ++ int load_for_type(Register dst_reg, Address addr, BasicType type = T_INT, bool wide = false); ++ int load_for_type(FloatRegister dst_reg, Address addr, BasicType type = T_INT); ++ ++#ifndef PRODUCT ++ static void pd_print_patched_instruction(address branch) { ++ jint stub_inst = *(jint*) branch; ++ print_instruction(stub_inst); ++ ::tty->print("%s", " (unresolved)"); ++ ++ } ++#endif ++ ++ //FIXME ++ void empty_FPU_stack(){/*need implemented*/}; ++ ++#ifdef COMPILER2 ++ // Compare strings. ++ void string_compare(Register str1, Register str2, ++ Register cnt1, Register cnt2, Register result, ++ int ae); ++ ++ // Compare char[] or byte[] arrays. ++ void arrays_equals(Register str1, Register str2, ++ Register cnt, Register tmp, Register result, ++ bool is_char); ++#endif ++ ++ // method handles (JSR 292) ++ Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0); ++ ++ // Conditional move ++ void cmp_cmov(Register op1, ++ Register op2, ++ Register dst, ++ Register src, ++ CMCompare cmp = EQ, ++ bool is_signed = true); ++ void cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ Register dst, ++ Register src, ++ CMCompare cmp = EQ, ++ bool is_float = true); ++ void cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ FloatRegister dst, ++ FloatRegister src, ++ CMCompare cmp = EQ, ++ bool is_float = true); ++ void cmp_cmov(Register op1, ++ Register op2, ++ FloatRegister dst, ++ FloatRegister src, ++ CMCompare cmp = EQ, ++ bool is_float = true); ++ ++#undef VIRTUAL ++ ++public: ++ ++// Memory Data Type ++#define INT_TYPE 0x100 ++#define FLOAT_TYPE 0x200 ++#define SIGNED_TYPE 0x10 ++#define UNSIGNED_TYPE 0x20 ++ ++ typedef enum { ++ LOAD_BYTE = INT_TYPE | SIGNED_TYPE | 0x1, ++ LOAD_CHAR = INT_TYPE | SIGNED_TYPE | 0x2, ++ LOAD_SHORT = INT_TYPE | SIGNED_TYPE | 0x3, ++ LOAD_INT = INT_TYPE | SIGNED_TYPE | 0x4, ++ LOAD_LONG = INT_TYPE | SIGNED_TYPE | 0x5, ++ STORE_BYTE = INT_TYPE | SIGNED_TYPE | 0x6, ++ STORE_CHAR = INT_TYPE | SIGNED_TYPE | 0x7, ++ STORE_SHORT = INT_TYPE | SIGNED_TYPE | 0x8, ++ STORE_INT = INT_TYPE | SIGNED_TYPE | 0x9, ++ STORE_LONG = INT_TYPE | SIGNED_TYPE | 0xa, ++ LOAD_LINKED_LONG = INT_TYPE | SIGNED_TYPE | 0xb, ++ ++ LOAD_U_BYTE = INT_TYPE | UNSIGNED_TYPE | 0x1, ++ LOAD_U_SHORT = INT_TYPE | UNSIGNED_TYPE | 0x2, ++ LOAD_U_INT = INT_TYPE | UNSIGNED_TYPE | 0x3, ++ ++ LOAD_FLOAT = FLOAT_TYPE | SIGNED_TYPE | 0x1, ++ LOAD_DOUBLE = FLOAT_TYPE | SIGNED_TYPE | 0x2, ++ STORE_FLOAT = FLOAT_TYPE | SIGNED_TYPE | 0x3, ++ STORE_DOUBLE = FLOAT_TYPE | SIGNED_TYPE | 0x4 ++ } CMLoadStoreDataType; ++ ++ void loadstore_enc(Register reg, int base, int index, int scale, int disp, int type) { ++ assert((type & INT_TYPE), "must be General reg type"); ++ loadstore_t(reg, base, index, scale, disp, type); ++ } ++ ++ void loadstore_enc(FloatRegister reg, int base, int index, int scale, int disp, int type) { ++ assert((type & FLOAT_TYPE), "must be Float reg type"); ++ loadstore_t(reg, base, index, scale, disp, type); ++ } ++ ++private: ++ ++ template ++ void loadstore_t(T reg, int base, int index, int scale, int disp, int type) { ++ if (index != 0) { ++ if (Assembler::is_simm16(disp)) { ++ if (UseLEXT1 
&& (type & SIGNED_TYPE) && Assembler::is_simm(disp, 8)) { ++ if (scale == 0) { ++ gs_loadstore(reg, as_Register(base), as_Register(index), disp, type); ++ } else { ++ dsll(AT, as_Register(index), scale); ++ gs_loadstore(reg, as_Register(base), AT, disp, type); ++ } ++ } else { ++ if (scale == 0) { ++ addu(AT, as_Register(base), as_Register(index)); ++ } else { ++ dsll(AT, as_Register(index), scale); ++ addu(AT, as_Register(base), AT); ++ } ++ loadstore(reg, AT, disp, type); ++ } ++ } else { ++ if (scale == 0) { ++ addu(AT, as_Register(base), as_Register(index)); ++ } else { ++ dsll(AT, as_Register(index), scale); ++ addu(AT, as_Register(base), AT); ++ } ++ move(RT9, disp); ++ if (UseLEXT1 && (type & SIGNED_TYPE)) { ++ gs_loadstore(reg, AT, RT9, 0, type); ++ } else { ++ addu(AT, AT, RT9); ++ loadstore(reg, AT, 0, type); ++ } ++ } ++ } else { ++ if (Assembler::is_simm16(disp)) { ++ loadstore(reg, as_Register(base), disp, type); ++ } else { ++ move(RT9, disp); ++ if (UseLEXT1 && (type & SIGNED_TYPE)) { ++ gs_loadstore(reg, as_Register(base), RT9, 0, type); ++ } else { ++ addu(AT, as_Register(base), RT9); ++ loadstore(reg, AT, 0, type); ++ } ++ } ++ } ++ } ++ void loadstore(Register reg, Register base, int disp, int type); ++ void loadstore(FloatRegister reg, Register base, int disp, int type); ++ void gs_loadstore(Register reg, Register base, Register index, int disp, int type); ++ void gs_loadstore(FloatRegister reg, Register base, Register index, int disp, int type); ++}; ++ ++/** ++ * class SkipIfEqual: ++ * ++ * Instantiating this class will result in assembly code being output that will ++ * jump around any code emitted between the creation of the instance and it's ++ * automatic destruction at the end of a scope block, depending on the value of ++ * the flag passed to the constructor, which will be checked at run-time. ++ */ ++class SkipIfEqual { ++private: ++ MacroAssembler* _masm; ++ Label _label; ++ ++public: ++ inline SkipIfEqual(MacroAssembler* masm, const bool* flag_addr, bool value) ++ : _masm(masm) { ++ _masm->li(AT, (address)flag_addr); ++ _masm->lb(AT, AT, 0); ++ if (value) { ++ _masm->bne(AT, R0, _label); ++ } else { ++ _masm->beq(AT, R0, _label); ++ } ++ _masm->delayed()->nop(); ++ } ++ ++ ~SkipIfEqual(); ++}; ++ ++#ifdef ASSERT ++inline bool AbstractAssembler::pd_check_instruction_mark() { return true; } ++#endif ++ ++ ++#endif // CPU_MIPS_VM_MACROASSEMBLER_MIPS_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/macroAssembler_mips.inline.hpp b/src/hotspot/cpu/mips/macroAssembler_mips.inline.hpp +--- a/src/hotspot/cpu/mips/macroAssembler_mips.inline.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/macroAssembler_mips.inline.hpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,34 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2017, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_MACROASSEMBLER_MIPS_INLINE_HPP ++#define CPU_MIPS_VM_MACROASSEMBLER_MIPS_INLINE_HPP ++ ++#include "asm/assembler.inline.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/codeBuffer.hpp" ++#include "code/codeCache.hpp" ++ ++#endif // CPU_MIPS_VM_MACROASSEMBLER_MIPS_INLINE_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/methodHandles_mips.cpp b/src/hotspot/cpu/mips/methodHandles_mips.cpp +--- a/src/hotspot/cpu/mips/methodHandles_mips.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/methodHandles_mips.cpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,576 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "classfile/javaClasses.inline.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "memory/allocation.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "utilities/preserveException.hpp" ++ ++#define __ _masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) // nothing ++#define STOP(error) stop(error) ++#else ++#define BLOCK_COMMENT(str) __ block_comment(str) ++#define STOP(error) block_comment(error); __ stop(error) ++#endif ++ ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++ ++void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg) { ++ if (VerifyMethodHandles) ++ verify_klass(_masm, klass_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_Class), ++ "MH argument is a Class"); ++ __ ld(klass_reg, Address(klass_reg, java_lang_Class::klass_offset_in_bytes())); ++} ++ ++#ifdef ASSERT ++static int check_nonzero(const char* xname, int x) { ++ assert(x != 0, "%s should be nonzero", xname); ++ return x; ++} ++#define NONZERO(x) check_nonzero(#x, x) ++#else //ASSERT ++#define NONZERO(x) (x) ++#endif //ASSERT ++ ++#ifdef ASSERT ++void MethodHandles::verify_klass(MacroAssembler* _masm, ++ Register obj, SystemDictionary::WKID klass_id, ++ const char* error_message) { ++} ++ ++void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) { ++ Label L; ++ BLOCK_COMMENT("verify_ref_kind {"); ++ __ lw(temp, Address(member_reg, NONZERO(java_lang_invoke_MemberName::flags_offset_in_bytes()))); ++ __ sra(temp, temp, java_lang_invoke_MemberName::MN_REFERENCE_KIND_SHIFT); ++ __ move(AT, java_lang_invoke_MemberName::MN_REFERENCE_KIND_MASK); ++ __ andr(temp, temp, AT); ++ __ move(AT, ref_kind); ++ __ beq(temp, AT, L); ++ __ delayed()->nop(); ++ { char* buf = NEW_C_HEAP_ARRAY(char, 100, mtInternal); ++ jio_snprintf(buf, 100, "verify_ref_kind expected %x", ref_kind); ++ if (ref_kind == JVM_REF_invokeVirtual || ++ ref_kind == JVM_REF_invokeSpecial) ++ // could do this for all ref_kinds, but would explode assembly code size ++ trace_method_handle(_masm, buf); ++ __ STOP(buf); ++ } ++ BLOCK_COMMENT("} verify_ref_kind"); ++ __ bind(L); ++} ++ ++#endif //ASSERT ++ ++void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, ++ bool for_compiler_entry) { ++ assert(method == Rmethod, "interpreter calling convention"); ++ ++ Label L_no_such_method; ++ __ beq(method, R0, L_no_such_method); ++ __ delayed()->nop(); ++ ++ __ verify_method_ptr(method); ++ ++ if (!for_compiler_entry && JvmtiExport::can_post_interpreter_events()) { ++ Label run_compiled_code; ++ // JVMTI events, such as single-stepping, are implemented partly by avoiding running ++ // compiled code in threads for which the event is enabled. Check here for ++ // interp_only_mode if these events CAN be enabled. ++ Register rthread = TREG; ++ // interp_only is an int, on little endian it is sufficient to test the byte only ++ // Is a cmpl faster? 
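[Editorial sketch, not part of the patch: the interp_only_mode branch that jump_from_method_handle() emits below can be read as the following plain C++. The struct and field names here are invented stand-ins for Method*/JavaThread state; only the entry-point selection mirrors what the assembly does.]

#include <cstdio>

// Invented stand-ins, for illustration only.
struct FakeMethod { const char* interpreter_entry; const char* from_interpreted; const char* from_compiled; };
struct FakeThread { int interp_only_mode; };

static const char* select_entry(const FakeMethod& m, const FakeThread& t, bool for_compiler_entry) {
  if (!for_compiler_entry && t.interp_only_mode != 0) {
    return m.interpreter_entry;   // JVMTI single-stepping etc.: stay in the interpreter
  }
  return for_compiler_entry ? m.from_compiled : m.from_interpreted;
}

int main() {
  FakeThread stepping{1};
  FakeMethod m{"interpreter_entry", "from_interpreted_entry", "from_compiled_entry"};
  std::printf("%s\n", select_entry(m, stepping, /*for_compiler_entry=*/false));  // interpreter_entry
  return 0;
}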
++ __ lbu(AT, rthread, in_bytes(JavaThread::interp_only_mode_offset())); ++ __ beq(AT, R0, run_compiled_code); ++ __ delayed()->nop(); ++ __ ld(T9, method, in_bytes(Method::interpreter_entry_offset())); ++ __ jr(T9); ++ __ delayed()->nop(); ++ __ BIND(run_compiled_code); ++ } ++ ++ const ByteSize entry_offset = for_compiler_entry ? Method::from_compiled_offset() : ++ Method::from_interpreted_offset(); ++ __ ld(T9, method, in_bytes(entry_offset)); ++ __ jr(T9); ++ __ delayed()->nop(); ++ ++ __ bind(L_no_such_method); ++ address wrong_method = StubRoutines::throw_AbstractMethodError_entry(); ++ __ jmp(wrong_method, relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++} ++ ++void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm, ++ Register recv, Register method_temp, ++ Register temp2, ++ bool for_compiler_entry) { ++ BLOCK_COMMENT("jump_to_lambda_form {"); ++ // This is the initial entry point of a lazy method handle. ++ // After type checking, it picks up the invoker from the LambdaForm. ++ assert_different_registers(recv, method_temp, temp2); ++ assert(recv != noreg, "required register"); ++ assert(method_temp == Rmethod, "required register for loading method"); ++ ++ //NOT_PRODUCT({ FlagSetting fs(TraceMethodHandles, true); trace_method_handle(_masm, "LZMH"); }); ++ ++ // Load the invoker, as MH -> MH.form -> LF.vmentry ++ __ verify_oop(recv); ++ __ load_heap_oop(method_temp, Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset_in_bytes())), temp2); ++ __ verify_oop(method_temp); ++ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset_in_bytes())), temp2); ++ __ verify_oop(method_temp); ++ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::method_offset_in_bytes()))); ++ __ verify_oop(method_temp); ++ __ access_load_at(T_ADDRESS, IN_HEAP, method_temp, Address(method_temp, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset_in_bytes())), noreg, noreg); ++ ++ if (VerifyMethodHandles && !for_compiler_entry) { ++ // make sure recv is already on stack ++ __ ld(temp2, Address(method_temp, Method::const_offset())); ++ __ load_sized_value(temp2, ++ Address(temp2, ConstMethod::size_of_parameters_offset()), ++ sizeof(u2), false); ++ // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); ++ Label L; ++ Address recv_addr = __ argument_address(temp2, -1); ++ __ ld(AT, recv_addr); ++ __ beq(recv, AT, L); ++ __ delayed()->nop(); ++ ++ recv_addr = __ argument_address(temp2, -1); ++ __ ld(V0, recv_addr); ++ __ STOP("receiver not on stack"); ++ __ BIND(L); ++ } ++ ++ jump_from_method_handle(_masm, method_temp, temp2, for_compiler_entry); ++ BLOCK_COMMENT("} jump_to_lambda_form"); ++} ++ ++ ++// Code generation ++address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm, ++ vmIntrinsics::ID iid) { ++ const bool not_for_compiler_entry = false; // this is the interpreter entry ++ assert(is_signature_polymorphic(iid), "expected invoke iid"); ++ if (iid == vmIntrinsics::_invokeGeneric || ++ iid == vmIntrinsics::_compiledLambdaForm) { ++ // Perhaps surprisingly, the symbolic references visible to Java are not directly used. ++ // They are linked to Java-generated adapters via MethodHandleNatives.linkMethod. ++ // They all allow an appendix argument. 
++ __ stop("empty stubs make SG sick"); ++ return NULL; ++ } ++ ++ // Rmethod: Method* ++ // T9: argument locator (parameter slot count, added to sp) ++ // S7: used as temp to hold mh or receiver ++ Register t9_argp = T9; // argument list ptr, live on error paths ++ Register s7_mh = S7; // MH receiver; dies quickly and is recycled ++ Register rm_method = Rmethod; // eventual target of this invocation ++ ++ // here's where control starts out: ++ __ align(CodeEntryAlignment); ++ address entry_point = __ pc(); ++ ++ if (VerifyMethodHandles) { ++ assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2"); ++ ++ Label L; ++ BLOCK_COMMENT("verify_intrinsic_id {"); ++ __ lhu(AT, rm_method, Method::intrinsic_id_offset_in_bytes()); ++ guarantee(Assembler::is_simm16(iid), "Oops, iid is not simm16! Change the instructions."); ++ __ addiu(AT, AT, -1 * (int) iid); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ if (iid == vmIntrinsics::_linkToVirtual || ++ iid == vmIntrinsics::_linkToSpecial) { ++ // could do this for all kinds, but would explode assembly code size ++ trace_method_handle(_masm, "bad Method*::intrinsic_id"); ++ } ++ __ STOP("bad Method*::intrinsic_id"); ++ __ bind(L); ++ BLOCK_COMMENT("} verify_intrinsic_id"); ++ } ++ ++ // First task: Find out how big the argument list is. ++ Address t9_first_arg_addr; ++ int ref_kind = signature_polymorphic_intrinsic_ref_kind(iid); ++ assert(ref_kind != 0 || iid == vmIntrinsics::_invokeBasic, "must be _invokeBasic or a linkTo intrinsic"); ++ if (ref_kind == 0 || MethodHandles::ref_kind_has_receiver(ref_kind)) { ++ __ ld(t9_argp, Address(rm_method, Method::const_offset())); ++ __ load_sized_value(t9_argp, ++ Address(t9_argp, ConstMethod::size_of_parameters_offset()), ++ sizeof(u2), false); ++ // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); ++ t9_first_arg_addr = __ argument_address(t9_argp, -1); ++ } else { ++ DEBUG_ONLY(t9_argp = noreg); ++ } ++ ++ if (!is_signature_polymorphic_static(iid)) { ++ __ ld(s7_mh, t9_first_arg_addr); ++ DEBUG_ONLY(t9_argp = noreg); ++ } ++ ++ // t9_first_arg_addr is live! ++ ++ trace_method_handle_interpreter_entry(_masm, iid); ++ ++ if (iid == vmIntrinsics::_invokeBasic) { ++ generate_method_handle_dispatch(_masm, iid, s7_mh, noreg, not_for_compiler_entry); ++ ++ } else { ++ // Adjust argument list by popping the trailing MemberName argument. ++ Register r_recv = noreg; ++ if (MethodHandles::ref_kind_has_receiver(ref_kind)) { ++ // Load the receiver (not the MH; the actual MemberName's receiver) up from the interpreter stack. 
++ __ ld(r_recv = T2, t9_first_arg_addr); ++ } ++ DEBUG_ONLY(t9_argp = noreg); ++ Register rm_member = rm_method; // MemberName ptr; incoming method ptr is dead now ++ __ pop(rm_member); // extract last argument ++ generate_method_handle_dispatch(_masm, iid, r_recv, rm_member, not_for_compiler_entry); ++ } ++ ++ return entry_point; ++} ++ ++void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, ++ vmIntrinsics::ID iid, ++ Register receiver_reg, ++ Register member_reg, ++ bool for_compiler_entry) { ++ assert(is_signature_polymorphic(iid), "expected invoke iid"); ++ Register rm_method = Rmethod; // eventual target of this invocation ++ // temps used in this code are not used in *either* compiled or interpreted calling sequences ++ Register j_rarg0 = T0; ++ Register j_rarg1 = A0; ++ Register j_rarg2 = A1; ++ Register j_rarg3 = A2; ++ Register j_rarg4 = A3; ++ Register j_rarg5 = A4; ++ ++ Register temp1 = T8; ++ Register temp2 = T9; ++ Register temp3 = V0; ++ if (for_compiler_entry) { ++ assert(receiver_reg == (iid == vmIntrinsics::_linkToStatic ? noreg : j_rarg0), "only valid assignment"); ++ assert_different_registers(temp1, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); ++ assert_different_registers(temp2, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); ++ assert_different_registers(temp3, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); ++ } ++ else { ++ assert_different_registers(temp1, temp2, temp3, saved_last_sp_register()); // don't trash lastSP ++ } ++ assert_different_registers(temp1, temp2, temp3, receiver_reg); ++ assert_different_registers(temp1, temp2, temp3, member_reg); ++ ++ if (iid == vmIntrinsics::_invokeBasic) { ++ // indirect through MH.form.vmentry.vmtarget ++ jump_to_lambda_form(_masm, receiver_reg, rm_method, temp1, for_compiler_entry); ++ ++ } else { ++ // The method is a member invoker used by direct method handles. ++ if (VerifyMethodHandles) { ++ // make sure the trailing argument really is a MemberName (caller responsibility) ++ verify_klass(_masm, member_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MemberName), ++ "MemberName required for invokeVirtual etc."); ++ } ++ ++ Address member_clazz( member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset_in_bytes())); ++ Address member_vmindex( member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset_in_bytes())); ++ Address member_vmtarget( member_reg, NONZERO(java_lang_invoke_MemberName::method_offset_in_bytes())); ++ Address vmtarget_method( rm_method, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset_in_bytes())); ++ ++ Register temp1_recv_klass = temp1; ++ if (iid != vmIntrinsics::_linkToStatic) { ++ __ verify_oop(receiver_reg); ++ if (iid == vmIntrinsics::_linkToSpecial) { ++ // Don't actually load the klass; just null-check the receiver. ++ __ null_check(receiver_reg); ++ } else { ++ // load receiver klass itself ++ __ null_check(receiver_reg, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(temp1_recv_klass, receiver_reg); ++ __ verify_klass_ptr(temp1_recv_klass); ++ } ++ BLOCK_COMMENT("check_receiver {"); ++ // The receiver for the MemberName must be in receiver_reg. ++ // Check the receiver against the MemberName.clazz ++ if (VerifyMethodHandles && iid == vmIntrinsics::_linkToSpecial) { ++ // Did not load it above... 
++ __ load_klass(temp1_recv_klass, receiver_reg); ++ __ verify_klass_ptr(temp1_recv_klass); ++ } ++ if (VerifyMethodHandles && iid != vmIntrinsics::_linkToInterface) { ++ Label L_ok; ++ Register temp2_defc = temp2; ++ __ load_heap_oop(temp2_defc, member_clazz, temp3); ++ load_klass_from_Class(_masm, temp2_defc); ++ __ verify_klass_ptr(temp2_defc); ++ __ check_klass_subtype(temp1_recv_klass, temp2_defc, temp3, L_ok); ++ // If we get here, the type check failed! ++ __ STOP("receiver class disagrees with MemberName.clazz"); ++ __ bind(L_ok); ++ } ++ BLOCK_COMMENT("} check_receiver"); ++ } ++ if (iid == vmIntrinsics::_linkToSpecial || ++ iid == vmIntrinsics::_linkToStatic) { ++ DEBUG_ONLY(temp1_recv_klass = noreg); // these guys didn't load the recv_klass ++ } ++ ++ // Live registers at this point: ++ // member_reg - MemberName that was the trailing argument ++ // temp1_recv_klass - klass of stacked receiver, if needed ++ ++ Label L_incompatible_class_change_error; ++ switch (iid) { ++ case vmIntrinsics::_linkToSpecial: ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeSpecial, member_reg, temp3); ++ } ++ __ load_heap_oop(rm_method, member_vmtarget); ++ __ access_load_at(T_ADDRESS, IN_HEAP, rm_method, vmtarget_method, noreg, noreg); ++ break; ++ ++ case vmIntrinsics::_linkToStatic: ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeStatic, member_reg, temp3); ++ } ++ __ load_heap_oop(rm_method, member_vmtarget); ++ __ access_load_at(T_ADDRESS, IN_HEAP, rm_method, vmtarget_method, noreg, noreg); ++ break; ++ ++ case vmIntrinsics::_linkToVirtual: ++ { ++ // same as TemplateTable::invokevirtual, ++ // minus the CP setup and profiling: ++ ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeVirtual, member_reg, temp3); ++ } ++ ++ // pick out the vtable index from the MemberName, and then we can discard it: ++ Register temp2_index = temp2; ++ __ access_load_at(T_ADDRESS, IN_HEAP, temp2_index, member_vmindex, noreg, noreg); ++ if (VerifyMethodHandles) { ++ Label L_index_ok; ++ __ slt(AT, R0, temp2_index); ++ __ bne(AT, R0, L_index_ok); ++ __ delayed()->nop(); ++ __ STOP("no virtual index"); ++ __ BIND(L_index_ok); ++ } ++ ++ // Note: The verifier invariants allow us to ignore MemberName.clazz and vmtarget ++ // at this point. And VerifyMethodHandles has already checked clazz, if needed. 
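[Editorial sketch, not part of the patch: the linkToVirtual case that follows pulls a vtable index out of the MemberName and dispatches through the receiver's class. Conceptually that is just a table lookup, shown here in self-contained C++ with invented types.]

#include <cstdio>
#include <vector>

struct Method { void (*entry)(); };
struct Klass  { std::vector<Method> vtable; };

// Dispatch goes through the receiver's class, not MemberName.clazz.
static Method* lookup_virtual_method(Klass* recv_klass, int vtable_index) {
  return &recv_klass->vtable[vtable_index];
}

int main() {
  Klass k;
  k.vtable.push_back(Method{ []{ std::puts("dispatched"); } });
  lookup_virtual_method(&k, 0)->entry();
  return 0;
}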
++ ++ // get target Method* & entry point ++ __ lookup_virtual_method(temp1_recv_klass, temp2_index, rm_method); ++ break; ++ } ++ ++ case vmIntrinsics::_linkToInterface: ++ { ++ // same as TemplateTable::invokeinterface ++ // (minus the CP setup and profiling, with different argument motion) ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeInterface, member_reg, temp3); ++ } ++ ++ Register temp3_intf = temp3; ++ __ load_heap_oop(temp3_intf, member_clazz); ++ load_klass_from_Class(_masm, temp3_intf); ++ __ verify_klass_ptr(temp3_intf); ++ ++ Register rm_index = rm_method; ++ __ access_load_at(T_ADDRESS, IN_HEAP, rm_index, member_vmindex, noreg, noreg); ++ if (VerifyMethodHandles) { ++ Label L; ++ __ slt(AT, rm_index, R0); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ __ STOP("invalid vtable index for MH.invokeInterface"); ++ __ bind(L); ++ } ++ ++ // given intf, index, and recv klass, dispatch to the implementation method ++ __ lookup_interface_method(temp1_recv_klass, temp3_intf, ++ // note: next two args must be the same: ++ rm_index, rm_method, ++ temp2, ++ L_incompatible_class_change_error); ++ break; ++ } ++ ++ default: ++ fatal("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid)); ++ break; ++ } ++ ++ // Live at this point: ++ // rm_method ++ ++ // After figuring out which concrete method to call, jump into it. ++ // Note that this works in the interpreter with no data motion. ++ // But the compiled version will require that r_recv be shifted out. ++ __ verify_method_ptr(rm_method); ++ jump_from_method_handle(_masm, rm_method, temp1, for_compiler_entry); ++ ++ if (iid == vmIntrinsics::_linkToInterface) { ++ __ bind(L_incompatible_class_change_error); ++ address icce_entry= StubRoutines::throw_IncompatibleClassChangeError_entry(); ++ __ jmp(icce_entry, relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ } ++ } ++} ++ ++#ifndef PRODUCT ++void trace_method_handle_stub(const char* adaptername, ++ oop mh, ++ intptr_t* saved_regs, ++ intptr_t* entry_sp) { ++ // called as a leaf from native code: do not block the JVM! ++ bool has_mh = (strstr(adaptername, "/static") == NULL && ++ strstr(adaptername, "linkTo") == NULL); // static linkers don't have MH ++ const char* mh_reg_name = has_mh ? "s7_mh" : "s7"; ++ tty->print_cr("MH %s %s=" PTR_FORMAT " sp=" PTR_FORMAT, ++ adaptername, mh_reg_name, ++ p2i(mh), p2i(entry_sp)); ++ ++ if (Verbose) { ++ tty->print_cr("Registers:"); ++ const int saved_regs_count = RegisterImpl::number_of_registers; ++ for (int i = 0; i < saved_regs_count; i++) { ++ Register r = as_Register(i); ++ // The registers are stored in reverse order on the stack (by pusha). ++ tty->print("%3s=" PTR_FORMAT, r->name(), saved_regs[((saved_regs_count - 1) - i)]); ++ if ((i + 1) % 4 == 0) { ++ tty->cr(); ++ } else { ++ tty->print(", "); ++ } ++ } ++ tty->cr(); ++ ++ { ++ // dumping last frame with frame::describe ++ ++ JavaThread* p = JavaThread::active(); ++ ++ ResourceMark rm; ++ PRESERVE_EXCEPTION_MARK; // may not be needed by safer and unexpensive here ++ FrameValues values; ++ ++ // Note: We want to allow trace_method_handle from any call site. ++ // While trace_method_handle creates a frame, it may be entered ++ // without a PC on the stack top (e.g. not just after a call). ++ // Walking that frame could lead to failures due to that invalid PC. 
++ // => carefully detect that frame when doing the stack walking ++ ++ // Current C frame ++ frame cur_frame = os::current_frame(); ++ ++ // Robust search of trace_calling_frame (independant of inlining). ++ // Assumes saved_regs comes from a pusha in the trace_calling_frame. ++ assert(cur_frame.sp() < saved_regs, "registers not saved on stack ?"); ++ frame trace_calling_frame = os::get_sender_for_C_frame(&cur_frame); ++ while (trace_calling_frame.fp() < saved_regs) { ++ trace_calling_frame = os::get_sender_for_C_frame(&trace_calling_frame); ++ } ++ ++ // safely create a frame and call frame::describe ++ intptr_t *dump_sp = trace_calling_frame.sender_sp(); ++ intptr_t *dump_fp = trace_calling_frame.link(); ++ ++ bool walkable = has_mh; // whether the traced frame shoud be walkable ++ ++ if (walkable) { ++ // The previous definition of walkable may have to be refined ++ // if new call sites cause the next frame constructor to start ++ // failing. Alternatively, frame constructors could be ++ // modified to support the current or future non walkable ++ // frames (but this is more intrusive and is not considered as ++ // part of this RFE, which will instead use a simpler output). ++ frame dump_frame = frame(dump_sp, dump_fp); ++ dump_frame.describe(values, 1); ++ } else { ++ // Stack may not be walkable (invalid PC above FP): ++ // Add descriptions without building a Java frame to avoid issues ++ values.describe(-1, dump_fp, "fp for #1 "); ++ values.describe(-1, dump_sp, "sp for #1"); ++ } ++ values.describe(-1, entry_sp, "raw top of stack"); ++ ++ tty->print_cr("Stack layout:"); ++ values.print(p); ++ } ++ if (has_mh && oopDesc::is_oop(mh)) { ++ mh->print(); ++ if (java_lang_invoke_MethodHandle::is_instance(mh)) { ++ if (java_lang_invoke_MethodHandle::form_offset_in_bytes() != 0) ++ java_lang_invoke_MethodHandle::form(mh)->print(); ++ } ++ } ++ } ++} ++ ++// The stub wraps the arguments in a struct on the stack to avoid ++// dealing with the different calling conventions for passing 6 ++// arguments. ++struct MethodHandleStubArguments { ++ const char* adaptername; ++ oopDesc* mh; ++ intptr_t* saved_regs; ++ intptr_t* entry_sp; ++}; ++void trace_method_handle_stub_wrapper(MethodHandleStubArguments* args) { ++ trace_method_handle_stub(args->adaptername, ++ args->mh, ++ args->saved_regs, ++ args->entry_sp); ++} ++ ++void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) { ++} ++#endif //PRODUCT +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/methodHandles_mips.hpp b/src/hotspot/cpu/mips/methodHandles_mips.hpp +--- a/src/hotspot/cpu/mips/methodHandles_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/methodHandles_mips.hpp 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,62 @@ ++/* ++ * Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++// Platform-specific definitions for method handles. ++// These definitions are inlined into class MethodHandles. ++ ++// Adapters ++enum /* platform_dependent_constants */ { ++ adapter_code_size = 32000 DEBUG_ONLY(+ 150000) ++}; ++ ++// Additional helper methods for MethodHandles code generation: ++public: ++ static void load_klass_from_Class(MacroAssembler* _masm, Register klass_reg); ++ ++ static void verify_klass(MacroAssembler* _masm, ++ Register obj, SystemDictionary::WKID klass_id, ++ const char* error_message = "wrong klass") NOT_DEBUG_RETURN; ++ ++ static void verify_method_handle(MacroAssembler* _masm, Register mh_reg) { ++ verify_klass(_masm, mh_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MethodHandle), ++ "reference is a MH"); ++ } ++ ++ static void verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) NOT_DEBUG_RETURN; ++ ++ // Similar to InterpreterMacroAssembler::jump_from_interpreted. ++ // Takes care of special dispatch from single stepping too. ++ static void jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, ++ bool for_compiler_entry); ++ ++ static void jump_to_lambda_form(MacroAssembler* _masm, ++ Register recv, Register method_temp, ++ Register temp2, ++ bool for_compiler_entry); ++ ++ static Register saved_last_sp_register() { ++ // Should be in sharedRuntime, not here. ++ return I29; ++ } +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/mips_64.ad b/src/hotspot/cpu/mips/mips_64.ad +--- a/src/hotspot/cpu/mips/mips_64.ad 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/mips_64.ad 2024-01-30 10:00:11.848098317 +0800 +@@ -0,0 +1,12243 @@ ++// ++// Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. 
++// ++// ++ ++// GodSon3 Architecture Description File ++ ++//----------REGISTER DEFINITION BLOCK------------------------------------------ ++// This information is used by the matcher and the register allocator to ++// describe individual registers and classes of registers within the target ++// archtecture. ++ ++// format: ++// reg_def name (call convention, c-call convention, ideal type, encoding); ++// call convention : ++// NS = No-Save ++// SOC = Save-On-Call ++// SOE = Save-On-Entry ++// AS = Always-Save ++// ideal type : ++// see opto/opcodes.hpp for more info ++// reg_class name (reg, ...); ++// alloc_class name (reg, ...); ++register %{ ++ ++// General Registers ++// Integer Registers ++ reg_def R0 ( NS, NS, Op_RegI, 0, VMRegImpl::Bad()); ++ reg_def AT ( NS, NS, Op_RegI, 1, AT->as_VMReg()); ++ reg_def AT_H ( NS, NS, Op_RegI, 1, AT->as_VMReg()->next()); ++ reg_def V0 (SOC, SOC, Op_RegI, 2, V0->as_VMReg()); ++ reg_def V0_H (SOC, SOC, Op_RegI, 2, V0->as_VMReg()->next()); ++ reg_def V1 (SOC, SOC, Op_RegI, 3, V1->as_VMReg()); ++ reg_def V1_H (SOC, SOC, Op_RegI, 3, V1->as_VMReg()->next()); ++ reg_def A0 (SOC, SOC, Op_RegI, 4, A0->as_VMReg()); ++ reg_def A0_H (SOC, SOC, Op_RegI, 4, A0->as_VMReg()->next()); ++ reg_def A1 (SOC, SOC, Op_RegI, 5, A1->as_VMReg()); ++ reg_def A1_H (SOC, SOC, Op_RegI, 5, A1->as_VMReg()->next()); ++ reg_def A2 (SOC, SOC, Op_RegI, 6, A2->as_VMReg()); ++ reg_def A2_H (SOC, SOC, Op_RegI, 6, A2->as_VMReg()->next()); ++ reg_def A3 (SOC, SOC, Op_RegI, 7, A3->as_VMReg()); ++ reg_def A3_H (SOC, SOC, Op_RegI, 7, A3->as_VMReg()->next()); ++ reg_def A4 (SOC, SOC, Op_RegI, 8, A4->as_VMReg()); ++ reg_def A4_H (SOC, SOC, Op_RegI, 8, A4->as_VMReg()->next()); ++ reg_def A5 (SOC, SOC, Op_RegI, 9, A5->as_VMReg()); ++ reg_def A5_H (SOC, SOC, Op_RegI, 9, A5->as_VMReg()->next()); ++ reg_def A6 (SOC, SOC, Op_RegI, 10, A6->as_VMReg()); ++ reg_def A6_H (SOC, SOC, Op_RegI, 10, A6->as_VMReg()->next()); ++ reg_def A7 (SOC, SOC, Op_RegI, 11, A7->as_VMReg()); ++ reg_def A7_H (SOC, SOC, Op_RegI, 11, A7->as_VMReg()->next()); ++ reg_def T0 (SOC, SOC, Op_RegI, 12, T0->as_VMReg()); ++ reg_def T0_H (SOC, SOC, Op_RegI, 12, T0->as_VMReg()->next()); ++ reg_def T1 (SOC, SOC, Op_RegI, 13, T1->as_VMReg()); ++ reg_def T1_H (SOC, SOC, Op_RegI, 13, T1->as_VMReg()->next()); ++ reg_def T2 (SOC, SOC, Op_RegI, 14, T2->as_VMReg()); ++ reg_def T2_H (SOC, SOC, Op_RegI, 14, T2->as_VMReg()->next()); ++ reg_def T3 (SOC, SOC, Op_RegI, 15, T3->as_VMReg()); ++ reg_def T3_H (SOC, SOC, Op_RegI, 15, T3->as_VMReg()->next()); ++ reg_def S0 (SOC, SOE, Op_RegI, 16, S0->as_VMReg()); ++ reg_def S0_H (SOC, SOE, Op_RegI, 16, S0->as_VMReg()->next()); ++ reg_def S1 (SOC, SOE, Op_RegI, 17, S1->as_VMReg()); ++ reg_def S1_H (SOC, SOE, Op_RegI, 17, S1->as_VMReg()->next()); ++ reg_def S2 (SOC, SOE, Op_RegI, 18, S2->as_VMReg()); ++ reg_def S2_H (SOC, SOE, Op_RegI, 18, S2->as_VMReg()->next()); ++ reg_def S3 (SOC, SOE, Op_RegI, 19, S3->as_VMReg()); ++ reg_def S3_H (SOC, SOE, Op_RegI, 19, S3->as_VMReg()->next()); ++ reg_def S4 (SOC, SOE, Op_RegI, 20, S4->as_VMReg()); ++ reg_def S4_H (SOC, SOE, Op_RegI, 20, S4->as_VMReg()->next()); ++ reg_def S5 (SOC, SOE, Op_RegI, 21, S5->as_VMReg()); ++ reg_def S5_H (SOC, SOE, Op_RegI, 21, S5->as_VMReg()->next()); ++ reg_def S6 (SOC, SOE, Op_RegI, 22, S6->as_VMReg()); ++ reg_def S6_H (SOC, SOE, Op_RegI, 22, S6->as_VMReg()->next()); ++ reg_def S7 (SOC, SOE, Op_RegI, 23, S7->as_VMReg()); ++ reg_def S7_H (SOC, SOE, Op_RegI, 23, S7->as_VMReg()->next()); ++ reg_def T8 (SOC, SOC, Op_RegI, 24, T8->as_VMReg()); 
++ reg_def T8_H (SOC, SOC, Op_RegI, 24, T8->as_VMReg()->next()); ++ reg_def T9 (SOC, SOC, Op_RegI, 25, T9->as_VMReg()); ++ reg_def T9_H (SOC, SOC, Op_RegI, 25, T9->as_VMReg()->next()); ++ ++// Special Registers ++ reg_def K0 ( NS, NS, Op_RegI, 26, K0->as_VMReg()); ++ reg_def K1 ( NS, NS, Op_RegI, 27, K1->as_VMReg()); ++ reg_def GP ( NS, NS, Op_RegI, 28, GP->as_VMReg()); ++ reg_def GP_H ( NS, NS, Op_RegI, 28, GP->as_VMReg()->next()); ++ reg_def SP ( NS, NS, Op_RegI, 29, SP->as_VMReg()); ++ reg_def SP_H ( NS, NS, Op_RegI, 29, SP->as_VMReg()->next()); ++ reg_def FP ( NS, NS, Op_RegI, 30, FP->as_VMReg()); ++ reg_def FP_H ( NS, NS, Op_RegI, 30, FP->as_VMReg()->next()); ++ reg_def RA ( NS, NS, Op_RegI, 31, RA->as_VMReg()); ++ reg_def RA_H ( NS, NS, Op_RegI, 31, RA->as_VMReg()->next()); ++ ++// Floating registers. ++reg_def F0 ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()); ++reg_def F0_H ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next()); ++reg_def F1 ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()); ++reg_def F1_H ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next()); ++reg_def F2 ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()); ++reg_def F2_H ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next()); ++reg_def F3 ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()); ++reg_def F3_H ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next()); ++reg_def F4 ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()); ++reg_def F4_H ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next()); ++reg_def F5 ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()); ++reg_def F5_H ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next()); ++reg_def F6 ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()); ++reg_def F6_H ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next()); ++reg_def F7 ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()); ++reg_def F7_H ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next()); ++reg_def F8 ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()); ++reg_def F8_H ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next()); ++reg_def F9 ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()); ++reg_def F9_H ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next()); ++reg_def F10 ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()); ++reg_def F10_H ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next()); ++reg_def F11 ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()); ++reg_def F11_H ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next()); ++reg_def F12 ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()); ++reg_def F12_H ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next()); ++reg_def F13 ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()); ++reg_def F13_H ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next()); ++reg_def F14 ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()); ++reg_def F14_H ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next()); ++reg_def F15 ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()); ++reg_def F15_H ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next()); ++reg_def F16 ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()); ++reg_def F16_H ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next()); ++reg_def F17 ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()); ++reg_def F17_H ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next()); ++reg_def F18 ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()); ++reg_def F18_H ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next()); ++reg_def F19 ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()); ++reg_def F19_H ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next()); ++reg_def F20 ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()); ++reg_def F20_H ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next()); ++reg_def F21 ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()); ++reg_def F21_H ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next()); ++reg_def F22 ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()); ++reg_def 
F22_H ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next()); ++reg_def F23 ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()); ++reg_def F23_H ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next()); ++reg_def F24 ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()); ++reg_def F24_H ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next()); ++reg_def F25 ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()); ++reg_def F25_H ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next()); ++reg_def F26 ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()); ++reg_def F26_H ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next()); ++reg_def F27 ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()); ++reg_def F27_H ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next()); ++reg_def F28 ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()); ++reg_def F28_H ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next()); ++reg_def F29 ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()); ++reg_def F29_H ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next()); ++reg_def F30 ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()); ++reg_def F30_H ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next()); ++reg_def F31 ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()); ++reg_def F31_H ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next()); ++ ++ ++// ---------------------------- ++// Special Registers ++//S6 is used for get_thread(S6) ++//S5 is uesd for heapbase of compressed oop ++alloc_class chunk0( ++ S7, S7_H, ++ S0, S0_H, ++ S1, S1_H, ++ S2, S2_H, ++ S4, S4_H, ++ S5, S5_H, ++ S6, S6_H, ++ S3, S3_H, ++ T2, T2_H, ++ T3, T3_H, ++ T8, T8_H, ++ T9, T9_H, ++ T1, T1_H, // inline_cache_reg ++ V1, V1_H, ++ A7, A7_H, ++ A6, A6_H, ++ A5, A5_H, ++ A4, A4_H, ++ V0, V0_H, ++ A3, A3_H, ++ A2, A2_H, ++ A1, A1_H, ++ A0, A0_H, ++ T0, T0_H, ++ GP, GP_H ++ RA, RA_H, ++ SP, SP_H, // stack_pointer ++ FP, FP_H // frame_pointer ++ ); ++ ++alloc_class chunk1( F0, F0_H, ++ F1, F1_H, ++ F2, F2_H, ++ F3, F3_H, ++ F4, F4_H, ++ F5, F5_H, ++ F6, F6_H, ++ F7, F7_H, ++ F8, F8_H, ++ F9, F9_H, ++ F10, F10_H, ++ F11, F11_H, ++ F20, F20_H, ++ F21, F21_H, ++ F22, F22_H, ++ F23, F23_H, ++ F24, F24_H, ++ F25, F25_H, ++ F26, F26_H, ++ F27, F27_H, ++ F28, F28_H, ++ F19, F19_H, ++ F18, F18_H, ++ F17, F17_H, ++ F16, F16_H, ++ F15, F15_H, ++ F14, F14_H, ++ F13, F13_H, ++ F12, F12_H, ++ F29, F29_H, ++ F30, F30_H, ++ F31, F31_H); ++ ++reg_class s_reg( S0, S1, S2, S3, S4, S5, S6, S7 ); ++reg_class s0_reg( S0 ); ++reg_class s1_reg( S1 ); ++reg_class s2_reg( S2 ); ++reg_class s3_reg( S3 ); ++reg_class s4_reg( S4 ); ++reg_class s5_reg( S5 ); ++reg_class s6_reg( S6 ); ++reg_class s7_reg( S7 ); ++ ++reg_class t_reg( T0, T1, T2, T3, T8, T9 ); ++reg_class t0_reg( T0 ); ++reg_class t1_reg( T1 ); ++reg_class t2_reg( T2 ); ++reg_class t3_reg( T3 ); ++reg_class t8_reg( T8 ); ++reg_class t9_reg( T9 ); ++ ++reg_class a_reg( A0, A1, A2, A3, A4, A5, A6, A7 ); ++reg_class a0_reg( A0 ); ++reg_class a1_reg( A1 ); ++reg_class a2_reg( A2 ); ++reg_class a3_reg( A3 ); ++reg_class a4_reg( A4 ); ++reg_class a5_reg( A5 ); ++reg_class a6_reg( A6 ); ++reg_class a7_reg( A7 ); ++ ++reg_class v0_reg( V0 ); ++reg_class v1_reg( V1 ); ++ ++reg_class sp_reg( SP, SP_H ); ++reg_class fp_reg( FP, FP_H ); ++ ++reg_class v0_long_reg( V0, V0_H ); ++reg_class v1_long_reg( V1, V1_H ); ++reg_class a0_long_reg( A0, A0_H ); ++reg_class a1_long_reg( A1, A1_H ); ++reg_class a2_long_reg( A2, A2_H ); ++reg_class a3_long_reg( A3, A3_H ); ++reg_class a4_long_reg( A4, A4_H ); ++reg_class a5_long_reg( A5, A5_H ); ++reg_class a6_long_reg( A6, A6_H ); ++reg_class a7_long_reg( A7, A7_H ); ++reg_class t0_long_reg( T0, T0_H ); ++reg_class t1_long_reg( T1, T1_H ); 
++reg_class t2_long_reg( T2, T2_H ); ++reg_class t3_long_reg( T3, T3_H ); ++reg_class t8_long_reg( T8, T8_H ); ++reg_class t9_long_reg( T9, T9_H ); ++reg_class s0_long_reg( S0, S0_H ); ++reg_class s1_long_reg( S1, S1_H ); ++reg_class s2_long_reg( S2, S2_H ); ++reg_class s3_long_reg( S3, S3_H ); ++reg_class s4_long_reg( S4, S4_H ); ++reg_class s5_long_reg( S5, S5_H ); ++reg_class s6_long_reg( S6, S6_H ); ++reg_class s7_long_reg( S7, S7_H ); ++ ++reg_class int_reg( S7, S0, S1, S2, S4, S3, T8, T2, T3, T1, V1, A7, A6, A5, A4, V0, A3, A2, A1, A0, T0 ); ++ ++reg_class no_Ax_int_reg( S7, S0, S1, S2, S4, S3, T8, T2, T3, T1, V1, V0, T0 ); ++ ++reg_class p_reg( ++ S7, S7_H, ++ S0, S0_H, ++ S1, S1_H, ++ S2, S2_H, ++ S4, S4_H, ++ S3, S3_H, ++ T8, T8_H, ++ T2, T2_H, ++ T3, T3_H, ++ T1, T1_H, ++ A7, A7_H, ++ A6, A6_H, ++ A5, A5_H, ++ A4, A4_H, ++ A3, A3_H, ++ A2, A2_H, ++ A1, A1_H, ++ A0, A0_H, ++ T0, T0_H ++ ); ++ ++reg_class no_T8_p_reg( ++ S7, S7_H, ++ S0, S0_H, ++ S1, S1_H, ++ S2, S2_H, ++ S4, S4_H, ++ S3, S3_H, ++ T2, T2_H, ++ T3, T3_H, ++ T1, T1_H, ++ A7, A7_H, ++ A6, A6_H, ++ A5, A5_H, ++ A4, A4_H, ++ A3, A3_H, ++ A2, A2_H, ++ A1, A1_H, ++ A0, A0_H, ++ T0, T0_H ++ ); ++ ++reg_class long_reg( ++ S7, S7_H, ++ S0, S0_H, ++ S1, S1_H, ++ S2, S2_H, ++ S4, S4_H, ++ S3, S3_H, ++ T8, T8_H, ++ T2, T2_H, ++ T3, T3_H, ++ T1, T1_H, ++ A7, A7_H, ++ A6, A6_H, ++ A5, A5_H, ++ A4, A4_H, ++ A3, A3_H, ++ A2, A2_H, ++ A1, A1_H, ++ A0, A0_H, ++ T0, T0_H ++ ); ++ ++ ++// Floating point registers. ++// F31 are not used as temporary registers in D2I ++reg_class flt_reg( F0, F1, F2, F3, F4, F5, F6, F7, F8, F9, F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F31); ++reg_class dbl_reg( F0, F0_H, ++ F1, F1_H, ++ F2, F2_H, ++ F3, F3_H, ++ F4, F4_H, ++ F5, F5_H, ++ F6, F6_H, ++ F7, F7_H, ++ F8, F8_H, ++ F9, F9_H, ++ F10, F10_H, ++ F11, F11_H, ++ F12, F12_H, ++ F13, F13_H, ++ F14, F14_H, ++ F15, F15_H, ++ F16, F16_H, ++ F17, F17_H, ++ F18, F18_H, ++ F19, F19_H, ++ F20, F20_H, ++ F21, F21_H, ++ F22, F22_H, ++ F23, F23_H, ++ F24, F24_H, ++ F25, F25_H, ++ F26, F26_H, ++ F27, F27_H, ++ F28, F28_H, ++ F29, F29_H, ++ F31, F31_H); ++ ++reg_class flt_arg0( F12 ); ++reg_class dbl_arg0( F12, F12_H ); ++reg_class dbl_arg1( F14, F14_H ); ++ ++%} ++ ++//----------DEFINITION BLOCK--------------------------------------------------- ++// Define name --> value mappings to inform the ADLC of an integer valued name ++// Current support includes integer values in the range [0, 0x7FFFFFFF] ++// Format: ++// int_def ( , ); ++// Generated Code in ad_.hpp ++// #define () ++// // value == ++// Generated code in ad_.cpp adlc_verification() ++// assert( == , "Expect () to equal "); ++// ++definitions %{ ++ int_def DEFAULT_COST ( 100, 100); ++ int_def HUGE_COST (1000000, 1000000); ++ ++ // Memory refs are twice as expensive as run-of-the-mill. ++ int_def MEMORY_REF_COST ( 200, DEFAULT_COST * 2); ++ ++ // Branches are even more expensive. 
++ int_def BRANCH_COST ( 300, DEFAULT_COST * 3); ++ // we use jr instruction to construct call, so more expensive ++ int_def CALL_COST ( 500, DEFAULT_COST * 5); ++/* ++ int_def EQUAL ( 1, 1 ); ++ int_def NOT_EQUAL ( 2, 2 ); ++ int_def GREATER ( 3, 3 ); ++ int_def GREATER_EQUAL ( 4, 4 ); ++ int_def LESS ( 5, 5 ); ++ int_def LESS_EQUAL ( 6, 6 ); ++*/ ++%} ++ ++ ++ ++//----------SOURCE BLOCK------------------------------------------------------- ++// This is a block of C++ code which provides values, functions, and ++// definitions necessary in the rest of the architecture description ++ ++source_hpp %{ ++// Header information of the source block. ++// Method declarations/definitions which are used outside ++// the ad-scope can conveniently be defined here. ++// ++// To keep related declarations/definitions/uses close together, ++// we switch between source %{ }% and source_hpp %{ }% freely as needed. ++ ++class CallStubImpl { ++ ++ //-------------------------------------------------------------- ++ //---< Used for optimization in Compile::shorten_branches >--- ++ //-------------------------------------------------------------- ++ ++ public: ++ // Size of call trampoline stub. ++ static uint size_call_trampoline() { ++ return 0; // no call trampolines on this platform ++ } ++ ++ // number of relocations needed by a call trampoline stub ++ static uint reloc_call_trampoline() { ++ return 0; // no call trampolines on this platform ++ } ++}; ++ ++class HandlerImpl { ++ ++ public: ++ ++ static int emit_exception_handler(CodeBuffer &cbuf); ++ static int emit_deopt_handler(CodeBuffer& cbuf); ++ ++ static uint size_exception_handler() { ++ // NativeCall instruction size is the same as NativeJump. ++ // exception handler starts out as jump and can be patched to ++ // a call be deoptimization. (4932387) ++ // Note that this value is also credited (in output.cpp) to ++ // the size of the code section. ++ int size = NativeCall::instruction_size; ++ const uintx m = 16 - 1; ++ return mask_bits(size + m, ~m); ++ //return round_to(size, 16); ++ } ++ ++ static uint size_deopt_handler() { ++ int size = NativeCall::instruction_size; ++ const uintx m = 16 - 1; ++ return mask_bits(size + m, ~m); ++ //return round_to(size, 16); ++ } ++}; ++ ++%} // end source_hpp ++ ++source %{ ++ ++#define NO_INDEX 0 ++#define RELOC_IMM64 Assembler::imm_operand ++#define RELOC_DISP32 Assembler::disp32_operand ++ ++ ++#define __ _masm. ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++ ++// Emit exception handler code. ++// Stuff framesize into a register and call a VM stub routine. ++int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) { ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a handler. ++ MacroAssembler _masm(&cbuf); ++ address base = __ start_a_stub(size_exception_handler()); ++ if (base == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return 0; // CodeBuffer::expand failed ++ } ++ ++ int offset = __ offset(); ++ ++ __ block_comment("; emit_exception_handler"); ++ ++ cbuf.set_insts_mark(); ++ __ relocate(relocInfo::runtime_call_type); ++ __ patchable_jump((address)OptoRuntime::exception_blob()->entry_point()); ++ __ align(16); ++ assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); ++ __ end_a_stub(); ++ return offset; ++} ++ ++// Emit deopt handler code. 
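[Editorial illustration, not part of the patch: the size_exception_handler()/size_deopt_handler() helpers above round NativeCall::instruction_size up to a 16-byte boundary with mask_bits(size + m, ~m), and the compute_padding() helpers further down use the same mask trick. The arithmetic in self-contained C++:]

#include <cassert>
#include <cstdint>

static uint64_t round_up_16(uint64_t size) {
  const uint64_t m = 16 - 1;
  return (size + m) & ~m;   // same effect as mask_bits(size + m, ~m)
}

int main() {
  assert(round_up_16(24) == 32);  // e.g. a 24-byte sequence pads to 32
  assert(round_up_16(32) == 32);  // already aligned: unchanged
  return 0;
}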
++int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) { ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a handler. ++ MacroAssembler _masm(&cbuf); ++ address base = __ start_a_stub(size_deopt_handler()); ++ if (base == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return 0; // CodeBuffer::expand failed ++ } ++ ++ int offset = __ offset(); ++ ++ __ block_comment("; emit_deopt_handler"); ++ ++ cbuf.set_insts_mark(); ++ __ relocate(relocInfo::runtime_call_type); ++ __ patchable_call(SharedRuntime::deopt_blob()->unpack()); ++ __ align(16); ++ assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow"); ++ __ end_a_stub(); ++ return offset; ++} ++ ++ ++const bool Matcher::match_rule_supported(int opcode) { ++ if (!has_match_rule(opcode)) ++ return false; ++ ++ switch (opcode) { ++ //Op_CountLeadingZerosI Op_CountLeadingZerosL can be deleted, all MIPS CPUs support clz & dclz. ++ case Op_CountLeadingZerosI: ++ case Op_CountLeadingZerosL: ++ if (!UseCountLeadingZerosInstructionMIPS64) ++ return false; ++ break; ++ case Op_CountTrailingZerosI: ++ case Op_CountTrailingZerosL: ++ if (!UseCountTrailingZerosInstructionMIPS64) ++ return false; ++ break; ++ } ++ ++ return true; // Per default match rules are supported. ++} ++ ++const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { ++ // TODO ++ // identify extra cases that we might want to provide match rules for ++ // e.g. Op_ vector nodes and other intrinsics while guarding with vlen ++ bool ret_value = match_rule_supported(opcode); ++ // Add rules here. ++ ++ return ret_value; // Per default match rules are supported. ++} ++ ++const bool Matcher::has_predicated_vectors(void) { ++ return false; ++} ++ ++const int Matcher::float_pressure(int default_pressure_threshold) { ++ Unimplemented(); ++ return default_pressure_threshold; ++} ++ ++bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { ++ int offs = offset - br_size + 4; ++ // To be conservative on MIPS ++ // branch node should be end with: ++ // branch inst ++ // delay slot ++ const int safety_zone = 3 * BytesPerInstWord; ++ return Assembler::is_simm16((offs<0 ? offs-safety_zone : offs+safety_zone) >> 2); ++} ++ ++ ++// No additional cost for CMOVL. ++const int Matcher::long_cmove_cost() { return 0; } ++ ++// No CMOVF/CMOVD with SSE2 ++const int Matcher::float_cmove_cost() { return ConditionalMoveLimit; } ++ ++// Does the CPU require late expand (see block.cpp for description of late expand)? ++const bool Matcher::require_postalloc_expand = false; ++ ++// Do we need to mask the count passed to shift instructions or does ++// the cpu only look at the lower 5/6 bits anyway? ++const bool Matcher::need_masked_shift_count = false; ++ ++bool Matcher::narrow_oop_use_complex_address() { ++ assert(UseCompressedOops, "only for compressed oops code"); ++ return false; ++} ++ ++bool Matcher::narrow_klass_use_complex_address() { ++ assert(UseCompressedClassPointers, "only for compressed klass code"); ++ return false; ++} ++ ++bool Matcher::const_oop_prefer_decode() { ++ // Prefer ConN+DecodeN over ConP. ++ return true; ++} ++ ++bool Matcher::const_klass_prefer_decode() { ++ // TODO: Either support matching DecodeNKlass (heap-based) in operand ++ // or condisider the following: ++ // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode. 
++ //return Universe::narrow_klass_base() == NULL; ++ return true; ++} ++ ++// This is UltraSparc specific, true just means we have fast l2f conversion ++const bool Matcher::convL2FSupported(void) { ++ return true; ++} ++ ++// Max vector size in bytes. 0 if not supported. ++const int Matcher::vector_width_in_bytes(BasicType bt) { ++ if (MaxVectorSize == 0) ++ return 0; ++ assert(MaxVectorSize == 8, ""); ++ return 8; ++} ++ ++// Vector ideal reg ++const uint Matcher::vector_ideal_reg(int size) { ++ assert(MaxVectorSize == 8, ""); ++ switch(size) { ++ case 8: return Op_VecD; ++ } ++ ShouldNotReachHere(); ++ return 0; ++} ++ ++// Only lowest bits of xmm reg are used for vector shift count. ++const uint Matcher::vector_shift_count_ideal_reg(int size) { ++ fatal("vector shift is not supported"); ++ return Node::NotAMachineReg; ++} ++ ++ ++const bool Matcher::convi2l_type_required = true; ++ ++// Should the Matcher clone shifts on addressing modes, expecting them ++// to be subsumed into complex addressing expressions or compute them ++// into registers? ++bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { ++ return clone_base_plus_offset_address(m, mstack, address_visited); ++} ++ ++void Compile::reshape_address(AddPNode* addp) { ++} ++ ++// Limits on vector size (number of elements) loaded into vector. ++const int Matcher::max_vector_size(const BasicType bt) { ++ assert(is_java_primitive(bt), "only primitive type vectors"); ++ return vector_width_in_bytes(bt)/type2aelembytes(bt); ++} ++ ++const int Matcher::min_vector_size(const BasicType bt) { ++ return max_vector_size(bt); // Same as max. ++} ++ ++// MIPS supports misaligned vectors store/load? FIXME ++const bool Matcher::misaligned_vectors_ok() { ++ return false; ++ //return !AlignVector; // can be changed by flag ++} ++ ++// Register for DIVI projection of divmodI ++RegMask Matcher::divI_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++// Register for MODI projection of divmodI ++RegMask Matcher::modI_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++// Register for DIVL projection of divmodL ++RegMask Matcher::divL_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++int Matcher::regnum_to_fpu_offset(int regnum) { ++ return regnum - 32; // The FP registers are in the second chunk ++} ++ ++ ++const bool Matcher::isSimpleConstant64(jlong value) { ++ // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. ++ return true; ++} ++ ++ ++// Return whether or not this register is ever used as an argument. This ++// function is used on startup to build the trampoline stubs in generateOptoStub. ++// Registers not mentioned will be killed by the VM call in the trampoline, and ++// arguments in those registers not be available to the callee. 
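[Editorial aside, not part of the patch: per its own comment, can_be_java_arg() below encodes the MIPS64 Java calling convention — integer arguments in T0 and A0..A7, float arguments in F12..F19. A self-contained membership check over register names, for illustration only:]

#include <cstdio>
#include <set>
#include <string>

static bool is_java_arg_register(const std::string& name) {
  // Mirrors the register checks in can_be_java_arg() below.
  static const std::set<std::string> java_args = {
    "T0", "A0", "A1", "A2", "A3", "A4", "A5", "A6", "A7",      // integer args
    "F12", "F13", "F14", "F15", "F16", "F17", "F18", "F19"     // float args
  };
  return java_args.count(name) != 0;
}

int main() {
  std::printf("A3: %d, S0: %d\n", is_java_arg_register("A3"), is_java_arg_register("S0"));  // 1, 0
  return 0;
}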
++bool Matcher::can_be_java_arg( int reg ) { ++ // Refer to: [sharedRuntime_mips_64.cpp] SharedRuntime::java_calling_convention() ++ if ( reg == T0_num || reg == T0_H_num ++ || reg == A0_num || reg == A0_H_num ++ || reg == A1_num || reg == A1_H_num ++ || reg == A2_num || reg == A2_H_num ++ || reg == A3_num || reg == A3_H_num ++ || reg == A4_num || reg == A4_H_num ++ || reg == A5_num || reg == A5_H_num ++ || reg == A6_num || reg == A6_H_num ++ || reg == A7_num || reg == A7_H_num ) ++ return true; ++ ++ if ( reg == F12_num || reg == F12_H_num ++ || reg == F13_num || reg == F13_H_num ++ || reg == F14_num || reg == F14_H_num ++ || reg == F15_num || reg == F15_H_num ++ || reg == F16_num || reg == F16_H_num ++ || reg == F17_num || reg == F17_H_num ++ || reg == F18_num || reg == F18_H_num ++ || reg == F19_num || reg == F19_H_num ) ++ return true; ++ ++ return false; ++} ++ ++bool Matcher::is_spillable_arg( int reg ) { ++ return can_be_java_arg(reg); ++} ++ ++bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) { ++ return false; ++} ++ ++// Register for MODL projection of divmodL ++RegMask Matcher::modL_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++const RegMask Matcher::method_handle_invoke_SP_save_mask() { ++ return FP_REG_mask(); ++} ++ ++// MIPS doesn't support AES intrinsics ++const bool Matcher::pass_original_key_for_aes() { ++ return false; ++} ++ ++int CallStaticJavaDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++int CallDynamicJavaDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++int CallLeafNoFPDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++int CallLeafDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++int CallRuntimeDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++// If CPU can load and store mis-aligned doubles directly then no fixup is ++// needed. Else we split the double into 2 integer pieces and move it ++// piece-by-piece. Only happens when passing doubles into C code as the ++// Java calling convention forces doubles to be aligned. ++const bool Matcher::misaligned_doubles_ok = false; ++// Do floats take an entire double register or just half? ++//const bool Matcher::float_in_double = true; ++bool Matcher::float_in_double() { return false; } ++// Do ints take an entire long register or just half? ++const bool Matcher::int_in_long = true; ++// Is it better to copy float constants, or load them directly from memory? ++// Intel can load a float constant from a direct address, requiring no ++// extra registers. Most RISCs will have to materialize an address into a ++// register first, so they would do better to copy the constant from stack. ++const bool Matcher::rematerialize_float_constants = false; ++// Advertise here if the CPU requires explicit rounding operations ++// to implement the UseStrictFP mode. ++const bool Matcher::strict_fp_requires_explicit_rounding = false; ++// false => size gets scaled to BytesPerLong, ok. 
++const bool Matcher::init_array_count_is_in_bytes = false; ++ ++// Indicate if the safepoint node needs the polling page as an input. ++// it does if the polling page is more than disp32 away. ++bool SafePointNode::needs_polling_address_input() { ++ return SafepointMechanism::uses_thread_local_poll(); ++} ++ ++#ifndef PRODUCT ++void MachBreakpointNode::format( PhaseRegAlloc *, outputStream* st ) const { ++ st->print("BRK"); ++} ++#endif ++ ++void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { ++ MacroAssembler _masm(&cbuf); ++ __ brk(5); ++} ++ ++uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { ++ return MachNode::size(ra_); ++} ++ ++ ++ ++// !!!!! Special hack to get all type of calls to specify the byte offset ++// from the start of the call to the point where the return address ++// will point. ++int MachCallStaticJavaNode::ret_addr_offset() { ++ //lui ++ //ori ++ //nop ++ //nop ++ //jalr ++ //nop ++ return 24; ++} ++ ++int MachCallDynamicJavaNode::ret_addr_offset() { ++ //lui IC_Klass, ++ //ori IC_Klass, ++ //dsll IC_Klass ++ //ori IC_Klass ++ ++ //lui T9 ++ //ori T9 ++ //nop ++ //nop ++ //jalr T9 ++ //nop ++ return 4 * 4 + 4 * 6; ++} ++ ++//============================================================================= ++ ++// Figure out which register class each belongs in: rc_int, rc_float, rc_stack ++enum RC { rc_bad, rc_int, rc_float, rc_stack }; ++static enum RC rc_class( OptoReg::Name reg ) { ++ if( !OptoReg::is_valid(reg) ) return rc_bad; ++ if (OptoReg::is_stack(reg)) return rc_stack; ++ VMReg r = OptoReg::as_VMReg(reg); ++ if (r->is_Register()) return rc_int; ++ assert(r->is_FloatRegister(), "must be"); ++ return rc_float; ++} ++ ++uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const { ++ // Get registers to move ++ OptoReg::Name src_second = ra_->get_reg_second(in(1)); ++ OptoReg::Name src_first = ra_->get_reg_first(in(1)); ++ OptoReg::Name dst_second = ra_->get_reg_second(this ); ++ OptoReg::Name dst_first = ra_->get_reg_first(this ); ++ ++ enum RC src_second_rc = rc_class(src_second); ++ enum RC src_first_rc = rc_class(src_first); ++ enum RC dst_second_rc = rc_class(dst_second); ++ enum RC dst_first_rc = rc_class(dst_first); ++ ++ assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" ); ++ ++ // Generate spill code! 
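++ // The cases below cover each (source class, destination class) pairing:
++ // stack-to-stack copies bounce through AT with ld/sd (or lw/sw for 32-bit
++ // values), stack loads/stores of GPRs use ld/sd and lw/lwu/sw, FPR slots
++ // use ldc1/sdc1 and lwc1/swc1, GPR-to-GPR copies use move/move_u32/daddu,
++ // FPR-to-FPR copies use mov_d/mov_s, and GPR/FPR transfers go through
++ // dmtc1/dmfc1 (mtc1/mfc1 for 32-bit).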
++ ++ if( src_first == dst_first && src_second == dst_second ) ++ return 0; // Self copy, no move ++ ++ if (src_first_rc == rc_stack) { ++ // mem -> ++ if (dst_first_rc == rc_stack) { ++ // mem -> mem ++ assert(src_second != dst_first, "overlap"); ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int src_offset = ra_->reg2offset(src_first); ++ int dst_offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ ld(AT, Address(SP, src_offset)); ++ __ sd(AT, Address(SP, dst_offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("ld AT, [SP + #%d]\t# 64-bit mem-mem spill 1\n\t" ++ "sd AT, [SP + #%d]", ++ src_offset, dst_offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ // No pushl/popl, so: ++ int src_offset = ra_->reg2offset(src_first); ++ int dst_offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ lw(AT, Address(SP, src_offset)); ++ __ sw(AT, Address(SP, dst_offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("lw AT, [SP + #%d] spill 2\n\t" ++ "sw AT, [SP + #%d]\n\t", ++ src_offset, dst_offset); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_int) { ++ // mem -> gpr ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ ld(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("ld %s, [SP + #%d]\t# spill 3", ++ Matcher::regName[dst_first], ++ offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ if (this->ideal_reg() == Op_RegI) ++ __ lw(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++ else ++ __ lwu(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ if (this->ideal_reg() == Op_RegI) ++ st->print("lw %s, [SP + #%d]\t# spill 4", ++ Matcher::regName[dst_first], ++ offset); ++ else ++ st->print("lwu %s, [SP + #%d]\t# spill 5", ++ Matcher::regName[dst_first], ++ offset); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_float) { ++ // mem-> xmm ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ ldc1( as_FloatRegister(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("ldc1 %s, [SP + #%d]\t# spill 6", ++ Matcher::regName[dst_first], ++ offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ lwc1( 
as_FloatRegister(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("lwc1 %s, [SP + #%d]\t# spill 7", ++ Matcher::regName[dst_first], ++ offset); ++#endif ++ } ++ } ++ return 0; ++ } ++ } else if (src_first_rc == rc_int) { ++ // gpr -> ++ if (dst_first_rc == rc_stack) { ++ // gpr -> mem ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ sd(as_Register(Matcher::_regEncode[src_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("sd %s, [SP + #%d] # spill 8", ++ Matcher::regName[src_first], ++ offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ sw(as_Register(Matcher::_regEncode[src_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("sw %s, [SP + #%d]\t# spill 9", ++ Matcher::regName[src_first], offset); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_int) { ++ // gpr -> gpr ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ move(as_Register(Matcher::_regEncode[dst_first]), ++ as_Register(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("move(64bit) %s <-- %s\t# spill 10", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ return 0; ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ if (this->ideal_reg() == Op_RegI) ++ __ move_u32(as_Register(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first])); ++ else ++ __ daddu(as_Register(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]), R0); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("move(32-bit) %s <-- %s\t# spill 11", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ return 0; ++ } ++ } else if (dst_first_rc == rc_float) { ++ // gpr -> xmm ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ dmtc1(as_Register(Matcher::_regEncode[src_first]), as_FloatRegister(Matcher::_regEncode[dst_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("dmtc1 %s, %s\t# spill 12", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ mtc1( as_Register(Matcher::_regEncode[src_first]), as_FloatRegister(Matcher::_regEncode[dst_first]) ); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("mtc1 %s, %s\t# spill 13", ++ Matcher::regName[dst_first], 
++ Matcher::regName[src_first]); ++#endif ++ } ++ } ++ return 0; ++ } ++ } else if (src_first_rc == rc_float) { ++ // xmm -> ++ if (dst_first_rc == rc_stack) { ++ // xmm -> mem ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ sdc1( as_FloatRegister(Matcher::_regEncode[src_first]), Address(SP, offset) ); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("sdc1 %s, [SP + #%d]\t# spill 14", ++ Matcher::regName[src_first], ++ offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ swc1(as_FloatRegister(Matcher::_regEncode[src_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("swc1 %s, [SP + #%d]\t# spill 15", ++ Matcher::regName[src_first], ++ offset); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_int) { ++ // xmm -> gpr ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ dmfc1( as_Register(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("dmfc1 %s, %s\t# spill 16", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ mfc1( as_Register(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("mfc1 %s, %s\t# spill 17", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_float) { ++ // xmm -> xmm ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ mov_d( as_FloatRegister(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("mov_d %s <-- %s\t# spill 18", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ mov_s( as_FloatRegister(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("mov_s %s <-- %s\t# spill 19", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } ++ return 0; ++ } ++ } ++ ++ assert(0," foo "); ++ Unimplemented(); ++ return 0; ++} ++ ++#ifndef PRODUCT ++void MachSpillCopyNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ implementation( NULL, ra_, false, st ); ++} ++#endif ++ ++void 
MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ implementation( &cbuf, ra_, false, NULL ); ++} ++ ++uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); ++} ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ Compile *C = ra_->C; ++ int framesize = C->frame_size_in_bytes(); ++ ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ ++ st->print_cr("daddiu SP, SP, %d # Rlease stack @ MachEpilogNode", framesize); ++ st->print("\t"); ++ if (UseLEXT1) { ++ st->print_cr("gslq RA, FP, SP, %d # Restore FP & RA @ MachEpilogNode", -wordSize*2); ++ } else { ++ st->print_cr("ld RA, SP, %d # Restore RA @ MachEpilogNode", -wordSize); ++ st->print("\t"); ++ st->print_cr("ld FP, SP, %d # Restore FP @ MachEpilogNode", -wordSize*2); ++ } ++ ++ if( do_polling() && C->is_method_compilation() ) { ++ st->print("\t"); ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ st->print_cr("ld AT, poll_offset[thread] #polling_page_address\n\t" ++ "lw AT, [AT]\t" ++ "# Safepoint: poll for GC"); ++ } else { ++ st->print_cr("Poll Safepoint # MachEpilogNode"); ++ } ++ } ++} ++#endif ++ ++void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ Compile *C = ra_->C; ++ MacroAssembler _masm(&cbuf); ++ int framesize = C->frame_size_in_bytes(); ++ ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ assert(Assembler::is_simm16(framesize), "daddiu uses a signed 16-bit int"); ++ ++ if (UseLEXT1) { ++ __ gslq(RA, FP, SP, framesize - wordSize * 2); ++ } else { ++ __ ld(RA, SP, framesize - wordSize ); ++ __ ld(FP, SP, framesize - wordSize * 2); ++ } ++ __ daddiu(SP, SP, framesize); ++ ++ if (StackReservedPages > 0 && C->has_reserved_stack_access()) { ++ __ reserved_stack_check(); ++ } ++ ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ if( do_polling() && C->is_method_compilation() ) { ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ __ ld(AT, thread, in_bytes(Thread::polling_page_offset())); ++ __ relocate(relocInfo::poll_return_type); ++ __ lw(AT, AT, 0); ++ } else { ++ __ set64(AT, (long)os::get_polling_page()); ++ __ relocate(relocInfo::poll_return_type); ++ __ lw(AT, AT, 0); ++ } ++ } ++} ++ ++uint MachEpilogNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); // too many variables; just compute it the hard way fujie debug ++} ++ ++int MachEpilogNode::reloc() const { ++ return 0; // a large enough number ++} ++ ++const Pipeline * MachEpilogNode::pipeline() const { ++ return MachNode::pipeline_class(); ++} ++ ++int MachEpilogNode::safepoint_offset() const { return 0; } ++ ++//============================================================================= ++ ++#ifndef PRODUCT ++void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); ++ int reg = ra_->get_reg_first(this); ++ st->print("ADDI %s, SP, %d @BoxLockNode",Matcher::regName[reg],offset); ++} ++#endif ++ ++ ++uint BoxLockNode::size(PhaseRegAlloc *ra_) const { ++ return 4; ++} ++ ++void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ MacroAssembler _masm(&cbuf); ++ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); ++ int reg = ra_->get_encode(this); ++ ++ __ addiu(as_Register(reg), SP, offset); ++} ++ ++ ++//static int 
sizeof_FFree_Float_Stack_All = -1; ++ ++int MachCallRuntimeNode::ret_addr_offset() { ++ //lui ++ //ori ++ //dsll ++ //ori ++ //jalr ++ //nop ++ assert(NativeCall::instruction_size == 24, "in MachCallRuntimeNode::ret_addr_offset()"); ++ return NativeCall::instruction_size; ++} ++ ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachNopNode::format( PhaseRegAlloc *, outputStream* st ) const { ++ st->print("NOP \t# %d bytes pad for loops and calls", 4 * _count); ++} ++#endif ++ ++void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ) const { ++ MacroAssembler _masm(&cbuf); ++ int i = 0; ++ for(i = 0; i < _count; i++) ++ __ nop(); ++} ++ ++uint MachNopNode::size(PhaseRegAlloc *) const { ++ return 4 * _count; ++} ++const Pipeline* MachNopNode::pipeline() const { ++ return MachNode::pipeline_class(); ++} ++ ++//============================================================================= ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ st->print_cr("load_klass(T9, T0)"); ++ st->print_cr("\tbeq(T9, iCache, L)"); ++ st->print_cr("\tnop"); ++ st->print_cr("\tjmp(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type)"); ++ st->print_cr("\tnop"); ++ st->print_cr("\tnop"); ++ st->print_cr(" L:"); ++} ++#endif ++ ++ ++void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ MacroAssembler _masm(&cbuf); ++ int ic_reg = Matcher::inline_cache_reg_encode(); ++ Label L; ++ Register receiver = T0; ++ Register iCache = as_Register(ic_reg); ++ ++ __ load_klass(T9, receiver); ++ __ beq(T9, iCache, L); ++ __ delayed()->nop(); ++ __ jmp((address)SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ __ bind(L); ++} ++ ++uint MachUEPNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); ++} ++ ++ ++ ++//============================================================================= ++ ++const RegMask& MachConstantBaseNode::_out_RegMask = P_REG_mask(); ++ ++int Compile::ConstantTable::calculate_table_base_offset() const { ++ return 0; // absolute addressing, no offset ++} ++ ++bool MachConstantBaseNode::requires_postalloc_expand() const { return false; } ++void MachConstantBaseNode::postalloc_expand(GrowableArray *nodes, PhaseRegAlloc *ra_) { ++ ShouldNotReachHere(); ++} ++ ++void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const { ++ Compile* C = ra_->C; ++ Compile::ConstantTable& constant_table = C->constant_table(); ++ MacroAssembler _masm(&cbuf); ++ ++ Register Rtoc = as_Register(ra_->get_encode(this)); ++ CodeSection* consts_section = __ code()->consts(); ++ int consts_size = consts_section->align_at_start(consts_section->size()); ++ assert(constant_table.size() == consts_size, "must be equal"); ++ ++ if (consts_section->size()) { ++ // Materialize the constant table base. 
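++ // patchable_set48 emits a fixed 4-instruction sequence for the 48-bit
++ // absolute address, which is why size() below reports 4 * 4 bytes.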
++ address baseaddr = consts_section->start() + -(constant_table.table_base_offset()); ++ // RelocationHolder rspec = internal_word_Relocation::spec(baseaddr); ++ __ relocate(relocInfo::internal_word_type); ++ __ patchable_set48(Rtoc, (long)baseaddr); ++ } ++} ++ ++uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const { ++ // patchable_set48 (4 insts) ++ return 4 * 4; ++} ++ ++#ifndef PRODUCT ++void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const { ++ Register r = as_Register(ra_->get_encode(this)); ++ st->print("patchable_set48 %s, &constanttable (constant table base) @ MachConstantBaseNode", r->name()); ++} ++#endif ++ ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachPrologNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ Compile* C = ra_->C; ++ ++ int framesize = C->frame_size_in_bytes(); ++ int bangsize = C->bang_size_in_bytes(); ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ ++ // Calls to C2R adapters often do not accept exceptional returns. ++ // We require that their callers must bang for them. But be careful, because ++ // some VM calls (such as call site linkage) can use several kilobytes of ++ // stack. But the stack safety zone should account for that. ++ // See bugs 4446381, 4468289, 4497237. ++ if (C->need_stack_bang(bangsize)) { ++ st->print_cr("# stack bang"); st->print("\t"); ++ } ++ if (UseLEXT1) { ++ st->print("gssq RA, FP, %d(SP) @ MachPrologNode\n\t", -wordSize*2); ++ } else { ++ st->print("sd RA, %d(SP) @ MachPrologNode\n\t", -wordSize); ++ st->print("sd FP, %d(SP) @ MachPrologNode\n\t", -wordSize*2); ++ } ++ st->print("daddiu FP, SP, -%d \n\t", wordSize*2); ++ st->print("daddiu SP, SP, -%d \t",framesize); ++} ++#endif ++ ++ ++void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ Compile* C = ra_->C; ++ MacroAssembler _masm(&cbuf); ++ ++ int framesize = C->frame_size_in_bytes(); ++ int bangsize = C->bang_size_in_bytes(); ++ ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ assert(Assembler::is_simm16(-framesize), "daddiu uses a signed 16-bit int"); ++ ++ // Make enough room for patch_verified_entry ++ __ nop(); ++ __ nop(); ++ ++ if (C->need_stack_bang(bangsize)) { ++ __ generate_stack_overflow_check(bangsize); ++ } ++ ++ __ daddiu(SP, SP, -framesize); ++ if (UseLEXT1) { ++ __ gssq(RA, FP, SP, framesize - wordSize * 2); ++ } else { ++ __ sd(RA, SP, framesize - wordSize); ++ __ sd(FP, SP, framesize - wordSize * 2); ++ } ++ __ daddiu(FP, SP, framesize - wordSize * 2); ++ ++ C->set_frame_complete(cbuf.insts_size()); ++ if (C->has_mach_constant_base_node()) { ++ // NOTE: We set the table base offset here because users might be ++ // emitted before MachConstantBaseNode. ++ Compile::ConstantTable& constant_table = C->constant_table(); ++ constant_table.set_table_base_offset(constant_table.calculate_table_base_offset()); ++ } ++} ++ ++ ++uint MachPrologNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); // too many variables; just compute it the hard way ++} ++ ++int MachPrologNode::reloc() const { ++ return 0; // a large enough number ++} ++ ++%} ++ ++//----------ENCODING BLOCK----------------------------------------------------- ++// This block specifies the encoding classes used by the compiler to output ++// byte streams. 
Encoding classes generate functions which are called by ++// Machine Instruction Nodes in order to generate the bit encoding of the ++// instruction. Operands specify their base encoding interface with the ++// interface keyword. There are currently supported four interfaces, ++// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an ++// operand to generate a function which returns its register number when ++// queried. CONST_INTER causes an operand to generate a function which ++// returns the value of the constant when queried. MEMORY_INTER causes an ++// operand to generate four functions which return the Base Register, the ++// Index Register, the Scale Value, and the Offset Value of the operand when ++// queried. COND_INTER causes an operand to generate six functions which ++// return the encoding code (ie - encoding bits for the instruction) ++// associated with each basic boolean condition for a conditional instruction. ++// Instructions specify two basic values for encoding. They use the ++// ins_encode keyword to specify their encoding class (which must be one of ++// the class names specified in the encoding block), and they use the ++// opcode keyword to specify, in order, their primary, secondary, and ++// tertiary opcode. Only the opcode sections which a particular instruction ++// needs for encoding need to be specified. ++encode %{ ++ ++ enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf ++ MacroAssembler _masm(&cbuf); ++ // This is the instruction starting address for relocation info. ++ __ block_comment("Java_To_Runtime"); ++ cbuf.set_insts_mark(); ++ __ relocate(relocInfo::runtime_call_type); ++ __ patchable_call((address)$meth$$method); ++ %} ++ ++ enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL ++ // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine ++ // who we intended to call. ++ MacroAssembler _masm(&cbuf); ++ address addr = (address)$meth$$method; ++ address call; ++ __ block_comment("Java_Static_Call"); ++ ++ if ( !_method ) { ++ // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap. ++ call = __ trampoline_call(AddressLiteral(addr, relocInfo::runtime_call_type), &cbuf); ++ } else { ++ int method_index = resolved_method_index(cbuf); ++ RelocationHolder rspec = _optimized_virtual ? 
opt_virtual_call_Relocation::spec(method_index) ++ : static_call_Relocation::spec(method_index); ++ call = __ trampoline_call(AddressLiteral(addr, rspec), &cbuf); ++ ++ // Emit stub for static call ++ address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); ++ if (stub == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ } ++ if (call == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ %} ++ ++ ++ // ++ // [Ref: LIR_Assembler::ic_call() ] ++ // ++ enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL ++ MacroAssembler _masm(&cbuf); ++ __ block_comment("Java_Dynamic_Call"); ++ __ ic_call((address)$meth$$method, resolved_method_index(cbuf)); ++ %} ++ ++ ++ enc_class enc_PartialSubtypeCheck(mRegP result, mRegP sub, mRegP super, mRegI tmp) %{ ++ Register result = $result$$Register; ++ Register sub = $sub$$Register; ++ Register super = $super$$Register; ++ Register length = $tmp$$Register; ++ Register tmp = T9; ++ Label miss; ++ ++ // result may be the same as sub ++ // 47c B40: # B21 B41 <- B20 Freq: 0.155379 ++ // 47c partialSubtypeCheck result=S1, sub=S1, super=S3, length=S0 ++ // 4bc mov S2, NULL #@loadConP ++ // 4c0 beq S1, S2, B21 #@branchConP P=0.999999 C=-1.000000 ++ // ++ MacroAssembler _masm(&cbuf); ++ Label done; ++ __ check_klass_subtype_slow_path(sub, super, length, tmp, ++ NULL, &miss, ++ /*set_cond_codes:*/ true); ++ // Refer to X86_64's RDI ++ __ move(result, 0); ++ __ b(done); ++ __ delayed()->nop(); ++ ++ __ bind(miss); ++ __ move(result, 1); ++ __ bind(done); ++ %} ++ ++%} ++ ++ ++//---------MIPS FRAME-------------------------------------------------------------- ++// Definition of frame structure and management information. ++// ++// S T A C K L A Y O U T Allocators stack-slot number ++// | (to get allocators register number ++// G Owned by | | v add SharedInfo::stack0) ++// r CALLER | | ++// o | +--------+ pad to even-align allocators stack-slot ++// w V | pad0 | numbers; owned by CALLER ++// t -----------+--------+----> Matcher::_in_arg_limit, unaligned ++// h ^ | in | 5 ++// | | args | 4 Holes in incoming args owned by SELF ++// | | old | | 3 ++// | | SP-+--------+----> Matcher::_old_SP, even aligned ++// v | | ret | 3 return address ++// Owned by +--------+ ++// Self | pad2 | 2 pad to align old SP ++// | +--------+ 1 ++// | | locks | 0 ++// | +--------+----> SharedInfo::stack0, even aligned ++// | | pad1 | 11 pad to align new SP ++// | +--------+ ++// | | | 10 ++// | | spills | 9 spills ++// V | | 8 (pad0 slot for callee) ++// -----------+--------+----> Matcher::_out_arg_limit, unaligned ++// ^ | out | 7 ++// | | args | 6 Holes in outgoing args owned by CALLEE ++// Owned by new | | ++// Callee SP-+--------+----> Matcher::_new_SP, even aligned ++// | | ++// ++// Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is ++// known from SELF's arguments and the Java calling convention. ++// Region 6-7 is determined per call site. ++// Note 2: If the calling convention leaves holes in the incoming argument ++// area, those holes are owned by SELF. Holes in the outgoing area ++// are owned by the CALLEE. Holes should not be nessecary in the ++// incoming area, as the Java calling convention is completely under ++// the control of the AD file. Doubles can be sorted and packed to ++// avoid holes. Holes in the outgoing arguments may be nessecary for ++// varargs C calling conventions. ++// Note 3: Region 0-3 is even aligned, with pad2 as needed. 
Region 3-5 is ++// even aligned with pad0 as needed. ++// Region 6 is even aligned. Region 6-7 is NOT even aligned; ++// region 6-11 is even aligned; it may be padded out more so that ++// the region from SP to FP meets the minimum stack alignment. ++// Note 4: For I2C adapters, the incoming FP may not meet the minimum stack ++// alignment. Region 11, pad1, may be dynamically extended so that ++// SP meets the minimum alignment. ++ ++ ++frame %{ ++ ++ stack_direction(TOWARDS_LOW); ++ ++ // These two registers define part of the calling convention ++ // between compiled code and the interpreter. ++ // SEE StartI2CNode::calling_convention & StartC2INode::calling_convention & StartOSRNode::calling_convention ++ // for more information. ++ ++ inline_cache_reg(T1); // Inline Cache Register ++ interpreter_method_oop_reg(S3); // Method Oop Register when calling interpreter ++ ++ // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset] ++ cisc_spilling_operand_name(indOffset32); ++ ++ // Number of stack slots consumed by locking an object ++ // generate Compile::sync_stack_slots ++ sync_stack_slots(2); ++ ++ frame_pointer(SP); ++ ++ // Interpreter stores its frame pointer in a register which is ++ // stored to the stack by I2CAdaptors. ++ // I2CAdaptors convert from interpreted java to compiled java. ++ ++ interpreter_frame_pointer(FP); ++ ++ // generate Matcher::stack_alignment ++ stack_alignment(StackAlignmentInBytes); //wordSize = sizeof(char*); ++ ++ // Number of stack slots between incoming argument block and the start of ++ // a new frame. The PROLOG must add this many slots to the stack. The ++ // EPILOG must remove this many slots. ++ in_preserve_stack_slots(4); //Now VerifyStackAtCalls is defined as false ! Leave two stack slots for ra and fp ++ ++ // Number of outgoing stack slots killed above the out_preserve_stack_slots ++ // for calls to C. Supports the var-args backing area for register parms. ++ varargs_C_out_slots_killed(0); ++ ++ // The after-PROLOG location of the return address. Location of ++ // return address specifies a type (REG or STACK) and a number ++ // representing the register number (i.e. - use a register name) or ++ // stack slot. ++ // Ret Addr is on stack in slot 0 if no locks or verification or alignment. ++ // Otherwise, it is above the locks and verification slot and alignment word ++ //return_addr(STACK -1+ round_to(1+VerifyStackAtCalls+Compile::current()->sync()*Compile::current()->sync_stack_slots(),WordsPerLong)); ++ return_addr(REG RA); ++ ++ // Body of function which returns an integer array locating ++ // arguments either in registers or in stack slots. Passed an array ++ // of ideal registers called "sig" and a "length" count. Stack-slot ++ // offsets are based on outgoing arguments, i.e. a CALLER setting up ++ // arguments for a CALLEE. Incoming stack arguments are ++ // automatically biased by the preserve_stack_slots field above. ++ ++ ++ // will generated to Matcher::calling_convention(OptoRegPair *sig, uint length, bool is_outgoing) ++ // StartNode::calling_convention call this. ++ calling_convention %{ ++ SharedRuntime::java_calling_convention(sig_bt, regs, length, false); ++ %} ++ ++ ++ ++ ++ // Body of function which returns an integer array locating ++ // arguments either in registers or in stack slots. Passed an array ++ // of ideal registers called "sig" and a "length" count. Stack-slot ++ // offsets are based on outgoing arguments, i.e. a CALLER setting up ++ // arguments for a CALLEE. 
Incoming stack arguments are ++ // automatically biased by the preserve_stack_slots field above. ++ ++ ++ // SEE CallRuntimeNode::calling_convention for more information. ++ c_calling_convention %{ ++ (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length); ++ %} ++ ++ ++ // Location of C & interpreter return values ++ // register(s) contain(s) return value for Op_StartI2C and Op_StartOSR. ++ // SEE Matcher::match. ++ c_return_value %{ ++ assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); ++ /* -- , -- , Op_RegN, Op_RegI, Op_RegP, Op_RegF, Op_RegD, Op_RegL */ ++ static int lo[Op_RegL+1] = { 0, 0, V0_num, V0_num, V0_num, F0_num, F0_num, V0_num }; ++ static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, V0_H_num, OptoReg::Bad, F0_H_num, V0_H_num }; ++ return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); ++ %} ++ ++ // Location of return values ++ // register(s) contain(s) return value for Op_StartC2I and Op_Start. ++ // SEE Matcher::match. ++ ++ return_value %{ ++ assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); ++ /* -- , -- , Op_RegN, Op_RegI, Op_RegP, Op_RegF, Op_RegD, Op_RegL */ ++ static int lo[Op_RegL+1] = { 0, 0, V0_num, V0_num, V0_num, F0_num, F0_num, V0_num }; ++ static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, V0_H_num, OptoReg::Bad, F0_H_num, V0_H_num}; ++ return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); ++ %} ++ ++%} ++ ++//----------ATTRIBUTES--------------------------------------------------------- ++//----------Operand Attributes------------------------------------------------- ++op_attrib op_cost(0); // Required cost attribute ++ ++//----------Instruction Attributes--------------------------------------------- ++ins_attrib ins_cost(100); // Required cost attribute ++ins_attrib ins_size(32); // Required size attribute (in bits) ++ins_attrib ins_pc_relative(0); // Required PC Relative flag ++ins_attrib ins_short_branch(0); // Required flag: is this instruction a ++ // non-matching short branch variant of some ++ // long branch? ++ins_attrib ins_alignment(4); // Required alignment attribute (must be a power of 2) ++ // specifies the alignment that some part of the instruction (not ++ // necessarily the start) requires. If > 1, a compute_padding() ++ // function must be provided for the instruction ++ ++//----------OPERANDS----------------------------------------------------------- ++// Operand definitions must precede instruction definitions for correct parsing ++// in the ADLC because operands constitute user defined types which are used in ++// instruction definitions. ++ ++// Vectors ++operand vecD() %{ ++ constraint(ALLOC_IN_RC(dbl_reg)); ++ match(VecD); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Flags register, used as output of compare instructions ++operand FlagsReg() %{ ++ constraint(ALLOC_IN_RC(t0_reg)); ++ match(RegFlags); ++ ++ format %{ "T0" %} ++ interface(REG_INTER); ++%} ++ ++//----------Simple Operands---------------------------------------------------- ++// TODO: Should we need to define some more special immediate number ? 
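++// Each immediate operand below narrows a ConI/ConL constant with a predicate
++// so the matcher can select instructions whose encodings accept that range
++// directly; for example immI16 only accepts values in [-32768, 32767], the
++// range of a signed 16-bit immediate field, so the constant does not have to
++// be materialized into a register first.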
++// Immediate Operands ++// Integer Immediate ++operand immI() %{ ++ match(ConI); ++ // TODO: should not match immI8 here LEE ++ match(immI8); ++ ++ op_cost(20); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI8() %{ ++ predicate((-128 <= n->get_int()) && (n->get_int() <= 127)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI16() %{ ++ predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767)); ++ match(ConI); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_M65536() %{ ++ predicate(n->get_int() == -65536); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for decrement ++operand immI_M1() %{ ++ predicate(n->get_int() == -1); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for test vs zero ++operand immI_0() %{ ++ predicate(n->get_int() == 0); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for increment ++operand immI_1() %{ ++ predicate(n->get_int() == 1); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constants for increment ++operand immI_16() %{ ++ predicate(n->get_int() == 16); ++ match(ConI); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_24() %{ ++ predicate(n->get_int() == 24); ++ match(ConI); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for long shifts ++operand immI_32() %{ ++ predicate(n->get_int() == 32); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for byte-wide masking ++operand immI_255() %{ ++ predicate(n->get_int() == 255); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_65535() %{ ++ predicate(n->get_int() == 65535); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_MaxI() %{ ++ predicate(n->get_int() == 2147483647); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_M32767_32768() %{ ++ predicate((-32767 <= n->get_int()) && (n->get_int() <= 32768)); ++ match(ConI); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Valid scale values for addressing modes ++operand immI_0_3() %{ ++ predicate(0 <= n->get_int() && (n->get_int() <= 3)); ++ match(ConI); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_0_31() %{ ++ predicate(n->get_int() >= 0 && n->get_int() <= 31); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_0_32767() %{ ++ predicate(n->get_int() >= 0 && n->get_int() <= 32767); ++ match(ConI); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_0_65535() %{ ++ predicate(n->get_int() >= 0 && n->get_int() <= 65535); ++ match(ConI); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_32_63() %{ ++ predicate(n->get_int() >= 32 && n->get_int() <= 63); ++ match(ConI); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Operand for non-negtive integer mask ++operand immI_nonneg_mask() %{ ++ predicate((n->get_int() >= 0) && (Assembler::is_int_mask(n->get_int()) != -1)); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate ++operand immL() %{ ++ match(ConL); ++ ++ op_cost(20); ++ format %{ %} ++ 
interface(CONST_INTER); ++%} ++ ++// Long Immediate 8-bit ++operand immL8() %{ ++ predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L); ++ match(ConL); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer for polling page ++operand immP_poll() %{ ++ predicate(n->get_ptr() != 0 && n->get_ptr() == (intptr_t)os::get_polling_page()); ++ match(ConP); ++ op_cost(5); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL16() %{ ++ predicate((-32768 <= n->get_long()) && (n->get_long() <= 32767)); ++ match(ConL); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate 32-bit signed ++operand immL32() %{ ++ predicate(n->get_long() == (int)(n->get_long())); ++ match(ConL); ++ ++ op_cost(15); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 3..6 zero ++operand immL_M121() %{ ++ predicate(n->get_long() == -121L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 0..2 zero ++operand immL_M8() %{ ++ predicate(n->get_long() == -8L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 1..2 zero ++operand immL_M7() %{ ++ predicate(n->get_long() == -7L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 2 zero ++operand immL_M5() %{ ++ predicate(n->get_long() == -5L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 0..1 zero ++operand immL_M4() %{ ++ predicate(n->get_long() == -4L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_M1() %{ ++ predicate(n->get_long() == -1L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate zero ++operand immL_0() %{ ++ predicate(n->get_long() == 0L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_7() %{ ++ predicate(n->get_long() == 7L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate: low 32-bit mask ++operand immL_MaxUI() %{ ++ predicate(n->get_long() == 0xFFFFFFFFL); ++ match(ConL); ++ op_cost(20); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_M32767_32768() %{ ++ predicate((-32767 <= n->get_long()) && (n->get_long() <= 32768)); ++ match(ConL); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_0_65535() %{ ++ predicate(n->get_long() >= 0 && n->get_long() <= 65535); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Operand for non-negtive long mask ++operand immL_nonneg_mask() %{ ++ predicate((n->get_long() >= 0) && (Assembler::is_jlong_mask(n->get_long()) != -1)); ++ match(ConL); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer Immediate ++operand immP() %{ ++ match(ConP); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// NULL Pointer Immediate ++operand immP_0() %{ ++ predicate(n->get_ptr() == 0); ++ match(ConP); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer Immediate: 64-bit ++operand immP_no_oop_cheap() %{ ++ predicate(!n->bottom_type()->isa_oop_ptr() && (MacroAssembler::insts_for_set64(n->get_ptr()) <= 3)); ++ match(ConP); ++ ++ op_cost(5); ++ // formats are generated automatically for constants and base registers ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer Immediate ++operand 
immN() %{ ++ match(ConN); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immNKlass() %{ ++ match(ConNKlass); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// NULL Pointer Immediate ++operand immN_0() %{ ++ predicate(n->get_narrowcon() == 0); ++ match(ConN); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Single-precision floating-point immediate ++operand immF() %{ ++ match(ConF); ++ ++ op_cost(20); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Single-precision floating-point zero ++operand immF_0() %{ ++ predicate(jint_cast(n->getf()) == 0); ++ match(ConF); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Double-precision floating-point immediate ++operand immD() %{ ++ match(ConD); ++ ++ op_cost(20); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Double-precision floating-point zero ++operand immD_0() %{ ++ predicate(jlong_cast(n->getd()) == 0); ++ match(ConD); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Register Operands ++// Integer Register ++operand mRegI() %{ ++ constraint(ALLOC_IN_RC(int_reg)); ++ match(RegI); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand no_Ax_mRegI() %{ ++ constraint(ALLOC_IN_RC(no_Ax_int_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand mS0RegI() %{ ++ constraint(ALLOC_IN_RC(s0_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S0" %} ++ interface(REG_INTER); ++%} ++ ++operand mS1RegI() %{ ++ constraint(ALLOC_IN_RC(s1_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S1" %} ++ interface(REG_INTER); ++%} ++ ++operand mS3RegI() %{ ++ constraint(ALLOC_IN_RC(s3_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S3" %} ++ interface(REG_INTER); ++%} ++ ++operand mS4RegI() %{ ++ constraint(ALLOC_IN_RC(s4_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S4" %} ++ interface(REG_INTER); ++%} ++ ++operand mS5RegI() %{ ++ constraint(ALLOC_IN_RC(s5_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S5" %} ++ interface(REG_INTER); ++%} ++ ++operand mS6RegI() %{ ++ constraint(ALLOC_IN_RC(s6_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S6" %} ++ interface(REG_INTER); ++%} ++ ++operand mS7RegI() %{ ++ constraint(ALLOC_IN_RC(s7_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S7" %} ++ interface(REG_INTER); ++%} ++ ++ ++operand mT0RegI() %{ ++ constraint(ALLOC_IN_RC(t0_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T0" %} ++ interface(REG_INTER); ++%} ++ ++operand mT1RegI() %{ ++ constraint(ALLOC_IN_RC(t1_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T1" %} ++ interface(REG_INTER); ++%} ++ ++operand mT2RegI() %{ ++ constraint(ALLOC_IN_RC(t2_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T2" %} ++ interface(REG_INTER); ++%} ++ ++operand mT3RegI() %{ ++ constraint(ALLOC_IN_RC(t3_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T3" %} ++ interface(REG_INTER); ++%} ++ ++operand mT8RegI() %{ ++ constraint(ALLOC_IN_RC(t8_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T8" %} ++ interface(REG_INTER); ++%} ++ ++operand mT9RegI() %{ ++ constraint(ALLOC_IN_RC(t9_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T9" %} ++ interface(REG_INTER); ++%} ++ ++operand mA0RegI() %{ ++ constraint(ALLOC_IN_RC(a0_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A0" %} ++ interface(REG_INTER); ++%} ++ ++operand mA1RegI() %{ ++ constraint(ALLOC_IN_RC(a1_reg)); ++ 
match(RegI); ++ match(mRegI); ++ ++ format %{ "A1" %} ++ interface(REG_INTER); ++%} ++ ++operand mA2RegI() %{ ++ constraint(ALLOC_IN_RC(a2_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A2" %} ++ interface(REG_INTER); ++%} ++ ++operand mA3RegI() %{ ++ constraint(ALLOC_IN_RC(a3_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A3" %} ++ interface(REG_INTER); ++%} ++ ++operand mA4RegI() %{ ++ constraint(ALLOC_IN_RC(a4_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A4" %} ++ interface(REG_INTER); ++%} ++ ++operand mA5RegI() %{ ++ constraint(ALLOC_IN_RC(a5_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A5" %} ++ interface(REG_INTER); ++%} ++ ++operand mA6RegI() %{ ++ constraint(ALLOC_IN_RC(a6_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A6" %} ++ interface(REG_INTER); ++%} ++ ++operand mA7RegI() %{ ++ constraint(ALLOC_IN_RC(a7_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A7" %} ++ interface(REG_INTER); ++%} ++ ++operand mV0RegI() %{ ++ constraint(ALLOC_IN_RC(v0_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "V0" %} ++ interface(REG_INTER); ++%} ++ ++operand mV1RegI() %{ ++ constraint(ALLOC_IN_RC(v1_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "V1" %} ++ interface(REG_INTER); ++%} ++ ++operand mRegN() %{ ++ constraint(ALLOC_IN_RC(int_reg)); ++ match(RegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t0_RegN() %{ ++ constraint(ALLOC_IN_RC(t0_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t1_RegN() %{ ++ constraint(ALLOC_IN_RC(t1_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t3_RegN() %{ ++ constraint(ALLOC_IN_RC(t3_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t8_RegN() %{ ++ constraint(ALLOC_IN_RC(t8_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t9_RegN() %{ ++ constraint(ALLOC_IN_RC(t9_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a0_RegN() %{ ++ constraint(ALLOC_IN_RC(a0_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a1_RegN() %{ ++ constraint(ALLOC_IN_RC(a1_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a2_RegN() %{ ++ constraint(ALLOC_IN_RC(a2_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a3_RegN() %{ ++ constraint(ALLOC_IN_RC(a3_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a4_RegN() %{ ++ constraint(ALLOC_IN_RC(a4_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a5_RegN() %{ ++ constraint(ALLOC_IN_RC(a5_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a6_RegN() %{ ++ constraint(ALLOC_IN_RC(a6_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a7_RegN() %{ ++ constraint(ALLOC_IN_RC(a7_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s0_RegN() %{ ++ constraint(ALLOC_IN_RC(s0_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s1_RegN() %{ ++ constraint(ALLOC_IN_RC(s1_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ 
interface(REG_INTER); ++%} ++ ++operand s2_RegN() %{ ++ constraint(ALLOC_IN_RC(s2_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s3_RegN() %{ ++ constraint(ALLOC_IN_RC(s3_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s4_RegN() %{ ++ constraint(ALLOC_IN_RC(s4_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s5_RegN() %{ ++ constraint(ALLOC_IN_RC(s5_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s6_RegN() %{ ++ constraint(ALLOC_IN_RC(s6_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s7_RegN() %{ ++ constraint(ALLOC_IN_RC(s7_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v0_RegN() %{ ++ constraint(ALLOC_IN_RC(v0_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v1_RegN() %{ ++ constraint(ALLOC_IN_RC(v1_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Pointer Register ++operand mRegP() %{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(RegP); ++ match(a0_RegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand no_T8_mRegP() %{ ++ constraint(ALLOC_IN_RC(no_T8_p_reg)); ++ match(RegP); ++ match(mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s3_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s3_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s4_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s4_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s5_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s5_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s6_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s6_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s7_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s7_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t0_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t0_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t2_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t2_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t3_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t3_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t8_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t8_long_reg)); ++ match(RegP); ++ match(mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t9_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t9_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ 
interface(REG_INTER); ++%} ++ ++operand a0_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a0_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a2_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a2_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a3_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a3_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a4_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a4_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++ ++operand a5_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a5_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a6_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a6_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a7_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a7_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v0_RegP() ++%{ ++ constraint(ALLOC_IN_RC(v0_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(v1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++/* ++operand mSPRegP(mRegP reg) %{ ++ constraint(ALLOC_IN_RC(sp_reg)); ++ match(reg); ++ ++ format %{ "SP" %} ++ interface(REG_INTER); ++%} ++ ++operand mFPRegP(mRegP reg) %{ ++ constraint(ALLOC_IN_RC(fp_reg)); ++ match(reg); ++ ++ format %{ "FP" %} ++ interface(REG_INTER); ++%} ++*/ ++ ++operand mRegL() %{ ++ constraint(ALLOC_IN_RC(long_reg)); ++ match(RegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v0RegL() %{ ++ constraint(ALLOC_IN_RC(v0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v1RegL() %{ ++ constraint(ALLOC_IN_RC(v1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a0RegL() %{ ++ constraint(ALLOC_IN_RC(a0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ "A0" %} ++ interface(REG_INTER); ++%} ++ ++operand a1RegL() %{ ++ constraint(ALLOC_IN_RC(a1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a2RegL() %{ ++ constraint(ALLOC_IN_RC(a2_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a3RegL() %{ ++ constraint(ALLOC_IN_RC(a3_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t0RegL() %{ ++ constraint(ALLOC_IN_RC(t0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t1RegL() %{ ++ constraint(ALLOC_IN_RC(t1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t3RegL() %{ ++ constraint(ALLOC_IN_RC(t3_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t8RegL() %{ ++ 
constraint(ALLOC_IN_RC(t8_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a4RegL() %{ ++ constraint(ALLOC_IN_RC(a4_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a5RegL() %{ ++ constraint(ALLOC_IN_RC(a5_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a6RegL() %{ ++ constraint(ALLOC_IN_RC(a6_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a7RegL() %{ ++ constraint(ALLOC_IN_RC(a7_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s0RegL() %{ ++ constraint(ALLOC_IN_RC(s0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s1RegL() %{ ++ constraint(ALLOC_IN_RC(s1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s3RegL() %{ ++ constraint(ALLOC_IN_RC(s3_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s4RegL() %{ ++ constraint(ALLOC_IN_RC(s4_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s7RegL() %{ ++ constraint(ALLOC_IN_RC(s7_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Floating register operands ++operand regF() %{ ++ constraint(ALLOC_IN_RC(flt_reg)); ++ match(RegF); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++//Double Precision Floating register operands ++operand regD() %{ ++ constraint(ALLOC_IN_RC(dbl_reg)); ++ match(RegD); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++//----------Memory Operands---------------------------------------------------- ++// Indirect Memory Operand ++operand indirect(mRegP reg) %{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(reg); ++ ++ format %{ "[$reg] @ indirect" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); /* NO_INDEX */ ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++// Indirect Memory Plus Short Offset Operand ++operand indOffset8(mRegP reg, immL8 off) ++%{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP reg off); ++ ++ op_cost(10); ++ format %{ "[$reg + $off (8-bit)] @ indOffset8" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); /* NO_INDEX */ ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++// Indirect Memory Times Scale Plus Index Register ++operand indIndexScale(mRegP reg, mRegL lreg, immI_0_3 scale) ++%{ ++ predicate(UseLEXT1); ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP reg (LShiftL lreg scale)); ++ ++ op_cost(10); ++ format %{"[$reg + $lreg << $scale] @ indIndexScale" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index($lreg); ++ scale($scale); ++ disp(0x0); ++ %} ++%} ++ ++ ++// [base + index + offset] ++operand baseIndexOffset8(mRegP base, mRegL index, immL8 off) ++%{ ++ predicate(UseLEXT1); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(5); ++ match(AddP (AddP base index) off); ++ ++ format %{ "[$base + $index + $off (8-bit)] @ baseIndexOffset8" %} ++ interface(MEMORY_INTER) %{ ++ base($base); ++ index($index); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++// [base + index + offset] ++operand baseIndexOffset8_convI2L(mRegP base, mRegI index, immL8 off) ++%{ ++ predicate(UseLEXT1); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(5); ++ match(AddP (AddP base (ConvI2L index)) off); ++ ++ format %{ "[$base + $index + $off (8-bit)] 
@ baseIndexOffset8_convI2L" %} ++ interface(MEMORY_INTER) %{ ++ base($base); ++ index($index); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++// [base + index<in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0); ++ op_cost(10); ++ match(AddP (AddP base (LShiftL (ConvI2L index) scale)) off); ++ ++ format %{ "[$base + $index << $scale + $off (8-bit)] @ basePosIndexScaleOffset8" %} ++ interface(MEMORY_INTER) %{ ++ base($base); ++ index($index); ++ scale($scale); ++ disp($off); ++ %} ++%} ++ ++//FIXME: I think it's better to limit the immI to be 16-bit at most! ++// Indirect Memory Plus Long Offset Operand ++operand indOffset32(mRegP reg, immL32 off) %{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(20); ++ match(AddP reg off); ++ ++ format %{ "[$reg + $off (32-bit)] @ indOffset32" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); /* NO_INDEX */ ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++// Indirect Memory Plus Index Register ++operand indIndex(mRegP addr, mRegL index) %{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP addr index); ++ ++ op_cost(20); ++ format %{"[$addr + $index] @ indIndex" %} ++ interface(MEMORY_INTER) %{ ++ base($addr); ++ index($index); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++operand indirectNarrowKlass(mRegN reg) ++%{ ++ predicate(Universe::narrow_klass_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(DecodeNKlass reg); ++ ++ format %{ "[$reg] @ indirectNarrowKlass" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++operand indOffset8NarrowKlass(mRegN reg, immL8 off) ++%{ ++ predicate(Universe::narrow_klass_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(AddP (DecodeNKlass reg) off); ++ ++ format %{ "[$reg + $off (8-bit)] @ indOffset8NarrowKlass" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++operand indOffset32NarrowKlass(mRegN reg, immL32 off) ++%{ ++ predicate(Universe::narrow_klass_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(AddP (DecodeNKlass reg) off); ++ ++ format %{ "[$reg + $off (32-bit)] @ indOffset32NarrowKlass" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++operand indIndexOffsetNarrowKlass(mRegN reg, mRegL lreg, immL32 off) ++%{ ++ predicate(UseLEXT1); ++ predicate(Universe::narrow_klass_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP (AddP (DecodeNKlass reg) lreg) off); ++ ++ op_cost(10); ++ format %{"[$reg + $off + $lreg] @ indIndexOffsetNarrowKlass" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index($lreg); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++operand indIndexNarrowKlass(mRegN reg, mRegL lreg) ++%{ ++ predicate(Universe::narrow_klass_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP (DecodeNKlass reg) lreg); ++ ++ op_cost(10); ++ format %{"[$reg + $lreg] @ indIndexNarrowKlass" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index($lreg); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++// Indirect Memory Operand ++operand indirectNarrow(mRegN reg) ++%{ ++ predicate(Universe::narrow_oop_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(DecodeN reg); ++ ++ format %{ "[$reg] @ indirectNarrow" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++// Indirect Memory Plus Short Offset Operand ++operand indOffset8Narrow(mRegN reg, immL8 off) ++%{ ++ 
predicate(Universe::narrow_oop_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(AddP (DecodeN reg) off); ++ ++ format %{ "[$reg + $off (8-bit)] @ indOffset8Narrow" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++// Indirect Memory Plus Index Register Plus Offset Operand ++operand indIndexOffset8Narrow(mRegN reg, mRegL lreg, immL8 off) ++%{ ++ predicate((Universe::narrow_oop_shift() == 0) && UseLEXT1); ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP (AddP (DecodeN reg) lreg) off); ++ ++ op_cost(10); ++ format %{"[$reg + $off + $lreg] @ indIndexOffset8Narrow" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index($lreg); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++//----------Conditional Branch Operands---------------------------------------- ++// Comparison Op - This is the operation of the comparison, and is limited to ++// the following set of codes: ++// L (<), LE (<=), G (>), GE (>=), E (==), NE (!=) ++// ++// Other attributes of the comparison, such as unsignedness, are specified ++// by the comparison instruction that sets a condition code flags register. ++// That result is represented by a flags operand whose subtype is appropriate ++// to the unsignedness (etc.) of the comparison. ++// ++// Later, the instruction which matches both the Comparison Op (a Bool) and ++// the flags (produced by the Cmp) specifies the coding of the comparison op ++// by matching a specific subtype of Bool operand below, such as cmpOpU. ++ ++// Comparision Code ++operand cmpOp() %{ ++ match(Bool); ++ ++ format %{ "" %} ++ interface(COND_INTER) %{ ++ equal(0x01); ++ not_equal(0x02); ++ greater(0x03); ++ greater_equal(0x04); ++ less(0x05); ++ less_equal(0x06); ++ overflow(0x7); ++ no_overflow(0x8); ++ %} ++%} ++ ++ ++// Comparision Code ++// Comparison Code, unsigned compare. Used by FP also, with ++// C2 (unordered) turned into GT or LT already. The other bits ++// C0 and C3 are turned into Carry & Zero flags. ++operand cmpOpU() %{ ++ match(Bool); ++ ++ format %{ "" %} ++ interface(COND_INTER) %{ ++ equal(0x01); ++ not_equal(0x02); ++ greater(0x03); ++ greater_equal(0x04); ++ less(0x05); ++ less_equal(0x06); ++ overflow(0x7); ++ no_overflow(0x8); ++ %} ++%} ++ ++ ++//----------Special Memory Operands-------------------------------------------- ++// Stack Slot Operand - This operand is used for loading and storing temporary ++// values on the stack where a match requires a value to ++// flow through memory. 
++operand stackSlotP(sRegP reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotI(sRegI reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotF(sRegF reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotD(sRegD reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotL(sRegL reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++ ++//------------------------OPERAND CLASSES-------------------------------------- ++//opclass memory( direct, indirect, indOffset16, indOffset32, indOffset32X, indIndexOffset ); ++opclass memory( indirect, indirectNarrow, indOffset8, indOffset32, indIndex, indIndexScale, baseIndexOffset8, baseIndexOffset8_convI2L, indOffset8Narrow, indIndexOffset8Narrow); ++ ++ ++//----------PIPELINE----------------------------------------------------------- ++// Rules which define the behavior of the target architectures pipeline. 
++ ++pipeline %{ ++ ++ //----------ATTRIBUTES--------------------------------------------------------- ++ attributes %{ ++ fixed_size_instructions; // Fixed size instructions ++ branch_has_delay_slot; // branch have delay slot in gs2 ++ max_instructions_per_bundle = 1; // 1 instruction per bundle ++ max_bundles_per_cycle = 4; // Up to 4 bundles per cycle ++ bundle_unit_size=4; ++ instruction_unit_size = 4; // An instruction is 4 bytes long ++ instruction_fetch_unit_size = 16; // The processor fetches one line ++ instruction_fetch_units = 1; // of 16 bytes ++ ++ // List of nop instructions ++ nops( MachNop ); ++ %} ++ ++ //----------RESOURCES---------------------------------------------------------- ++ // Resources are the functional units available to the machine ++ ++ resources(D1, D2, D3, D4, DECODE = D1 | D2 | D3| D4, ALU1, ALU2, ALU = ALU1 | ALU2, FPU1, FPU2, FPU = FPU1 | FPU2, MEM, BR); ++ ++ //----------PIPELINE DESCRIPTION----------------------------------------------- ++ // Pipeline Description specifies the stages in the machine's pipeline ++ ++ // IF: fetch ++ // ID: decode ++ // RD: read ++ // CA: caculate ++ // WB: write back ++ // CM: commit ++ ++ pipe_desc(IF, ID, RD, CA, WB, CM); ++ ++ ++ //----------PIPELINE CLASSES--------------------------------------------------- ++ // Pipeline Classes describe the stages in which input and output are ++ // referenced by the hardware pipeline. ++ ++ //No.1 Integer ALU reg-reg operation : dst <-- reg1 op reg2 ++ pipe_class ialu_regI_regI(mRegI dst, mRegI src1, mRegI src2) %{ ++ single_instruction; ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+1; ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.19 Integer mult operation : dst <-- reg1 mult reg2 ++ pipe_class ialu_mult(mRegI dst, mRegI src1, mRegI src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+5; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ pipe_class mulL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+10; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ //No.19 Integer div operation : dst <-- reg1 div reg2 ++ pipe_class ialu_div(mRegI dst, mRegI src1, mRegI src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+10; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ //No.19 Integer mod operation : dst <-- reg1 mod reg2 ++ pipe_class ialu_mod(mRegI dst, mRegI src1, mRegI src2) %{ ++ instruction_count(2); ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+10; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ //No.15 Long ALU reg-reg operation : dst <-- reg1 op reg2 ++ pipe_class ialu_regL_regL(mRegL dst, mRegL src1, mRegL src2) %{ ++ instruction_count(2); ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.18 Long ALU reg-imm16 operation : dst <-- reg1 op imm16 ++ pipe_class ialu_regL_imm16(mRegL dst, mRegL src) %{ ++ instruction_count(2); ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //no.16 load Long from memory : ++ pipe_class ialu_loadL(mRegL dst, memory mem) %{ ++ instruction_count(2); ++ mem : RD(read); ++ dst : WB(write)+5; ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.17 Store Long to Memory : ++ pipe_class ialu_storeL(mRegL src, memory mem) %{ ++ instruction_count(2); ++ mem : RD(read); ++ src : RD(read); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.2 Integer ALU reg-imm16 operation : dst <-- reg1 op imm16 ++ pipe_class ialu_regI_imm16(mRegI dst, mRegI src) %{ ++ single_instruction; ++ src : RD(read); ++ dst : 
WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.3 Integer move operation : dst <-- reg ++ pipe_class ialu_regI_mov(mRegI dst, mRegI src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.4 No instructions : do nothing ++ pipe_class empty( ) %{ ++ instruction_count(0); ++ %} ++ ++ //No.5 UnConditional branch : ++ pipe_class pipe_jump( label labl ) %{ ++ multiple_bundles; ++ DECODE : ID; ++ BR : RD; ++ %} ++ ++ //No.6 ALU Conditional branch : ++ pipe_class pipe_alu_branch(mRegI src1, mRegI src2, label labl ) %{ ++ multiple_bundles; ++ src1 : RD(read); ++ src2 : RD(read); ++ DECODE : ID; ++ BR : RD; ++ %} ++ ++ //no.7 load integer from memory : ++ pipe_class ialu_loadI(mRegI dst, memory mem) %{ ++ mem : RD(read); ++ dst : WB(write)+3; ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.8 Store Integer to Memory : ++ pipe_class ialu_storeI(mRegI src, memory mem) %{ ++ mem : RD(read); ++ src : RD(read); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ ++ //No.10 Floating FPU reg-reg operation : dst <-- reg1 op reg2 ++ pipe_class fpu_regF_regF(regF dst, regF src1, regF src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU : CA; ++ %} ++ ++ //No.22 Floating div operation : dst <-- reg1 div reg2 ++ pipe_class fpu_div(regF dst, regF src1, regF src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU2 : CA; ++ %} ++ ++ pipe_class fcvt_I2D(regD dst, mRegI src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU1 : CA; ++ %} ++ ++ pipe_class fcvt_D2I(mRegI dst, regD src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU1 : CA; ++ %} ++ ++ pipe_class pipe_mfc1(mRegI dst, regD src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ pipe_class pipe_mtc1(regD dst, mRegI src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ MEM : RD(5); ++ %} ++ ++ //No.23 Floating sqrt operation : dst <-- reg1 sqrt reg2 ++ pipe_class fpu_sqrt(regF dst, regF src1, regF src2) %{ ++ multiple_bundles; ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU2 : CA; ++ %} ++ ++ //No.11 Load Floating from Memory : ++ pipe_class fpu_loadF(regF dst, memory mem) %{ ++ instruction_count(1); ++ mem : RD(read); ++ dst : WB(write)+3; ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.12 Store Floating to Memory : ++ pipe_class fpu_storeF(regF src, memory mem) %{ ++ instruction_count(1); ++ mem : RD(read); ++ src : RD(read); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.13 FPU Conditional branch : ++ pipe_class pipe_fpu_branch(regF src1, regF src2, label labl ) %{ ++ multiple_bundles; ++ src1 : RD(read); ++ src2 : RD(read); ++ DECODE : ID; ++ BR : RD; ++ %} ++ ++//No.14 Floating FPU reg operation : dst <-- op reg ++ pipe_class fpu1_regF(regF dst, regF src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU : CA; ++ %} ++ ++ pipe_class long_memory_op() %{ ++ instruction_count(10); multiple_bundles; force_serialization; ++ fixed_latency(30); ++ %} ++ ++ pipe_class simple_call() %{ ++ instruction_count(10); multiple_bundles; force_serialization; ++ fixed_latency(200); ++ BR : RD; ++ %} ++ ++ pipe_class call() %{ ++ instruction_count(10); multiple_bundles; force_serialization; ++ fixed_latency(200); ++ %} ++ ++ //FIXME: ++ //No.9 Piple slow : for multi-instructions ++ pipe_class pipe_slow( ) %{ ++ instruction_count(20); ++ force_serialization; ++ multiple_bundles; ++ fixed_latency(50); ++ %} ++ ++%} ++ ++ ++ 
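++// Illustrative sketch only: how an instruct below ties back to the pipe_class
++// declarations above. An instruct selects its pipeline class through ins_pipe,
++// so a reg-reg integer op would look roughly like the commented example here.
++// The names 'exampleAddI' and the 'addu' encoding are hypothetical stand-ins,
++// not definitions taken from this port; the real patterns follow below.
++//
++//   instruct exampleAddI(mRegI dst, mRegI src1, mRegI src2) %{
++//     match(Set dst (AddI src1 src2));   // machine-independent subtree it replaces
++//     ins_cost(100);                     // relative cost used by instruction selection
++//     format %{ "addu $dst, $src1, $src2 #@exampleAddI" %}
++//     ins_encode %{
++//       __ addu($dst$$Register, $src1$$Register, $src2$$Register); // hypothetical encoding
++//     %}
++//     ins_pipe( ialu_regI_regI );        // pipe_class No.1 declared above
++//   %}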
++//----------INSTRUCTIONS------------------------------------------------------- ++// ++// match -- States which machine-independent subtree may be replaced ++// by this instruction. ++// ins_cost -- The estimated cost of this instruction is used by instruction ++// selection to identify a minimum cost tree of machine ++// instructions that matches a tree of machine-independent ++// instructions. ++// format -- A string providing the disassembly for this instruction. ++// The value of an instruction's operand may be inserted ++// by referring to it with a '$' prefix. ++// opcode -- Three instruction opcodes may be provided. These are referred ++// to within an encode class as $primary, $secondary, and $tertiary ++// respectively. The primary opcode is commonly used to ++// indicate the type of machine instruction, while secondary ++// and tertiary are often used for prefix options or addressing ++// modes. ++// ins_encode -- A list of encode classes with parameters. The encode class ++// name must have been defined in an 'enc_class' specification ++// in the encode section of the architecture description. ++ ++ ++// Load Integer ++instruct loadI(mRegI dst, memory mem) %{ ++ match(Set dst (LoadI mem)); ++ ++ ins_cost(125); ++ format %{ "lw $dst, $mem #@loadI" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_INT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadI_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadI mem))); ++ ++ ins_cost(125); ++ format %{ "lw $dst, $mem #@loadI_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_INT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Integer (32 bit signed) to Byte (8 bit signed) ++instruct loadI2B(mRegI dst, memory mem, immI_24 twentyfour) %{ ++ match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour)); ++ ++ ins_cost(125); ++ format %{ "lb $dst, $mem\t# int -> byte #@loadI2B" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned) ++instruct loadI2UB(mRegI dst, memory mem, immI_255 mask) %{ ++ match(Set dst (AndI (LoadI mem) mask)); ++ ++ ins_cost(125); ++ format %{ "lbu $dst, $mem\t# int -> ubyte #@loadI2UB" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Integer (32 bit signed) to Short (16 bit signed) ++instruct loadI2S(mRegI dst, memory mem, immI_16 sixteen) %{ ++ match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen)); ++ ++ ins_cost(125); ++ format %{ "lh $dst, $mem\t# int -> short #@loadI2S" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_SHORT); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned) ++instruct loadI2US(mRegI dst, memory mem, immI_65535 mask) %{ ++ match(Set dst (AndI (LoadI mem) mask)); ++ ++ ins_cost(125); ++ format %{ "lhu $dst, $mem\t# int -> ushort/char #@loadI2US" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_SHORT); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Long. 
++instruct loadL(mRegL dst, memory mem) %{ ++// predicate(!((LoadLNode*)n)->require_atomic_access()); ++ match(Set dst (LoadL mem)); ++ ++ ins_cost(250); ++ format %{ "ld $dst, $mem #@loadL" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LONG); ++ %} ++ ins_pipe( ialu_loadL ); ++%} ++ ++// Load Long - UNaligned ++instruct loadL_unaligned(mRegL dst, memory mem) %{ ++ match(Set dst (LoadL_unaligned mem)); ++ ++ // FIXME: Need more effective ldl/ldr ++ ins_cost(450); ++ format %{ "ld $dst, $mem #@loadL_unaligned\n\t" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LONG); ++ %} ++ ins_pipe( ialu_loadL ); ++%} ++ ++// Store Long ++instruct storeL_reg(memory mem, mRegL src) %{ ++ match(Set mem (StoreL mem src)); ++ ++ ins_cost(200); ++ format %{ "sd $mem, $src #@storeL_reg\n" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeL ); ++%} ++ ++instruct storeL_immL_0(memory mem, immL_0 zero) %{ ++ match(Set mem (StoreL mem zero)); ++ ++ ins_cost(180); ++ format %{ "sd zero, $mem #@storeL_immL_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeL ); ++%} ++ ++// Load Compressed Pointer ++instruct loadN(mRegN dst, memory mem) ++%{ ++ match(Set dst (LoadN mem)); ++ ++ ins_cost(125); // XXX ++ format %{ "lwu $dst, $mem\t# compressed ptr @ loadN" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++instruct loadN2P(mRegP dst, memory mem) ++%{ ++ match(Set dst (DecodeN (LoadN mem))); ++ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); ++ ++ ins_cost(125); // XXX ++ format %{ "lwu $dst, $mem\t# @ loadN2P" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++// Load Pointer ++instruct loadP(mRegP dst, memory mem) %{ ++ match(Set dst (LoadP mem)); ++ ++ ins_cost(125); ++ format %{ "ld $dst, $mem #@loadP" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LONG); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Klass Pointer ++instruct loadKlass(mRegP dst, memory mem) %{ ++ match(Set dst (LoadKlass mem)); ++ ++ ins_cost(125); ++ format %{ "MOV $dst,$mem @ loadKlass" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LONG); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load narrow Klass Pointer ++instruct loadNKlass(mRegN dst, memory mem) ++%{ ++ match(Set dst (LoadNKlass mem)); ++ ++ ins_cost(125); // XXX ++ format %{ "lwu $dst, 
$mem\t# compressed klass ptr @ loadNKlass" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++instruct loadN2PKlass(mRegP dst, memory mem) ++%{ ++ match(Set dst (DecodeNKlass (LoadNKlass mem))); ++ predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0); ++ ++ ins_cost(125); // XXX ++ format %{ "lwu $dst, $mem\t# compressed klass ptr @ loadN2PKlass" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++// Load Constant ++instruct loadConI(mRegI dst, immI src) %{ ++ match(Set dst src); ++ ++ ins_cost(150); ++ format %{ "mov $dst, $src #@loadConI" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ int value = $src$$constant; ++ __ move(dst, value); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++instruct loadConL_set64(mRegL dst, immL src) %{ ++ match(Set dst src); ++ ins_cost(120); ++ format %{ "li $dst, $src @ loadConL_set64" %} ++ ins_encode %{ ++ __ set64($dst$$Register, $src$$constant); ++ %} ++ ins_pipe(ialu_regL_regL); ++%} ++ ++instruct loadConL16(mRegL dst, immL16 src) %{ ++ match(Set dst src); ++ ins_cost(105); ++ format %{ "mov $dst, $src #@loadConL16" %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ int value = $src$$constant; ++ __ daddiu(dst_reg, R0, value); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++ ++instruct loadConL_immL_0(mRegL dst, immL_0 src) %{ ++ match(Set dst src); ++ ins_cost(100); ++ format %{ "mov $dst, zero #@loadConL_immL_0" %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ __ daddu(dst_reg, R0, R0); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Load Range ++instruct loadRange(mRegI dst, memory mem) %{ ++ match(Set dst (LoadRange mem)); ++ ++ ins_cost(125); ++ format %{ "MOV $dst,$mem @ loadRange" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_INT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct storeP(memory mem, mRegP src ) %{ ++ match(Set mem (StoreP mem src)); ++ ++ ins_cost(125); ++ format %{ "sd $src, $mem #@storeP" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store NULL Pointer, mark word, or other simple pointer constant. 
++instruct storeImmP_immP_0(memory mem, immP_0 zero) %{ ++ match(Set mem (StoreP mem zero)); ++ ++ ins_cost(125); ++ format %{ "mov $mem, $zero #@storeImmP_immP_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store Compressed Pointer ++instruct storeN(memory mem, mRegN src) ++%{ ++ match(Set mem (StoreN mem src)); ++ ++ ins_cost(125); // XXX ++ format %{ "sw $mem, $src\t# compressed ptr @ storeN" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeP2N(memory mem, mRegP src) ++%{ ++ match(Set mem (StoreN mem (EncodeP src))); ++ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); ++ ++ ins_cost(125); // XXX ++ format %{ "sw $mem, $src\t# @ storeP2N" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeNKlass(memory mem, mRegN src) ++%{ ++ match(Set mem (StoreNKlass mem src)); ++ ++ ins_cost(125); // XXX ++ format %{ "sw $mem, $src\t# compressed klass ptr @ storeNKlass" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeP2NKlass(memory mem, mRegP src) ++%{ ++ match(Set mem (StoreNKlass mem (EncodePKlass src))); ++ predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0); ++ ++ ins_cost(125); // XXX ++ format %{ "sw $mem, $src\t# @ storeP2NKlass" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeImmN_immN_0(memory mem, immN_0 zero) ++%{ ++ match(Set mem (StoreN mem zero)); ++ ++ ins_cost(125); // XXX ++ format %{ "storeN0 zero, $mem\t# compressed ptr" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store Byte ++instruct storeB_immB_0(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreB mem zero)); ++ ++ format %{ "mov $mem, zero #@storeB_immB_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeB(memory mem, mRegI src) %{ ++ match(Set mem (StoreB mem src)); ++ ++ ins_cost(125); ++ format %{ "sb $src, $mem #@storeB" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeB_convL2I(memory mem, mRegL src) %{ ++ match(Set mem (StoreB mem (ConvL2I src))); ++ ++ ins_cost(125); ++ format %{ "sb $src, $mem #@storeB_convL2I" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Load Byte (8bit signed) ++instruct loadB(mRegI dst, memory mem) %{ ++ match(Set dst (LoadB mem)); ++ ++ ins_cost(125); ++ format %{ "lb $dst, $mem #@loadB" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, 
MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadB_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadB mem))); ++ ++ ins_cost(125); ++ format %{ "lb $dst, $mem #@loadB_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Byte (8bit UNsigned) ++instruct loadUB(mRegI dst, memory mem) %{ ++ match(Set dst (LoadUB mem)); ++ ++ ins_cost(125); ++ format %{ "lbu $dst, $mem #@loadUB" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadUB_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadUB mem))); ++ ++ ins_cost(125); ++ format %{ "lbu $dst, $mem #@loadUB_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Short (16bit signed) ++instruct loadS(mRegI dst, memory mem) %{ ++ match(Set dst (LoadS mem)); ++ ++ ins_cost(125); ++ format %{ "lh $dst, $mem #@loadS" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Short (16 bit signed) to Byte (8 bit signed) ++instruct loadS2B(mRegI dst, memory mem, immI_24 twentyfour) %{ ++ match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour)); ++ ++ ins_cost(125); ++ format %{ "lb $dst, $mem\t# short -> byte #@loadS2B" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct loadS_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadS mem))); ++ ++ ins_cost(125); ++ format %{ "lh $dst, $mem #@loadS_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Store Integer Immediate ++instruct storeI_immI_0(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreI mem zero)); ++ ++ format %{ "mov $mem, zero #@storeI_immI_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store Integer ++instruct storeI(memory mem, mRegI src) %{ ++ match(Set mem (StoreI mem src)); ++ ++ ins_cost(125); ++ format %{ "sw $mem, $src #@storeI" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeI_convL2I(memory mem, mRegL src) %{ ++ match(Set mem (StoreI mem (ConvL2I src))); ++ ++ ins_cost(125); ++ format %{ "sw $mem, $src #@storeI_convL2I" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Load Float ++instruct loadF(regF dst, memory mem) %{ ++ match(Set dst (LoadF mem)); ++ ++ ins_cost(150); ++ format %{ "loadF $dst, $mem #@loadF" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_FLOAT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct 
loadConP_general(mRegP dst, immP src) %{ ++ match(Set dst src); ++ ++ ins_cost(120); ++ format %{ "li $dst, $src #@loadConP_general" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ long* value = (long*)$src$$constant; ++ ++ if($src->constant_reloc() == relocInfo::metadata_type){ ++ int klass_index = __ oop_recorder()->find_index((Klass*)value); ++ RelocationHolder rspec = metadata_Relocation::spec(klass_index); ++ ++ __ relocate(rspec); ++ __ patchable_set48(dst, (long)value); ++ } else if($src->constant_reloc() == relocInfo::oop_type){ ++ int oop_index = __ oop_recorder()->find_index((jobject)value); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ ++ __ relocate(rspec); ++ __ patchable_set48(dst, (long)value); ++ } else if ($src->constant_reloc() == relocInfo::none) { ++ __ set64(dst, (long)value); ++ } ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConP_no_oop_cheap(mRegP dst, immP_no_oop_cheap src) %{ ++ match(Set dst src); ++ ++ ins_cost(80); ++ format %{ "li $dst, $src @ loadConP_no_oop_cheap" %} ++ ++ ins_encode %{ ++ __ set64($dst$$Register, $src$$constant); ++ %} ++ ++ ins_pipe(ialu_regI_regI); ++%} ++ ++ ++instruct loadConP_poll(mRegP dst, immP_poll src) %{ ++ match(Set dst src); ++ ++ ins_cost(50); ++ format %{ "li $dst, $src #@loadConP_poll" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ intptr_t value = (intptr_t)$src$$constant; ++ ++ __ set64(dst, (jlong)value); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConP_immP_0(mRegP dst, immP_0 src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(50); ++ format %{ "mov $dst, R0\t# ptr" %} ++ ins_encode %{ ++ Register dst_reg = $dst$$Register; ++ __ daddu(dst_reg, R0, R0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConN_immN_0(mRegN dst, immN_0 src) %{ ++ match(Set dst src); ++ format %{ "move $dst, R0\t# compressed NULL ptr" %} ++ ins_encode %{ ++ __ move($dst$$Register, R0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConN(mRegN dst, immN src) %{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "li $dst, $src\t# compressed ptr @ loadConN" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ __ set_narrow_oop(dst, (jobject)$src$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); // XXX ++%} ++ ++instruct loadConNKlass(mRegN dst, immNKlass src) %{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "li $dst, $src\t# compressed klass ptr @ loadConNKlass" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ __ set_narrow_klass(dst, (Klass*)$src$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); // XXX ++%} ++ ++//FIXME ++// Tail Call; Jump from runtime stub to Java code. ++// Also known as an 'interprocedural jump'. ++// Target of jump will eventually return to caller. ++// TailJump below removes the return address. ++instruct TailCalljmpInd(mRegP jump_target, mRegP method_oop) %{ ++ match(TailCall jump_target method_oop ); ++ ins_cost(300); ++ format %{ "JMP $jump_target \t# @TailCalljmpInd" %} ++ ++ ins_encode %{ ++ Register target = $jump_target$$Register; ++ Register oop = $method_oop$$Register; ++ ++ // RA will be used in generate_forward_exception() ++ __ push(RA); ++ ++ __ move(S3, oop); ++ __ jr(target); ++ __ delayed()->nop(); ++ %} ++ ++ ins_pipe( pipe_jump ); ++%} ++ ++// Create exception oop: created by stack-crawling runtime code. ++// Created exception is now available to this handler, and is setup ++// just prior to jumping to this handler. No code emitted. 
++instruct CreateException( a0_RegP ex_oop ) ++%{ ++ match(Set ex_oop (CreateEx)); ++ ++ // use the following format syntax ++ format %{ "# exception oop is in A0; no code emitted @CreateException" %} ++ ins_encode %{ ++ // X86 leaves this function empty ++ __ block_comment("CreateException is empty in MIPS"); ++ %} ++ ins_pipe( empty ); ++// ins_pipe( pipe_jump ); ++%} ++ ++ ++/* The mechanism of exception handling is clear now. ++ ++- Common try/catch: ++ [stubGenerator_mips.cpp] generate_forward_exception() ++ |- V0, V1 are created ++ |- T9 <= SharedRuntime::exception_handler_for_return_address ++ `- jr T9 ++ `- the caller's exception_handler ++ `- jr OptoRuntime::exception_blob ++ `- here ++- Rethrow(e.g. 'unwind'): ++ * The callee: ++ |- an exception is triggered during execution ++ `- exits the callee method through RethrowException node ++ |- The callee pushes exception_oop(T0) and exception_pc(RA) ++ `- The callee jumps to OptoRuntime::rethrow_stub() ++ * In OptoRuntime::rethrow_stub: ++ |- The VM calls _rethrow_Java to determine the return address in the caller method ++ `- exits the stub with tailjmpInd ++ |- pops exception_oop(V0) and exception_pc(V1) ++ `- jumps to the return address(usually an exception_handler) ++ * The caller: ++ `- continues processing the exception_blob with V0/V1 ++*/ ++ ++// Rethrow exception: ++// The exception oop will come in the first argument position. ++// Then JUMP (not call) to the rethrow stub code. ++instruct RethrowException() ++%{ ++ match(Rethrow); ++ ++ // use the following format syntax ++ format %{ "JMP rethrow_stub #@RethrowException" %} ++ ins_encode %{ ++ __ block_comment("@ RethrowException"); ++ ++ cbuf.set_insts_mark(); ++ cbuf.relocate(cbuf.insts_mark(), runtime_call_Relocation::spec()); ++ ++ // call OptoRuntime::rethrow_stub to get the exception handler in parent method ++ __ patchable_jump((address)OptoRuntime::rethrow_stub()); ++ %} ++ ins_pipe( pipe_jump ); ++%} ++ ++// ============================================================================ ++// Branch Instructions --- long offset versions ++ ++// Jump Direct ++instruct jmpDir_long(label labl) %{ ++ match(Goto); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "JMP $labl #@jmpDir_long" %} ++ ++ ins_encode %{ ++ Label* L = $labl$$label; ++ __ jmp_far(*L); ++ %} ++ ++ ins_pipe( pipe_jump ); ++ //ins_pc_relative(1); ++%} ++ ++// Jump Direct Conditional - Label defines a relative address from Jcc+1 ++instruct jmpLoopEnd_long(cmpOp cop, mRegI src1, mRegI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_long" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cop$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ __ slt(AT, op2, op1); ++ __ bne_long(AT, R0, *L); ++ break; ++ case 0x04: //above_equal ++ __ slt(AT, op1, op2); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ slt(AT, op1, op2); ++ __ bne_long(AT, R0, *L); ++ break; ++ case 0x06: //below_equal ++ __ slt(AT, op2, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++%} ++ ++instruct jmpLoopEnd_reg_immI_long(cmpOp cop, mRegI src1, immI src2, label labl) %{ ++ 
match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_reg_immI_long" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = AT; ++ Label* L = $labl$$label; ++ int flag = $cop$$cmpcode; ++ ++ __ move(op2, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ __ slt(AT, op2, op1); ++ __ bne_long(AT, R0, *L); ++ break; ++ case 0x04: //above_equal ++ __ slt(AT, op1, op2); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ slt(AT, op1, op2); ++ __ bne_long(AT, R0, *L); ++ break; ++ case 0x06: //below_equal ++ __ slt(AT, op2, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++%} ++ ++ ++// This match pattern is created for StoreIConditional since I cannot match IfNode without a RegFlags! ++instruct jmpCon_flags_long(cmpOp cop, FlagsReg cr, label labl) %{ ++ match(If cop cr); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $labl #mips uses T0 as equivalent to eflag @jmpCon_flags_long" %} ++ ++ ins_encode %{ ++ Label* L = $labl$$label; ++ switch($cop$$cmpcode) { ++ case 0x01: //equal ++ __ bne_long($cr$$Register, R0, *L); ++ break; ++ case 0x02: //not equal ++ __ beq_long($cr$$Register, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++%} ++ ++// Conditional jumps ++instruct branchConP_zero_long(cmpOpU cmp, mRegP op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP op1 zero)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConP_zero_long" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConN2P_zero_long(cmpOpU cmp, mRegN op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP (DecodeN op1) zero)); ++ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConN2P_zero_long" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) ++ { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++ ++instruct branchConP_long(cmpOpU cmp, mRegP op1, mRegP op2, label labl) %{ ++ match(If cmp (CmpP op1 op2)); ++// predicate(can_branch_register(_kids[0]->_leaf, _kids[1]->_leaf)); ++ effect(USE labl); ++ ++ ins_cost(200); ++ format %{ "b$cmp $op1, $op2, $labl #@branchConP_long" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = $op2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ __ 
sltu(AT, op2, op1); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1, op2); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1, op2); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, op2, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct cmpN_null_branch_long(cmpOp cmp, mRegN op1, immN_0 null, label labl) %{ ++ match(If cmp (CmpN op1 null)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,0\t! compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_null_branch_long" %} ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++//TODO: pipe_branchP or create pipe_branchN LEE ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct cmpN_reg_branch_long(cmpOp cmp, mRegN op1, mRegN op2, label labl) %{ ++ match(If cmp (CmpN op1 op2)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,$op2\t! compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_reg_branch_long" %} ++ ins_encode %{ ++ Register op1_reg = $op1$$Register; ++ Register op2_reg = $op2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1_reg, op2_reg, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1_reg, op2_reg, *L); ++ break; ++ case 0x03: //above ++ __ sltu(AT, op2_reg, op1_reg); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1_reg, op2_reg); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1_reg, op2_reg); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, op2_reg, op1_reg); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConIU_reg_reg_long(cmpOpU cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_reg_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ __ sltu(AT, op2, op1); ++ __ bne_long(AT, R0, *L); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1, op2); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1, op2); ++ __ bne_long(AT, R0, *L); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, op2, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++ ++instruct branchConIU_reg_imm_long(cmpOpU cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_imm_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ move(AT, val); ++ 
switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, AT, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, AT, *L); ++ break; ++ case 0x03: //above ++ __ sltu(AT, AT, op1); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1, AT); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1, AT); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, AT, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConI_reg_reg_long(cmpOp cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_reg_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ __ slt(AT, op2, op1); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //above_equal ++ __ slt(AT, op1, op2); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ slt(AT, op1, op2); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //below_equal ++ __ slt(AT, op2, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConI_reg_immI_0_long(cmpOp cmp, mRegI src1, immI_0 src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ ins_cost(170); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_immI_0_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, R0, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, R0, *L); ++ break; ++ case 0x03: //greater ++ __ slt(AT, R0, op1); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //greater_equal ++ __ slt(AT, op1, R0); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //less ++ __ slt(AT, op1, R0); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //less_equal ++ __ slt(AT, R0, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConI_reg_imm_long(cmpOp cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ ins_cost(200); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_imm_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ move(AT, val); ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, AT, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, AT, *L); ++ break; ++ case 0x03: //greater ++ __ slt(AT, AT, op1); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //greater_equal ++ __ slt(AT, op1, AT); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //less ++ __ slt(AT, op1, AT); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //less_equal ++ __ slt(AT, AT, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct 
branchConIU_reg_immI_0_long(cmpOpU cmp, mRegI src1, immI_0 zero, label labl) %{ ++ match( If cmp (CmpU src1 zero) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConIU_reg_immI_0_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, R0, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, R0, *L); ++ break; ++ case 0x03: //above ++ __ bne_long(R0, op1, *L); ++ break; ++ case 0x04: //above_equal ++ __ beq_long(R0, R0, *L); ++ break; ++ case 0x05: //below ++ return; ++ break; ++ case 0x06: //below_equal ++ __ beq_long(op1, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++ ++instruct branchConIU_reg_immI16_long(cmpOpU cmp, mRegI src1, immI16 src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ ins_cost(180); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_immI16_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ move(AT, val); ++ __ beq_long(op1, AT, *L); ++ break; ++ case 0x02: //not_equal ++ __ move(AT, val); ++ __ bne_long(op1, AT, *L); ++ break; ++ case 0x03: //above ++ __ move(AT, val); ++ __ sltu(AT, AT, op1); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //above_equal ++ __ sltiu(AT, op1, val); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ sltiu(AT, op1, val); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //below_equal ++ __ move(AT, val); ++ __ sltu(AT, AT, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++ ++instruct branchConL_regL_regL_long(cmpOp cmp, mRegL src1, mRegL src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_regL_long" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = as_Register($src2$$reg); ++ ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x02: //not_equal ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x03: //greater ++ __ slt(AT, opr2_reg, opr1_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x04: //greater_equal ++ __ slt(AT, opr1_reg, opr2_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ case 0x05: //less ++ __ slt(AT, opr1_reg, opr2_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x06: //less_equal ++ __ slt(AT, opr2_reg, opr1_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConUL_regL_regL_long(cmpOp cmp, mRegL src1, mRegL src2, label labl) %{ ++ match(If cmp (CmpUL src1 src2)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_regL_long" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = as_Register($src2$$reg); ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ 
break; ++ ++ case 0x02: // not_equal ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x03: // greater ++ __ sltu(AT, opr2_reg, opr1_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x04: // greater_equal ++ __ sltu(AT, opr1_reg, opr2_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ case 0x05: // less ++ __ sltu(AT, opr1_reg, opr2_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x06: // less_equal ++ __ sltu(AT, opr2_reg, opr1_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++%} ++ ++instruct branchConL_regL_immL_0_long(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ ++ match( If cmp (CmpL src1 zero) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConL_regL_immL_0_long" %} ++ ins_cost(150); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = R0; ++ ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x02: //not_equal ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x03: //greater ++ __ slt(AT, opr2_reg, opr1_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x04: //greater_equal ++ __ slt(AT, opr1_reg, opr2_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ case 0x05: //less ++ __ slt(AT, opr1_reg, opr2_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x06: //less_equal ++ __ slt(AT, opr2_reg, opr1_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConUL_regL_immL_0_long(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ ++ match(If cmp (CmpUL src1 zero)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConUL_regL_immL_0_long" %} ++ ins_cost(150); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = R0; ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ case 0x04: // greater_equal ++ case 0x06: // less_equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x02: // not_equal ++ case 0x03: // greater ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x05: // less ++ __ beq_long(R0, R0, *target); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++%} ++ ++instruct branchConL_regL_immL_long(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_immL_long" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = AT; ++ ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ set64(opr2_reg, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x02: //not_equal ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x03: //greater ++ __ slt(AT, opr2_reg, opr1_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x04: //greater_equal ++ __ slt(AT, opr1_reg, opr2_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ case 0x05: //less ++ __ slt(AT, opr1_reg, opr2_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 
0x06: //less_equal ++ __ slt(AT, opr2_reg, opr1_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConUL_regL_immL_long(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match(If cmp (CmpUL src1 src2)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_immL_long" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = AT; ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ set64(opr2_reg, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: // equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x02: // not_equal ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x03: // greater ++ __ sltu(AT, opr2_reg, opr1_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x04: // greater_equal ++ __ sltu(AT, opr1_reg, opr2_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ case 0x05: // less ++ __ sltu(AT, opr1_reg, opr2_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x06: // less_equal ++ __ sltu(AT, opr2_reg, opr1_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++%} ++ ++//FIXME ++instruct branchConF_reg_reg_long(cmpOp cmp, regF src1, regF src2, label labl) %{ ++ match( If cmp (CmpF src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConF_reg_reg_long" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ __ c_eq_s(reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x02: // not_equal ++ __ c_eq_s(reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x03: // greater ++ __ c_ule_s(reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x04: // greater_equal ++ __ c_ult_s(reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x05: // less ++ __ c_ult_s(reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x06: // less_equal ++ __ c_ule_s(reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_slow); ++%} ++ ++instruct branchConD_reg_reg_long(cmpOp cmp, regD src1, regD src2, label labl) %{ ++ match( If cmp (CmpD src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConD_reg_reg_long" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ __ c_eq_d(reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x02: // not_equal ++ // c_ueq_d cannot distinguish NaN from equal. Double.isNaN(Double) is implemented by 'f != f', so the use of c_ueq_d causes bugs. 
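++      // With the unordered c_ueq_d, a NaN operand would set the FP condition
++      // bit, so the bc1f-based not_equal branch would not be taken and NaN
++      // would appear equal to itself, breaking the (f != f) idiom. The
++      // ordered c_eq_d is false for NaN, so bc1f correctly takes the
++      // not-equal path.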
++ __ c_eq_d(reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x03: // greater ++ __ c_ule_d(reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x04: // greater_equal ++ __ c_ult_d(reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x05: // less ++ __ c_ult_d(reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x06: // less_equal ++ __ c_ule_d(reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_slow); ++%} ++ ++ ++// ============================================================================ ++// Branch Instructions -- short offset versions ++ ++// Jump Direct ++instruct jmpDir_short(label labl) %{ ++ match(Goto); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "JMP $labl #@jmpDir_short" %} ++ ++ ins_encode %{ ++ Label &L = *($labl$$label); ++ if(&L) ++ __ b(L); ++ else ++ __ b(int(0)); ++ __ delayed()->nop(); ++ %} ++ ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++// Jump Direct Conditional - Label defines a relative address from Jcc+1 ++instruct jmpLoopEnd_short(cmpOp cop, mRegI src1, mRegI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_short" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cop$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ case 0x03: //above ++ __ slt(AT, op2, op1); ++ if(&L) ++ __ bne(AT, R0, L); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ slt(AT, op1, op2); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ slt(AT, op1, op2); ++ if(&L) ++ __ bne(AT, R0, L); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ slt(AT, op2, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++instruct jmpLoopEnd_reg_immI_short(cmpOp cop, mRegI src1, immI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_reg_immI_short" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = AT; ++ Label &L = *($labl$$label); ++ int flag = $cop$$cmpcode; ++ ++ __ move(op2, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ case 0x03: //above ++ __ slt(AT, op2, op1); ++ if(&L) ++ __ bne(AT, R0, L); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ slt(AT, op1, op2); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ slt(AT, op1, op2); ++ if(&L) ++ __ bne(AT, R0, L); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ slt(AT, op2, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ 
break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++ ++// This match pattern is created for StoreIConditional since I cannot match IfNode without a RegFlags! ++instruct jmpCon_flags_short(cmpOp cop, FlagsReg cr, label labl) %{ ++ match(If cop cr); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $labl #mips uses T0 as equivalent to eflag @jmpCon_flags_short" %} ++ ++ ins_encode %{ ++ Label &L = *($labl$$label); ++ switch($cop$$cmpcode) { ++ case 0x01: //equal ++ if (&L) ++ __ bne($cr$$Register, R0, L); ++ else ++ __ bne($cr$$Register, R0, (int)0); ++ break; ++ case 0x02: //not equal ++ if (&L) ++ __ beq($cr$$Register, R0, L); ++ else ++ __ beq($cr$$Register, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++// Conditional jumps ++instruct branchConP_zero_short(cmpOpU cmp, mRegP op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP op1 zero)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConP_zero_short" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConN2P_zero_short(cmpOpU cmp, mRegN op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP (DecodeN op1) zero)); ++ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConN2P_zero_short" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) ++ { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConP_short(cmpOpU cmp, mRegP op1, mRegP op2, label labl) %{ ++ match(If cmp (CmpP op1 op2)); ++// predicate(can_branch_register(_kids[0]->_leaf, _kids[1]->_leaf)); ++ effect(USE labl); ++ ++ ins_cost(200); ++ format %{ "b$cmp $op1, $op2, $labl #@branchConP_short" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = $op2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ case 0x03: //above ++ __ sltu(AT, op2, op1); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1, op2); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1, 
op2); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, op2, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct cmpN_null_branch_short(cmpOp cmp, mRegN op1, immN_0 null, label labl) %{ ++ match(If cmp (CmpN op1 null)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,0\t! compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_null_branch_short" %} ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++//TODO: pipe_branchP or create pipe_branchN LEE ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct cmpN_reg_branch_short(cmpOp cmp, mRegN op1, mRegN op2, label labl) %{ ++ match(If cmp (CmpN op1 op2)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,$op2\t! compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_reg_branch_short" %} ++ ins_encode %{ ++ Register op1_reg = $op1$$Register; ++ Register op2_reg = $op2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1_reg, op2_reg, L); ++ else ++ __ beq(op1_reg, op2_reg, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1_reg, op2_reg, L); ++ else ++ __ bne(op1_reg, op2_reg, (int)0); ++ break; ++ case 0x03: //above ++ __ sltu(AT, op2_reg, op1_reg); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1_reg, op2_reg); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1_reg, op2_reg); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, op2_reg, op1_reg); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConIU_reg_reg_short(cmpOpU cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_reg_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ case 0x03: //above ++ __ sltu(AT, op2, op1); ++ if(&L) ++ __ bne(AT, R0, L); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1, op2); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1, op2); ++ if(&L) ++ __ bne(AT, R0, L); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ case 
0x06: //below_equal ++ __ sltu(AT, op2, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConIU_reg_imm_short(cmpOpU cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_imm_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ move(AT, val); ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, AT, L); ++ else ++ __ beq(op1, AT, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, AT, L); ++ else ++ __ bne(op1, AT, (int)0); ++ break; ++ case 0x03: //above ++ __ sltu(AT, AT, op1); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1, AT); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1, AT); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, AT, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConI_reg_reg_short(cmpOp cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_reg_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ case 0x03: //above ++ __ slt(AT, op2, op1); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ slt(AT, op1, op2); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ slt(AT, op1, op2); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ slt(AT, op2, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConI_reg_immI_0_short(cmpOp cmp, mRegI src1, immI_0 src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ ins_cost(170); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_immI_0_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, R0, L); ++ else ++ __ beq(op1, R0, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, R0, L); ++ else ++ __ bne(op1, R0, (int)0); ++ break; ++ case 0x03: //greater ++ if(&L) ++ __ bgtz(op1, L); ++ else ++ __ bgtz(op1, (int)0); ++ break; ++ case 0x04: //greater_equal ++ if(&L) ++ __ 
bgez(op1, L); ++ else ++ __ bgez(op1, (int)0); ++ break; ++ case 0x05: //less ++ if(&L) ++ __ bltz(op1, L); ++ else ++ __ bltz(op1, (int)0); ++ break; ++ case 0x06: //less_equal ++ if(&L) ++ __ blez(op1, L); ++ else ++ __ blez(op1, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConI_reg_imm_short(cmpOp cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ ins_cost(200); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_imm_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ move(AT, val); ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, AT, L); ++ else ++ __ beq(op1, AT, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, AT, L); ++ else ++ __ bne(op1, AT, (int)0); ++ break; ++ case 0x03: //greater ++ __ slt(AT, AT, op1); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x04: //greater_equal ++ __ slt(AT, op1, AT); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //less ++ __ slt(AT, op1, AT); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x06: //less_equal ++ __ slt(AT, AT, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConIU_reg_immI_0_short(cmpOpU cmp, mRegI src1, immI_0 zero, label labl) %{ ++ match( If cmp (CmpU src1 zero) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConIU_reg_immI_0_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, R0, L); ++ else ++ __ beq(op1, R0, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, R0, L); ++ else ++ __ bne(op1, R0, (int)0); ++ break; ++ case 0x03: //above ++ if(&L) ++ __ bne(R0, op1, L); ++ else ++ __ bne(R0, op1, (int)0); ++ break; ++ case 0x04: //above_equal ++ if(&L) ++ __ beq(R0, R0, L); ++ else ++ __ beq(R0, R0, (int)0); ++ break; ++ case 0x05: //below ++ return; ++ break; ++ case 0x06: //below_equal ++ if(&L) ++ __ beq(op1, R0, L); ++ else ++ __ beq(op1, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConIU_reg_immI16_short(cmpOpU cmp, mRegI src1, immI16 src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ ins_cost(180); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_immI16_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ move(AT, val); ++ if (&L) ++ __ beq(op1, AT, L); ++ else ++ __ beq(op1, AT, (int)0); ++ break; ++ case 0x02: //not_equal ++ __ move(AT, val); ++ if (&L) ++ __ bne(op1, AT, L); ++ else ++ __ bne(op1, AT, (int)0); ++ break; ++ case 0x03: //above ++ __ move(AT, val); ++ __ sltu(AT, AT, op1); ++ if(&L) ++ __ bne(R0, AT, L); ++ else 
++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ sltiu(AT, op1, val); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ sltiu(AT, op1, val); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ move(AT, val); ++ __ sltu(AT, AT, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConL_regL_regL_short(cmpOp cmp, mRegL src1, mRegL src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_regL_short" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = as_Register($src2$$reg); ++ ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&target) ++ __ beq(opr1_reg, opr2_reg, target); ++ else ++ __ beq(opr1_reg, opr2_reg, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x02: //not_equal ++ if(&target) ++ __ bne(opr1_reg, opr2_reg, target); ++ else ++ __ bne(opr1_reg, opr2_reg, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x03: //greater ++ __ slt(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x04: //greater_equal ++ __ slt(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ __ delayed()->nop(); ++ ++ break; ++ ++ case 0x05: //less ++ __ slt(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ __ delayed()->nop(); ++ ++ break; ++ ++ case 0x06: //less_equal ++ __ slt(AT, opr2_reg, opr1_reg); ++ ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ __ delayed()->nop(); ++ ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConUL_regL_regL_short(cmpOp cmp, mRegL src1, mRegL src2, label labl) %{ ++ match( If cmp (CmpUL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_regL_short" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = as_Register($src2$$reg); ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ if (&target) ++ __ beq(opr1_reg, opr2_reg, target); ++ else ++ __ beq(opr1_reg, opr2_reg, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x02: // not_equal ++ if(&target) ++ __ bne(opr1_reg, opr2_reg, target); ++ else ++ __ bne(opr1_reg, opr2_reg, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x03: // greater ++ __ sltu(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x04: // greater_equal ++ __ sltu(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x05: // less ++ __ sltu(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x06: // less_equal ++ __ 
sltu(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++ ins_short_branch(1); ++%} ++ ++instruct branchConL_regL_immL_0_short(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ ++ match( If cmp (CmpL src1 zero) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConL_regL_immL_0_short" %} ++ ins_cost(150); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&target) ++ __ beq(opr1_reg, R0, target); ++ else ++ __ beq(opr1_reg, R0, int(0)); ++ break; ++ ++ case 0x02: //not_equal ++ if(&target) ++ __ bne(opr1_reg, R0, target); ++ else ++ __ bne(opr1_reg, R0, (int)0); ++ break; ++ ++ case 0x03: //greater ++ if(&target) ++ __ bgtz(opr1_reg, target); ++ else ++ __ bgtz(opr1_reg, (int)0); ++ break; ++ ++ case 0x04: //greater_equal ++ if(&target) ++ __ bgez(opr1_reg, target); ++ else ++ __ bgez(opr1_reg, (int)0); ++ break; ++ ++ case 0x05: //less ++ __ slt(AT, opr1_reg, R0); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ ++ case 0x06: //less_equal ++ if (&target) ++ __ blez(opr1_reg, target); ++ else ++ __ blez(opr1_reg, int(0)); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConUL_regL_immL_0_short(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ ++ match(If cmp (CmpUL src1 zero)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConUL_regL_immL_0_short" %} ++ ins_cost(150); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ case 0x04: // greater_equal ++ case 0x06: // less_equal ++ if (&target) ++ __ beq(opr1_reg, R0, target); ++ else ++ __ beq(opr1_reg, R0, int(0)); ++ break; ++ ++ case 0x02: // not_equal ++ case 0x03: // greater ++ if(&target) ++ __ bne(opr1_reg, R0, target); ++ else ++ __ bne(opr1_reg, R0, (int)0); ++ break; ++ ++ case 0x05: // less ++ if(&target) ++ __ beq(R0, R0, target); ++ else ++ __ beq(R0, R0, (int)0); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++ ins_short_branch(1); ++%} ++ ++instruct branchConL_regL_immL_short(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_immL_short" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = AT; ++ ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ set64(opr2_reg, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&target) ++ __ beq(opr1_reg, opr2_reg, target); ++ else ++ __ beq(opr1_reg, opr2_reg, (int)0); ++ break; ++ ++ case 0x02: //not_equal ++ if(&target) ++ __ bne(opr1_reg, opr2_reg, target); ++ else ++ __ bne(opr1_reg, opr2_reg, (int)0); ++ break; ++ ++ case 0x03: //greater ++ __ slt(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ ++ case 0x04: //greater_equal ++ __ slt(AT, opr1_reg, opr2_reg); ++ 
if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ ++ case 0x05: //less ++ __ slt(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ ++ case 0x06: //less_equal ++ __ slt(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConUL_regL_immL_short(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match(If cmp (CmpUL src1 src2)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_immL_short" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = AT; ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ set64(opr2_reg, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: // equal ++ if (&target) ++ __ beq(opr1_reg, opr2_reg, target); ++ else ++ __ beq(opr1_reg, opr2_reg, (int)0); ++ break; ++ ++ case 0x02: // not_equal ++ if(&target) ++ __ bne(opr1_reg, opr2_reg, target); ++ else ++ __ bne(opr1_reg, opr2_reg, (int)0); ++ break; ++ ++ case 0x03: // greater ++ __ sltu(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ ++ case 0x04: // greater_equal ++ __ sltu(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ ++ case 0x05: // less ++ __ sltu(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ ++ case 0x06: // less_equal ++ __ sltu(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++ ins_short_branch(1); ++%} ++ ++//FIXME ++instruct branchConF_reg_reg_short(cmpOp cmp, regF src1, regF src2, label labl) %{ ++ match( If cmp (CmpF src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConF_reg_reg_short" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label& L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ __ c_eq_s(reg_op1, reg_op2); ++ if (&L) ++ __ bc1t(L); ++ else ++ __ bc1t((int)0); ++ break; ++ case 0x02: // not_equal ++ __ c_eq_s(reg_op1, reg_op2); ++ if (&L) ++ __ bc1f(L); ++ else ++ __ bc1f((int)0); ++ break; ++ case 0x03: // greater ++ __ c_ule_s(reg_op1, reg_op2); ++ if(&L) ++ __ bc1f(L); ++ else ++ __ bc1f((int)0); ++ break; ++ case 0x04: // greater_equal ++ __ c_ult_s(reg_op1, reg_op2); ++ if(&L) ++ __ bc1f(L); ++ else ++ __ bc1f((int)0); ++ break; ++ case 0x05: // less ++ __ c_ult_s(reg_op1, reg_op2); ++ if(&L) ++ __ bc1t(L); ++ else ++ __ bc1t((int)0); ++ break; ++ case 0x06: // less_equal ++ __ c_ule_s(reg_op1, reg_op2); ++ if(&L) ++ __ bc1t(L); ++ else ++ __ bc1t((int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_fpu_branch); ++ ins_short_branch(1); ++%} ++ ++instruct branchConD_reg_reg_short(cmpOp cmp, regD src1, regD src2, label labl) %{ ++ match( If cmp (CmpD src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl 
#@branchConD_reg_reg_short" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label& L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ __ c_eq_d(reg_op1, reg_op2); ++ if (&L) ++ __ bc1t(L); ++ else ++ __ bc1t((int)0); ++ break; ++ case 0x02: // not_equal ++ // c_ueq_d cannot distinguish NaN from equal. Double.isNaN(Double) is implemented by 'f != f', so the use of c_ueq_d causes bugs. ++ __ c_eq_d(reg_op1, reg_op2); ++ if (&L) ++ __ bc1f(L); ++ else ++ __ bc1f((int)0); ++ break; ++ case 0x03: // greater ++ __ c_ule_d(reg_op1, reg_op2); ++ if(&L) ++ __ bc1f(L); ++ else ++ __ bc1f((int)0); ++ break; ++ case 0x04: // greater_equal ++ __ c_ult_d(reg_op1, reg_op2); ++ if(&L) ++ __ bc1f(L); ++ else ++ __ bc1f((int)0); ++ break; ++ case 0x05: // less ++ __ c_ult_d(reg_op1, reg_op2); ++ if(&L) ++ __ bc1t(L); ++ else ++ __ bc1t((int)0); ++ break; ++ case 0x06: // less_equal ++ __ c_ule_d(reg_op1, reg_op2); ++ if(&L) ++ __ bc1t(L); ++ else ++ __ bc1t((int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_fpu_branch); ++ ins_short_branch(1); ++%} ++ ++// =================== End of branch instructions ========================== ++ ++// Call Runtime Instruction ++instruct CallRuntimeDirect(method meth) %{ ++ match(CallRuntime ); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL,runtime #@CallRuntimeDirect" %} ++ ins_encode( Java_To_Runtime( meth ) ); ++ ins_pipe( pipe_slow ); ++ ins_alignment(16); ++%} ++ ++ ++ ++//------------------------MemBar Instructions------------------------------- ++//Memory barrier flavors ++ ++instruct membar_acquire() %{ ++ match(MemBarAcquire); ++ ins_cost(400); ++ ++ format %{ "MEMBAR-acquire @ membar_acquire" %} ++ ins_encode %{ ++ __ sync(); ++ %} ++ ins_pipe(empty); ++%} ++ ++instruct load_fence() %{ ++ match(LoadFence); ++ ins_cost(400); ++ ++ format %{ "MEMBAR @ load_fence" %} ++ ins_encode %{ ++ __ sync(); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct membar_acquire_lock() ++%{ ++ match(MemBarAcquireLock); ++ ins_cost(0); ++ ++ size(0); ++ format %{ "MEMBAR-acquire (acquire as part of CAS in prior FastLock so empty encoding) @ membar_acquire_lock" %} ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++instruct membar_release() %{ ++ match(MemBarRelease); ++ ins_cost(400); ++ ++ format %{ "MEMBAR-release @ membar_release" %} ++ ++ ins_encode %{ ++ // Attention: DO NOT DELETE THIS GUY! 
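++    // A release barrier must keep all earlier loads and stores ordered before
++    // the publishing store. This port only emits the full sync() for that
++    // purpose (every membar rule here uses sync()), so dropping it would
++    // permit such reordering.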
++ __ sync(); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct store_fence() %{ ++ match(StoreFence); ++ ins_cost(400); ++ ++ format %{ "MEMBAR @ store_fence" %} ++ ++ ins_encode %{ ++ __ sync(); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct membar_release_lock() ++%{ ++ match(MemBarReleaseLock); ++ ins_cost(0); ++ ++ size(0); ++ format %{ "MEMBAR-release-lock (release in FastUnlock so empty) @ membar_release_lock" %} ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++ ++instruct membar_volatile() %{ ++ match(MemBarVolatile); ++ ins_cost(400); ++ ++ format %{ "MEMBAR-volatile" %} ++ ins_encode %{ ++ if( !os::is_MP() ) return; // Not needed on single CPU ++ __ sync(); ++ ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct unnecessary_membar_volatile() %{ ++ match(MemBarVolatile); ++ predicate(Matcher::post_store_load_barrier(n)); ++ ins_cost(0); ++ ++ size(0); ++ format %{ "MEMBAR-volatile (unnecessary so empty encoding) @ unnecessary_membar_volatile" %} ++ ins_encode( ); ++ ins_pipe(empty); ++%} ++ ++instruct membar_storestore() %{ ++ match(MemBarStoreStore); ++ ++ ins_cost(400); ++ format %{ "MEMBAR-storestore @ membar_storestore" %} ++ ins_encode %{ ++ __ sync(); ++ %} ++ ins_pipe(empty); ++%} ++ ++//----------Move Instructions-------------------------------------------------- ++instruct castX2P(mRegP dst, mRegL src) %{ ++ match(Set dst (CastX2P src)); ++ format %{ "castX2P $dst, $src @ castX2P" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ if(src != dst) ++ __ move(dst, src); ++ %} ++ ins_cost(10); ++ ins_pipe( ialu_regI_mov ); ++%} ++ ++instruct castP2X(mRegL dst, mRegP src ) %{ ++ match(Set dst (CastP2X src)); ++ ++ format %{ "mov $dst, $src\t #@castP2X" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ if(src != dst) ++ __ move(dst, src); ++ %} ++ ins_pipe( ialu_regI_mov ); ++%} ++ ++instruct MoveF2I_reg_reg(mRegI dst, regF src) %{ ++ match(Set dst (MoveF2I src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveF2I $dst, $src @ MoveF2I_reg_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ ++ __ mfc1(dst, src); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct MoveI2F_reg_reg(regF dst, mRegI src) %{ ++ match(Set dst (MoveI2F src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveI2F $dst, $src @ MoveI2F_reg_reg" %} ++ ins_encode %{ ++ Register src = as_Register($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ mtc1(src, dst); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct MoveD2L_reg_reg(mRegL dst, regD src) %{ ++ match(Set dst (MoveD2L src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveD2L $dst, $src @ MoveD2L_reg_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ ++ __ dmfc1(dst, src); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct MoveL2D_reg_reg(regD dst, mRegL src) %{ ++ match(Set dst (MoveL2D src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveL2D $dst, $src @ MoveL2D_reg_reg" %} ++ ins_encode %{ ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ __ dmtc1(src, dst); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++//----------Conditional Move--------------------------------------------------- ++// Conditional move ++instruct cmovI_cmpI_reg_reg(mRegI dst, mRegI src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ 
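++  // The cmov* rules below all funnel into MacroAssembler::cmp_cmov(op1, op2,
++  // dst, src, condition, flag), where the trailing flag is commented as
++  // is_signed or is_float depending on the operand kind. dst is overwritten
++  // with src only when "op1 <cond> op2" holds, which is why dst also appears
++  // as an input in the (Binary dst src) operand.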
++ match(Set dst (CMoveI (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpI_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpP_reg_reg(mRegI dst, mRegI src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovI_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpN_reg_reg(mRegI dst, mRegI src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovI_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpU_reg_reg(mRegP dst, mRegP src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpF_reg_reg(mRegP dst, mRegP src, regF tmp1, regF tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpF_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpN_reg_reg(mRegP dst, mRegP src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, 
dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpP_reg_reg(mRegN dst, mRegN src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpD_reg_reg(mRegP dst, mRegP src, regD tmp1, regD tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmovN_cmpN_reg_reg(mRegN dst, mRegN src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmovI_cmpU_reg_reg(mRegI dst, mRegI src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovI_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpL_reg_reg(mRegI dst, mRegI src, mRegL tmp1, mRegL tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpUL_reg_reg(mRegI dst, mRegI src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveI (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpUL_reg_reg\n" ++ 
"\tCMOV $dst,$src \t @cmovI_cmpUL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovP_cmpL_reg_reg(mRegP dst, mRegP src, mRegL tmp1, mRegL tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpUL_reg_reg(mRegP dst, mRegP src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveP (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpUL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpUL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovI_cmpD_reg_reg(mRegI dst, mRegI src, regD tmp1, regD tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmovP_cmpP_reg_reg(mRegP dst, mRegP src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpI_reg_reg(mRegP dst, mRegP src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1,$tmp2\t @cmovP_cmpI_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpI_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ 
%} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpP_reg_reg(mRegL dst, mRegL src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovL_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpU_reg_reg(mRegN dst, mRegN src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpL_reg_reg(mRegN dst, mRegN src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveN (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovN_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovN_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpUL_reg_reg(mRegN dst, mRegN src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveN (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovN_cmpUL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovN_cmpUL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovN_cmpI_reg_reg(mRegN dst, mRegN src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1,$tmp2\t @cmovN_cmpI_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpI_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpU_reg_reg(mRegL dst, mRegL src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovL_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = 
$tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpF_reg_reg(mRegL dst, mRegL src, regF tmp1, regF tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpF_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpI_reg_reg(mRegL dst, mRegL src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpI_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpL_reg_reg(mRegL dst, mRegL src, mRegL tmp1, mRegL tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpUL_reg_reg(mRegL dst, mRegL src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveL (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpUL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpUL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovL_cmpN_reg_reg(mRegL dst, mRegL src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovL_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmovL_cmpD_reg_reg(mRegL dst, mRegL src, regD tmp1, regD tmp2, 
cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovD_cmpD_reg_reg(regD dst, regD src, regD tmp1, regD tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveD (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovD_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovD_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovF_cmpI_reg_reg(regF dst, regF src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveF (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovF_cmpI_reg_reg\n" ++ "\tCMOV $dst, $src \t @cmovF_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovD_cmpI_reg_reg(regD dst, regD src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveD (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovD_cmpI_reg_reg\n" ++ "\tCMOV $dst, $src \t @cmovD_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovD_cmpP_reg_reg(regD dst, regD src, mRegP tmp1, mRegP tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveD (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovD_cmpP_reg_reg\n" ++ "\tCMOV $dst, $src \t @cmovD_cmpP_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++//FIXME ++instruct cmovI_cmpF_reg_reg(mRegI dst, mRegI src, regF tmp1, regF tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpF_reg_reg\n" ++ "\tCMOV 
$dst,$src \t @cmovI_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovF_cmpF_reg_reg(regF dst, regF src, regF tmp1, regF tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveF (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovF_cmpF_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovF_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ FloatRegister src = $src$$FloatRegister; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// Manifest a CmpL result in an integer register. Very painful. ++// This is the test to avoid. ++instruct cmpL3_reg_reg(mRegI dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (CmpL3 src1 src2)); ++ ins_cost(1000); ++ format %{ "cmpL3 $dst, $src1, $src2 @ cmpL3_reg_reg" %} ++ ins_encode %{ ++ Register opr1 = as_Register($src1$$reg); ++ Register opr2 = as_Register($src2$$reg); ++ Register dst = as_Register($dst$$reg); ++ ++ __ slt(AT, opr1, opr2); ++ __ slt(dst, opr2, opr1); ++ __ subu(dst, dst, AT); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ++// less_result = -1 ++// greater_result = 1 ++// equal_result = 0 ++// nan_result = -1 ++// ++instruct cmpF3_reg_reg(mRegI dst, regF src1, regF src2) %{ ++ match(Set dst (CmpF3 src1 src2)); ++ ins_cost(1000); ++ format %{ "cmpF3 $dst, $src1, $src2 @ cmpF3_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ Register dst = as_Register($dst$$reg); ++ ++ __ ori(dst, R0, 1); ++ __ ori(AT, R0, 1); ++ __ c_olt_s(src2, src1); ++ __ movf(dst, R0); ++ __ c_ult_s(src1, src2); ++ __ movf(AT, R0); ++ __ subu(dst, dst, AT); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmpD3_reg_reg(mRegI dst, regD src1, regD src2) %{ ++ match(Set dst (CmpD3 src1 src2)); ++ ins_cost(1000); ++ format %{ "cmpD3 $dst, $src1, $src2 @ cmpD3_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ Register dst = as_Register($dst$$reg); ++ ++ __ ori(dst, R0, 1); ++ __ ori(AT, R0, 1); ++ __ c_olt_d(src2, src1); ++ __ movf(dst, R0); ++ __ c_ult_d(src1, src2); ++ __ movf(AT, R0); ++ __ subu(dst, dst, AT); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct clear_array(mRegL cnt, mRegP base, Universe dummy) %{ ++ match(Set dummy (ClearArray cnt base)); ++ format %{ "CLEAR_ARRAY base = $base, cnt = $cnt # Clear doublewords" %} ++ ins_encode %{ ++ //Assume cnt is the number of bytes in an array to be cleared, ++ //and base points to the starting address of the array.
++ Register base = $base$$Register; ++ Register num = $cnt$$Register; ++ Label Loop, done; ++ ++ __ beq(num, R0, done); ++ __ delayed()->daddu(AT, base, R0); ++ ++ __ move(T9, num); /* T9 = words */ ++ ++ __ bind(Loop); ++ __ sd(R0, AT, 0); ++ __ daddiu(T9, T9, -1); ++ __ bne(T9, R0, Loop); ++ __ delayed()->daddiu(AT, AT, wordSize); ++ ++ __ bind(done); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct string_compareL(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result) %{ ++ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL); ++ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); ++ ++ format %{ "String Compare byte[] $str1[len: $cnt1], $str2[len: $cnt2] -> $result @ string_compareL" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ StrIntrinsicNode::LL); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct string_compareU(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result) %{ ++ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU); ++ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); ++ ++ format %{ "String Compare char[] $str1[len: $cnt1], $str2[len: $cnt2] -> $result @ string_compareU" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ StrIntrinsicNode::UU); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct string_compareLU(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result) %{ ++ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU); ++ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); ++ ++ format %{ "String Compare byte[] $str1[len: $cnt1], $str2[len: $cnt2] -> $result @ string_compareLU" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ StrIntrinsicNode::LU); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct string_compareUL(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result) %{ ++ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL); ++ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); ++ ++ format %{ "String Compare byte[] $str1[len: $cnt1], $str2[len: $cnt2] -> $result @ string_compareUL" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ StrIntrinsicNode::UL); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++// intrinsic optimization ++instruct string_equals(a4_RegP str1, a5_RegP str2, mA6RegI cnt, mA7RegI temp, no_Ax_mRegI result) %{ ++ match(Set result (StrEquals (Binary str1 str2) cnt)); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL temp); ++ ++ format %{ "String Equal $str1, $str2, len:$cnt tmp:$temp -> $result @ string_equals" %} ++ ins_encode %{ ++ __ arrays_equals($str1$$Register, $str2$$Register, ++ $cnt$$Register, $temp$$Register, $result$$Register, ++ false/* byte */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++//----------Arithmetic Instructions------------------------------------------- ++//----------Addition 
Instructions--------------------------------------------- ++instruct addI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (AddI src1 src2)); ++ ++ format %{ "add $dst, $src1, $src2 #@addI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ addu32(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addI_Reg_imm(mRegI dst, mRegI src1, immI src2) %{ ++ match(Set dst (AddI src1 src2)); ++ ++ format %{ "add $dst, $src1, $src2 #@addI_Reg_imm" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ int imm = $src2$$constant; ++ ++ if(Assembler::is_simm16(imm)) { ++ __ addiu32(dst, src1, imm); ++ } else { ++ __ move(AT, imm); ++ __ addu32(dst, src1, AT); ++ } ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addP_reg_reg(mRegP dst, mRegP src1, mRegL src2) %{ ++ match(Set dst (AddP src1 src2)); ++ ++ format %{ "dadd $dst, $src1, $src2 #@addP_reg_reg" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ daddu(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addP_reg_reg_convI2L(mRegP dst, mRegP src1, mRegI src2) %{ ++ match(Set dst (AddP src1 (ConvI2L src2))); ++ ++ format %{ "dadd $dst, $src1, $src2 #@addP_reg_reg_convI2L" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ daddu(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addP_reg_imm(mRegP dst, mRegP src1, immL16 src2) %{ ++ match(Set dst (AddP src1 src2)); ++ ++ format %{ "daddi $dst, $src1, $src2 #@addP_reg_imm" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ long src2 = $src2$$constant; ++ Register dst = $dst$$Register; ++ ++ __ daddiu(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++// Add Long Register with Register ++instruct addL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (AddL src1 src2)); ++ ins_cost(200); ++ format %{ "ADD $dst, $src1, $src2 #@addL_Reg_Reg\t" %} ++ ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ daddu(dst_reg, src1_reg, src2_reg); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct addL_Reg_imm(mRegL dst, mRegL src1, immL16 src2) ++%{ ++ match(Set dst (AddL src1 src2)); ++ ++ format %{ "ADD $dst, $src1, $src2 #@addL_Reg_imm " %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ int src2_imm = $src2$$constant; ++ ++ __ daddiu(dst_reg, src1_reg, src2_imm); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct addL_RegI2L_imm(mRegL dst, mRegI src1, immL16 src2) ++%{ ++ match(Set dst (AddL (ConvI2L src1) src2)); ++ ++ format %{ "ADD $dst, $src1, $src2 #@addL_RegI2L_imm " %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ int src2_imm = $src2$$constant; ++ ++ __ daddiu(dst_reg, src1_reg, src2_imm); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct addL_RegI2L_Reg(mRegL dst, mRegI src1, mRegL src2) %{ ++ match(Set dst (AddL (ConvI2L src1) src2)); ++ ins_cost(200); ++ format %{ "ADD $dst, $src1, $src2 #@addL_RegI2L_Reg\t" %} ++ ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = 
as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ daddu(dst_reg, src1_reg, src2_reg); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct addL_RegI2L_RegI2L(mRegL dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (AddL (ConvI2L src1) (ConvI2L src2))); ++ ins_cost(200); ++ format %{ "ADD $dst, $src1, $src2 #@addL_RegI2L_RegI2L\t" %} ++ ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ daddu(dst_reg, src1_reg, src2_reg); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct addL_Reg_RegI2L(mRegL dst, mRegL src1, mRegI src2) %{ ++ match(Set dst (AddL src1 (ConvI2L src2))); ++ ins_cost(200); ++ format %{ "ADD $dst, $src1, $src2 #@addL_Reg_RegI2L\t" %} ++ ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ daddu(dst_reg, src1_reg, src2_reg); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++//----------Abs Instructions------------------------------------------- ++ ++// Integer Absolute Instructions ++instruct absI_rReg(mRegI dst, mRegI src) ++%{ ++ match(Set dst (AbsI src)); ++ effect(TEMP dst); ++ format %{ "AbsI $dst, $src" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ sra(AT, src, 31); ++ __ xorr(dst, src, AT); ++ __ subu32(dst, dst, AT); ++ %} ++ ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Long Absolute Instructions ++instruct absL_rReg(mRegL dst, mRegL src) ++%{ ++ match(Set dst (AbsL src)); ++ effect(TEMP dst); ++ format %{ "AbsL $dst, $src" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ dsra32(AT, src, 31); ++ __ xorr(dst, src, AT); ++ __ subu(dst, dst, AT); ++ %} ++ ++ ins_pipe(ialu_regL_regL); ++%} ++ ++//----------Subtraction Instructions------------------------------------------- ++// Integer Subtraction Instructions ++instruct subI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (SubI src1 src2)); ++ ins_cost(100); ++ ++ format %{ "sub $dst, $src1, $src2 #@subI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ subu32(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct subI_Reg_immI_M32767_32768(mRegI dst, mRegI src1, immI_M32767_32768 src2) %{ ++ match(Set dst (SubI src1 src2)); ++ ins_cost(80); ++ ++ format %{ "sub $dst, $src1, $src2 #@subI_Reg_immI_M32767_32768" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ __ addiu32(dst, src1, -1 * $src2$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct negI_Reg(mRegI dst, immI_0 zero, mRegI src) %{ ++ match(Set dst (SubI zero src)); ++ ins_cost(80); ++ ++ format %{ "neg $dst, $src #@negI_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ __ subu32(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct negL_Reg(mRegL dst, immL_0 zero, mRegL src) %{ ++ match(Set dst (SubL zero src)); ++ ins_cost(80); ++ ++ format %{ "neg $dst, $src #@negL_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ __ subu(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct subL_Reg_immL_M32767_32768(mRegL dst, mRegL src1, immL_M32767_32768 src2) %{ ++ match(Set dst (SubL src1 src2)); ++ 
ins_cost(80); ++ ++ format %{ "sub $dst, $src1, $src2 #@subL_Reg_immL_M32767_32768" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ __ daddiu(dst, src1, -1 * $src2$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Subtract Long Register with Register. ++instruct subL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (SubL src1 src2)); ++ ins_cost(100); ++ format %{ "SubL $dst, $src1, $src2 @ subL_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src1 = as_Register($src1$$reg); ++ Register src2 = as_Register($src2$$reg); ++ ++ __ subu(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct subL_Reg_RegI2L(mRegL dst, mRegL src1, mRegI src2) %{ ++ match(Set dst (SubL src1 (ConvI2L src2))); ++ ins_cost(100); ++ format %{ "SubL $dst, $src1, $src2 @ subL_Reg_RegI2L" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src1 = as_Register($src1$$reg); ++ Register src2 = as_Register($src2$$reg); ++ ++ __ subu(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct subL_RegI2L_Reg(mRegL dst, mRegI src1, mRegL src2) %{ ++ match(Set dst (SubL (ConvI2L src1) src2)); ++ ins_cost(200); ++ format %{ "SubL $dst, $src1, $src2 @ subL_RegI2L_Reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src1 = as_Register($src1$$reg); ++ Register src2 = as_Register($src2$$reg); ++ ++ __ subu(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct subL_RegI2L_RegI2L(mRegL dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (SubL (ConvI2L src1) (ConvI2L src2))); ++ ins_cost(200); ++ format %{ "SubL $dst, $src1, $src2 @ subL_RegI2L_RegI2L" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src1 = as_Register($src1$$reg); ++ Register src2 = as_Register($src2$$reg); ++ ++ __ subu(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Integer MOD with Register ++instruct modI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (ModI src1 src2)); ++ ins_cost(300); ++ format %{ "modi $dst, $src1, $src2 @ modI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ //if (UseLEXT1) { ++ if (0) { ++ // Experiments show that gsmod is slower than div+mfhi. ++ // So I just disable it here.
++ __ gsmod(dst, src1, src2); ++ } else { ++ __ div(src1, src2); ++ __ mfhi(dst); ++ } ++ %} ++ ++ //ins_pipe( ialu_mod ); ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct modL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (ModL src1 src2)); ++ format %{ "modL $dst, $src1, $src2 @modL_reg_reg" %} ++ ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ if (UseLEXT1) { ++ __ gsdmod(dst, op1, op2); ++ } else { ++ __ ddiv(op1, op2); ++ __ mfhi(dst); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mulI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (MulI src1 src2)); ++ ++ ins_cost(300); ++ format %{ "mul $dst, $src1, $src2 @ mulI_Reg_Reg" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ ++ __ mul(dst, src1, src2); ++ %} ++ ins_pipe( ialu_mult ); ++%} ++ ++instruct maddI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2, mRegI src3) %{ ++ match(Set dst (AddI (MulI src1 src2) src3)); ++ ++ ins_cost(999); ++ format %{ "madd $dst, $src1 * $src2 + $src3 #@maddI_Reg_Reg" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ Register src3 = $src3$$Register; ++ Register dst = $dst$$Register; ++ ++ __ mtlo(src3); ++ __ madd(src1, src2); ++ __ mflo(dst); ++ %} ++ ins_pipe( ialu_mult ); ++%} ++ ++instruct divI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (DivI src1 src2)); ++ ++ ins_cost(300); ++ format %{ "div $dst, $src1, $src2 @ divI_Reg_Reg" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ ++ // In MIPS, div does not cause exception. ++ // We must trap an exception manually. ++ __ teq(R0, src2, 0x7); ++ ++ if (UseLEXT1) { ++ __ gsdiv(dst, src1, src2); ++ } else { ++ __ div(src1, src2); ++ ++ __ nop(); ++ __ nop(); ++ __ mflo(dst); ++ } ++ %} ++ ins_pipe( ialu_mod ); ++%} ++ ++instruct divF_Reg_Reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (DivF src1 src2)); ++ ++ ins_cost(300); ++ format %{ "divF $dst, $src1, $src2 @ divF_Reg_Reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ /* Here do we need to trap an exception manually ? */ ++ __ div_s(dst, src1, src2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct divD_Reg_Reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (DivD src1 src2)); ++ ++ ins_cost(300); ++ format %{ "divD $dst, $src1, $src2 @ divD_Reg_Reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ /* Here do we need to trap an exception manually ? 
*/ ++ __ div_d(dst, src1, src2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mulL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (MulL src1 src2)); ++ format %{ "mulL $dst, $src1, $src2 @mulL_reg_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ if (UseLEXT1) { ++ __ gsdmult(dst, op1, op2); ++ } else { ++ __ dmult(op1, op2); ++ __ mflo(dst); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mulL_reg_regI2L(mRegL dst, mRegL src1, mRegI src2) %{ ++ match(Set dst (MulL src1 (ConvI2L src2))); ++ format %{ "mulL $dst, $src1, $src2 @mulL_reg_regI2L" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ if (UseLEXT1) { ++ __ gsdmult(dst, op1, op2); ++ } else { ++ __ dmult(op1, op2); ++ __ mflo(dst); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct divL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (DivL src1 src2)); ++ format %{ "divL $dst, $src1, $src2 @divL_reg_reg" %} ++ ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ if (UseLEXT1) { ++ __ gsddiv(dst, op1, op2); ++ } else { ++ __ ddiv(op1, op2); ++ __ mflo(dst); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (AddF src1 src2)); ++ format %{ "AddF $dst, $src1, $src2 @addF_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ add_s(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (SubF src1 src2)); ++ format %{ "SubF $dst, $src1, $src2 @subF_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ sub_s(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (AddD src1 src2)); ++ format %{ "AddD $dst, $src1, $src2 @addD_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ add_d(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (SubD src1 src2)); ++ format %{ "SubD $dst, $src1, $src2 @subD_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ sub_d(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct negF_reg(regF dst, regF src) %{ ++ match(Set dst (NegF src)); ++ format %{ "negF $dst, $src @negF_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ neg_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct negD_reg(regD dst, regD src) %{ ++ match(Set dst (NegD src)); ++ format %{ "negD $dst, $src @negD_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ 
FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ neg_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++ ++instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (MulF src1 src2)); ++ format %{ "MULF $dst, $src1, $src2 @mulF_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ mul_s(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++// Mul two double precision floating piont number ++instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (MulD src1 src2)); ++ format %{ "MULD $dst, $src1, $src2 @mulD_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ mul_d(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct absF_reg(regF dst, regF src) %{ ++ match(Set dst (AbsF src)); ++ ins_cost(100); ++ format %{ "absF $dst, $src @absF_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ abs_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++ ++// intrinsics for math_native. ++// AbsD SqrtD CosD SinD TanD LogD Log10D ++ ++instruct absD_reg(regD dst, regD src) %{ ++ match(Set dst (AbsD src)); ++ ins_cost(100); ++ format %{ "absD $dst, $src @absD_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ abs_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct sqrtD_reg(regD dst, regD src) %{ ++ match(Set dst (SqrtD src)); ++ ins_cost(100); ++ format %{ "SqrtD $dst, $src @sqrtD_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ sqrt_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct sqrtF_reg(regF dst, regF src) %{ ++ match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); ++ ins_cost(100); ++ format %{ "SqrtF $dst, $src @sqrtF_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ sqrt_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++// src1 * src2 + src3 ++instruct maddF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF src3 (Binary src1 src2))); ++ ++ format %{ "madd_s $dst, $src3, $src2, $src1" %} ++ ++ ins_encode %{ ++ __ madd_s(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// src1 * src2 + src3 ++instruct maddD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD src3 (Binary src1 src2))); ++ ++ format %{ "madd_d $dst, $src3, $src2, $src1" %} ++ ++ ins_encode %{ ++ __ madd_d(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// src1 * src2 - src3 ++instruct msubF_reg_reg(regF dst, regF src1, regF src2, regF src3, immF_0 zero) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF (NegF src3) (Binary src1 src2))); ++ ++ format %{ "msub_s $dst, $src3, $src2, $src1" %} ++ ++ ins_encode %{ ++ __ msub_s(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), ++ 
as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// src1 * src2 - src3 ++instruct msubD_reg_reg(regD dst, regD src1, regD src2, regD src3, immD_0 zero) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD (NegD src3) (Binary src1 src2))); ++ ++ format %{ "msub_d $dst, $src3, $src2, $src1" %} ++ ++ ins_encode %{ ++ __ msub_d(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// -src1 * src2 - src3 ++instruct mnaddF_reg_reg(regF dst, regF src1, regF src2, regF src3, immF_0 zero) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF (NegF src3) (Binary (NegF src1) src2))); ++ match(Set dst (FmaF (NegF src3) (Binary src1 (NegF src2)))); ++ ++ format %{ "nmadds $dst, $src3, $src2, $src1" %} ++ ++ ins_encode %{ ++ __ nmadd_s(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// -src1 * src2 - src3 ++instruct mnaddD_reg_reg(regD dst, regD src1, regD src2, regD src3, immD_0 zero) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD (NegD src3) (Binary (NegD src1) src2))); ++ match(Set dst (FmaD (NegD src3) (Binary src1 (NegD src2)))); ++ ++ format %{ "nmaddd $dst, $src3, $src2, $src1" %} ++ ++ ins_encode %{ ++ __ nmadd_d(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// -src1 * src2 + src3 ++instruct mnsubF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF src3 (Binary (NegF src1) src2))); ++ match(Set dst (FmaF src3 (Binary src1 (NegF src2)))); ++ ++ format %{ "nmsubs $dst, $src3, $src2, $src1" %} ++ ++ ins_encode %{ ++ __ nmsub_s(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// -src1 * src2 + src3 ++instruct mnsubD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD src3 (Binary (NegD src1) src2))); ++ match(Set dst (FmaD src3 (Binary src1 (NegD src2)))); ++ ++ format %{ "nmsubd $dst, $src3, $src2, $src1" %} ++ ++ ins_encode %{ ++ __ nmsub_d(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++//----------------------------------Logical Instructions---------------------- ++//__________________________________Integer Logical Instructions------------- ++ ++//And Instuctions ++// And Register with Immediate ++instruct andI_Reg_immI(mRegI dst, mRegI src1, immI src2) %{ ++ match(Set dst (AndI src1 src2)); ++ ++ format %{ "and $dst, $src1, $src2 #@andI_Reg_immI" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ ++ __ move(AT, val); ++ __ andr(dst, src, AT); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andI_Reg_imm_0_65535(mRegI dst, mRegI src1, immI_0_65535 src2) %{ ++ match(Set dst (AndI src1 src2)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $src2 #@andI_Reg_imm_0_65535" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ ++ __ andi(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct 
andI_Reg_immI_nonneg_mask(mRegI dst, mRegI src1, immI_nonneg_mask mask) %{ ++ match(Set dst (AndI src1 mask)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $mask #@andI_Reg_immI_nonneg_mask" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int size = Assembler::is_int_mask($mask$$constant); ++ ++ __ ext(dst, src, 0, size); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_nonneg_mask(mRegL dst, mRegL src1, immL_nonneg_mask mask) %{ ++ match(Set dst (AndL src1 mask)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $mask #@andL_Reg_immL_nonneg_mask" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int size = Assembler::is_jlong_mask($mask$$constant); ++ ++ __ dext(dst, src, 0, size); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorI_Reg_imm_0_65535(mRegI dst, mRegI src1, immI_0_65535 src2) %{ ++ match(Set dst (XorI src1 src2)); ++ ins_cost(60); ++ ++ format %{ "xori $dst, $src1, $src2 #@xorI_Reg_imm_0_65535" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ ++ __ xori(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorI_Reg_immI_M1(mRegI dst, mRegI src1, immI_M1 M1) %{ ++ match(Set dst (XorI src1 M1)); ++ predicate(UseLEXT3); ++ ins_cost(60); ++ ++ format %{ "xor $dst, $src1, $M1 #@xorI_Reg_immI_M1" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ ++ __ gsorn(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorL2I_Reg_immI_M1(mRegI dst, mRegL src1, immI_M1 M1) %{ ++ match(Set dst (XorI (ConvL2I src1) M1)); ++ predicate(UseLEXT3); ++ ins_cost(60); ++ ++ format %{ "xor $dst, $src1, $M1 #@xorL2I_Reg_immI_M1" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ ++ __ gsorn(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorL_Reg_imm_0_65535(mRegL dst, mRegL src1, immL_0_65535 src2) %{ ++ match(Set dst (XorL src1 src2)); ++ ins_cost(60); ++ ++ format %{ "xori $dst, $src1, $src2 #@xorL_Reg_imm_0_65535" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ ++ __ xori(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++/* ++instruct xorL_Reg_immL_M1(mRegL dst, mRegL src1, immL_M1 M1) %{ ++ match(Set dst (XorL src1 M1)); ++ predicate(UseLEXT3); ++ ins_cost(60); ++ ++ format %{ "xor $dst, $src1, $M1 #@xorL_Reg_immL_M1" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ ++ __ gsorn(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++*/ ++ ++instruct lbu_and_lmask(mRegI dst, memory mem, immI_255 mask) %{ ++ match(Set dst (AndI mask (LoadB mem))); ++ ins_cost(60); ++ ++ format %{ "lhu $dst, $mem #@lbu_and_lmask" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct lbu_and_rmask(mRegI dst, memory mem, immI_255 mask) %{ ++ match(Set dst (AndI (LoadB mem) mask)); ++ ins_cost(60); ++ ++ format %{ "lhu $dst, $mem #@lbu_and_rmask" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct andI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (AndI src1 src2)); ++ ++ 
format %{ "and $dst, $src1, $src2 #@andI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ andr(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andnI_Reg_nReg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (AndI src1 (XorI src2 M1))); ++ predicate(UseLEXT3); ++ ++ format %{ "andn $dst, $src1, $src2 #@andnI_Reg_nReg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsandn(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct ornI_Reg_nReg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (OrI src1 (XorI src2 M1))); ++ predicate(UseLEXT3); ++ ++ format %{ "orn $dst, $src1, $src2 #@ornI_Reg_nReg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsorn(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andnI_nReg_Reg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (AndI (XorI src1 M1) src2)); ++ predicate(UseLEXT3); ++ ++ format %{ "andn $dst, $src2, $src1 #@andnI_nReg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsandn(dst, src2, src1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct ornI_nReg_Reg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (OrI (XorI src1 M1) src2)); ++ predicate(UseLEXT3); ++ ++ format %{ "orn $dst, $src2, $src1 #@ornI_nReg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsorn(dst, src2, src1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// And Long Register with Register ++instruct andL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (AndL src1 src2)); ++ format %{ "AND $dst, $src1, $src2 @ andL_Reg_Reg\n\t" %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ andr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct andL_Reg_Reg_convI2L(mRegL dst, mRegL src1, mRegI src2) %{ ++ match(Set dst (AndL src1 (ConvI2L src2))); ++ format %{ "AND $dst, $src1, $src2 @ andL_Reg_Reg_convI2L\n\t" %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ andr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct andL_Reg_imm_0_65535(mRegL dst, mRegL src1, immL_0_65535 src2) %{ ++ match(Set dst (AndL src1 src2)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $src2 #@andL_Reg_imm_0_65535" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ long val = $src2$$constant; ++ ++ __ andi(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL2I_Reg_imm_0_65535(mRegI dst, mRegL src1, immL_0_65535 src2) %{ ++ match(Set dst (ConvL2I (AndL src1 src2))); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $src2 #@andL2I_Reg_imm_0_65535" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ long val = $src2$$constant; ++ ++ __ andi(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++/* 
++instruct andnL_Reg_nReg(mRegL dst, mRegL src1, mRegL src2, immL_M1 M1) %{ ++ match(Set dst (AndL src1 (XorL src2 M1))); ++ predicate(UseLEXT3); ++ ++ format %{ "andn $dst, $src1, $src2 #@andnL_Reg_nReg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsandn(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++*/ ++ ++/* ++instruct ornL_Reg_nReg(mRegL dst, mRegL src1, mRegL src2, immL_M1 M1) %{ ++ match(Set dst (OrL src1 (XorL src2 M1))); ++ predicate(UseLEXT3); ++ ++ format %{ "orn $dst, $src1, $src2 #@ornL_Reg_nReg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsorn(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++*/ ++ ++/* ++instruct andnL_nReg_Reg(mRegL dst, mRegL src1, mRegL src2, immL_M1 M1) %{ ++ match(Set dst (AndL (XorL src1 M1) src2)); ++ predicate(UseLEXT3); ++ ++ format %{ "andn $dst, $src2, $src1 #@andnL_nReg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsandn(dst, src2, src1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++*/ ++ ++/* ++instruct ornL_nReg_Reg(mRegL dst, mRegL src1, mRegL src2, immL_M1 M1) %{ ++ match(Set dst (OrL (XorL src1 M1) src2)); ++ predicate(UseLEXT3); ++ ++ format %{ "orn $dst, $src2, $src1 #@ornL_nReg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsorn(dst, src2, src1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++*/ ++ ++instruct andL_Reg_immL_M8(mRegL dst, immL_M8 M8) %{ ++ match(Set dst (AndL dst M8)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M8 #@andL_Reg_immL_M8" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ dins(dst, R0, 0, 3); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M5(mRegL dst, immL_M5 M5) %{ ++ match(Set dst (AndL dst M5)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M5 #@andL_Reg_immL_M5" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ dins(dst, R0, 2, 1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M7(mRegL dst, immL_M7 M7) %{ ++ match(Set dst (AndL dst M7)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M7 #@andL_Reg_immL_M7" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ dins(dst, R0, 1, 2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M4(mRegL dst, immL_M4 M4) %{ ++ match(Set dst (AndL dst M4)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M4 #@andL_Reg_immL_M4" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ dins(dst, R0, 0, 2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M121(mRegL dst, immL_M121 M121) %{ ++ match(Set dst (AndL dst M121)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M121 #@andL_Reg_immL_M121" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ dins(dst, R0, 3, 4); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Or Long Register with Register ++instruct orL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (OrL src1 src2)); ++ format %{ "OR $dst, $src1, $src2 @ orL_Reg_Reg\t" %} ++ ins_encode %{ ++ Register dst_reg = $dst$$Register; ++ Register src1_reg = $src1$$Register; ++ Register src2_reg = $src2$$Register; ++ ++ __ orr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct 
orL_Reg_P2XReg(mRegL dst, mRegP src1, mRegL src2) %{ ++ match(Set dst (OrL (CastP2X src1) src2)); ++ format %{ "OR $dst, $src1, $src2 @ orL_Reg_P2XReg\t" %} ++ ins_encode %{ ++ Register dst_reg = $dst$$Register; ++ Register src1_reg = $src1$$Register; ++ Register src2_reg = $src2$$Register; ++ ++ __ orr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Xor Long Register with Register ++instruct xorL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (XorL src1 src2)); ++ format %{ "XOR $dst, $src1, $src2 @ xorL_Reg_Reg\t" %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ xorr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Left by 8-bit immediate ++instruct salI_Reg_imm(mRegI dst, mRegI src, immI8 shift) %{ ++ match(Set dst (LShiftI src shift)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_Reg_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shamt = $shift$$constant; ++ ++ __ sll(dst, src, shamt); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct salL2I_Reg_imm(mRegI dst, mRegL src, immI8 shift) %{ ++ match(Set dst (LShiftI (ConvL2I src) shift)); ++ ++ format %{ "SHL $dst, $src, $shift #@salL2I_Reg_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shamt = $shift$$constant; ++ ++ __ sll(dst, src, shamt); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct salI_Reg_imm_and_M65536(mRegI dst, mRegI src, immI_16 shift, immI_M65536 mask) %{ ++ match(Set dst (AndI (LShiftI src shift) mask)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_Reg_imm_and_M65536" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ sll(dst, src, 16); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct land7_2_s(mRegI dst, mRegL src, immL_7 seven, immI_16 sixteen) ++%{ ++ match(Set dst (RShiftI (LShiftI (ConvL2I (AndL src seven)) sixteen) sixteen)); ++ ++ format %{ "andi $dst, $src, 7\t# @land7_2_s" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ andi(dst, src, 7); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16. ++// This idiom is used by the compiler the i2s bytecode. ++instruct i2s(mRegI dst, mRegI src, immI_16 sixteen) ++%{ ++ match(Set dst (RShiftI (LShiftI src sixteen) sixteen)); ++ ++ format %{ "i2s $dst, $src\t# @i2s" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ seh(dst, src); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24. ++// This idiom is used by the compiler for the i2b bytecode. 
++instruct i2b(mRegI dst, mRegI src, immI_24 twentyfour) ++%{ ++ match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour)); ++ ++ format %{ "i2b $dst, $src\t# @i2b" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ seb(dst, src); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++ ++instruct salI_RegL2I_imm(mRegI dst, mRegL src, immI8 shift) %{ ++ match(Set dst (LShiftI (ConvL2I src) shift)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_RegL2I_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shamt = $shift$$constant; ++ ++ __ sll(dst, src, shamt); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Shift Left by 8-bit immediate ++instruct salI_Reg_Reg(mRegI dst, mRegI src, mRegI shift) %{ ++ match(Set dst (LShiftI src shift)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_Reg_Reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ Register shamt = $shift$$Register; ++ __ sllv(dst, src, shamt); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++// Shift Left Long ++instruct salL_Reg_imm(mRegL dst, mRegL src, immI8 shift) %{ ++ match(Set dst (LShiftL src shift)); ++ ins_cost(100); ++ format %{ "salL $dst, $src, $shift @ salL_Reg_imm" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ if (__ is_simm(shamt, 5)) ++ __ dsll(dst_reg, src_reg, shamt); ++ else { ++ int sa = Assembler::low(shamt, 6); ++ if (sa < 32) { ++ __ dsll(dst_reg, src_reg, sa); ++ } else { ++ __ dsll32(dst_reg, src_reg, sa - 32); ++ } ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct salL_RegI2L_imm(mRegL dst, mRegI src, immI8 shift) %{ ++ match(Set dst (LShiftL (ConvI2L src) shift)); ++ ins_cost(100); ++ format %{ "salL $dst, $src, $shift @ salL_RegI2L_imm" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ if (__ is_simm(shamt, 5)) ++ __ dsll(dst_reg, src_reg, shamt); ++ else { ++ int sa = Assembler::low(shamt, 6); ++ if (sa < 32) { ++ __ dsll(dst_reg, src_reg, sa); ++ } else { ++ __ dsll32(dst_reg, src_reg, sa - 32); ++ } ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Left Long ++instruct salL_Reg_Reg(mRegL dst, mRegL src, mRegI shift) %{ ++ match(Set dst (LShiftL src shift)); ++ ins_cost(100); ++ format %{ "salL $dst, $src, $shift @ salL_Reg_Reg" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ ++ __ dsllv(dst_reg, src_reg, $shift$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Right Long ++instruct sarL_Reg_imm(mRegL dst, mRegL src, immI8 shift) %{ ++ match(Set dst (RShiftL src shift)); ++ ins_cost(100); ++ format %{ "sarL $dst, $src, $shift @ sarL_Reg_imm" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = ($shift$$constant & 0x3f); ++ if (__ is_simm(shamt, 5)) ++ __ dsra(dst_reg, src_reg, shamt); ++ else { ++ int sa = Assembler::low(shamt, 6); ++ if (sa < 32) { ++ __ dsra(dst_reg, src_reg, sa); ++ } else { ++ __ dsra32(dst_reg, src_reg, sa - 32); ++ } ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct sarL2I_Reg_immI_32_63(mRegI dst, mRegL src, immI_32_63 shift) %{ ++ match(Set dst (ConvL2I (RShiftL src shift))); ++ ins_cost(100); ++ format %{ "sarL $dst, $src, $shift @ sarL2I_Reg_immI_32_63" %} 
++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dsra32(dst_reg, src_reg, shamt - 32); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Right Long arithmetically ++instruct sarL_Reg_Reg(mRegL dst, mRegL src, mRegI shift) %{ ++ match(Set dst (RShiftL src shift)); ++ ins_cost(100); ++ format %{ "sarL $dst, $src, $shift @ sarL_Reg_Reg" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ ++ __ dsrav(dst_reg, src_reg, $shift$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Right Long logically ++instruct slrL_Reg_Reg(mRegL dst, mRegL src, mRegI shift) %{ ++ match(Set dst (URShiftL src shift)); ++ ins_cost(100); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_Reg" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ ++ __ dsrlv(dst_reg, src_reg, $shift$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_0_31(mRegL dst, mRegL src, immI_0_31 shift) %{ ++ match(Set dst (URShiftL src shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dsrl(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_0_31_and_max_int(mRegI dst, mRegL src, immI_0_31 shift, immI_MaxI max_int) %{ ++ match(Set dst (AndI (ConvL2I (URShiftL src shift)) max_int)); ++ ins_cost(80); ++ format %{ "dext $dst, $src, $shift, 31 @ slrL_Reg_immI_0_31_and_max_int" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dext(dst_reg, src_reg, shamt, 31); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_P2XReg_immI_0_31(mRegL dst, mRegP src, immI_0_31 shift) %{ ++ match(Set dst (URShiftL (CastP2X src) shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_P2XReg_immI_0_31" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dsrl(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_32_63(mRegL dst, mRegL src, immI_32_63 shift) %{ ++ match(Set dst (URShiftL src shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_32_63" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dsrl32(dst_reg, src_reg, shamt - 32); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_convL2I(mRegI dst, mRegL src, immI_32_63 shift) %{ ++ match(Set dst (ConvL2I (URShiftL src shift))); ++ predicate(n->in(1)->in(2)->get_int() > 32); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_convL2I" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dsrl32(dst_reg, src_reg, shamt - 32); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_P2XReg_immI_32_63(mRegL dst, mRegP src, immI_32_63 shift) %{ ++ match(Set dst (URShiftL (CastP2X src) shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_P2XReg_immI_32_63" %} ++ 
ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dsrl32(dst_reg, src_reg, shamt - 32); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Xor Instructions ++// Xor Register with Register ++instruct xorI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (XorI src1 src2)); ++ ++ format %{ "XOR $dst, $src1, $src2 #@xorI_Reg_Reg" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ xorr(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Or Instructions ++instruct orI_Reg_imm(mRegI dst, mRegI src1, immI_0_32767 src2) %{ ++ match(Set dst (OrI src1 src2)); ++ ++ format %{ "OR $dst, $src1, $src2 #@orI_Reg_imm" %} ++ ins_encode %{ ++ __ ori($dst$$Register, $src1$$Register, $src2$$constant); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++// Or Register with Register ++instruct orI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (OrI src1 src2)); ++ ++ format %{ "OR $dst, $src1, $src2 #@orI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ orr(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rotI_shr_logical_Reg(mRegI dst, mRegI src, immI_0_31 rshift, immI_0_31 lshift, immI_1 one) %{ ++ match(Set dst (OrI (URShiftI src rshift) (LShiftI (AndI src one) lshift))); ++ predicate(32 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()))); ++ ++ format %{ "rotr $dst, $src, 1 ...\n\t" ++ "srl $dst, $dst, ($rshift-1) @ rotI_shr_logical_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int rshift = $rshift$$constant; ++ ++ __ rotr(dst, src, 1); ++ if (rshift - 1) { ++ __ srl(dst, dst, rshift - 1); ++ } ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct orI_Reg_castP2X(mRegL dst, mRegL src1, mRegP src2) %{ ++ match(Set dst (OrI src1 (CastP2X src2))); ++ ++ format %{ "OR $dst, $src1, $src2 #@orI_Reg_castP2X" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ orr(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Logical Shift Right by 8-bit immediate ++instruct shr_logical_Reg_imm(mRegI dst, mRegI src, immI8 shift) %{ ++ match(Set dst (URShiftI src shift)); ++ //effect(KILL cr); ++ ++ format %{ "SRL $dst, $src, $shift #@shr_logical_Reg_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shift = $shift$$constant; ++ ++ __ srl(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct shr_logical_Reg_imm_nonneg_mask(mRegI dst, mRegI src, immI_0_31 shift, immI_nonneg_mask mask) %{ ++ match(Set dst (AndI (URShiftI src shift) mask)); ++ ++ format %{ "ext $dst, $src, $shift, one-bits($mask) #@shr_logical_Reg_imm_nonneg_mask" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int pos = $shift$$constant; ++ int size = Assembler::is_int_mask($mask$$constant); ++ ++ __ ext(dst, src, pos, size); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rolI_Reg_immI_0_31(mRegI dst, immI_0_31 lshift, immI_0_31 rshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); ++ match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift))); ++ ++ ins_cost(100); ++ format %{ "rotr $dst, 
$dst, $rshift #@rolI_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotr(dst, dst, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rolL_Reg_immI_0_31(mRegL dst, mRegL src, immI_32_63 lshift, immI_0_31 rshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (LShiftL src lshift) (URShiftL src rshift))); ++ ++ ins_cost(100); ++ format %{ "rotr $dst, $src, $rshift #@rolL_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ drotr(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rolL_Reg_immI_32_63(mRegL dst, mRegL src, immI_0_31 lshift, immI_32_63 rshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (LShiftL src lshift) (URShiftL src rshift))); ++ ++ ins_cost(100); ++ format %{ "rotr $dst, $src, $rshift #@rolL_Reg_immI_32_63" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ drotr32(dst, src, sa - 32); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rorI_Reg_immI_0_31(mRegI dst, mRegI src, immI_0_31 rshift, immI_0_31 lshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); ++ match(Set dst (OrI (URShiftI src rshift) (LShiftI src lshift))); ++ ++ ins_cost(100); ++ format %{ "rotr $dst, $src, $rshift #@rorI_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotr(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rorL_Reg_immI_0_31(mRegL dst, mRegL src, immI_0_31 rshift, immI_32_63 lshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (URShiftL src rshift) (LShiftL src lshift))); ++ ++ ins_cost(100); ++ format %{ "rotr $dst, $src, $rshift #@rorL_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ drotr(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rorL_Reg_immI_32_63(mRegL dst, mRegL src, immI_32_63 rshift, immI_0_31 lshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (URShiftL src rshift) (LShiftL src lshift))); ++ ++ ins_cost(100); ++ format %{ "rotr $dst, $src, $rshift #@rorL_Reg_immI_32_63" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ drotr32(dst, src, sa - 32); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Logical Shift Right ++instruct shr_logical_Reg_Reg(mRegI dst, mRegI src, mRegI shift) %{ ++ match(Set dst (URShiftI src shift)); ++ ++ format %{ "SRL $dst, $src, $shift #@shr_logical_Reg_Reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ Register shift = $shift$$Register; ++ __ srlv(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++instruct shr_arith_Reg_imm(mRegI dst, mRegI src, immI8 shift) %{ ++ match(Set dst (RShiftI src shift)); ++ // effect(KILL cr); ++ ++ format %{ "SRA $dst, $src, $shift #@shr_arith_Reg_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shift = $shift$$constant; ++ __ 
sra(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct shr_arith_Reg_Reg(mRegI dst, mRegI src, mRegI shift) %{ ++ match(Set dst (RShiftI src shift)); ++ // effect(KILL cr); ++ ++ format %{ "SRA $dst, $src, $shift #@shr_arith_Reg_Reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ Register shift = $shift$$Register; ++ __ srav(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++//----------Convert Int to Boolean--------------------------------------------- ++ ++instruct convI2B(mRegI dst, mRegI src) %{ ++ match(Set dst (Conv2B src)); ++ ++ ins_cost(100); ++ format %{ "convI2B $dst, $src @ convI2B" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ if (dst != src) { ++ __ daddiu(dst, R0, 1); ++ __ movz(dst, R0, src); ++ } else { ++ __ move(AT, src); ++ __ daddiu(dst, R0, 1); ++ __ movz(dst, R0, AT); ++ } ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct convI2L_reg( mRegL dst, mRegI src) %{ ++ match(Set dst (ConvI2L src)); ++ ++ ins_cost(100); ++ format %{ "SLL $dst, $src @ convI2L_reg\t" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ if(dst != src) __ sll(dst, src, 0); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++ ++instruct convL2I_reg( mRegI dst, mRegL src ) %{ ++ match(Set dst (ConvL2I src)); ++ ++ format %{ "MOV $dst, $src @ convL2I_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ __ sll(dst, src, 0); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct convL2I2L_reg( mRegL dst, mRegL src ) %{ ++ match(Set dst (ConvI2L (ConvL2I src))); ++ ++ format %{ "sll $dst, $src, 0 @ convL2I2L_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ __ sll(dst, src, 0); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct convL2D_reg( regD dst, mRegL src ) %{ ++ match(Set dst (ConvL2D src)); ++ format %{ "convL2D $dst, $src @ convL2D_reg" %} ++ ins_encode %{ ++ Register src = as_Register($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ dmtc1(src, dst); ++ __ cvt_d_l(dst, dst); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convD2L_reg_fast( mRegL dst, regD src ) %{ ++ match(Set dst (ConvD2L src)); ++ ins_cost(150); ++ format %{ "convD2L $dst, $src @ convD2L_reg_fast" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ ++ Label Done; ++ ++ __ trunc_l_d(F30, src); ++ // max_long: 0x7fffffffffffffff ++ // __ set64(AT, 0x7fffffffffffffff); ++ __ daddiu(AT, R0, -1); ++ __ dsrl(AT, AT, 1); ++ __ dmfc1(dst, F30); ++ ++ __ bne(dst, AT, Done); ++ __ delayed()->mtc1(R0, F30); ++ ++ __ cvt_d_w(F30, F30); ++ __ c_ult_d(src, F30); ++ __ bc1f(Done); ++ __ delayed()->daddiu(T9, R0, -1); ++ ++ __ c_un_d(src, src); //NaN? ++ __ subu(dst, T9, AT); ++ __ movt(dst, R0); ++ ++ __ bind(Done); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convD2L_reg_slow( mRegL dst, regD src ) %{ ++ match(Set dst (ConvD2L src)); ++ ins_cost(250); ++ format %{ "convD2L $dst, $src @ convD2L_reg_slow" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ ++ Label L; ++ ++ __ c_un_d(src, src); //NaN? 
++ __ bc1t(L); ++ __ delayed(); ++ __ move(dst, R0); ++ ++ __ trunc_l_d(F30, src); ++ __ cfc1(AT, 31); ++ __ li(T9, 0x10000); ++ __ andr(AT, AT, T9); ++ __ beq(AT, R0, L); ++ __ delayed()->dmfc1(dst, F30); ++ ++ __ mov_d(F12, src); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2l), 1); ++ __ move(dst, V0); ++ __ bind(L); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convF2I_reg_fast( mRegI dst, regF src ) %{ ++ match(Set dst (ConvF2I src)); ++ ins_cost(150); ++ format %{ "convf2i $dst, $src @ convF2I_reg_fast" %} ++ ins_encode %{ ++ Register dreg = $dst$$Register; ++ FloatRegister fval = $src$$FloatRegister; ++ Label L; ++ ++ __ trunc_w_s(F30, fval); ++ __ move(AT, 0x7fffffff); ++ __ mfc1(dreg, F30); ++ __ c_un_s(fval, fval); //NaN? ++ __ movt(dreg, R0); ++ ++ __ bne(AT, dreg, L); ++ __ delayed()->lui(T9, 0x8000); ++ ++ __ mfc1(AT, fval); ++ __ andr(AT, AT, T9); ++ ++ __ movn(dreg, T9, AT); ++ ++ __ bind(L); ++ ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++ ++instruct convF2I_reg_slow( mRegI dst, regF src ) %{ ++ match(Set dst (ConvF2I src)); ++ ins_cost(250); ++ format %{ "convf2i $dst, $src @ convF2I_reg_slow" %} ++ ins_encode %{ ++ Register dreg = $dst$$Register; ++ FloatRegister fval = $src$$FloatRegister; ++ Label L; ++ ++ __ c_un_s(fval, fval); //NaN? ++ __ bc1t(L); ++ __ delayed(); ++ __ move(dreg, R0); ++ ++ __ trunc_w_s(F30, fval); ++ ++ /* Call SharedRuntime::f2i() to do a valid conversion */ ++ __ cfc1(AT, 31); ++ __ li(T9, 0x10000); ++ __ andr(AT, AT, T9); ++ __ beq(AT, R0, L); ++ __ delayed()->mfc1(dreg, F30); ++ ++ __ mov_s(F12, fval); ++ ++ //This bug was found when running ezDS's control-panel. ++ // J 982 C2 javax.swing.text.BoxView.layoutMajorAxis(II[I[I)V (283 bytes) @ 0x000000555c46aa74 ++ // ++ // An integer array index has been assigned to V0, and then changed from 1 to Integer.MAX_VALUE. ++ // V0 is corrupted during call_VM_leaf(), and should be preserved. ++ // ++ __ push(fval); ++ if(dreg != V0) { ++ __ push(V0); ++ } ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2i), 1); ++ if(dreg != V0) { ++ __ move(dreg, V0); ++ __ pop(V0); ++ } ++ __ pop(fval); ++ __ bind(L); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convF2L_reg_fast( mRegL dst, regF src ) %{ ++ match(Set dst (ConvF2L src)); ++ ins_cost(150); ++ format %{ "convf2l $dst, $src @ convF2L_reg_fast" %} ++ ins_encode %{ ++ Register dreg = $dst$$Register; ++ FloatRegister fval = $src$$FloatRegister; ++ Label L; ++ ++ __ trunc_l_s(F30, fval); ++ __ daddiu(AT, R0, -1); ++ __ dsrl(AT, AT, 1); ++ __ dmfc1(dreg, F30); ++ __ c_un_s(fval, fval); //NaN? ++ __ movt(dreg, R0); ++ ++ __ bne(AT, dreg, L); ++ __ delayed()->lui(T9, 0x8000); ++ ++ __ mfc1(AT, fval); ++ __ andr(AT, AT, T9); ++ ++ __ dsll32(T9, T9, 0); ++ __ movn(dreg, T9, AT); ++ ++ __ bind(L); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convF2L_reg_slow( mRegL dst, regF src ) %{ ++ match(Set dst (ConvF2L src)); ++ ins_cost(250); ++ format %{ "convf2l $dst, $src @ convF2L_reg_slow" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister fval = $src$$FloatRegister; ++ Label L; ++ ++ __ c_un_s(fval, fval); //NaN? 
++ __ bc1t(L); ++ __ delayed(); ++ __ move(dst, R0); ++ ++ __ trunc_l_s(F30, fval); ++ __ cfc1(AT, 31); ++ __ li(T9, 0x10000); ++ __ andr(AT, AT, T9); ++ __ beq(AT, R0, L); ++ __ delayed()->dmfc1(dst, F30); ++ ++ __ mov_s(F12, fval); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2l), 1); ++ __ move(dst, V0); ++ __ bind(L); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct convL2F_reg( regF dst, mRegL src ) %{ ++ match(Set dst (ConvL2F src)); ++ format %{ "convl2f $dst, $src @ convL2F_reg" %} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ Register src = as_Register($src$$reg); ++ Label L; ++ ++ __ dmtc1(src, dst); ++ __ cvt_s_l(dst, dst); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct convI2F_reg( regF dst, mRegI src ) %{ ++ match(Set dst (ConvI2F src)); ++ format %{ "convi2f $dst, $src @ convI2F_reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ mtc1(src, dst); ++ __ cvt_s_w(dst, dst); ++ %} ++ ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct cmpLTMask_immI_0( mRegI dst, mRegI p, immI_0 zero ) %{ ++ match(Set dst (CmpLTMask p zero)); ++ ins_cost(100); ++ ++ format %{ "sra $dst, $p, 31 @ cmpLTMask_immI_0" %} ++ ins_encode %{ ++ Register src = $p$$Register; ++ Register dst = $dst$$Register; ++ ++ __ sra(dst, src, 31); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmpLTMask( mRegI dst, mRegI p, mRegI q ) %{ ++ match(Set dst (CmpLTMask p q)); ++ ins_cost(400); ++ ++ format %{ "cmpLTMask $dst, $p, $q @ cmpLTMask" %} ++ ins_encode %{ ++ Register p = $p$$Register; ++ Register q = $q$$Register; ++ Register dst = $dst$$Register; ++ ++ __ slt(dst, p, q); ++ __ subu(dst, R0, dst); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct convP2B(mRegI dst, mRegP src) %{ ++ match(Set dst (Conv2B src)); ++ ++ ins_cost(100); ++ format %{ "convP2B $dst, $src @ convP2B" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ if (dst != src) { ++ __ daddiu(dst, R0, 1); ++ __ movz(dst, R0, src); ++ } else { ++ __ move(AT, src); ++ __ daddiu(dst, R0, 1); ++ __ movz(dst, R0, AT); ++ } ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++ ++instruct convI2D_reg_reg(regD dst, mRegI src) %{ ++ match(Set dst (ConvI2D src)); ++ format %{ "conI2D $dst, $src @convI2D_reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ FloatRegister dst = $dst$$FloatRegister; ++ __ mtc1(src, dst); ++ __ cvt_d_w(dst, dst); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct convF2D_reg_reg(regD dst, regF src) %{ ++ match(Set dst (ConvF2D src)); ++ format %{ "convF2D $dst, $src\t# @convF2D_reg_reg" %} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ FloatRegister src = $src$$FloatRegister; ++ ++ __ cvt_d_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct convD2F_reg_reg(regF dst, regD src) %{ ++ match(Set dst (ConvD2F src)); ++ format %{ "convD2F $dst, $src\t# @convD2F_reg_reg" %} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ FloatRegister src = $src$$FloatRegister; ++ ++ __ cvt_s_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++ ++// Convert a double to an int. If the double is a NAN, stuff a zero in instead. 
++instruct convD2I_reg_reg_fast( mRegI dst, regD src ) %{ ++ match(Set dst (ConvD2I src)); ++ ++ ins_cost(150); ++ format %{ "convD2I $dst, $src\t# @ convD2I_reg_reg_fast" %} ++ ++ ins_encode %{ ++ FloatRegister src = $src$$FloatRegister; ++ Register dst = $dst$$Register; ++ ++ Label Done; ++ ++ __ trunc_w_d(F30, src); ++ // max_int: 2147483647 ++ __ move(AT, 0x7fffffff); ++ __ mfc1(dst, F30); ++ ++ __ bne(dst, AT, Done); ++ __ delayed()->mtc1(R0, F30); ++ ++ __ cvt_d_w(F30, F30); ++ __ c_ult_d(src, F30); ++ __ bc1f(Done); ++ __ delayed()->addiu(T9, R0, -1); ++ ++ __ c_un_d(src, src); //NaN? ++ __ subu32(dst, T9, AT); ++ __ movt(dst, R0); ++ ++ __ bind(Done); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convD2I_reg_reg_slow( mRegI dst, regD src ) %{ ++ match(Set dst (ConvD2I src)); ++ ++ ins_cost(250); ++ format %{ "convD2I $dst, $src\t# @ convD2I_reg_reg_slow" %} ++ ++ ins_encode %{ ++ FloatRegister src = $src$$FloatRegister; ++ Register dst = $dst$$Register; ++ Label L; ++ ++ __ trunc_w_d(F30, src); ++ __ cfc1(AT, 31); ++ __ li(T9, 0x10000); ++ __ andr(AT, AT, T9); ++ __ beq(AT, R0, L); ++ __ delayed()->mfc1(dst, F30); ++ ++ __ mov_d(F12, src); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2i), 1); ++ __ move(dst, V0); ++ __ bind(L); ++ ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// Convert oop pointer into compressed form ++instruct encodeHeapOop(mRegN dst, mRegP src) %{ ++ predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull); ++ match(Set dst (EncodeP src)); ++ format %{ "encode_heap_oop $dst,$src" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ encode_heap_oop(dst, src); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct encodeHeapOop_not_null(mRegN dst, mRegP src) %{ ++ predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull); ++ match(Set dst (EncodeP src)); ++ format %{ "encode_heap_oop_not_null $dst,$src @ encodeHeapOop_not_null" %} ++ ins_encode %{ ++ __ encode_heap_oop_not_null($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct decodeHeapOop(mRegP dst, mRegN src) %{ ++ predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull && ++ n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant); ++ match(Set dst (DecodeN src)); ++ format %{ "decode_heap_oop $dst,$src @ decodeHeapOop" %} ++ ins_encode %{ ++ Register s = $src$$Register; ++ Register d = $dst$$Register; ++ ++ __ decode_heap_oop(d, s); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct decodeHeapOop_not_null(mRegP dst, mRegN src) %{ ++ predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull || ++ n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant); ++ match(Set dst (DecodeN src)); ++ format %{ "decode_heap_oop_not_null $dst,$src @ decodeHeapOop_not_null" %} ++ ins_encode %{ ++ Register s = $src$$Register; ++ Register d = $dst$$Register; ++ if (s != d) { ++ __ decode_heap_oop_not_null(d, s); ++ } else { ++ __ decode_heap_oop_not_null(d); ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct encodeKlass_not_null(mRegN dst, mRegP src) %{ ++ match(Set dst (EncodePKlass src)); ++ format %{ "encode_heap_oop_not_null $dst,$src @ encodeKlass_not_null" %} ++ ins_encode %{ ++ __ encode_klass_not_null($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct decodeKlass_not_null(mRegP dst, mRegN src) %{ ++ match(Set dst (DecodeNKlass src)); ++ format %{ "decode_heap_klass_not_null $dst,$src" %} ++ ins_encode %{ ++ Register s = 
$src$$Register; ++ Register d = $dst$$Register; ++ if (s != d) { ++ __ decode_klass_not_null(d, s); ++ } else { ++ __ decode_klass_not_null(d); ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++//FIXME ++instruct tlsLoadP(mRegP dst) %{ ++ match(Set dst (ThreadLocal)); ++ ++ ins_cost(0); ++ format %{ " get_thread in $dst #@tlsLoadP" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++#ifdef OPT_THREAD ++ __ move(dst, TREG); ++#else ++ __ get_thread(dst); ++#endif ++ %} ++ ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct checkCastPP( mRegP dst ) %{ ++ match(Set dst (CheckCastPP dst)); ++ ++ format %{ "#checkcastPP of $dst (empty encoding) #@chekCastPP" %} ++ ins_encode( /*empty encoding*/ ); ++ ins_pipe( empty ); ++%} ++ ++instruct castPP(mRegP dst) ++%{ ++ match(Set dst (CastPP dst)); ++ ++ size(0); ++ format %{ "# castPP of $dst" %} ++ ins_encode(/* empty encoding */); ++ ins_pipe(empty); ++%} ++ ++instruct castII( mRegI dst ) %{ ++ match(Set dst (CastII dst)); ++ format %{ "#castII of $dst empty encoding" %} ++ ins_encode( /*empty encoding*/ ); ++ ins_cost(0); ++ ins_pipe( empty ); ++%} ++ ++// Return Instruction ++// Remove the return address & jump to it. ++instruct Ret() %{ ++ match(Return); ++ format %{ "RET #@Ret" %} ++ ++ ins_encode %{ ++ __ jr(RA); ++ __ delayed()->nop(); ++ %} ++ ++ ins_pipe( pipe_jump ); ++%} ++ ++/* ++// For Loongson CPUs, jr seems too slow, so this rule shouldn't be imported. ++instruct jumpXtnd(mRegL switch_val) %{ ++ match(Jump switch_val); ++ ++ ins_cost(350); ++ ++ format %{ "load T9 <-- [$constanttablebase, $switch_val, $constantoffset] @ jumpXtnd\n\t" ++ "jr T9\n\t" ++ "nop" %} ++ ins_encode %{ ++ Register table_base = $constanttablebase; ++ int con_offset = $constantoffset; ++ Register switch_reg = $switch_val$$Register; ++ ++ if (UseLEXT1) { ++ if (Assembler::is_simm(con_offset, 8)) { ++ __ gsldx(T9, table_base, switch_reg, con_offset); ++ } else if (Assembler::is_simm16(con_offset)) { ++ __ daddu(T9, table_base, switch_reg); ++ __ ld(T9, T9, con_offset); ++ } else { ++ __ move(T9, con_offset); ++ __ daddu(AT, table_base, switch_reg); ++ __ gsldx(T9, AT, T9, 0); ++ } ++ } else { ++ if (Assembler::is_simm16(con_offset)) { ++ __ daddu(T9, table_base, switch_reg); ++ __ ld(T9, T9, con_offset); ++ } else { ++ __ move(T9, con_offset); ++ __ daddu(AT, table_base, switch_reg); ++ __ daddu(AT, T9, AT); ++ __ ld(T9, AT, 0); ++ } ++ } ++ ++ __ jr(T9); ++ __ delayed()->nop(); ++ ++ %} ++ ins_pipe(pipe_jump); ++%} ++*/ ++ ++ ++// Tail Jump; remove the return address; jump to target. ++// TailCall above leaves the return address around. ++// TailJump is used in only one place, the rethrow_Java stub (fancy_jump=2). ++// ex_oop (Exception Oop) is needed in %o0 at the jump. As there would be a ++// "restore" before this instruction (in Epilogue), we need to materialize it ++// in %i0. 
++//FIXME ++instruct tailjmpInd(mRegP jump_target,mRegP ex_oop) %{ ++ match( TailJump jump_target ex_oop ); ++ ins_cost(200); ++ format %{ "Jmp $jump_target ; ex_oop = $ex_oop #@tailjmpInd" %} ++ ins_encode %{ ++ Register target = $jump_target$$Register; ++ ++ // V0, V1 are indicated in: ++ // [stubGenerator_mips.cpp] generate_forward_exception() ++ // [runtime_mips.cpp] OptoRuntime::generate_exception_blob() ++ // ++ Register oop = $ex_oop$$Register; ++ Register exception_oop = V0; ++ Register exception_pc = V1; ++ ++ __ move(exception_pc, RA); ++ __ move(exception_oop, oop); ++ ++ __ jr(target); ++ __ delayed()->nop(); ++ %} ++ ins_pipe( pipe_jump ); ++%} ++ ++// ============================================================================ ++// Procedure Call/Return Instructions ++// Call Java Static Instruction ++// Note: If this code changes, the corresponding ret_addr_offset() and ++// compute_padding() functions will have to be adjusted. ++instruct CallStaticJavaDirect(method meth) %{ ++ match(CallStaticJava); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL,static #@CallStaticJavaDirect " %} ++ ins_encode( Java_Static_Call( meth ) ); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(16); ++%} ++ ++// Call Java Dynamic Instruction ++// Note: If this code changes, the corresponding ret_addr_offset() and ++// compute_padding() functions will have to be adjusted. ++instruct CallDynamicJavaDirect(method meth) %{ ++ match(CallDynamicJava); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{"MOV IC_Klass, #Universe::non_oop_word()\n\t" ++ "CallDynamic @ CallDynamicJavaDirect" %} ++ ins_encode( Java_Dynamic_Call( meth ) ); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(16); ++%} ++ ++instruct CallLeafNoFPDirect(method meth) %{ ++ match(CallLeafNoFP); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL_LEAF_NOFP,runtime " %} ++ ins_encode(Java_To_Runtime(meth)); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(16); ++%} ++ ++// Prefetch instructions for allocation. 
++ ++instruct prefetchAllocNTA( memory mem ) %{ ++ match(PrefetchAllocation mem); ++ ins_cost(125); ++ format %{ "pref $mem\t# Prefetch allocation @ prefetchAllocNTA" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++ ++// Call runtime without safepoint ++instruct CallLeafDirect(method meth) %{ ++ match(CallLeaf); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL_LEAF,runtime #@CallLeafDirect " %} ++ ins_encode(Java_To_Runtime(meth)); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(16); ++%} ++ ++// Load Char (16bit unsigned) ++instruct loadUS(mRegI dst, memory mem) %{ ++ match(Set dst (LoadUS mem)); ++ ++ ins_cost(125); ++ format %{ "loadUS $dst,$mem @ loadC" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadUS_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadUS mem))); ++ ++ ins_cost(125); ++ format %{ "loadUS $dst,$mem @ loadUS_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Store Char (16bit unsigned) ++instruct storeC(memory mem, mRegI src) %{ ++ match(Set mem (StoreC mem src)); ++ ++ ins_cost(125); ++ format %{ "storeC $src, $mem @ storeC" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_CHAR); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct storeC_0(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreC mem zero)); ++ ++ ins_cost(125); ++ format %{ "storeC $zero, $mem @ storeC_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct loadConF_immF_0(regF dst, immF_0 zero) %{ ++ match(Set dst zero); ++ ins_cost(100); ++ ++ format %{ "mov $dst, zero @ loadConF_immF_0\n"%} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ mtc1(R0, dst); ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++ ++instruct loadConF(regF dst, immF src) %{ ++ match(Set dst src); ++ ins_cost(125); ++ ++ format %{ "lwc1 $dst, $constantoffset[$constanttablebase] # load FLOAT $src from table @ loadConF" %} ++ ins_encode %{ ++ int con_offset = $constantoffset($src); ++ ++ if (Assembler::is_simm16(con_offset)) { ++ __ lwc1($dst$$FloatRegister, $constanttablebase, con_offset); ++ } else { ++ __ set64(AT, con_offset); ++ if (UseLEXT1) { ++ __ gslwxc1($dst$$FloatRegister, $constanttablebase, AT, 0); ++ } else { ++ __ daddu(AT, $constanttablebase, AT); ++ __ lwc1($dst$$FloatRegister, AT, 0); ++ } ++ } ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++ ++instruct loadConD_immD_0(regD dst, immD_0 zero) %{ ++ match(Set dst zero); ++ ins_cost(100); ++ ++ format %{ "mov $dst, zero @ loadConD_immD_0"%} ++ ins_encode %{ ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ dmtc1(R0, dst); ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++instruct loadConD(regD dst, immD src) %{ ++ match(Set dst src); ++ ins_cost(125); ++ ++ format %{ "ldc1 $dst, $constantoffset[$constanttablebase] # load DOUBLE $src from table @ loadConD" %} ++ ins_encode %{ ++ int con_offset = $constantoffset($src); ++ ++ if (Assembler::is_simm16(con_offset)) { ++ __ ldc1($dst$$FloatRegister, $constanttablebase, 
con_offset); ++ } else { ++ __ set64(AT, con_offset); ++ if (UseLEXT1) { ++ __ gsldxc1($dst$$FloatRegister, $constanttablebase, AT, 0); ++ } else { ++ __ daddu(AT, $constanttablebase, AT); ++ __ ldc1($dst$$FloatRegister, AT, 0); ++ } ++ } ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++// Store register Float value (it is faster than store from FPU register) ++instruct storeF_reg( memory mem, regF src) %{ ++ match(Set mem (StoreF mem src)); ++ ++ ins_cost(50); ++ format %{ "store $mem, $src\t# store float @ storeF_reg" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_FLOAT); ++ %} ++ ins_pipe( fpu_storeF ); ++%} ++ ++instruct storeF_immF_0( memory mem, immF_0 zero) %{ ++ match(Set mem (StoreF mem zero)); ++ ++ ins_cost(40); ++ format %{ "store $mem, zero\t# store float @ storeF_immF_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Load Double ++instruct loadD(regD dst, memory mem) %{ ++ match(Set dst (LoadD mem)); ++ ++ ins_cost(150); ++ format %{ "loadD $dst, $mem #@loadD" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_DOUBLE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Double - UNaligned ++instruct loadD_unaligned(regD dst, memory mem ) %{ ++ match(Set dst (LoadD_unaligned mem)); ++ ins_cost(250); ++ // FIXME: Need more effective ldl/ldr ++ format %{ "loadD_unaligned $dst, $mem #@loadD_unaligned" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_DOUBLE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct storeD_reg( memory mem, regD src) %{ ++ match(Set mem (StoreD mem src)); ++ ++ ins_cost(50); ++ format %{ "store $mem, $src\t# store float @ storeD_reg" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_DOUBLE); ++ %} ++ ins_pipe( fpu_storeF ); ++%} ++ ++instruct storeD_immD_0( memory mem, immD_0 zero) %{ ++ match(Set mem (StoreD mem zero)); ++ ++ ins_cost(40); ++ format %{ "store $mem, zero\t# store float @ storeD_immD_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct loadSSI(mRegI dst, stackSlotI src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "lw $dst, $src\t# int stk @ loadSSI" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($src$$disp), "disp too long (loadSSI) !"); ++ __ lw($dst$$Register, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSI(stackSlotI dst, mRegI src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "sw $dst, $src\t# int stk @ storeSSI" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($dst$$disp), "disp too long (storeSSI) !"); ++ __ sw($src$$Register, SP, $dst$$disp); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++instruct loadSSL(mRegL dst, stackSlotL src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "ld $dst, $src\t# long stk @ loadSSL" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($src$$disp), "disp too long (loadSSL) !"); ++ __ ld($dst$$Register, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSL(stackSlotL dst, mRegL src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); 
++ format %{ "sd $dst, $src\t# long stk @ storeSSL" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($dst$$disp), "disp too long (storeSSL) !"); ++ __ sd($src$$Register, SP, $dst$$disp); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++instruct loadSSP(mRegP dst, stackSlotP src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "ld $dst, $src\t# ptr stk @ loadSSP" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($src$$disp), "disp too long (loadSSP) !"); ++ __ ld($dst$$Register, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSP(stackSlotP dst, mRegP src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "sd $dst, $src\t# ptr stk @ storeSSP" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($dst$$disp), "disp too long (storeSSP) !"); ++ __ sd($src$$Register, SP, $dst$$disp); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++instruct loadSSF(regF dst, stackSlotF src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "lwc1 $dst, $src\t# float stk @ loadSSF" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($src$$disp), "disp too long (loadSSF) !"); ++ __ lwc1($dst$$FloatRegister, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSF(stackSlotF dst, regF src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "swc1 $dst, $src\t# float stk @ storeSSF" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($dst$$disp), "disp too long (storeSSF) !"); ++ __ swc1($src$$FloatRegister, SP, $dst$$disp); ++ %} ++ ins_pipe(fpu_storeF); ++%} ++ ++// Use the same format since predicate() can not be used here. ++instruct loadSSD(regD dst, stackSlotD src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "ldc1 $dst, $src\t# double stk @ loadSSD" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($src$$disp), "disp too long (loadSSD) !"); ++ __ ldc1($dst$$FloatRegister, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSD(stackSlotD dst, regD src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "sdc1 $dst, $src\t# double stk @ storeSSD" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($dst$$disp), "disp too long (storeSSD) !"); ++ __ sdc1($src$$FloatRegister, SP, $dst$$disp); ++ %} ++ ins_pipe(fpu_storeF); ++%} ++ ++instruct cmpFastLock(FlagsReg cr, mRegP object, mRegP box, mRegI tmp, mRegI scr) %{ ++ match(Set cr (FastLock object box)); ++ effect(TEMP tmp, TEMP scr); ++ ins_cost(300); ++ format %{ "FASTLOCK $cr <-- $object, $box, $tmp, $scr #@ cmpFastLock" %} ++ ins_encode %{ ++ __ fast_lock($object$$Register, $box$$Register, $cr$$Register, $tmp$$Register, $scr$$Register); ++ %} ++ ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++%} ++ ++instruct cmpFastUnlock(FlagsReg cr, mRegP object, mRegP box, mRegI tmp, mRegI scr) %{ ++ match(Set cr (FastUnlock object box)); ++ effect(TEMP tmp, TEMP scr); ++ ins_cost(300); ++ format %{ "FASTUNLOCK $cr <-- $object, $box, $tmp #@cmpFastUnlock" %} ++ ins_encode %{ ++ __ fast_unlock($object$$Register, $box$$Register, $cr$$Register, $tmp$$Register, $scr$$Register); ++ %} ++ ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++%} ++ ++// Store CMS card-mark Immediate 0 ++instruct storeImmCM(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreCM mem zero)); ++ ++ ins_cost(150); ++ format %{ "MEMBAR\n\t" ++ "sb $mem, zero\t! 
CMS card-mark imm0" %} ++ ins_encode %{ ++ __ sync(); ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Die now ++instruct ShouldNotReachHere( ) ++%{ ++ match(Halt); ++ ins_cost(300); ++ ++ // Use the following format syntax ++ format %{ "ILLTRAP ;#@ShouldNotReachHere" %} ++ ins_encode %{ ++ // Here we should emit illtrap ! ++ ++ __ stop("in ShouldNotReachHere"); ++ ++ %} ++ ins_pipe( pipe_jump ); ++%} ++ ++instruct leaP8Narrow(mRegP dst, indOffset8Narrow mem) ++%{ ++ predicate(Universe::narrow_oop_shift() == 0); ++ match(Set dst mem); ++ ++ ins_cost(110); ++ format %{ "leaq $dst, $mem\t# ptr off8narrow @ leaP8Narrow" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register base = as_Register($mem$$base); ++ int disp = $mem$$disp; ++ ++ __ daddiu(dst, base, disp); ++ %} ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++instruct leaPPosIdxScaleOff8(mRegP dst, basePosIndexScaleOffset8 mem) ++%{ ++ match(Set dst mem); ++ ++ ins_cost(110); ++ format %{ "leaq $dst, $mem\t# @ PosIdxScaleOff8" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register base = as_Register($mem$$base); ++ Register index = as_Register($mem$$index); ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ if (scale == 0) { ++ __ daddu(AT, base, index); ++ __ daddiu(dst, AT, disp); ++ } else { ++ __ dsll(AT, index, scale); ++ __ daddu(AT, base, AT); ++ __ daddiu(dst, AT, disp); ++ } ++ %} ++ ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++instruct leaPIdxScale(mRegP dst, indIndexScale mem) ++%{ ++ match(Set dst mem); ++ ++ ins_cost(110); ++ format %{ "leaq $dst, $mem\t# @ leaPIdxScale" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register base = as_Register($mem$$base); ++ Register index = as_Register($mem$$index); ++ int scale = $mem$$scale; ++ ++ if (scale == 0) { ++ __ daddu(dst, base, index); ++ } else { ++ __ dsll(AT, index, scale); ++ __ daddu(dst, base, AT); ++ } ++ %} ++ ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++ ++// ============================================================================ ++// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass ++// array for an instance of the superklass. Set a hidden internal cache on a ++// hit (cache is checked with exposed code in gen_subtype_check()). Return ++// NZ for a miss or zero for a hit. The encoding ALSO sets flags. ++instruct partialSubtypeCheck( mRegP result, no_T8_mRegP sub, no_T8_mRegP super, mT8RegI tmp ) %{ ++ match(Set result (PartialSubtypeCheck sub super)); ++ effect(KILL tmp); ++ ins_cost(1100); // slightly larger than the next version ++ format %{ "partialSubtypeCheck result=$result, sub=$sub, super=$super, tmp=$tmp " %} ++ ++ ins_encode( enc_PartialSubtypeCheck(result, sub, super, tmp) ); ++ ins_pipe( pipe_slow ); ++%} ++ ++// Conditional-store of the updated heap-top. ++// Used during allocation of the shared heap. 
++ ++instruct storePConditional(memory heap_top_ptr, mRegP oldval, mRegP newval, FlagsReg cr) %{ ++ match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); ++ ++ format %{ "move AT, $newval\n\t" ++ "sc_d $heap_top_ptr, AT\t# (ptr) @storePConditional \n\t" ++ "move $cr, AT\n" %} ++ ins_encode%{ ++ Register oldval = $oldval$$Register; ++ Register newval = $newval$$Register; ++ Address addr(as_Register($heap_top_ptr$$base), $heap_top_ptr$$disp); ++ ++ int index = $heap_top_ptr$$index; ++ int scale = $heap_top_ptr$$scale; ++ int disp = $heap_top_ptr$$disp; ++ ++ guarantee(Assembler::is_simm16(disp), ""); ++ ++ if (index != 0) { ++ __ stop("in storePConditional: index != 0"); ++ } else { ++ __ move(AT, newval); ++ __ scd(AT, addr); ++ __ move($cr$$Register, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++// Conditional-store of an int value. ++// AT flag is set on success, reset otherwise. ++instruct storeIConditional(memory mem, mRegI oldval, mRegI newval, FlagsReg cr) %{ ++ match(Set cr (StoreIConditional mem (Binary oldval newval))); ++ format %{ "CMPXCHG $newval, $mem, $oldval \t# @storeIConditional" %} ++ ++ ins_encode %{ ++ Register oldval = $oldval$$Register; ++ Register newval = $newval$$Register; ++ Register cr = $cr$$Register; ++ Address addr(as_Register($mem$$base), $mem$$disp); ++ ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ guarantee(Assembler::is_simm16(disp), ""); ++ ++ if (index != 0) { ++ __ stop("in storeIConditional: index != 0"); ++ } else { ++ if (cr != addr.base() && cr != oldval && cr != newval) { ++ __ cmpxchg32(addr, oldval, newval, cr, true, false, true); ++ } else { ++ __ cmpxchg32(addr, oldval, newval, AT, true, false, true); ++ __ move(cr, AT); ++ } ++ } ++%} ++ ++ ins_pipe(long_memory_op); ++%} ++ ++// Conditional-store of a long value. ++// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG. ++instruct storeLConditional(memory mem, mRegL oldval, mRegL newval, FlagsReg cr) ++%{ ++ match(Set cr (StoreLConditional mem (Binary oldval newval))); ++ ++ format %{ "cmpxchg $mem, $newval\t# If $oldval == $mem then store $newval into $mem" %} ++ ins_encode%{ ++ Register oldval = $oldval$$Register; ++ Register newval = $newval$$Register; ++ Register cr = $cr$$Register; ++ Address addr(as_Register($mem$$base), $mem$$disp); ++ ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ guarantee(Assembler::is_simm16(disp), ""); ++ ++ if (index != 0) { ++ __ stop("in storeLConditional: index != 0"); ++ } else { ++ if (cr != addr.base() && cr != oldval && cr != newval) { ++ __ cmpxchg(addr, oldval, newval, cr, false, true); ++ } else { ++ __ cmpxchg(addr, oldval, newval, AT, false, true); ++ __ move(cr, AT); ++ } ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++// Implement LoadPLocked. Must be ordered against changes of the memory location ++// by storePConditional. 
++instruct loadPLocked(mRegP dst, memory mem) %{ ++ match(Set dst (LoadPLocked mem)); ++ ins_cost(MEMORY_REF_COST); ++ ++ format %{ "lld $dst, $mem #@loadPLocked\n\t" %} ++ size(12); ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LINKED_LONG); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct compareAndSwapI(mRegI res, mRegP mem_ptr, mRegI oldval, mRegI newval) %{ ++ match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval))); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapI" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg32(addr, oldval, newval, res, true, false, true); ++ } else { ++ __ cmpxchg32(addr, oldval, newval, AT, true, false, true); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++instruct compareAndSwapL(mRegI res, mRegP mem_ptr, mRegL oldval, mRegL newval) %{ ++ predicate(VM_Version::supports_cx8()); ++ match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval))); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapL" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg(addr, oldval, newval, res, false, true); ++ } else { ++ __ cmpxchg(addr, oldval, newval, AT, false, true); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++instruct compareAndSwapP(mRegI res, mRegP mem_ptr, mRegP oldval, mRegP newval) %{ ++ match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval))); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapP" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg(addr, oldval, newval, res, false, true); ++ } else { ++ __ cmpxchg(addr, oldval, newval, AT, false, true); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++instruct compareAndSwapN(mRegI res, mRegP mem_ptr, mRegN oldval, mRegN newval) %{ ++ match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval))); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapN" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg32(addr, oldval, newval, res, false, false, true); ++ } else { ++ __ cmpxchg32(addr, oldval, newval, AT, false, false, true); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++//----------Max and Min-------------------------------------------------------- ++// Min Instructions ++//// ++// *** Min and Max using the conditional move are slower than the ++// *** branch version on a Pentium III. 
++// // Conditional move for min ++//instruct cmovI_reg_lt( eRegI op2, eRegI op1, eFlagsReg cr ) %{ ++// effect( USE_DEF op2, USE op1, USE cr ); ++// format %{ "CMOVlt $op2,$op1\t! min" %} ++// opcode(0x4C,0x0F); ++// ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); ++// ins_pipe( pipe_cmov_reg ); ++//%} ++// ++//// Min Register with Register (P6 version) ++//instruct minI_eReg_p6( eRegI op1, eRegI op2 ) %{ ++// predicate(VM_Version::supports_cmov() ); ++// match(Set op2 (MinI op1 op2)); ++// ins_cost(200); ++// expand %{ ++// eFlagsReg cr; ++// compI_eReg(cr,op1,op2); ++// cmovI_reg_lt(op2,op1,cr); ++// %} ++//%} ++ ++// Min Register with Register (generic version) ++instruct minI_Reg_Reg(mRegI dst, mRegI src) %{ ++ match(Set dst (MinI dst src)); ++ //effect(KILL flags); ++ ins_cost(80); ++ ++ format %{ "MIN $dst, $src @minI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ slt(AT, src, dst); ++ __ movn(dst, src, AT); ++ ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++// Max Register with Register ++// *** Min and Max using the conditional move are slower than the ++// *** branch version on a Pentium III. ++// // Conditional move for max ++//instruct cmovI_reg_gt( eRegI op2, eRegI op1, eFlagsReg cr ) %{ ++// effect( USE_DEF op2, USE op1, USE cr ); ++// format %{ "CMOVgt $op2,$op1\t! max" %} ++// opcode(0x4F,0x0F); ++// ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); ++// ins_pipe( pipe_cmov_reg ); ++//%} ++// ++// // Max Register with Register (P6 version) ++//instruct maxI_eReg_p6( eRegI op1, eRegI op2 ) %{ ++// predicate(VM_Version::supports_cmov() ); ++// match(Set op2 (MaxI op1 op2)); ++// ins_cost(200); ++// expand %{ ++// eFlagsReg cr; ++// compI_eReg(cr,op1,op2); ++// cmovI_reg_gt(op2,op1,cr); ++// %} ++//%} ++ ++// Max Register with Register (generic version) ++instruct maxI_Reg_Reg(mRegI dst, mRegI src) %{ ++ match(Set dst (MaxI dst src)); ++ ins_cost(80); ++ ++ format %{ "MAX $dst, $src @maxI_Reg_Reg" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ slt(AT, dst, src); ++ __ movn(dst, src, AT); ++ ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct maxI_Reg_zero(mRegI dst, immI_0 zero) %{ ++ match(Set dst (MaxI dst zero)); ++ ins_cost(50); ++ ++ format %{ "MAX $dst, 0 @maxI_Reg_zero" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ slt(AT, dst, R0); ++ __ movn(dst, R0, AT); ++ ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct zerox_long_reg_reg(mRegL dst, mRegL src, immL_MaxUI mask) ++%{ ++ match(Set dst (AndL src mask)); ++ ++ format %{ "movl $dst, $src\t# zero-extend long @ zerox_long_reg_reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ dext(dst, src, 0, 32); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++instruct combine_i2l(mRegL dst, mRegI src1, immL_MaxUI mask, mRegI src2, immI_32 shift32) ++%{ ++ match(Set dst (OrL (AndL (ConvI2L src1) mask) (LShiftL (ConvI2L src2) shift32))); ++ ++ format %{ "combine_i2l $dst, $src2(H), $src1(L) @ combine_i2l" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ if (src1 == dst) { ++ __ dinsu(dst, src2, 32, 32); ++ } else if (src2 == dst) { ++ __ dsll32(dst, dst, 0); ++ __ dins(dst, src1, 0, 32); ++ } else { ++ __ dext(dst, src1, 0, 32); ++ __ dinsu(dst, src2, 32, 32); ++ } ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Zero-extend convert int to long ++instruct convI2L_reg_reg_zex(mRegL 
dst, mRegI src, immL_MaxUI mask) ++%{ ++ match(Set dst (AndL (ConvI2L src) mask)); ++ ++ format %{ "movl $dst, $src\t# i2l zero-extend @ convI2L_reg_reg_zex" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ dext(dst, src, 0, 32); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++instruct convL2I2L_reg_reg_zex(mRegL dst, mRegL src, immL_MaxUI mask) ++%{ ++ match(Set dst (AndL (ConvI2L (ConvL2I src)) mask)); ++ ++ format %{ "movl $dst, $src\t# i2l zero-extend @ convL2I2L_reg_reg_zex" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ dext(dst, src, 0, 32); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Match loading integer and casting it to unsigned int in long register. ++// LoadI + ConvI2L + AndL 0xffffffff. ++instruct loadUI2L_rmask(mRegL dst, memory mem, immL_MaxUI mask) %{ ++ match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); ++ ++ format %{ "lwu $dst, $mem \t// zero-extend to long @ loadUI2L_rmask" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct loadUI2L_lmask(mRegL dst, memory mem, immL_MaxUI mask) %{ ++ match(Set dst (AndL mask (ConvI2L (LoadI mem)))); ++ ++ format %{ "lwu $dst, $mem \t// zero-extend to long @ loadUI2L_lmask" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++ ++// ============================================================================ ++// Safepoint Instruction ++ ++instruct safePoint_poll() %{ ++ predicate(SafepointMechanism::uses_global_page_poll()); ++ match(SafePoint); ++ ++ ins_cost(105); ++ format %{ "poll for GC @ safePoint_poll" %} ++ ++ ins_encode %{ ++ __ block_comment("Safepoint:"); ++ __ set64(T9, (long)os::get_polling_page()); ++ __ relocate(relocInfo::poll_type); ++ __ lw(AT, T9, 0); ++ %} ++ ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct safePoint_poll_tls(mRegP poll) %{ ++ match(SafePoint poll); ++ predicate(SafepointMechanism::uses_thread_local_poll()); ++ effect(USE poll); ++ ++ ins_cost(125); ++ format %{ "lw AT, [$poll]\t" ++ "Safepoint @ [$poll] : poll for GC" %} ++ size(4); ++ ins_encode %{ ++ Register poll_reg = $poll$$Register; ++ ++ __ block_comment("Safepoint:"); ++ __ relocate(relocInfo::poll_type); ++ address pre_pc = __ pc(); ++ __ lw(AT, poll_reg, 0); ++ assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit lw AT, [$poll]"); ++ %} ++ ++ ins_pipe( ialu_storeI ); ++%} ++ ++//----------Arithmetic Conversion Instructions--------------------------------- ++ ++instruct roundFloat_nop(regF dst) ++%{ ++ match(Set dst (RoundFloat dst)); ++ ++ ins_cost(0); ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++instruct roundDouble_nop(regD dst) ++%{ ++ match(Set dst (RoundDouble dst)); ++ ++ ins_cost(0); ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++//---------- Zeros Count Instructions ------------------------------------------ ++// CountLeadingZerosINode CountTrailingZerosINode ++instruct countLeadingZerosI(mRegI dst, mRegI src) %{ ++ predicate(UseCountLeadingZerosInstructionMIPS64); ++ match(Set dst (CountLeadingZerosI src)); ++ ++ format %{ "clz $dst, 
$src\t# count leading zeros (int)" %} ++ ins_encode %{ ++ __ clz($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct countLeadingZerosL(mRegI dst, mRegL src) %{ ++ predicate(UseCountLeadingZerosInstructionMIPS64); ++ match(Set dst (CountLeadingZerosL src)); ++ ++ format %{ "dclz $dst, $src\t# count leading zeros (long)" %} ++ ins_encode %{ ++ __ dclz($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct countTrailingZerosI(mRegI dst, mRegI src) %{ ++ predicate(UseCountTrailingZerosInstructionMIPS64); ++ match(Set dst (CountTrailingZerosI src)); ++ ++ format %{ "ctz $dst, $src\t# count trailing zeros (int)" %} ++ ins_encode %{ ++ // ctz and dctz is gs instructions. ++ __ ctz($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct countTrailingZerosL(mRegI dst, mRegL src) %{ ++ predicate(UseCountTrailingZerosInstructionMIPS64); ++ match(Set dst (CountTrailingZerosL src)); ++ ++ format %{ "dcto $dst, $src\t# count trailing zeros (long)" %} ++ ins_encode %{ ++ __ dctz($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// ====================VECTOR INSTRUCTIONS===================================== ++ ++// Load vectors (8 bytes long) ++instruct loadV8(vecD dst, memory mem) %{ ++ predicate(n->as_LoadVector()->memory_size() == 8); ++ match(Set dst (LoadVector mem)); ++ ins_cost(125); ++ format %{ "load $dst, $mem\t! load vector (8 bytes)" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_DOUBLE); ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++// Store vectors (8 bytes long) ++instruct storeV8(memory mem, vecD src) %{ ++ predicate(n->as_StoreVector()->memory_size() == 8); ++ match(Set mem (StoreVector mem src)); ++ ins_cost(145); ++ format %{ "store $mem, $src\t! store vector (8 bytes)" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_DOUBLE); ++ %} ++ ins_pipe( fpu_storeF ); ++%} ++ ++instruct Repl8B_DSP(vecD dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 8 && UseLEXT3); ++ match(Set dst (ReplicateB src)); ++ ins_cost(100); ++ format %{ "replv_ob AT, $src\n\t" ++ "dmtc1 AT, $dst\t! replicate8B" %} ++ ins_encode %{ ++ __ replv_ob(AT, $src$$Register); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl8B(vecD dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateB src)); ++ ins_cost(140); ++ format %{ "move AT, $src\n\t" ++ "dins AT, AT, 8, 8\n\t" ++ "dins AT, AT, 16, 16\n\t" ++ "dinsu AT, AT, 32, 32\n\t" ++ "dmtc1 AT, $dst\t! replicate8B" %} ++ ins_encode %{ ++ __ move(AT, $src$$Register); ++ __ dins(AT, AT, 8, 8); ++ __ dins(AT, AT, 16, 16); ++ __ dinsu(AT, AT, 32, 32); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl8B_imm_DSP(vecD dst, immI con) %{ ++ predicate(n->as_Vector()->length() == 8 && UseLEXT3 && VM_Version::supports_dsp()); ++ match(Set dst (ReplicateB con)); ++ ins_cost(110); ++ format %{ "repl_ob AT, [$con]\n\t" ++ "dmtc1 AT, $dst,0x00\t! 
replicate8B($con)" %} ++ ins_encode %{ ++ int val = $con$$constant; ++ __ repl_ob(AT, val); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl8B_imm(vecD dst, immI con) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateB con)); ++ ins_cost(150); ++ format %{ "move AT, [$con]\n\t" ++ "dins AT, AT, 8, 8\n\t" ++ "dins AT, AT, 16, 16\n\t" ++ "dinsu AT, AT, 32, 32\n\t" ++ "dmtc1 AT, $dst,0x00\t! replicate8B($con)" %} ++ ins_encode %{ ++ __ move(AT, $con$$constant); ++ __ dins(AT, AT, 8, 8); ++ __ dins(AT, AT, 16, 16); ++ __ dinsu(AT, AT, 32, 32); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl8B_zero(vecD dst, immI_0 zero) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateB zero)); ++ ins_cost(90); ++ format %{ "dmtc1 R0, $dst\t! replicate8B zero" %} ++ ins_encode %{ ++ __ dmtc1(R0, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl8B_M1(vecD dst, immI_M1 M1) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateB M1)); ++ ins_cost(80); ++ format %{ "dmtc1 -1, $dst\t! replicate8B -1" %} ++ ins_encode %{ ++ __ nor(AT, R0, R0); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl4S_DSP(vecD dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 4 && UseLEXT3 && VM_Version::supports_dsp()); ++ match(Set dst (ReplicateS src)); ++ ins_cost(100); ++ format %{ "replv_qh AT, $src\n\t" ++ "dmtc1 AT, $dst\t! replicate4S" %} ++ ins_encode %{ ++ __ replv_qh(AT, $src$$Register); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl4S(vecD dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateS src)); ++ ins_cost(120); ++ format %{ "move AT, $src \n\t" ++ "dins AT, AT, 16, 16\n\t" ++ "dinsu AT, AT, 32, 32\n\t" ++ "dmtc1 AT, $dst\t! replicate4S" %} ++ ins_encode %{ ++ __ move(AT, $src$$Register); ++ __ dins(AT, AT, 16, 16); ++ __ dinsu(AT, AT, 32, 32); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl4S_imm_DSP(vecD dst, immI con) %{ ++ predicate(n->as_Vector()->length() == 4 && UseLEXT3 && VM_Version::supports_dsp()); ++ match(Set dst (ReplicateS con)); ++ ins_cost(100); ++ format %{ "repl_qh AT, [$con]\n\t" ++ "dmtc1 AT, $dst\t! replicate4S($con)" %} ++ ins_encode %{ ++ int val = $con$$constant; ++ if ( Assembler::is_simm(val, 10)) { ++ //repl_qh supports 10 bits immediate ++ __ repl_qh(AT, val); ++ } else { ++ __ li32(AT, val); ++ __ replv_qh(AT, AT); ++ } ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl4S_imm(vecD dst, immI con) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateS con)); ++ ins_cost(110); ++ format %{ "move AT, [$con]\n\t" ++ "dins AT, AT, 16, 16\n\t" ++ "dinsu AT, AT, 32, 32\n\t" ++ "dmtc1 AT, $dst\t! replicate4S($con)" %} ++ ins_encode %{ ++ __ move(AT, $con$$constant); ++ __ dins(AT, AT, 16, 16); ++ __ dinsu(AT, AT, 32, 32); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl4S_zero(vecD dst, immI_0 zero) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateS zero)); ++ format %{ "dmtc1 R0, $dst\t! 
replicate4S zero" %} ++ ins_encode %{ ++ __ dmtc1(R0, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl4S_M1(vecD dst, immI_M1 M1) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateS M1)); ++ format %{ "dmtc1 -1, $dst\t! replicate4S -1" %} ++ ins_encode %{ ++ __ nor(AT, R0, R0); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++// Replicate integer (4 byte) scalar to be vector ++instruct Repl2I(vecD dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateI src)); ++ format %{ "dins AT, $src, 0, 32\n\t" ++ "dinsu AT, $src, 32, 32\n\t" ++ "dmtc1 AT, $dst\t! replicate2I" %} ++ ins_encode %{ ++ __ dins(AT, $src$$Register, 0, 32); ++ __ dinsu(AT, $src$$Register, 32, 32); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++// Replicate integer (4 byte) scalar immediate to be vector by loading from const table. ++instruct Repl2I_imm(vecD dst, immI con, mA7RegI tmp) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateI con)); ++ effect(KILL tmp); ++ format %{ "li32 AT, [$con], 32\n\t" ++ "dinsu AT, AT\n\t" ++ "dmtc1 AT, $dst\t! replicate2I($con)" %} ++ ins_encode %{ ++ int val = $con$$constant; ++ __ li32(AT, val); ++ __ dinsu(AT, AT, 32, 32); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++// Replicate integer (4 byte) scalar zero to be vector ++instruct Repl2I_zero(vecD dst, immI_0 zero) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateI zero)); ++ format %{ "dmtc1 R0, $dst\t! replicate2I zero" %} ++ ins_encode %{ ++ __ dmtc1(R0, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++// Replicate integer (4 byte) scalar -1 to be vector ++instruct Repl2I_M1(vecD dst, immI_M1 M1) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateI M1)); ++ format %{ "dmtc1 -1, $dst\t! replicate2I -1, use AT" %} ++ ins_encode %{ ++ __ nor(AT, R0, R0); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++// Replicate float (4 byte) scalar to be vector ++instruct Repl2F(vecD dst, regF src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateF src)); ++ format %{ "cvt.ps $dst, $src, $src\t! replicate2F" %} ++ ins_encode %{ ++ __ cvt_ps_s($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// Replicate float (4 byte) scalar zero to be vector ++instruct Repl2F_zero(vecD dst, immF_0 zero) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateF zero)); ++ format %{ "dmtc1 R0, $dst\t! replicate2F zero" %} ++ ins_encode %{ ++ __ dmtc1(R0, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++ ++// ====================VECTOR ARITHMETIC======================================= ++ ++// --------------------------------- ADD -------------------------------------- ++ ++// Floats vector add ++// kernel does not have emulation of PS instructions yet, so PS instructions is disabled. ++instruct vadd2F(vecD dst, vecD src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (AddVF dst src)); ++ format %{ "add.ps $dst,$src\t! 
add packed2F" %} ++ ins_encode %{ ++ __ add_ps($dst$$FloatRegister, $dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct vadd2F3(vecD dst, vecD src1, vecD src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (AddVF src1 src2)); ++ format %{ "add.ps $dst,$src1,$src2\t! add packed2F" %} ++ ins_encode %{ ++ __ add_ps($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++// --------------------------------- SUB -------------------------------------- ++ ++// Floats vector sub ++instruct vsub2F(vecD dst, vecD src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (SubVF dst src)); ++ format %{ "sub.ps $dst,$src\t! sub packed2F" %} ++ ins_encode %{ ++ __ sub_ps($dst$$FloatRegister, $dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++// --------------------------------- MUL -------------------------------------- ++ ++// Floats vector mul ++instruct vmul2F(vecD dst, vecD src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (MulVF dst src)); ++ format %{ "mul.ps $dst, $src\t! mul packed2F" %} ++ ins_encode %{ ++ __ mul_ps($dst$$FloatRegister, $dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct vmul2F3(vecD dst, vecD src1, vecD src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (MulVF src1 src2)); ++ format %{ "mul.ps $dst, $src1, $src2\t! mul packed2F" %} ++ ins_encode %{ ++ __ mul_ps($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++// --------------------------------- DIV -------------------------------------- ++// MIPS do not have div.ps ++ ++// --------------------------------- MADD -------------------------------------- ++// Floats vector madd ++//instruct vmadd2F(vecD dst, vecD src1, vecD src2, vecD src3) %{ ++// predicate(n->as_Vector()->length() == 2); ++// match(Set dst (AddVF (MulVF src1 src2) src3)); ++// ins_cost(50); ++// format %{ "madd.ps $dst, $src3, $src1, $src2\t! madd packed2F" %} ++// ins_encode %{ ++// __ madd_ps($dst$$FloatRegister, $src3$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++// %} ++// ins_pipe( fpu_regF_regF ); ++//%} ++ ++ ++//----------PEEPHOLE RULES----------------------------------------------------- ++// These must follow all instruction definitions as they use the names ++// defined in the instructions definitions. ++// ++// peepmatch ( root_instr_name [preceeding_instruction]* ); ++// ++// peepconstraint %{ ++// (instruction_number.operand_name relational_op instruction_number.operand_name ++// [, ...] ); ++// // instruction numbers are zero-based using left to right order in peepmatch ++// ++// peepreplace ( instr_name ( [instruction_number.operand_name]* ) ); ++// // provide an instruction_number.operand_name for each operand that appears ++// // in the replacement instruction's match rule ++// ++// ---------VM FLAGS--------------------------------------------------------- ++// ++// All peephole optimizations can be turned off using -XX:-OptoPeephole ++// ++// Each peephole rule is given an identifying number starting with zero and ++// increasing by one in the order seen by the parser. An individual peephole ++// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=# ++// on the command-line. 
++// ++// ---------CURRENT LIMITATIONS---------------------------------------------- ++// ++// Only match adjacent instructions in same basic block ++// Only equality constraints ++// Only constraints between operands, not (0.dest_reg == EAX_enc) ++// Only one replacement instruction ++// ++// ---------EXAMPLE---------------------------------------------------------- ++// ++// // pertinent parts of existing instructions in architecture description ++// instruct movI(eRegI dst, eRegI src) %{ ++// match(Set dst (CopyI src)); ++// %} ++// ++// instruct incI_eReg(eRegI dst, immI_1 src, eFlagsReg cr) %{ ++// match(Set dst (AddI dst src)); ++// effect(KILL cr); ++// %} ++// ++// // Change (inc mov) to lea ++// peephole %{ ++// // increment preceeded by register-register move ++// peepmatch ( incI_eReg movI ); ++// // require that the destination register of the increment ++// // match the destination register of the move ++// peepconstraint ( 0.dst == 1.dst ); ++// // construct a replacement instruction that sets ++// // the destination to ( move's source register + one ) ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// Implementation no longer uses movX instructions since ++// machine-independent system no longer uses CopyX nodes. ++// ++// peephole %{ ++// peepmatch ( incI_eReg movI ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// peephole %{ ++// peepmatch ( decI_eReg movI ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// peephole %{ ++// peepmatch ( addI_eReg_imm movI ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// peephole %{ ++// peepmatch ( addP_eReg_imm movP ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++ ++// // Change load of spilled value to only a spill ++// instruct storeI(memory mem, eRegI src) %{ ++// match(Set mem (StoreI mem src)); ++// %} ++// ++// instruct loadI(eRegI dst, memory mem) %{ ++// match(Set dst (LoadI mem)); ++// %} ++// ++//peephole %{ ++// peepmatch ( loadI storeI ); ++// peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem ); ++// peepreplace ( storeI( 1.mem 1.mem 1.src ) ); ++//%} ++ ++//----------SMARTSPILL RULES--------------------------------------------------- ++// These must follow all instruction definitions as they use the names ++// defined in the instructions definitions. ++ +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/mips.ad b/src/hotspot/cpu/mips/mips.ad +--- a/src/hotspot/cpu/mips/mips.ad 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/mips.ad 2024-01-30 10:00:11.844765024 +0800 +@@ -0,0 +1,25 @@ ++// ++// Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. ++// ++// ++ +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/nativeInst_mips.cpp b/src/hotspot/cpu/mips/nativeInst_mips.cpp +--- a/src/hotspot/cpu/mips/nativeInst_mips.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/nativeInst_mips.cpp 2024-01-30 10:00:11.848098317 +0800 +@@ -0,0 +1,1821 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "compiler/disassembler.hpp" ++#include "code/codeCache.hpp" ++#include "code/compiledIC.hpp" ++#include "memory/resourceArea.hpp" ++#include "nativeInst_mips.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/handles.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "utilities/ostream.hpp" ++ ++#include ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++void NativeInstruction::wrote(int offset) { ++ ICache::invalidate_word(addr_at(offset)); ++} ++ ++void NativeInstruction::set_long_at(int offset, long i) { ++ address addr = addr_at(offset); ++ *(long*)addr = i; ++ ICache::invalidate_range(addr, 8); ++} ++ ++static int illegal_instruction_bits = 0; ++ ++int NativeInstruction::illegal_instruction() { ++ if (illegal_instruction_bits == 0) { ++ ResourceMark rm; ++ char buf[40]; ++ CodeBuffer cbuf((address)&buf[0], 20); ++ MacroAssembler* a = new MacroAssembler(&cbuf); ++ address ia = a->pc(); ++ a->brk(11); ++ int bits = *(int*)ia; ++ illegal_instruction_bits = bits; ++ } ++ return illegal_instruction_bits; ++} ++ ++bool NativeInstruction::is_int_branch() { ++ switch(Assembler::opcode(insn_word())) { ++ case Assembler::beq_op: ++ case Assembler::beql_op: ++ case Assembler::bgtz_op: ++ case Assembler::bgtzl_op: ++ case Assembler::blez_op: ++ case Assembler::blezl_op: ++ case Assembler::bne_op: ++ case Assembler::bnel_op: ++ return true; ++ case Assembler::regimm_op: ++ switch(Assembler::rt(insn_word())) { ++ case Assembler::bgez_op: ++ case Assembler::bgezal_op: ++ case Assembler::bgezall_op: ++ case Assembler::bgezl_op: ++ case Assembler::bltz_op: ++ case Assembler::bltzal_op: ++ case Assembler::bltzall_op: ++ case Assembler::bltzl_op: ++ return true; ++ } ++ } ++ ++ return false; ++} ++ ++bool NativeInstruction::is_float_branch() { ++ if (!is_op(Assembler::cop1_op) || ++ !is_rs((Register)Assembler::bc1f_op)) return false; ++ ++ switch(Assembler::rt(insn_word())) { ++ case Assembler::bcf_op: ++ case Assembler::bcfl_op: ++ case Assembler::bct_op: ++ case Assembler::bctl_op: ++ return true; ++ } ++ ++ return false; ++} ++ ++ ++void NativeCall::verify() { ++ // make sure code pattern is actually a call instruction ++ ++ // nop ++ // nop ++ // nop ++ // nop ++ // jal target ++ // nop ++ if ( is_nop() && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_op(int_at(16), Assembler::jal_op) && ++ nativeInstruction_at(addr_at(20))->is_nop() ) { ++ return; ++ } ++ ++ // jal targe ++ // nop ++ if ( is_op(int_at(0), Assembler::jal_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() ) { ++ return; ++ } ++ ++ // li64 ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) && ++ is_special_op(int_at(24), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //ori dst, R0, imm16 ++ 
//dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ // FIXME: why add jr_op here? ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jr_op) ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ is_special_op(int_at(8), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ is_special_op(int_at(8), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ if (nativeInstruction_at(addr_at(0))->is_trampoline_call()) ++ return; ++ ++ fatal("not a call"); ++} ++ ++address NativeCall::target_addr_for_insn() const { ++ // jal target ++ // nop ++ if ( is_op(int_at(0), Assembler::jal_op) && ++ nativeInstruction_at(addr_at(4))->is_nop()) { ++ int instr_index = int_at(0) & 0x3ffffff; ++ intptr_t target_high = ((intptr_t)addr_at(4)) & 0xfffffffff0000000; ++ intptr_t target = target_high | (instr_index << 2); ++ return (address)target; ++ } ++ ++ // nop ++ // nop ++ // nop ++ // nop ++ // jal target ++ // nop ++ if ( nativeInstruction_at(addr_at(0))->is_nop() && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_op(int_at(16), Assembler::jal_op) && ++ nativeInstruction_at(addr_at(20))->is_nop()) { ++ int instr_index = int_at(16) & 0x3ffffff; ++ intptr_t target_high = ((intptr_t)addr_at(20)) & 0xfffffffff0000000; ++ intptr_t target = target_high | (instr_index << 2); ++ return (address)target; ++ } ++ ++ // li64 ++ if ( 
is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) ) { ++ ++ return (address)Assembler::merge( (intptr_t)(int_at(20) & 0xffff), ++ (intptr_t)(int_at(12) & 0xffff), ++ (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff)); ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), Assembler::ori_op) ) { ++ ++ return (address)Assembler::merge( (intptr_t)(int_at(12) & 0xffff), ++ (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0); ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ld dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), Assembler::ld_op) ) { ++ ++ address dest = (address)Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0); ++ return dest + Assembler::simm16((intptr_t)int_at(12) & 0xffff); ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ ++ return (address)Assembler::merge( (intptr_t)(int_at(8) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ ++ return (address)Assembler::merge( (intptr_t)(0), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop <-- optional ++ //nop <-- optional ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return (address)Assembler::merge( (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return (address)Assembler::merge( (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop <-- optional ++ //nop <-- optional ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return (address)Assembler::merge( (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return (address)Assembler::merge( (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop <-- optional ++ //nop <-- optional ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return (address)Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } else 
{ ++ return (address)Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ tty->print_cr("not a call: addr = " INTPTR_FORMAT , p2i(addr_at(0))); ++ tty->print_cr("======= Start decoding at addr = " INTPTR_FORMAT " =======", p2i(addr_at(0))); ++ Disassembler::decode(addr_at(0) - 2 * 4, addr_at(0) + 8 * 4, tty); ++ tty->print_cr("======= End of decoding ======="); ++ fatal("not a call"); ++ return NULL; // unreachable ++} ++ ++// Extract call destination from a NativeCall. The call might use a trampoline stub. ++address NativeCall::destination() const { ++ address addr = (address)this; ++ address destination = target_addr_for_insn(); ++ // Do we use a trampoline stub for this call? ++ // Trampoline stubs are located behind the main code. ++ if (destination > addr) { ++ // Filter out recursive method invocation (call to verified/unverified entry point). ++ CodeBlob* cb = CodeCache::find_blob_unsafe(addr); // Else we get assertion if nmethod is zombie. ++ assert(cb && cb->is_nmethod(), "sanity"); ++ nmethod *nm = (nmethod *)cb; ++ NativeInstruction* ni = nativeInstruction_at(addr); ++ if (nm->stub_contains(destination) && ni->is_trampoline_call()) { ++ // Yes we do, so get the destination from the trampoline stub. ++ const address trampoline_stub_addr = destination; ++ destination = nativeCallTrampolineStub_at(trampoline_stub_addr)->destination(); ++ } ++ } ++ return destination; ++} ++ ++// Similar to replace_mt_safe, but just changes the destination. The ++// important thing is that free-running threads are able to execute this ++// call instruction at all times. ++// ++// Used in the runtime linkage of calls; see class CompiledIC. ++// ++// Add parameter assert_lock to switch off assertion ++// during code generation, where no patching lock is needed. ++void NativeCall::set_destination_mt_safe(address dest, bool assert_lock) { ++ assert(!assert_lock || ++ (Patching_lock->is_locked() || SafepointSynchronize::is_at_safepoint()), ++ "concurrent code patching"); ++ ++ ResourceMark rm; ++ address addr_call = addr_at(0); ++ assert(NativeCall::is_call_at(addr_call), "unexpected code at call site"); ++ // Patch the constant in the call's trampoline stub. 
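++  // The code below distinguishes two cases:
++  //  - the destination is reachable from the code cache: the call sequence
++  //    itself is repatched via set_destination(dest);
++  //  - otherwise the call runs through a trampoline stub behind the nmethod,
++  //    and only the destination constant held in that stub is rewritten,
++  //    which keeps the call executable by free-running threads at all times.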
++ if (MacroAssembler::reachable_from_cache()) { ++ set_destination(dest); ++ } else { ++ address trampoline_stub_addr = nativeCall_at(addr_call)->target_addr_for_insn(); ++ assert (get_trampoline() != NULL && trampoline_stub_addr == get_trampoline(), "we need a trampoline"); ++ nativeCallTrampolineStub_at(trampoline_stub_addr)->set_destination(dest); ++ } ++} ++ ++address NativeCall::get_trampoline() { ++ address call_addr = addr_at(0); ++ ++ CodeBlob *code = CodeCache::find_blob(call_addr); ++ assert(code != NULL, "Could not find the containing code blob"); ++ ++ if (code->is_nmethod()) { ++ return trampoline_stub_Relocation::get_trampoline_for(call_addr, (nmethod*)code); ++ } ++ return NULL; ++} ++ ++// manual implementation of GSSQ ++// ++// 00000001200009c0 : ++// 1200009c0: 0085202d daddu a0, a0, a1 ++// 1200009c4: e8860027 gssq a2, a3, 0(a0) ++// 1200009c8: 03e00008 jr ra ++// 1200009cc: 00000000 nop ++// ++typedef void (* atomic_store128_ptr)(long *addr, int offset, long low64, long hi64); ++ ++static int *buf; ++ ++static atomic_store128_ptr get_atomic_store128_func() { ++ assert(UseLEXT1, "UseLEXT1 must be true"); ++ static atomic_store128_ptr p = NULL; ++ if (p != NULL) ++ return p; ++ ++ buf = (int *)mmap(NULL, 1024, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, ++ -1, 0); ++ buf[0] = 0x0085202d; ++ buf[1] = (0x3a << 26) | (4 << 21) | (6 << 16) | 0x27; /* gssq $a2, $a3, 0($a0) */ ++ buf[2] = 0x03e00008; ++ buf[3] = 0; ++ ++ asm("sync"); ++ p = (atomic_store128_ptr)buf; ++ return p; ++} ++ ++void NativeCall::patch_on_jal_only(address dst) { ++ long dest = ((long)dst - (((long)addr_at(4)) & 0xfffffffff0000000))>>2; ++ if ((dest >= 0) && (dest < (1<<26))) { ++ jint jal_inst = (Assembler::jal_op << 26) | dest; ++ set_int_at(0, jal_inst); ++ ICache::invalidate_range(addr_at(0), 4); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void NativeCall::patch_on_jal_gs(address dst) { ++ long dest = ((long)dst - (((long)addr_at(20)) & 0xfffffffff0000000))>>2; ++ if ((dest >= 0) && (dest < (1<<26))) { ++ jint jal_inst = (Assembler::jal_op << 26) | dest; ++ set_int_at(16, jal_inst); ++ ICache::invalidate_range(addr_at(16), 4); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void NativeCall::patch_on_jal(address dst) { ++ patch_on_jal_gs(dst); ++} ++ ++void NativeCall::patch_on_trampoline(address dest) { ++ assert(nativeInstruction_at(addr_at(0))->is_trampoline_call(), "unexpected code at call site"); ++ jlong dst = (jlong) dest; ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ld dst, dst, imm16 ++ if ((dst> 0) && Assembler::is_simm16(dst >> 32)) { ++ dst += (dst & 0x8000) << 1; ++ set_int_at(0, (int_at(0) & 0xffff0000) | (Assembler::split_low(dst >> 32) & 0xffff)); ++ set_int_at(4, (int_at(4) & 0xffff0000) | (Assembler::split_low(dst >> 16) & 0xffff)); ++ set_int_at(12, (int_at(12) & 0xffff0000) | (Assembler::split_low(dst) & 0xffff)); ++ ++ ICache::invalidate_range(addr_at(0), 24); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void NativeCall::patch_on_jalr_gs(address dst) { ++ patch_set48_gs(dst); ++} ++ ++void NativeCall::patch_on_jalr(address dst) { ++ patch_set48(dst); ++} ++ ++void NativeCall::patch_set48_gs(address dest) { ++ jlong value = (jlong) dest; ++ int rt_reg = (int_at(0) & (0x1f << 16)); ++ ++ if (rt_reg == 0) rt_reg = 25 << 16; // r25 is T9 ++ ++ int rs_reg = rt_reg << 5; ++ int rd_reg = rt_reg >> 5; ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ int count = 0; ++ int insts[4] = {0, 0, 0, 0}; ++ ++ if 
(value == lo) { // 32-bit integer ++ if (Assembler::is_simm16(value)) { ++ insts[count] = (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } else { ++ insts[count] = (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ insts[count] = (Assembler::ori_op << 26) | rt_reg | Assembler::split_low(julong(value) >> 16); ++ count += 1; ++ insts[count] = (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } ++ } else if ((value> 0) && Assembler::is_simm16(value >> 32)) { ++ insts[count] = (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 32); ++ count += 1; ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value >> 16); ++ count += 1; ++ insts[count] = (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6); ++ count += 1; ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } else { ++ tty->print_cr("dest = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 4) { ++ insts[count] = 0; ++ count++; ++ } ++ ++ guarantee(((long)addr_at(0) % (BytesPerWord * 2)) == 0, "must be aligned"); ++ atomic_store128_ptr func = get_atomic_store128_func(); ++ (*func)((long *)addr_at(0), 0, *(long *)&insts[0], *(long *)&insts[2]); ++ ++ ICache::invalidate_range(addr_at(0), 16); ++} ++ ++void NativeCall::patch_set32_gs(address dest) { ++ jlong value = (jlong) dest; ++ int rt_reg = (int_at(0) & (0x1f << 16)); ++ ++ if (rt_reg == 0) rt_reg = 25 << 16; // r25 is T9 ++ ++ int rs_reg = rt_reg << 5; ++ int rd_reg = rt_reg >> 5; ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ int insts[2] = {0, 0}; ++ ++ if (value == lo) { // 32-bit integer ++ if (Assembler::is_simm16(value)) { ++ //daddiu(d, R0, value); ++ //set_int_at(count << 2, (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value)); ++ insts[count] = (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } else { ++ //lui(d, split_low(value >> 16)); ++ //set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16)); ++ insts[count] = (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ //ori(d, d, split_low(value)); ++ //set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } ++ } ++ } else { ++ tty->print_cr("dest = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 2) { ++ //nop(); ++ //set_int_at(count << 2, 0); ++ insts[count] = 0; ++ count++; ++ } ++ ++ long inst = insts[1]; ++ inst = inst << 32; ++ inst = inst + insts[0]; ++ ++ set_long_at(0, inst); ++} ++ ++void NativeCall::patch_set48(address dest) { ++ jlong value = (jlong) dest; ++ int rt_reg = (int_at(0) & (0x1f << 16)); ++ ++ if (rt_reg == 0) rt_reg = 25 << 16; // r25 is T9 ++ ++ int rs_reg = rt_reg << 5; ++ int rd_reg = rt_reg >> 5; ++ ++ int 
hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (Assembler::is_simm16(value)) { ++ //daddiu(d, R0, value); ++ set_int_at(count << 2, (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } else { ++ //lui(d, split_low(value >> 16)); ++ set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16)); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ //ori(d, d, split_low(value)); ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ //ori(d, R0, julong(value) >> 16); ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rt_reg | Assembler::split_low(julong(value) >> 16)); ++ count += 1; ++ //dsll(d, d, 16); ++ set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ //ori(d, d, split_low(value)); ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } ++ } else if ((value> 0) && Assembler::is_simm16(value >> 32)) { ++ //lui(d, value >> 32); ++ set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 32)); ++ count += 1; ++ //ori(d, d, split_low(value >> 16)); ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value >> 16)); ++ count += 1; ++ //dsll(d, d, 16); ++ set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); ++ count += 1; ++ //ori(d, d, split_low(value)); ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } else { ++ tty->print_cr("dest = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 4) { ++ //nop(); ++ set_int_at(count << 2, 0); ++ count++; ++ } ++ ++ ICache::invalidate_range(addr_at(0), 16); ++} ++ ++void NativeCall::patch_set32(address dest) { ++ patch_set32_gs(dest); ++} ++ ++void NativeCall::set_destination(address dest) { ++ OrderAccess::fence(); ++ ++ // li64 ++ if (is_special_op(int_at(16), Assembler::dsll_op)) { ++ int first_word = int_at(0); ++ set_int_at(0, 0x1000ffff); /* .1: b .1 */ ++ set_int_at(4, (int_at(4) & 0xffff0000) | (Assembler::split_low((intptr_t)dest >> 32) & 0xffff)); ++ set_int_at(12, (int_at(12) & 0xffff0000) | (Assembler::split_low((intptr_t)dest >> 16) & 0xffff)); ++ set_int_at(20, (int_at(20) & 0xffff0000) | (Assembler::split_low((intptr_t)dest) & 0xffff)); ++ set_int_at(0, (first_word & 0xffff0000) | (Assembler::split_low((intptr_t)dest >> 48) & 0xffff)); ++ ICache::invalidate_range(addr_at(0), 24); ++ } else if (is_op(int_at(16), Assembler::jal_op)) { ++ if (UseLEXT1) { ++ patch_on_jal_gs(dest); ++ } else { ++ patch_on_jal(dest); ++ } ++ } else if (is_op(int_at(0), Assembler::jal_op)) { ++ patch_on_jal_only(dest); ++ } else if (is_special_op(int_at(16), Assembler::jalr_op)) { ++ if (UseLEXT1) { ++ patch_on_jalr_gs(dest); ++ } else { ++ patch_on_jalr(dest); ++ } ++ } else if (is_special_op(int_at(8), Assembler::jalr_op)) { ++ guarantee(!os::is_MP() || (((long)addr_at(0) % 8) == 0), "destination must be aligned by 8"); ++ if (UseLEXT1) { ++ patch_set32_gs(dest); ++ } else { ++ patch_set32(dest); ++ } ++ ICache::invalidate_range(addr_at(0), 8); ++ } else { ++ fatal("not a call"); ++ } ++} ++ ++void NativeCall::print() { ++ 
tty->print_cr(PTR_FORMAT ": call " PTR_FORMAT, ++ p2i(instruction_address()), p2i(destination())); ++} ++ ++// Inserts a native call instruction at a given pc ++void NativeCall::insert(address code_pos, address entry) { ++ NativeCall *call = nativeCall_at(code_pos); ++ CodeBuffer cb(call->addr_at(0), instruction_size); ++ MacroAssembler masm(&cb); ++#define __ masm. ++ __ li48(T9, (long)entry); ++ __ jalr (); ++ __ delayed()->nop(); ++#undef __ ++ ++ ICache::invalidate_range(call->addr_at(0), instruction_size); ++} ++ ++// MT-safe patching of a call instruction. ++// First patches first word of instruction to two jmp's that jmps to them ++// selfs (spinlock). Then patches the last byte, and then atomicly replaces ++// the jmp's with the first 4 byte of the new instruction. ++void NativeCall::replace_mt_safe(address instr_addr, address code_buffer) { ++ Unimplemented(); ++} ++ ++//------------------------------------------------------------------- ++ ++void NativeMovConstReg::verify() { ++ // li64 ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), Assembler::ori_op) ) { ++ return; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ return; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ return; ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ return; ++ } ++ ++ fatal("not a mov reg, imm64/imm48"); ++} ++ ++void NativeMovConstReg::print() { ++ tty->print_cr(PTR_FORMAT ": mov reg, " INTPTR_FORMAT, ++ p2i(instruction_address()), data()); ++} ++ ++intptr_t NativeMovConstReg::data() const { ++ // li64 ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) ) { ++ ++ return Assembler::merge( (intptr_t)(int_at(20) & 0xffff), ++ (intptr_t)(int_at(12) & 0xffff), ++ (intptr_t)(int_at(4) & 0xffff), 
++ (intptr_t)(int_at(0) & 0xffff)); ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), Assembler::ori_op) ) { ++ ++ return Assembler::merge( (intptr_t)(int_at(12) & 0xffff), ++ (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0); ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ ++ return Assembler::merge( (intptr_t)(int_at(8) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ ++ return Assembler::merge( (intptr_t)(0), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return Assembler::merge( (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return Assembler::merge( (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return Assembler::merge( (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return Assembler::merge( (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ fatal("not a mov reg, imm64/imm48"); ++ return 0; // unreachable ++} ++ ++void NativeMovConstReg::patch_set48(intptr_t x) { ++ jlong value = (jlong) x; ++ int rt_reg = (int_at(0) & (0x1f << 16)); ++ int rs_reg = rt_reg << 5; ++ int rd_reg = rt_reg >> 5; ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (Assembler::is_simm16(value)) { ++ //daddiu(d, R0, value); ++ set_int_at(count << 2, (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } else { ++ //lui(d, split_low(value >> 
16)); ++ set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16)); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ //ori(d, d, split_low(value)); ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rt_reg | Assembler::split_low(julong(value) >> 16)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } ++ } else if ((value> 0) && Assembler::is_simm16(value >> 32)) { ++ set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 32)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value >> 16)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } else { ++ tty->print_cr("value = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 4) { ++ set_int_at(count << 2, 0); ++ count++; ++ } ++} ++ ++void NativeMovConstReg::set_data(intptr_t x, intptr_t o) { ++ // li64 or li48 ++ if ((!nativeInstruction_at(addr_at(12))->is_nop()) && is_special_op(int_at(16), Assembler::dsll_op) && is_op(long_at(20), Assembler::ori_op)) { ++ set_int_at(0, (int_at(0) & 0xffff0000) | (Assembler::split_low((intptr_t)x >> 48) & 0xffff)); ++ set_int_at(4, (int_at(4) & 0xffff0000) | (Assembler::split_low((intptr_t)x >> 32) & 0xffff)); ++ set_int_at(12, (int_at(12) & 0xffff0000) | (Assembler::split_low((intptr_t)x >> 16) & 0xffff)); ++ set_int_at(20, (int_at(20) & 0xffff0000) | (Assembler::split_low((intptr_t)x) & 0xffff)); ++ } else { ++ patch_set48(x); ++ } ++ ++ ICache::invalidate_range(addr_at(0), 24); ++ ++ // Find and replace the oop/metadata corresponding to this ++ // instruction in oops section. ++ CodeBlob* blob = CodeCache::find_blob_unsafe(instruction_address()); ++ nmethod* nm = blob->as_nmethod_or_null(); ++ if (nm != NULL) { ++ o = o ? 
o : x; ++ RelocIterator iter(nm, instruction_address(), next_instruction_address()); ++ while (iter.next()) { ++ if (iter.type() == relocInfo::oop_type) { ++ oop* oop_addr = iter.oop_reloc()->oop_addr(); ++ *oop_addr = cast_to_oop(o); ++ break; ++ } else if (iter.type() == relocInfo::metadata_type) { ++ Metadata** metadata_addr = iter.metadata_reloc()->metadata_addr(); ++ *metadata_addr = (Metadata*)o; ++ break; ++ } ++ } ++ } ++} ++ ++//------------------------------------------------------------------- ++ ++int NativeMovRegMem::offset() const{ ++ if (is_immediate()) ++ return (short)(int_at(instruction_offset)&0xffff); ++ else ++ return Assembler::merge(int_at(hiword_offset)&0xffff, long_at(instruction_offset)&0xffff); ++} ++ ++void NativeMovRegMem::set_offset(int x) { ++ if (is_immediate()) { ++ assert(Assembler::is_simm16(x), "just check"); ++ set_int_at(0, (int_at(0)&0xffff0000) | (x&0xffff) ); ++ if (is_64ldst()) { ++ assert(Assembler::is_simm16(x+4), "just check"); ++ set_int_at(4, (int_at(4)&0xffff0000) | ((x+4)&0xffff) ); ++ } ++ } else { ++ set_int_at(0, (int_at(0) & 0xffff0000) | (Assembler::split_high(x) & 0xffff)); ++ set_int_at(4, (int_at(4) & 0xffff0000) | (Assembler::split_low(x) & 0xffff)); ++ } ++ ICache::invalidate_range(addr_at(0), 8); ++} ++ ++void NativeMovRegMem::verify() { ++ int offset = 0; ++ ++ if ( Assembler::opcode(int_at(0)) == Assembler::lui_op ) { ++ ++ if ( Assembler::opcode(int_at(4)) != Assembler::ori_op ) { ++ fatal ("not a mov [reg+offs], reg instruction"); ++ } ++ ++ offset += 12; ++ } ++ ++ switch(Assembler::opcode(int_at(offset))) { ++ case Assembler::lb_op: ++ case Assembler::lbu_op: ++ case Assembler::lh_op: ++ case Assembler::lhu_op: ++ case Assembler::lw_op: ++ case Assembler::lwu_op: ++ case Assembler::ld_op: ++ case Assembler::lwc1_op: ++ case Assembler::ldc1_op: ++ case Assembler::sb_op: ++ case Assembler::sh_op: ++ case Assembler::sw_op: ++ case Assembler::sd_op: ++ case Assembler::swc1_op: ++ case Assembler::sdc1_op: ++ break; ++ default: ++ fatal ("not a mov [reg+offs], reg instruction"); ++ } ++} ++ ++ ++void NativeMovRegMem::print() { ++ tty->print_cr(PTR_FORMAT ": mov reg, [reg + %x]", p2i(instruction_address()), offset()); ++} ++ ++bool NativeInstruction::is_sigill_zombie_not_entrant() { ++ return uint_at(0) == NativeIllegalInstruction::instruction_code; ++} ++ ++void NativeIllegalInstruction::insert(address code_pos) { ++ *(juint*)code_pos = instruction_code; ++ ICache::invalidate_range(code_pos, instruction_size); ++} ++ ++void NativeJump::verify() { ++ assert(((NativeInstruction *)this)->is_jump() || ++ ((NativeInstruction *)this)->is_cond_jump(), "not a general jump instruction"); ++} ++ ++void NativeJump::patch_set48_gs(address dest) { ++ jlong value = (jlong) dest; ++ int rt_reg = (int_at(0) & (0x1f << 16)); ++ ++ if (rt_reg == 0) rt_reg = 25 << 16; // r25 is T9 ++ ++ int rs_reg = rt_reg << 5; ++ int rd_reg = rt_reg >> 5; ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ int insts[4] = {0, 0, 0, 0}; ++ ++ if (value == lo) { // 32-bit integer ++ if (Assembler::is_simm16(value)) { ++ insts[count] = (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } else { ++ insts[count] = (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware 
zero-extends to upper 32 ++ insts[count] = (Assembler::ori_op << 26) | rt_reg | Assembler::split_low(julong(value) >> 16); ++ count += 1; ++ insts[count] = (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } ++ } else if ((value> 0) && Assembler::is_simm16(value >> 32)) { ++ insts[count] = (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 32); ++ count += 1; ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value >> 16); ++ count += 1; ++ insts[count] = (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6); ++ count += 1; ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } else { ++ tty->print_cr("dest = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 4) { ++ insts[count] = 0; ++ count++; ++ } ++ ++ guarantee(((long)addr_at(0) % (BytesPerWord * 2)) == 0, "must be aligned"); ++ atomic_store128_ptr func = get_atomic_store128_func(); ++ (*func)((long *)addr_at(0), 0, *(long *)&insts[0], *(long *)&insts[2]); ++ ++ ICache::invalidate_range(addr_at(0), 16); ++} ++ ++void NativeJump::patch_set48(address dest) { ++ jlong value = (jlong) dest; ++ int rt_reg = (int_at(0) & (0x1f << 16)); ++ int rs_reg = rt_reg << 5; ++ int rd_reg = rt_reg >> 5; ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (Assembler::is_simm16(value)) { ++ set_int_at(count << 2, (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } else { ++ set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16)); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rt_reg | Assembler::split_low(julong(value) >> 16)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } ++ } else if ((value> 0) && Assembler::is_simm16(value >> 32)) { ++ set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 32)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value >> 16)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } else { ++ tty->print_cr("dest = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 4) { ++ set_int_at(count << 2, 0); ++ count++; ++ } ++ ++ ICache::invalidate_range(addr_at(0), 16); ++} ++ ++void NativeJump::patch_on_j_only(address dst) { ++ long dest = ((long)dst - (((long)addr_at(4)) & 0xfffffffff0000000))>>2; ++ if ((dest >= 0) && (dest < (1<<26))) { ++ jint j_inst = (Assembler::j_op << 26) | dest; ++ set_int_at(0, j_inst); ++ ICache::invalidate_range(addr_at(0), 4); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ 
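++// patch_on_j_only above (and patch_on_j_gs/patch_on_j below) rely on the
++// MIPS J-type encoding: the jump can only reach targets in the same 256 MB
++// region as the delay-slot instruction, i.e. the upper 36 bits of the target
++// come from PC + 4 and the low 28 bits are the 26-bit index shifted left by 2.
++//
++// Worked example (hypothetical addresses):
++//   j placed at                                  0x000000ffe5815000
++//   PC + 4                                     = 0x000000ffe5815004
++//   region base: (PC + 4) & 0xfffffffff0000000 = 0x000000ffe0000000
++//   dst                                        = 0x000000ffe5900240
++//   index = (dst - region base) >> 2           = 0x1640090   (< (1 << 26), ok)
++//   patched word = (Assembler::j_op << 26) | 0x1640090
++// A dst outside the 256 MB region fails the range check and ends up in
++// ShouldNotReachHere().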
++ ++void NativeJump::patch_on_j_gs(address dst) { ++ long dest = ((long)dst - (((long)addr_at(20)) & 0xfffffffff0000000))>>2; ++ if ((dest >= 0) && (dest < (1<<26))) { ++ jint j_inst = (Assembler::j_op << 26) | dest; ++ set_int_at(16, j_inst); ++ ICache::invalidate_range(addr_at(16), 4); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void NativeJump::patch_on_j(address dst) { ++ patch_on_j_gs(dst); ++} ++ ++void NativeJump::patch_on_jr_gs(address dst) { ++ patch_set48_gs(dst); ++ ICache::invalidate_range(addr_at(0), 16); ++} ++ ++void NativeJump::patch_on_jr(address dst) { ++ patch_set48(dst); ++ ICache::invalidate_range(addr_at(0), 16); ++} ++ ++ ++void NativeJump::set_jump_destination(address dest) { ++ OrderAccess::fence(); ++ ++ if (is_short()) { ++ assert(Assembler::is_simm16(dest-addr_at(4)), "change this code"); ++ set_int_at(0, (int_at(0) & 0xffff0000) | (dest - addr_at(4)) & 0xffff ); ++ ICache::invalidate_range(addr_at(0), 4); ++ } else if (is_b_far()) { ++ int offset = dest - addr_at(12); ++ set_int_at(12, (int_at(12) & 0xffff0000) | (offset >> 16)); ++ set_int_at(16, (int_at(16) & 0xffff0000) | (offset & 0xffff)); ++ } else { ++ if (is_op(int_at(16), Assembler::j_op)) { ++ if (UseLEXT1) { ++ patch_on_j_gs(dest); ++ } else { ++ patch_on_j(dest); ++ } ++ } else if (is_op(int_at(0), Assembler::j_op)) { ++ patch_on_j_only(dest); ++ } else if (is_special_op(int_at(16), Assembler::jr_op)) { ++ if (UseLEXT1) { ++ //guarantee(!os::is_MP() || (((long)addr_at(0) % 16) == 0), "destination must be aligned for GSSD"); ++ //patch_on_jr_gs(dest); ++ patch_on_jr(dest); ++ } else { ++ patch_on_jr(dest); ++ } ++ } else { ++ fatal("not a jump"); ++ } ++ } ++} ++ ++void NativeGeneralJump::insert_unconditional(address code_pos, address entry) { ++ CodeBuffer cb(code_pos, instruction_size); ++ MacroAssembler masm(&cb); ++#define __ masm. ++ if (Assembler::is_simm16((entry - code_pos - 4) / 4)) { ++ __ b(entry); ++ __ delayed()->nop(); ++ } else { ++ // Attention: We have to use a relative jump here since PC reloc-operation isn't allowed here. ++ int offset = entry - code_pos; ++ ++ Label L; ++ __ bgezal(R0, L); ++ __ delayed()->lui(T9, (offset - 8) >> 16); ++ __ bind(L); ++ __ ori(T9, T9, (offset - 8) & 0xffff); ++ __ daddu(T9, T9, RA); ++ __ jr(T9); ++ __ delayed()->nop(); ++ } ++ ++#undef __ ++ ++ ICache::invalidate_range(code_pos, instruction_size); ++} ++ ++bool NativeJump::is_b_far() { ++// ++// 0x000000556809f198: daddu at, ra, zero ++// 0x000000556809f19c: [4110001]bgezal zero, 0x000000556809f1a4 ++// ++// 0x000000556809f1a0: nop ++// 0x000000556809f1a4: lui t9, 0xfffffffd ++// 0x000000556809f1a8: ori t9, t9, 0x14dc ++// 0x000000556809f1ac: daddu t9, t9, ra ++// 0x000000556809f1b0: daddu ra, at, zero ++// 0x000000556809f1b4: jr t9 ++// 0x000000556809f1b8: nop ++// ;; ImplicitNullCheckStub slow case ++// 0x000000556809f1bc: lui t9, 0x55 ++// ++ return is_op(int_at(12), Assembler::lui_op); ++} ++ ++address NativeJump::jump_destination() { ++ if ( is_short() ) { ++ return addr_at(4) + Assembler::imm_off(int_at(instruction_offset)) * 4; ++ } ++ // Assembler::merge() is not correct in MIPS_64! 
++ // ++ // Example: ++ // hi16 = 0xfffd, ++ // lo16 = f7a4, ++ // ++ // offset=0xfffdf7a4 (Right) ++ // Assembler::merge = 0xfffcf7a4 (Wrong) ++ // ++ if ( is_b_far() ) { ++ int hi16 = int_at(12)&0xffff; ++ int low16 = int_at(16)&0xffff; ++ address target = addr_at(12) + (hi16 << 16) + low16; ++ return target; ++ } ++ ++ // nop ++ // nop ++ // nop ++ // nop ++ // j target ++ // nop ++ if ( nativeInstruction_at(addr_at(0))->is_nop() && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_op(int_at(16), Assembler::j_op) && ++ nativeInstruction_at(addr_at(20))->is_nop()) { ++ int instr_index = int_at(16) & 0x3ffffff; ++ intptr_t target_high = ((intptr_t)addr_at(20)) & 0xfffffffff0000000; ++ intptr_t target = target_high | (instr_index << 2); ++ return (address)target; ++ } ++ ++ // j target ++ // nop ++ if ( is_op(int_at(0), Assembler::j_op) && ++ nativeInstruction_at(addr_at(4))->is_nop()) { ++ int instr_index = int_at(0) & 0x3ffffff; ++ intptr_t target_high = ((intptr_t)addr_at(4)) & 0xfffffffff0000000; ++ intptr_t target = target_high | (instr_index << 2); ++ return (address)target; ++ } ++ ++ // li64 ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) ) { ++ ++ return (address)Assembler::merge( (intptr_t)(int_at(20) & 0xffff), ++ (intptr_t)(int_at(12) & 0xffff), ++ (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff)); ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), Assembler::ori_op) ) { ++ ++ return (address)Assembler::merge( (intptr_t)(int_at(12) & 0xffff), ++ (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0); ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ ++ return (address)Assembler::merge( (intptr_t)(int_at(8) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ ++ return (address)Assembler::merge( (intptr_t)(0), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return (address)Assembler::merge( (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return (address)Assembler::merge( (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop ++ //nop ++ if 
( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return (address)Assembler::merge( (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return (address)Assembler::merge( (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return (address)Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return (address)Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ fatal("not a jump"); ++ return NULL; // unreachable ++} ++ ++// MT-safe patching of a long jump instruction. ++// First patches first word of instruction to two jmp's that jmps to them ++// selfs (spinlock). Then patches the last byte, and then atomicly replaces ++// the jmp's with the first 4 byte of the new instruction. ++void NativeGeneralJump::replace_mt_safe(address instr_addr, address code_buffer) { ++ NativeGeneralJump* h_jump = nativeGeneralJump_at (instr_addr); ++ assert((int)instruction_size == (int)NativeCall::instruction_size, ++ "note::Runtime1::patch_code uses NativeCall::instruction_size"); ++ ++ // ensure 100% atomicity ++ guarantee(!os::is_MP() || (((long)instr_addr % BytesPerWord) == 0), "destination must be aligned for SD"); ++ ++ int *p = (int *)instr_addr; ++ int jr_word = p[4]; ++ ++ p[4] = 0x1000fffb; /* .1: --; --; --; --; b .1; nop */ ++ memcpy(instr_addr, code_buffer, NativeCall::instruction_size - 8); ++ *(long *)(instr_addr + 16) = *(long *)(code_buffer + 16); ++} ++ ++// Must ensure atomicity ++void NativeJump::patch_verified_entry(address entry, address verified_entry, address dest) { ++ assert(dest == SharedRuntime::get_handle_wrong_method_stub(), "expected fixed destination of patch"); ++ assert(nativeInstruction_at(verified_entry + BytesPerInstWord)->is_nop(), "mips64 cannot replace non-nop with jump"); ++ ++ if (MacroAssembler::reachable_from_cache(dest)) { ++ CodeBuffer cb(verified_entry, 1 * BytesPerInstWord); ++ MacroAssembler masm(&cb); ++ masm.j(dest); ++ } else { ++ // We use an illegal instruction for marking a method as ++ // not_entrant or zombie ++ NativeIllegalInstruction::insert(verified_entry); ++ } ++ ++ ICache::invalidate_range(verified_entry, 1 * BytesPerInstWord); ++} ++ ++bool NativeInstruction::is_jump() ++{ ++ if ((int_at(0) & NativeGeneralJump::b_mask) == NativeGeneralJump::beq_opcode) ++ return true; ++ if (is_op(int_at(4), Assembler::lui_op)) // simplified b_far ++ return true; ++ if (is_op(int_at(12), Assembler::lui_op)) // original b_far ++ return true; ++ ++ // nop ++ // nop ++ // nop ++ // nop ++ // j target ++ // nop ++ if ( is_nop() && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ nativeInstruction_at(addr_at(16))->is_op(Assembler::j_op) && ++ nativeInstruction_at(addr_at(20))->is_nop() ) { ++ return true; ++ } ++ ++ if ( 
nativeInstruction_at(addr_at(0))->is_op(Assembler::j_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() ) { ++ return true; ++ } ++ ++ // lui rd, imm(63...48); ++ // ori rd, rd, imm(47...32); ++ // dsll rd, rd, 16; ++ // ori rd, rd, imm(31...16); ++ // dsll rd, rd, 16; ++ // ori rd, rd, imm(15...0); ++ // jr rd ++ // nop ++ if (is_op(int_at(0), Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) && ++ is_special_op(int_at(24), Assembler::jr_op)) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if (is_op(int_at(0), Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::jr_op)) { ++ return true; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jr_op)) { ++ return true; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jr_op)) { ++ return true; ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jr_op)) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jr_op)) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jr_op)) { ++ return true; ++ } ++ ++ return false; ++} ++ ++bool NativeInstruction::is_dtrace_trap() { ++ //return (*(int32_t*)this & 0xff) == 0xcc; ++ Unimplemented(); ++ return false; ++} ++ ++bool NativeInstruction::is_safepoint_poll() { ++ // ++ // 390 li T2, 0x0000000000400000 #@loadConP ++ // 394 sw [SP + #12], V1 # spill 9 ++ // 398 Safepoint @ [T2] : poll for GC @ safePoint_poll # spec.benchmarks.compress.Decompressor::decompress @ bci:224 L[0]=A6 L[1]=_ L[2]=sp + #28 L[3]=_ L[4]=V1 ++ // ++ // 0x000000ffe5815130: lui t2, 0x40 ++ // 0x000000ffe5815134: sw v1, 0xc(sp) ; OopMap{a6=Oop off=920} ++ // ;*goto ++ // ; - spec.benchmarks.compress.Decompressor::decompress@224 (line 584) ++ // ++ // 0x000000ffe5815138: lw at, 0x0(t2) ;*goto <--- PC ++ // ; - spec.benchmarks.compress.Decompressor::decompress@224 (line 584) ++ // ++ ++ // Since there may be some spill instructions between the safePoint_poll and loadConP, ++ // we check the safepoint 
instruction like the this. ++ return is_op(Assembler::lw_op) && is_rt(AT); ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/nativeInst_mips.hpp b/src/hotspot/cpu/mips/nativeInst_mips.hpp +--- a/src/hotspot/cpu/mips/nativeInst_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/nativeInst_mips.hpp 2024-01-30 10:00:11.848098317 +0800 +@@ -0,0 +1,734 @@ ++/* ++ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_NATIVEINST_MIPS_HPP ++#define CPU_MIPS_VM_NATIVEINST_MIPS_HPP ++ ++#include "asm/assembler.hpp" ++#include "asm/macroAssembler.hpp" ++#include "runtime/icache.hpp" ++#include "runtime/os.hpp" ++#include "runtime/safepointMechanism.hpp" ++ ++// We have interfaces for the following instructions: ++// - NativeInstruction ++// - - NativeCall ++// - - NativeMovConstReg ++// - - NativeMovConstRegPatching ++// - - NativeMovRegMem ++// - - NativeMovRegMemPatching ++// - - NativeJump ++// - - NativeIllegalOpCode ++// - - NativeGeneralJump ++// - - NativeReturn ++// - - NativeReturnX (return with argument) ++// - - NativePushConst ++// - - NativeTstRegMem ++ ++// The base class for different kinds of native instruction abstractions. ++// Provides the primitive operations to manipulate code relative to this. ++ ++class NativeInstruction { ++ friend class Relocation; ++ ++ public: ++ enum mips_specific_constants { ++ nop_instruction_code = 0, ++ nop_instruction_size = 4, ++ sync_instruction_code = 0xf ++ }; ++ ++ bool is_nop() { return long_at(0) == nop_instruction_code; } ++ bool is_sync() { return long_at(0) == sync_instruction_code; } ++ bool is_dtrace_trap(); ++ inline bool is_call(); ++ inline bool is_illegal(); ++ inline bool is_return(); ++ bool is_jump(); ++ inline bool is_cond_jump(); ++ bool is_safepoint_poll(); ++ ++ //mips has no instruction to generate a illegal instrucion exception ++ //we define ours: break 11 ++ static int illegal_instruction(); ++ ++ bool is_int_branch(); ++ bool is_float_branch(); ++ ++ inline bool is_trampoline_call(); ++ ++ //We use an illegal instruction for marking a method as not_entrant or zombie. 
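++ // Note: the reserved opcode used by NativeIllegalInstruction (defined later in
++ // this header) is expected to raise SIGILL when executed, so the platform
++ // signal handler can call is_sigill_zombie_not_entrant() to tell a deliberately
++ // patched (not_entrant/zombie) entry point apart from a genuine crash. The
++ // exact signal-handler hook is presumably in the platform-specific os code and
++ // is not shown in this header.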
++ bool is_sigill_zombie_not_entrant(); ++ ++ protected: ++ address addr_at(int offset) const { return address(this) + offset; } ++ address instruction_address() const { return addr_at(0); } ++ address next_instruction_address() const { return addr_at(BytesPerInstWord); } ++ address prev_instruction_address() const { return addr_at(-BytesPerInstWord); } ++ ++ s_char sbyte_at(int offset) const { return *(s_char*) addr_at(offset); } ++ u_char ubyte_at(int offset) const { return *(u_char*) addr_at(offset); } ++ ++ jint int_at(int offset) const { return *(jint*) addr_at(offset); } ++ juint uint_at(int offset) const { return *(juint*) addr_at(offset); } ++ ++ intptr_t ptr_at(int offset) const { return *(intptr_t*) addr_at(offset); } ++ ++ oop oop_at (int offset) const { return *(oop*) addr_at(offset); } ++ int long_at(int offset) const { return *(jint*)addr_at(offset); } ++ ++ ++ void set_char_at(int offset, char c) { *addr_at(offset) = (u_char)c; wrote(offset); } ++ void set_int_at(int offset, jint i) { *(jint*)addr_at(offset) = i; wrote(offset); } ++ void set_ptr_at (int offset, intptr_t ptr) { *(intptr_t*) addr_at(offset) = ptr; wrote(offset); } ++ void set_oop_at (int offset, oop o) { *(oop*) addr_at(offset) = o; wrote(offset); } ++ void set_long_at(int offset, long i); ++ ++ int insn_word() const { return long_at(0); } ++ static bool is_op (int insn, Assembler::ops op) { return Assembler::opcode(insn) == (int)op; } ++ bool is_op (Assembler::ops op) const { return is_op(insn_word(), op); } ++ bool is_rs (int insn, Register rs) const { return Assembler::rs(insn) == (int)rs->encoding(); } ++ bool is_rs (Register rs) const { return is_rs(insn_word(), rs); } ++ bool is_rt (int insn, Register rt) const { return Assembler::rt(insn) == (int)rt->encoding(); } ++ bool is_rt (Register rt) const { return is_rt(insn_word(), rt); } ++ ++ static bool is_special_op (int insn, Assembler::special_ops op) { ++ return is_op(insn, Assembler::special_op) && Assembler::special(insn)==(int)op; ++ } ++ bool is_special_op (Assembler::special_ops op) const { return is_special_op(insn_word(), op); } ++ ++ void wrote(int offset); ++ ++ public: ++ ++ // unit test stuff ++ static void test() {} // override for testing ++ ++ inline friend NativeInstruction* nativeInstruction_at(address address); ++}; ++ ++inline NativeInstruction* nativeInstruction_at(address address) { ++ NativeInstruction* inst = (NativeInstruction*)address; ++#ifdef ASSERT ++ //inst->verify(); ++#endif ++ return inst; ++} ++ ++inline NativeCall* nativeCall_at(address address); ++// The NativeCall is an abstraction for accessing/manipulating native call imm32/imm64 ++// instructions (used to manipulate inline caches, primitive & dll calls, etc.). ++// MIPS has no call instruction with imm32/imm64. 
Usually, a call was done like this: ++// 32 bits: ++// lui rt, imm16 ++// addiu rt, rt, imm16 ++// jalr rt ++// nop ++// ++// 64 bits: ++// lui rd, imm(63...48); ++// ori rd, rd, imm(47...32); ++// dsll rd, rd, 16; ++// ori rd, rd, imm(31...16); ++// dsll rd, rd, 16; ++// ori rd, rd, imm(15...0); ++// jalr rd ++// nop ++// ++ ++// we just consider the above for instruction as one call instruction ++class NativeCall: public NativeInstruction { ++ public: ++ enum mips_specific_constants { ++ instruction_offset = 0, ++ instruction_size = 6 * BytesPerInstWord, ++ return_address_offset_short = 4 * BytesPerInstWord, ++ return_address_offset_long = 6 * BytesPerInstWord, ++ displacement_offset = 0 ++ }; ++ ++ address instruction_address() const { return addr_at(instruction_offset); } ++ ++ address next_instruction_address() const { ++ if (is_special_op(int_at(8), Assembler::jalr_op)) { ++ return addr_at(return_address_offset_short); ++ } else { ++ return addr_at(return_address_offset_long); ++ } ++ } ++ ++ address return_address() const { ++ return next_instruction_address(); ++ } ++ ++ address target_addr_for_insn() const; ++ address destination() const; ++ void set_destination(address dest); ++ ++ void patch_set48_gs(address dest); ++ void patch_set48(address dest); ++ ++ void patch_on_jalr_gs(address dest); ++ void patch_on_jalr(address dest); ++ ++ void patch_on_jal_gs(address dest); ++ void patch_on_jal(address dest); ++ ++ void patch_on_trampoline(address dest); ++ ++ void patch_on_jal_only(address dest); ++ ++ void patch_set32_gs(address dest); ++ void patch_set32(address dest); ++ ++ void verify_alignment() { } ++ void verify(); ++ void print(); ++ ++ // Creation ++ inline friend NativeCall* nativeCall_at(address address); ++ inline friend NativeCall* nativeCall_before(address return_address); ++ ++ static bool is_call_at(address instr) { ++ return nativeInstruction_at(instr)->is_call(); ++ } ++ ++ static bool is_call_before(address return_address) { ++ return is_call_at(return_address - return_address_offset_short) | is_call_at(return_address - return_address_offset_long); ++ } ++ ++ static bool is_call_to(address instr, address target) { ++ return nativeInstruction_at(instr)->is_call() && ++nativeCall_at(instr)->destination() == target; ++ } ++ ++ // MT-safe patching of a call instruction. ++ static void insert(address code_pos, address entry); ++ ++ static void replace_mt_safe(address instr_addr, address code_buffer); ++ ++ // Similar to replace_mt_safe, but just changes the destination. The ++ // important thing is that free-running threads are able to execute ++ // this call instruction at all times. If the call is an immediate jal ++ // instruction we can simply rely on atomicity of 32-bit writes to ++ // make sure other threads will see no intermediate states. ++ ++ // We cannot rely on locks here, since the free-running threads must run at ++ // full speed. ++ // ++ // Used in the runtime linkage of calls; see class CompiledIC. ++ ++ // The parameter assert_lock disables the assertion during code generation. 
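++ // Note: relocation processing is one such code-generation caller -- see
++ // Relocation::pd_set_call_destination() in relocInfo_mips.cpp (later in this
++ // patch), which passes assert_lock = false when it retargets a trampoline call
++ // while code is still being generated. Runtime callers (e.g. inline-cache
++ // updates) are expected to keep the default assert_lock = true so the patching
++ // lock assertion stays active.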
++ void set_destination_mt_safe(address dest, bool assert_lock = true); ++ ++ address get_trampoline(); ++}; ++ ++inline NativeCall* nativeCall_at(address address) { ++ NativeCall* call = (NativeCall*)(address - NativeCall::instruction_offset); ++#ifdef ASSERT ++ call->verify(); ++#endif ++ return call; ++} ++ ++inline NativeCall* nativeCall_before(address return_address) { ++ NativeCall* call = NULL; ++ if (NativeCall::is_call_at(return_address - NativeCall::return_address_offset_long)) { ++ call = (NativeCall*)(return_address - NativeCall::return_address_offset_long); ++ } else { ++ call = (NativeCall*)(return_address - NativeCall::return_address_offset_short); ++ } ++#ifdef ASSERT ++ call->verify(); ++#endif ++ return call; ++} ++ ++class NativeMovConstReg: public NativeInstruction { ++ public: ++ enum mips_specific_constants { ++ instruction_offset = 0, ++ instruction_size = 4 * BytesPerInstWord, ++ next_instruction_offset = 4 * BytesPerInstWord, ++ }; ++ ++ int insn_word() const { return long_at(instruction_offset); } ++ address instruction_address() const { return addr_at(0); } ++ address next_instruction_address() const { return addr_at(next_instruction_offset); } ++ intptr_t data() const; ++ void set_data(intptr_t x, intptr_t o = 0); ++ ++ void patch_set48(intptr_t x); ++ ++ void verify(); ++ void print(); ++ ++ // unit test stuff ++ static void test() {} ++ ++ // Creation ++ inline friend NativeMovConstReg* nativeMovConstReg_at(address address); ++ inline friend NativeMovConstReg* nativeMovConstReg_before(address address); ++}; ++ ++inline NativeMovConstReg* nativeMovConstReg_at(address address) { ++ NativeMovConstReg* test = (NativeMovConstReg*)(address - NativeMovConstReg::instruction_offset); ++#ifdef ASSERT ++ test->verify(); ++#endif ++ return test; ++} ++ ++inline NativeMovConstReg* nativeMovConstReg_before(address address) { ++ NativeMovConstReg* test = (NativeMovConstReg*)(address - NativeMovConstReg::instruction_size - NativeMovConstReg::instruction_offset); ++#ifdef ASSERT ++ test->verify(); ++#endif ++ return test; ++} ++ ++class NativeMovConstRegPatching: public NativeMovConstReg { ++ private: ++ friend NativeMovConstRegPatching* nativeMovConstRegPatching_at(address address) { ++ NativeMovConstRegPatching* test = (NativeMovConstRegPatching*)(address - instruction_offset); ++ #ifdef ASSERT ++ test->verify(); ++ #endif ++ return test; ++ } ++}; ++ ++// An interface for accessing/manipulating native moves of the form: ++// lui AT, split_high(offset) ++// addiu AT, split_low(offset) ++// addu reg, reg, AT ++// lb/lbu/sb/lh/lhu/sh/lw/sw/lwc1/swc1 dest, reg, 0 ++// [lw/sw/lwc1/swc1 dest, reg, 4] ++// or ++// lb/lbu/sb/lh/lhu/sh/lw/sw/lwc1/swc1 dest, reg, offset ++// [lw/sw/lwc1/swc1 dest, reg, offset+4] ++// ++// Warning: These routines must be able to handle any instruction sequences ++// that are generated as a result of the load/store byte,word,long ++// macros. ++ ++class NativeMovRegMem: public NativeInstruction { ++ public: ++ enum mips_specific_constants { ++ instruction_offset = 0, ++ hiword_offset = 4, ++ ldst_offset = 12, ++ immediate_size = 4, ++ ldst_size = 16 ++ }; ++ ++ //offset is less than 16 bits. 
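++ // Note: "immediate" means the displacement fits the signed 16-bit offset field
++ // of a single MIPS load/store, so the lui/addiu/addu prelude shown above is not
++ // needed and the access is one instruction. For reference, the usual %hi/%lo
++ // style split (the real split_high/split_low definitions live in the assembler
++ // sources, so the formulas here are only illustrative) is:
++ //
++ //   split_low(x)  = x & 0xffff              // sign-extended again by addiu
++ //   split_high(x) = (x + 0x8000) >> 16      // compensates for that sign
++ //
++ //   e.g. x = 0x12348765: split_high = 0x1235, split_low = 0x8765 (= -0x789b),
++ //   and (0x1235 << 16) + (-0x789b) == 0x12348765.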
++ bool is_immediate() const { return !is_op(long_at(instruction_offset), Assembler::lui_op); } ++ bool is_64ldst() const { ++ if (is_immediate()) { ++ return (Assembler::opcode(long_at(hiword_offset)) == Assembler::opcode(long_at(instruction_offset))) && ++ (Assembler::imm_off(long_at(hiword_offset)) == Assembler::imm_off(long_at(instruction_offset)) + wordSize); ++ } else { ++ return (Assembler::opcode(long_at(ldst_offset+hiword_offset)) == Assembler::opcode(long_at(ldst_offset))) && ++ (Assembler::imm_off(long_at(ldst_offset+hiword_offset)) == Assembler::imm_off(long_at(ldst_offset)) + wordSize); ++ } ++ } ++ ++ address instruction_address() const { return addr_at(instruction_offset); } ++ address next_instruction_address() const { ++ return addr_at( (is_immediate()? immediate_size : ldst_size) + (is_64ldst()? 4 : 0)); ++ } ++ ++ int offset() const; ++ ++ void set_offset(int x); ++ ++ void add_offset_in_bytes(int add_offset) { set_offset ( ( offset() + add_offset ) ); } ++ ++ void verify(); ++ void print (); ++ ++ // unit test stuff ++ static void test() {} ++ ++ private: ++ inline friend NativeMovRegMem* nativeMovRegMem_at (address address); ++}; ++ ++inline NativeMovRegMem* nativeMovRegMem_at (address address) { ++ NativeMovRegMem* test = (NativeMovRegMem*)(address - NativeMovRegMem::instruction_offset); ++#ifdef ASSERT ++ test->verify(); ++#endif ++ return test; ++} ++ ++class NativeMovRegMemPatching: public NativeMovRegMem { ++ private: ++ friend NativeMovRegMemPatching* nativeMovRegMemPatching_at (address address) { ++ NativeMovRegMemPatching* test = (NativeMovRegMemPatching*)(address - instruction_offset); ++ #ifdef ASSERT ++ test->verify(); ++ #endif ++ return test; ++ } ++}; ++ ++ ++// Handles all kinds of jump on Loongson. Long/far, conditional/unconditional ++// 32 bits: ++// far jump: ++// lui reg, split_high(addr) ++// addiu reg, split_low(addr) ++// jr reg ++// nop ++// or ++// beq ZERO, ZERO, offset ++// nop ++// ++ ++//64 bits: ++// far jump: ++// lui rd, imm(63...48); ++// ori rd, rd, imm(47...32); ++// dsll rd, rd, 16; ++// ori rd, rd, imm(31...16); ++// dsll rd, rd, 16; ++// ori rd, rd, imm(15...0); ++// jalr rd ++// nop ++// ++class NativeJump: public NativeInstruction { ++ public: ++ enum mips_specific_constants { ++ instruction_offset = 0, ++ beq_opcode = 0x10000000,//000100|00000|00000|offset ++ b_mask = 0xffff0000, ++ short_size = 8, ++ instruction_size = 6 * BytesPerInstWord ++ }; ++ ++ bool is_short() const { return (long_at(instruction_offset) & b_mask) == beq_opcode; } ++ bool is_b_far(); ++ address instruction_address() const { return addr_at(instruction_offset); } ++ address jump_destination(); ++ ++ void patch_set48_gs(address dest); ++ void patch_set48(address dest); ++ ++ void patch_on_jr_gs(address dest); ++ void patch_on_jr(address dest); ++ ++ void patch_on_j_gs(address dest); ++ void patch_on_j(address dest); ++ ++ void patch_on_j_only(address dest); ++ ++ void set_jump_destination(address dest); ++ ++ // Creation ++ inline friend NativeJump* nativeJump_at(address address); ++ ++ // Insertion of native jump instruction ++ static void insert(address code_pos, address entry) { Unimplemented(); } ++ // MT-safe insertion of native jump at verified method entry ++ static void check_verified_entry_alignment(address entry, address verified_entry) {} ++ static void patch_verified_entry(address entry, address verified_entry, address dest); ++ ++ void verify(); ++}; ++ ++inline NativeJump* nativeJump_at(address address) { ++ NativeJump* jump = 
(NativeJump*)(address - NativeJump::instruction_offset); ++ debug_only(jump->verify();) ++ return jump; ++} ++ ++class NativeGeneralJump: public NativeJump { ++ public: ++ // Creation ++ inline friend NativeGeneralJump* nativeGeneralJump_at(address address); ++ ++ // Insertion of native general jump instruction ++ static void insert_unconditional(address code_pos, address entry); ++ static void replace_mt_safe(address instr_addr, address code_buffer); ++}; ++ ++inline NativeGeneralJump* nativeGeneralJump_at(address address) { ++ NativeGeneralJump* jump = (NativeGeneralJump*)(address); ++ debug_only(jump->verify();) ++ return jump; ++} ++ ++class NativeIllegalInstruction: public NativeInstruction { ++public: ++ enum mips_specific_constants { ++ instruction_code = 0x42000029, // mips reserved instruction ++ instruction_size = 4, ++ instruction_offset = 0, ++ next_instruction_offset = 4 ++ }; ++ ++ // Insert illegal opcode as specific address ++ static void insert(address code_pos); ++}; ++ ++// return instruction that does not pop values of the stack ++// jr RA ++// delay slot ++class NativeReturn: public NativeInstruction { ++ public: ++ enum mips_specific_constants { ++ instruction_size = 8, ++ instruction_offset = 0, ++ next_instruction_offset = 8 ++ }; ++}; ++ ++ ++ ++ ++class NativeCondJump; ++inline NativeCondJump* nativeCondJump_at(address address); ++class NativeCondJump: public NativeInstruction { ++ public: ++ enum mips_specific_constants { ++ instruction_size = 16, ++ instruction_offset = 12, ++ next_instruction_offset = 20 ++ }; ++ ++ ++ int insn_word() const { return long_at(instruction_offset); } ++ address instruction_address() const { return addr_at(0); } ++ address next_instruction_address() const { return addr_at(next_instruction_offset); } ++ ++ // Creation ++ inline friend NativeCondJump* nativeCondJump_at(address address); ++ ++ address jump_destination() const { ++ return ::nativeCondJump_at(addr_at(12))->jump_destination(); ++ } ++ ++ void set_jump_destination(address dest) { ++ ::nativeCondJump_at(addr_at(12))->set_jump_destination(dest); ++ } ++ ++}; ++ ++inline NativeCondJump* nativeCondJump_at(address address) { ++ NativeCondJump* jump = (NativeCondJump*)(address); ++ return jump; ++} ++ ++ ++ ++inline bool NativeInstruction::is_illegal() { return insn_word() == illegal_instruction(); } ++ ++inline bool NativeInstruction::is_call() { ++ // jal target ++ // nop ++ if ( nativeInstruction_at(addr_at(0))->is_op(Assembler::jal_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() ) { ++ return true; ++ } ++ ++ // nop ++ // nop ++ // nop ++ // nop ++ // jal target ++ // nop ++ if ( is_nop() && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ nativeInstruction_at(addr_at(16))->is_op(Assembler::jal_op) && ++ nativeInstruction_at(addr_at(20))->is_nop() ) { ++ return true; ++ } ++ ++ // li64 ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) && ++ is_special_op(int_at(24), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), 
Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ is_special_op(int_at(8), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ is_special_op(int_at(8), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ if(is_trampoline_call()) ++ return true; ++ ++ return false; ++ ++} ++ ++inline bool NativeInstruction::is_return() { return is_special_op(Assembler::jr_op) && is_rs(RA);} ++ ++inline bool NativeInstruction::is_cond_jump() { return is_int_branch() || is_float_branch(); } ++ ++// Call trampoline stubs. 
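++// As the accessors below show, the stub is essentially an 8-byte data slot: the
++// destination is read with ptr_at(0) and updated with set_ptr_at(0), while the
++// far-call sequence recognised by is_trampoline_call() builds the stub address
++// (lui/ori/dsll), loads that word with ld and jumps through it with jalr.
++// Retargeting a far call is therefore a single 8-byte store into the stub
++// (assuming the stub is kept 8-byte aligned, which this header does not enforce).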
++class NativeCallTrampolineStub : public NativeInstruction { ++ public: ++ ++ enum mips_specific_constants { ++ instruction_size = 2 * BytesPerInstWord, ++ instruction_offset = 0, ++ next_instruction_offset = 2 * BytesPerInstWord ++ }; ++ ++ address destination() const { ++ return (address)ptr_at(0); ++ } ++ ++ void set_destination(address new_destination) { ++ set_ptr_at(0, (intptr_t)new_destination); ++ } ++}; ++ ++inline bool NativeInstruction::is_trampoline_call() { ++ // lui dst, imm16 ++ // ori dst, dst, imm16 ++ // dsll dst, dst, 16 ++ // ld target, dst, imm16 ++ // jalr target ++ // nop ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ld_op) && ++ is_special_op(int_at(16), Assembler::jalr_op) && ++ nativeInstruction_at(addr_at(20))->is_nop() ) { ++ return true; ++ } ++ ++ return false; ++} ++ ++inline NativeCallTrampolineStub* nativeCallTrampolineStub_at(address addr) { ++ return (NativeCallTrampolineStub*)addr; ++} ++#endif // CPU_MIPS_VM_NATIVEINST_MIPS_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/register_definitions_mips.cpp b/src/hotspot/cpu/mips/register_definitions_mips.cpp +--- a/src/hotspot/cpu/mips/register_definitions_mips.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/register_definitions_mips.cpp 2024-01-30 10:00:11.848098317 +0800 +@@ -0,0 +1,103 @@ ++/* ++ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/register.hpp" ++#include "register_mips.hpp" ++#ifdef TARGET_ARCH_MODEL_mips_32 ++# include "interp_masm_mips_32.hpp" ++#endif ++#ifdef TARGET_ARCH_MODEL_mips_64 ++# include "interp_masm_mips_64.hpp" ++#endif ++ ++REGISTER_DEFINITION(Register, noreg); ++REGISTER_DEFINITION(Register, i0); ++REGISTER_DEFINITION(Register, i1); ++REGISTER_DEFINITION(Register, i2); ++REGISTER_DEFINITION(Register, i3); ++REGISTER_DEFINITION(Register, i4); ++REGISTER_DEFINITION(Register, i5); ++REGISTER_DEFINITION(Register, i6); ++REGISTER_DEFINITION(Register, i7); ++REGISTER_DEFINITION(Register, i8); ++REGISTER_DEFINITION(Register, i9); ++REGISTER_DEFINITION(Register, i10); ++REGISTER_DEFINITION(Register, i11); ++REGISTER_DEFINITION(Register, i12); ++REGISTER_DEFINITION(Register, i13); ++REGISTER_DEFINITION(Register, i14); ++REGISTER_DEFINITION(Register, i15); ++REGISTER_DEFINITION(Register, i16); ++REGISTER_DEFINITION(Register, i17); ++REGISTER_DEFINITION(Register, i18); ++REGISTER_DEFINITION(Register, i19); ++REGISTER_DEFINITION(Register, i20); ++REGISTER_DEFINITION(Register, i21); ++REGISTER_DEFINITION(Register, i22); ++REGISTER_DEFINITION(Register, i23); ++REGISTER_DEFINITION(Register, i24); ++REGISTER_DEFINITION(Register, i25); ++REGISTER_DEFINITION(Register, i26); ++REGISTER_DEFINITION(Register, i27); ++REGISTER_DEFINITION(Register, i28); ++REGISTER_DEFINITION(Register, i29); ++REGISTER_DEFINITION(Register, i30); ++REGISTER_DEFINITION(Register, i31); ++ ++REGISTER_DEFINITION(FloatRegister, fnoreg); ++REGISTER_DEFINITION(FloatRegister, f0); ++REGISTER_DEFINITION(FloatRegister, f1); ++REGISTER_DEFINITION(FloatRegister, f2); ++REGISTER_DEFINITION(FloatRegister, f3); ++REGISTER_DEFINITION(FloatRegister, f4); ++REGISTER_DEFINITION(FloatRegister, f5); ++REGISTER_DEFINITION(FloatRegister, f6); ++REGISTER_DEFINITION(FloatRegister, f7); ++REGISTER_DEFINITION(FloatRegister, f8); ++REGISTER_DEFINITION(FloatRegister, f9); ++REGISTER_DEFINITION(FloatRegister, f10); ++REGISTER_DEFINITION(FloatRegister, f11); ++REGISTER_DEFINITION(FloatRegister, f12); ++REGISTER_DEFINITION(FloatRegister, f13); ++REGISTER_DEFINITION(FloatRegister, f14); ++REGISTER_DEFINITION(FloatRegister, f15); ++REGISTER_DEFINITION(FloatRegister, f16); ++REGISTER_DEFINITION(FloatRegister, f17); ++REGISTER_DEFINITION(FloatRegister, f18); ++REGISTER_DEFINITION(FloatRegister, f19); ++REGISTER_DEFINITION(FloatRegister, f20); ++REGISTER_DEFINITION(FloatRegister, f21); ++REGISTER_DEFINITION(FloatRegister, f22); ++REGISTER_DEFINITION(FloatRegister, f23); ++REGISTER_DEFINITION(FloatRegister, f24); ++REGISTER_DEFINITION(FloatRegister, f25); ++REGISTER_DEFINITION(FloatRegister, f26); ++REGISTER_DEFINITION(FloatRegister, f27); ++REGISTER_DEFINITION(FloatRegister, f28); ++REGISTER_DEFINITION(FloatRegister, f29); ++REGISTER_DEFINITION(FloatRegister, f30); ++REGISTER_DEFINITION(FloatRegister, f31); +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/registerMap_mips.hpp b/src/hotspot/cpu/mips/registerMap_mips.hpp +--- a/src/hotspot/cpu/mips/registerMap_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/registerMap_mips.hpp 2024-01-30 10:00:11.848098317 +0800 +@@ -0,0 +1,47 @@ ++/* ++ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_REGISTERMAP_MIPS_HPP ++#define CPU_MIPS_VM_REGISTERMAP_MIPS_HPP ++ ++// machine-dependent implemention for register maps ++ friend class frame; ++ ++ private: ++#ifndef CORE ++ // This is the hook for finding a register in an "well-known" location, ++ // such as a register block of a predetermined format. ++ // Since there is none, we just return NULL. ++ // See registerMap_sparc.hpp for an example of grabbing registers ++ // from register save areas of a standard layout. ++ address pd_location(VMReg reg) const {return NULL;} ++#endif ++ ++ // no PD state to clear or copy: ++ void pd_clear() {} ++ void pd_initialize() {} ++ void pd_initialize_from(const RegisterMap* map) {} ++ ++#endif // CPU_MIPS_VM_REGISTERMAP_MIPS_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/register_mips.cpp b/src/hotspot/cpu/mips/register_mips.cpp +--- a/src/hotspot/cpu/mips/register_mips.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/register_mips.cpp 2024-01-30 10:00:11.848098317 +0800 +@@ -0,0 +1,52 @@ ++/* ++ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "register_mips.hpp" ++ ++const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers << 1; ++const int ConcreteRegisterImpl::max_fpr = ConcreteRegisterImpl::max_gpr + ++ 2 * FloatRegisterImpl::number_of_registers; ++ ++const char* RegisterImpl::name() const { ++ const char* names[number_of_registers] = { ++ "zero", "at", "v0", "v1", "a0", "a1", "a2", "a3", ++ "a4", "a5", "a6", "a7", "t0", "t1", "t2", "t3", ++ "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", ++ "t8", "t9", "k0", "k1", "gp", "sp", "fp", "ra" ++ }; ++ return is_valid() ? names[encoding()] : "noreg"; ++} ++ ++const char* FloatRegisterImpl::name() const { ++ const char* names[number_of_registers] = { ++ "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", ++ "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", ++ "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", ++ "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", ++ }; ++ return is_valid() ? names[encoding()] : "fnoreg"; ++} ++ +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/register_mips.hpp b/src/hotspot/cpu/mips/register_mips.hpp +--- a/src/hotspot/cpu/mips/register_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/register_mips.hpp 2024-01-30 10:00:11.848098317 +0800 +@@ -0,0 +1,341 @@ ++/* ++ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_REGISTER_MIPS_HPP ++#define CPU_MIPS_VM_REGISTER_MIPS_HPP ++ ++#include "asm/register.hpp" ++#include "utilities/formatBuffer.hpp" ++ ++class VMRegImpl; ++typedef VMRegImpl* VMReg; ++ ++// Use Register as shortcut ++class RegisterImpl; ++typedef RegisterImpl* Register; ++ ++inline Register as_Register(int encoding) { ++ return (Register)(intptr_t) encoding; ++} ++ ++class RegisterImpl: public AbstractRegisterImpl { ++ public: ++ enum { ++ number_of_registers = 32 ++ }; ++ ++ // derived registers, offsets, and addresses ++ Register successor() const { return as_Register(encoding() + 1); } ++ ++ // construction ++ inline friend Register as_Register(int encoding); ++ ++ VMReg as_VMReg(); ++ ++ // accessors ++ int encoding() const { assert(is_valid(), "invalid register (%d)", (int)(intptr_t)this ); return (intptr_t)this; } ++ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } ++ const char* name() const; ++}; ++ ++ ++// The integer registers of the MIPS32 architecture ++CONSTANT_REGISTER_DECLARATION(Register, noreg, (-1)); ++ ++ ++CONSTANT_REGISTER_DECLARATION(Register, i0, (0)); ++CONSTANT_REGISTER_DECLARATION(Register, i1, (1)); ++CONSTANT_REGISTER_DECLARATION(Register, i2, (2)); ++CONSTANT_REGISTER_DECLARATION(Register, i3, (3)); ++CONSTANT_REGISTER_DECLARATION(Register, i4, (4)); ++CONSTANT_REGISTER_DECLARATION(Register, i5, (5)); ++CONSTANT_REGISTER_DECLARATION(Register, i6, (6)); ++CONSTANT_REGISTER_DECLARATION(Register, i7, (7)); ++CONSTANT_REGISTER_DECLARATION(Register, i8, (8)); ++CONSTANT_REGISTER_DECLARATION(Register, i9, (9)); ++CONSTANT_REGISTER_DECLARATION(Register, i10, (10)); ++CONSTANT_REGISTER_DECLARATION(Register, i11, (11)); ++CONSTANT_REGISTER_DECLARATION(Register, i12, (12)); ++CONSTANT_REGISTER_DECLARATION(Register, i13, (13)); ++CONSTANT_REGISTER_DECLARATION(Register, i14, (14)); ++CONSTANT_REGISTER_DECLARATION(Register, i15, (15)); ++CONSTANT_REGISTER_DECLARATION(Register, i16, (16)); ++CONSTANT_REGISTER_DECLARATION(Register, i17, (17)); ++CONSTANT_REGISTER_DECLARATION(Register, i18, (18)); ++CONSTANT_REGISTER_DECLARATION(Register, i19, (19)); ++CONSTANT_REGISTER_DECLARATION(Register, i20, (20)); ++CONSTANT_REGISTER_DECLARATION(Register, i21, (21)); ++CONSTANT_REGISTER_DECLARATION(Register, i22, (22)); ++CONSTANT_REGISTER_DECLARATION(Register, i23, (23)); ++CONSTANT_REGISTER_DECLARATION(Register, i24, (24)); ++CONSTANT_REGISTER_DECLARATION(Register, i25, (25)); ++CONSTANT_REGISTER_DECLARATION(Register, i26, (26)); ++CONSTANT_REGISTER_DECLARATION(Register, i27, (27)); ++CONSTANT_REGISTER_DECLARATION(Register, i28, (28)); ++CONSTANT_REGISTER_DECLARATION(Register, i29, (29)); ++CONSTANT_REGISTER_DECLARATION(Register, i30, (30)); ++CONSTANT_REGISTER_DECLARATION(Register, i31, (31)); ++ ++#ifndef DONT_USE_REGISTER_DEFINES ++#define NOREG ((Register)(noreg_RegisterEnumValue)) ++ ++#define I0 ((Register)(i0_RegisterEnumValue)) ++#define I1 ((Register)(i1_RegisterEnumValue)) ++#define I2 ((Register)(i2_RegisterEnumValue)) ++#define I3 ((Register)(i3_RegisterEnumValue)) ++#define I4 ((Register)(i4_RegisterEnumValue)) ++#define I5 ((Register)(i5_RegisterEnumValue)) ++#define I6 ((Register)(i6_RegisterEnumValue)) ++#define I7 ((Register)(i7_RegisterEnumValue)) ++#define I8 ((Register)(i8_RegisterEnumValue)) ++#define I9 ((Register)(i9_RegisterEnumValue)) ++#define I10 ((Register)(i10_RegisterEnumValue)) ++#define I11 ((Register)(i11_RegisterEnumValue)) ++#define I12 ((Register)(i12_RegisterEnumValue)) 
++#define I13 ((Register)(i13_RegisterEnumValue)) ++#define I14 ((Register)(i14_RegisterEnumValue)) ++#define I15 ((Register)(i15_RegisterEnumValue)) ++#define I16 ((Register)(i16_RegisterEnumValue)) ++#define I17 ((Register)(i17_RegisterEnumValue)) ++#define I18 ((Register)(i18_RegisterEnumValue)) ++#define I19 ((Register)(i19_RegisterEnumValue)) ++#define I20 ((Register)(i20_RegisterEnumValue)) ++#define I21 ((Register)(i21_RegisterEnumValue)) ++#define I22 ((Register)(i22_RegisterEnumValue)) ++#define I23 ((Register)(i23_RegisterEnumValue)) ++#define I24 ((Register)(i24_RegisterEnumValue)) ++#define I25 ((Register)(i25_RegisterEnumValue)) ++#define I26 ((Register)(i26_RegisterEnumValue)) ++#define I27 ((Register)(i27_RegisterEnumValue)) ++#define I28 ((Register)(i28_RegisterEnumValue)) ++#define I29 ((Register)(i29_RegisterEnumValue)) ++#define I30 ((Register)(i30_RegisterEnumValue)) ++#define I31 ((Register)(i31_RegisterEnumValue)) ++ ++#define R0 ((Register)(i0_RegisterEnumValue)) ++#define AT ((Register)(i1_RegisterEnumValue)) ++#define V0 ((Register)(i2_RegisterEnumValue)) ++#define V1 ((Register)(i3_RegisterEnumValue)) ++#define A0 ((Register)(i4_RegisterEnumValue)) ++#define A1 ((Register)(i5_RegisterEnumValue)) ++#define A2 ((Register)(i6_RegisterEnumValue)) ++#define A3 ((Register)(i7_RegisterEnumValue)) ++#define A4 ((Register)(i8_RegisterEnumValue)) ++#define A5 ((Register)(i9_RegisterEnumValue)) ++#define A6 ((Register)(i10_RegisterEnumValue)) ++#define A7 ((Register)(i11_RegisterEnumValue)) ++#define RT0 ((Register)(i12_RegisterEnumValue)) ++#define RT1 ((Register)(i13_RegisterEnumValue)) ++#define RT2 ((Register)(i14_RegisterEnumValue)) ++#define RT3 ((Register)(i15_RegisterEnumValue)) ++#define S0 ((Register)(i16_RegisterEnumValue)) ++#define S1 ((Register)(i17_RegisterEnumValue)) ++#define S2 ((Register)(i18_RegisterEnumValue)) ++#define S3 ((Register)(i19_RegisterEnumValue)) ++#define S4 ((Register)(i20_RegisterEnumValue)) ++#define S5 ((Register)(i21_RegisterEnumValue)) ++#define S6 ((Register)(i22_RegisterEnumValue)) ++#define S7 ((Register)(i23_RegisterEnumValue)) ++#define RT8 ((Register)(i24_RegisterEnumValue)) ++#define RT9 ((Register)(i25_RegisterEnumValue)) ++#define K0 ((Register)(i26_RegisterEnumValue)) ++#define K1 ((Register)(i27_RegisterEnumValue)) ++#define GP ((Register)(i28_RegisterEnumValue)) ++#define SP ((Register)(i29_RegisterEnumValue)) ++#define FP ((Register)(i30_RegisterEnumValue)) ++#define S8 ((Register)(i30_RegisterEnumValue)) ++#define RA ((Register)(i31_RegisterEnumValue)) ++ ++#define c_rarg0 RT0 ++#define c_rarg1 RT1 ++#define Rmethod S3 ++#define Rsender S4 ++#define Rnext S1 ++ ++/* ++#define RT0 T0 ++#define RT1 T1 ++#define RT2 T2 ++#define RT3 T3 ++#define RT4 T8 ++#define RT5 T9 ++*/ ++ ++ ++//for interpreter frame ++// bytecode pointer register ++#define BCP S0 ++// local variable pointer register ++#define LVP S7 ++// temperary callee saved register, we use this register to save the register maybe blowed cross call_VM ++// be sure to save and restore its value in call_stub ++#define TSR S2 ++ ++#define OPT_THREAD 1 ++ ++#define TREG S6 ++ ++#define S5_heapbase S5 ++ ++#define mh_SP_save SP ++ ++#define FSR V0 ++#define SSR V1 ++#define FSF F0 ++#define SSF F1 ++#define FTF F14 ++#define STF F15 ++ ++#define AFT F30 ++ ++#define RECEIVER T0 ++#define IC_Klass T1 ++ ++#define SHIFT_count T3 ++ ++#endif // DONT_USE_REGISTER_DEFINES ++ ++// Use FloatRegister as shortcut ++class FloatRegisterImpl; ++typedef FloatRegisterImpl* 
FloatRegister; ++ ++inline FloatRegister as_FloatRegister(int encoding) { ++ return (FloatRegister)(intptr_t) encoding; ++} ++ ++// The implementation of floating point registers for the architecture ++class FloatRegisterImpl: public AbstractRegisterImpl { ++ public: ++ enum { ++ float_arg_base = 12, ++ number_of_registers = 32 ++ }; ++ ++ // construction ++ inline friend FloatRegister as_FloatRegister(int encoding); ++ ++ VMReg as_VMReg(); ++ ++ // derived registers, offsets, and addresses ++ FloatRegister successor() const { return as_FloatRegister(encoding() + 1); } ++ ++ // accessors ++ int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } ++ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } ++ const char* name() const; ++ ++}; ++ ++CONSTANT_REGISTER_DECLARATION(FloatRegister, fnoreg , (-1)); ++ ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f0 , ( 0)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f1 , ( 1)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f2 , ( 2)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f3 , ( 3)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f4 , ( 4)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f5 , ( 5)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f6 , ( 6)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f7 , ( 7)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f8 , ( 8)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f9 , ( 9)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f10 , (10)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f11 , (11)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f12 , (12)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f13 , (13)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f14 , (14)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f15 , (15)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f16 , (16)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f17 , (17)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f18 , (18)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f19 , (19)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f20 , (20)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f21 , (21)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f22 , (22)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f23 , (23)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f24 , (24)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f25 , (25)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f26 , (26)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f27 , (27)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f28 , (28)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f29 , (29)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f30 , (30)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f31 , (31)); ++ ++#ifndef DONT_USE_REGISTER_DEFINES ++#define FNOREG ((FloatRegister)(fnoreg_FloatRegisterEnumValue)) ++#define F0 ((FloatRegister)( f0_FloatRegisterEnumValue)) ++#define F1 ((FloatRegister)( f1_FloatRegisterEnumValue)) ++#define F2 ((FloatRegister)( f2_FloatRegisterEnumValue)) ++#define F3 ((FloatRegister)( f3_FloatRegisterEnumValue)) ++#define F4 ((FloatRegister)( f4_FloatRegisterEnumValue)) ++#define F5 ((FloatRegister)( f5_FloatRegisterEnumValue)) ++#define F6 ((FloatRegister)( f6_FloatRegisterEnumValue)) ++#define F7 ((FloatRegister)( f7_FloatRegisterEnumValue)) ++#define F8 ((FloatRegister)( f8_FloatRegisterEnumValue)) ++#define F9 ((FloatRegister)( f9_FloatRegisterEnumValue)) ++#define F10 ((FloatRegister)( f10_FloatRegisterEnumValue)) 
++#define F11 ((FloatRegister)( f11_FloatRegisterEnumValue)) ++#define F12 ((FloatRegister)( f12_FloatRegisterEnumValue)) ++#define F13 ((FloatRegister)( f13_FloatRegisterEnumValue)) ++#define F14 ((FloatRegister)( f14_FloatRegisterEnumValue)) ++#define F15 ((FloatRegister)( f15_FloatRegisterEnumValue)) ++#define F16 ((FloatRegister)( f16_FloatRegisterEnumValue)) ++#define F17 ((FloatRegister)( f17_FloatRegisterEnumValue)) ++#define F18 ((FloatRegister)( f18_FloatRegisterEnumValue)) ++#define F19 ((FloatRegister)( f19_FloatRegisterEnumValue)) ++#define F20 ((FloatRegister)( f20_FloatRegisterEnumValue)) ++#define F21 ((FloatRegister)( f21_FloatRegisterEnumValue)) ++#define F22 ((FloatRegister)( f22_FloatRegisterEnumValue)) ++#define F23 ((FloatRegister)( f23_FloatRegisterEnumValue)) ++#define F24 ((FloatRegister)( f24_FloatRegisterEnumValue)) ++#define F25 ((FloatRegister)( f25_FloatRegisterEnumValue)) ++#define F26 ((FloatRegister)( f26_FloatRegisterEnumValue)) ++#define F27 ((FloatRegister)( f27_FloatRegisterEnumValue)) ++#define F28 ((FloatRegister)( f28_FloatRegisterEnumValue)) ++#define F29 ((FloatRegister)( f29_FloatRegisterEnumValue)) ++#define F30 ((FloatRegister)( f30_FloatRegisterEnumValue)) ++#define F31 ((FloatRegister)( f31_FloatRegisterEnumValue)) ++#endif // DONT_USE_REGISTER_DEFINES ++ ++ ++const int MIPS_ARGS_IN_REGS_NUM = 4; ++ ++// Need to know the total number of registers of all sorts for SharedInfo. ++// Define a class that exports it. ++class ConcreteRegisterImpl : public AbstractRegisterImpl { ++ public: ++ enum { ++ // A big enough number for C2: all the registers plus flags ++ // This number must be large enough to cover REG_COUNT (defined by c2) registers. ++ // There is no requirement that any ordering here matches any ordering c2 gives ++ // it's optoregs. ++ number_of_registers = (RegisterImpl::number_of_registers + FloatRegisterImpl::number_of_registers) * 2 ++ }; ++ ++ static const int max_gpr; ++ static const int max_fpr; ++}; ++ ++#endif //CPU_MIPS_VM_REGISTER_MIPS_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/relocInfo_mips.cpp b/src/hotspot/cpu/mips/relocInfo_mips.cpp +--- a/src/hotspot/cpu/mips/relocInfo_mips.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/relocInfo_mips.cpp 2024-01-30 10:00:11.848098317 +0800 +@@ -0,0 +1,160 @@ ++/* ++ * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/relocInfo.hpp" ++#include "compiler/disassembler.hpp" ++#include "nativeInst_mips.hpp" ++#include "oops/compressedOops.inline.hpp" ++#include "oops/oop.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/safepoint.hpp" ++ ++ ++void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) { ++ x += o; ++ typedef Assembler::WhichOperand WhichOperand; ++ WhichOperand which = (WhichOperand) format(); // that is, disp32 or imm, call32, narrow oop ++ assert(which == Assembler::disp32_operand || ++ which == Assembler::narrow_oop_operand || ++ which == Assembler::imm_operand, "format unpacks ok"); ++ if (which == Assembler::imm_operand) { ++ if (verify_only) { ++ assert(nativeMovConstReg_at(addr())->data() == (long)x, "instructions must match"); ++ } else { ++ nativeMovConstReg_at(addr())->set_data((intptr_t)(x)); ++ } ++ } else if (which == Assembler::narrow_oop_operand) { ++ // both compressed oops and compressed classes look the same ++ if (Universe::heap()->is_in_reserved((oop)x)) { ++ if (verify_only) { ++ assert(nativeMovConstReg_at(addr())->data() == (long)CompressedOops::encode((oop)x), "instructions must match"); ++ } else { ++ nativeMovConstReg_at(addr())->set_data((intptr_t)(CompressedOops::encode(oop(x))), (intptr_t)(x)); ++ } ++ } else { ++ if (verify_only) { ++ assert(nativeMovConstReg_at(addr())->data() == (long)Klass::encode_klass((Klass*)x), "instructions must match"); ++ } else { ++ nativeMovConstReg_at(addr())->set_data((intptr_t)(Klass::encode_klass((Klass*)x)), (intptr_t)(x)); ++ } ++ } ++ } else { ++ // Note: Use runtime_call_type relocations for call32_operand. ++ assert(0, "call32_operand not supported in MIPS64"); ++ } ++} ++ ++ ++//NOTICE HERE, this relocate is not need for MIPS, since MIPS USE abosolutly target, ++//Maybe We should FORGET CALL RELOCATION ++address Relocation::pd_call_destination(address orig_addr) { ++ intptr_t adj = 0; ++ NativeInstruction* ni = nativeInstruction_at(addr()); ++ if (ni->is_call()) { ++ if (!ni->is_trampoline_call()) { ++ return nativeCall_at(addr())->target_addr_for_insn(); ++ } else { ++ address trampoline = nativeCall_at(addr())->get_trampoline(); ++ if (trampoline) { ++ return nativeCallTrampolineStub_at(trampoline)->destination(); ++ } else { ++ return (address) -1; ++ } ++ } ++ } else if (ni->is_jump()) { ++ return nativeGeneralJump_at(addr())->jump_destination() + adj; ++ } else if (ni->is_cond_jump()) { ++ return nativeCondJump_at(addr())->jump_destination() +adj; ++ } else { ++ tty->print_cr("\nError!\ncall destination: " INTPTR_FORMAT, p2i(addr())); ++ Disassembler::decode(addr() - 10 * 4, addr() + 10 * 4, tty); ++ ShouldNotReachHere(); ++ return NULL; ++ } ++} ++ ++ ++void Relocation::pd_set_call_destination(address x) { ++ NativeInstruction* ni = nativeInstruction_at(addr()); ++ if (ni->is_call()) { ++ NativeCall* call = nativeCall_at(addr()); ++ if (!ni->is_trampoline_call()) { ++ call->set_destination(x); ++ } else { ++ address trampoline_stub_addr = call->get_trampoline(); ++ if (trampoline_stub_addr != NULL) { ++ address orig = call->target_addr_for_insn(); ++ if (orig != trampoline_stub_addr) { ++ call->patch_on_trampoline(trampoline_stub_addr); ++ } ++ call->set_destination_mt_safe(x, false); ++ } ++ } ++ } else if (ni->is_jump()) ++ nativeGeneralJump_at(addr())->set_jump_destination(x); ++ else if (ni->is_cond_jump()) ++ nativeCondJump_at(addr())->set_jump_destination(x); ++ else ++ { ShouldNotReachHere(); 
} ++ ++ // Unresolved jumps are recognized by a destination of -1 ++ // However 64bit can't actually produce such an address ++ // and encodes a jump to self but jump_destination will ++ // return a -1 as the signal. We must not relocate this ++ // jmp or the ic code will not see it as unresolved. ++} ++ ++ ++address* Relocation::pd_address_in_code() { ++ return (address*)addr(); ++} ++ ++ ++address Relocation::pd_get_address_from_code() { ++ NativeMovConstReg* ni = nativeMovConstReg_at(addr()); ++ return (address)ni->data(); ++} ++ ++ ++ ++void poll_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { ++} ++ ++/* ++void poll_return_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { ++} ++*/ ++ ++void internal_pc_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { ++ address target =0; ++ NativeMovConstReg* ni = nativeMovConstReg_at(addr()); ++ target = new_addr_for((address)ni->data(), src, dest); ++ ni->set_data((intptr_t)target); ++} ++ ++void metadata_Relocation::pd_fix_value(address x) { ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/relocInfo_mips.hpp b/src/hotspot/cpu/mips/relocInfo_mips.hpp +--- a/src/hotspot/cpu/mips/relocInfo_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/relocInfo_mips.hpp 2024-01-30 10:00:11.848098317 +0800 +@@ -0,0 +1,44 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_RELOCINFO_MIPS_HPP ++#define CPU_MIPS_VM_RELOCINFO_MIPS_HPP ++ ++ // machine-dependent parts of class relocInfo ++ private: ++ enum { ++ // Since MIPS instructions are whole words, ++ // the two low-order offset bits can always be discarded. ++ offset_unit = 4, ++ ++ // imm_oop_operand vs. narrow_oop_operand ++ format_width = 2 ++ }; ++ ++ public: ++ ++ static bool mustIterateImmediateOopsInCode() { return false; } ++ ++#endif // CPU_MIPS_VM_RELOCINFO_MIPS_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/runtime_mips_64.cpp b/src/hotspot/cpu/mips/runtime_mips_64.cpp +--- a/src/hotspot/cpu/mips/runtime_mips_64.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/runtime_mips_64.cpp 2024-01-30 10:00:11.848098317 +0800 +@@ -0,0 +1,198 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. 
++ * Copyright (c) 2015, 2019, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#ifdef COMPILER2 ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "classfile/systemDictionary.hpp" ++#include "code/vmreg.hpp" ++#include "interpreter/interpreter.hpp" ++#include "opto/runtime.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/vframeArray.hpp" ++#include "utilities/globalDefinitions.hpp" ++#include "vmreg_mips.inline.hpp" ++#endif ++ ++#define __ masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++//-------------- generate_exception_blob ----------- ++// creates _exception_blob. ++// The exception blob is jumped to from a compiled method. ++// (see emit_exception_handler in sparc.ad file) ++// ++// Given an exception pc at a call we call into the runtime for the ++// handler in this method. This handler might merely restore state ++// (i.e. callee save registers) unwind the frame and jump to the ++// exception handler for the nmethod if there is no Java level handler ++// for the nmethod. ++// ++// This code is entered with a jump, and left with a jump. ++// ++// Arguments: ++// V0: exception oop ++// V1: exception pc ++// ++// Results: ++// A0: exception oop ++// A1: exception pc in caller or ??? ++// jumps to: exception handler of caller ++// ++// Note: the exception pc MUST be at a call (precise debug information) ++// ++// [stubGenerator_mips.cpp] generate_forward_exception() ++// |- V0, V1 are created ++// |- T9 <= SharedRuntime::exception_handler_for_return_address ++// `- jr T9 ++// `- the caller's exception_handler ++// `- jr OptoRuntime::exception_blob ++// `- here ++// ++void OptoRuntime::generate_exception_blob() { ++ // Capture info about frame layout ++ enum layout { ++ fp_off, ++ return_off, // slot for return address ++ framesize ++ }; ++ ++ // allocate space for the code ++ ResourceMark rm; ++ // setup code generation tools ++ CodeBuffer buffer("exception_blob", 5120, 5120); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ ++ ++ address start = __ pc(); ++ ++ __ daddiu(SP, SP, -1 * framesize * wordSize); // Prolog! ++ ++ // this frame will be treated as the original caller method. ++ // So, the return pc should be filled with the original exception pc. 
++ // ref: X86's implementation ++ __ sd(V1, SP, return_off *wordSize); // return address ++ __ sd(FP, SP, fp_off *wordSize); ++ ++ // Save callee saved registers. None for UseSSE=0, ++ // floats-only for UseSSE=1, and doubles for UseSSE=2. ++ ++ __ daddiu(FP, SP, fp_off * wordSize); ++ ++ // Store exception in Thread object. We cannot pass any arguments to the ++ // handle_exception call, since we do not want to make any assumption ++ // about the size of the frame where the exception happened in. ++ Register thread = TREG; ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ __ sd(V0, Address(thread, JavaThread::exception_oop_offset())); ++ __ sd(V1, Address(thread, JavaThread::exception_pc_offset())); ++ ++ // This call does all the hard work. It checks if an exception handler ++ // exists in the method. ++ // If so, it returns the handler address. ++ // If not, it prepares for stack-unwinding, restoring the callee-save ++ // registers of the frame being removed. ++ __ set_last_Java_frame(thread, NOREG, NOREG, NULL); ++ ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); // Fix stack alignment as required by ABI ++ ++ __ relocate(relocInfo::internal_pc_type); ++ ++ { ++ long save_pc = (long)__ pc() + 48; ++ __ patchable_set48(AT, save_pc); ++ } ++ __ sd(AT, thread, in_bytes(JavaThread::last_Java_pc_offset())); ++ ++ __ move(A0, thread); ++ __ patchable_set48(T9, (long)OptoRuntime::handle_exception_C); ++ __ jalr(T9); ++ __ delayed()->nop(); ++ ++ // Set an oopmap for the call site ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map = new OopMap( framesize, 0 ); ++ ++ oop_maps->add_gc_map( __ offset(), map); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ reset_last_Java_frame(thread, true); ++ ++ // Pop self-frame. ++ __ leave(); // Epilog! ++ ++ // V0: exception handler ++ ++ // We have a handler in V0, (could be deopt blob) ++ __ move(T9, V0); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // Get the exception ++ __ ld(A0, Address(thread, JavaThread::exception_oop_offset())); ++ // Get the exception pc in case we are deoptimized ++ __ ld(A1, Address(thread, JavaThread::exception_pc_offset())); ++#ifdef ASSERT ++ __ sd(R0, Address(thread, JavaThread::exception_handler_pc_offset())); ++ __ sd(R0, Address(thread, JavaThread::exception_pc_offset())); ++#endif ++ // Clear the exception oop so GC no longer processes it as a root. ++ __ sd(R0, Address(thread, JavaThread::exception_oop_offset())); ++ ++ // Fix seg fault when running: ++ // Eclipse + Plugin + Debug As ++ // This is the only condition where C2 calls SharedRuntime::generate_deopt_blob() ++ // ++ __ move(V0, A0); ++ __ move(V1, A1); ++ ++ // V0: exception oop ++ // T9: exception handler ++ // A1: exception pc ++ __ jr(T9); ++ __ delayed()->nop(); ++ ++ // make sure all code is generated ++ masm->flush(); ++ ++ _exception_blob = ExceptionBlob::create(&buffer, oop_maps, framesize); ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/sharedRuntime_mips_64.cpp b/src/hotspot/cpu/mips/sharedRuntime_mips_64.cpp +--- a/src/hotspot/cpu/mips/sharedRuntime_mips_64.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/sharedRuntime_mips_64.cpp 2024-01-30 10:00:11.848098317 +0800 +@@ -0,0 +1,3879 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. 
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/debugInfoRec.hpp" ++#include "code/icBuffer.hpp" ++#include "code/nativeInst.hpp" ++#include "code/vtableStubs.hpp" ++#include "interpreter/interpreter.hpp" ++#include "oops/compiledICHolder.hpp" ++#include "oops/klass.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/vframeArray.hpp" ++#include "vmreg_mips.inline.hpp" ++#ifdef COMPILER2 ++#include "opto/runtime.hpp" ++#endif ++ ++#include ++ ++#define __ masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size; ++ ++class RegisterSaver { ++ enum { FPU_regs_live = 32 }; ++ // Capture info about frame layout ++ enum layout { ++#define DEF_LAYOUT_OFFS(regname) regname ## _off, regname ## H_off, ++ DEF_LAYOUT_OFFS(for_16_bytes_aligned) ++ DEF_LAYOUT_OFFS(fpr0) ++ DEF_LAYOUT_OFFS(fpr1) ++ DEF_LAYOUT_OFFS(fpr2) ++ DEF_LAYOUT_OFFS(fpr3) ++ DEF_LAYOUT_OFFS(fpr4) ++ DEF_LAYOUT_OFFS(fpr5) ++ DEF_LAYOUT_OFFS(fpr6) ++ DEF_LAYOUT_OFFS(fpr7) ++ DEF_LAYOUT_OFFS(fpr8) ++ DEF_LAYOUT_OFFS(fpr9) ++ DEF_LAYOUT_OFFS(fpr10) ++ DEF_LAYOUT_OFFS(fpr11) ++ DEF_LAYOUT_OFFS(fpr12) ++ DEF_LAYOUT_OFFS(fpr13) ++ DEF_LAYOUT_OFFS(fpr14) ++ DEF_LAYOUT_OFFS(fpr15) ++ DEF_LAYOUT_OFFS(fpr16) ++ DEF_LAYOUT_OFFS(fpr17) ++ DEF_LAYOUT_OFFS(fpr18) ++ DEF_LAYOUT_OFFS(fpr19) ++ DEF_LAYOUT_OFFS(fpr20) ++ DEF_LAYOUT_OFFS(fpr21) ++ DEF_LAYOUT_OFFS(fpr22) ++ DEF_LAYOUT_OFFS(fpr23) ++ DEF_LAYOUT_OFFS(fpr24) ++ DEF_LAYOUT_OFFS(fpr25) ++ DEF_LAYOUT_OFFS(fpr26) ++ DEF_LAYOUT_OFFS(fpr27) ++ DEF_LAYOUT_OFFS(fpr28) ++ DEF_LAYOUT_OFFS(fpr29) ++ DEF_LAYOUT_OFFS(fpr30) ++ DEF_LAYOUT_OFFS(fpr31) ++ ++ DEF_LAYOUT_OFFS(v0) ++ DEF_LAYOUT_OFFS(v1) ++ DEF_LAYOUT_OFFS(a0) ++ DEF_LAYOUT_OFFS(a1) ++ DEF_LAYOUT_OFFS(a2) ++ DEF_LAYOUT_OFFS(a3) ++ DEF_LAYOUT_OFFS(a4) ++ DEF_LAYOUT_OFFS(a5) ++ DEF_LAYOUT_OFFS(a6) ++ DEF_LAYOUT_OFFS(a7) ++ DEF_LAYOUT_OFFS(t0) ++ DEF_LAYOUT_OFFS(t1) ++ DEF_LAYOUT_OFFS(t2) ++ DEF_LAYOUT_OFFS(t3) ++ DEF_LAYOUT_OFFS(s0) ++ DEF_LAYOUT_OFFS(s1) ++ DEF_LAYOUT_OFFS(s2) ++ DEF_LAYOUT_OFFS(s3) ++ DEF_LAYOUT_OFFS(s4) ++ DEF_LAYOUT_OFFS(s5) ++ DEF_LAYOUT_OFFS(s6) ++ DEF_LAYOUT_OFFS(s7) ++ DEF_LAYOUT_OFFS(t8) ++ DEF_LAYOUT_OFFS(t9) ++ ++ DEF_LAYOUT_OFFS(gp) ++ DEF_LAYOUT_OFFS(fp) ++ DEF_LAYOUT_OFFS(return) ++ reg_save_size ++ }; ++ ++ public: ++ ++ static OopMap* save_live_registers(MacroAssembler* 
masm, int additional_frame_words, int* total_frame_words, bool save_vectors =false ); ++ static void restore_live_registers(MacroAssembler* masm, bool restore_vectors = false); ++ static int raOffset(void) { return return_off / 2; } ++ //Rmethod ++ static int methodOffset(void) { return s3_off / 2; } ++ ++ static int v0Offset(void) { return v0_off / 2; } ++ static int v1Offset(void) { return v1_off / 2; } ++ ++ static int fpResultOffset(void) { return fpr0_off / 2; } ++ ++ // During deoptimization only the result register need to be restored ++ // all the other values have already been extracted. ++ static void restore_result_registers(MacroAssembler* masm); ++}; ++ ++OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors ) { ++ ++ // Always make the frame size 16-byte aligned ++ int frame_size_in_bytes = round_to(additional_frame_words*wordSize + ++ reg_save_size*BytesPerInt, 16); ++ // OopMap frame size is in compiler stack slots (jint's) not bytes or words ++ int frame_size_in_slots = frame_size_in_bytes / BytesPerInt; ++ // The caller will allocate additional_frame_words ++ int additional_frame_slots = additional_frame_words*wordSize / BytesPerInt; ++ // CodeBlob frame size is in words. ++ int frame_size_in_words = frame_size_in_bytes / wordSize; ++ *total_frame_words = frame_size_in_words; ++ ++ // save registers ++ ++ __ daddiu(SP, SP, - reg_save_size * jintSize); ++ ++ __ sdc1(F0, SP, fpr0_off * jintSize); __ sdc1(F1, SP, fpr1_off * jintSize); ++ __ sdc1(F2, SP, fpr2_off * jintSize); __ sdc1(F3, SP, fpr3_off * jintSize); ++ __ sdc1(F4, SP, fpr4_off * jintSize); __ sdc1(F5, SP, fpr5_off * jintSize); ++ __ sdc1(F6, SP, fpr6_off * jintSize); __ sdc1(F7, SP, fpr7_off * jintSize); ++ __ sdc1(F8, SP, fpr8_off * jintSize); __ sdc1(F9, SP, fpr9_off * jintSize); ++ __ sdc1(F10, SP, fpr10_off * jintSize); __ sdc1(F11, SP, fpr11_off * jintSize); ++ __ sdc1(F12, SP, fpr12_off * jintSize); __ sdc1(F13, SP, fpr13_off * jintSize); ++ __ sdc1(F14, SP, fpr14_off * jintSize); __ sdc1(F15, SP, fpr15_off * jintSize); ++ __ sdc1(F16, SP, fpr16_off * jintSize); __ sdc1(F17, SP, fpr17_off * jintSize); ++ __ sdc1(F18, SP, fpr18_off * jintSize); __ sdc1(F19, SP, fpr19_off * jintSize); ++ __ sdc1(F20, SP, fpr20_off * jintSize); __ sdc1(F21, SP, fpr21_off * jintSize); ++ __ sdc1(F22, SP, fpr22_off * jintSize); __ sdc1(F23, SP, fpr23_off * jintSize); ++ __ sdc1(F24, SP, fpr24_off * jintSize); __ sdc1(F25, SP, fpr25_off * jintSize); ++ __ sdc1(F26, SP, fpr26_off * jintSize); __ sdc1(F27, SP, fpr27_off * jintSize); ++ __ sdc1(F28, SP, fpr28_off * jintSize); __ sdc1(F29, SP, fpr29_off * jintSize); ++ __ sdc1(F30, SP, fpr30_off * jintSize); __ sdc1(F31, SP, fpr31_off * jintSize); ++ __ sd(V0, SP, v0_off * jintSize); __ sd(V1, SP, v1_off * jintSize); ++ __ sd(A0, SP, a0_off * jintSize); __ sd(A1, SP, a1_off * jintSize); ++ __ sd(A2, SP, a2_off * jintSize); __ sd(A3, SP, a3_off * jintSize); ++ __ sd(A4, SP, a4_off * jintSize); __ sd(A5, SP, a5_off * jintSize); ++ __ sd(A6, SP, a6_off * jintSize); __ sd(A7, SP, a7_off * jintSize); ++ __ sd(T0, SP, t0_off * jintSize); ++ __ sd(T1, SP, t1_off * jintSize); ++ __ sd(T2, SP, t2_off * jintSize); ++ __ sd(T3, SP, t3_off * jintSize); ++ __ sd(S0, SP, s0_off * jintSize); ++ __ sd(S1, SP, s1_off * jintSize); ++ __ sd(S2, SP, s2_off * jintSize); ++ __ sd(S3, SP, s3_off * jintSize); ++ __ sd(S4, SP, s4_off * jintSize); ++ __ sd(S5, SP, s5_off * jintSize); ++ __ sd(S6, SP, s6_off * jintSize); ++ __ 
sd(S7, SP, s7_off * jintSize); ++ ++ __ sd(T8, SP, t8_off * jintSize); ++ __ sd(T9, SP, t9_off * jintSize); ++ ++ __ sd(GP, SP, gp_off * jintSize); ++ __ sd(FP, SP, fp_off * jintSize); ++ __ sd(RA, SP, return_off * jintSize); ++ __ daddiu(FP, SP, fp_off * jintSize); ++ ++ OopMapSet *oop_maps = new OopMapSet(); ++ //OopMap* map = new OopMap( frame_words, 0 ); ++ OopMap* map = new OopMap( frame_size_in_slots, 0 ); ++ ++ ++//#define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_words) ++#define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_slots) ++ map->set_callee_saved(STACK_OFFSET( v0_off), V0->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( v1_off), V1->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a0_off), A0->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a1_off), A1->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a2_off), A2->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a3_off), A3->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a4_off), A4->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a5_off), A5->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a6_off), A6->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a7_off), A7->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t0_off), T0->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t1_off), T1->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t2_off), T2->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t3_off), T3->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s0_off), S0->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s1_off), S1->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s2_off), S2->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s3_off), S3->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s4_off), S4->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s5_off), S5->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s6_off), S6->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s7_off), S7->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t8_off), T8->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t9_off), T9->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( gp_off), GP->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fp_off), FP->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( return_off), RA->as_VMReg()); ++ ++ map->set_callee_saved(STACK_OFFSET( fpr0_off), F0->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr1_off), F1->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr2_off), F2->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr3_off), F3->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr4_off), F4->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr5_off), F5->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr6_off), F6->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr7_off), F7->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr8_off), F8->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr9_off), F9->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr10_off), F10->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr11_off), F11->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr12_off), F12->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr13_off), F13->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr14_off), F14->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr15_off), F15->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr16_off), F16->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr17_off), 
F17->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr18_off), F18->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr19_off), F19->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr20_off), F20->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr21_off), F21->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr22_off), F22->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr23_off), F23->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr24_off), F24->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr25_off), F25->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr26_off), F26->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr27_off), F27->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr28_off), F28->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr29_off), F29->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr30_off), F30->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr31_off), F31->as_VMReg()); ++ ++#undef STACK_OFFSET ++ return map; ++} ++ ++ ++// Pop the current frame and restore all the registers that we ++// saved. ++void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) { ++ __ ldc1(F0, SP, fpr0_off * jintSize); __ ldc1(F1, SP, fpr1_off * jintSize); ++ __ ldc1(F2, SP, fpr2_off * jintSize); __ ldc1(F3, SP, fpr3_off * jintSize); ++ __ ldc1(F4, SP, fpr4_off * jintSize); __ ldc1(F5, SP, fpr5_off * jintSize); ++ __ ldc1(F6, SP, fpr6_off * jintSize); __ ldc1(F7, SP, fpr7_off * jintSize); ++ __ ldc1(F8, SP, fpr8_off * jintSize); __ ldc1(F9, SP, fpr9_off * jintSize); ++ __ ldc1(F10, SP, fpr10_off * jintSize); __ ldc1(F11, SP, fpr11_off * jintSize); ++ __ ldc1(F12, SP, fpr12_off * jintSize); __ ldc1(F13, SP, fpr13_off * jintSize); ++ __ ldc1(F14, SP, fpr14_off * jintSize); __ ldc1(F15, SP, fpr15_off * jintSize); ++ __ ldc1(F16, SP, fpr16_off * jintSize); __ ldc1(F17, SP, fpr17_off * jintSize); ++ __ ldc1(F18, SP, fpr18_off * jintSize); __ ldc1(F19, SP, fpr19_off * jintSize); ++ __ ldc1(F20, SP, fpr20_off * jintSize); __ ldc1(F21, SP, fpr21_off * jintSize); ++ __ ldc1(F22, SP, fpr22_off * jintSize); __ ldc1(F23, SP, fpr23_off * jintSize); ++ __ ldc1(F24, SP, fpr24_off * jintSize); __ ldc1(F25, SP, fpr25_off * jintSize); ++ __ ldc1(F26, SP, fpr26_off * jintSize); __ ldc1(F27, SP, fpr27_off * jintSize); ++ __ ldc1(F28, SP, fpr28_off * jintSize); __ ldc1(F29, SP, fpr29_off * jintSize); ++ __ ldc1(F30, SP, fpr30_off * jintSize); __ ldc1(F31, SP, fpr31_off * jintSize); ++ ++ __ ld(V0, SP, v0_off * jintSize); __ ld(V1, SP, v1_off * jintSize); ++ __ ld(A0, SP, a0_off * jintSize); __ ld(A1, SP, a1_off * jintSize); ++ __ ld(A2, SP, a2_off * jintSize); __ ld(A3, SP, a3_off * jintSize); ++ __ ld(A4, SP, a4_off * jintSize); __ ld(A5, SP, a5_off * jintSize); ++ __ ld(A6, SP, a6_off * jintSize); __ ld(A7, SP, a7_off * jintSize); ++ __ ld(T0, SP, t0_off * jintSize); ++ __ ld(T1, SP, t1_off * jintSize); ++ __ ld(T2, SP, t2_off * jintSize); ++ __ ld(T3, SP, t3_off * jintSize); ++ __ ld(S0, SP, s0_off * jintSize); ++ __ ld(S1, SP, s1_off * jintSize); ++ __ ld(S2, SP, s2_off * jintSize); ++ __ ld(S3, SP, s3_off * jintSize); ++ __ ld(S4, SP, s4_off * jintSize); ++ __ ld(S5, SP, s5_off * jintSize); ++ __ ld(S6, SP, s6_off * jintSize); ++ __ ld(S7, SP, s7_off * jintSize); ++ ++ __ ld(T8, SP, t8_off * jintSize); ++ __ ld(T9, SP, t9_off * jintSize); ++ ++ __ ld(GP, SP, gp_off * jintSize); ++ __ ld(FP, SP, fp_off * jintSize); ++ __ ld(RA, SP, return_off * jintSize); ++ ++ __ addiu(SP, SP, reg_save_size * 
jintSize); ++} ++ ++// Pop the current frame and restore the registers that might be holding ++// a result. ++void RegisterSaver::restore_result_registers(MacroAssembler* masm) { ++ ++ // Just restore result register. Only used by deoptimization. By ++ // now any callee save register that needs to be restore to a c2 ++ // caller of the deoptee has been extracted into the vframeArray ++ // and will be stuffed into the c2i adapter we create for later ++ // restoration so only result registers need to be restored here. ++ ++ __ ld(V0, SP, v0_off * jintSize); ++ __ ld(V1, SP, v1_off * jintSize); ++ __ ldc1(F0, SP, fpr0_off * jintSize); ++ __ ldc1(F1, SP, fpr1_off * jintSize); ++ __ addiu(SP, SP, return_off * jintSize); ++} ++ ++// Is vector's size (in bytes) bigger than a size saved by default? ++// 16 bytes XMM registers are saved by default using fxsave/fxrstor instructions. ++bool SharedRuntime::is_wide_vector(int size) { ++ return size > 16; ++} ++ ++size_t SharedRuntime::trampoline_size() { ++ return 32; ++} ++ ++void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) { ++ // trampoline is not in CodeCache ++ __ set64(T9, (long)destination); ++ __ jr(T9); ++ __ delayed()->nop(); ++} ++ ++// The java_calling_convention describes stack locations as ideal slots on ++// a frame with no abi restrictions. Since we must observe abi restrictions ++// (like the placement of the register window) the slots must be biased by ++// the following value. ++ ++static int reg2offset_in(VMReg r) { ++ // Account for saved fp and return address ++ // This should really be in_preserve_stack_slots ++ return (r->reg2stack() + 2 * VMRegImpl::slots_per_word) * VMRegImpl::stack_slot_size; // + 2 * VMRegImpl::stack_slot_size); ++} ++ ++static int reg2offset_out(VMReg r) { ++ return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; ++} ++ ++// --------------------------------------------------------------------------- ++// Read the array of BasicTypes from a signature, and compute where the ++// arguments should go. Values in the VMRegPair regs array refer to 4-byte ++// quantities. Values less than SharedInfo::stack0 are registers, those above ++// refer to 4-byte stack slots. All stack slots are based off of the stack pointer ++// as framesizes are fixed. ++// VMRegImpl::stack0 refers to the first slot 0(sp). ++// and VMRegImpl::stack0+1 refers to the memory word 4-byes higher. Register ++// up to RegisterImpl::number_of_registers) are the 32-bit ++// integer registers. ++ ++// Pass first five oop/int args in registers T0, A0 - A3. ++// Pass float/double/long args in stack. ++// Doubles have precedence, so if you pass a mix of floats and doubles ++// the doubles will grab the registers before the floats will. ++ ++// Note: the INPUTS in sig_bt are in units of Java argument words, which are ++// either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit ++// units regardless of build. ++ ++ ++// --------------------------------------------------------------------------- ++// The compiled Java calling convention. ++// Pass first five oop/int args in registers T0, A0 - A3. ++// Pass float/double/long args in stack. ++// Doubles have precedence, so if you pass a mix of floats and doubles ++// the doubles will grab the registers before the floats will. 
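Editor's note: the standalone sketch below is not part of the patch. It is a rough model of the register-assignment walk performed by java_calling_convention() just below, using the register tables that function actually defines (T0 and A0-A6 for integer-like values, F12-F19 for floats and doubles, one counter shared by both tables), which are wider than the older comment above suggests. All names in the sketch are hypothetical.

#include <cstdio>

// Simplified stand-ins for HotSpot's BasicType values (illustrative only).
enum ArgKind { kInt, kLong, kFloat, kDouble, kOop, kVoidHalf };

int main() {
  const char* int_regs[] = { "T0", "A0", "A1", "A2", "A3", "A4", "A5", "A6" };
  const char* fp_regs[]  = { "F12", "F13", "F14", "F15", "F16", "F17", "F18", "F19" };
  // Hypothetical Java signature (int, long, double, Object); longs and doubles
  // are followed by a VOID half, mirroring sig_bt in the real code.
  ArgKind sig[] = { kInt, kLong, kVoidHalf, kDouble, kVoidHalf, kOop };
  unsigned args = 0, stk_args = 0;  // one counter shared by both tables, as in the patch
  for (ArgKind k : sig) {
    if (k == kVoidHalf) continue;   // second half of a long/double, no slot of its own
    bool fp = (k == kFloat || k == kDouble);
    if (args < 8) {
      printf("arg -> %s\n", fp ? fp_regs[args] : int_regs[args]);
      args++;
    } else {
      printf("arg -> stack slots %u-%u\n", stk_args, stk_args + 1);
      stk_args += 2;
    }
  }
  // Prints: T0, A0, F14, A2. The double lands in F14 because the two integer
  // arguments before it already consumed two positions of the shared counter.
  return 0;
}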
++ ++int SharedRuntime::java_calling_convention(const BasicType *sig_bt, ++ VMRegPair *regs, ++ int total_args_passed, ++ int is_outgoing) { ++ ++ // Create the mapping between argument positions and registers. ++ static const Register INT_ArgReg[Argument::n_register_parameters] = { ++ T0, A0, A1, A2, A3, A4, A5, A6 ++ }; ++ static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = { ++ F12, F13, F14, F15, F16, F17, F18, F19 ++ }; ++ ++ uint args = 0; ++ uint stk_args = 0; // inc by 2 each time ++ ++ for (int i = 0; i < total_args_passed; i++) { ++ switch (sig_bt[i]) { ++ case T_VOID: ++ // halves of T_LONG or T_DOUBLE ++ assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); ++ regs[i].set_bad(); ++ break; ++ case T_BOOLEAN: ++ case T_CHAR: ++ case T_BYTE: ++ case T_SHORT: ++ case T_INT: ++ if (args < Argument::n_register_parameters) { ++ regs[i].set1(INT_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_LONG: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ // fall through ++ case T_OBJECT: ++ case T_ARRAY: ++ case T_ADDRESS: ++ if (args < Argument::n_register_parameters) { ++ regs[i].set2(INT_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_FLOAT: ++ if (args < Argument::n_float_register_parameters) { ++ regs[i].set1(FP_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_DOUBLE: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ if (args < Argument::n_float_register_parameters) { ++ regs[i].set2(FP_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } ++ ++ return round_to(stk_args, 2); ++} ++ ++// Patch the callers callsite with entry to compiled code if it exists. ++static void patch_callers_callsite(MacroAssembler *masm) { ++ Label L; ++ __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset())); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ // Schedule the branch target address early. ++ // Call into the VM to patch the caller, then jump to compiled callee ++ // V0 isn't live so capture return address while we easily can ++ __ move(V0, RA); ++ ++ __ pushad(); ++#ifdef COMPILER2 ++ // C2 may leave the stack dirty if not in SSE2+ mode ++ __ empty_FPU_stack(); ++#endif ++ ++ // VM needs caller's callsite ++ // VM needs target method ++ ++ __ move(A0, Rmethod); ++ __ move(A1, V0); ++ // we should preserve the return address ++ __ move(TSR, SP); ++ __ move(AT, -(StackAlignmentInBytes)); // align the stack ++ __ andr(SP, SP, AT); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), ++ relocInfo::runtime_call_type); ++ ++ __ delayed()->nop(); ++ __ move(SP, TSR); ++ __ popad(); ++ __ bind(L); ++} ++ ++static void gen_c2i_adapter(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs, ++ Label& skip_fixup) { ++ ++ // Before we get into the guts of the C2I adapter, see if we should be here ++ // at all. We've come from compiled code and are attempting to jump to the ++ // interpreter, which means the caller made a static call to get here ++ // (vcalls always get a compiled target if there is one). Check for a ++ // compiled target. 
If there is one, we need to patch the caller's call. ++ // However we will run interpreted if we come thru here. The next pass ++ // thru the call site will run compiled. If we ran compiled here then ++ // we can (theorectically) do endless i2c->c2i->i2c transitions during ++ // deopt/uncommon trap cycles. If we always go interpreted here then ++ // we can have at most one and don't need to play any tricks to keep ++ // from endlessly growing the stack. ++ // ++ // Actually if we detected that we had an i2c->c2i transition here we ++ // ought to be able to reset the world back to the state of the interpreted ++ // call and not bother building another interpreter arg area. We don't ++ // do that at this point. ++ ++ patch_callers_callsite(masm); ++ __ bind(skip_fixup); ++ ++#ifdef COMPILER2 ++ __ empty_FPU_stack(); ++#endif ++ //this is for native ? ++ // Since all args are passed on the stack, total_args_passed * interpreter_ ++ // stack_element_size is the ++ // space we need. ++ int extraspace = total_args_passed * Interpreter::stackElementSize; ++ ++ // stack is aligned, keep it that way ++ extraspace = round_to(extraspace, 2*wordSize); ++ ++ // Get return address ++ __ move(V0, RA); ++ // set senderSP value ++ //refer to interpreter_mips.cpp:generate_asm_entry ++ __ move(Rsender, SP); ++ __ addiu(SP, SP, -extraspace); ++ ++ // Now write the args into the outgoing interpreter space ++ for (int i = 0; i < total_args_passed; i++) { ++ if (sig_bt[i] == T_VOID) { ++ assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); ++ continue; ++ } ++ ++ // st_off points to lowest address on stack. ++ int st_off = ((total_args_passed - 1) - i) * Interpreter::stackElementSize; ++ // Say 4 args: ++ // i st_off ++ // 0 12 T_LONG ++ // 1 8 T_VOID ++ // 2 4 T_OBJECT ++ // 3 0 T_BOOL ++ VMReg r_1 = regs[i].first(); ++ VMReg r_2 = regs[i].second(); ++ if (!r_1->is_valid()) { ++ assert(!r_2->is_valid(), ""); ++ continue; ++ } ++ if (r_1->is_stack()) { ++ // memory to memory use fpu stack top ++ int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace; ++ if (!r_2->is_valid()) { ++ __ ld_ptr(AT, SP, ld_off); ++ __ st_ptr(AT, SP, st_off); ++ ++ } else { ++ ++ ++ int next_off = st_off - Interpreter::stackElementSize; ++ __ ld_ptr(AT, SP, ld_off); ++ __ st_ptr(AT, SP, st_off); ++ ++ // Ref to is_Register condition ++ if(sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ++ __ st_ptr(AT, SP, st_off - 8); ++ } ++ } else if (r_1->is_Register()) { ++ Register r = r_1->as_Register(); ++ if (!r_2->is_valid()) { ++ __ sd(r, SP, st_off); ++ } else { ++ //FIXME, mips will not enter here ++ // long/double in gpr ++ __ sd(r, SP, st_off); ++ // In [java/util/zip/ZipFile.java] ++ // ++ // private static native long open(String name, int mode, long lastModified); ++ // private static native int getTotal(long jzfile); ++ // ++ // We need to transfer T_LONG paramenters from a compiled method to a native method. 
++ // It's a complex process: ++ // ++ // Caller -> lir_static_call -> gen_resolve_stub ++ // -> -- resolve_static_call_C ++ // `- gen_c2i_adapter() [*] ++ // | ++ // `- AdapterHandlerLibrary::get_create_apapter_index ++ // -> generate_native_entry ++ // -> InterpreterRuntime::SignatureHandlerGenerator::pass_long [**] ++ // ++ // In [**], T_Long parameter is stored in stack as: ++ // ++ // (high) ++ // | | ++ // ----------- ++ // | 8 bytes | ++ // | (void) | ++ // ----------- ++ // | 8 bytes | ++ // | (long) | ++ // ----------- ++ // | | ++ // (low) ++ // ++ // However, the sequence is reversed here: ++ // ++ // (high) ++ // | | ++ // ----------- ++ // | 8 bytes | ++ // | (long) | ++ // ----------- ++ // | 8 bytes | ++ // | (void) | ++ // ----------- ++ // | | ++ // (low) ++ // ++ // So I stored another 8 bytes in the T_VOID slot. It then can be accessed from generate_native_entry(). ++ // ++ if (sig_bt[i] == T_LONG) ++ __ sd(r, SP, st_off - 8); ++ } ++ } else if (r_1->is_FloatRegister()) { ++ assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register"); ++ ++ FloatRegister fr = r_1->as_FloatRegister(); ++ if (sig_bt[i] == T_FLOAT) ++ __ swc1(fr, SP, st_off); ++ else { ++ __ sdc1(fr, SP, st_off); ++ __ sdc1(fr, SP, st_off - 8); // T_DOUBLE needs two slots ++ } ++ } ++ } ++ ++ // Schedule the branch target address early. ++ __ ld_ptr(AT, Rmethod, in_bytes(Method::interpreter_entry_offset()) ); ++ // And repush original return address ++ __ move(RA, V0); ++ __ jr (AT); ++ __ delayed()->nop(); ++} ++ ++void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs) { ++ ++ // Generate an I2C adapter: adjust the I-frame to make space for the C-frame ++ // layout. Lesp was saved by the calling I-frame and will be restored on ++ // return. Meanwhile, outgoing arg space is all owned by the callee ++ // C-frame, so we can mangle it at will. After adjusting the frame size, ++ // hoist register arguments and repack other args according to the compiled ++ // code convention. Finally, end in a jump to the compiled code. The entry ++ // point address is the start of the buffer. ++ ++ // We will only enter here from an interpreted frame and never from after ++ // passing thru a c2i. Azul allowed this but we do not. If we lose the ++ // race and use a c2i we will remain interpreted for the race loser(s). ++ // This removes all sorts of headaches on the mips side and also eliminates ++ // the possibility of having c2i -> i2c -> c2i -> ... endless transitions. ++ ++ ++ __ move(T9, SP); ++ ++ // Cut-out for having no stack args. Since up to 2 int/oop args are passed ++ // in registers, we will occasionally have no stack args. ++ int comp_words_on_stack = 0; ++ if (comp_args_on_stack) { ++ // Sig words on the stack are greater-than VMRegImpl::stack0. Those in ++ // registers are below. By subtracting stack0, we either get a negative ++ // number (all values in registers) or the maximum stack slot accessed. ++ // int comp_args_on_stack = VMRegImpl::reg2stack(max_arg); ++ // Convert 4-byte stack slots to words. 
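// Editor's worked example (illustrative, not part of the patch): if five
// 4-byte VMRegImpl stack slots are in use and wordSize is 8, the statement
// below computes round_to(5 * 4, 8) >> LogBytesPerWord, i.e. 24 >> 3 == 3
// words; the following round_to(3, 2) bumps that to 4 words so the frame
// stays 16-byte aligned, and SP is then dropped by 4 * wordSize == 32 bytes.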
++ comp_words_on_stack = round_to(comp_args_on_stack*4, wordSize)>>LogBytesPerWord; ++ // Round up to miminum stack alignment, in wordSize ++ comp_words_on_stack = round_to(comp_words_on_stack, 2); ++ __ daddiu(SP, SP, -comp_words_on_stack * wordSize); ++ } ++ ++ // Align the outgoing SP ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ // push the return address on the stack (note that pushing, rather ++ // than storing it, yields the correct frame alignment for the callee) ++ // Put saved SP in another register ++ const Register saved_sp = V0; ++ __ move(saved_sp, T9); ++ ++ ++ // Will jump to the compiled code just as if compiled code was doing it. ++ // Pre-load the register-jump target early, to schedule it better. ++ __ ld(T9, Rmethod, in_bytes(Method::from_compiled_offset())); ++ ++ // Now generate the shuffle code. Pick up all register args and move the ++ // rest through the floating point stack top. ++ for (int i = 0; i < total_args_passed; i++) { ++ if (sig_bt[i] == T_VOID) { ++ // Longs and doubles are passed in native word order, but misaligned ++ // in the 32-bit build. ++ assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); ++ continue; ++ } ++ ++ // Pick up 0, 1 or 2 words from SP+offset. ++ ++ //assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "scrambled load targets?"); ++ // Load in argument order going down. ++ int ld_off = (total_args_passed -1 - i)*Interpreter::stackElementSize; ++ // Point to interpreter value (vs. tag) ++ int next_off = ld_off - Interpreter::stackElementSize; ++ VMReg r_1 = regs[i].first(); ++ VMReg r_2 = regs[i].second(); ++ if (!r_1->is_valid()) { ++ assert(!r_2->is_valid(), ""); ++ continue; ++ } ++ if (r_1->is_stack()) { ++ // Convert stack slot to an SP offset (+ wordSize to ++ // account for return address ) ++ // NOTICE HERE!!!! I sub a wordSize here ++ int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size; ++ //+ wordSize; ++ ++ if (!r_2->is_valid()) { ++ __ ld(AT, saved_sp, ld_off); ++ __ sd(AT, SP, st_off); ++ } else { ++ // Interpreter local[n] == MSW, local[n+1] == LSW however locals ++ // are accessed as negative so LSW is at LOW address ++ ++ // ld_off is MSW so get LSW ++ // st_off is LSW (i.e. reg.first()) ++ ++ // [./org/eclipse/swt/graphics/GC.java] ++ // void drawImageXRender(Image srcImage, int srcX, int srcY, int srcWidth, int srcHeight, ++ // int destX, int destY, int destWidth, int destHeight, ++ // boolean simple, ++ // int imgWidth, int imgHeight, ++ // long maskPixmap, <-- Pass T_LONG in stack ++ // int maskType); ++ // Before this modification, Eclipse displays icons with solid black background. ++ // ++ __ ld(AT, saved_sp, ld_off); ++ if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ++ __ ld(AT, saved_sp, ld_off - 8); ++ __ sd(AT, SP, st_off); ++ } ++ } else if (r_1->is_Register()) { // Register argument ++ Register r = r_1->as_Register(); ++ if (r_2->is_valid()) { ++ // Remember r_1 is low address (and LSB on mips) ++ // So r_2 gets loaded from high address regardless of the platform ++ assert(r_2->as_Register() == r_1->as_Register(), ""); ++ __ ld(r, saved_sp, ld_off); ++ ++ // ++ // For T_LONG type, the real layout is as below: ++ // ++ // (high) ++ // | | ++ // ----------- ++ // | 8 bytes | ++ // | (void) | ++ // ----------- ++ // | 8 bytes | ++ // | (long) | ++ // ----------- ++ // | | ++ // (low) ++ // ++ // We should load the low-8 bytes. 
++ // ++ if (sig_bt[i] == T_LONG) ++ __ ld(r, saved_sp, ld_off - 8); ++ } else { ++ __ lw(r, saved_sp, ld_off); ++ } ++ } else if (r_1->is_FloatRegister()) { // Float Register ++ assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register"); ++ ++ FloatRegister fr = r_1->as_FloatRegister(); ++ if (sig_bt[i] == T_FLOAT) ++ __ lwc1(fr, saved_sp, ld_off); ++ else { ++ __ ldc1(fr, saved_sp, ld_off); ++ __ ldc1(fr, saved_sp, ld_off - 8); ++ } ++ } ++ } ++ ++ // 6243940 We might end up in handle_wrong_method if ++ // the callee is deoptimized as we race thru here. If that ++ // happens we don't want to take a safepoint because the ++ // caller frame will look interpreted and arguments are now ++ // "compiled" so it is much better to make this transition ++ // invisible to the stack walking code. Unfortunately if ++ // we try and find the callee by normal means a safepoint ++ // is possible. So we stash the desired callee in the thread ++ // and the vm will find there should this case occur. ++#ifndef OPT_THREAD ++ Register thread = T8; ++ __ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ __ sd(Rmethod, thread, in_bytes(JavaThread::callee_target_offset())); ++ ++ // move methodOop to V0 in case we end up in an c2i adapter. ++ // the c2i adapters expect methodOop in V0 (c2) because c2's ++ // resolve stubs return the result (the method) in V0. ++ // I'd love to fix this. ++ __ move(V0, Rmethod); ++ __ jr(T9); ++ __ delayed()->nop(); ++} ++ ++// --------------------------------------------------------------- ++AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs, ++ AdapterFingerPrint* fingerprint) { ++ address i2c_entry = __ pc(); ++ ++ gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); ++ ++ // ------------------------------------------------------------------------- ++ // Generate a C2I adapter. On entry we know G5 holds the methodOop. The ++ // args start out packed in the compiled layout. They need to be unpacked ++ // into the interpreter layout. This will almost always require some stack ++ // space. We grow the current (compiled) stack, then repack the args. We ++ // finally end in a jump to the generic interpreter entry point. On exit ++ // from the interpreter, the interpreter will restore our SP (lest the ++ // compiled code, which relys solely on SP and not FP, get sick). ++ ++ address c2i_unverified_entry = __ pc(); ++ Label skip_fixup; ++ { ++ Register holder = T1; ++ Register receiver = T0; ++ Register temp = T8; ++ address ic_miss = SharedRuntime::get_ic_miss_stub(); ++ ++ Label missed; ++ ++ //add for compressedoops ++ __ load_klass(temp, receiver); ++ ++ __ ld_ptr(AT, holder, CompiledICHolder::holder_klass_offset()); ++ __ ld_ptr(Rmethod, holder, CompiledICHolder::holder_metadata_offset()); ++ __ bne(AT, temp, missed); ++ __ delayed()->nop(); ++ // Method might have been compiled since the call site was patched to ++ // interpreted if that is the case treat it as a miss so we can get ++ // the call site corrected. 
++ __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset())); ++ __ beq(AT, R0, skip_fixup); ++ __ delayed()->nop(); ++ __ bind(missed); ++ ++ __ jmp(ic_miss, relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ } ++ ++ address c2i_entry = __ pc(); ++ ++ gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); ++ ++ __ flush(); ++ return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry); ++} ++ ++int SharedRuntime::c_calling_convention(const BasicType *sig_bt, ++ VMRegPair *regs, ++ VMRegPair *regs2, ++ int total_args_passed) { ++ assert(regs2 == NULL, "not needed on MIPS"); ++ // Return the number of VMReg stack_slots needed for the args. ++ // This value does not include an abi space (like register window ++ // save area). ++ ++ // We return the amount of VMReg stack slots we need to reserve for all ++ // the arguments NOT counting out_preserve_stack_slots. Since we always ++ // have space for storing at least 6 registers to memory we start with that. ++ // See int_stk_helper for a further discussion. ++ // We return the amount of VMRegImpl stack slots we need to reserve for all ++ // the arguments NOT counting out_preserve_stack_slots. ++ static const Register INT_ArgReg[Argument::n_register_parameters] = { ++ A0, A1, A2, A3, A4, A5, A6, A7 ++ }; ++ static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = { ++ F12, F13, F14, F15, F16, F17, F18, F19 ++ }; ++ uint args = 0; ++ uint stk_args = 0; // inc by 2 each time ++ ++// Example: ++// n java.lang.UNIXProcess::forkAndExec ++// private native int forkAndExec(byte[] prog, ++// byte[] argBlock, int argc, ++// byte[] envBlock, int envc, ++// byte[] dir, ++// boolean redirectErrorStream, ++// FileDescriptor stdin_fd, ++// FileDescriptor stdout_fd, ++// FileDescriptor stderr_fd) ++// JNIEXPORT jint JNICALL ++// Java_java_lang_UNIXProcess_forkAndExec(JNIEnv *env, ++// jobject process, ++// jbyteArray prog, ++// jbyteArray argBlock, jint argc, ++// jbyteArray envBlock, jint envc, ++// jbyteArray dir, ++// jboolean redirectErrorStream, ++// jobject stdin_fd, ++// jobject stdout_fd, ++// jobject stderr_fd) ++// ++// ::c_calling_convention ++// 0: // env <-- a0 ++// 1: L // klass/obj <-- t0 => a1 ++// 2: [ // prog[] <-- a0 => a2 ++// 3: [ // argBlock[] <-- a1 => a3 ++// 4: I // argc <-- a2 => a4 ++// 5: [ // envBlock[] <-- a3 => a5 ++// 6: I // envc <-- a4 => a5 ++// 7: [ // dir[] <-- a5 => a7 ++// 8: Z // redirectErrorStream <-- a6 => sp[0] ++// 9: L // stdin fp[16] => sp[8] ++// 10: L // stdout fp[24] => sp[16] ++// 11: L // stderr fp[32] => sp[24] ++// ++ for (int i = 0; i < total_args_passed; i++) { ++ switch (sig_bt[i]) { ++ case T_VOID: // Halves of longs and doubles ++ assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); ++ regs[i].set_bad(); ++ break; ++ case T_BOOLEAN: ++ case T_CHAR: ++ case T_BYTE: ++ case T_SHORT: ++ case T_INT: ++ if (args < Argument::n_register_parameters) { ++ regs[i].set1(INT_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_LONG: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ // fall through ++ case T_OBJECT: ++ case T_ARRAY: ++ case T_ADDRESS: ++ case T_METADATA: ++ if (args < Argument::n_register_parameters) { ++ regs[i].set2(INT_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_FLOAT: ++ if (args < 
Argument::n_float_register_parameters) { ++ regs[i].set1(FP_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_DOUBLE: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ if (args < Argument::n_float_register_parameters) { ++ regs[i].set2(FP_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } ++ ++ return round_to(stk_args, 2); ++} ++ ++// --------------------------------------------------------------------------- ++void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { ++ // We always ignore the frame_slots arg and just use the space just below frame pointer ++ // which by this time is free to use ++ switch (ret_type) { ++ case T_FLOAT: ++ __ swc1(FSF, FP, -wordSize); ++ break; ++ case T_DOUBLE: ++ __ sdc1(FSF, FP, -wordSize ); ++ break; ++ case T_VOID: break; ++ case T_LONG: ++ __ sd(V0, FP, -wordSize); ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ __ sd(V0, FP, -wordSize); ++ break; ++ default: { ++ __ sw(V0, FP, -wordSize); ++ } ++ } ++} ++ ++void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { ++ // We always ignore the frame_slots arg and just use the space just below frame pointer ++ // which by this time is free to use ++ switch (ret_type) { ++ case T_FLOAT: ++ __ lwc1(FSF, FP, -wordSize); ++ break; ++ case T_DOUBLE: ++ __ ldc1(FSF, FP, -wordSize ); ++ break; ++ case T_LONG: ++ __ ld(V0, FP, -wordSize); ++ break; ++ case T_VOID: break; ++ case T_OBJECT: ++ case T_ARRAY: ++ __ ld(V0, FP, -wordSize); ++ break; ++ default: { ++ __ lw(V0, FP, -wordSize); ++ } ++ } ++} ++ ++static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { ++ for ( int i = first_arg ; i < arg_count ; i++ ) { ++ if (args[i].first()->is_Register()) { ++ __ push(args[i].first()->as_Register()); ++ } else if (args[i].first()->is_FloatRegister()) { ++ __ push(args[i].first()->as_FloatRegister()); ++ } ++ } ++} ++ ++static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { ++ for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) { ++ if (args[i].first()->is_Register()) { ++ __ pop(args[i].first()->as_Register()); ++ } else if (args[i].first()->is_FloatRegister()) { ++ __ pop(args[i].first()->as_FloatRegister()); ++ } ++ } ++} ++ ++// A simple move of integer like type ++static void simple_move32(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ // stack to stack ++ __ lw(AT, FP, reg2offset_in(src.first())); ++ __ sd(AT, SP, reg2offset_out(dst.first())); ++ } else { ++ // stack to reg ++ __ lw(dst.first()->as_Register(), FP, reg2offset_in(src.first())); ++ } ++ } else if (dst.first()->is_stack()) { ++ // reg to stack ++ __ sd(src.first()->as_Register(), SP, reg2offset_out(dst.first())); ++ } else { ++ if (dst.first() != src.first()){ ++ __ move(dst.first()->as_Register(), src.first()->as_Register()); // fujie error:dst.first() ++ } ++ } ++} ++ ++// An oop arg. Must pass a handle not the oop itself ++static void object_move(MacroAssembler* masm, ++ OopMap* map, ++ int oop_handle_offset, ++ int framesize_in_slots, ++ VMRegPair src, ++ VMRegPair dst, ++ bool is_receiver, ++ int* receiver_offset) { ++ ++ // must pass a handle. 
First figure out the location we use as a handle ++ ++ //FIXME, for mips, dst can be register ++ if (src.first()->is_stack()) { ++ // Oop is already on the stack as an argument ++ Register rHandle = V0; ++ Label nil; ++ __ xorr(rHandle, rHandle, rHandle); ++ __ ld(AT, FP, reg2offset_in(src.first())); ++ __ beq(AT, R0, nil); ++ __ delayed()->nop(); ++ __ lea(rHandle, Address(FP, reg2offset_in(src.first()))); ++ __ bind(nil); ++ if(dst.first()->is_stack())__ sd( rHandle, SP, reg2offset_out(dst.first())); ++ else __ move( (dst.first())->as_Register(), rHandle); ++ //if dst is register ++ //FIXME, do mips need out preserve stack slots? ++ int offset_in_older_frame = src.first()->reg2stack() ++ + SharedRuntime::out_preserve_stack_slots(); ++ map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); ++ if (is_receiver) { ++ *receiver_offset = (offset_in_older_frame ++ + framesize_in_slots) * VMRegImpl::stack_slot_size; ++ } ++ } else { ++ // Oop is in an a register we must store it to the space we reserve ++ // on the stack for oop_handles ++ const Register rOop = src.first()->as_Register(); ++ assert( (rOop->encoding() >= A0->encoding()) && (rOop->encoding() <= T0->encoding()),"wrong register"); ++ const Register rHandle = V0; ++ //Important: refer to java_calling_convertion ++ int oop_slot = (rOop->encoding() - A0->encoding()) * VMRegImpl::slots_per_word + oop_handle_offset; ++ int offset = oop_slot*VMRegImpl::stack_slot_size; ++ Label skip; ++ __ sd( rOop , SP, offset ); ++ map->set_oop(VMRegImpl::stack2reg(oop_slot)); ++ __ xorr( rHandle, rHandle, rHandle); ++ __ beq(rOop, R0, skip); ++ __ delayed()->nop(); ++ __ lea(rHandle, Address(SP, offset)); ++ __ bind(skip); ++ // Store the handle parameter ++ if(dst.first()->is_stack())__ sd( rHandle, SP, reg2offset_out(dst.first())); ++ else __ move((dst.first())->as_Register(), rHandle); ++ //if dst is register ++ ++ if (is_receiver) { ++ *receiver_offset = offset; ++ } ++ } ++} ++ ++// A float arg may have to do float reg int reg conversion ++static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move"); ++ ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ __ lw(AT, FP, reg2offset_in(src.first())); ++ __ sw(AT, SP, reg2offset_out(dst.first())); ++ } ++ else ++ __ lwc1(dst.first()->as_FloatRegister(), FP, reg2offset_in(src.first())); ++ } else { ++ // reg to stack ++ if(dst.first()->is_stack()) ++ __ swc1(src.first()->as_FloatRegister(), SP, reg2offset_out(dst.first())); ++ else ++ __ mov_s(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); ++ } ++} ++ ++// A long move ++static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ ++ // The only legal possibility for a long_move VMRegPair is: ++ // 1: two stack slots (possibly unaligned) ++ // as neither the java or C calling convention will use registers ++ // for longs. 
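// Editor's note (illustrative, not from the patch): despite the comment above,
// the body below covers all four source/destination combinations:
//   stack -> stack : ld  AT,  FP, off_in   then   sd  AT,  SP, off_out
//   stack -> reg   : ld  dst, FP, off_in
//   reg   -> stack : sd  src, SP, off_out
//   reg   -> reg   : move dst, src
// which matters on MIPS64, where a long argument can arrive in an A-register
// as well as on the stack.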
++ ++ if (src.first()->is_stack()) { ++ assert(src.second()->is_stack() && dst.second()->is_stack(), "must be all stack"); ++ if( dst.first()->is_stack()){ ++ __ ld(AT, FP, reg2offset_in(src.first())); ++ __ sd(AT, SP, reg2offset_out(dst.first())); ++ } else { ++ __ ld( (dst.first())->as_Register() , FP, reg2offset_in(src.first())); ++ } ++ } else { ++ if( dst.first()->is_stack()){ ++ __ sd( (src.first())->as_Register(), SP, reg2offset_out(dst.first())); ++ } else { ++ __ move( (dst.first())->as_Register() , (src.first())->as_Register()); ++ } ++ } ++} ++ ++// A double move ++static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ ++ // The only legal possibilities for a double_move VMRegPair are: ++ // The painful thing here is that like long_move a VMRegPair might be ++ ++ // Because of the calling convention we know that src is either ++ // 1: a single physical register (xmm registers only) ++ // 2: two stack slots (possibly unaligned) ++ // dst can only be a pair of stack slots. ++ ++ ++ if (src.first()->is_stack()) { ++ // source is all stack ++ if( dst.first()->is_stack()){ ++ __ ld(AT, FP, reg2offset_in(src.first())); ++ __ sd(AT, SP, reg2offset_out(dst.first())); ++ } else { ++ __ ldc1( (dst.first())->as_FloatRegister(), FP, reg2offset_in(src.first())); ++ } ++ ++ } else { ++ // reg to stack ++ // No worries about stack alignment ++ if( dst.first()->is_stack()){ ++ __ sdc1(src.first()->as_FloatRegister(), SP, reg2offset_out(dst.first())); ++ } ++ else ++ __ mov_d( dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); ++ ++ } ++} ++ ++static void verify_oop_args(MacroAssembler* masm, ++ methodHandle method, ++ const BasicType* sig_bt, ++ const VMRegPair* regs) { ++ Register temp_reg = T9; // not part of any compiled calling seq ++ if (VerifyOops) { ++ for (int i = 0; i < method->size_of_parameters(); i++) { ++ if (sig_bt[i] == T_OBJECT || ++ sig_bt[i] == T_ARRAY) { ++ VMReg r = regs[i].first(); ++ assert(r->is_valid(), "bad oop arg"); ++ if (r->is_stack()) { ++ __ ld(temp_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); ++ __ verify_oop(temp_reg); ++ } else { ++ __ verify_oop(r->as_Register()); ++ } ++ } ++ } ++ } ++} ++ ++static void gen_special_dispatch(MacroAssembler* masm, ++ methodHandle method, ++ const BasicType* sig_bt, ++ const VMRegPair* regs) { ++ verify_oop_args(masm, method, sig_bt, regs); ++ vmIntrinsics::ID iid = method->intrinsic_id(); ++ ++ // Now write the args into the outgoing interpreter space ++ bool has_receiver = false; ++ Register receiver_reg = noreg; ++ int member_arg_pos = -1; ++ Register member_reg = noreg; ++ int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); ++ if (ref_kind != 0) { ++ member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument ++ member_reg = S3; // known to be free at this point ++ has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); ++ } else if (iid == vmIntrinsics::_invokeBasic) { ++ has_receiver = true; ++ } else { ++ fatal("unexpected intrinsic id %d", iid); ++ } ++ ++ if (member_reg != noreg) { ++ // Load the member_arg into register, if necessary. 
++ SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs); ++ VMReg r = regs[member_arg_pos].first(); ++ if (r->is_stack()) { ++ __ ld(member_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size)); ++ } else { ++ // no data motion is needed ++ member_reg = r->as_Register(); ++ } ++ } ++ ++ if (has_receiver) { ++ // Make sure the receiver is loaded into a register. ++ assert(method->size_of_parameters() > 0, "oob"); ++ assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); ++ VMReg r = regs[0].first(); ++ assert(r->is_valid(), "bad receiver arg"); ++ if (r->is_stack()) { ++ // Porting note: This assumes that compiled calling conventions always ++ // pass the receiver oop in a register. If this is not true on some ++ // platform, pick a temp and load the receiver from stack. ++ fatal("receiver always in a register"); ++ receiver_reg = SSR; // known to be free at this point ++ __ ld(receiver_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size)); ++ } else { ++ // no data motion is needed ++ receiver_reg = r->as_Register(); ++ } ++ } ++ ++ // Figure out which address we are really jumping to: ++ MethodHandles::generate_method_handle_dispatch(masm, iid, ++ receiver_reg, member_reg, /*for_compiler_entry:*/ true); ++} ++ ++// --------------------------------------------------------------------------- ++// Generate a native wrapper for a given method. The method takes arguments ++// in the Java compiled code convention, marshals them to the native ++// convention (handlizes oops, etc), transitions to native, makes the call, ++// returns to java state (possibly blocking), unhandlizes any result and ++// returns. ++nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler* masm, ++ const methodHandle& method, ++ int compile_id, ++ BasicType* in_sig_bt, ++ VMRegPair* in_regs, ++ BasicType ret_type, ++ address critical_entry) { ++ if (method->is_method_handle_intrinsic()) { ++ vmIntrinsics::ID iid = method->intrinsic_id(); ++ intptr_t start = (intptr_t)__ pc(); ++ int vep_offset = ((intptr_t)__ pc()) - start; ++ // Make enough room for patch_verified_entry ++ __ nop(); ++ __ nop(); ++ gen_special_dispatch(masm, ++ method, ++ in_sig_bt, ++ in_regs); ++ int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period ++ __ flush(); ++ int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually ++ return nmethod::new_native_nmethod(method, ++ compile_id, ++ masm->code(), ++ vep_offset, ++ frame_complete, ++ stack_slots / VMRegImpl::slots_per_word, ++ in_ByteSize(-1), ++ in_ByteSize(-1), ++ (OopMapSet*)NULL); ++ } ++ bool is_critical_native = true; ++ address native_func = critical_entry; ++ if (native_func == NULL) { ++ native_func = method->native_function(); ++ is_critical_native = false; ++ } ++ assert(native_func != NULL, "must have function"); ++ ++ // Native nmethod wrappers never take possesion of the oop arguments. ++ // So the caller will gc the arguments. The only thing we need an ++ // oopMap for is if the call is static ++ // ++ // An OopMap for lock (and class if static), and one for the VM call itself ++ OopMapSet *oop_maps = new OopMapSet(); ++ ++ // We have received a description of where all the java arg are located ++ // on entry to the wrapper. We need to convert these args to where ++ // the jni function will expect them. 
To figure out where they go ++ // we convert the java signature to a C signature by inserting ++ // the hidden arguments as arg[0] and possibly arg[1] (static method) ++ ++ const int total_in_args = method->size_of_parameters(); ++ int total_c_args = total_in_args; ++ if (!is_critical_native) { ++ total_c_args += 1; ++ if (method->is_static()) { ++ total_c_args++; ++ } ++ } else { ++ for (int i = 0; i < total_in_args; i++) { ++ if (in_sig_bt[i] == T_ARRAY) { ++ total_c_args++; ++ } ++ } ++ } ++ ++ BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); ++ VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); ++ BasicType* in_elem_bt = NULL; ++ ++ int argc = 0; ++ if (!is_critical_native) { ++ out_sig_bt[argc++] = T_ADDRESS; ++ if (method->is_static()) { ++ out_sig_bt[argc++] = T_OBJECT; ++ } ++ ++ for (int i = 0; i < total_in_args ; i++ ) { ++ out_sig_bt[argc++] = in_sig_bt[i]; ++ } ++ } else { ++ Thread* THREAD = Thread::current(); ++ in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args); ++ SignatureStream ss(method->signature()); ++ for (int i = 0; i < total_in_args ; i++ ) { ++ if (in_sig_bt[i] == T_ARRAY) { ++ // Arrays are passed as int, elem* pair ++ out_sig_bt[argc++] = T_INT; ++ out_sig_bt[argc++] = T_ADDRESS; ++ Symbol* atype = ss.as_symbol(CHECK_NULL); ++ const char* at = atype->as_C_string(); ++ if (strlen(at) == 2) { ++ assert(at[0] == '[', "must be"); ++ switch (at[1]) { ++ case 'B': in_elem_bt[i] = T_BYTE; break; ++ case 'C': in_elem_bt[i] = T_CHAR; break; ++ case 'D': in_elem_bt[i] = T_DOUBLE; break; ++ case 'F': in_elem_bt[i] = T_FLOAT; break; ++ case 'I': in_elem_bt[i] = T_INT; break; ++ case 'J': in_elem_bt[i] = T_LONG; break; ++ case 'S': in_elem_bt[i] = T_SHORT; break; ++ case 'Z': in_elem_bt[i] = T_BOOLEAN; break; ++ default: ShouldNotReachHere(); ++ } ++ } ++ } else { ++ out_sig_bt[argc++] = in_sig_bt[i]; ++ in_elem_bt[i] = T_VOID; ++ } ++ if (in_sig_bt[i] != T_VOID) { ++ assert(in_sig_bt[i] == ss.type(), "must match"); ++ ss.next(); ++ } ++ } ++ } ++ ++ // Now figure out where the args must be stored and how much stack space ++ // they require (neglecting out_preserve_stack_slots but space for storing ++ // the 1st six register arguments). It's weird see int_stk_helper. ++ // ++ int out_arg_slots; ++ out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); ++ ++ // Compute framesize for the wrapper. We need to handlize all oops in ++ // registers. We must create space for them here that is disjoint from ++ // the windowed save area because we have no control over when we might ++ // flush the window again and overwrite values that gc has since modified. ++ // (The live window race) ++ // ++ // We always just allocate 6 word for storing down these object. This allow ++ // us to simply record the base and use the Ireg number to decide which ++ // slot to use. (Note that the reg number is the inbound number not the ++ // outbound number). ++ // We must shuffle args to match the native convention, and include var-args space. ++ ++ // Calculate the total number of stack slots we will need. ++ ++ // First count the abi requirement plus all of the outgoing args ++ int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; ++ ++ // Now the space for the inbound oop handle area ++ int total_save_slots = 9 * VMRegImpl::slots_per_word; // 9 arguments passed in registers ++ if (is_critical_native) { ++ // Critical natives may have to call out so they need a save area ++ // for register arguments. 
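// Editor's worked example (illustrative, not part of the patch): for a
// critical native whose register-passed Java arguments are, say,
// (int, byte[], float, double), the counting below yields
//   single_slots == 2   (the int and the float)
//   double_slots == 2   (the array oop and the double)
// so total_save_slots == 2 * 2 + 2 == 6, and because double_slots != 0 the
// running stack_slots total is rounded up to an even slot count.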
++ int double_slots = 0; ++ int single_slots = 0; ++ for ( int i = 0; i < total_in_args; i++) { ++ if (in_regs[i].first()->is_Register()) { ++ const Register reg = in_regs[i].first()->as_Register(); ++ switch (in_sig_bt[i]) { ++ case T_BOOLEAN: ++ case T_BYTE: ++ case T_SHORT: ++ case T_CHAR: ++ case T_INT: single_slots++; break; ++ case T_ARRAY: ++ case T_LONG: double_slots++; break; ++ default: ShouldNotReachHere(); ++ } ++ } else if (in_regs[i].first()->is_FloatRegister()) { ++ switch (in_sig_bt[i]) { ++ case T_FLOAT: single_slots++; break; ++ case T_DOUBLE: double_slots++; break; ++ default: ShouldNotReachHere(); ++ } ++ } ++ } ++ total_save_slots = double_slots * 2 + single_slots; ++ // align the save area ++ if (double_slots != 0) { ++ stack_slots = round_to(stack_slots, 2); ++ } ++ } ++ ++ int oop_handle_offset = stack_slots; ++ stack_slots += total_save_slots; ++ ++ // Now any space we need for handlizing a klass if static method ++ ++ int klass_slot_offset = 0; ++ int klass_offset = -1; ++ int lock_slot_offset = 0; ++ bool is_static = false; ++ ++ if (method->is_static()) { ++ klass_slot_offset = stack_slots; ++ stack_slots += VMRegImpl::slots_per_word; ++ klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; ++ is_static = true; ++ } ++ ++ // Plus a lock if needed ++ ++ if (method->is_synchronized()) { ++ lock_slot_offset = stack_slots; ++ stack_slots += VMRegImpl::slots_per_word; ++ } ++ ++ // Now a place to save return value or as a temporary for any gpr -> fpr moves ++ // + 2 for return address (which we own) and saved fp ++ stack_slots += 2 + 9 * VMRegImpl::slots_per_word; // (T0, A0, A1, A2, A3, A4, A5, A6, A7) ++ ++ // Ok The space we have allocated will look like: ++ // ++ // ++ // FP-> | | ++ // |---------------------| ++ // | 2 slots for moves | ++ // |---------------------| ++ // | lock box (if sync) | ++ // |---------------------| <- lock_slot_offset ++ // | klass (if static) | ++ // |---------------------| <- klass_slot_offset ++ // | oopHandle area | ++ // |---------------------| <- oop_handle_offset ++ // | outbound memory | ++ // | based arguments | ++ // | | ++ // |---------------------| ++ // | vararg area | ++ // |---------------------| ++ // | | ++ // SP-> | out_preserved_slots | ++ // ++ // ++ ++ ++ // Now compute actual number of stack words we need rounding to make ++ // stack properly aligned. ++ stack_slots = round_to(stack_slots, StackAlignmentInSlots); ++ ++ int stack_size = stack_slots * VMRegImpl::stack_slot_size; ++ ++ intptr_t start = (intptr_t)__ pc(); ++ ++ ++ ++ // First thing make an ic check to see if we should even be here ++ address ic_miss = SharedRuntime::get_ic_miss_stub(); ++ ++ // We are free to use all registers as temps without saving them and ++ // restoring them except fp. fp is the only callee save register ++ // as far as the interpreter and the compiler(s) are concerned. 
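Before the IC-check and entry code that follows, here is a minimal standalone sketch of the stack-slot accounting performed above: everything is counted in 32-bit slots and only converted to bytes at the very end. The constants (4-byte slots, 2 slots per 64-bit word, 16-byte stack alignment) and the round_up_slots helper are illustrative assumptions standing in for VMRegImpl, StackAlignmentInSlots and round_to, not authoritative HotSpot definitions.

#include <cstdio>

static const int kSlotSize        = 4;  // bytes per stack slot (assumed)
static const int kSlotsPerWord    = 2;  // one 64-bit word = 2 slots (assumed)
static const int kStackAlignSlots = 4;  // 16-byte alignment / 4-byte slots (assumed)

// Round x up to a multiple of align, the same rounding round_to() performs.
static int round_up_slots(int x, int align) {
  return ((x + align - 1) / align) * align;
}

int main() {
  int out_arg_slots = 10;                    // hypothetical outgoing C-call args
  int stack_slots   = 0 + out_arg_slots;     // out_preserve_stack_slots() is 0 here
  stack_slots += 9 * kSlotsPerWord;          // oop handle area (9 register args)
  stack_slots += kSlotsPerWord;              // klass handle slot (static method)
  stack_slots += kSlotsPerWord;              // lock box (synchronized method)
  stack_slots += 2 + 9 * kSlotsPerWord;      // ra/fp plus result and move temps, as above

  stack_slots = round_up_slots(stack_slots, kStackAlignSlots);
  std::printf("frame size = %d bytes\n", stack_slots * kSlotSize);
  return 0;
}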
++ ++ //refer to register_mips.hpp:IC_Klass ++ const Register ic_reg = T1; ++ const Register receiver = T0; ++ ++ Label hit; ++ Label exception_pending; ++ ++ __ verify_oop(receiver); ++ //add for compressedoops ++ __ load_klass(T9, receiver); ++ __ beq(T9, ic_reg, hit); ++ __ delayed()->nop(); ++ __ jmp(ic_miss, relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ __ bind(hit); ++ ++ int vep_offset = ((intptr_t)__ pc()) - start; ++ ++ // Make enough room for patch_verified_entry ++ __ nop(); ++ __ nop(); ++ ++ // Generate stack overflow check ++ if (UseStackBanging) { ++ __ bang_stack_with_offset((int)JavaThread::stack_shadow_zone_size()); ++ } ++ ++ // Generate a new frame for the wrapper. ++ // do mips need this ? ++#ifndef OPT_THREAD ++ __ get_thread(TREG); ++#endif ++ __ st_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset())); ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ ++ __ enter(); ++ // -2 because return address is already present and so is saved fp ++ __ addiu(SP, SP, -1 * (stack_size - 2*wordSize)); ++ ++ // Frame is now completed as far a size and linkage. ++ ++ int frame_complete = ((intptr_t)__ pc()) - start; ++ ++ // Calculate the difference between sp and fp. We need to know it ++ // after the native call because on windows Java Natives will pop ++ // the arguments and it is painful to do sp relative addressing ++ // in a platform independent way. So after the call we switch to ++ // fp relative addressing. ++ //FIXME actually , the fp_adjustment may not be the right, because andr(sp, sp, at) may change ++ //the SP ++ int fp_adjustment = stack_size - 2*wordSize; ++ ++#ifdef COMPILER2 ++ // C2 may leave the stack dirty if not in SSE2+ mode ++ __ empty_FPU_stack(); ++#endif ++ ++ // Compute the fp offset for any slots used after the jni call ++ ++ int lock_slot_fp_offset = (lock_slot_offset*VMRegImpl::stack_slot_size) - fp_adjustment; ++ // We use TREG as a thread pointer because it is callee save and ++ // if we load it once it is usable thru the entire wrapper ++ const Register thread = TREG; ++ ++ // We use S4 as the oop handle for the receiver/klass ++ // It is callee save so it survives the call to native ++ ++ const Register oop_handle_reg = S4; ++ if (is_critical_native) { ++ Unimplemented(); ++ // check_needs_gc_for_critical_native(masm, stack_slots, total_c_args, total_in_args, ++ // oop_handle_offset, oop_maps, in_regs, in_sig_bt); ++ } ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ // ++ // We immediately shuffle the arguments so that any vm call we have to ++ // make from here on out (sync slow path, jvmpi, etc.) we will have ++ // captured the oops from our caller and have a valid oopMap for ++ // them. ++ ++ // ----------------- ++ // The Grand Shuffle ++ // ++ // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv* ++ // and, if static, the class mirror instead of a receiver. This pretty much ++ // guarantees that register layout will not match (and mips doesn't use reg ++ // parms though amd does). Since the native abi doesn't use register args ++ // and the java conventions does we don't have to worry about collisions. ++ // All of our moved are reg->stack or stack->stack. ++ // We ignore the extra arguments during the shuffle and handle them at the ++ // last moment. The shuffle is described by the two calling convention ++ // vectors we have in our possession. We simply walk the java vector to ++ // get the source locations and the c vector to get the destinations. 
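The reverse walk in the shuffle below matters: for a regular JNI native every outgoing position sits one or two places above its incoming position (because of the inserted JNIEnv* and, for static methods, the class mirror), so copying from the highest index downward never overwrites a source that has not been read yet. A minimal model of that overlap-safe copy, using a flat array in place of the real registers and VMRegPair vectors (the layout and the shift of 2 are illustrative assumptions):

#include <cstdio>

int main() {
  // Model: both conventions pass one value per position, and the C convention
  // shifts every argument up by two positions (JNIEnv*, class mirror).
  int pos[8] = { 11, 22, 33, 44, 0, 0, 0, 0 };  // java args occupy positions 0..3
  const int total_in_args = 4;
  const int shift = 2;                          // hypothetical offset

  // Walk from the last argument down, like the arg_order loop below: by the
  // time pos[i + shift] is written, it has already been read (it is only a
  // source for a larger index, which this loop visits first).
  for (int i = total_in_args - 1; i >= 0; i--) {
    pos[i + shift] = pos[i];
  }

  for (int i = 0; i < 8; i++) {
    std::printf("position %d = %d\n", i, pos[i]);
  }
  return 0;
}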
++ ++ int c_arg = method->is_static() ? 2 : 1 ; ++ ++ // Record sp-based slot for receiver on stack for non-static methods ++ int receiver_offset = -1; ++ ++ // This is a trick. We double the stack slots so we can claim ++ // the oops in the caller's frame. Since we are sure to have ++ // more args than the caller doubling is enough to make ++ // sure we can capture all the incoming oop args from the ++ // caller. ++ // ++ OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); ++ ++ // Mark location of fp (someday) ++ // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, vmreg(fp)); ++ ++#ifdef ASSERT ++ bool reg_destroyed[RegisterImpl::number_of_registers]; ++ bool freg_destroyed[FloatRegisterImpl::number_of_registers]; ++ for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { ++ reg_destroyed[r] = false; ++ } ++ for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { ++ freg_destroyed[f] = false; ++ } ++ ++#endif /* ASSERT */ ++ ++ // This may iterate in two different directions depending on the ++ // kind of native it is. The reason is that for regular JNI natives ++ // the incoming and outgoing registers are offset upwards and for ++ // critical natives they are offset down. ++ GrowableArray arg_order(2 * total_in_args); ++ VMRegPair tmp_vmreg; ++ tmp_vmreg.set2(T8->as_VMReg()); ++ ++ if (!is_critical_native) { ++ for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { ++ arg_order.push(i); ++ arg_order.push(c_arg); ++ } ++ } else { ++ // Compute a valid move order, using tmp_vmreg to break any cycles ++ Unimplemented(); ++ // ComputeMoveOrder cmo(total_in_args, in_regs, total_c_args, out_regs, in_sig_bt, arg_order, tmp_vmreg); ++ } ++ ++ int temploc = -1; ++ for (int ai = 0; ai < arg_order.length(); ai += 2) { ++ int i = arg_order.at(ai); ++ int c_arg = arg_order.at(ai + 1); ++ __ block_comment(err_msg("move %d -> %d", i, c_arg)); ++ if (c_arg == -1) { ++ assert(is_critical_native, "should only be required for critical natives"); ++ // This arg needs to be moved to a temporary ++ __ move(tmp_vmreg.first()->as_Register(), in_regs[i].first()->as_Register()); ++ in_regs[i] = tmp_vmreg; ++ temploc = i; ++ continue; ++ } else if (i == -1) { ++ assert(is_critical_native, "should only be required for critical natives"); ++ // Read from the temporary location ++ assert(temploc != -1, "must be valid"); ++ i = temploc; ++ temploc = -1; ++ } ++#ifdef ASSERT ++ if (in_regs[i].first()->is_Register()) { ++ assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!"); ++ } else if (in_regs[i].first()->is_FloatRegister()) { ++ assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding()], "destroyed reg!"); ++ } ++ if (out_regs[c_arg].first()->is_Register()) { ++ reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; ++ } else if (out_regs[c_arg].first()->is_FloatRegister()) { ++ freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; ++ } ++#endif /* ASSERT */ ++ switch (in_sig_bt[i]) { ++ case T_ARRAY: ++ if (is_critical_native) { ++ Unimplemented(); ++ // unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]); ++ c_arg++; ++#ifdef ASSERT ++ if (out_regs[c_arg].first()->is_Register()) { ++ reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; ++ } else if (out_regs[c_arg].first()->is_FloatRegister()) { ++ 
freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; ++ } ++#endif ++ break; ++ } ++ case T_OBJECT: ++ assert(!is_critical_native, "no oop arguments"); ++ object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], ++ ((i == 0) && (!is_static)), ++ &receiver_offset); ++ break; ++ case T_VOID: ++ break; ++ ++ case T_FLOAT: ++ float_move(masm, in_regs[i], out_regs[c_arg]); ++ break; ++ ++ case T_DOUBLE: ++ assert( i + 1 < total_in_args && ++ in_sig_bt[i + 1] == T_VOID && ++ out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); ++ double_move(masm, in_regs[i], out_regs[c_arg]); ++ break; ++ ++ case T_LONG : ++ long_move(masm, in_regs[i], out_regs[c_arg]); ++ break; ++ ++ case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); ++ ++ default: ++ simple_move32(masm, in_regs[i], out_regs[c_arg]); ++ } ++ } ++ ++ // point c_arg at the first arg that is already loaded in case we ++ // need to spill before we call out ++ c_arg = total_c_args - total_in_args; ++ // Pre-load a static method's oop. Used both by locking code and ++ // the normal JNI call code. ++ ++ __ move(oop_handle_reg, A1); ++ ++ if (method->is_static() && !is_critical_native) { ++ ++ // load opp into a register ++ int oop_index = __ oop_recorder()->find_index(JNIHandles::make_local( ++ (method->method_holder())->java_mirror())); ++ ++ ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ __ relocate(rspec); ++ __ patchable_set48(oop_handle_reg, (long)JNIHandles::make_local((method->method_holder())->java_mirror())); ++ // Now handlize the static class mirror it's known not-null. ++ __ sd( oop_handle_reg, SP, klass_offset); ++ map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); ++ ++ // Now get the handle ++ __ lea(oop_handle_reg, Address(SP, klass_offset)); ++ // store the klass handle as second argument ++ __ move(A1, oop_handle_reg); ++ // and protect the arg if we must spill ++ c_arg--; ++ } ++ ++ // Change state to native (we save the return address in the thread, since it might not ++ // be pushed on the stack when we do a a stack traversal). It is enough that the pc() ++ // points into the right code segment. It does not have to be the correct return pc. ++ // We use the same pc/oopMap repeatedly when we call out ++ ++ intptr_t the_pc = (intptr_t) __ pc(); ++ oop_maps->add_gc_map(the_pc - start, map); ++ ++ __ set_last_Java_frame(SP, noreg, NULL); ++ __ relocate(relocInfo::internal_pc_type); ++ { ++ intptr_t save_pc = (intptr_t)the_pc ; ++ __ patchable_set48(AT, save_pc); ++ } ++ __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ ++ ++ // We have all of the arguments setup at this point. We must not touch any register ++ // argument registers at this point (what if we save/restore them there are no oop? 
++ { ++ SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0); ++ save_args(masm, total_c_args, c_arg, out_regs); ++ int metadata_index = __ oop_recorder()->find_index(method()); ++ RelocationHolder rspec = metadata_Relocation::spec(metadata_index); ++ __ relocate(rspec); ++ __ patchable_set48(AT, (long)(method())); ++ ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), ++ thread, AT); ++ ++ restore_args(masm, total_c_args, c_arg, out_regs); ++ } ++ ++ // These are register definitions we need for locking/unlocking ++ const Register swap_reg = T8; // Must use T8 for cmpxchg instruction ++ const Register obj_reg = T9; // Will contain the oop ++ //const Register lock_reg = T6; // Address of compiler lock object (BasicLock) ++ const Register lock_reg = c_rarg0; // Address of compiler lock object (BasicLock) ++ ++ ++ ++ Label slow_path_lock; ++ Label lock_done; ++ ++ // Lock a synchronized method ++ if (method->is_synchronized()) { ++ assert(!is_critical_native, "unhandled"); ++ ++ const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); ++ ++ // Get the handle (the 2nd argument) ++ __ move(oop_handle_reg, A1); ++ ++ // Get address of the box ++ __ lea(lock_reg, Address(FP, lock_slot_fp_offset)); ++ ++ // Load the oop from the handle ++ __ ld(obj_reg, oop_handle_reg, 0); ++ ++ if (UseBiasedLocking) { ++ // Note that oop_handle_reg is trashed during this call ++ __ biased_locking_enter(lock_reg, obj_reg, swap_reg, A1, false, lock_done, &slow_path_lock); ++ } ++ ++ // Load immediate 1 into swap_reg %T8 ++ __ move(swap_reg, 1); ++ ++ __ ld(AT, obj_reg, 0); ++ __ orr(swap_reg, swap_reg, AT); ++ ++ __ sd(swap_reg, lock_reg, mark_word_offset); ++ __ cmpxchg(Address(obj_reg, 0), swap_reg, lock_reg, AT, true, false, lock_done); ++ // Test if the oopMark is an obvious stack pointer, i.e., ++ // 1) (mark & 3) == 0, and ++ // 2) sp <= mark < mark + os::pagesize() ++ // These 3 tests can be done by evaluating the following ++ // expression: ((mark - sp) & (3 - os::vm_page_size())), ++ // assuming both stack pointer and pagesize have their ++ // least significant 2 bits clear. ++ // NOTE: the oopMark is in swap_reg %T8 as the result of cmpxchg ++ ++ __ dsubu(swap_reg, swap_reg, SP); ++ __ move(AT, 3 - os::vm_page_size()); ++ __ andr(swap_reg , swap_reg, AT); ++ // Save the test result, for recursive case, the result is zero ++ __ sd(swap_reg, lock_reg, mark_word_offset); ++ __ bne(swap_reg, R0, slow_path_lock); ++ __ delayed()->nop(); ++ // Slow path will re-enter here ++ __ bind(lock_done); ++ ++ if (UseBiasedLocking) { ++ // Re-fetch oop_handle_reg as we trashed it above ++ __ move(A1, oop_handle_reg); ++ } ++ } ++ ++ ++ // Finally just about ready to make the JNI call ++ ++ ++ // get JNIEnv* which is first argument to native ++ if (!is_critical_native) { ++ __ addiu(A0, thread, in_bytes(JavaThread::jni_environment_offset())); ++ } ++ ++ // Example: Java_java_lang_ref_Finalizer_invokeFinalizeMethod(JNIEnv *env, jclass clazz, jobject ob) ++ // Load the second arguments into A1 ++ //__ ld(A1, SP , wordSize ); // klass ++ ++ // Now set thread in native ++ __ addiu(AT, R0, _thread_in_native); ++ if(os::is_MP()) { ++ __ sync(); // store release ++ } ++ __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset())); ++ // do the call ++ __ call(native_func, relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ // WARNING - on Windows Java Natives use pascal calling convention and pop the ++ // arguments off of the stack. 
We could just re-adjust the stack pointer here ++ // and continue to do SP relative addressing but we instead switch to FP ++ // relative addressing. ++ ++ // Unpack native results. ++ switch (ret_type) { ++ case T_BOOLEAN: __ c2bool(V0); break; ++ case T_CHAR : __ andi(V0, V0, 0xFFFF); break; ++ case T_BYTE : __ sign_extend_byte (V0); break; ++ case T_SHORT : __ sign_extend_short(V0); break; ++ case T_INT : // nothing to do break; ++ case T_DOUBLE : ++ case T_FLOAT : ++ // Result is in st0 we'll save as needed ++ break; ++ case T_ARRAY: // Really a handle ++ case T_OBJECT: // Really a handle ++ break; // can't de-handlize until after safepoint check ++ case T_VOID: break; ++ case T_LONG: break; ++ default : ShouldNotReachHere(); ++ } ++ // Switch thread to "native transition" state before reading the synchronization state. ++ // This additional state is necessary because reading and testing the synchronization ++ // state is not atomic w.r.t. GC, as this scenario demonstrates: ++ // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted. ++ // VM thread changes sync state to synchronizing and suspends threads for GC. ++ // Thread A is resumed to finish this native method, but doesn't block here since it ++ // didn't see any synchronization is progress, and escapes. ++ __ addiu(AT, R0, _thread_in_native_trans); ++ if(os::is_MP()) { ++ __ sync(); // store release ++ } ++ __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset())); ++ ++ if(os::is_MP()) { ++ if (UseMembar) { ++ // Force this write out before the read below ++ __ sync(); ++ } else { ++ // Write serialization page so VM thread can do a pseudo remote membar. ++ // We use the current thread pointer to calculate a thread specific ++ // offset to write to within the page. This minimizes bus traffic ++ // due to cache line collision. ++ __ serialize_memory(thread, A0); ++ } ++ } ++ ++ Label after_transition; ++ ++ // check for safepoint operation in progress and/or pending suspend requests ++ { ++ Label Continue; ++ Label slow_path; ++ ++ __ safepoint_poll_acquire(slow_path, thread); ++ __ lw(AT, thread, in_bytes(JavaThread::suspend_flags_offset())); ++ __ beq(AT, R0, Continue); ++ __ delayed()->nop(); ++ __ bind(slow_path); ++ ++ // Don't use call_VM as it will see a possible pending exception and forward it ++ // and never return here preventing us from clearing _last_native_pc down below. ++ // ++ save_native_result(masm, ret_type, stack_slots); ++ __ move(A0, thread); ++ __ addiu(SP, SP, -wordSize); ++ __ push(S2); ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ move(S2, SP); // use S2 as a sender SP holder ++ __ andr(SP, SP, AT); // align stack as required by ABI ++ if (!is_critical_native) { ++ __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ } else { ++ __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ } ++ __ move(SP, S2); // use S2 as a sender SP holder ++ __ pop(S2); ++ __ addiu(SP, SP, wordSize); ++ //add for compressedoops ++ __ reinit_heapbase(); ++ // Restore any method result value ++ restore_native_result(masm, ret_type, stack_slots); ++ ++ if (is_critical_native) { ++ // The call above performed the transition to thread_in_Java so ++ // skip the transition logic below. 
++ __ beq(R0, R0, after_transition); ++ __ delayed()->nop(); ++ } ++ ++ __ bind(Continue); ++ } ++ ++ // change thread state ++ __ addiu(AT, R0, _thread_in_Java); ++ if(os::is_MP()) { ++ __ sync(); // store release ++ } ++ __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset())); ++ __ bind(after_transition); ++ Label reguard; ++ Label reguard_done; ++ __ lw(AT, thread, in_bytes(JavaThread::stack_guard_state_offset())); ++ __ addiu(AT, AT, -JavaThread::stack_guard_yellow_reserved_disabled); ++ __ beq(AT, R0, reguard); ++ __ delayed()->nop(); ++ // slow path reguard re-enters here ++ __ bind(reguard_done); ++ ++ // Handle possible exception (will unlock if necessary) ++ ++ // native result if any is live ++ ++ // Unlock ++ Label slow_path_unlock; ++ Label unlock_done; ++ if (method->is_synchronized()) { ++ ++ Label done; ++ ++ // Get locked oop from the handle we passed to jni ++ __ ld( obj_reg, oop_handle_reg, 0); ++ if (UseBiasedLocking) { ++ __ biased_locking_exit(obj_reg, T8, done); ++ ++ } ++ ++ // Simple recursive lock? ++ ++ __ ld(AT, FP, lock_slot_fp_offset); ++ __ beq(AT, R0, done); ++ __ delayed()->nop(); ++ // Must save FSF if if it is live now because cmpxchg must use it ++ if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { ++ save_native_result(masm, ret_type, stack_slots); ++ } ++ ++ // get old displaced header ++ __ ld (T8, FP, lock_slot_fp_offset); ++ // get address of the stack lock ++ __ addiu(c_rarg0, FP, lock_slot_fp_offset); ++ // Atomic swap old header if oop still contains the stack lock ++ __ cmpxchg(Address(obj_reg, 0), c_rarg0, T8, AT, false, false, unlock_done, &slow_path_unlock); ++ ++ // slow path re-enters here ++ __ bind(unlock_done); ++ if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { ++ restore_native_result(masm, ret_type, stack_slots); ++ } ++ ++ __ bind(done); ++ ++ } ++ { ++ SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0); ++ // Tell dtrace about this method exit ++ save_native_result(masm, ret_type, stack_slots); ++ int metadata_index = __ oop_recorder()->find_index( (method())); ++ RelocationHolder rspec = metadata_Relocation::spec(metadata_index); ++ __ relocate(rspec); ++ __ patchable_set48(AT, (long)(method())); ++ ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), ++ thread, AT); ++ restore_native_result(masm, ret_type, stack_slots); ++ } ++ ++ // We can finally stop using that last_Java_frame we setup ages ago ++ ++ __ reset_last_Java_frame(false); ++ ++ // Unpack oop result, e.g. JNIHandles::resolve value. ++ if (ret_type == T_OBJECT || ret_type == T_ARRAY) { ++ __ resolve_jobject(V0, thread, T9); ++ } ++ ++ if (CheckJNICalls) { ++ // clear_pending_jni_exception_check ++ __ sd(R0, thread, in_bytes(JavaThread::pending_jni_exception_check_fn_offset())); ++ } ++ ++ if (!is_critical_native) { ++ // reset handle block ++ __ ld(AT, thread, in_bytes(JavaThread::active_handles_offset())); ++ __ sw(R0, AT, JNIHandleBlock::top_offset_in_bytes()); ++ } ++ ++ if (!is_critical_native) { ++ // Any exception pending? ++ __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, exception_pending); ++ __ delayed()->nop(); ++ } ++ // no exception, we're almost done ++ ++ // check that only result value is on FPU stack ++ __ verify_FPU(ret_type == T_FLOAT || ret_type == T_DOUBLE ? 
1 : 0, "native_wrapper normal exit"); ++ ++ // Return ++#ifndef OPT_THREAD ++ __ get_thread(TREG); ++#endif ++ //__ ld_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset())); ++ __ leave(); ++ ++ __ jr(RA); ++ __ delayed()->nop(); ++ // Unexpected paths are out of line and go here ++ // Slow path locking & unlocking ++ if (method->is_synchronized()) { ++ ++ // BEGIN Slow path lock ++ __ bind(slow_path_lock); ++ ++ // protect the args we've loaded ++ save_args(masm, total_c_args, c_arg, out_regs); ++ ++ // has last_Java_frame setup. No exceptions so do vanilla call not call_VM ++ // args are (oop obj, BasicLock* lock, JavaThread* thread) ++ ++ __ move(A0, obj_reg); ++ __ move(A1, lock_reg); ++ __ move(A2, thread); ++ __ addiu(SP, SP, - 3*wordSize); ++ ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ move(S2, SP); // use S2 as a sender SP holder ++ __ andr(SP, SP, AT); // align stack as required by ABI ++ ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ __ move(SP, S2); ++ __ addiu(SP, SP, 3*wordSize); ++ ++ restore_args(masm, total_c_args, c_arg, out_regs); ++ ++#ifdef ASSERT ++ { Label L; ++ __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("no pending exception allowed on exit from monitorenter"); ++ __ bind(L); ++ } ++#endif ++ __ b(lock_done); ++ __ delayed()->nop(); ++ // END Slow path lock ++ ++ // BEGIN Slow path unlock ++ __ bind(slow_path_unlock); ++ ++ // Slow path unlock ++ ++ if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { ++ save_native_result(masm, ret_type, stack_slots); ++ } ++ // Save pending exception around call to VM (which contains an EXCEPTION_MARK) ++ ++ __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ push(AT); ++ __ sd(R0, thread, in_bytes(Thread::pending_exception_offset())); ++ ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ move(S2, SP); // use S2 as a sender SP holder ++ __ andr(SP, SP, AT); // align stack as required by ABI ++ ++ // should be a peal ++ // +wordSize because of the push above ++ __ addiu(A1, FP, lock_slot_fp_offset); ++ ++ __ move(A0, obj_reg); ++ __ move(A2, thread); ++ __ addiu(SP, SP, -2*wordSize); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), ++ relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ __ addiu(SP, SP, 2*wordSize); ++ __ move(SP, S2); ++ //add for compressedoops ++ __ reinit_heapbase(); ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld( AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("no pending exception allowed on exit complete_monitor_unlocking_C"); ++ __ bind(L); ++ } ++#endif /* ASSERT */ ++ ++ __ pop(AT); ++ __ sd(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { ++ restore_native_result(masm, ret_type, stack_slots); ++ } ++ __ b(unlock_done); ++ __ delayed()->nop(); ++ // END Slow path unlock ++ ++ } ++ ++ // SLOW PATH Reguard the stack if needed ++ ++ __ bind(reguard); ++ save_native_result(masm, ret_type, stack_slots); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), ++ relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ //add for compressedoops ++ __ reinit_heapbase(); ++ restore_native_result(masm, ret_type, stack_slots); ++ __ b(reguard_done); ++ __ delayed()->nop(); ++ ++ // BEGIN EXCEPTION PROCESSING ++ if (!is_critical_native) { ++ // 
Forward the exception ++ __ bind(exception_pending); ++ ++ // remove possible return value from FPU register stack ++ __ empty_FPU_stack(); ++ ++ // pop our frame ++ //forward_exception_entry need return address on stack ++ __ move(SP, FP); ++ __ pop(FP); ++ ++ // and forward the exception ++ __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ } ++ __ flush(); ++ ++ nmethod *nm = nmethod::new_native_nmethod(method, ++ compile_id, ++ masm->code(), ++ vep_offset, ++ frame_complete, ++ stack_slots / VMRegImpl::slots_per_word, ++ (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)), ++ in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size), ++ oop_maps); ++ ++ if (is_critical_native) { ++ nm->set_lazy_critical_native(true); ++ } ++ ++ return nm; ++ ++} ++ ++#ifdef HAVE_DTRACE_H ++// --------------------------------------------------------------------------- ++// Generate a dtrace nmethod for a given signature. The method takes arguments ++// in the Java compiled code convention, marshals them to the native ++// abi and then leaves nops at the position you would expect to call a native ++// function. When the probe is enabled the nops are replaced with a trap ++// instruction that dtrace inserts and the trace will cause a notification ++// to dtrace. ++// ++// The probes are only able to take primitive types and java/lang/String as ++// arguments. No other java types are allowed. Strings are converted to utf8 ++// strings so that from dtrace point of view java strings are converted to C ++// strings. There is an arbitrary fixed limit on the total space that a method ++// can use for converting the strings. (256 chars per string in the signature). ++// So any java string larger then this is truncated. ++ ++static int fp_offset[ConcreteRegisterImpl::number_of_registers] = { 0 }; ++static bool offsets_initialized = false; ++ ++static VMRegPair reg64_to_VMRegPair(Register r) { ++ VMRegPair ret; ++ if (wordSize == 8) { ++ ret.set2(r->as_VMReg()); ++ } else { ++ ret.set_pair(r->successor()->as_VMReg(), r->as_VMReg()); ++ } ++ return ret; ++} ++ ++ ++nmethod *SharedRuntime::generate_dtrace_nmethod(MacroAssembler *masm, ++ methodHandle method) { ++ ++ ++ // generate_dtrace_nmethod is guarded by a mutex so we are sure to ++ // be single threaded in this method. ++ assert(AdapterHandlerLibrary_lock->owned_by_self(), "must be"); ++ ++ // Fill in the signature array, for the calling-convention call. ++ int total_args_passed = method->size_of_parameters(); ++ ++ BasicType* in_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed); ++ VMRegPair *in_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed); ++ ++ // The signature we are going to use for the trap that dtrace will see ++ // java/lang/String is converted. We drop "this" and any other object ++ // is converted to NULL. (A one-slot java/lang/Long object reference ++ // is converted to a two-slot long, which is why we double the allocation). 
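A worked example of the signature conversion the loop below performs, applied to one hypothetical probe signature. The enum and names here are local stand-ins, not HotSpot's BasicType, and the Java-side T_VOID filler slots for longs/doubles are omitted for brevity:

#include <cstdio>
#include <vector>

// Local stand-ins for HotSpot's BasicType values.
enum BT { BT_INT, BT_LONG, BT_FLOAT, BT_DOUBLE, BT_STRING, BT_BOXED_LONG,
          BT_ADDRESS, BT_VOID };

int main() {
  // Hypothetical probe: void probe(String s, int i, float f, double d, java.lang.Long box)
  const BT java_sig[] = { BT_STRING, BT_INT, BT_FLOAT, BT_DOUBLE, BT_BOXED_LONG };
  std::vector<BT> c_sig;

  for (BT bt : java_sig) {
    switch (bt) {
      case BT_STRING:     c_sig.push_back(BT_ADDRESS); break;  // converted to a utf8 char*
      case BT_FLOAT:      c_sig.push_back(BT_INT);     break;  // floats are widened to int
      case BT_DOUBLE:                                          // doubles and boxed Longs both
      case BT_BOXED_LONG: c_sig.push_back(BT_LONG);            // become a two-slot long
                          c_sig.push_back(BT_VOID);    break;
      default:            c_sig.push_back(bt);         break;  // other primitives pass through
    }
  }
  std::printf("dtrace sees %zu C argument slots\n", c_sig.size());
  return 0;
}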
++ BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed * 2); ++ VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed * 2); ++ ++ int i=0; ++ int total_strings = 0; ++ int first_arg_to_pass = 0; ++ int total_c_args = 0; ++ ++ // Skip the receiver as dtrace doesn't want to see it ++ if( !method->is_static() ) { ++ in_sig_bt[i++] = T_OBJECT; ++ first_arg_to_pass = 1; ++ } ++ ++ SignatureStream ss(method->signature()); ++ for ( ; !ss.at_return_type(); ss.next()) { ++ BasicType bt = ss.type(); ++ in_sig_bt[i++] = bt; // Collect remaining bits of signature ++ out_sig_bt[total_c_args++] = bt; ++ if( bt == T_OBJECT) { ++ symbolOop s = ss.as_symbol_or_null(); ++ if (s == vmSymbols::java_lang_String()) { ++ total_strings++; ++ out_sig_bt[total_c_args-1] = T_ADDRESS; ++ } else if (s == vmSymbols::java_lang_Boolean() || ++ s == vmSymbols::java_lang_Byte()) { ++ out_sig_bt[total_c_args-1] = T_BYTE; ++ } else if (s == vmSymbols::java_lang_Character() || ++ s == vmSymbols::java_lang_Short()) { ++ out_sig_bt[total_c_args-1] = T_SHORT; ++ } else if (s == vmSymbols::java_lang_Integer() || ++ s == vmSymbols::java_lang_Float()) { ++ out_sig_bt[total_c_args-1] = T_INT; ++ } else if (s == vmSymbols::java_lang_Long() || ++ s == vmSymbols::java_lang_Double()) { ++ out_sig_bt[total_c_args-1] = T_LONG; ++ out_sig_bt[total_c_args++] = T_VOID; ++ } ++ } else if ( bt == T_LONG || bt == T_DOUBLE ) { ++ in_sig_bt[i++] = T_VOID; // Longs & doubles take 2 Java slots ++ // We convert double to long ++ out_sig_bt[total_c_args-1] = T_LONG; ++ out_sig_bt[total_c_args++] = T_VOID; ++ } else if ( bt == T_FLOAT) { ++ // We convert float to int ++ out_sig_bt[total_c_args-1] = T_INT; ++ } ++ } ++ ++ assert(i==total_args_passed, "validly parsed signature"); ++ ++ // Now get the compiled-Java layout as input arguments ++ int comp_args_on_stack; ++ comp_args_on_stack = SharedRuntime::java_calling_convention( ++ in_sig_bt, in_regs, total_args_passed, false); ++ ++ // We have received a description of where all the java arg are located ++ // on entry to the wrapper. We need to convert these args to where ++ // the a native (non-jni) function would expect them. To figure out ++ // where they go we convert the java signature to a C signature and remove ++ // T_VOID for any long/double we might have received. ++ ++ ++ // Now figure out where the args must be stored and how much stack space ++ // they require (neglecting out_preserve_stack_slots but space for storing ++ // the 1st six register arguments). It's weird see int_stk_helper. ++ ++ int out_arg_slots; ++ out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); ++ ++ // Calculate the total number of stack slots we will need. ++ ++ // First count the abi requirement plus all of the outgoing args ++ int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; ++ ++ // Plus a temp for possible converion of float/double/long register args ++ ++ int conversion_temp = stack_slots; ++ stack_slots += 2; ++ ++ ++ // Now space for the string(s) we must convert ++ ++ int string_locs = stack_slots; ++ stack_slots += total_strings * ++ (max_dtrace_string_size / VMRegImpl::stack_slot_size); ++ ++ // Ok The space we have allocated will look like: ++ // ++ // ++ // FP-> | | ++ // |---------------------| ++ // | string[n] | ++ // |---------------------| <- string_locs[n] ++ // | string[n-1] | ++ // |---------------------| <- string_locs[n-1] ++ // | ... | ++ // | ... 
| ++ // |---------------------| <- string_locs[1] ++ // | string[0] | ++ // |---------------------| <- string_locs[0] ++ // | temp | ++ // |---------------------| <- conversion_temp ++ // | outbound memory | ++ // | based arguments | ++ // | | ++ // |---------------------| ++ // | | ++ // SP-> | out_preserved_slots | ++ // ++ // ++ ++ // Now compute actual number of stack words we need rounding to make ++ // stack properly aligned. ++ stack_slots = round_to(stack_slots, 4 * VMRegImpl::slots_per_word); ++ ++ int stack_size = stack_slots * VMRegImpl::stack_slot_size; ++ ++ intptr_t start = (intptr_t)__ pc(); ++ ++ // First thing make an ic check to see if we should even be here ++ ++ { ++ Label L; ++ const Register temp_reg = G3_scratch; ++ Address ic_miss(temp_reg, SharedRuntime::get_ic_miss_stub()); ++ __ verify_oop(O0); ++ __ ld_ptr(O0, oopDesc::klass_offset_in_bytes(), temp_reg); ++ __ cmp(temp_reg, G5_inline_cache_reg); ++ __ brx(Assembler::equal, true, Assembler::pt, L); ++ __ delayed()->nop(); ++ ++ __ jump_to(ic_miss, 0); ++ __ delayed()->nop(); ++ __ align(CodeEntryAlignment); ++ __ bind(L); ++ } ++ ++ int vep_offset = ((intptr_t)__ pc()) - start; ++ ++ // Make enough room for patch_verified_entry ++ __ nop(); ++ __ nop(); ++ ++ // Generate stack overflow check before creating frame ++ __ generate_stack_overflow_check(stack_size); ++ ++ // Generate a new frame for the wrapper. ++ __ save(SP, -stack_size, SP); ++ ++ // Frame is now completed as far a size and linkage. ++ ++ int frame_complete = ((intptr_t)__ pc()) - start; ++ ++#ifdef ASSERT ++ bool reg_destroyed[RegisterImpl::number_of_registers]; ++ bool freg_destroyed[FloatRegisterImpl::number_of_registers]; ++ for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { ++ reg_destroyed[r] = false; ++ } ++ for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { ++ freg_destroyed[f] = false; ++ } ++ ++#endif /* ASSERT */ ++ ++ VMRegPair zero; ++ const Register g0 = G0; // without this we get a compiler warning (why??) 
++ zero.set2(g0->as_VMReg()); ++ ++ int c_arg, j_arg; ++ ++ Register conversion_off = noreg; ++ ++ for (j_arg = first_arg_to_pass, c_arg = 0 ; ++ j_arg < total_args_passed ; j_arg++, c_arg++ ) { ++ ++ VMRegPair src = in_regs[j_arg]; ++ VMRegPair dst = out_regs[c_arg]; ++ ++#ifdef ASSERT ++ if (src.first()->is_Register()) { ++ assert(!reg_destroyed[src.first()->as_Register()->encoding()], "ack!"); ++ } else if (src.first()->is_FloatRegister()) { ++ assert(!freg_destroyed[src.first()->as_FloatRegister()->encoding( ++ FloatRegisterImpl::S)], "ack!"); ++ } ++ if (dst.first()->is_Register()) { ++ reg_destroyed[dst.first()->as_Register()->encoding()] = true; ++ } else if (dst.first()->is_FloatRegister()) { ++ freg_destroyed[dst.first()->as_FloatRegister()->encoding( ++ FloatRegisterImpl::S)] = true; ++ } ++#endif /* ASSERT */ ++ ++ switch (in_sig_bt[j_arg]) { ++ case T_ARRAY: ++ case T_OBJECT: ++ { ++ if (out_sig_bt[c_arg] == T_BYTE || out_sig_bt[c_arg] == T_SHORT || ++ out_sig_bt[c_arg] == T_INT || out_sig_bt[c_arg] == T_LONG) { ++ // need to unbox a one-slot value ++ Register in_reg = L0; ++ Register tmp = L2; ++ if ( src.first()->is_reg() ) { ++ in_reg = src.first()->as_Register(); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(src.first()) + STACK_BIAS), ++ "must be"); ++ __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, in_reg); ++ } ++ // If the final destination is an acceptable register ++ if ( dst.first()->is_reg() ) { ++ if ( dst.is_single_phys_reg() || out_sig_bt[c_arg] != T_LONG ) { ++ tmp = dst.first()->as_Register(); ++ } ++ } ++ ++ Label skipUnbox; ++ if ( wordSize == 4 && out_sig_bt[c_arg] == T_LONG ) { ++ __ mov(G0, tmp->successor()); ++ } ++ __ br_null(in_reg, true, Assembler::pn, skipUnbox); ++ __ delayed()->mov(G0, tmp); ++ ++ BasicType bt = out_sig_bt[c_arg]; ++ int box_offset = java_lang_boxing_object::value_offset_in_bytes(bt); ++ switch (bt) { ++ case T_BYTE: ++ __ ldub(in_reg, box_offset, tmp); break; ++ case T_SHORT: ++ __ lduh(in_reg, box_offset, tmp); break; ++ case T_INT: ++ __ ld(in_reg, box_offset, tmp); break; ++ case T_LONG: ++ __ ld_long(in_reg, box_offset, tmp); break; ++ default: ShouldNotReachHere(); ++ } ++ ++ __ bind(skipUnbox); ++ // If tmp wasn't final destination copy to final destination ++ if (tmp == L2) { ++ VMRegPair tmp_as_VM = reg64_to_VMRegPair(L2); ++ if (out_sig_bt[c_arg] == T_LONG) { ++ long_move(masm, tmp_as_VM, dst); ++ } else { ++ move32_64(masm, tmp_as_VM, out_regs[c_arg]); ++ } ++ } ++ if (out_sig_bt[c_arg] == T_LONG) { ++ assert(out_sig_bt[c_arg+1] == T_VOID, "must be"); ++ ++c_arg; // move over the T_VOID to keep the loop indices in sync ++ } ++ } else if (out_sig_bt[c_arg] == T_ADDRESS) { ++ Register s = ++ src.first()->is_reg() ? src.first()->as_Register() : L2; ++ Register d = ++ dst.first()->is_reg() ? dst.first()->as_Register() : L2; ++ ++ // We store the oop now so that the conversion pass can reach ++ // while in the inner frame. This will be the only store if ++ // the oop is NULL. 
++ if (s != L2) { ++ // src is register ++ if (d != L2) { ++ // dst is register ++ __ mov(s, d); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + ++ STACK_BIAS), "must be"); ++ __ st_ptr(s, SP, reg2offset(dst.first()) + STACK_BIAS); ++ } ++ } else { ++ // src not a register ++ assert(Assembler::is_simm13(reg2offset(src.first()) + ++ STACK_BIAS), "must be"); ++ __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, d); ++ if (d == L2) { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + ++ STACK_BIAS), "must be"); ++ __ st_ptr(d, SP, reg2offset(dst.first()) + STACK_BIAS); ++ } ++ } ++ } else if (out_sig_bt[c_arg] != T_VOID) { ++ // Convert the arg to NULL ++ if (dst.first()->is_reg()) { ++ __ mov(G0, dst.first()->as_Register()); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + ++ STACK_BIAS), "must be"); ++ __ st_ptr(G0, SP, reg2offset(dst.first()) + STACK_BIAS); ++ } ++ } ++ } ++ break; ++ case T_VOID: ++ break; ++ ++ case T_FLOAT: ++ if (src.first()->is_stack()) { ++ // Stack to stack/reg is simple ++ move32_64(masm, src, dst); ++ } else { ++ if (dst.first()->is_reg()) { ++ // freg -> reg ++ int off = ++ STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size; ++ Register d = dst.first()->as_Register(); ++ if (Assembler::is_simm13(off)) { ++ __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), ++ SP, off); ++ __ ld(SP, off, d); ++ } else { ++ if (conversion_off == noreg) { ++ __ set(off, L6); ++ conversion_off = L6; ++ } ++ __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), ++ SP, conversion_off); ++ __ ld(SP, conversion_off , d); ++ } ++ } else { ++ // freg -> mem ++ int off = STACK_BIAS + reg2offset(dst.first()); ++ if (Assembler::is_simm13(off)) { ++ __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), ++ SP, off); ++ } else { ++ if (conversion_off == noreg) { ++ __ set(off, L6); ++ conversion_off = L6; ++ } ++ __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), ++ SP, conversion_off); ++ } ++ } ++ } ++ break; ++ ++ case T_DOUBLE: ++ assert( j_arg + 1 < total_args_passed && ++ in_sig_bt[j_arg + 1] == T_VOID && ++ out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); ++ if (src.first()->is_stack()) { ++ // Stack to stack/reg is simple ++ long_move(masm, src, dst); ++ } else { ++ Register d = dst.first()->is_reg() ? dst.first()->as_Register() : L2; ++ ++ // Destination could be an odd reg on 32bit in which case ++ // we can't load direct to the destination. 
++ ++ if (!d->is_even() && wordSize == 4) { ++ d = L2; ++ } ++ int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size; ++ if (Assembler::is_simm13(off)) { ++ __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(), ++ SP, off); ++ __ ld_long(SP, off, d); ++ } else { ++ if (conversion_off == noreg) { ++ __ set(off, L6); ++ conversion_off = L6; ++ } ++ __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(), ++ SP, conversion_off); ++ __ ld_long(SP, conversion_off, d); ++ } ++ if (d == L2) { ++ long_move(masm, reg64_to_VMRegPair(L2), dst); ++ } ++ } ++ break; ++ ++ case T_LONG : ++ // 32bit can't do a split move of something like g1 -> O0, O1 ++ // so use a memory temp ++ if (src.is_single_phys_reg() && wordSize == 4) { ++ Register tmp = L2; ++ if (dst.first()->is_reg() && ++ (wordSize == 8 || dst.first()->as_Register()->is_even())) { ++ tmp = dst.first()->as_Register(); ++ } ++ ++ int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size; ++ if (Assembler::is_simm13(off)) { ++ __ stx(src.first()->as_Register(), SP, off); ++ __ ld_long(SP, off, tmp); ++ } else { ++ if (conversion_off == noreg) { ++ __ set(off, L6); ++ conversion_off = L6; ++ } ++ __ stx(src.first()->as_Register(), SP, conversion_off); ++ __ ld_long(SP, conversion_off, tmp); ++ } ++ ++ if (tmp == L2) { ++ long_move(masm, reg64_to_VMRegPair(L2), dst); ++ } ++ } else { ++ long_move(masm, src, dst); ++ } ++ break; ++ ++ case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); ++ ++ default: ++ move32_64(masm, src, dst); ++ } ++ } ++ ++ ++ // If we have any strings we must store any register based arg to the stack ++ // This includes any still live xmm registers too. ++ ++ if (total_strings > 0 ) { ++ ++ // protect all the arg registers ++ __ save_frame(0); ++ __ mov(G2_thread, L7_thread_cache); ++ const Register L2_string_off = L2; ++ ++ // Get first string offset ++ __ set(string_locs * VMRegImpl::stack_slot_size, L2_string_off); ++ ++ for (c_arg = 0 ; c_arg < total_c_args ; c_arg++ ) { ++ if (out_sig_bt[c_arg] == T_ADDRESS) { ++ ++ VMRegPair dst = out_regs[c_arg]; ++ const Register d = dst.first()->is_reg() ? ++ dst.first()->as_Register()->after_save() : noreg; ++ ++ // It's a string the oop and it was already copied to the out arg ++ // position ++ if (d != noreg) { ++ __ mov(d, O0); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS), ++ "must be"); ++ __ ld_ptr(FP, reg2offset(dst.first()) + STACK_BIAS, O0); ++ } ++ Label skip; ++ ++ __ br_null(O0, false, Assembler::pn, skip); ++ __ delayed()->addu(FP, L2_string_off, O1); ++ ++ if (d != noreg) { ++ __ mov(O1, d); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS), ++ "must be"); ++ __ st_ptr(O1, FP, reg2offset(dst.first()) + STACK_BIAS); ++ } ++ ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::get_utf), ++ relocInfo::runtime_call_type); ++ __ delayed()->addu(L2_string_off, max_dtrace_string_size, L2_string_off); ++ ++ __ bind(skip); ++ ++ } ++ ++ } ++ __ mov(L7_thread_cache, G2_thread); ++ __ restore(); ++ ++ } ++ ++ ++ // Ok now we are done. 
Need to place the nop that dtrace wants in order to ++ // patch in the trap ++ ++ int patch_offset = ((intptr_t)__ pc()) - start; ++ ++ __ nop(); ++ ++ ++ // Return ++ ++ __ ret(); ++ __ delayed()->restore(); ++ ++ __ flush(); ++ ++ nmethod *nm = nmethod::new_dtrace_nmethod( ++ method, masm->code(), vep_offset, patch_offset, frame_complete, ++ stack_slots / VMRegImpl::slots_per_word); ++ return nm; ++ ++} ++ ++#endif // HAVE_DTRACE_H ++ ++// this function returns the adjust size (in number of words) to a c2i adapter ++// activation for use during deoptimization ++int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) { ++ return (callee_locals - callee_parameters) * Interpreter::stackElementWords; ++} ++ ++// "Top of Stack" slots that may be unused by the calling convention but must ++// otherwise be preserved. ++// On Intel these are not necessary and the value can be zero. ++// On Sparc this describes the words reserved for storing a register window ++// when an interrupt occurs. ++uint SharedRuntime::out_preserve_stack_slots() { ++ return 0; ++} ++ ++//------------------------------generate_deopt_blob---------------------------- ++// Ought to generate an ideal graph & compile, but here's some SPARC ASM ++// instead. ++void SharedRuntime::generate_deopt_blob() { ++ // allocate space for the code ++ ResourceMark rm; ++ // setup code generation tools ++ //CodeBuffer buffer ("deopt_blob", 4000, 2048); ++ CodeBuffer buffer ("deopt_blob", 8000, 2048); ++ MacroAssembler* masm = new MacroAssembler( & buffer); ++ int frame_size_in_words; ++ OopMap* map = NULL; ++ // Account for the extra args we place on the stack ++ // by the time we call fetch_unroll_info ++ const int additional_words = 2; // deopt kind, thread ++ ++ OopMapSet *oop_maps = new OopMapSet(); ++ ++ address start = __ pc(); ++ Label cont; ++ // we use S3 for DeOpt reason register ++ Register reason = S3; ++ // use S6 for thread register ++ Register thread = TREG; ++ // use S7 for fetch_unroll_info returned UnrollBlock ++ Register unroll = S7; ++ // Prolog for non exception case! ++ // Correct the return address we were given. ++ //FIXME, return address is on the tos or Ra? ++ __ addiu(RA, RA, - (NativeCall::return_address_offset_long)); ++ // Save everything in sight. ++ map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words); ++ // Normal deoptimization ++ __ move(reason, Deoptimization::Unpack_deopt); ++ __ b(cont); ++ __ delayed()->nop(); ++ ++ int reexecute_offset = __ pc() - start; ++ ++ // Reexecute case ++ // return address is the pc describes what bci to do re-execute at ++ ++ // No need to update map as each call to save_live_registers will produce identical oopmap ++ (void) RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words); ++ __ move(reason, Deoptimization::Unpack_reexecute); ++ __ b(cont); ++ __ delayed()->nop(); ++ ++ int exception_offset = __ pc() - start; ++ // Prolog for exception case ++ ++ // all registers are dead at this entry point, except for V0 and ++ // V1 which contain the exception oop and exception pc ++ // respectively. Set them in TLS and fall thru to the ++ // unpack_with_exception_in_tls entry point. 
++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ st_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset())); ++ int exception_in_tls_offset = __ pc() - start; ++ // new implementation because exception oop is now passed in JavaThread ++ ++ // Prolog for exception case ++ // All registers must be preserved because they might be used by LinearScan ++ // Exceptiop oop and throwing PC are passed in JavaThread ++ // tos: stack at point of call to method that threw the exception (i.e. only ++ // args are on the stack, no return address) ++ ++ // Return address will be patched later with the throwing pc. The correct value is not ++ // available now because loading it from memory would destroy registers. ++ // Save everything in sight. ++ // No need to update map as each call to save_live_registers will produce identical oopmap ++ __ addiu(RA, RA, - (NativeCall::return_address_offset_long)); ++ (void) RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words); ++ ++ // Now it is safe to overwrite any register ++ // store the correct deoptimization type ++ __ move(reason, Deoptimization::Unpack_exception); ++ // load throwing pc from JavaThread and patch it as the return address ++ // of the current frame. Then clear the field in JavaThread ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(V1, SP, RegisterSaver::raOffset() * wordSize); //save ra ++ __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset())); ++ ++ ++#ifdef ASSERT ++ // verify that there is really an exception oop in JavaThread ++ __ ld_ptr(AT, thread, in_bytes(JavaThread::exception_oop_offset())); ++ __ verify_oop(AT); ++ // verify that there is no pending exception ++ Label no_pending_exception; ++ __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, no_pending_exception); ++ __ delayed()->nop(); ++ __ stop("must not have pending exception here"); ++ __ bind(no_pending_exception); ++#endif ++ __ bind(cont); ++ // Compiled code leaves the floating point stack dirty, empty it. ++ __ empty_FPU_stack(); ++ ++ ++ // Call C code. Need thread and this frame, but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ __ move(A0, thread); ++ __ move(A1, reason); // exec_mode ++ __ addiu(SP, SP, -additional_words * wordSize); ++ ++ __ set_last_Java_frame(NOREG, NOREG, NULL); ++ ++ // Call fetch_unroll_info(). Need thread and this frame, but NOT official VM entry - cannot block on ++ // this call, no GC can happen. Call should capture return values. 
++ ++ __ relocate(relocInfo::internal_pc_type); ++ { ++ intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 28; ++ __ patchable_set48(AT, save_pc); ++ } ++ __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ ++ __ call((address)Deoptimization::fetch_unroll_info); ++ //__ call(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ oop_maps->add_gc_map(__ pc() - start, map); ++ __ addiu(SP, SP, additional_words * wordSize); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ reset_last_Java_frame(false); ++ ++ // Load UnrollBlock into S7 ++ __ move(unroll, V0); ++ ++ ++ // Move the unpack kind to a safe place in the UnrollBlock because ++ // we are very short of registers ++ ++ Address unpack_kind(unroll, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()); ++ __ sw(reason, unpack_kind); ++ // save the unpack_kind value ++ // Retrieve the possible live values (return values) ++ // All callee save registers representing jvm state ++ // are now in the vframeArray. ++ ++ Label noException; ++ __ move(AT, Deoptimization::Unpack_exception); ++ __ bne(AT, reason, noException);// Was exception pending? ++ __ delayed()->nop(); ++ __ ld_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset())); ++ __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(R0, thread, in_bytes(JavaThread::exception_oop_offset())); ++ ++ __ verify_oop(V0); ++ ++ // Overwrite the result registers with the exception results. ++ __ st_ptr(V0, SP, RegisterSaver::v0Offset()*wordSize); ++ __ st_ptr(V1, SP, RegisterSaver::v1Offset()*wordSize); ++ ++ __ bind(noException); ++ ++ ++ // Stack is back to only having register save data on the stack. ++ // Now restore the result registers. Everything else is either dead or captured ++ // in the vframeArray. ++ ++ RegisterSaver::restore_result_registers(masm); ++ // All of the register save area has been popped of the stack. Only the ++ // return address remains. ++ // Pop all the frames we must move/replace. ++ // Frame picture (youngest to oldest) ++ // 1: self-frame (no frame link) ++ // 2: deopting frame (no frame link) ++ // 3: caller of deopting frame (could be compiled/interpreted). ++ // ++ // Note: by leaving the return address of self-frame on the stack ++ // and using the size of frame 2 to adjust the stack ++ // when we are done the return to frame 3 will still be on the stack. 
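What the frame-replacement loop below does, sketched as plain C++ over a simplified UnrollBlock. The struct, field names and units are stand-ins chosen for illustration, not the real Deoptimization::UnrollBlock layout:

#include <cstdint>
#include <cstdio>

// Simplified stand-in for the unroll information (names/units illustrative).
struct FakeUnrollBlock {
  int             size_of_deoptimized_frame;  // bytes to pop
  int             caller_adjustment;          // extra bytes for the caller's locals
  int             number_of_frames;           // skeletal interpreter frames to push
  const int*      frame_sizes;                // bytes per new frame
  const uint64_t* frame_pcs;                  // return pc per new frame (+1 final entry)
};

static void replace_frames(uint64_t& sp, const FakeUnrollBlock& ub) {
  sp += ub.size_of_deoptimized_frame;         // pop the deoptimized compiled frame
  sp += sizeof(uint64_t);                     // trash the old return pc
  sp -= ub.caller_adjustment;                 // extend the caller for extra locals

  for (int i = 0; i < ub.number_of_frames; i++) {
    sp -= 2 * sizeof(uint64_t);               // push return pc and saved fp by hand
    sp -= ub.frame_sizes[i] - 2 * sizeof(uint64_t);  // reserve the rest of the frame
    std::printf("frame %d: pc=%#llx sp=%#llx\n",
                i, (unsigned long long)ub.frame_pcs[i], (unsigned long long)sp);
  }
  // frame_pcs[number_of_frames] is where the re-pushed self-frame will return to.
}

int main() {
  const int      sizes[] = { 96, 112 };
  const uint64_t pcs[]   = { 0x1000, 0x2000, 0x3000 };
  FakeUnrollBlock ub = { 512, 16, 2, sizes, pcs };
  uint64_t sp = 0x7fff0000;
  replace_frames(sp, ub);
  return 0;
}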
++ ++ // register for the sender's sp ++ Register sender_sp = Rsender; ++ // register for frame pcs ++ Register pcs = T0; ++ // register for frame sizes ++ Register sizes = T1; ++ // register for frame count ++ Register count = T3; ++ ++ // Pop deoptimized frame ++ __ lw(AT, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()); ++ __ addu(SP, SP, AT); ++ // sp should be pointing at the return address to the caller (3) ++ ++ // Load array of frame pcs into pcs ++ __ ld_ptr(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()); ++ __ addiu(SP, SP, wordSize); // trash the old pc ++ // Load array of frame sizes into T6 ++ __ ld_ptr(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()); ++ ++ ++ ++ // Load count of frams into T3 ++ __ lw(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()); ++ // Pick up the initial fp we should save ++ __ ld(FP, unroll, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()); ++ // Now adjust the caller's stack to make up for the extra locals ++ // but record the original sp so that we can save it in the skeletal interpreter ++ // frame and the stack walking of interpreter_sender will get the unextended sp ++ // value and not the "real" sp value. ++ __ move(sender_sp, SP); ++ __ lw(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()); ++ __ subu(SP, SP, AT); ++ ++ // Push interpreter frames in a loop ++ // ++ //Loop: ++ // 0x000000555bd82d18: lw t2, 0x0(t1) ; lw sizes[i] <--- error lw->ld ++ // 0x000000555bd82d1c: ld at, 0x0(t0) ; ld pcs[i] ++ // 0x000000555bd82d20: daddiu t2, t2, 0xfffffff0 ; t2 -= 16 ++ // 0x000000555bd82d24: daddiu sp, sp, 0xfffffff0 ++ // 0x000000555bd82d28: sd fp, 0x0(sp) ; push fp ++ // 0x000000555bd82d2c: sd at, 0x8(sp) ; push at ++ // 0x000000555bd82d30: daddu fp, sp, zero ; fp <- sp ++ // 0x000000555bd82d34: dsubu sp, sp, t2 ; sp -= t2 ++ // 0x000000555bd82d38: sd zero, 0xfffffff0(fp) ; __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ // 0x000000555bd82d3c: sd s4, 0xfffffff8(fp) ; __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize); ++ // 0x000000555bd82d40: daddu s4, sp, zero ; move(sender_sp, SP); ++ // 0x000000555bd82d44: daddiu t3, t3, 0xffffffff ; count -- ++ // 0x000000555bd82d48: daddiu t1, t1, 0x4 ; sizes += 4 ++ // 0x000000555bd82d4c: bne t3, zero, 0x000000555bd82d18 ++ // 0x000000555bd82d50: daddiu t0, t0, 0x4 ; <--- error t0 += 8 ++ // ++ // pcs[0] = frame_pcs[0] = deopt_sender.raw_pc(); regex.split ++ Label loop; ++ __ bind(loop); ++ __ ld(T2, sizes, 0); // Load frame size ++ __ ld_ptr(AT, pcs, 0); // save return address ++ __ addiu(T2, T2, -2*wordSize); // we'll push pc and fp, by hand ++ __ push2(AT, FP); ++ __ move(FP, SP); ++ __ subu(SP, SP, T2); // Prolog! 
++ // This value is corrected by layout_activation_impl ++ __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable ++ __ move(sender_sp, SP); // pass to next frame ++ __ addiu(count, count, -1); // decrement counter ++ __ addiu(sizes, sizes, wordSize); // Bump array pointer (sizes) ++ __ bne(count, R0, loop); ++ __ delayed()->addiu(pcs, pcs, wordSize); // Bump array pointer (pcs) ++ __ ld(AT, pcs, 0); // frame_pcs[number_of_frames] = Interpreter::deopt_entry(vtos, 0); ++ // Re-push self-frame ++ __ push2(AT, FP); ++ __ move(FP, SP); ++ __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize); ++ __ addiu(SP, SP, -(frame_size_in_words - 2 - additional_words) * wordSize); ++ ++ // Restore frame locals after moving the frame ++ __ sd(V0, SP, RegisterSaver::v0Offset() * wordSize); ++ __ sd(V1, SP, RegisterSaver::v1Offset() * wordSize); ++ __ sdc1(F0, SP, RegisterSaver::fpResultOffset()* wordSize);// Pop float stack and store in local ++ __ sdc1(F1, SP, (RegisterSaver::fpResultOffset() + 1) * wordSize); ++ ++ ++ // Call unpack_frames(). Need thread and this frame, but NOT official VM entry - cannot block on ++ // this call, no GC can happen. ++ __ move(A1, reason); // exec_mode ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ move(A0, thread); // thread ++ __ addiu(SP, SP, (-additional_words) *wordSize); ++ ++ // set last_Java_sp, last_Java_fp ++ __ set_last_Java_frame(NOREG, FP, NULL); ++ ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); // Fix stack alignment as required by ABI ++ ++ __ relocate(relocInfo::internal_pc_type); ++ { ++ intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 28; ++ __ patchable_set48(AT, save_pc); ++ } ++ __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ ++ __ call(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ // Revert SP alignment after call since we're going to do some SP relative addressing below ++ __ ld(SP, thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // Set an oopmap for the call site ++ oop_maps->add_gc_map(__ offset(), new OopMap( frame_size_in_words , 0)); ++ ++ __ push(V0); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ reset_last_Java_frame(true); ++ ++ // Collect return values ++ __ ld(V0, SP, (RegisterSaver::v0Offset() + additional_words + 1) * wordSize); ++ __ ld(V1, SP, (RegisterSaver::v1Offset() + additional_words + 1) * wordSize); ++ __ ldc1(F0, SP, (RegisterSaver::fpResultOffset() + additional_words + 1) * wordSize);// Pop float stack and store in local ++ __ ldc1(F1, SP, (RegisterSaver::fpResultOffset() + additional_words + 2) * wordSize); ++ //FIXME, ++ // Clear floating point stack before returning to interpreter ++ __ empty_FPU_stack(); ++ //FIXME, we should consider about float and double ++ // Push a float or double return value if necessary. 
++ __ leave(); ++ ++ // Jump to interpreter ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ masm->flush(); ++ _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words); ++ _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); ++} ++ ++#ifdef COMPILER2 ++ ++//------------------------------generate_uncommon_trap_blob-------------------- ++// Ought to generate an ideal graph & compile, but here's some SPARC ASM ++// instead. ++void SharedRuntime::generate_uncommon_trap_blob() { ++ // allocate space for the code ++ ResourceMark rm; ++ // setup code generation tools ++ CodeBuffer buffer ("uncommon_trap_blob", 512*80 , 512*40 ); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ ++ enum frame_layout { ++ fp_off, fp_off2, ++ return_off, return_off2, ++ framesize ++ }; ++ assert(framesize % 4 == 0, "sp not 16-byte aligned"); ++ ++ address start = __ pc(); ++ ++ // Push self-frame. ++ __ daddiu(SP, SP, -framesize * BytesPerInt); ++ ++ __ sd(RA, SP, return_off * BytesPerInt); ++ __ sd(FP, SP, fp_off * BytesPerInt); ++ ++ __ daddiu(FP, SP, fp_off * BytesPerInt); ++ ++ // Clear the floating point exception stack ++ __ empty_FPU_stack(); ++ ++ Register thread = TREG; ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // set last_Java_sp ++ __ set_last_Java_frame(NOREG, FP, NULL); ++ __ relocate(relocInfo::internal_pc_type); ++ { ++ long save_pc = (long)__ pc() + 56; ++ __ patchable_set48(AT, (long)save_pc); ++ __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ } ++ // Call C code. Need thread but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. Call should ++ // capture callee-saved registers as well as return values. ++ __ move(A0, thread); ++ // argument already in T0 ++ __ move(A1, T0); ++ __ addiu(A2, R0, Deoptimization::Unpack_uncommon_trap); ++ __ patchable_call((address)Deoptimization::uncommon_trap); ++ ++ // Set an oopmap for the call site ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map = new OopMap( framesize, 0 ); ++ ++ //oop_maps->add_gc_map( __ offset(), true, map); ++ oop_maps->add_gc_map( __ offset(), map); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ reset_last_Java_frame(false); ++ ++ // Load UnrollBlock into S7 ++ Register unroll = S7; ++ __ move(unroll, V0); ++ ++#ifdef ASSERT ++ { Label L; ++ __ ld_ptr(AT, unroll, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()); ++ __ li(T9, Deoptimization::Unpack_uncommon_trap); ++ __ beq(AT, T9, L); ++ __ delayed()->nop(); ++ __ stop("SharedRuntime::generate_deopt_blob: expected Unpack_uncommon_trap"); ++ __ bind(L); ++ } ++#endif ++ ++ // Pop all the frames we must move/replace. ++ // ++ // Frame picture (youngest to oldest) ++ // 1: self-frame (no frame link) ++ // 2: deopting frame (no frame link) ++ // 3: possible-i2c-adapter-frame ++ // 4: caller of deopting frame (could be compiled/interpreted. 
If interpreted we will create an ++ // and c2i here) ++ ++ __ daddiu(SP, SP, framesize * BytesPerInt); ++ ++ // Pop deoptimized frame ++ __ lw(AT, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()); ++ __ daddu(SP, SP, AT); ++ ++ // register for frame pcs ++ Register pcs = T8; ++ // register for frame sizes ++ Register sizes = T9; ++ // register for frame count ++ Register count = T3; ++ // register for the sender's sp ++ Register sender_sp = T1; ++ ++ // sp should be pointing at the return address to the caller (4) ++ // Load array of frame pcs ++ __ ld(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()); ++ ++ // Load array of frame sizes ++ __ ld(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()); ++ __ lwu(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()); ++ ++ // Pick up the initial fp we should save ++ __ ld(FP, unroll, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()); ++ // Now adjust the caller's stack to make up for the extra locals ++ // but record the original sp so that we can save it in the skeletal interpreter ++ // frame and the stack walking of interpreter_sender will get the unextended sp ++ // value and not the "real" sp value. ++ ++ __ move(sender_sp, SP); ++ __ lw(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()); ++ __ dsubu(SP, SP, AT); ++ // Push interpreter frames in a loop ++ Label loop; ++ __ bind(loop); ++ __ ld(T2, sizes, 0); // Load frame size ++ __ ld(AT, pcs, 0); // save return address ++ __ daddiu(T2, T2, -2*wordSize); // we'll push pc and fp, by hand ++ __ push2(AT, FP); ++ __ move(FP, SP); ++ __ dsubu(SP, SP, T2); // Prolog! ++ // This value is corrected by layout_activation_impl ++ __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable ++ __ move(sender_sp, SP); // pass to next frame ++ __ daddiu(count, count, -1); // decrement counter ++ __ daddiu(sizes, sizes, wordSize); // Bump array pointer (sizes) ++ __ addiu(pcs, pcs, wordSize); // Bump array pointer (pcs) ++ __ bne(count, R0, loop); ++ __ delayed()->nop(); // Bump array pointer (pcs) ++ ++ __ ld(RA, pcs, 0); ++ ++ // Re-push self-frame ++ // save old & set new FP ++ // save final return address ++ __ enter(); ++ ++ // Use FP because the frames look interpreted now ++ // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP. ++ // Don't need the precise return PC here, just precise enough to point into this code blob. ++ address the_pc = __ pc(); ++ __ set_last_Java_frame(NOREG, FP, the_pc); ++ ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); // Fix stack alignment as required by ABI ++ ++ // Call C code. Need thread but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. Call should ++ // restore return values to their stack-slots with the new SP. ++ __ move(A0, thread); ++ __ addiu(A1, R0, Deoptimization::Unpack_uncommon_trap); ++ __ patchable_call((address)Deoptimization::unpack_frames); ++ // Set an oopmap for the call site ++ oop_maps->add_gc_map( __ offset(), new OopMap( framesize, 0 ) ); ++ ++ __ reset_last_Java_frame(true); ++ ++ // Pop self-frame. ++ __ leave(); // Epilog! 
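One detail worth spelling out in the blob above: the self-frame is described by the small frame_layout enum (two BytesPerInt slots for the saved FP, two for the return address), so assert(framesize % 4 == 0, "sp not 16-byte aligned") is really a 16-byte stack-alignment check. A throwaway snippet makes the arithmetic explicit (illustrative only, BytesPerInt assumed to be 4):

  // Same shape as the frame_layout enum above: framesize == 4 slots of 4 bytes each.
  enum sketch_frame_layout { s_fp_off, s_fp_off2, s_return_off, s_return_off2, s_framesize };
  static_assert((s_framesize * 4) % 16 == 0, "self-frame keeps SP 16-byte aligned");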
++ ++ // Jump to interpreter ++ __ jr(RA); ++ __ delayed()->nop(); ++ // ------------- ++ // make sure all code is generated ++ masm->flush(); ++ ++ _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps, framesize / 2); ++} ++ ++#endif // COMPILER2 ++ ++//------------------------------generate_handler_blob------------------- ++// ++// Generate a special Compile2Runtime blob that saves all registers, and sets ++// up an OopMap and calls safepoint code to stop the compiled code for ++// a safepoint. ++// ++// This blob is jumped to (via a breakpoint and the signal handler) from a ++// safepoint in compiled code. ++ ++SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int pool_type) { ++ ++ // Account for thread arg in our frame ++ const int additional_words = 0; ++ int frame_size_in_words; ++ ++ assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); ++ ++ ResourceMark rm; ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map; ++ ++ // allocate space for the code ++ // setup code generation tools ++ CodeBuffer buffer ("handler_blob", 2048, 512); ++ MacroAssembler* masm = new MacroAssembler( &buffer); ++ ++ const Register thread = TREG; ++ address start = __ pc(); ++ address call_pc = NULL; ++ bool cause_return = (pool_type == POLL_AT_RETURN); ++ bool save_vectors = (pool_type == POLL_AT_VECTOR_LOOP); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words, save_vectors); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ // The following is basically a call_VM. However, we need the precise ++ // address of the call in order to generate an oopmap. Hence, we do all the ++ // work outselvs. ++ ++ __ set_last_Java_frame(NOREG, NOREG, NULL); ++ ++ if (!cause_return) { ++ // overwrite the return address pushed by save_live_registers ++ // Additionally, TSR is a callee-saved register so we can look at ++ // it later to determine if someone changed the return address for ++ // us! ++ __ ld_ptr(TSR, thread, in_bytes(JavaThread::saved_exception_pc_offset())); ++ __ st_ptr(TSR, SP, RegisterSaver::raOffset() * wordSize); ++ } ++ ++ // Do the call ++ __ move(A0, thread); ++ __ call(call_ptr); ++ __ delayed()->nop(); ++ ++ // Set an oopmap for the call site. This oopmap will map all ++ // oop-registers and debug-info registers as callee-saved. This ++ // will allow deoptimization at this safepoint to find all possible ++ // debug-info recordings, as well as let GC find all oops. ++ oop_maps->add_gc_map(__ offset(), map); ++ ++ Label noException; ++ ++ // Clear last_Java_sp again ++ __ reset_last_Java_frame(false); ++ ++ __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, noException); ++ __ delayed()->nop(); ++ ++ // Exception pending ++ ++ RegisterSaver::restore_live_registers(masm, save_vectors); ++ //forward_exception_entry need return address on the stack ++ __ push(RA); ++ __ patchable_jump((address)StubRoutines::forward_exception_entry()); ++ ++ // No exception case ++ __ bind(noException); ++ ++ Label no_adjust, bail; ++ if (SafepointMechanism::uses_thread_local_poll() && !cause_return) { ++ // If our stashed return pc was modified by the runtime we avoid touching it ++ __ ld_ptr(AT, SP, RegisterSaver::raOffset() * wordSize); ++ __ bne(AT, TSR, no_adjust); ++ __ delayed()->nop(); ++ ++#ifdef ASSERT ++ // Verify the correct encoding of the poll we're about to skip. 
++ // See NativeInstruction::is_safepoint_poll() ++ __ lwu(AT, TSR, 0); ++ __ dsrl(AT, AT, 16); ++ __ andi(AT, AT, 0xfc1f); ++ __ xori(AT, AT, 0x8c01); ++ __ bne(AT, R0, bail); ++ __ delayed()->nop(); ++#endif ++ // Adjust return pc forward to step over the safepoint poll instruction ++ __ addiu(RA, TSR, 4); // NativeInstruction::instruction_size=4 ++ __ st_ptr(RA, SP, RegisterSaver::raOffset() * wordSize); ++ } ++ ++ __ bind(no_adjust); ++ // Normal exit, register restoring and exit ++ RegisterSaver::restore_live_registers(masm, save_vectors); ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++#ifdef ASSERT ++ __ bind(bail); ++ __ stop("Attempting to adjust pc to skip safepoint poll but the return point is not what we expected"); ++#endif ++ ++ // Make sure all code is generated ++ masm->flush(); ++ ++ // Fill-out other meta info ++ return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words); ++} ++ ++// ++// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss ++// ++// Generate a stub that calls into vm to find out the proper destination ++// of a java call. All the argument registers are live at this point ++// but since this is generic code we don't know what they are and the caller ++// must do any gc of the args. ++// ++RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) { ++ assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); ++ ++ // allocate space for the code ++ ResourceMark rm; ++ ++ //CodeBuffer buffer(name, 1000, 512); ++ CodeBuffer buffer(name, 2000, 2048); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ ++ int frame_size_words; ++ //we put the thread in A0 ++ ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map = NULL; ++ ++ int start = __ offset(); ++ map = RegisterSaver::save_live_registers(masm, 0, &frame_size_words); ++ ++ ++ int frame_complete = __ offset(); ++ ++#ifndef OPT_THREAD ++ const Register thread = T8; ++ __ get_thread(thread); ++#else ++ const Register thread = TREG; ++#endif ++ ++ __ move(A0, thread); ++ __ set_last_Java_frame(noreg, FP, NULL); ++ //align the stack before invoke native ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ __ relocate(relocInfo::internal_pc_type); ++ { ++ intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 24 + 1 * BytesPerInstWord; ++ __ patchable_set48(AT, save_pc); ++ } ++ __ sd(AT, thread, in_bytes(JavaThread::last_Java_pc_offset())); ++ ++ __ call(destination); ++ __ delayed()->nop(); ++ ++ // Set an oopmap for the call site. ++ // We need this not only for callee-saved registers, but also for volatile ++ // registers that the compiler might be keeping live across a safepoint. ++ oop_maps->add_gc_map( __ offset() - start, map); ++ // V0 contains the address we are going to jump to assuming no exception got installed ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ ld_ptr(SP, thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // clear last_Java_sp ++ __ reset_last_Java_frame(true); ++ // check for pending exceptions ++ Label pending; ++ __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, pending); ++ __ delayed()->nop(); ++ // get the returned Method* ++ //FIXME, do mips need this ? 
++ __ get_vm_result_2(Rmethod, thread); // Refer to OpenJDK8 ++ __ st_ptr(Rmethod, SP, RegisterSaver::methodOffset() * wordSize); ++ __ st_ptr(V0, SP, RegisterSaver::v0Offset() * wordSize); ++ RegisterSaver::restore_live_registers(masm); ++ ++ // We are back the the original state on entry and ready to go the callee method. ++ __ jr(V0); ++ __ delayed()->nop(); ++ // Pending exception after the safepoint ++ ++ __ bind(pending); ++ ++ RegisterSaver::restore_live_registers(masm); ++ ++ // exception pending => remove activation and forward to exception handler ++ //forward_exception_entry need return address on the stack ++ __ push(RA); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ st_ptr(R0, thread, in_bytes(JavaThread::vm_result_offset())); ++ __ ld_ptr(V0, thread, in_bytes(Thread::pending_exception_offset())); ++ __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ // ++ // make sure all code is generated ++ masm->flush(); ++ ++ RuntimeStub* tmp= RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true); ++ return tmp; ++} ++ ++extern "C" int SpinPause() {return 0;} ++ ++ ++//------------------------------Montgomery multiplication------------------------ ++// ++ ++// Subtract 0:b from carry:a. Return carry. ++static unsigned long ++sub(unsigned long a[], unsigned long b[], unsigned long carry, long len) { ++ long borrow = 0, t = 0; ++ unsigned long tmp0, tmp1; ++ __asm__ __volatile__ ( ++ "0: \n" ++ "ld %[tmp0], 0(%[a]) \n" ++ "ld %[tmp1], 0(%[b]) \n" ++ "sltu %[t], %[tmp0], %[borrow] \n" ++ "dsubu %[tmp0], %[tmp0], %[borrow] \n" ++ "sltu %[borrow], %[tmp0], %[tmp1] \n" ++ "or %[borrow], %[borrow], %[t] \n" ++ "dsubu %[tmp0], %[tmp0], %[tmp1] \n" ++ "sd %[tmp0], 0(%[a]) \n" ++ "daddiu %[a], %[a], 8 \n" ++ "daddiu %[b], %[b], 8 \n" ++ "daddiu %[len], %[len], -1 \n" ++ "bgtz %[len], 0b \n" ++ "dsubu %[tmp0], %[carry], %[borrow] \n" ++ : [len]"+r"(len), [tmp0]"=&r"(tmp0), [tmp1]"=&r"(tmp1), [borrow]"+r"(borrow), [a]"+r"(a), [b]"+r"(b), [t]"+r"(t) ++ : [carry]"r"(carry) ++ : "memory" ++ ); ++ return tmp0; ++} ++ ++// Multiply (unsigned) Long A by Long B, accumulating the double- ++// length result into the accumulator formed of t0, t1, and t2. ++inline void MACC(unsigned long A, unsigned long B, unsigned long &t0, unsigned long &t1, unsigned long &t2) { ++ unsigned long hi, lo, carry = 0, t = 0; ++ __asm__ __volatile__( ++ "dmultu %[A], %[B] \n" ++ "mfhi %[hi] \n" ++ "mflo %[lo] \n" ++ "daddu %[t0], %[t0], %[lo] \n" ++ "sltu %[carry], %[t0], %[lo] \n" ++ "daddu %[t1], %[t1], %[carry] \n" ++ "sltu %[t], %[t1], %[carry] \n" ++ "daddu %[t1], %[t1], %[hi] \n" ++ "sltu %[carry], %[t1], %[hi] \n" ++ "or %[carry], %[carry], %[t] \n" ++ "daddu %[t2], %[t2], %[carry] \n" ++ : [hi]"=&r"(hi), [lo]"=&r"(lo), [t0]"+r"(t0), [t1]"+r"(t1), [t2]"+r"(t2), [carry]"+r"(carry), [t]"+r"(t) ++ : [A]"r"(A), [B]"r"(B) ++ : ++ ); ++} ++ ++// As above, but add twice the double-length result into the ++// accumulator. 
++inline void MACC2(unsigned long A, unsigned long B, unsigned long &t0, unsigned long &t1, unsigned long &t2) { ++ unsigned long hi, lo, carry = 0, t = 0; ++ __asm__ __volatile__( ++ "dmultu %[A], %[B] \n" ++ "mfhi %[hi] \n" ++ "mflo %[lo] \n" ++ "daddu %[t0], %[t0], %[lo] \n" ++ "sltu %[carry], %[t0], %[lo] \n" ++ "daddu %[t1], %[t1], %[carry] \n" ++ "sltu %[t], %[t1], %[carry] \n" ++ "daddu %[t1], %[t1], %[hi] \n" ++ "sltu %[carry], %[t1], %[hi] \n" ++ "or %[carry], %[carry], %[t] \n" ++ "daddu %[t2], %[t2], %[carry] \n" ++ "daddu %[t0], %[t0], %[lo] \n" ++ "sltu %[carry], %[t0], %[lo] \n" ++ "daddu %[t1], %[t1], %[carry] \n" ++ "sltu %[t], %[t1], %[carry] \n" ++ "daddu %[t1], %[t1], %[hi] \n" ++ "sltu %[carry], %[t1], %[hi] \n" ++ "or %[carry], %[carry], %[t] \n" ++ "daddu %[t2], %[t2], %[carry] \n" ++ : [hi]"=&r"(hi), [lo]"=&r"(lo), [t0]"+r"(t0), [t1]"+r"(t1), [t2]"+r"(t2), [carry]"+r"(carry), [t]"+r"(t) ++ : [A]"r"(A), [B]"r"(B) ++ : ++ ); ++} ++ ++// Fast Montgomery multiplication. The derivation of the algorithm is ++// in A Cryptographic Library for the Motorola DSP56000, ++// Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237. ++ ++static void __attribute__((noinline)) ++montgomery_multiply(unsigned long a[], unsigned long b[], unsigned long n[], ++ unsigned long m[], unsigned long inv, int len) { ++ unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator ++ int i; ++ ++ assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); ++ ++ for (i = 0; i < len; i++) { ++ int j; ++ for (j = 0; j < i; j++) { ++ MACC(a[j], b[i-j], t0, t1, t2); ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ MACC(a[i], b[0], t0, t1, t2); ++ m[i] = t0 * inv; ++ MACC(m[i], n[0], t0, t1, t2); ++ ++ assert(t0 == 0, "broken Montgomery multiply"); ++ ++ t0 = t1; t1 = t2; t2 = 0; ++ } ++ ++ for (i = len; i < 2*len; i++) { ++ int j; ++ for (j = i-len+1; j < len; j++) { ++ MACC(a[j], b[i-j], t0, t1, t2); ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ m[i-len] = t0; ++ t0 = t1; t1 = t2; t2 = 0; ++ } ++ ++ while (t0) ++ t0 = sub(m, n, t0, len); ++} ++ ++// Fast Montgomery squaring. This uses asymptotically 25% fewer ++// multiplies so it should be up to 25% faster than Montgomery ++// multiplication. However, its loop control is more complex and it ++// may actually run slower on some machines. 
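MACC and MACC2 above maintain a 192-bit running sum in (t0, t1, t2) using dmultu/mfhi/mflo and hand-written carry propagation. For checking the carry logic, here is an equivalent portable sketch (it relies on GCC/Clang's unsigned __int128 and is not part of the patch):

  #include <cstdint>

  // (t2:t1:t0) += A * B, carries propagated exactly as in the inline asm above.
  static inline void macc_sketch(uint64_t A, uint64_t B,
                                 uint64_t& t0, uint64_t& t1, uint64_t& t2) {
    unsigned __int128 p = (unsigned __int128)A * B;
    uint64_t lo = (uint64_t)p;
    uint64_t hi = (uint64_t)(p >> 64);
    uint64_t s  = t0 + lo;
    uint64_t c  = (s < lo);            // carry out of the low word
    t0 = s;
    uint64_t m  = t1 + c;
    uint64_t c2 = (m < c);             // carry from adding that carry
    m  += hi;
    c2 |= (m < hi);                    // carry from adding the high product word
    t1 = m;
    t2 += c2;                          // at most one carry ever reaches the top word
  }

MACC2 is the same accumulation applied twice, which is why its asm body simply repeats the add/carry sequence. The montgomery_square variant introduced in the comment above feeds the same accumulator.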
++ ++static void __attribute__((noinline)) ++montgomery_square(unsigned long a[], unsigned long n[], ++ unsigned long m[], unsigned long inv, int len) { ++ unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator ++ int i; ++ ++ assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); ++ ++ for (i = 0; i < len; i++) { ++ int j; ++ int end = (i+1)/2; ++ for (j = 0; j < end; j++) { ++ MACC2(a[j], a[i-j], t0, t1, t2); ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ if ((i & 1) == 0) { ++ MACC(a[j], a[j], t0, t1, t2); ++ } ++ for (; j < i; j++) { ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ m[i] = t0 * inv; ++ MACC(m[i], n[0], t0, t1, t2); ++ ++ assert(t0 == 0, "broken Montgomery square"); ++ ++ t0 = t1; t1 = t2; t2 = 0; ++ } ++ ++ for (i = len; i < 2*len; i++) { ++ int start = i-len+1; ++ int end = start + (len - start)/2; ++ int j; ++ for (j = start; j < end; j++) { ++ MACC2(a[j], a[i-j], t0, t1, t2); ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ if ((i & 1) == 0) { ++ MACC(a[j], a[j], t0, t1, t2); ++ } ++ for (; j < len; j++) { ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ m[i-len] = t0; ++ t0 = t1; t1 = t2; t2 = 0; ++ } ++ ++ while (t0) ++ t0 = sub(m, n, t0, len); ++} ++ ++// Swap words in a longword. ++static unsigned long swap(unsigned long x) { ++ return (x << 32) | (x >> 32); ++} ++ ++// Copy len longwords from s to d, word-swapping as we go. The ++// destination array is reversed. ++static void reverse_words(unsigned long *s, unsigned long *d, int len) { ++ d += len; ++ while(len-- > 0) { ++ d--; ++ *d = swap(*s); ++ s++; ++ } ++} ++ ++// The threshold at which squaring is advantageous was determined ++// experimentally on an i7-3930K (Ivy Bridge) CPU @ 3.5GHz. ++// Doesn't seem to be relevant for MIPS64 so we use the same value. ++#define MONTGOMERY_SQUARING_THRESHOLD 64 ++ ++void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints, ++ jint len, jlong inv, ++ jint *m_ints) { ++ assert(len % 2 == 0, "array length in montgomery_multiply must be even"); ++ int longwords = len/2; ++ ++ // Make very sure we don't use so much space that the stack might ++ // overflow. 512 jints corresponds to an 16384-bit integer and ++ // will use here a total of 8k bytes of stack space. ++ int total_allocation = longwords * sizeof (unsigned long) * 4; ++ guarantee(total_allocation <= 8192, "must be"); ++ unsigned long *scratch = (unsigned long *)alloca(total_allocation); ++ ++ // Local scratch arrays ++ unsigned long ++ *a = scratch + 0 * longwords, ++ *b = scratch + 1 * longwords, ++ *n = scratch + 2 * longwords, ++ *m = scratch + 3 * longwords; ++ ++ reverse_words((unsigned long *)a_ints, a, longwords); ++ reverse_words((unsigned long *)b_ints, b, longwords); ++ reverse_words((unsigned long *)n_ints, n, longwords); ++ ++ ::montgomery_multiply(a, b, n, m, (unsigned long)inv, longwords); ++ ++ reverse_words(m, (unsigned long *)m_ints, longwords); ++} ++ ++void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints, ++ jint len, jlong inv, ++ jint *m_ints) { ++ assert(len % 2 == 0, "array length in montgomery_square must be even"); ++ int longwords = len/2; ++ ++ // Make very sure we don't use so much space that the stack might ++ // overflow. 512 jints corresponds to an 16384-bit integer and ++ // will use here a total of 6k bytes of stack space. 
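Taking the 512-jint example from these comments: longwords = 512 / 2 = 256, so montgomery_multiply's four scratch arrays occupy 4 * 256 * 8 = 8192 bytes and montgomery_square's three occupy 3 * 256 * 8 = 6144 bytes, which is where the 8k and 6k figures (and the guarantee(total_allocation <= 8192) bounds in both wrappers) come from.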
++ int total_allocation = longwords * sizeof (unsigned long) * 3; ++ guarantee(total_allocation <= 8192, "must be"); ++ unsigned long *scratch = (unsigned long *)alloca(total_allocation); ++ ++ // Local scratch arrays ++ unsigned long ++ *a = scratch + 0 * longwords, ++ *n = scratch + 1 * longwords, ++ *m = scratch + 2 * longwords; ++ ++ reverse_words((unsigned long *)a_ints, a, longwords); ++ reverse_words((unsigned long *)n_ints, n, longwords); ++ ++ if (len >= MONTGOMERY_SQUARING_THRESHOLD) { ++ ::montgomery_square(a, n, m, (unsigned long)inv, longwords); ++ } else { ++ ::montgomery_multiply(a, a, n, m, (unsigned long)inv, longwords); ++ } ++ ++ reverse_words(m, (unsigned long *)m_ints, longwords); ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/stubGenerator_mips_64.cpp b/src/hotspot/cpu/mips/stubGenerator_mips_64.cpp +--- a/src/hotspot/cpu/mips/stubGenerator_mips_64.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/stubGenerator_mips_64.cpp 2024-01-30 10:00:11.848098317 +0800 +@@ -0,0 +1,2162 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "interpreter/interpreter.hpp" ++#include "nativeInst_mips.hpp" ++#include "oops/instanceOop.hpp" ++#include "oops/method.hpp" ++#include "oops/objArrayKlass.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++#ifdef COMPILER2 ++#include "opto/runtime.hpp" ++#endif ++ ++// Declaration and definition of StubGenerator (no .hpp file). ++// For a more detailed description of the stub routine structure ++// see the comment in stubRoutines.hpp ++ ++#define __ _masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++#define TIMES_OOP (UseCompressedOops ? 
Address::times_4 : Address::times_8) ++//#define a__ ((Assembler*)_masm)-> ++ ++//#ifdef PRODUCT ++//#define BLOCK_COMMENT(str) /* nothing */ ++//#else ++//#define BLOCK_COMMENT(str) __ block_comment(str) ++//#endif ++ ++//#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++const int MXCSR_MASK = 0xFFC0; // Mask out any pending exceptions ++ ++// Stub Code definitions ++ ++class StubGenerator: public StubCodeGenerator { ++ private: ++ ++ // ABI mips n64 ++ // This fig is not MIPS ABI. It is call Java from C ABI. ++ // Call stubs are used to call Java from C ++ // ++ // [ return_from_Java ] ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ // ... ++ // -8 [ S6 ] ++ // -7 [ S5 ] ++ // -6 [ S4 ] ++ // -5 [ S3 ] ++ // -4 [ S1 ] ++ // -3 [ TSR(S2) ] ++ // -2 [ LVP(S7) ] ++ // -1 [ BCP(S1) ] ++ // 0 [ saved fp ] <--- fp_after_call ++ // 1 [ return address ] ++ // 2 [ ptr. to call wrapper ] <--- a0 (old sp -->)fp ++ // 3 [ result ] <--- a1 ++ // 4 [ result_type ] <--- a2 ++ // 5 [ method ] <--- a3 ++ // 6 [ entry_point ] <--- a4 ++ // 7 [ parameters ] <--- a5 ++ // 8 [ parameter_size ] <--- a6 ++ // 9 [ thread ] <--- a7 ++ ++ // ++ // n64 does not save paras in sp. ++ // ++ // [ return_from_Java ] ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ // ... ++ //-13 [ thread ] ++ //-12 [ result_type ] <--- a2 ++ //-11 [ result ] <--- a1 ++ //-10 [ ] ++ // -9 [ ptr. to call wrapper ] <--- a0 ++ // -8 [ S6 ] ++ // -7 [ S5 ] ++ // -6 [ S4 ] ++ // -5 [ S3 ] ++ // -4 [ S1 ] ++ // -3 [ TSR(S2) ] ++ // -2 [ LVP(S7) ] ++ // -1 [ BCP(S1) ] ++ // 0 [ saved fp ] <--- fp_after_call ++ // 1 [ return address ] ++ // 2 [ ] <--- old sp ++ // ++ // Find a right place in the call_stub for GP. ++ // GP will point to the starting point of Interpreter::dispatch_table(itos). ++ // It should be saved/restored before/after Java calls. ++ // ++ enum call_stub_layout { ++ RA_off = 1, ++ FP_off = 0, ++ BCP_off = -1, ++ LVP_off = -2, ++ TSR_off = -3, ++ S1_off = -4, ++ S3_off = -5, ++ S4_off = -6, ++ S5_off = -7, ++ S6_off = -8, ++ call_wrapper_off = -9, ++ result_off = -11, ++ result_type_off = -12, ++ thread_off = -13, ++ total_off = thread_off - 1, ++ GP_off = -14, ++ }; ++ ++ address generate_call_stub(address& return_address) { ++ ++ StubCodeMark mark(this, "StubRoutines", "call_stub"); ++ address start = __ pc(); ++ ++ // same as in generate_catch_exception()! 
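The two stack diagrams above also pin down the C-to-Java calling convention: a0..a7 carry the call wrapper, result slot, result type, Method*, entry point, parameter array, parameter count and thread, while the callee-saved registers and GP are parked in the negative-offset slots of the new frame. Seen from the C++ side, the stub is therefore entered through a function pointer of roughly this shape (parameter names taken from the diagram; compare the shared StubRoutines::CallStub typedef):

  // Sketch of the call stub's C++-visible shape; not the literal HotSpot declaration.
  typedef void (*call_stub_sketch_t)(
      address    call_wrapper,     // a0: link back to the JavaCallWrapper
      intptr_t*  result,           // a1: where the Java return value is stored
      BasicType  result_type,      // a2: T_INT / T_LONG / T_FLOAT / T_DOUBLE / ...
      Method*    method,           // a3: callee, ends up in Rmethod
      address    entry_point,      // a4: interpreter or compiled entry, reached via jalr A4
      intptr_t*  parameters,       // a5: Java arguments, copied onto the expression stack
      int        parameter_size,   // a6: number of argument words
      Thread*    thread);          // a7: current thread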
++ ++ // stub code ++ // save ra and fp ++ __ enter(); ++ // I think 14 is the max gap between argument and callee saved register ++ assert((int)frame::entry_frame_call_wrapper_offset == (int)call_wrapper_off, "adjust this code"); ++ __ daddiu(SP, SP, total_off * wordSize); ++ __ sd(BCP, FP, BCP_off * wordSize); ++ __ sd(LVP, FP, LVP_off * wordSize); ++ __ sd(TSR, FP, TSR_off * wordSize); ++ __ sd(S1, FP, S1_off * wordSize); ++ __ sd(S3, FP, S3_off * wordSize); ++ __ sd(S4, FP, S4_off * wordSize); ++ __ sd(S5, FP, S5_off * wordSize); ++ __ sd(S6, FP, S6_off * wordSize); ++ __ sd(A0, FP, call_wrapper_off * wordSize); ++ __ sd(A1, FP, result_off * wordSize); ++ __ sd(A2, FP, result_type_off * wordSize); ++ __ sd(A7, FP, thread_off * wordSize); ++ __ sd(GP, FP, GP_off * wordSize); ++ ++ __ set64(GP, (long)Interpreter::dispatch_table(itos)); ++ ++#ifdef OPT_THREAD ++ __ move(TREG, A7); ++#endif ++ //add for compressedoops ++ __ reinit_heapbase(); ++ ++#ifdef ASSERT ++ // make sure we have no pending exceptions ++ { ++ Label L; ++ __ ld(AT, A7, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ /* FIXME: I do not know how to realize stop in mips arch, do it in the future */ ++ __ stop("StubRoutines::call_stub: entered with pending exception"); ++ __ bind(L); ++ } ++#endif ++ ++ // pass parameters if any ++ // A5: parameter ++ // A6: parameter_size ++ // T0: parameter_size_tmp(--) ++ // T2: offset(++) ++ // T3: tmp ++ Label parameters_done; ++ // judge if the parameter_size equals 0 ++ __ beq(A6, R0, parameters_done); ++ __ delayed()->nop(); ++ __ dsll(AT, A6, Interpreter::logStackElementSize); ++ __ dsubu(SP, SP, AT); ++ __ move(AT, -StackAlignmentInBytes); ++ __ andr(SP, SP , AT); ++ // Copy Java parameters in reverse order (receiver last) ++ // Note that the argument order is inverted in the process ++ Label loop; ++ __ move(T0, A6); ++ __ move(T2, R0); ++ __ bind(loop); ++ ++ // get parameter ++ __ dsll(T3, T0, LogBytesPerWord); ++ __ daddu(T3, T3, A5); ++ __ ld(AT, T3, -wordSize); ++ __ dsll(T3, T2, LogBytesPerWord); ++ __ daddu(T3, T3, SP); ++ __ sd(AT, T3, Interpreter::expr_offset_in_bytes(0)); ++ __ daddiu(T2, T2, 1); ++ __ daddiu(T0, T0, -1); ++ __ bne(T0, R0, loop); ++ __ delayed()->nop(); ++ // advance to next parameter ++ ++ // call Java function ++ __ bind(parameters_done); ++ ++ // receiver in V0, methodOop in Rmethod ++ ++ __ move(Rmethod, A3); ++ __ move(Rsender, SP); //set sender sp ++ __ jalr(A4); ++ __ delayed()->nop(); ++ return_address = __ pc(); ++ ++ Label common_return; ++ __ bind(common_return); ++ ++ // store result depending on type ++ // (everything that is not T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT) ++ __ ld(T0, FP, result_off * wordSize); // result --> T0 ++ Label is_long, is_float, is_double, exit; ++ __ ld(T2, FP, result_type_off * wordSize); // result_type --> T2 ++ __ daddiu(T3, T2, (-1) * T_LONG); ++ __ beq(T3, R0, is_long); ++ __ delayed()->daddiu(T3, T2, (-1) * T_FLOAT); ++ __ beq(T3, R0, is_float); ++ __ delayed()->daddiu(T3, T2, (-1) * T_DOUBLE); ++ __ beq(T3, R0, is_double); ++ __ delayed()->nop(); ++ ++ // handle T_INT case ++ __ sd(V0, T0, 0 * wordSize); ++ __ bind(exit); ++ ++ // restore ++ __ ld(BCP, FP, BCP_off * wordSize); ++ __ ld(LVP, FP, LVP_off * wordSize); ++ __ ld(GP, FP, GP_off * wordSize); ++ __ ld(TSR, FP, TSR_off * wordSize); ++ ++ __ ld(S1, FP, S1_off * wordSize); ++ __ ld(S3, FP, S3_off * wordSize); ++ __ ld(S4, FP, S4_off * wordSize); ++ __ ld(S5, FP, S5_off * wordSize); ++ __ ld(S6, 
FP, S6_off * wordSize); ++ ++ __ leave(); ++ ++ // return ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ // handle return types different from T_INT ++ __ bind(is_long); ++ __ sd(V0, T0, 0 * wordSize); ++ __ b(exit); ++ __ delayed()->nop(); ++ ++ __ bind(is_float); ++ __ swc1(F0, T0, 0 * wordSize); ++ __ b(exit); ++ __ delayed()->nop(); ++ ++ __ bind(is_double); ++ __ sdc1(F0, T0, 0 * wordSize); ++ __ b(exit); ++ __ delayed()->nop(); ++ //FIXME, 1.6 mips version add operation of fpu here ++ StubRoutines::gs2::set_call_stub_compiled_return(__ pc()); ++ __ b(common_return); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ // Return point for a Java call if there's an exception thrown in ++ // Java code. The exception is caught and transformed into a ++ // pending exception stored in JavaThread that can be tested from ++ // within the VM. ++ // ++ // Note: Usually the parameters are removed by the callee. In case ++ // of an exception crossing an activation frame boundary, that is ++ // not the case if the callee is compiled code => need to setup the ++ // sp. ++ // ++ // V0: exception oop ++ ++ address generate_catch_exception() { ++ StubCodeMark mark(this, "StubRoutines", "catch_exception"); ++ address start = __ pc(); ++ ++ Register thread = TREG; ++ ++ // get thread directly ++#ifndef OPT_THREAD ++ __ ld(thread, FP, thread_off * wordSize); ++#endif ++ ++#ifdef ASSERT ++ // verify that threads correspond ++ { Label L; ++ __ get_thread(T8); ++ __ beq(T8, thread, L); ++ __ delayed()->nop(); ++ __ stop("StubRoutines::catch_exception: threads must correspond"); ++ __ bind(L); ++ } ++#endif ++ // set pending exception ++ __ verify_oop(V0); ++ __ sd(V0, thread, in_bytes(Thread::pending_exception_offset())); ++ __ li(AT, (long)__FILE__); ++ __ sd(AT, thread, in_bytes(Thread::exception_file_offset ())); ++ __ li(AT, (long)__LINE__); ++ __ sd(AT, thread, in_bytes(Thread::exception_line_offset ())); ++ ++ // complete return to VM ++ assert(StubRoutines::_call_stub_return_address != NULL, "_call_stub_return_address must have been generated before"); ++ __ jmp(StubRoutines::_call_stub_return_address, relocInfo::none); ++ __ delayed()->nop(); ++ ++ return start; ++ } ++ ++ // Continuation point for runtime calls returning with a pending ++ // exception. The pending exception check happened in the runtime ++ // or native call stub. The pending exception in Thread is ++ // converted into a Java-level exception. ++ // ++ // Contract with Java-level exception handlers: ++ // V0: exception ++ // V1: throwing pc ++ // ++ // NOTE: At entry of this stub, exception-pc must be on stack !! ++ ++ address generate_forward_exception() { ++ StubCodeMark mark(this, "StubRoutines", "forward exception"); ++ //Register thread = TREG; ++ Register thread = TREG; ++ address start = __ pc(); ++ ++ // Upon entry, the sp points to the return address returning into ++ // Java (interpreted or compiled) code; i.e., the return address ++ // throwing pc. ++ // ++ // Arguments pushed before the runtime call are still on the stack ++ // but the exception handler will reset the stack pointer -> ++ // ignore them. A potential result in registers can be ignored as ++ // well. 
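In other words, the stub turns a pending exception left by a runtime or native call stub into the Java-level contract stated above (V0 = exception oop, V1 = throwing pc, handler in T9). An illustrative control-flow sketch, with HotSpot types assumed and the register assignments noted in comments:

  // Illustrative only; mirrors the stub's control flow, not real HotSpot code.
  static void forward_exception_sketch(JavaThread* thread, address* sp) {
    address throwing_pc = *sp;                    // __ ld(A1, SP, 0), later popped into V1
    address handler =
        SharedRuntime::exception_handler_for_return_address(thread, throwing_pc);  // -> T9
    oop exception = thread->pending_exception();  // -> V0
    thread->clear_pending_exception();            // __ sd(R0, thread, pending_exception_offset)
    (void)exception;
    // __ jr(T9): resume at 'handler' with V0 and V1 carrying the contract above
  }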
++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++#ifdef ASSERT ++ // make sure this code is only executed if there is a pending exception ++ { ++ Label L; ++ __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("StubRoutines::forward exception: no pending exception (1)"); ++ __ bind(L); ++ } ++#endif ++ ++ // compute exception handler into T9 ++ __ ld(A1, SP, 0); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, A1); ++ __ move(T9, V0); ++ __ pop(V1); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ ld(V0, thread, in_bytes(Thread::pending_exception_offset())); ++ __ sd(R0, thread, in_bytes(Thread::pending_exception_offset())); ++ ++#ifdef ASSERT ++ // make sure exception is set ++ { ++ Label L; ++ __ bne(V0, R0, L); ++ __ delayed()->nop(); ++ __ stop("StubRoutines::forward exception: no pending exception (2)"); ++ __ bind(L); ++ } ++#endif ++ ++ // continue at exception handler (return address removed) ++ // V0: exception ++ // T9: exception handler ++ // V1: throwing pc ++ __ verify_oop(V0); ++ __ jr(T9); ++ __ delayed()->nop(); ++ ++ return start; ++ } ++ ++ // Non-destructive plausibility checks for oops ++ // ++ address generate_verify_oop() { ++ StubCodeMark mark(this, "StubRoutines", "verify_oop"); ++ address start = __ pc(); ++ __ reinit_heapbase(); ++ __ verify_oop_subroutine(); ++ address end = __ pc(); ++ return start; ++ } ++ ++ // ++ // Generate overlap test for array copy stubs ++ // ++ // Input: ++ // A0 - array1 ++ // A1 - array2 ++ // A2 - element count ++ // ++ ++ // use T9 as temp ++ void array_overlap_test(address no_overlap_target, int log2_elem_size) { ++ int elem_size = 1 << log2_elem_size; ++ Address::ScaleFactor sf = Address::times_1; ++ ++ switch (log2_elem_size) { ++ case 0: sf = Address::times_1; break; ++ case 1: sf = Address::times_2; break; ++ case 2: sf = Address::times_4; break; ++ case 3: sf = Address::times_8; break; ++ } ++ ++ __ dsll(AT, A2, sf); ++ __ daddu(AT, AT, A0); ++ __ daddiu(T9, AT, -elem_size); ++ __ dsubu(AT, A1, A0); ++ __ blez(AT, no_overlap_target); ++ __ delayed()->nop(); ++ __ dsubu(AT, A1, T9); ++ __ bgtz(AT, no_overlap_target); ++ __ delayed()->nop(); ++ ++ // If A0 = 0xf... and A1 = 0x0..., than goto no_overlap_target ++ Label L; ++ __ bgez(A0, L); ++ __ delayed()->nop(); ++ __ bgtz(A1, no_overlap_target); ++ __ delayed()->nop(); ++ __ bind(L); ++ ++ } ++ ++ // ++ // Generate stub for array fill. If "aligned" is true, the ++ // "to" address is assumed to be heapword aligned. 
++ // ++ // Arguments for generated stub: ++ // to: c_rarg0 ++ // value: c_rarg1 ++ // count: c_rarg2 treated as signed ++ // ++ address generate_fill(BasicType t, bool aligned, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ const Register to = A0; // source array address ++ const Register value = A1; // value ++ const Register count = A2; // elements count ++ ++ const Register cnt_words = T8; // temp register ++ ++ __ enter(); ++ ++ Label L_fill_elements, L_exit1; ++ ++ int shift = -1; ++ switch (t) { ++ case T_BYTE: ++ shift = 0; ++ __ slti(AT, count, 8 >> shift); // Short arrays (< 8 bytes) fill by element ++ __ dins(value, value, 8, 8); // 8 bit -> 16 bit ++ __ dins(value, value, 16, 16); // 16 bit -> 32 bit ++ __ bne(AT, R0, L_fill_elements); ++ __ delayed()->nop(); ++ break; ++ case T_SHORT: ++ shift = 1; ++ __ slti(AT, count, 8 >> shift); // Short arrays (< 8 bytes) fill by element ++ __ dins(value, value, 16, 16); // 16 bit -> 32 bit ++ __ bne(AT, R0, L_fill_elements); ++ __ delayed()->nop(); ++ break; ++ case T_INT: ++ shift = 2; ++ __ slti(AT, count, 8 >> shift); // Short arrays (< 8 bytes) fill by element ++ __ bne(AT, R0, L_fill_elements); ++ __ delayed()->nop(); ++ break; ++ default: ShouldNotReachHere(); ++ } ++ ++ // Align source address at 8 bytes address boundary. ++ Label L_skip_align1, L_skip_align2, L_skip_align4; ++ if (!aligned) { ++ switch (t) { ++ case T_BYTE: ++ // One byte misalignment happens only for byte arrays. ++ __ andi(AT, to, 1); ++ __ beq(AT, R0, L_skip_align1); ++ __ delayed()->nop(); ++ __ sb(value, to, 0); ++ __ daddiu(to, to, 1); ++ __ addiu32(count, count, -1); ++ __ bind(L_skip_align1); ++ // Fallthrough ++ case T_SHORT: ++ // Two bytes misalignment happens only for byte and short (char) arrays. ++ __ andi(AT, to, 1 << 1); ++ __ beq(AT, R0, L_skip_align2); ++ __ delayed()->nop(); ++ __ sh(value, to, 0); ++ __ daddiu(to, to, 2); ++ __ addiu32(count, count, -(2 >> shift)); ++ __ bind(L_skip_align2); ++ // Fallthrough ++ case T_INT: ++ // Align to 8 bytes, we know we are 4 byte aligned to start. ++ __ andi(AT, to, 1 << 2); ++ __ beq(AT, R0, L_skip_align4); ++ __ delayed()->nop(); ++ __ sw(value, to, 0); ++ __ daddiu(to, to, 4); ++ __ addiu32(count, count, -(4 >> shift)); ++ __ bind(L_skip_align4); ++ break; ++ default: ShouldNotReachHere(); ++ } ++ } ++ ++ // ++ // Fill large chunks ++ // ++ __ srl(cnt_words, count, 3 - shift); // number of words ++ __ dinsu(value, value, 32, 32); // 32 bit -> 64 bit ++ __ sll(AT, cnt_words, 3 - shift); ++ __ subu32(count, count, AT); ++ ++ Label L_loop_begin, L_loop_not_64bytes_fill, L_loop_end; ++ __ addiu32(AT, cnt_words, -8); ++ __ bltz(AT, L_loop_not_64bytes_fill); ++ __ delayed()->nop(); ++ __ bind(L_loop_begin); ++ __ sd(value, to, 0); ++ __ sd(value, to, 8); ++ __ sd(value, to, 16); ++ __ sd(value, to, 24); ++ __ sd(value, to, 32); ++ __ sd(value, to, 40); ++ __ sd(value, to, 48); ++ __ sd(value, to, 56); ++ __ daddiu(to, to, 64); ++ __ addiu32(cnt_words, cnt_words, -8); ++ __ addiu32(AT, cnt_words, -8); ++ __ bgez(AT, L_loop_begin); ++ __ delayed()->nop(); ++ ++ __ bind(L_loop_not_64bytes_fill); ++ __ beq(cnt_words, R0, L_loop_end); ++ __ delayed()->nop(); ++ __ sd(value, to, 0); ++ __ daddiu(to, to, 8); ++ __ addiu32(cnt_words, cnt_words, -1); ++ __ b(L_loop_not_64bytes_fill); ++ __ delayed()->nop(); ++ __ bind(L_loop_end); ++ ++ // Remaining count is less than 8 bytes. Fill it by a single store. 
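That closing overlapping store is safe because arrays shorter than 8 bytes were dispatched to L_fill_elements up front, so the word written at the very end only re-writes bytes that are already filled. The value it stores was widened earlier with dins/dinsu; in portable terms the widening is:

  #include <cstdint>

  // Widening of the fill value as done with dins/dinsu above (sketch); assumes
  // 'value' is zero-extended to the element width on entry.
  static inline uint64_t replicate_fill_value(uint64_t value, int log2_elem_size) {
    if (log2_elem_size == 0) value |= value << 8;   //  8 -> 16 bits (dins  8, 8)
    if (log2_elem_size <= 1) value |= value << 16;  // 16 -> 32 bits (dins 16, 16)
    value |= value << 32;                           // 32 -> 64 bits (dinsu 32, 32)
    return value;
  }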
++ // Note that the total length is no less than 8 bytes. ++ if (t == T_BYTE || t == T_SHORT) { ++ Label L_exit1; ++ __ beq(count, R0, L_exit1); ++ __ delayed()->nop(); ++ __ sll(AT, count, shift); ++ __ daddu(to, to, AT); // points to the end ++ __ sd(value, to, -8); // overwrite some elements ++ __ bind(L_exit1); ++ __ leave(); ++ __ jr(RA); ++ __ delayed()->nop(); ++ } ++ ++ // Handle copies less than 8 bytes. ++ Label L_fill_2, L_fill_4, L_exit2; ++ __ bind(L_fill_elements); ++ switch (t) { ++ case T_BYTE: ++ __ andi(AT, count, 1); ++ __ beq(AT, R0, L_fill_2); ++ __ delayed()->nop(); ++ __ sb(value, to, 0); ++ __ daddiu(to, to, 1); ++ __ bind(L_fill_2); ++ __ andi(AT, count, 1 << 1); ++ __ beq(AT, R0, L_fill_4); ++ __ delayed()->nop(); ++ __ sh(value, to, 0); ++ __ daddiu(to, to, 2); ++ __ bind(L_fill_4); ++ __ andi(AT, count, 1 << 2); ++ __ beq(AT, R0, L_exit2); ++ __ delayed()->nop(); ++ __ sw(value, to, 0); ++ break; ++ case T_SHORT: ++ __ andi(AT, count, 1); ++ __ beq(AT, R0, L_fill_4); ++ __ delayed()->nop(); ++ __ sh(value, to, 0); ++ __ daddiu(to, to, 2); ++ __ bind(L_fill_4); ++ __ andi(AT, count, 1 << 1); ++ __ beq(AT, R0, L_exit2); ++ __ delayed()->nop(); ++ __ sw(value, to, 0); ++ break; ++ case T_INT: ++ __ beq(count, R0, L_exit2); ++ __ delayed()->nop(); ++ __ sw(value, to, 0); ++ break; ++ default: ShouldNotReachHere(); ++ } ++ __ bind(L_exit2); ++ __ leave(); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, ++ // we let the hardware handle it. The one to eight bytes within words, ++ // dwords or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ // Side Effects: ++ // disjoint_byte_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_byte_copy(). ++ // ++ address generate_disjoint_byte_copy(bool aligned, const char * name) { ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ ++ ++ Register tmp1 = T0; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ ++ address start = __ pc(); ++ ++ __ push(tmp1); ++ __ push(tmp2); ++ __ push(tmp3); ++ __ move(tmp1, A0); ++ __ move(tmp2, A1); ++ __ move(tmp3, A2); ++ ++ ++ Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8, l_9, l_10, l_11; ++ Label l_debug; ++ ++ __ daddiu(AT, tmp3, -9); //why the number is 9 ? ++ __ blez(AT, l_9); ++ __ delayed()->nop(); ++ ++ if (!aligned) { ++ __ xorr(AT, tmp1, tmp2); ++ __ andi(AT, AT, 1); ++ __ bne(AT, R0, l_9); // if arrays don't have the same alignment mod 2, do 1 element copy ++ __ delayed()->nop(); ++ ++ __ andi(AT, tmp1, 1); ++ __ beq(AT, R0, l_10); //copy 1 enlement if necessary to aligh to 2 bytes ++ __ delayed()->nop(); ++ ++ __ lb(AT, tmp1, 0); ++ __ daddiu(tmp1, tmp1, 1); ++ __ sb(AT, tmp2, 0); ++ __ daddiu(tmp2, tmp2, 1); ++ __ daddiu(tmp3, tmp3, -1); ++ __ bind(l_10); ++ ++ __ xorr(AT, tmp1, tmp2); ++ __ andi(AT, AT, 3); ++ __ bne(AT, R0, l_1); // if arrays don't have the same alignment mod 4, do 2 elements copy ++ __ delayed()->nop(); ++ ++ // At this point it is guaranteed that both, from and to have the same alignment mod 4. ++ ++ // Copy 2 elements if necessary to align to 4 bytes. 
++ __ andi(AT, tmp1, 3); ++ __ beq(AT, R0, l_2); ++ __ delayed()->nop(); ++ ++ __ lhu(AT, tmp1, 0); ++ __ daddiu(tmp1, tmp1, 2); ++ __ sh(AT, tmp2, 0); ++ __ daddiu(tmp2, tmp2, 2); ++ __ daddiu(tmp3, tmp3, -2); ++ __ bind(l_2); ++ ++ // At this point the positions of both, from and to, are at least 4 byte aligned. ++ ++ // Copy 4 elements at a time. ++ // Align to 8 bytes, but only if both, from and to, have same alignment mod 8. ++ __ xorr(AT, tmp1, tmp2); ++ __ andi(AT, AT, 7); ++ __ bne(AT, R0, l_6); // not same alignment mod 8 -> copy 2, either from or to will be unaligned ++ __ delayed()->nop(); ++ ++ // Copy a 4 elements if necessary to align to 8 bytes. ++ __ andi(AT, tmp1, 7); ++ __ beq(AT, R0, l_7); ++ __ delayed()->nop(); ++ ++ __ lw(AT, tmp1, 0); ++ __ daddiu(tmp3, tmp3, -4); ++ __ sw(AT, tmp2, 0); ++ { // FasterArrayCopy ++ __ daddiu(tmp1, tmp1, 4); ++ __ daddiu(tmp2, tmp2, 4); ++ } ++ } ++ ++ __ bind(l_7); ++ ++ // Copy 4 elements at a time; either the loads or the stores can ++ // be unaligned if aligned == false. ++ ++ { // FasterArrayCopy ++ __ daddiu(AT, tmp3, -7); ++ __ blez(AT, l_6); // copy 4 at a time if less than 4 elements remain ++ __ delayed()->nop(); ++ ++ __ bind(l_8); ++ // For Loongson, there is 128-bit memory access. TODO ++ __ ld(AT, tmp1, 0); ++ __ sd(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 8); ++ __ daddiu(tmp2, tmp2, 8); ++ __ daddiu(tmp3, tmp3, -8); ++ __ daddiu(AT, tmp3, -8); ++ __ bgez(AT, l_8); ++ __ delayed()->nop(); ++ } ++ __ bind(l_6); ++ ++ // copy 4 bytes at a time ++ { // FasterArrayCopy ++ __ daddiu(AT, tmp3, -3); ++ __ blez(AT, l_1); ++ __ delayed()->nop(); ++ ++ __ bind(l_3); ++ __ lw(AT, tmp1, 0); ++ __ sw(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 4); ++ __ daddiu(tmp2, tmp2, 4); ++ __ daddiu(tmp3, tmp3, -4); ++ __ daddiu(AT, tmp3, -4); ++ __ bgez(AT, l_3); ++ __ delayed()->nop(); ++ ++ } ++ ++ // do 2 bytes copy ++ __ bind(l_1); ++ { ++ __ daddiu(AT, tmp3, -1); ++ __ blez(AT, l_9); ++ __ delayed()->nop(); ++ ++ __ bind(l_5); ++ __ lhu(AT, tmp1, 0); ++ __ daddiu(tmp3, tmp3, -2); ++ __ sh(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 2); ++ __ daddiu(tmp2, tmp2, 2); ++ __ daddiu(AT, tmp3, -2); ++ __ bgez(AT, l_5); ++ __ delayed()->nop(); ++ } ++ ++ //do 1 element copy--byte ++ __ bind(l_9); ++ __ beq(R0, tmp3, l_4); ++ __ delayed()->nop(); ++ ++ { ++ __ bind(l_11); ++ __ lb(AT, tmp1, 0); ++ __ daddiu(tmp3, tmp3, -1); ++ __ sb(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 1); ++ __ daddiu(tmp2, tmp2, 1); ++ __ daddiu(AT, tmp3, -1); ++ __ bgez(AT, l_11); ++ __ delayed()->nop(); ++ } ++ ++ __ bind(l_4); ++ __ pop(tmp3); ++ __ pop(tmp2); ++ __ pop(tmp1); ++ ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, ++ // we let the hardware handle it. The one to eight bytes within words, ++ // dwords or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. 
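The disjoint byte copy that ends just above is an alignment ladder: peel 1, 2 and then 4 bytes until source and destination are co-aligned to 8 bytes (falling back to narrower loops when they can never be co-aligned), stream 8 bytes at a time, and finish with 4-, 2- and 1-byte tails. A simplified sketch of that strategy in plain C++ (byte-granule peel and tail only; not a replacement for the stub):

  #include <cstdint>
  #include <cstddef>
  #include <cstring>

  static void disjoint_byte_copy_sketch(const uint8_t* from, uint8_t* to, size_t count) {
    if (count > 9 && ((((uintptr_t)from ^ (uintptr_t)to) & 7) == 0)) {
      while (((uintptr_t)from & 7) != 0) {       // peel until 8-byte co-alignment
        *to++ = *from++; --count;
      }
      while (count >= 8) {                       // 8-byte ld/sd loop (l_8 in the stub)
        uint64_t w;
        std::memcpy(&w, from, sizeof(w));
        std::memcpy(to, &w, sizeof(w));
        from += 8; to += 8; count -= 8;
      }
    }
    while (count-- > 0) {                        // byte tail (l_9/l_11 in the stub)
      *to++ = *from++;
    }
  }

The conjoint variant that follows walks the same ladder from the high addresses downwards, which is why it first computes end_from/end_to.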
++ // ++ address generate_conjoint_byte_copy(bool aligned, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ Label l_copy_4_bytes_loop, l_copy_suffix, l_copy_suffix_loop, l_exit; ++ Label l_copy_byte, l_from_unaligned, l_unaligned, l_4_bytes_aligned; ++ ++ address nooverlap_target = aligned ? ++ StubRoutines::arrayof_jbyte_disjoint_arraycopy() : ++ StubRoutines::jbyte_disjoint_arraycopy(); ++ ++ array_overlap_test(nooverlap_target, 0); ++ ++ const Register from = A0; // source array address ++ const Register to = A1; // destination array address ++ const Register count = A2; // elements count ++ const Register end_from = T3; // source array end address ++ const Register end_to = T0; // destination array end address ++ const Register end_count = T1; // destination array end address ++ ++ __ push(end_from); ++ __ push(end_to); ++ __ push(end_count); ++ __ push(T8); ++ ++ // copy from high to low ++ __ move(end_count, count); ++ __ daddu(end_from, from, end_count); ++ __ daddu(end_to, to, end_count); ++ ++ // If end_from and end_to has differante alignment, unaligned copy is performed. ++ __ andi(AT, end_from, 3); ++ __ andi(T8, end_to, 3); ++ __ bne(AT, T8, l_copy_byte); ++ __ delayed()->nop(); ++ ++ // First deal with the unaligned data at the top. ++ __ bind(l_unaligned); ++ __ beq(end_count, R0, l_exit); ++ __ delayed()->nop(); ++ ++ __ andi(AT, end_from, 3); ++ __ bne(AT, R0, l_from_unaligned); ++ __ delayed()->nop(); ++ ++ __ andi(AT, end_to, 3); ++ __ beq(AT, R0, l_4_bytes_aligned); ++ __ delayed()->nop(); ++ ++ __ bind(l_from_unaligned); ++ __ lb(AT, end_from, -1); ++ __ sb(AT, end_to, -1); ++ __ daddiu(end_from, end_from, -1); ++ __ daddiu(end_to, end_to, -1); ++ __ daddiu(end_count, end_count, -1); ++ __ b(l_unaligned); ++ __ delayed()->nop(); ++ ++ // now end_to, end_from point to 4-byte aligned high-ends ++ // end_count contains byte count that is not copied. 
++ // copy 4 bytes at a time ++ __ bind(l_4_bytes_aligned); ++ ++ __ move(T8, end_count); ++ __ daddiu(AT, end_count, -3); ++ __ blez(AT, l_copy_suffix); ++ __ delayed()->nop(); ++ ++ //__ andi(T8, T8, 3); ++ __ lea(end_from, Address(end_from, -4)); ++ __ lea(end_to, Address(end_to, -4)); ++ ++ __ dsrl(end_count, end_count, 2); ++ __ align(16); ++ __ bind(l_copy_4_bytes_loop); //l_copy_4_bytes ++ __ lw(AT, end_from, 0); ++ __ sw(AT, end_to, 0); ++ __ addiu(end_from, end_from, -4); ++ __ addiu(end_to, end_to, -4); ++ __ addiu(end_count, end_count, -1); ++ __ bne(end_count, R0, l_copy_4_bytes_loop); ++ __ delayed()->nop(); ++ ++ __ b(l_copy_suffix); ++ __ delayed()->nop(); ++ // copy dwords aligned or not with repeat move ++ // l_copy_suffix ++ // copy suffix (0-3 bytes) ++ __ bind(l_copy_suffix); ++ __ andi(T8, T8, 3); ++ __ beq(T8, R0, l_exit); ++ __ delayed()->nop(); ++ __ addiu(end_from, end_from, 3); ++ __ addiu(end_to, end_to, 3); ++ __ bind(l_copy_suffix_loop); ++ __ lb(AT, end_from, 0); ++ __ sb(AT, end_to, 0); ++ __ addiu(end_from, end_from, -1); ++ __ addiu(end_to, end_to, -1); ++ __ addiu(T8, T8, -1); ++ __ bne(T8, R0, l_copy_suffix_loop); ++ __ delayed()->nop(); ++ ++ __ bind(l_copy_byte); ++ __ beq(end_count, R0, l_exit); ++ __ delayed()->nop(); ++ __ lb(AT, end_from, -1); ++ __ sb(AT, end_to, -1); ++ __ daddiu(end_from, end_from, -1); ++ __ daddiu(end_to, end_to, -1); ++ __ daddiu(end_count, end_count, -1); ++ __ b(l_copy_byte); ++ __ delayed()->nop(); ++ ++ __ bind(l_exit); ++ __ pop(T8); ++ __ pop(end_count); ++ __ pop(end_to); ++ __ pop(end_from); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ // Generate stub for disjoint short copy. If "aligned" is true, the ++ // "from" and "to" addresses are assumed to be heapword aligned. ++ // ++ // Arguments for generated stub: ++ // from: A0 ++ // to: A1 ++ // elm.count: A2 treated as signed ++ // one element: 2 bytes ++ // ++ // Strategy for aligned==true: ++ // ++ // If length <= 9: ++ // 1. copy 1 elements at a time (l_5) ++ // ++ // If length > 9: ++ // 1. copy 4 elements at a time until less than 4 elements are left (l_7) ++ // 2. copy 2 elements at a time until less than 2 elements are left (l_6) ++ // 3. copy last element if one was left in step 2. (l_1) ++ // ++ // ++ // Strategy for aligned==false: ++ // ++ // If length <= 9: same as aligned==true case ++ // ++ // If length > 9: ++ // 1. continue with step 7. if the alignment of from and to mod 4 ++ // is different. ++ // 2. align from and to to 4 bytes by copying 1 element if necessary ++ // 3. at l_2 from and to are 4 byte aligned; continue with ++ // 6. if they cannot be aligned to 8 bytes because they have ++ // got different alignment mod 8. ++ // 4. at this point we know that both, from and to, have the same ++ // alignment mod 8, now copy one element if necessary to get ++ // 8 byte alignment of from and to. ++ // 5. copy 4 elements at a time until less than 4 elements are ++ // left; depending on step 3. all load/stores are aligned. ++ // 6. copy 2 elements at a time until less than 2 elements are ++ // left. (l_6) ++ // 7. copy 1 element at a time. (l_5) ++ // 8. copy last element if one was left in step 6. 
(l_1) ++ ++ address generate_disjoint_short_copy(bool aligned, const char * name) { ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ ++ Register tmp1 = T0; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ Register tmp4 = T8; ++ Register tmp5 = T9; ++ Register tmp6 = T2; ++ ++ address start = __ pc(); ++ ++ __ push(tmp1); ++ __ push(tmp2); ++ __ push(tmp3); ++ __ move(tmp1, A0); ++ __ move(tmp2, A1); ++ __ move(tmp3, A2); ++ ++ Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8, l_9, l_10, l_11, l_12, l_13, l_14; ++ Label l_debug; ++ // don't try anything fancy if arrays don't have many elements ++ __ daddiu(AT, tmp3, -23); ++ __ blez(AT, l_14); ++ __ delayed()->nop(); ++ // move push here ++ __ push(tmp4); ++ __ push(tmp5); ++ __ push(tmp6); ++ ++ if (!aligned) { ++ __ xorr(AT, A0, A1); ++ __ andi(AT, AT, 1); ++ __ bne(AT, R0, l_debug); // if arrays don't have the same alignment mod 2, can this happen? ++ __ delayed()->nop(); ++ ++ __ xorr(AT, A0, A1); ++ __ andi(AT, AT, 3); ++ __ bne(AT, R0, l_1); // if arrays don't have the same alignment mod 4, do 1 element copy ++ __ delayed()->nop(); ++ ++ // At this point it is guaranteed that both, from and to have the same alignment mod 4. ++ ++ // Copy 1 element if necessary to align to 4 bytes. ++ __ andi(AT, A0, 3); ++ __ beq(AT, R0, l_2); ++ __ delayed()->nop(); ++ ++ __ lhu(AT, tmp1, 0); ++ __ daddiu(tmp1, tmp1, 2); ++ __ sh(AT, tmp2, 0); ++ __ daddiu(tmp2, tmp2, 2); ++ __ daddiu(tmp3, tmp3, -1); ++ __ bind(l_2); ++ ++ // At this point the positions of both, from and to, are at least 4 byte aligned. ++ ++ // Copy 4 elements at a time. ++ // Align to 8 bytes, but only if both, from and to, have same alignment mod 8. ++ __ xorr(AT, tmp1, tmp2); ++ __ andi(AT, AT, 7); ++ __ bne(AT, R0, l_6); // not same alignment mod 8 -> copy 2, either from or to will be unaligned ++ __ delayed()->nop(); ++ ++ // Copy a 2-element word if necessary to align to 8 bytes. ++ __ andi(AT, tmp1, 7); ++ __ beq(AT, R0, l_7); ++ __ delayed()->nop(); ++ ++ __ lw(AT, tmp1, 0); ++ __ daddiu(tmp3, tmp3, -2); ++ __ sw(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 4); ++ __ daddiu(tmp2, tmp2, 4); ++ }// end of if (!aligned) ++ ++ __ bind(l_7); ++ // At this time the position of both, from and to, are at least 8 byte aligned. ++ // Copy 8 elemnets at a time. ++ // Align to 16 bytes, but only if both from and to have same alignment mod 8. 
++ __ xorr(AT, tmp1, tmp2); ++ __ andi(AT, AT, 15); ++ __ bne(AT, R0, l_9); ++ __ delayed()->nop(); ++ ++ // Copy 4-element word if necessary to align to 16 bytes, ++ __ andi(AT, tmp1, 15); ++ __ beq(AT, R0, l_10); ++ __ delayed()->nop(); ++ ++ __ ld(AT, tmp1, 0); ++ __ daddiu(tmp3, tmp3, -4); ++ __ sd(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 8); ++ __ daddiu(tmp2, tmp2, 8); ++ ++ __ bind(l_10); ++ ++ // Copy 8 elements at a time; either the loads or the stores can ++ // be unalligned if aligned == false ++ ++ { // FasterArrayCopy ++ __ bind(l_11); ++ // For loongson the 128-bit memory access instruction is gslq/gssq ++ if (UseLEXT1) { ++ __ gslq(AT, tmp4, tmp1, 0); ++ __ gslq(tmp5, tmp6, tmp1, 16); ++ __ daddiu(tmp1, tmp1, 32); ++ __ daddiu(tmp2, tmp2, 32); ++ __ gssq(AT, tmp4, tmp2, -32); ++ __ gssq(tmp5, tmp6, tmp2, -16); ++ } else { ++ __ ld(AT, tmp1, 0); ++ __ ld(tmp4, tmp1, 8); ++ __ ld(tmp5, tmp1, 16); ++ __ ld(tmp6, tmp1, 24); ++ __ daddiu(tmp1, tmp1, 32); ++ __ sd(AT, tmp2, 0); ++ __ sd(tmp4, tmp2, 8); ++ __ sd(tmp5, tmp2, 16); ++ __ sd(tmp6, tmp2, 24); ++ __ daddiu(tmp2, tmp2, 32); ++ } ++ __ daddiu(tmp3, tmp3, -16); ++ __ daddiu(AT, tmp3, -16); ++ __ bgez(AT, l_11); ++ __ delayed()->nop(); ++ } ++ __ bind(l_9); ++ ++ // Copy 4 elements at a time; either the loads or the stores can ++ // be unaligned if aligned == false. ++ { // FasterArrayCopy ++ __ daddiu(AT, tmp3, -15);// loop unrolling 4 times, so if the elements should not be less than 16 ++ __ blez(AT, l_4); // copy 2 at a time if less than 16 elements remain ++ __ delayed()->nop(); ++ ++ __ bind(l_8); ++ __ ld(AT, tmp1, 0); ++ __ ld(tmp4, tmp1, 8); ++ __ ld(tmp5, tmp1, 16); ++ __ ld(tmp6, tmp1, 24); ++ __ sd(AT, tmp2, 0); ++ __ sd(tmp4, tmp2, 8); ++ __ sd(tmp5, tmp2,16); ++ __ daddiu(tmp1, tmp1, 32); ++ __ daddiu(tmp2, tmp2, 32); ++ __ daddiu(tmp3, tmp3, -16); ++ __ daddiu(AT, tmp3, -16); ++ __ bgez(AT, l_8); ++ __ delayed()->sd(tmp6, tmp2, -8); ++ } ++ __ bind(l_6); ++ ++ // copy 2 element at a time ++ { // FasterArrayCopy ++ __ daddiu(AT, tmp3, -7); ++ __ blez(AT, l_4); ++ __ delayed()->nop(); ++ ++ __ bind(l_3); ++ __ lw(AT, tmp1, 0); ++ __ lw(tmp4, tmp1, 4); ++ __ lw(tmp5, tmp1, 8); ++ __ lw(tmp6, tmp1, 12); ++ __ sw(AT, tmp2, 0); ++ __ sw(tmp4, tmp2, 4); ++ __ sw(tmp5, tmp2, 8); ++ __ daddiu(tmp1, tmp1, 16); ++ __ daddiu(tmp2, tmp2, 16); ++ __ daddiu(tmp3, tmp3, -8); ++ __ daddiu(AT, tmp3, -8); ++ __ bgez(AT, l_3); ++ __ delayed()->sw(tmp6, tmp2, -4); ++ } ++ ++ __ bind(l_1); ++ // do single element copy (8 bit), can this happen? 
++ { // FasterArrayCopy ++ __ daddiu(AT, tmp3, -3); ++ __ blez(AT, l_4); ++ __ delayed()->nop(); ++ ++ __ bind(l_5); ++ __ lhu(AT, tmp1, 0); ++ __ lhu(tmp4, tmp1, 2); ++ __ lhu(tmp5, tmp1, 4); ++ __ lhu(tmp6, tmp1, 6); ++ __ sh(AT, tmp2, 0); ++ __ sh(tmp4, tmp2, 2); ++ __ sh(tmp5, tmp2, 4); ++ __ daddiu(tmp1, tmp1, 8); ++ __ daddiu(tmp2, tmp2, 8); ++ __ daddiu(tmp3, tmp3, -4); ++ __ daddiu(AT, tmp3, -4); ++ __ bgez(AT, l_5); ++ __ delayed()->sh(tmp6, tmp2, -2); ++ } ++ // single element ++ __ bind(l_4); ++ ++ __ pop(tmp6); ++ __ pop(tmp5); ++ __ pop(tmp4); ++ ++ __ bind(l_14); ++ { // FasterArrayCopy ++ __ beq(R0, tmp3, l_13); ++ __ delayed()->nop(); ++ ++ __ bind(l_12); ++ __ lhu(AT, tmp1, 0); ++ __ sh(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 2); ++ __ daddiu(tmp2, tmp2, 2); ++ __ daddiu(tmp3, tmp3, -1); ++ __ daddiu(AT, tmp3, -1); ++ __ bgez(AT, l_12); ++ __ delayed()->nop(); ++ } ++ ++ __ bind(l_13); ++ __ pop(tmp3); ++ __ pop(tmp2); ++ __ pop(tmp1); ++ ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ __ bind(l_debug); ++ __ stop("generate_disjoint_short_copy should not reach here"); ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we ++ // let the hardware handle it. The two or four words within dwords ++ // or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ address generate_conjoint_short_copy(bool aligned, const char *name) { ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ ++ Label l_exit, l_copy_short, l_from_unaligned, l_unaligned, l_4_bytes_aligned; ++ ++ address nooverlap_target = aligned ? ++ StubRoutines::arrayof_jshort_disjoint_arraycopy() : ++ StubRoutines::jshort_disjoint_arraycopy(); ++ ++ array_overlap_test(nooverlap_target, 1); ++ ++ const Register from = A0; // source array address ++ const Register to = A1; // destination array address ++ const Register count = A2; // elements count ++ const Register end_from = T3; // source array end address ++ const Register end_to = T0; // destination array end address ++ const Register end_count = T1; // destination array end address ++ ++ __ push(end_from); ++ __ push(end_to); ++ __ push(end_count); ++ __ push(T8); ++ ++ // copy from high to low ++ __ move(end_count, count); ++ __ sll(AT, end_count, Address::times_2); ++ __ daddu(end_from, from, AT); ++ __ daddu(end_to, to, AT); ++ ++ // If end_from and end_to has differante alignment, unaligned copy is performed. ++ __ andi(AT, end_from, 3); ++ __ andi(T8, end_to, 3); ++ __ bne(AT, T8, l_copy_short); ++ __ delayed()->nop(); ++ ++ // First deal with the unaligned data at the top. ++ __ bind(l_unaligned); ++ __ beq(end_count, R0, l_exit); ++ __ delayed()->nop(); ++ ++ __ andi(AT, end_from, 3); ++ __ bne(AT, R0, l_from_unaligned); ++ __ delayed()->nop(); ++ ++ __ andi(AT, end_to, 3); ++ __ beq(AT, R0, l_4_bytes_aligned); ++ __ delayed()->nop(); ++ ++ // Copy 1 element if necessary to align to 4 bytes. 
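Because source and destination may overlap in the conjoint case, array_overlap_test above either branches to the matching disjoint (forward) stub or falls through to this high-to-low copy; the l_from_unaligned step just below then peels single trailing elements until the high end is 4-byte aligned. A scalar model of that overlap decision, purely for illustration:

    #include <cstddef>
    #include <cstdint>

    // memmove-style direction choice for 2-byte elements: forward copy is safe
    // when the destination does not start inside the source range; otherwise
    // copy backwards so each source element is read before it is overwritten.
    void conjoint_copy_u16(const uint16_t* from, uint16_t* to, size_t count) {
      uintptr_t f = reinterpret_cast<uintptr_t>(from);
      uintptr_t t = reinterpret_cast<uintptr_t>(to);
      if (t <= f || t >= f + count * sizeof(uint16_t)) {
        for (size_t i = 0; i < count; i++) to[i] = from[i];         // disjoint path
      } else {
        for (size_t i = count; i > 0; i--) to[i - 1] = from[i - 1]; // backward path
      }
    }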
++ __ bind(l_from_unaligned); ++ __ lhu(AT, end_from, -2); ++ __ sh(AT, end_to, -2); ++ __ daddiu(end_from, end_from, -2); ++ __ daddiu(end_to, end_to, -2); ++ __ daddiu(end_count, end_count, -1); ++ __ b(l_unaligned); ++ __ delayed()->nop(); ++ ++ // now end_to, end_from point to 4-byte aligned high-ends ++ // end_count contains byte count that is not copied. ++ // copy 4 bytes at a time ++ __ bind(l_4_bytes_aligned); ++ ++ __ daddiu(AT, end_count, -1); ++ __ blez(AT, l_copy_short); ++ __ delayed()->nop(); ++ ++ __ lw(AT, end_from, -4); ++ __ sw(AT, end_to, -4); ++ __ addiu(end_from, end_from, -4); ++ __ addiu(end_to, end_to, -4); ++ __ addiu(end_count, end_count, -2); ++ __ b(l_4_bytes_aligned); ++ __ delayed()->nop(); ++ ++ // copy 1 element at a time ++ __ bind(l_copy_short); ++ __ beq(end_count, R0, l_exit); ++ __ delayed()->nop(); ++ __ lhu(AT, end_from, -2); ++ __ sh(AT, end_to, -2); ++ __ daddiu(end_from, end_from, -2); ++ __ daddiu(end_to, end_to, -2); ++ __ daddiu(end_count, end_count, -1); ++ __ b(l_copy_short); ++ __ delayed()->nop(); ++ ++ __ bind(l_exit); ++ __ pop(T8); ++ __ pop(end_count); ++ __ pop(end_to); ++ __ pop(end_from); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ // Side Effects: ++ // disjoint_int_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_int_oop_copy(). 
++ // ++ address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, const char *name, bool dest_uninitialized = false) { ++ Label l_3, l_4, l_5, l_6, l_7; ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ __ push(T3); ++ __ push(T0); ++ __ push(T1); ++ __ push(T8); ++ __ push(T9); ++ __ move(T1, A2); ++ __ move(T3, A0); ++ __ move(T0, A1); ++ ++ DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT; ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } ++ if (aligned) { ++ decorators |= ARRAYCOPY_ALIGNED; ++ } ++ ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->arraycopy_prologue(_masm, decorators, is_oop, A1, A2); ++ ++ if(!aligned) { ++ __ xorr(AT, T3, T0); ++ __ andi(AT, AT, 7); ++ __ bne(AT, R0, l_5); // not same alignment mod 8 -> copy 1 element each time ++ __ delayed()->nop(); ++ ++ __ andi(AT, T3, 7); ++ __ beq(AT, R0, l_6); //copy 2 elements each time ++ __ delayed()->nop(); ++ ++ __ lw(AT, T3, 0); ++ __ daddiu(T1, T1, -1); ++ __ sw(AT, T0, 0); ++ __ daddiu(T3, T3, 4); ++ __ daddiu(T0, T0, 4); ++ } ++ ++ { ++ __ bind(l_6); ++ __ daddiu(AT, T1, -1); ++ __ blez(AT, l_5); ++ __ delayed()->nop(); ++ ++ __ bind(l_7); ++ __ ld(AT, T3, 0); ++ __ sd(AT, T0, 0); ++ __ daddiu(T3, T3, 8); ++ __ daddiu(T0, T0, 8); ++ __ daddiu(T1, T1, -2); ++ __ daddiu(AT, T1, -2); ++ __ bgez(AT, l_7); ++ __ delayed()->nop(); ++ } ++ ++ __ bind(l_5); ++ __ beq(T1, R0, l_4); ++ __ delayed()->nop(); ++ ++ __ align(16); ++ __ bind(l_3); ++ __ lw(AT, T3, 0); ++ __ sw(AT, T0, 0); ++ __ addiu(T3, T3, 4); ++ __ addiu(T0, T0, 4); ++ __ addiu(T1, T1, -1); ++ __ bne(T1, R0, l_3); ++ __ delayed()->nop(); ++ ++ // exit ++ __ bind(l_4); ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, A1, A2, T1); ++ __ pop(T9); ++ __ pop(T8); ++ __ pop(T1); ++ __ pop(T0); ++ __ pop(T3); ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, const char *name, bool dest_uninitialized = false) { ++ Label l_2, l_4; ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ address nooverlap_target; ++ ++ if (is_oop) { ++ nooverlap_target = aligned ? ++ StubRoutines::arrayof_oop_disjoint_arraycopy() : ++ StubRoutines::oop_disjoint_arraycopy(); ++ } else { ++ nooverlap_target = aligned ? 
++ StubRoutines::arrayof_jint_disjoint_arraycopy() : ++ StubRoutines::jint_disjoint_arraycopy(); ++ } ++ ++ array_overlap_test(nooverlap_target, 2); ++ ++ DecoratorSet decorators = IN_HEAP | IS_ARRAY; ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } ++ if (aligned) { ++ decorators |= ARRAYCOPY_ALIGNED; ++ } ++ ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ // no registers are destroyed by this call ++ bs->arraycopy_prologue(_masm, decorators, is_oop, A1, A2); ++ ++ __ push(T3); ++ __ push(T0); ++ __ push(T1); ++ __ push(T8); ++ __ push(T9); ++ ++ __ move(T1, A2); ++ __ move(T3, A0); ++ __ move(T0, A1); ++ ++ // T3: source array address ++ // T0: destination array address ++ // T1: element count ++ ++ __ sll(AT, T1, Address::times_4); ++ __ addu(AT, T3, AT); ++ __ daddiu(T3, AT, -4); ++ __ sll(AT, T1, Address::times_4); ++ __ addu(AT, T0, AT); ++ __ daddiu(T0, AT, -4); ++ ++ __ beq(T1, R0, l_4); ++ __ delayed()->nop(); ++ ++ __ align(16); ++ __ bind(l_2); ++ __ lw(AT, T3, 0); ++ __ sw(AT, T0, 0); ++ __ addiu(T3, T3, -4); ++ __ addiu(T0, T0, -4); ++ __ addiu(T1, T1, -1); ++ __ bne(T1, R0, l_2); ++ __ delayed()->nop(); ++ ++ __ bind(l_4); ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, A1, A2, T1); ++ __ pop(T9); ++ __ pop(T8); ++ __ pop(T1); ++ __ pop(T0); ++ __ pop(T3); ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ // Side Effects: ++ // disjoint_int_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_int_oop_copy(). 
++ // ++ address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, const char *name, bool dest_uninitialized = false) { ++ Label l_3, l_4; ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ ++ DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT; ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } ++ if (aligned) { ++ decorators |= ARRAYCOPY_ALIGNED; ++ } ++ ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->arraycopy_prologue(_masm, decorators, is_oop, A1, A2); ++ ++ __ push(T3); ++ __ push(T0); ++ __ push(T1); ++ __ push(T8); ++ __ push(T9); ++ ++ __ move(T1, A2); ++ __ move(T3, A0); ++ __ move(T0, A1); ++ ++ // T3: source array address ++ // T0: destination array address ++ // T1: element count ++ ++ __ beq(T1, R0, l_4); ++ __ delayed()->nop(); ++ ++ __ align(16); ++ __ bind(l_3); ++ __ ld(AT, T3, 0); ++ __ sd(AT, T0, 0); ++ __ addiu(T3, T3, 8); ++ __ addiu(T0, T0, 8); ++ __ addiu(T1, T1, -1); ++ __ bne(T1, R0, l_3); ++ __ delayed()->nop(); ++ ++ // exit ++ __ bind(l_4); ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, A1, A2, T1); ++ __ pop(T9); ++ __ pop(T8); ++ __ pop(T1); ++ __ pop(T0); ++ __ pop(T3); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ address generate_conjoint_long_oop_copy(bool aligned, bool is_oop, const char *name, bool dest_uninitialized = false) { ++ Label l_2, l_4; ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ address nooverlap_target; ++ ++ if (is_oop) { ++ nooverlap_target = aligned ? ++ StubRoutines::arrayof_oop_disjoint_arraycopy() : ++ StubRoutines::oop_disjoint_arraycopy(); ++ } else { ++ nooverlap_target = aligned ? 
++ StubRoutines::arrayof_jlong_disjoint_arraycopy() : ++ StubRoutines::jlong_disjoint_arraycopy(); ++ } ++ ++ array_overlap_test(nooverlap_target, 3); ++ ++ DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT; ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } ++ if (aligned) { ++ decorators |= ARRAYCOPY_ALIGNED; ++ } ++ ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->arraycopy_prologue(_masm, decorators, is_oop, A1, A2); ++ ++ __ push(T3); ++ __ push(T0); ++ __ push(T1); ++ __ push(T8); ++ __ push(T9); ++ ++ __ move(T1, A2); ++ __ move(T3, A0); ++ __ move(T0, A1); ++ ++ __ sll(AT, T1, Address::times_8); ++ __ addu(AT, T3, AT); ++ __ daddiu(T3, AT, -8); ++ __ sll(AT, T1, Address::times_8); ++ __ addu(AT, T0, AT); ++ __ daddiu(T0, AT, -8); ++ ++ __ beq(T1, R0, l_4); ++ __ delayed()->nop(); ++ ++ __ align(16); ++ __ bind(l_2); ++ __ ld(AT, T3, 0); ++ __ sd(AT, T0, 0); ++ __ addiu(T3, T3, -8); ++ __ addiu(T0, T0, -8); ++ __ addiu(T1, T1, -1); ++ __ bne(T1, R0, l_2); ++ __ delayed()->nop(); ++ ++ // exit ++ __ bind(l_4); ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, A1, A2, T1); ++ __ pop(T9); ++ __ pop(T8); ++ __ pop(T1); ++ __ pop(T0); ++ __ pop(T3); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ //FIXME ++ address generate_disjoint_long_copy(bool aligned, const char *name) { ++ Label l_1, l_2; ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ ++ __ move(T1, A2); ++ __ move(T3, A0); ++ __ move(T0, A1); ++ __ push(T3); ++ __ push(T0); ++ __ push(T1); ++ __ b(l_2); ++ __ delayed()->nop(); ++ __ align(16); ++ __ bind(l_1); ++ __ ld(AT, T3, 0); ++ __ sd (AT, T0, 0); ++ __ addiu(T3, T3, 8); ++ __ addiu(T0, T0, 8); ++ __ bind(l_2); ++ __ addiu(T1, T1, -1); ++ __ bgez(T1, l_1); ++ __ delayed()->nop(); ++ __ pop(T1); ++ __ pop(T0); ++ __ pop(T3); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ ++ address generate_conjoint_long_copy(bool aligned, const char *name) { ++ Label l_1, l_2; ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ address nooverlap_target = aligned ? 
++ StubRoutines::arrayof_jlong_disjoint_arraycopy() : ++ StubRoutines::jlong_disjoint_arraycopy(); ++ array_overlap_test(nooverlap_target, 3); ++ ++ __ push(T3); ++ __ push(T0); ++ __ push(T1); ++ ++ __ move(T1, A2); ++ __ move(T3, A0); ++ __ move(T0, A1); ++ __ sll(AT, T1, Address::times_8); ++ __ addu(AT, T3, AT); ++ __ daddiu(T3, AT, -8); ++ __ sll(AT, T1, Address::times_8); ++ __ addu(AT, T0, AT); ++ __ daddiu(T0, AT, -8); ++ ++ __ b(l_2); ++ __ delayed()->nop(); ++ __ align(16); ++ __ bind(l_1); ++ __ ld(AT, T3, 0); ++ __ sd (AT, T0, 0); ++ __ addiu(T3, T3, -8); ++ __ addiu(T0, T0,-8); ++ __ bind(l_2); ++ __ addiu(T1, T1, -1); ++ __ bgez(T1, l_1); ++ __ delayed()->nop(); ++ __ pop(T1); ++ __ pop(T0); ++ __ pop(T3); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ void generate_arraycopy_stubs() { ++ if (UseCompressedOops) { ++ StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, true, ++ "oop_disjoint_arraycopy"); ++ StubRoutines::_oop_arraycopy = generate_conjoint_int_oop_copy(false, true, ++ "oop_arraycopy"); ++ StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_int_oop_copy(false, true, ++ "oop_disjoint_arraycopy_uninit", true); ++ StubRoutines::_oop_arraycopy_uninit = generate_conjoint_int_oop_copy(false, true, ++ "oop_arraycopy_uninit", true); ++ } else { ++ StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, true, ++ "oop_disjoint_arraycopy"); ++ StubRoutines::_oop_arraycopy = generate_conjoint_long_oop_copy(false, true, ++ "oop_arraycopy"); ++ StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_long_oop_copy(false, true, ++ "oop_disjoint_arraycopy_uninit", true); ++ StubRoutines::_oop_arraycopy_uninit = generate_conjoint_long_oop_copy(false, true, ++ "oop_arraycopy_uninit", true); ++ } ++ ++ StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, "jbyte_disjoint_arraycopy"); ++ StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, "jshort_disjoint_arraycopy"); ++ StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, false, "jint_disjoint_arraycopy"); ++ StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_copy(false, "jlong_disjoint_arraycopy"); ++ ++ StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, "jbyte_arraycopy"); ++ StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, "jshort_arraycopy"); ++ StubRoutines::_jint_arraycopy = generate_conjoint_int_oop_copy(false, false, "jint_arraycopy"); ++ StubRoutines::_jlong_arraycopy = generate_conjoint_long_copy(false, "jlong_arraycopy"); ++ ++ // We don't generate specialized code for HeapWord-aligned source ++ // arrays, so just use the code we've already generated ++ StubRoutines::_arrayof_jbyte_disjoint_arraycopy = StubRoutines::_jbyte_disjoint_arraycopy; ++ StubRoutines::_arrayof_jbyte_arraycopy = StubRoutines::_jbyte_arraycopy; ++ ++ StubRoutines::_arrayof_jshort_disjoint_arraycopy = StubRoutines::_jshort_disjoint_arraycopy; ++ StubRoutines::_arrayof_jshort_arraycopy = StubRoutines::_jshort_arraycopy; ++ ++ StubRoutines::_arrayof_jint_disjoint_arraycopy = StubRoutines::_jint_disjoint_arraycopy; ++ StubRoutines::_arrayof_jint_arraycopy = StubRoutines::_jint_arraycopy; ++ ++ StubRoutines::_arrayof_jlong_disjoint_arraycopy = StubRoutines::_jlong_disjoint_arraycopy; ++ StubRoutines::_arrayof_jlong_arraycopy = StubRoutines::_jlong_arraycopy; ++ ++ StubRoutines::_arrayof_oop_disjoint_arraycopy = 
StubRoutines::_oop_disjoint_arraycopy; ++ StubRoutines::_arrayof_oop_arraycopy = StubRoutines::_oop_arraycopy; ++ ++ StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = StubRoutines::_oop_disjoint_arraycopy_uninit; ++ StubRoutines::_arrayof_oop_arraycopy_uninit = StubRoutines::_oop_arraycopy_uninit; ++ ++ StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill"); ++ StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill"); ++ StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill"); ++ StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill"); ++ StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill"); ++ StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill"); ++ } ++ ++ // add a function to implement SafeFetch32 and SafeFetchN ++ void generate_safefetch(const char* name, int size, address* entry, ++ address* fault_pc, address* continuation_pc) { ++ // safefetch signatures: ++ // int SafeFetch32(int* adr, int errValue); ++ // intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue); ++ // ++ // arguments: ++ // A0 = adr ++ // A1 = errValue ++ // ++ // result: ++ // PPC_RET = *adr or errValue ++ ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ // Entry point, pc or function descriptor. ++ *entry = __ pc(); ++ ++ // Load *adr into A1, may fault. ++ *fault_pc = __ pc(); ++ switch (size) { ++ case 4: ++ // int32_t ++ __ lw(A1, A0, 0); ++ break; ++ case 8: ++ // int64_t ++ __ ld(A1, A0, 0); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ // return errValue or *adr ++ *continuation_pc = __ pc(); ++ __ addu(V0,A1,R0); ++ __ jr(RA); ++ __ delayed()->nop(); ++ } ++ ++ ++#undef __ ++#define __ masm-> ++ ++ // Continuation point for throwing of implicit exceptions that are ++ // not handled in the current activation. Fabricates an exception ++ // oop and initiates normal exception dispatching in this ++ // frame. Since we need to preserve callee-saved values (currently ++ // only for C2, but done for C1 as well) we need a callee-saved oop ++ // map and therefore have to make these stubs into RuntimeStubs ++ // rather than BufferBlobs. If the compiler needs all registers to ++ // be preserved between the fault point and the exception handler ++ // then it must assume responsibility for that in ++ // AbstractCompiler::continuation_for_implicit_null_exception or ++ // continuation_for_implicit_division_by_zero_exception. All other ++ // implicit exceptions (e.g., NullPointerException or ++ // AbstractMethodError on entry) are either at call sites or ++ // otherwise assume that stack unwinding will be initiated, so ++ // caller saved registers were assumed volatile in the compiler. ++ address generate_throw_exception(const char* name, ++ address runtime_entry, ++ bool restore_saved_exception_pc) { ++ // Information about frame layout at time of blocking runtime call. ++ // Note that we only have to preserve callee-saved registers since ++ // the compilers are responsible for supplying a continuation point ++ // if they expect all registers to be preserved. 
++ enum layout { ++ thread_off, // last_java_sp ++ S7_off, // callee saved register sp + 1 ++ S6_off, // callee saved register sp + 2 ++ S5_off, // callee saved register sp + 3 ++ S4_off, // callee saved register sp + 4 ++ S3_off, // callee saved register sp + 5 ++ S2_off, // callee saved register sp + 6 ++ S1_off, // callee saved register sp + 7 ++ S0_off, // callee saved register sp + 8 ++ FP_off, ++ ret_address, ++ framesize ++ }; ++ ++ int insts_size = 2048; ++ int locs_size = 32; ++ ++ // CodeBuffer* code = new CodeBuffer(insts_size, locs_size, 0, 0, 0, false, ++ // NULL, NULL, NULL, false, NULL, name, false); ++ CodeBuffer code (name , insts_size, locs_size); ++ OopMapSet* oop_maps = new OopMapSet(); ++ MacroAssembler* masm = new MacroAssembler(&code); ++ ++ address start = __ pc(); ++ ++ // This is an inlined and slightly modified version of call_VM ++ // which has the ability to fetch the return PC out of ++ // thread-local storage and also sets up last_Java_sp slightly ++ // differently than the real call_VM ++#ifndef OPT_THREAD ++ Register java_thread = TREG; ++ __ get_thread(java_thread); ++#else ++ Register java_thread = TREG; ++#endif ++ if (restore_saved_exception_pc) { ++ __ ld(RA, java_thread, in_bytes(JavaThread::saved_exception_pc_offset())); ++ } ++ ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ ++ __ addiu(SP, SP, (-1) * (framesize-2) * wordSize); // prolog ++ __ sd(S0, SP, S0_off * wordSize); ++ __ sd(S1, SP, S1_off * wordSize); ++ __ sd(S2, SP, S2_off * wordSize); ++ __ sd(S3, SP, S3_off * wordSize); ++ __ sd(S4, SP, S4_off * wordSize); ++ __ sd(S5, SP, S5_off * wordSize); ++ __ sd(S6, SP, S6_off * wordSize); ++ __ sd(S7, SP, S7_off * wordSize); ++ ++ int frame_complete = __ pc() - start; ++ // push java thread (becomes first argument of C function) ++ __ sd(java_thread, SP, thread_off * wordSize); ++ if (java_thread != A0) ++ __ move(A0, java_thread); ++ ++ // Set up last_Java_sp and last_Java_fp ++ __ set_last_Java_frame(java_thread, SP, FP, NULL); ++ // Align stack ++ __ set64(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ ++ __ relocate(relocInfo::internal_pc_type); ++ { ++ intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 28; ++ __ patchable_set48(AT, save_pc); ++ } ++ __ sd(AT, java_thread, in_bytes(JavaThread::last_Java_pc_offset())); ++ ++ // Call runtime ++ __ call(runtime_entry); ++ __ delayed()->nop(); ++ // Generate oop map ++ OopMap* map = new OopMap(framesize, 0); ++ oop_maps->add_gc_map(__ offset(), map); ++ ++ // restore the thread (cannot use the pushed argument since arguments ++ // may be overwritten by C code generated by an optimizing compiler); ++ // however can use the register value directly if it is callee saved. ++#ifndef OPT_THREAD ++ __ get_thread(java_thread); ++#endif ++ ++ __ ld(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ __ reset_last_Java_frame(java_thread, true); ++ ++ // Restore callee save registers. 
This must be done after resetting the Java frame ++ __ ld(S0, SP, S0_off * wordSize); ++ __ ld(S1, SP, S1_off * wordSize); ++ __ ld(S2, SP, S2_off * wordSize); ++ __ ld(S3, SP, S3_off * wordSize); ++ __ ld(S4, SP, S4_off * wordSize); ++ __ ld(S5, SP, S5_off * wordSize); ++ __ ld(S6, SP, S6_off * wordSize); ++ __ ld(S7, SP, S7_off * wordSize); ++ ++ // discard arguments ++ __ move(SP, FP); // epilog ++ __ pop(FP); ++ ++ // check for pending exceptions ++#ifdef ASSERT ++ Label L; ++ __ ld(AT, java_thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, L); ++ __ delayed()->nop(); ++ __ should_not_reach_here(); ++ __ bind(L); ++#endif //ASSERT ++ __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, ++ &code, ++ frame_complete, ++ framesize, ++ oop_maps, false); ++ return stub->entry_point(); ++ } ++ ++ // Initialization ++ void generate_initial() { ++ // Generates all stubs and initializes the entry points ++ ++ //------------------------------------------------------------- ++ //----------------------------------------------------------- ++ // entry points that exist in all platforms ++ // Note: This is code that could be shared among different platforms - however the benefit seems to be smaller ++ // than the disadvantage of having a much more complicated generator structure. ++ // See also comment in stubRoutines.hpp. ++ StubRoutines::_forward_exception_entry = generate_forward_exception(); ++ StubRoutines::_call_stub_entry = generate_call_stub(StubRoutines::_call_stub_return_address); ++ // is referenced by megamorphic call ++ StubRoutines::_catch_exception_entry = generate_catch_exception(); ++ ++ StubRoutines::_throw_StackOverflowError_entry = ++ generate_throw_exception("StackOverflowError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError), ++ false); ++ StubRoutines::_throw_delayed_StackOverflowError_entry = ++ generate_throw_exception("delayed StackOverflowError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_delayed_StackOverflowError), ++ false); ++ } ++ ++ void generate_all() { ++ // Generates all stubs and initializes the entry points ++ ++ // These entry points require SharedInfo::stack0 to be set up in ++ // non-core builds and need to be relocatable, so they each ++ // fabricate a RuntimeStub internally. ++ StubRoutines::_throw_AbstractMethodError_entry = generate_throw_exception("AbstractMethodError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError), false); ++ ++ StubRoutines::_throw_IncompatibleClassChangeError_entry = generate_throw_exception("IncompatibleClassChangeError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime:: throw_IncompatibleClassChangeError), false); ++ ++ StubRoutines::_throw_NullPointerException_at_call_entry = generate_throw_exception("NullPointerException at call throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call), false); ++ ++ // entry points that are platform specific ++ ++ // support for verify_oop (must happen after universe_init) ++ StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop(); ++#ifndef CORE ++ // arraycopy stubs used by compilers ++ generate_arraycopy_stubs(); ++#endif ++ ++ // Safefetch stubs. 
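As the comment block in generate_safefetch above spells out, SafeFetch32/SafeFetchN read from an address that may be unmapped: the load at *fault_pc may trap, in which case the VM's signal handler resumes execution at *continuation_pc with the destination register still holding errValue, so the caller sees errValue instead of a crash. A hedged C++ model of that contract (a plain function cannot survive a real fault; this only documents the semantics and is not the VM implementation):

    #include <cstdio>

    // Conceptual behaviour of the SafeFetch32 stub: return *adr, or errValue
    // if reading *adr would fault. The 'would_fault' flag stands in for the
    // signal-handler machinery that the real stub relies on.
    int SafeFetch32_model(const int* adr, int errValue, bool would_fault) {
      return would_fault ? errValue : *adr;
    }

    int main() {
      int x = 42;
      printf("%d\n", SafeFetch32_model(&x, -1, false));     // 42
      printf("%d\n", SafeFetch32_model(nullptr, -1, true)); // -1 instead of SIGSEGV
      return 0;
    }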
++ generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, ++ &StubRoutines::_safefetch32_fault_pc, ++ &StubRoutines::_safefetch32_continuation_pc); ++ generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry, ++ &StubRoutines::_safefetchN_fault_pc, ++ &StubRoutines::_safefetchN_continuation_pc); ++ ++#ifdef COMPILER2 ++ if (UseMontgomeryMultiplyIntrinsic) { ++ StubRoutines::_montgomeryMultiply ++ = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_multiply); ++ } ++ if (UseMontgomerySquareIntrinsic) { ++ StubRoutines::_montgomerySquare ++ = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_square); ++ } ++#endif ++ } ++ ++ public: ++ StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) { ++ if (all) { ++ generate_all(); ++ } else { ++ generate_initial(); ++ } ++ } ++}; // end class declaration ++ ++void StubGenerator_generate(CodeBuffer* code, bool all) { ++ StubGenerator g(code, all); ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/stubRoutines_mips_64.cpp b/src/hotspot/cpu/mips/stubRoutines_mips_64.cpp +--- a/src/hotspot/cpu/mips/stubRoutines_mips_64.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/stubRoutines_mips_64.cpp 2024-01-30 10:00:11.848098317 +0800 +@@ -0,0 +1,35 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "runtime/deoptimization.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++ ++// a description of how to extend it, see the stubRoutines.hpp file. ++ ++//find the last fp value ++address StubRoutines::gs2::_call_stub_compiled_return = NULL; +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/stubRoutines_mips.hpp b/src/hotspot/cpu/mips/stubRoutines_mips.hpp +--- a/src/hotspot/cpu/mips/stubRoutines_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/stubRoutines_mips.hpp 2024-01-30 10:00:11.848098317 +0800 +@@ -0,0 +1,59 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_STUBROUTINES_MIPS_64_HPP ++#define CPU_MIPS_VM_STUBROUTINES_MIPS_64_HPP ++ ++// This file holds the platform specific parts of the StubRoutines ++// definition. See stubRoutines.hpp for a description on how to ++// extend it. ++ ++static bool returns_to_call_stub(address return_pc){ ++ return return_pc == _call_stub_return_address||return_pc == gs2::get_call_stub_compiled_return(); ++} ++ ++enum platform_dependent_constants { ++ code_size1 = 20000, // simply increase if too small (assembler will crash if too small) ++ code_size2 = 40000 // simply increase if too small (assembler will crash if too small) ++}; ++ ++class gs2 { ++ friend class StubGenerator; ++ friend class VMStructs; ++ private: ++ // If we call compiled code directly from the call stub we will ++ // need to adjust the return back to the call stub to a specialized ++ // piece of code that can handle compiled results and cleaning the fpu ++ // stack. The variable holds that location. ++ static address _call_stub_compiled_return; ++ ++public: ++ // Call back points for traps in compiled code ++ static address get_call_stub_compiled_return() { return _call_stub_compiled_return; } ++ static void set_call_stub_compiled_return(address ret){ _call_stub_compiled_return = ret; } ++ ++}; ++ ++#endif // CPU_MIPS_VM_STUBROUTINES_MIPS_64_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/templateInterpreterGenerator_mips.cpp b/src/hotspot/cpu/mips/templateInterpreterGenerator_mips.cpp +--- a/src/hotspot/cpu/mips/templateInterpreterGenerator_mips.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/templateInterpreterGenerator_mips.cpp 2024-01-30 10:00:11.848098317 +0800 +@@ -0,0 +1,2149 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "interpreter/bytecodeHistogram.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "interpreter/templateInterpreterGenerator.hpp" ++#include "interpreter/templateTable.hpp" ++#include "oops/arrayOop.hpp" ++#include "oops/methodData.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/jvmtiThreadState.hpp" ++#include "runtime/arguments.hpp" ++#include "runtime/deoptimization.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/synchronizer.hpp" ++#include "runtime/timer.hpp" ++#include "runtime/vframeArray.hpp" ++#include "utilities/debug.hpp" ++ ++#define __ _masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++int TemplateInterpreter::InterpreterCodeSize = 500 * K; ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#else ++#define BLOCK_COMMENT(str) __ block_comment(str) ++#endif ++ ++address TemplateInterpreterGenerator::generate_slow_signature_handler() { ++ address entry = __ pc(); ++ ++ // Rmethod: method ++ // LVP: pointer to locals ++ // A3: first stack arg ++ __ move(A3, SP); ++ __ daddiu(SP, SP, -10 * wordSize); ++ __ sd(RA, SP, 0); ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::slow_signature_handler), ++ Rmethod, LVP, A3); ++ ++ // V0: result handler ++ ++ // Stack layout: ++ // ... ++ // 10 stack arg0 <--- old sp ++ // 9 float/double identifiers ++ // 8 register arg7 ++ // ... ++ // 2 register arg1 ++ // 1 aligned slot ++ // SP: 0 return address ++ ++ // Do FP first so we can use T3 as temp ++ __ ld(T3, Address(SP, 9 * wordSize)); // float/double identifiers ++ ++ // A0 is for env. ++ // If the mothed is not static, A1 will be corrected in generate_native_entry. ++ for ( int i = 1; i < Argument::n_register_parameters; i++ ) { ++ Register reg = as_Register(i + A0->encoding()); ++ FloatRegister floatreg = as_FloatRegister(i + F12->encoding()); ++ Label isfloatordouble, isdouble, next; ++ ++ __ andi(AT, T3, 1 << (i*2)); // Float or Double? ++ __ bne(AT, R0, isfloatordouble); ++ __ delayed()->nop(); ++ ++ // Do Int register here ++ __ ld(reg, SP, (1 + i) * wordSize); ++ __ b (next); ++ __ delayed()->nop(); ++ ++ __ bind(isfloatordouble); ++ __ andi(AT, T3, 1 << ((i*2)+1)); // Double? 
++ __ bne(AT, R0, isdouble); ++ __ delayed()->nop(); ++ ++ // Do Float Here ++ __ lwc1(floatreg, SP, (1 + i) * wordSize); ++ __ b(next); ++ __ delayed()->nop(); ++ ++ // Do Double here ++ __ bind(isdouble); ++ __ ldc1(floatreg, SP, (1 + i) * wordSize); ++ ++ __ bind(next); ++ } ++ ++ __ ld(RA, SP, 0); ++ __ daddiu(SP, SP, 10 * wordSize); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return entry; ++} ++ ++/** ++ * Method entry for static native methods: ++ * int java.util.zip.CRC32.update(int crc, int b) ++ */ ++address TemplateInterpreterGenerator::generate_CRC32_update_entry() { ++ if (UseCRC32Intrinsics) { ++ address entry = __ pc(); ++ Unimplemented(); ++ return entry; ++ } ++ return NULL; ++} ++ ++/** ++ * Method entry for static native methods: ++ * int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len) ++ * int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len) ++ */ ++address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { ++ if (UseCRC32Intrinsics) { ++ address entry = __ pc(); ++ Unimplemented(); ++ return entry; ++ } ++ return NULL; ++} ++ ++/** ++* Method entry for static (non-native) methods: ++* int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end) ++* int java.util.zip.CRC32C.updateDirectByteBuffer(int crc, long address, int off, int end) ++*/ ++address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { ++ if (UseCRC32CIntrinsics) { ++ address entry = __ pc(); ++ Unimplemented(); ++ return entry; ++ } ++ return NULL; ++} ++ ++// ++// Various method entries ++// ++ ++address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) { ++ if (!InlineIntrinsics) return NULL; // Generate a vanilla entry ++ ++ // These don't need a safepoint check because they aren't virtually ++ // callable. We won't enter these intrinsics from compiled code. ++ // If in the future we added an intrinsic which was virtually callable ++ // we'd have to worry about how to safepoint so that this code is used. 
++ ++ // mathematical functions inlined by compiler ++ // (interpreter must provide identical implementation ++ // in order to avoid monotonicity bugs when switching ++ // from interpreter to compiler in the middle of some ++ // computation) ++ // ++ // stack: ++ // [ arg ] <-- sp ++ // [ arg ] ++ // retaddr in ra ++ ++ address entry_point = NULL; ++ switch (kind) { ++ case Interpreter::java_lang_math_abs: ++ entry_point = __ pc(); ++ __ ldc1(F12, SP, 0); ++ __ abs_d(F0, F12); ++ __ move(SP, Rsender); ++ break; ++ case Interpreter::java_lang_math_sqrt: ++ entry_point = __ pc(); ++ __ ldc1(F12, SP, 0); ++ __ sqrt_d(F0, F12); ++ __ move(SP, Rsender); ++ break; ++ case Interpreter::java_lang_math_sin : ++ case Interpreter::java_lang_math_cos : ++ case Interpreter::java_lang_math_tan : ++ case Interpreter::java_lang_math_log : ++ case Interpreter::java_lang_math_log10 : ++ case Interpreter::java_lang_math_exp : ++ entry_point = __ pc(); ++ __ ldc1(F12, SP, 0); ++ __ move(SP, Rsender); ++ __ dmtc1(RA, F24); ++ __ dmtc1(SP, F25); ++ __ dins(SP, R0, 0, exact_log2(StackAlignmentInBytes)); ++ generate_transcendental_entry(kind, 1); ++ __ dmfc1(SP, F25); ++ __ dmfc1(RA, F24); ++ break; ++ case Interpreter::java_lang_math_pow : ++ entry_point = __ pc(); ++ __ ldc1(F12, SP, 2 * Interpreter::stackElementSize); ++ __ ldc1(F13, SP, 0); ++ __ move(SP, Rsender); ++ __ dmtc1(RA, F24); ++ __ dmtc1(SP, F25); ++ __ dins(SP, R0, 0, exact_log2(StackAlignmentInBytes)); ++ generate_transcendental_entry(kind, 2); ++ __ dmfc1(SP, F25); ++ __ dmfc1(RA, F24); ++ break; ++ case Interpreter::java_lang_math_fmaD : ++ if (UseFMA) { ++ entry_point = __ pc(); ++ __ ldc1(F12, SP, 4 * Interpreter::stackElementSize); ++ __ ldc1(F13, SP, 2 * Interpreter::stackElementSize); ++ __ ldc1(F14, SP, 0); ++ __ madd_d(F0, F14, F13, F12); ++ __ move(SP, Rsender); ++ } ++ break; ++ case Interpreter::java_lang_math_fmaF : ++ if (UseFMA) { ++ entry_point = __ pc(); ++ __ lwc1(F12, SP, 2 * Interpreter::stackElementSize); ++ __ lwc1(F13, SP, Interpreter::stackElementSize); ++ __ lwc1(F14, SP, 0); ++ __ madd_s(F0, F14, F13, F12); ++ __ move(SP, Rsender); ++ } ++ break; ++ default: ++ ; ++ } ++ if (entry_point) { ++ __ jr(RA); ++ __ delayed()->nop(); ++ } ++ ++ return entry_point; ++} ++ ++ // double trigonometrics and transcendentals ++ // static jdouble dsin(jdouble x); ++ // static jdouble dcos(jdouble x); ++ // static jdouble dtan(jdouble x); ++ // static jdouble dlog(jdouble x); ++ // static jdouble dlog10(jdouble x); ++ // static jdouble dexp(jdouble x); ++ // static jdouble dpow(jdouble x, jdouble y); ++ ++void TemplateInterpreterGenerator::generate_transcendental_entry(AbstractInterpreter::MethodKind kind, int fpargs) { ++ address fn; ++ switch (kind) { ++ case Interpreter::java_lang_math_sin : ++ if (StubRoutines::dsin() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dsin); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dsin()); ++ } ++ break; ++ case Interpreter::java_lang_math_cos : ++ if (StubRoutines::dcos() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dcos); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dcos()); ++ } ++ break; ++ case Interpreter::java_lang_math_tan : ++ if (StubRoutines::dtan() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dtan); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dtan()); ++ } ++ break; ++ case Interpreter::java_lang_math_log : ++ if (StubRoutines::dlog() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, 
SharedRuntime::dlog); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dlog()); ++ } ++ break; ++ case Interpreter::java_lang_math_log10 : ++ if (StubRoutines::dlog10() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dlog10); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dlog10()); ++ } ++ break; ++ case Interpreter::java_lang_math_exp : ++ if (StubRoutines::dexp() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dexp); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dexp()); ++ } ++ break; ++ case Interpreter::java_lang_math_pow : ++ if (StubRoutines::dpow() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dpow); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dpow()); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ fn = NULL; // unreachable ++ } ++ __ li(T9, fn); ++ __ jalr(T9); ++ __ delayed()->nop(); ++} ++ ++// Abstract method entry ++// Attempt to execute abstract method. Throw exception ++address TemplateInterpreterGenerator::generate_abstract_entry(void) { ++ ++ // Rmethod: methodOop ++ // V0: receiver (unused) ++ // Rsender : sender 's sp ++ address entry_point = __ pc(); ++ ++ // abstract method entry ++ // throw exception ++ // adjust stack to what a normal return would do ++ __ empty_expression_stack(); ++ __ restore_bcp(); ++ __ restore_locals(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodErrorWithMethod), Rmethod); ++ // the call_VM checks for exception, so we should never return here. ++ __ should_not_reach_here(); ++ ++ return entry_point; ++} ++ ++ ++const int method_offset = frame::interpreter_frame_method_offset * wordSize; ++const int bci_offset = frame::interpreter_frame_bcp_offset * wordSize; ++const int locals_offset = frame::interpreter_frame_locals_offset * wordSize; ++ ++//----------------------------------------------------------------------------- ++ ++address TemplateInterpreterGenerator::generate_StackOverflowError_handler() { ++ address entry = __ pc(); ++ ++#ifdef ASSERT ++ { ++ Label L; ++ __ addiu(T1, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ subu(T1, T1, SP); // T1 = maximal sp for current fp ++ __ bgez(T1, L); // check if frame is complete ++ __ delayed()->nop(); ++ __ stop("interpreter frame not set up"); ++ __ bind(L); ++ } ++#endif // ASSERT ++ // Restore bcp under the assumption that the current frame is still ++ // interpreted ++ // FIXME: please change the func restore_bcp ++ // S0 is the conventional register for bcp ++ __ restore_bcp(); ++ ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ // throw exception ++ // FIXME: why do not pass parameter thread ? ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_StackOverflowError)); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_ArrayIndexOutOfBounds_handler() { ++ address entry = __ pc(); ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ // ??? 
convention: expect array in register A1 ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_ArrayIndexOutOfBoundsException), A1, A2); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_ClassCastException_handler() { ++ address entry = __ pc(); ++ ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ __ empty_FPU_stack(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_ClassCastException), FSR); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_exception_handler_common( ++ const char* name, const char* message, bool pass_oop) { ++ assert(!pass_oop || message == NULL, "either oop or message but not both"); ++ address entry = __ pc(); ++ ++ // expression stack must be empty before entering the VM if an exception happened ++ __ empty_expression_stack(); ++ // setup parameters ++ __ li(A1, (long)name); ++ if (pass_oop) { ++ __ call_VM(V0, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::create_klass_exception), A1, FSR); ++ } else { ++ __ li(A2, (long)message); ++ __ call_VM(V0, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::create_exception), A1, A2); ++ } ++ // throw exception ++ __ jmp(Interpreter::throw_exception_entry(), relocInfo::none); ++ __ delayed()->nop(); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, int step, size_t index_size) { ++ ++ address entry = __ pc(); ++ ++ // Restore stack bottom in case i2c adjusted stack ++ __ ld(SP, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize)); ++ // and NULL it as marker that sp is now tos until next java call ++ __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ __ restore_bcp(); ++ __ restore_locals(); ++ ++ // mdp: T8 ++ // ret: FSR ++ // tmp: T9 ++ if (state == atos) { ++ Register mdp = T8; ++ Register tmp = T9; ++ __ profile_return_type(mdp, FSR, tmp); ++ } ++ ++ ++ const Register cache = T9; ++ const Register index = T3; ++ __ get_cache_and_index_at_bcp(cache, index, 1, index_size); ++ ++ const Register flags = cache; ++ __ dsll(AT, index, Address::times_ptr); ++ __ daddu(AT, cache, AT); ++ __ lw(flags, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); ++ __ andi(flags, flags, ConstantPoolCacheEntry::parameter_size_mask); ++ __ dsll(AT, flags, Interpreter::logStackElementSize); ++ __ daddu(SP, SP, AT); ++ ++ Register java_thread; ++#ifndef OPT_THREAD ++ java_thread = T9; ++ __ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ ++ __ check_and_handle_popframe(java_thread); ++ __ check_and_handle_earlyret(java_thread); ++ ++ __ dispatch_next(state, step); ++ ++ return entry; ++} ++ ++ ++address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state, ++ int step, ++ address continuation) { ++ address entry = __ pc(); ++ // NULL last_sp until next java call ++ __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ restore_bcp(); ++ __ restore_locals(); ++ // handle exceptions ++ { ++ Label L; ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_pending_exception)); ++ __ should_not_reach_here(); ++ __ bind(L); ++ } ++ if (continuation == NULL) { ++ __ dispatch_next(state, 
step); ++ } else { ++ __ jump_to_entry(continuation); ++ __ delayed()->nop(); ++ } ++ return entry; ++} ++ ++int AbstractInterpreter::BasicType_as_index(BasicType type) { ++ int i = 0; ++ switch (type) { ++ case T_BOOLEAN: i = 0; break; ++ case T_CHAR : i = 1; break; ++ case T_BYTE : i = 2; break; ++ case T_SHORT : i = 3; break; ++ case T_INT : // fall through ++ case T_LONG : // fall through ++ case T_VOID : i = 4; break; ++ case T_FLOAT : i = 5; break; ++ case T_DOUBLE : i = 6; break; ++ case T_OBJECT : // fall through ++ case T_ARRAY : i = 7; break; ++ default : ShouldNotReachHere(); ++ } ++ assert(0 <= i && i < AbstractInterpreter::number_of_result_handlers, ++ "index out of bounds"); ++ return i; ++} ++ ++ ++address TemplateInterpreterGenerator::generate_result_handler_for( ++ BasicType type) { ++ address entry = __ pc(); ++ switch (type) { ++ case T_BOOLEAN: __ c2bool(V0); break; ++ case T_CHAR : __ andi(V0, V0, 0xFFFF); break; ++ case T_BYTE : __ sign_extend_byte (V0); break; ++ case T_SHORT : __ sign_extend_short(V0); break; ++ case T_INT : /* nothing to do */ break; ++ case T_FLOAT : /* nothing to do */ break; ++ case T_DOUBLE : /* nothing to do */ break; ++ case T_OBJECT : ++ { ++ __ ld(V0, FP, frame::interpreter_frame_oop_temp_offset * wordSize); ++ __ verify_oop(V0); // and verify it ++ } ++ break; ++ default : ShouldNotReachHere(); ++ } ++ __ jr(RA); // return from result handler ++ __ delayed()->nop(); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_safept_entry_for( ++ TosState state, ++ address runtime_entry) { ++ address entry = __ pc(); ++ __ push(state); ++ __ call_VM(noreg, runtime_entry); ++ __ dispatch_via(vtos, Interpreter::_normal_table.table_for(vtos)); ++ return entry; ++} ++ ++ ++ ++// Helpers for commoning out cases in the various type of method entries. 
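The result handlers generated a little further up normalize the raw value returned in V0 for each BasicType before the interpreter consumes it. A compact model of those conversions (the type tags and function name here are illustrative, not HotSpot API):

    #include <cstdint>

    // Booleans are canonicalized to 0/1, chars are zero-extended to 16 bits,
    // bytes and shorts are sign-extended; ints, longs, floats, doubles and
    // oops pass through unchanged (oops are additionally reloaded and verified).
    intptr_t normalize_result(intptr_t raw, char type_tag) {
      switch (type_tag) {
        case 'Z': return raw != 0;                 // T_BOOLEAN: c2bool
        case 'C': return raw & 0xFFFF;             // T_CHAR:    andi 0xFFFF
        case 'B': return (int8_t)raw;              // T_BYTE:    sign_extend_byte
        case 'S': return (int16_t)raw;             // T_SHORT:   sign_extend_short
        default:  return raw;                      // everything else
      }
    }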
++// ++ ++ ++// increment invocation count & check for overflow ++// ++// Note: checking for negative value instead of overflow ++// so we have a 'sticky' overflow test ++// ++// prerequisites : method in T0, invocation counter in T3 ++void TemplateInterpreterGenerator::generate_counter_incr( ++ Label* overflow, ++ Label* profile_method, ++ Label* profile_method_continue) { ++ Label done; ++ const Address invocation_counter(FSR, in_bytes(MethodCounters::invocation_counter_offset()) ++ + in_bytes(InvocationCounter::counter_offset())); ++ const Address backedge_counter (FSR, in_bytes(MethodCounters::backedge_counter_offset()) ++ + in_bytes(InvocationCounter::counter_offset())); ++ ++ __ get_method_counters(Rmethod, FSR, done); ++ ++ if (ProfileInterpreter) { // %%% Merge this into methodDataOop ++ __ lw(T9, FSR, in_bytes(MethodCounters::interpreter_invocation_counter_offset())); ++ __ incrementl(T9, 1); ++ __ sw(T9, FSR, in_bytes(MethodCounters::interpreter_invocation_counter_offset())); ++ } ++ // Update standard invocation counters ++ __ lw(T3, invocation_counter); ++ __ increment(T3, InvocationCounter::count_increment); ++ __ sw(T3, invocation_counter); // save invocation count ++ ++ __ lw(FSR, backedge_counter); // load backedge counter ++ __ li(AT, InvocationCounter::count_mask_value); // mask out the status bits ++ __ andr(FSR, FSR, AT); ++ ++ __ daddu(T3, T3, FSR); // add both counters ++ ++ if (ProfileInterpreter && profile_method != NULL) { ++ // Test to see if we should create a method data oop ++ if (Assembler::is_simm16(InvocationCounter::InterpreterProfileLimit)) { ++ __ slti(AT, T3, InvocationCounter::InterpreterProfileLimit); ++ } else { ++ __ li(AT, (long)&InvocationCounter::InterpreterProfileLimit); ++ __ lw(AT, AT, 0); ++ __ slt(AT, T3, AT); ++ } ++ ++ __ bne_far(AT, R0, *profile_method_continue); ++ __ delayed()->nop(); ++ ++ // if no method data exists, go to profile_method ++ __ test_method_data_pointer(FSR, *profile_method); ++ } ++ ++ if (Assembler::is_simm16(CompileThreshold)) { ++ __ srl(AT, T3, InvocationCounter::count_shift); ++ __ slti(AT, AT, CompileThreshold); ++ } else { ++ __ li(AT, (long)&InvocationCounter::InterpreterInvocationLimit); ++ __ lw(AT, AT, 0); ++ __ slt(AT, T3, AT); ++ } ++ ++ __ beq_far(AT, R0, *overflow); ++ __ delayed()->nop(); ++ __ bind(done); ++} ++ ++void TemplateInterpreterGenerator::generate_counter_overflow(Label& do_continue) { ++ ++ // Asm interpreter on entry ++ // S7 - locals ++ // S0 - bcp ++ // Rmethod - method ++ // FP - interpreter frame ++ ++ // On return (i.e. jump to entry_point) ++ // Rmethod - method ++ // RA - return address of interpreter caller ++ // tos - the last parameter to Java method ++ // SP - sender_sp ++ ++ // the bcp is valid if and only if it's not null ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::frequency_counter_overflow), R0); ++ __ ld(Rmethod, FP, method_offset); ++ // Preserve invariant that S0/S7 contain bcp/locals of sender frame ++ __ b_far(do_continue); ++ __ delayed()->nop(); ++} ++ ++// See if we've got enough room on the stack for locals plus overhead. ++// The expression stack grows down incrementally, so the normal guard ++// page mechanism will work for that. ++// ++// NOTE: Since the additional locals are also always pushed (wasn't ++// obvious in generate_method_entry) so the guard should work for them ++// too. 
++// ++// Args: ++// T2: number of additional locals this frame needs (what we must check) ++// T0: Method* ++// ++void TemplateInterpreterGenerator::generate_stack_overflow_check(void) { ++ // see if we've got enough room on the stack for locals plus overhead. ++ // the expression stack grows down incrementally, so the normal guard ++ // page mechanism will work for that. ++ // ++ // Registers live on entry: ++ // ++ // T0: Method* ++ // T2: number of additional locals this frame needs (what we must check) ++ ++ // NOTE: since the additional locals are also always pushed (wasn't obvious in ++ // generate_method_entry) so the guard should work for them too. ++ // ++ ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ ++ // total overhead size: entry_size + (saved fp thru expr stack bottom). ++ // be sure to change this if you add/subtract anything to/from the overhead area ++ const int overhead_size = -(frame::interpreter_frame_initial_sp_offset*wordSize) ++ + entry_size; ++ ++ const int page_size = os::vm_page_size(); ++ ++ Label after_frame_check; ++ ++ // see if the frame is greater than one page in size. If so, ++ // then we need to verify there is enough stack space remaining ++ // for the additional locals. ++ __ move(AT, (page_size - overhead_size) / Interpreter::stackElementSize); ++ __ slt(AT, AT, T2); ++ __ beq(AT, R0, after_frame_check); ++ __ delayed()->nop(); ++ ++ // compute sp as if this were going to be the last frame on ++ // the stack before the red zone ++#ifndef OPT_THREAD ++ Register thread = T1; ++ __ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ ++ // locals + overhead, in bytes ++ __ dsll(T3, T2, Interpreter::logStackElementSize); ++ __ daddiu(T3, T3, overhead_size); // locals * 4 + overhead_size --> T3 ++ ++#ifdef ASSERT ++ Label stack_base_okay, stack_size_okay; ++ // verify that thread stack base is non-zero ++ __ ld(AT, thread, in_bytes(Thread::stack_base_offset())); ++ __ bne(AT, R0, stack_base_okay); ++ __ delayed()->nop(); ++ __ stop("stack base is zero"); ++ __ bind(stack_base_okay); ++ // verify that thread stack size is non-zero ++ __ ld(AT, thread, in_bytes(Thread::stack_size_offset())); ++ __ bne(AT, R0, stack_size_okay); ++ __ delayed()->nop(); ++ __ stop("stack size is zero"); ++ __ bind(stack_size_okay); ++#endif ++ ++ // Add stack base to locals and subtract stack size ++ __ ld(AT, thread, in_bytes(Thread::stack_base_offset())); // stack_base --> AT ++ __ daddu(T3, T3, AT); // locals * 4 + overhead_size + stack_base--> T3 ++ __ ld(AT, thread, in_bytes(Thread::stack_size_offset())); // stack_size --> AT ++ __ dsubu(T3, T3, AT); // locals * 4 + overhead_size + stack_base - stack_size --> T3 ++ ++ // Use the bigger size for banging. ++ const int max_bang_size = (int)MAX2(JavaThread::stack_shadow_zone_size(), JavaThread::stack_guard_zone_size()); ++ ++ // add in the redzone and yellow size ++ __ move(AT, max_bang_size); ++ __ addu(T3, T3, AT); ++ ++ // check against the current stack bottom ++ __ slt(AT, T3, SP); ++ __ bne(AT, R0, after_frame_check); ++ __ delayed()->nop(); ++ ++ // Note: the restored frame is not necessarily interpreted. ++ // Use the shared runtime version of the StackOverflowError. 
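The limit computed above amounts to: the new frame fits only if SP stays above stack_base - stack_size plus the space for the extra locals, the fixed frame overhead, and the guard/shadow zones that will be banged; when it does not, the code below restores the sender SP and jumps to the shared StackOverflowError stub. A standalone sketch of that inequality (parameter names are mine):

    #include <cstddef>
    #include <cstdint>

    // Returns true when the prospective frame bottom stays above the guard
    // pages, mirroring "slt AT, T3, SP; bne AT, R0, after_frame_check".
    bool frame_fits(uintptr_t sp, uintptr_t stack_base, size_t stack_size,
                    size_t extra_locals, size_t element_size,
                    size_t overhead_bytes, size_t bang_bytes) {
      uintptr_t limit = stack_base - stack_size      // lowest usable address
                      + extra_locals * element_size  // space for additional locals
                      + overhead_bytes               // fixed interpreter overhead
                      + bang_bytes;                  // guard + shadow zone sizes
      return limit < sp;
    }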
++ __ move(SP, Rsender); ++ assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "stub not yet generated"); ++ __ jmp(StubRoutines::throw_StackOverflowError_entry(), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ ++ // all done with frame size check ++ __ bind(after_frame_check); ++} ++ ++// Allocate monitor and lock method (asm interpreter) ++// Rmethod - Method* ++void TemplateInterpreterGenerator::lock_method(void) { ++ // synchronize method ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ ++#ifdef ASSERT ++ { Label L; ++ __ lw(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(T0, T0, JVM_ACC_SYNCHRONIZED); ++ __ bne(T0, R0, L); ++ __ delayed()->nop(); ++ __ stop("method doesn't need synchronization"); ++ __ bind(L); ++ } ++#endif // ASSERT ++ // get synchronization object ++ { ++ Label done; ++ __ lw(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(T2, T0, JVM_ACC_STATIC); ++ __ ld(T0, LVP, Interpreter::local_offset_in_bytes(0)); ++ __ beq(T2, R0, done); ++ __ delayed()->nop(); ++ __ load_mirror(T0, Rmethod, T9); ++ __ bind(done); ++ } ++ // add space for monitor & lock ++ __ daddiu(SP, SP, (-1) * entry_size); // add space for a monitor entry ++ __ sd(SP, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ // set new monitor block top ++ __ sd(T0, SP, BasicObjectLock::obj_offset_in_bytes()); // store object ++ // FIXME: I do not know what lock_object will do and what it will need ++ __ move(c_rarg0, SP); // object address ++ __ lock_object(c_rarg0); ++} ++ ++// Generate a fixed interpreter frame. This is identical setup for ++// interpreted methods and for native methods hence the shared code. ++void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { ++ ++ // [ local var m-1 ] <--- sp ++ // ... ++ // [ local var 0 ] ++ // [ argumnet word n-1 ] <--- T0(sender's sp) ++ // ... 
++ // [ argument word 0 ] <--- S7 ++ ++ // initialize fixed part of activation frame ++ // sender's sp in Rsender ++ int i = 0; ++ int frame_size = 10; ++#ifndef CORE ++ ++frame_size; ++#endif ++ __ daddiu(SP, SP, (-frame_size) * wordSize); ++ __ sd(RA, SP, (frame_size - 1) * wordSize); // save return address ++ __ sd(FP, SP, (frame_size - 2) * wordSize); // save sender's fp ++ __ daddiu(FP, SP, (frame_size - 2) * wordSize); ++ __ sd(Rsender, FP, (-++i) * wordSize); // save sender's sp ++ __ sd(R0, FP,(-++i) * wordSize); //save last_sp as null ++ __ sd(LVP, FP, (-++i) * wordSize); // save locals offset ++ __ ld(BCP, Rmethod, in_bytes(Method::const_offset())); // get constMethodOop ++ __ daddiu(BCP, BCP, in_bytes(ConstMethod::codes_offset())); // get codebase ++ __ sd(Rmethod, FP, (-++i) * wordSize); // save Method* ++ // Get mirror and store it in the frame as GC root for this Method* ++ __ load_mirror(T2, Rmethod, T9); ++ __ sd(T2, FP, (-++i) * wordSize); // Mirror ++#ifndef CORE ++ if (ProfileInterpreter) { ++ Label method_data_continue; ++ __ ld(AT, Rmethod, in_bytes(Method::method_data_offset())); ++ __ beq(AT, R0, method_data_continue); ++ __ delayed()->nop(); ++ __ daddiu(AT, AT, in_bytes(MethodData::data_offset())); ++ __ bind(method_data_continue); ++ __ sd(AT, FP, (-++i) * wordSize); ++ } else { ++ __ sd(R0, FP, (-++i) * wordSize); ++ } ++#endif // !CORE ++ ++ __ ld(T2, Rmethod, in_bytes(Method::const_offset())); ++ __ ld(T2, T2, in_bytes(ConstMethod::constants_offset())); ++ __ ld(T2, T2, ConstantPool::cache_offset_in_bytes()); ++ __ sd(T2, FP, (-++i) * wordSize); // set constant pool cache ++ if (native_call) { ++ __ sd(R0, FP, (-++i) * wordSize); // no bcp ++ } else { ++ __ sd(BCP, FP, (-++i) * wordSize); // set bcp ++ } ++ __ sd(SP, FP, (-++i) * wordSize); // reserve word for pointer to expression stack bottom ++ assert(i + 2 == frame_size, "i + 2 should be equal to frame_size"); ++} ++ ++// End of helpers ++ ++// Various method entries ++//------------------------------------------------------------------------------------------------------------------------ ++// ++// ++ ++// Method entry for java.lang.ref.Reference.get. ++address TemplateInterpreterGenerator::generate_Reference_get_entry(void) { ++ address entry = __ pc(); ++ Label slow_path; ++ __ b(slow_path); ++ __ delayed()->nop(); ++ ++ // generate a vanilla interpreter entry as the slow path ++ __ bind(slow_path); ++ __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals)); ++ __ delayed()->nop(); ++ return entry; ++} ++ ++// Interpreter stub for calling a native method. (asm interpreter) ++// This sets up a somewhat different looking stack for calling the ++// native method than the typical interpreter frame setup. 
++address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { ++ // determine code generation flags ++ bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; ++ // Rsender: sender's sp ++ // Rmethod: Method* ++ address entry_point = __ pc(); ++ ++#ifndef CORE ++ const Address invocation_counter(Rmethod,in_bytes(MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset())); ++#endif ++ ++ // get parameter size (always needed) ++ // the size in the java stack ++ __ ld(V0, Rmethod, in_bytes(Method::const_offset())); ++ __ lhu(V0, V0, in_bytes(ConstMethod::size_of_parameters_offset())); ++ ++ // native calls don't need the stack size check since they have no expression stack ++ // and the arguments are already on the stack and we only add a handful of words ++ // to the stack ++ ++ // Rmethod: Method* ++ // V0: size of parameters ++ // Layout of frame at this point ++ // ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ ++ // for natives the size of locals is zero ++ ++ // compute beginning of parameters (S7) ++ __ dsll(LVP, V0, Address::times_8); ++ __ daddiu(LVP, LVP, (-1) * wordSize); ++ __ daddu(LVP, LVP, SP); ++ ++ ++ // add 2 zero-initialized slots for native calls ++ // 1 slot for native oop temp offset (setup via runtime) ++ // 1 slot for static native result handler3 (setup via runtime) ++ __ push2(R0, R0); ++ ++ // Layout of frame at this point ++ // [ method holder mirror ] <--- sp ++ // [ result type info ] ++ // [ argument word n-1 ] <--- T0 ++ // ... ++ // [ argument word 0 ] <--- LVP ++ ++ ++#ifndef CORE ++ if (inc_counter) __ lw(T3, invocation_counter); // (pre-)fetch invocation count ++#endif ++ ++ // initialize fixed part of activation frame ++ generate_fixed_frame(true); ++ // after this function, the layout of frame is as following ++ // ++ // [ monitor block top ] <--- sp ( the top monitor entry ) ++ // [ byte code pointer (0) ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Mirror ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ method holder mirror ] ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- sender's sp ++ // ... ++ // [ argument word 0 ] <--- S7 ++ ++ ++ // make sure method is native & not abstract ++#ifdef ASSERT ++ __ lw(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ { ++ Label L; ++ __ andi(AT, T0, JVM_ACC_NATIVE); ++ __ bne(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("tried to execute native method as non-native"); ++ __ bind(L); ++ } ++ { ++ Label L; ++ __ andi(AT, T0, JVM_ACC_ABSTRACT); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("tried to execute abstract method in interpreter"); ++ __ bind(L); ++ } ++#endif ++ ++ // Since at this point in the method invocation the exception handler ++ // would try to exit the monitor of synchronized methods which hasn't ++ // been entered yet, we set the thread local variable ++ // _do_not_unlock_if_synchronized to true. The remove_activation will ++ // check this flag. 
++ Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ move(AT, (int)true); ++ __ sb(AT, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++#ifndef CORE ++ // increment invocation count & check for overflow ++ Label invocation_counter_overflow; ++ if (inc_counter) { ++ generate_counter_incr(&invocation_counter_overflow, NULL, NULL); ++ } ++ ++ Label continue_after_compile; ++ __ bind(continue_after_compile); ++#endif // CORE ++ ++ bang_stack_shadow_pages(true); ++ ++ // reset the _do_not_unlock_if_synchronized flag ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ sb(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++ // check for synchronized methods ++ // Must happen AFTER invocation_counter check and stack overflow check, ++ // so method is not locked if overflows. ++ if (synchronized) { ++ lock_method(); ++ } else { ++ // no synchronization necessary ++#ifdef ASSERT ++ { ++ Label L; ++ __ lw(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(AT, T0, JVM_ACC_SYNCHRONIZED); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("method needs synchronization"); ++ __ bind(L); ++ } ++#endif ++ } ++ ++ // after method_lock, the layout of frame is as following ++ // ++ // [ monitor entry ] <--- sp ++ // ... ++ // [ monitor entry ] ++ // [ monitor block top ] ( the top monitor entry ) ++ // [ byte code pointer (0) ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Mirror ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ method holder mirror ] ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... ++ // [ argument word 0 ] <--- S7 ++ ++ // start execution ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld(AT, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ beq(AT, SP, L); ++ __ delayed()->nop(); ++ __ stop("broken stack frame setup in interpreter in asm"); ++ __ bind(L); ++ } ++#endif ++ ++ // jvmti/jvmpi support ++ __ notify_method_entry(); ++ ++ // work registers ++ const Register method = Rmethod; ++ const Register t = T8; ++ ++ __ get_method(method); ++ { ++ Label L, Lstatic; ++ __ ld(t,method,in_bytes(Method::const_offset())); ++ __ lhu(t, t, in_bytes(ConstMethod::size_of_parameters_offset())); ++ // MIPS n64 ABI: caller does not reserve space for the register auguments. ++ // A0 and A1(if needed) ++ __ lw(AT, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(AT, AT, JVM_ACC_STATIC); ++ __ beq(AT, R0, Lstatic); ++ __ delayed()->nop(); ++ __ daddiu(t, t, 1); ++ __ bind(Lstatic); ++ __ daddiu(t, t, -7); ++ __ blez(t, L); ++ __ delayed()->nop(); ++ __ dsll(t, t, Address::times_8); ++ __ dsubu(SP, SP, t); ++ __ bind(L); ++ } ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ __ move(AT, SP); ++ // [ ] <--- sp ++ // ... (size of parameters - 8 ) ++ // [ monitor entry ] ++ // ... ++ // [ monitor entry ] ++ // [ monitor block top ] ( the top monitor entry ) ++ // [ byte code pointer (0) ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Mirror ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ method holder mirror ] ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... 
++ // [ argument word 0 ] <--- LVP ++ ++ // get signature handler ++ { ++ Label L; ++ __ ld(T9, method, in_bytes(Method::signature_handler_offset())); ++ __ bne(T9, R0, L); ++ __ delayed()->nop(); ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::prepare_native_call), method); ++ __ get_method(method); ++ __ ld(T9, method, in_bytes(Method::signature_handler_offset())); ++ __ bind(L); ++ } ++ ++ // call signature handler ++ // FIXME: when change codes in InterpreterRuntime, note this point ++ // from: begin of parameters ++ assert(InterpreterRuntime::SignatureHandlerGenerator::from() == LVP, "adjust this code"); ++ // to: current sp ++ assert(InterpreterRuntime::SignatureHandlerGenerator::to () == SP, "adjust this code"); ++ // temp: T3 ++ assert(InterpreterRuntime::SignatureHandlerGenerator::temp() == t , "adjust this code"); ++ ++ __ jalr(T9); ++ __ delayed()->nop(); ++ __ get_method(method); ++ ++ // ++ // if native function is static, and its second parameter has type length of double word, ++ // and first parameter has type length of word, we have to reserve one word ++ // for the first parameter, according to mips o32 abi. ++ // if native function is not static, and its third parameter has type length of double word, ++ // and second parameter has type length of word, we have to reserve one word for the second ++ // parameter. ++ // ++ ++ ++ // result handler is in V0 ++ // set result handler ++ __ sd(V0, FP, (frame::interpreter_frame_result_handler_offset)*wordSize); ++ ++#define FIRSTPARA_SHIFT_COUNT 5 ++#define SECONDPARA_SHIFT_COUNT 9 ++#define THIRDPARA_SHIFT_COUNT 13 ++#define PARA_MASK 0xf ++ ++ // pass mirror handle if static call ++ { ++ Label L; ++ __ lw(t, method, in_bytes(Method::access_flags_offset())); ++ __ andi(AT, t, JVM_ACC_STATIC); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ ++ // get mirror ++ __ load_mirror(t, method, T9); ++ // copy mirror into activation frame ++ __ sd(t, FP, frame::interpreter_frame_oop_temp_offset * wordSize); ++ // pass handle to mirror ++ __ daddiu(t, FP, frame::interpreter_frame_oop_temp_offset * wordSize); ++ __ move(A1, t); ++ __ bind(L); ++ } ++ ++ // [ mthd holder mirror ptr ] <--- sp --------------------| (only for static method) ++ // [ ] | ++ // ... size of parameters(or +1) | ++ // [ monitor entry ] | ++ // ... | ++ // [ monitor entry ] | ++ // [ monitor block top ] ( the top monitor entry ) | ++ // [ byte code pointer (0) ] (if native, bcp = 0) | ++ // [ constant pool cache ] | ++ // [ Mirror ] | ++ // [ Method* ] | ++ // [ locals offset ] | ++ // [ sender's sp ] | ++ // [ sender's fp ] | ++ // [ return address ] <--- fp | ++ // [ method holder mirror ] <----------------------------| ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... 
++ // [ argument word 0 ] <--- S7 ++ ++ // get native function entry point ++ { Label L; ++ __ ld(T9, method, in_bytes(Method::native_function_offset())); ++ __ li(V1, SharedRuntime::native_method_throw_unsatisfied_link_error_entry()); ++ __ bne(V1, T9, L); ++ __ delayed()->nop(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::prepare_native_call), method); ++ __ get_method(method); ++ __ ld(T9, method, in_bytes(Method::native_function_offset())); ++ __ bind(L); ++ } ++ ++ // pass JNIEnv ++ // native function in T9 ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ daddiu(t, thread, in_bytes(JavaThread::jni_environment_offset())); ++ __ move(A0, t); ++ // [ jni environment ] <--- sp ++ // [ mthd holder mirror ptr ] ---------------------------->| (only for static method) ++ // [ ] | ++ // ... size of parameters | ++ // [ monitor entry ] | ++ // ... | ++ // [ monitor entry ] | ++ // [ monitor block top ] ( the top monitor entry ) | ++ // [ byte code pointer (0) ] (if native, bcp = 0) | ++ // [ constant pool cache ] | ++ // [ Mirror ] | ++ // [ Method* ] | ++ // [ locals offset ] | ++ // [ sender's sp ] | ++ // [ sender's fp ] | ++ // [ return address ] <--- fp | ++ // [ method holder mirror ] <----------------------------| ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... ++ // [ argument word 0 ] <--- S7 ++ ++ // set_last_Java_frame_before_call ++ __ sd(FP, thread, in_bytes(JavaThread::last_Java_fp_offset())); ++ // Change state to native (we save the return address in the thread, since it might not ++ // be pushed on the stack when we do a a stack traversal). It is enough that the pc() ++ // points into the right code segment. It does not have to be the correct return pc. ++ __ li(t, __ pc()); ++ __ sd(t, thread, in_bytes(JavaThread::last_Java_pc_offset())); ++ __ sd(SP, thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ ++ // change thread state ++#ifdef ASSERT ++ { ++ Label L; ++ __ lw(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ __ daddiu(t, t, (-1) * _thread_in_Java); ++ __ beq(t, R0, L); ++ __ delayed()->nop(); ++ __ stop("Wrong thread state in native stub"); ++ __ bind(L); ++ } ++#endif ++ ++ __ move(t, _thread_in_native); ++ if(os::is_MP()) { ++ __ sync(); // store release ++ } ++ __ sw(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ ++ // call native method ++ __ jalr(T9); ++ __ delayed()->nop(); ++ // result potentially in V0 or F0 ++ ++ ++ // via _last_native_pc and not via _last_jave_sp ++ // NOTE: the order of theses push(es) is known to frame::interpreter_frame_result. ++ // If the order changes or anything else is added to the stack the code in ++ // interpreter_frame_result will have to be changed. ++ //FIXME, should modify here ++ // save return value to keep the value from being destroyed by other calls ++ __ push(dtos); ++ __ push(ltos); ++ ++ // change thread state ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ move(t, _thread_in_native_trans); ++ if(os::is_MP()) { ++ __ sync(); // store release ++ } ++ __ sw(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ ++ if(os::is_MP()) { ++ if (UseMembar) { ++ // Force this write out before the read below ++ __ sync(); ++ } else { ++ // Write serialization page so VM thread can do a pseudo remote membar. ++ // We use the current thread pointer to calculate a thread specific ++ // offset to write to within the page. This minimizes bus traffic ++ // due to cache line collision. 
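// Editor's note: illustrative sketch only, not part of the original patch. It
// outlines the thread-state hand-shake the surrounding code performs around the
// native call; the GCC atomic builtins stand in for the sync()/sw pairs, and the
// callback parameters are hypothetical.
enum NativeCallState { state_in_Java, state_in_native, state_in_native_trans };

void native_call_protocol(int* state, void (*native_fn)(void),
                          int (*safepoint_or_suspend_pending)(void),
                          void (*block_for_vm)(void)) {
  __atomic_store_n(state, state_in_native, __ATOMIC_RELEASE);   // publish before running native code
  native_fn();                                                  // the jalr(T9) above
  __atomic_store_n(state, state_in_native_trans, __ATOMIC_RELEASE);
  __atomic_thread_fence(__ATOMIC_SEQ_CST);                      // sync() or the serialization page write
  if (safepoint_or_suspend_pending()) {
    block_for_vm();                                             // check_special_condition_for_native_trans
  }
  __atomic_store_n(state, state_in_Java, __ATOMIC_RELEASE);
}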
++ __ serialize_memory(thread, A0); ++ } ++ } ++ ++ // check for safepoint operation in progress and/or pending suspend requests ++ { Label Continue; ++ ++ // Don't use call_VM as it will see a possible pending exception and forward it ++ // and never return here preventing us from clearing _last_native_pc down below. ++ // Also can't use call_VM_leaf either as it will check to see if BCP & LVP are ++ // preserved and correspond to the bcp/locals pointers. So we do a runtime call ++ // by hand. ++ // ++ Label slow_path; ++ ++ __ safepoint_poll_acquire(slow_path, thread); ++ __ lw(AT, thread, in_bytes(JavaThread::suspend_flags_offset())); ++ __ beq(AT, R0, Continue); ++ __ delayed()->nop(); ++ __ bind(slow_path); ++ __ move(A0, thread); ++ __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), ++ relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ //add for compressedoops ++ __ reinit_heapbase(); ++ __ bind(Continue); ++ } ++ ++ // change thread state ++ __ move(t, _thread_in_Java); ++ if(os::is_MP()) { ++ __ sync(); // store release ++ } ++ __ sw(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ __ reset_last_Java_frame(thread, true); ++ ++ if (CheckJNICalls) { ++ // clear_pending_jni_exception_check ++ __ sd(R0, thread, in_bytes(JavaThread::pending_jni_exception_check_fn_offset())); ++ } ++ ++ // reset handle block ++ __ ld(t, thread, in_bytes(JavaThread::active_handles_offset())); ++ __ sw(R0, t, JNIHandleBlock::top_offset_in_bytes()); ++ ++ // If result was an oop then unbox and save it in the frame ++ { ++ Label no_oop; ++ //FIXME, addi only support 16-bit imeditate ++ __ ld(AT, FP, frame::interpreter_frame_result_handler_offset*wordSize); ++ __ li(T0, AbstractInterpreter::result_handler(T_OBJECT)); ++ __ bne(AT, T0, no_oop); ++ __ delayed()->nop(); ++ __ pop(ltos); ++ // Unbox oop result, e.g. JNIHandles::resolve value. ++ __ resolve_jobject(V0, thread, T9); ++ __ sd(V0, FP, (frame::interpreter_frame_oop_temp_offset)*wordSize); ++ // keep stack depth as expected by pushing oop which will eventually be discarded ++ __ push(ltos); ++ __ bind(no_oop); ++ } ++ { ++ Label no_reguard; ++ __ lw(t, thread, in_bytes(JavaThread::stack_guard_state_offset())); ++ __ move(AT, (u1)JavaThread::stack_guard_yellow_reserved_disabled); ++ __ bne(t, AT, no_reguard); ++ __ delayed()->nop(); ++ __ pushad(); ++ __ move(S5_heapbase, SP); ++ __ move(AT, -StackAlignmentInBytes); ++ __ andr(SP, SP, AT); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ __ move(SP, S5_heapbase); ++ __ popad(); ++ //add for compressedoops ++ __ reinit_heapbase(); ++ __ bind(no_reguard); ++ } ++ // restore BCP to have legal interpreter frame, ++ // i.e., bci == 0 <=> BCP == code_base() ++ // Can't call_VM until bcp is within reasonable. ++ __ get_method(method); // method is junk from thread_in_native to now. ++ __ ld(BCP, method, in_bytes(Method::const_offset())); ++ __ lea(BCP, Address(BCP, in_bytes(ConstMethod::codes_offset()))); ++ // handle exceptions (exception handling will handle unlocking!) ++ { ++ Label L; ++ __ ld(t, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(t, R0, L); ++ __ delayed()->nop(); ++ // Note: At some point we may want to unify this with the code used in ++ // call_VM_base(); ++ // i.e., we should use the StubRoutines::forward_exception code. 
For now this ++ // doesn't work here because the sp is not correctly set at this point. ++ __ MacroAssembler::call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_pending_exception)); ++ __ should_not_reach_here(); ++ __ bind(L); ++ } ++ ++ // do unlocking if necessary ++ { ++ Label L; ++ __ lw(t, method, in_bytes(Method::access_flags_offset())); ++ __ andi(t, t, JVM_ACC_SYNCHRONIZED); ++ __ beq(t, R0, L); ++ // the code below should be shared with interpreter macro assembler implementation ++ { ++ Label unlock; ++ // BasicObjectLock will be first in list, ++ // since this is a synchronized method. However, need ++ // to check that the object has not been unlocked by ++ // an explicit monitorexit bytecode. ++ __ delayed()->daddiu(c_rarg0, FP, frame::interpreter_frame_initial_sp_offset * wordSize - (int)sizeof(BasicObjectLock)); ++ // address of first monitor ++ ++ __ ld(t, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ __ bne(t, R0, unlock); ++ __ delayed()->nop(); ++ ++ // Entry already unlocked, need to throw exception ++ __ MacroAssembler::call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ __ should_not_reach_here(); ++ ++ __ bind(unlock); ++ __ unlock_object(c_rarg0); ++ } ++ __ bind(L); ++ } ++ ++ // jvmti/jvmpi support ++ // Note: This must happen _after_ handling/throwing any exceptions since ++ // the exception handler code notifies the runtime of method exits ++ // too. If this happens before, method entry/exit notifications are ++ // not properly paired (was bug - gri 11/22/99). ++ __ notify_method_exit(vtos, InterpreterMacroAssembler::NotifyJVMTI); ++ ++ // restore potential result in V0, ++ // call result handler to restore potential result in ST0 & handle result ++ ++ __ pop(ltos); ++ __ pop(dtos); ++ ++ __ ld(t, FP, (frame::interpreter_frame_result_handler_offset) * wordSize); ++ __ jalr(t); ++ __ delayed()->nop(); ++ ++ ++ // remove activation ++ __ ld(SP, FP, frame::interpreter_frame_sender_sp_offset * wordSize); // get sender sp ++ __ ld(RA, FP, frame::interpreter_frame_return_addr_offset * wordSize); // get return address ++ __ ld(FP, FP, frame::interpreter_frame_sender_fp_offset * wordSize); // restore sender's fp ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++#ifndef CORE ++ if (inc_counter) { ++ // Handle overflow of counter and compile method ++ __ bind(invocation_counter_overflow); ++ generate_counter_overflow(continue_after_compile); ++ // entry_point is the beginning of this ++ // function and checks again for compiled code ++ } ++#endif ++ return entry_point; ++} ++ ++void TemplateInterpreterGenerator::bang_stack_shadow_pages(bool native_call) { ++ // Quick & dirty stack overflow checking: bang the stack & handle trap. ++ // Note that we do the banging after the frame is setup, since the exception ++ // handling code expects to find a valid interpreter frame on the stack. ++ // Doing the banging earlier fails if the caller frame is not an interpreter ++ // frame. ++ // (Also, the exception throwing code expects to unlock any synchronized ++ // method receiever, so do the banging after locking the receiver.) ++ ++ // Bang each page in the shadow zone. We can't assume it's been done for ++ // an interpreter frame with greater than a page of locals, so each page ++ // needs to be checked. Only true for non-native. 
++ if (UseStackBanging) { ++ const int page_size = os::vm_page_size(); ++ const int n_shadow_pages = ((int)JavaThread::stack_shadow_zone_size()) / page_size; ++ const int start_page = native_call ? n_shadow_pages : 1; ++ BLOCK_COMMENT("bang_stack_shadow_pages:"); ++ for (int pages = start_page; pages <= n_shadow_pages; pages++) { ++ __ bang_stack_with_offset(pages*page_size); ++ } ++ } ++} ++ ++// ++// Generic interpreted method entry to (asm) interpreter ++// ++// Layout of frame just at the entry ++// ++// [ argument word n-1 ] <--- sp ++// ... ++// [ argument word 0 ] ++// assume Method* in Rmethod before call this method. ++// prerequisites to the generated stub : the callee Method* in Rmethod ++// note you must save the caller bcp before call the generated stub ++// ++address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) { ++ // determine code generation flags ++ bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; ++ ++ // Rmethod: Method* ++ // Rsender: sender 's sp ++ address entry_point = __ pc(); ++ ++ const Address invocation_counter(Rmethod, ++ in_bytes(MethodCounters::invocation_counter_offset() + InvocationCounter::counter_offset())); ++ ++ // get parameter size (always needed) ++ __ ld(T3, Rmethod, in_bytes(Method::const_offset())); //T3 --> Rmethod._constMethod ++ __ lhu(V0, T3, in_bytes(ConstMethod::size_of_parameters_offset())); ++ ++ // Rmethod: Method* ++ // V0: size of parameters ++ // Rsender: sender 's sp ,could be different frome sp+ wordSize if we call via c2i ++ // get size of locals in words to T2 ++ __ lhu(T2, T3, in_bytes(ConstMethod::size_of_locals_offset())); ++ // T2 = no. of additional locals, locals include parameters ++ __ dsubu(T2, T2, V0); ++ ++ // see if we've got enough room on the stack for locals plus overhead. ++ // Layout of frame at this point ++ // ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ generate_stack_overflow_check(); ++ // after this function, the layout of frame does not change ++ ++ // compute beginning of parameters (LVP) ++ __ dsll(LVP, V0, LogBytesPerWord); ++ __ daddiu(LVP, LVP, (-1) * wordSize); ++ __ daddu(LVP, LVP, SP); ++ ++ // T2 - # of additional locals ++ // allocate space for locals ++ // explicitly initialize locals ++ { ++ Label exit, loop; ++ __ beq(T2, R0, exit); ++ __ delayed()->nop(); ++ ++ __ bind(loop); ++ __ daddiu(SP, SP, (-1) * wordSize); ++ __ daddiu(T2, T2, -1); // until everything initialized ++ __ bne(T2, R0, loop); ++ __ delayed()->sd(R0, SP, 0); // initialize local variables ++ ++ __ bind(exit); ++ } ++ ++ // ++ // [ local var m-1 ] <--- sp ++ // ... ++ // [ local var 0 ] ++ // [ argument word n-1 ] <--- T0? ++ // ... ++ // [ argument word 0 ] <--- LVP ++ ++ // initialize fixed part of activation frame ++ ++ generate_fixed_frame(false); ++ ++ ++ // after this function, the layout of frame is as following ++ // ++ // [ monitor block top ] <--- sp ( the top monitor entry ) ++ // [ byte code pointer ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] <--- fp ++ // [ return address ] ++ // [ local var m-1 ] ++ // ... ++ // [ local var 0 ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... 
++ // [ argument word 0 ] <--- LVP ++ ++ ++ // make sure method is not native & not abstract ++#ifdef ASSERT ++ __ ld(AT, Rmethod, in_bytes(Method::access_flags_offset())); ++ { ++ Label L; ++ __ andi(T2, AT, JVM_ACC_NATIVE); ++ __ beq(T2, R0, L); ++ __ delayed()->nop(); ++ __ stop("tried to execute native method as non-native"); ++ __ bind(L); ++ } ++ { ++ Label L; ++ __ andi(T2, AT, JVM_ACC_ABSTRACT); ++ __ beq(T2, R0, L); ++ __ delayed()->nop(); ++ __ stop("tried to execute abstract method in interpreter"); ++ __ bind(L); ++ } ++#endif ++ ++ // Since at this point in the method invocation the exception handler ++ // would try to exit the monitor of synchronized methods which hasn't ++ // been entered yet, we set the thread local variable ++ // _do_not_unlock_if_synchronized to true. The remove_activation will ++ // check this flag. ++ ++#ifndef OPT_THREAD ++ Register thread = T8; ++ __ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ __ move(AT, (int)true); ++ __ sb(AT, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++#ifndef CORE ++ ++ // mdp : T8 ++ // tmp1: T9 ++ // tmp2: T2 ++ __ profile_parameters_type(T8, T9, T2); ++ ++ // increment invocation count & check for overflow ++ Label invocation_counter_overflow; ++ Label profile_method; ++ Label profile_method_continue; ++ if (inc_counter) { ++ generate_counter_incr(&invocation_counter_overflow, ++ &profile_method, ++ &profile_method_continue); ++ if (ProfileInterpreter) { ++ __ bind(profile_method_continue); ++ } ++ } ++ ++ Label continue_after_compile; ++ __ bind(continue_after_compile); ++ ++#endif // CORE ++ ++ bang_stack_shadow_pages(false); ++ ++ // reset the _do_not_unlock_if_synchronized flag ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ sb(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++ // check for synchronized methods ++ // Must happen AFTER invocation_counter check and stack overflow check, ++ // so method is not locked if overflows. ++ // ++ if (synchronized) { ++ // Allocate monitor and lock method ++ lock_method(); ++ } else { ++ // no synchronization necessary ++#ifdef ASSERT ++ { Label L; ++ __ lw(AT, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(T2, AT, JVM_ACC_SYNCHRONIZED); ++ __ beq(T2, R0, L); ++ __ delayed()->nop(); ++ __ stop("method needs synchronization"); ++ __ bind(L); ++ } ++#endif ++ } ++ ++ // layout of frame after lock_method ++ // [ monitor entry ] <--- sp ++ // ... ++ // [ monitor entry ] ++ // [ monitor block top ] ( the top monitor entry ) ++ // [ byte code pointer ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ local var m-1 ] ++ // ... ++ // [ local var 0 ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... 
++ // [ argument word 0 ] <--- LVP ++ ++ ++ // start execution ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld(AT, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ beq(AT, SP, L); ++ __ delayed()->nop(); ++ __ stop("broken stack frame setup in interpreter in native"); ++ __ bind(L); ++ } ++#endif ++ ++ // jvmti/jvmpi support ++ __ notify_method_entry(); ++ ++ __ dispatch_next(vtos); ++ ++ // invocation counter overflow ++ if (inc_counter) { ++ if (ProfileInterpreter) { ++ // We have decided to profile this method in the interpreter ++ __ bind(profile_method); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::profile_method)); ++ __ set_method_data_pointer_for_bcp(); ++ __ get_method(Rmethod); ++ __ b(profile_method_continue); ++ __ delayed()->nop(); ++ } ++ // Handle overflow of counter and compile method ++ __ bind(invocation_counter_overflow); ++ generate_counter_overflow(continue_after_compile); ++ } ++ ++ return entry_point; ++} ++ ++//----------------------------------------------------------------------------- ++// Exceptions ++ ++void TemplateInterpreterGenerator::generate_throw_exception() { ++ // Entry point in previous activation (i.e., if the caller was ++ // interpreted) ++ Interpreter::_rethrow_exception_entry = __ pc(); ++ // Restore sp to interpreter_frame_last_sp even though we are going ++ // to empty the expression stack for the exception processing. ++ __ sd(R0,FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ // V0: exception ++ // V1: return address/pc that threw exception ++ __ restore_bcp(); // BCP points to call/send ++ __ restore_locals(); ++ ++ //add for compressedoops ++ __ reinit_heapbase(); ++ // Entry point for exceptions thrown within interpreter code ++ Interpreter::_throw_exception_entry = __ pc(); ++ // expression stack is undefined here ++ // V0: exception ++ // BCP: exception bcp ++ __ verify_oop(V0); ++ ++ // expression stack must be empty before entering the VM in case of an exception ++ __ empty_expression_stack(); ++ // find exception handler address and preserve exception oop ++ __ move(A1, V0); ++ __ call_VM(V1, CAST_FROM_FN_PTR(address, InterpreterRuntime::exception_handler_for_exception), A1); ++ // V0: exception handler entry point ++ // V1: preserved exception oop ++ // S0: bcp for exception handler ++ __ push(V1); // push exception which is now the only value on the stack ++ __ jr(V0); // jump to exception handler (may be _remove_activation_entry!) ++ __ delayed()->nop(); ++ ++ // If the exception is not handled in the current frame the frame is removed and ++ // the exception is rethrown (i.e. exception continuation is _rethrow_exception). ++ // ++ // Note: At this point the bci is still the bxi for the instruction which caused ++ // the exception and the expression stack is empty. Thus, for any VM calls ++ // at this point, GC will find a legal oop map (with empty expression stack). ++ ++ // In current activation ++ // V0: exception ++ // BCP: exception bcp ++ ++ // ++ // JVMTI PopFrame support ++ // ++ ++ Interpreter::_remove_activation_preserving_args_entry = __ pc(); ++ __ empty_expression_stack(); ++ // Set the popframe_processing bit in pending_popframe_condition indicating that we are ++ // currently handling popframe, so that call_VMs that may happen later do not trigger new ++ // popframe handling cycles. 
++#ifndef OPT_THREAD ++ Register thread = T2; ++ __ get_thread(T2); ++#else ++ Register thread = TREG; ++#endif ++ __ lw(T3, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ __ ori(T3, T3, JavaThread::popframe_processing_bit); ++ __ sw(T3, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ ++#ifndef CORE ++ { ++ // Check to see whether we are returning to a deoptimized frame. ++ // (The PopFrame call ensures that the caller of the popped frame is ++ // either interpreted or compiled and deoptimizes it if compiled.) ++ // In this case, we can't call dispatch_next() after the frame is ++ // popped, but instead must save the incoming arguments and restore ++ // them after deoptimization has occurred. ++ // ++ // Note that we don't compare the return PC against the ++ // deoptimization blob's unpack entry because of the presence of ++ // adapter frames in C2. ++ Label caller_not_deoptimized; ++ __ ld(A0, FP, frame::return_addr_offset * wordSize); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::interpreter_contains), A0); ++ __ bne(V0, R0, caller_not_deoptimized); ++ __ delayed()->nop(); ++ ++ // Compute size of arguments for saving when returning to deoptimized caller ++ __ get_method(A1); ++ __ verify_oop(A1); ++ __ ld( A1, A1, in_bytes(Method::const_offset())); ++ __ lhu(A1, A1, in_bytes(ConstMethod::size_of_parameters_offset())); ++ __ shl(A1, Interpreter::logStackElementSize); ++ __ restore_locals(); ++ __ dsubu(A2, LVP, A1); ++ __ daddiu(A2, A2, wordSize); ++ // Save these arguments ++#ifndef OPT_THREAD ++ __ get_thread(A0); ++#else ++ __ move(A0, TREG); ++#endif ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::popframe_preserve_args), A0, A1, A2); ++ ++ __ remove_activation(vtos, T9, false, false, false); ++ ++ // Inform deoptimization that it is responsible for restoring these arguments ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ move(AT, JavaThread::popframe_force_deopt_reexecution_bit); ++ __ sw(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ // Continue in deoptimization handler ++ __ jr(T9); ++ __ delayed()->nop(); ++ ++ __ bind(caller_not_deoptimized); ++ } ++#endif /* !CORE */ ++ ++ __ remove_activation(vtos, T3, ++ /* throw_monitor_exception */ false, ++ /* install_monitor_exception */ false, ++ /* notify_jvmdi */ false); ++ ++ // Clear the popframe condition flag ++ // Finish with popframe handling ++ // A previous I2C followed by a deoptimization might have moved the ++ // outgoing arguments further up the stack. PopFrame expects the ++ // mutations to those outgoing arguments to be preserved and other ++ // constraints basically require this frame to look exactly as ++ // though it had previously invoked an interpreted activation with ++ // no space between the top of the expression stack (current ++ // last_sp) and the top of stack. Rather than force deopt to ++ // maintain this kind of invariant all the time we call a small ++ // fixup routine to move the mutated arguments onto the top of our ++ // expression stack if necessary. 
++ __ move(T8, SP); ++ __ ld(A2, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // PC must point into interpreter here ++ __ set_last_Java_frame(thread, noreg, FP, __ pc()); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::popframe_move_outgoing_args), thread, T8, A2); ++ __ reset_last_Java_frame(thread, true); ++ // Restore the last_sp and null it out ++ __ ld(SP, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ ++ ++ __ move(AT, JavaThread::popframe_inactive); ++ __ sw(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ ++ // Finish with popframe handling ++ __ restore_bcp(); ++ __ restore_locals(); ++#ifndef CORE ++ // The method data pointer was incremented already during ++ // call profiling. We have to restore the mdp for the current bcp. ++ if (ProfileInterpreter) { ++ __ set_method_data_pointer_for_bcp(); ++ } ++#endif // !CORE ++ // Clear the popframe condition flag ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ move(AT, JavaThread::popframe_inactive); ++ __ sw(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ ++#if INCLUDE_JVMTI ++ { ++ Label L_done; ++ ++ __ lbu(AT, BCP, 0); ++ __ daddiu(AT, AT, -1 * Bytecodes::_invokestatic); ++ __ bne(AT, R0, L_done); ++ __ delayed()->nop(); ++ ++ // The member name argument must be restored if _invokestatic is re-executed after a PopFrame call. ++ // Detect such a case in the InterpreterRuntime function and return the member name argument, or NULL. ++ ++ __ get_method(T9); ++ __ ld(T8, LVP, 0); ++ __ call_VM(T8, CAST_FROM_FN_PTR(address, InterpreterRuntime::member_name_arg_or_null), T8, T9, BCP); ++ ++ __ beq(T8, R0, L_done); ++ __ delayed()->nop(); ++ ++ __ sd(T8, SP, 0); ++ __ bind(L_done); ++ } ++#endif // INCLUDE_JVMTI ++ ++ __ dispatch_next(vtos); ++ // end of PopFrame support ++ ++ Interpreter::_remove_activation_entry = __ pc(); ++ ++ // preserve exception over this code sequence ++ __ pop(T0); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ sd(T0, thread, in_bytes(JavaThread::vm_result_offset())); ++ // remove the activation (without doing throws on illegalMonitorExceptions) ++ __ remove_activation(vtos, T3, false, true, false); ++ // restore exception ++ __ get_vm_result(T0, thread); ++ __ verify_oop(T0); ++ ++ // In between activations - previous activation type unknown yet ++ // compute continuation point - the continuation point expects ++ // the following registers set up: ++ // ++ // T0: exception ++ // T1: return address/pc that threw exception ++ // SP: expression stack of caller ++ // FP: fp of caller ++ __ push2(T0, T3); // save exception and return address ++ __ move(A1, T3); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, A1); ++ __ move(T9, V0); // save exception handler ++ __ pop2(V0, V1); // restore return address and exception ++ ++ // Note that an "issuing PC" is actually the next PC after the call ++ __ jr(T9); // jump to exception handler of caller ++ __ delayed()->nop(); ++} ++ ++ ++// ++// JVMTI ForceEarlyReturn support ++// ++address TemplateInterpreterGenerator::generate_earlyret_entry_for(TosState state) { ++ address entry = __ pc(); ++ __ restore_bcp(); ++ __ restore_locals(); ++ __ empty_expression_stack(); ++ __ empty_FPU_stack(); ++ __ load_earlyret_value(state); ++ ++#ifndef OPT_THREAD ++ __ get_thread(TREG); 
++#endif ++ __ ld_ptr(T9, TREG, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ const Address cond_addr(T9, in_bytes(JvmtiThreadState::earlyret_state_offset())); ++ // Clear the earlyret state ++ __ move(AT, JvmtiThreadState::earlyret_inactive); ++ __ sw(AT, cond_addr); ++ __ sync(); ++ ++ ++ __ remove_activation(state, T0, ++ false, /* throw_monitor_exception */ ++ false, /* install_monitor_exception */ ++ true); /* notify_jvmdi */ ++ __ sync(); ++ __ jr(T0); ++ __ delayed()->nop(); ++ return entry; ++} // end of ForceEarlyReturn support ++ ++ ++//----------------------------------------------------------------------------- ++// Helper for vtos entry point generation ++ ++void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t, ++ address& bep, ++ address& cep, ++ address& sep, ++ address& aep, ++ address& iep, ++ address& lep, ++ address& fep, ++ address& dep, ++ address& vep) { ++ assert(t->is_valid() && t->tos_in() == vtos, "illegal template"); ++ Label L; ++ fep = __ pc(); __ push(ftos); __ b(L); __ delayed()->nop(); ++ dep = __ pc(); __ push(dtos); __ b(L); __ delayed()->nop(); ++ lep = __ pc(); __ push(ltos); __ b(L); __ delayed()->nop(); ++ aep =__ pc(); __ push(atos); __ b(L); __ delayed()->nop(); ++ bep = cep = sep = ++ iep = __ pc(); __ push(itos); ++ vep = __ pc(); ++ __ bind(L); ++ generate_and_dispatch(t); ++} ++ ++ ++/* ++//----------------------------------------------------------------------------- ++// Generation of individual instructions ++ ++// helpers for generate_and_dispatch ++ ++ ++InterpreterGenerator::InterpreterGenerator(StubQueue* code) ++ : TemplateInterpreterGenerator(code) { ++ generate_all(); // down here so it can be "virtual" ++} ++*/ ++ ++//----------------------------------------------------------------------------- ++ ++// Non-product code ++#ifndef PRODUCT ++address TemplateInterpreterGenerator::generate_trace_code(TosState state) { ++ address entry = __ pc(); ++ ++ // prepare expression stack ++ __ push(state); // save tosca ++ ++ // tos & tos2 ++ // trace_bytecode need actually 4 args, the last two is tos&tos2 ++ // this work fine for x86. but mips o32 call convention will store A2-A3 ++ // to the stack position it think is the tos&tos2 ++ // when the expression stack have no more than 2 data, error occur. 
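// Editor's note (not part of the original patch): restated, the two loads below
// pass the top two expression-stack words to the tracer explicitly in A2/A3
// rather than letting the calling convention look for them on the stack, which
// is not reliable when the expression stack holds fewer than two values.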
++ __ ld(A2, SP, 0); ++ __ ld(A3, SP, 1 * wordSize); ++ ++ // pass arguments & call tracer ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::trace_bytecode), RA, A2, A3); ++ __ move(RA, V0); // make sure return address is not destroyed by pop(state) ++ ++ // restore expression stack ++ __ pop(state); // restore tosca ++ ++ // return ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ return entry; ++} ++ ++void TemplateInterpreterGenerator::count_bytecode() { ++ __ li(T8, (long)&BytecodeCounter::_counter_value); ++ __ lw(AT, T8, 0); ++ __ daddiu(AT, AT, 1); ++ __ sw(AT, T8, 0); ++} ++ ++void TemplateInterpreterGenerator::histogram_bytecode(Template* t) { ++ __ li(T8, (long)&BytecodeHistogram::_counters[t->bytecode()]); ++ __ lw(AT, T8, 0); ++ __ daddiu(AT, AT, 1); ++ __ sw(AT, T8, 0); ++} ++ ++void TemplateInterpreterGenerator::histogram_bytecode_pair(Template* t) { ++ __ li(T8, (long)&BytecodePairHistogram::_index); ++ __ lw(T9, T8, 0); ++ __ dsrl(T9, T9, BytecodePairHistogram::log2_number_of_codes); ++ __ li(T8, ((long)t->bytecode()) << BytecodePairHistogram::log2_number_of_codes); ++ __ orr(T9, T9, T8); ++ __ li(T8, (long)&BytecodePairHistogram::_index); ++ __ sw(T9, T8, 0); ++ __ dsll(T9, T9, 2); ++ __ li(T8, (long)BytecodePairHistogram::_counters); ++ __ daddu(T8, T8, T9); ++ __ lw(AT, T8, 0); ++ __ daddiu(AT, AT, 1); ++ __ sw(AT, T8, 0); ++} ++ ++ ++void TemplateInterpreterGenerator::trace_bytecode(Template* t) { ++ // Call a little run-time stub to avoid blow-up for each bytecode. ++ // The run-time runtime saves the right registers, depending on ++ // the tosca in-state for the given template. ++ ++ address entry = Interpreter::trace_code(t->tos_in()); ++ assert(entry != NULL, "entry must have been generated"); ++ __ call(entry, relocInfo::none); ++ __ delayed()->nop(); ++ //add for compressedoops ++ __ reinit_heapbase(); ++} ++ ++ ++void TemplateInterpreterGenerator::stop_interpreter_at() { ++ Label L; ++ __ li(T8, long(&BytecodeCounter::_counter_value)); ++ __ lw(T8, T8, 0); ++ __ move(AT, StopInterpreterAt); ++ __ bne(T8, AT, L); ++ __ delayed()->nop(); ++ __ brk(5); ++ __ delayed()->nop(); ++ __ bind(L); ++} ++#endif // !PRODUCT +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/templateTable_mips_64.cpp b/src/hotspot/cpu/mips/templateTable_mips_64.cpp +--- a/src/hotspot/cpu/mips/templateTable_mips_64.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/templateTable_mips_64.cpp 2024-01-30 10:00:11.848098317 +0800 +@@ -0,0 +1,4688 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "interpreter/templateTable.hpp" ++#include "memory/universe.hpp" ++#include "oops/methodData.hpp" ++#include "oops/objArrayKlass.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/synchronizer.hpp" ++#include "utilities/macros.hpp" ++ ++ ++#ifndef CC_INTERP ++ ++#define __ _masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++// Platform-dependent initialization ++ ++void TemplateTable::pd_initialize() { ++ // No mips specific initialization ++} ++ ++// Address computation: local variables ++ ++static inline Address iaddress(int n) { ++ return Address(LVP, Interpreter::local_offset_in_bytes(n)); ++} ++ ++static inline Address laddress(int n) { ++ return iaddress(n + 1); ++} ++ ++static inline Address faddress(int n) { ++ return iaddress(n); ++} ++ ++static inline Address daddress(int n) { ++ return laddress(n); ++} ++ ++static inline Address aaddress(int n) { ++ return iaddress(n); ++} ++static inline Address haddress(int n) { return iaddress(n + 0); } ++ ++ ++static inline Address at_sp() { return Address(SP, 0); } ++static inline Address at_sp_p1() { return Address(SP, 1 * wordSize); } ++static inline Address at_sp_p2() { return Address(SP, 2 * wordSize); } ++ ++// At top of Java expression stack which may be different than sp(). It ++// isn't for category 1 objects. ++static inline Address at_tos () { ++ Address tos = Address(SP, Interpreter::expr_offset_in_bytes(0)); ++ return tos; ++} ++ ++static inline Address at_tos_p1() { ++ return Address(SP, Interpreter::expr_offset_in_bytes(1)); ++} ++ ++static inline Address at_tos_p2() { ++ return Address(SP, Interpreter::expr_offset_in_bytes(2)); ++} ++ ++static inline Address at_tos_p3() { ++ return Address(SP, Interpreter::expr_offset_in_bytes(3)); ++} ++ ++// we use S0 as bcp, be sure you have bcp in S0 before you call any of the Template generator ++Address TemplateTable::at_bcp(int offset) { ++ assert(_desc->uses_bcp(), "inconsistent uses_bcp information"); ++ return Address(BCP, offset); ++} ++ ++// Miscelaneous helper routines ++// Store an oop (or NULL) at the address described by obj. 
++// If val == noreg this means store a NULL ++ ++static void do_oop_store(InterpreterMacroAssembler* _masm, ++ Address dst, ++ Register val, ++ DecoratorSet decorators = 0) { ++ assert(val == noreg || val == V0, "parameter is just for looks"); ++ __ store_heap_oop(dst, val, T9, T1, decorators); ++} ++ ++static void do_oop_load(InterpreterMacroAssembler* _masm, ++ Address src, ++ Register dst, ++ DecoratorSet decorators = 0) { ++ __ load_heap_oop(dst, src, T9, T1, decorators); ++} ++ ++// bytecode folding ++void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg, ++ Register tmp_reg, bool load_bc_into_bc_reg/*=true*/, ++ int byte_no) { ++ if (!RewriteBytecodes) return; ++ Label L_patch_done; ++ ++ switch (bc) { ++ case Bytecodes::_fast_aputfield: ++ case Bytecodes::_fast_bputfield: ++ case Bytecodes::_fast_zputfield: ++ case Bytecodes::_fast_cputfield: ++ case Bytecodes::_fast_dputfield: ++ case Bytecodes::_fast_fputfield: ++ case Bytecodes::_fast_iputfield: ++ case Bytecodes::_fast_lputfield: ++ case Bytecodes::_fast_sputfield: ++ { ++ // We skip bytecode quickening for putfield instructions when ++ // the put_code written to the constant pool cache is zero. ++ // This is required so that every execution of this instruction ++ // calls out to InterpreterRuntime::resolve_get_put to do ++ // additional, required work. ++ assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); ++ assert(load_bc_into_bc_reg, "we use bc_reg as temp"); ++ __ get_cache_and_index_and_bytecode_at_bcp(tmp_reg, bc_reg, tmp_reg, byte_no, 1); ++ __ daddiu(bc_reg, R0, bc); ++ __ beq(tmp_reg, R0, L_patch_done); ++ __ delayed()->nop(); ++ } ++ break; ++ default: ++ assert(byte_no == -1, "sanity"); ++ // the pair bytecodes have already done the load. 
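// Editor's note (not part of the original patch): "pair bytecodes" refers to
// callers such as iload_internal() further down, which pass
// load_bc_into_bc_reg == false because bc_reg already holds the quickened
// bytecode they selected.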
++ if (load_bc_into_bc_reg) { ++ __ move(bc_reg, bc); ++ } ++ } ++ ++ if (JvmtiExport::can_post_breakpoint()) { ++ Label L_fast_patch; ++ // if a breakpoint is present we can't rewrite the stream directly ++ __ lbu(tmp_reg, at_bcp(0)); ++ __ move(AT, Bytecodes::_breakpoint); ++ __ bne(tmp_reg, AT, L_fast_patch); ++ __ delayed()->nop(); ++ ++ __ get_method(tmp_reg); ++ // Let breakpoint table handling rewrite to quicker bytecode ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::set_original_bytecode_at), tmp_reg, BCP, bc_reg); ++ ++ __ b(L_patch_done); ++ __ delayed()->nop(); ++ __ bind(L_fast_patch); ++ } ++ ++#ifdef ASSERT ++ Label L_okay; ++ __ lbu(tmp_reg, at_bcp(0)); ++ __ move(AT, (int)Bytecodes::java_code(bc)); ++ __ beq(tmp_reg, AT, L_okay); ++ __ delayed()->nop(); ++ __ beq(tmp_reg, bc_reg, L_patch_done); ++ __ delayed()->nop(); ++ __ stop("patching the wrong bytecode"); ++ __ bind(L_okay); ++#endif ++ ++ // patch bytecode ++ __ sb(bc_reg, at_bcp(0)); ++ __ bind(L_patch_done); ++} ++ ++ ++// Individual instructions ++ ++void TemplateTable::nop() { ++ transition(vtos, vtos); ++ // nothing to do ++} ++ ++void TemplateTable::shouldnotreachhere() { ++ transition(vtos, vtos); ++ __ stop("shouldnotreachhere bytecode"); ++} ++ ++void TemplateTable::aconst_null() { ++ transition(vtos, atos); ++ __ move(FSR, R0); ++} ++ ++void TemplateTable::iconst(int value) { ++ transition(vtos, itos); ++ if (value == 0) { ++ __ move(FSR, R0); ++ } else { ++ __ move(FSR, value); ++ } ++} ++ ++void TemplateTable::lconst(int value) { ++ transition(vtos, ltos); ++ if (value == 0) { ++ __ move(FSR, R0); ++ } else { ++ __ move(FSR, value); ++ } ++} ++ ++void TemplateTable::fconst(int value) { ++ transition(vtos, ftos); ++ switch( value ) { ++ case 0: __ mtc1(R0, FSF); return; ++ case 1: __ addiu(AT, R0, 1); break; ++ case 2: __ addiu(AT, R0, 2); break; ++ default: ShouldNotReachHere(); ++ } ++ __ mtc1(AT, FSF); ++ __ cvt_s_w(FSF, FSF); ++} ++ ++void TemplateTable::dconst(int value) { ++ transition(vtos, dtos); ++ switch( value ) { ++ case 0: __ dmtc1(R0, FSF); ++ return; ++ case 1: __ daddiu(AT, R0, 1); ++ __ dmtc1(AT, FSF); ++ __ cvt_d_w(FSF, FSF); ++ break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++void TemplateTable::bipush() { ++ transition(vtos, itos); ++ __ lb(FSR, at_bcp(1)); ++} ++ ++void TemplateTable::sipush() { ++ transition(vtos, itos); ++ __ lb(FSR, BCP, 1); ++ __ lbu(AT, BCP, 2); ++ __ dsll(FSR, FSR, 8); ++ __ orr(FSR, FSR, AT); ++} ++ ++// T1 : tags ++// T2 : index ++// T3 : cpool ++// T8 : tag ++void TemplateTable::ldc(bool wide) { ++ transition(vtos, vtos); ++ Label call_ldc, notFloat, notClass, notInt, Done; ++ // get index in cpool ++ if (wide) { ++ __ get_unsigned_2_byte_index_at_bcp(T2, 1); ++ } else { ++ __ lbu(T2, at_bcp(1)); ++ } ++ ++ __ get_cpool_and_tags(T3, T1); ++ ++ const int base_offset = ConstantPool::header_size() * wordSize; ++ const int tags_offset = Array::base_offset_in_bytes(); ++ ++ // get type ++ if (UseLEXT1 && Assembler::is_simm(sizeof(tags_offset), 8)) { ++ __ gslbx(T1, T1, T2, tags_offset); ++ } else { ++ __ daddu(AT, T1, T2); ++ __ lb(T1, AT, tags_offset); ++ } ++ if(os::is_MP()) { ++ __ sync(); // load acquire ++ } ++ //now T1 is the tag ++ ++ // unresolved class - get the resolved class ++ __ daddiu(AT, T1, - JVM_CONSTANT_UnresolvedClass); ++ __ beq(AT, R0, call_ldc); ++ __ delayed()->nop(); ++ ++ // unresolved class in error (resolution failed) - call into runtime ++ // so that the same error from first resolution attempt is thrown. 
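// Editor's note: illustrative sketch only, not part of the original patch; the
// enum and strings are hypothetical. It summarizes the tag dispatch implemented
// by this chain of daddiu/beq checks.
enum LdcTag { tag_UnresolvedClass, tag_UnresolvedClassInError, tag_Class,
              tag_Float, tag_Integer, tag_Other };

const char* ldc_action(LdcTag tag) {
  switch (tag) {
    case tag_UnresolvedClass:
    case tag_UnresolvedClassInError:
    case tag_Class:   return "call InterpreterRuntime::ldc, push the returned mirror (atos)";
    case tag_Float:   return "load the constant-pool slot, push ftos";
    case tag_Integer: return "load the constant-pool slot, push itos";
    default:          return "assume a dynamic constant; condy_helper handles it";
  }
}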
++ __ daddiu(AT, T1, -JVM_CONSTANT_UnresolvedClassInError); ++ __ beq(AT, R0, call_ldc); ++ __ delayed()->nop(); ++ ++ // resolved class - need to call vm to get java mirror of the class ++ __ daddiu(AT, T1, - JVM_CONSTANT_Class); ++ __ bne(AT, R0, notClass); ++ __ delayed()->dsll(T2, T2, Address::times_8); ++ ++ __ bind(call_ldc); ++ __ move(A1, wide); ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), A1); ++ //__ push(atos); ++ __ daddiu(SP, SP, - Interpreter::stackElementSize); ++ __ b(Done); ++ __ delayed()->sd(FSR, SP, 0); // added for performance issue ++ ++ __ bind(notClass); ++ __ daddiu(AT, T1, -JVM_CONSTANT_Float); ++ __ bne(AT, R0, notFloat); ++ __ delayed()->nop(); ++ // ftos ++ if (UseLEXT1 && Assembler::is_simm(sizeof(base_offset), 8)) { ++ __ gslwxc1(FSF, T3, T2, base_offset); ++ } else { ++ __ daddu(AT, T3, T2); ++ __ lwc1(FSF, AT, base_offset); ++ } ++ //__ push_f(); ++ __ daddiu(SP, SP, - Interpreter::stackElementSize); ++ __ b(Done); ++ __ delayed()->swc1(FSF, SP, 0); ++ ++ __ bind(notFloat); ++ __ daddiu(AT, T1, -JVM_CONSTANT_Integer); ++ __ bne(AT, R0, notInt); ++ __ delayed()->nop(); ++ // itos ++ if (UseLEXT1 && Assembler::is_simm(sizeof(base_offset), 8)) { ++ __ gslwx(FSR, T3, T2, base_offset); ++ } else { ++ __ daddu(T0, T3, T2); ++ __ lw(FSR, T0, base_offset); ++ } ++ __ push(itos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // assume the tag is for condy; if not, the VM runtime will tell us ++ __ bind(notInt); ++ condy_helper(Done); ++ ++ __ bind(Done); ++} ++ ++void TemplateTable::condy_helper(Label& Done) { ++ const Register obj = FSR; ++ const Register off = SSR; ++ const Register flags = T3; ++ const Register rarg = A1; ++ __ move(rarg, (int)bytecode()); ++ __ call_VM(obj, CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc), rarg); ++ __ get_vm_result_2(flags, TREG); ++ // VMr = obj = base address to find primitive value to push ++ // VMr2 = flags = (tos, off) using format of CPCE::_flags ++ __ andi(off, flags, ConstantPoolCacheEntry::field_index_mask); ++ __ daddu(obj, off, obj); ++ const Address field(obj, 0 * wordSize); ++ ++ // What sort of thing are we loading? 
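// Editor's note: illustrative sketch only, not part of the original patch; names
// are hypothetical. The andi above and the dsrl below unpack the flags word
// exactly like this: the low bits carry the field offset, the bits at
// tos_state_shift carry the type to push.
struct CondyFlags { unsigned tos_state; unsigned field_offset; };

CondyFlags decode_condy_flags(unsigned flags, unsigned tos_state_shift,
                              unsigned field_index_mask) {
  CondyFlags f;
  f.field_offset = flags & field_index_mask;  // andi(off, flags, field_index_mask)
  f.tos_state    = flags >> tos_state_shift;  // dsrl(flags, flags, tos_state_shift)
  return f;
}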
++ __ dsrl(flags, flags, ConstantPoolCacheEntry::tos_state_shift); ++ ConstantPoolCacheEntry::verify_tos_state_shift(); ++ ++ switch (bytecode()) { ++ case Bytecodes::_ldc: ++ case Bytecodes::_ldc_w: ++ { ++ // tos in (itos, ftos, stos, btos, ctos, ztos) ++ Label notInt, notFloat, notShort, notByte, notChar, notBool; ++ __ daddiu(AT, flags, -itos); ++ __ bne(AT, R0, notInt); ++ __ delayed()->nop(); ++ // itos ++ __ ld(obj, field); ++ __ push(itos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notInt); ++ __ daddiu(AT, flags, -ftos); ++ __ bne(AT, R0, notFloat); ++ __ delayed()->nop(); ++ // ftos ++ __ lwc1(FSF, field); ++ __ push(ftos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notFloat); ++ __ daddiu(AT, flags, -stos); ++ __ bne(AT, R0, notShort); ++ __ delayed()->nop(); ++ // stos ++ __ lh(obj, field); ++ __ push(stos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notShort); ++ __ daddiu(AT, flags, -btos); ++ __ bne(AT, R0, notByte); ++ __ delayed()->nop(); ++ // btos ++ __ lb(obj, field); ++ __ push(btos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notByte); ++ __ daddiu(AT, flags, -ctos); ++ __ bne(AT, R0, notChar); ++ __ delayed()->nop(); ++ // ctos ++ __ lhu(obj, field); ++ __ push(ctos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notChar); ++ __ daddiu(AT, flags, -ztos); ++ __ bne(AT, R0, notBool); ++ __ delayed()->nop(); ++ // ztos ++ __ lbu(obj, field); ++ __ push(ztos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notBool); ++ break; ++ } ++ ++ case Bytecodes::_ldc2_w: ++ { ++ Label notLong, notDouble; ++ __ daddiu(AT, flags, -ltos); ++ __ bne(AT, R0, notLong); ++ __ delayed()->nop(); ++ // ltos ++ __ ld(obj, field); ++ __ push(ltos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notLong); ++ __ daddiu(AT, flags, -dtos); ++ __ bne(AT, R0, notDouble); ++ __ delayed()->nop(); ++ // dtos ++ __ ldc1(FSF, field); ++ __ push(dtos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notDouble); ++ break; ++ } ++ ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ __ stop("bad ldc/condy"); ++} ++ ++// Fast path for caching oop constants. ++void TemplateTable::fast_aldc(bool wide) { ++ transition(vtos, atos); ++ ++ Register result = FSR; ++ Register tmp = SSR; ++ Register rarg = A1; ++ int index_size = wide ? sizeof(u2) : sizeof(u1); ++ ++ Label resolved; ++ ++ // We are resolved if the resolved reference cache entry contains a ++ // non-null object (String, MethodType, etc.) ++ assert_different_registers(result, tmp); ++ __ get_cache_index_at_bcp(tmp, 1, index_size); ++ __ load_resolved_reference_at_index(result, tmp, T9); ++ __ bne(result, R0, resolved); ++ __ delayed()->nop(); ++ ++ address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc); ++ // first time invocation - must resolve first ++ int i = (int)bytecode(); ++ __ move(rarg, i); ++ __ call_VM(result, entry, rarg); ++ ++ __ bind(resolved); ++ ++ { // Check for the null sentinel. ++ // If we just called the VM, it already did the mapping for us, ++ // but it's harmless to retry. 
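// Editor's note: illustrative sketch only, not part of the original patch. The
// check emitted below maps the resolved-reference "null sentinel" back to a real
// NULL, since a genuinely null constant cannot be stored in the cache directly.
const void* map_null_sentinel(const void* resolved, const void* null_sentinel) {
  return (resolved == null_sentinel) ? (const void*)0 : resolved;
}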
++ Label notNull; ++ __ set64(rarg, (long)Universe::the_null_sentinel_addr()); ++ __ ld_ptr(tmp, Address(rarg)); ++ __ bne(tmp, result, notNull); ++ __ delayed()->nop(); ++ __ xorr(result, result, result); // NULL object reference ++ __ bind(notNull); ++ } ++ ++ if (VerifyOops) { ++ __ verify_oop(result); ++ } ++} ++ ++ ++// used register: T2, T3, T1 ++// T2 : index ++// T3 : cpool ++// T1 : tag ++void TemplateTable::ldc2_w() { ++ transition(vtos, vtos); ++ Label notDouble, notLong, Done; ++ ++ // get index in cpool ++ __ get_unsigned_2_byte_index_at_bcp(T2, 1); ++ ++ __ get_cpool_and_tags(T3, T1); ++ ++ const int base_offset = ConstantPool::header_size() * wordSize; ++ const int tags_offset = Array::base_offset_in_bytes(); ++ ++ // get type in T1 ++ if (UseLEXT1 && Assembler::is_simm(tags_offset, 8)) { ++ __ gslbx(T1, T1, T2, tags_offset); ++ } else { ++ __ daddu(AT, T1, T2); ++ __ lb(T1, AT, tags_offset); ++ } ++ ++ __ daddiu(AT, T1, -JVM_CONSTANT_Double); ++ __ bne(AT, R0, notDouble); ++ __ delayed()->nop(); ++ ++ // dtos ++ __ dsll(T2, T2, Address::times_8); ++ if (UseLEXT1 && Assembler::is_simm(base_offset, 8)) { ++ __ gsldxc1(FSF, T3, T2, base_offset); ++ } else { ++ __ daddu(AT, T3, T2); ++ __ ldc1(FSF, AT, base_offset); ++ } ++ __ push(dtos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notDouble); ++ __ daddiu(AT, T1, -JVM_CONSTANT_Long); ++ __ bne(AT, R0, notLong); ++ __ delayed()->nop(); ++ ++ // ltos ++ __ dsll(T2, T2, Address::times_8); ++ if (UseLEXT1 && Assembler::is_simm(base_offset, 8)) { ++ __ gsldx(FSR, T3, T2, base_offset); ++ } else { ++ __ daddu(AT, T3, T2); ++ __ ld(FSR, AT, base_offset); ++ } ++ __ push(ltos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notLong); ++ condy_helper(Done); ++ ++ __ bind(Done); ++} ++ ++// we compute the actual local variable address here ++// the x86 dont do so for it has scaled index memory access model, we dont have, so do here ++void TemplateTable::locals_index(Register reg, int offset) { ++ __ lbu(reg, at_bcp(offset)); ++ __ dsll(reg, reg, Address::times_8); ++ __ dsubu(reg, LVP, reg); ++} ++ ++void TemplateTable::iload() { ++ iload_internal(); ++} ++ ++void TemplateTable::nofast_iload() { ++ iload_internal(may_not_rewrite); ++} ++ ++// this method will do bytecode folding of the two form: ++// iload iload iload caload ++// used register : T2, T3 ++// T2 : bytecode ++// T3 : folded code ++void TemplateTable::iload_internal(RewriteControl rc) { ++ transition(vtos, itos); ++ if (RewriteFrequentPairs && rc == may_rewrite) { ++ Label rewrite, done; ++ // get the next bytecode in T2 ++ __ lbu(T2, at_bcp(Bytecodes::length_for(Bytecodes::_iload))); ++ // if _iload, wait to rewrite to iload2. We only want to rewrite the ++ // last two iloads in a pair. Comparing against fast_iload means that ++ // the next bytecode is neither an iload or a caload, and therefore ++ // an iload pair. ++ __ move(AT, Bytecodes::_iload); ++ __ beq(AT, T2, done); ++ __ delayed()->nop(); ++ ++ __ move(T3, Bytecodes::_fast_iload2); ++ __ move(AT, Bytecodes::_fast_iload); ++ __ beq(AT, T2, rewrite); ++ __ delayed()->nop(); ++ ++ // if _caload, rewrite to fast_icaload ++ __ move(T3, Bytecodes::_fast_icaload); ++ __ move(AT, Bytecodes::_caload); ++ __ beq(AT, T2, rewrite); ++ __ delayed()->nop(); ++ ++ // rewrite so iload doesn't check again. 
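++    // Neither a second iload nor a caload follows, so this is a lone (or
++    // trailing) iload: rewriting it to _fast_iload avoids repeating the
++    // pair check on the next execution.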
++ __ move(T3, Bytecodes::_fast_iload); ++ ++ // rewrite ++ // T3 : fast bytecode ++ __ bind(rewrite); ++ patch_bytecode(Bytecodes::_iload, T3, T2, false); ++ __ bind(done); ++ } ++ ++ // Get the local value into tos ++ locals_index(T2); ++ __ lw(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::fast_iload2() { ++ transition(vtos, itos); ++ locals_index(T2); ++ __ lw(FSR, T2, 0); ++ __ push(itos); ++ locals_index(T2, 3); ++ __ lw(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::fast_iload() { ++ transition(vtos, itos); ++ locals_index(T2); ++ __ lw(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::lload() { ++ transition(vtos, ltos); ++ locals_index(T2); ++ __ ld(FSR, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::fload() { ++ transition(vtos, ftos); ++ locals_index(T2); ++ __ lwc1(FSF, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::dload() { ++ transition(vtos, dtos); ++ locals_index(T2); ++ __ ldc1(FSF, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::aload() { ++ transition(vtos, atos); ++ locals_index(T2); ++ __ ld(FSR, T2, 0); ++} ++ ++void TemplateTable::locals_index_wide(Register reg) { ++ __ get_unsigned_2_byte_index_at_bcp(reg, 2); ++ __ dsll(reg, reg, Address::times_8); ++ __ dsubu(reg, LVP, reg); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_iload() { ++ transition(vtos, itos); ++ locals_index_wide(T2); ++ __ ld(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_lload() { ++ transition(vtos, ltos); ++ locals_index_wide(T2); ++ __ ld(FSR, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_fload() { ++ transition(vtos, ftos); ++ locals_index_wide(T2); ++ __ lwc1(FSF, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_dload() { ++ transition(vtos, dtos); ++ locals_index_wide(T2); ++ __ ldc1(FSF, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_aload() { ++ transition(vtos, atos); ++ locals_index_wide(T2); ++ __ ld(FSR, T2, 0); ++} ++ ++// we use A2 as the regiser for index, BE CAREFUL! 
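++// index_check below relies on a single unsigned compare for both bounds:
++// a negative index, viewed as unsigned, is larger than any array length,
++// so (illustrative C only)
++//   if ((juint)index >= (juint)array->length())
++//     goto throw_ArrayIndexOutOfBoundsException;
++// covers index < 0 as well as index >= length.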
++// we dont use our tge 29 now, for later optimization ++void TemplateTable::index_check(Register array, Register index) { ++ // Pop ptr into array ++ __ pop_ptr(array); ++ index_check_without_pop(array, index); ++} ++ ++void TemplateTable::index_check_without_pop(Register array, Register index) { ++ // destroys A2 ++ // check array ++ __ null_check(array, arrayOopDesc::length_offset_in_bytes()); ++ ++ // sign extend since tos (index) might contain garbage in upper bits ++ __ sll(index, index, 0); ++ ++ // check index ++ Label ok; ++ __ lw(AT, array, arrayOopDesc::length_offset_in_bytes()); ++#ifndef OPT_RANGECHECK ++ __ sltu(AT, index, AT); ++ __ bne(AT, R0, ok); ++ __ delayed()->nop(); ++ ++ //throw_ArrayIndexOutOfBoundsException assume abberrant index in A2 ++ if (A1 != array) __ move(A1, array); ++ if (A2 != index) __ move(A2, index); ++ __ jmp(Interpreter::_throw_ArrayIndexOutOfBoundsException_entry); ++ __ delayed()->nop(); ++ __ bind(ok); ++#else ++ __ lw(AT, array, arrayOopDesc::length_offset_in_bytes()); ++ __ move(A2, index); ++ __ tgeu(A2, AT, 29); ++#endif ++} ++ ++void TemplateTable::iaload() { ++ transition(itos, itos); ++ if(UseBoundCheckInstruction) { ++ __ pop(SSR); //SSR:array FSR: index ++ __ dsll(FSR, FSR, 2); ++ __ daddu(FSR, SSR, FSR); ++ __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_INT)); ++ ++ __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound ++ __ dsll(AT, AT, 2); ++ __ daddu(AT, SSR, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_INT)); ++ ++ __ warn("iaload Unimplemented yet"); ++ __ gslwle(FSR, FSR, AT); ++ } else { ++ index_check(SSR, FSR); ++ __ dsll(FSR, FSR, 2); ++ __ daddu(FSR, SSR, FSR); ++ __ access_load_at(T_INT, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_INT)), noreg, noreg); ++ } ++} ++ ++void TemplateTable::laload() { ++ transition(itos, ltos); ++ if(UseBoundCheckInstruction) { ++ __ pop(SSR); //SSR:array FSR: index ++ __ dsll(FSR, FSR, Address::times_8); ++ __ daddu(FSR, SSR, FSR); ++ __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize); ++ ++ __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound ++ __ dsll(AT, AT, Address::times_8); ++ __ daddu(AT, SSR, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize); ++ ++ __ warn("laload Unimplemented yet"); ++ __ gsldle(FSR, FSR, AT); ++ } else { ++ index_check(SSR, FSR); ++ __ dsll(AT, FSR, Address::times_8); ++ __ daddu(T9, SSR, AT); ++ __ access_load_at(T_LONG, IN_HEAP | IS_ARRAY, FSR, Address(T9, arrayOopDesc::base_offset_in_bytes(T_LONG)), noreg, noreg); ++ } ++} ++ ++void TemplateTable::faload() { ++ transition(itos, ftos); ++ if(UseBoundCheckInstruction) { ++ __ pop(SSR); //SSR:array FSR: index ++ __ shl(FSR, 2); ++ __ daddu(FSR, SSR, FSR); ++ __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); ++ ++ __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound ++ __ shl(AT, 2); ++ __ daddu(AT, SSR, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); ++ ++ __ warn("faload Unimplemented yet"); ++ __ gslwlec1(FSF, FSR, AT); ++ } else { ++ index_check(SSR, FSR); ++ __ shl(FSR, 2); ++ __ daddu(FSR, SSR, FSR); ++ __ access_load_at(T_FLOAT, IN_HEAP | IS_ARRAY, noreg, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT)), noreg, noreg); ++ } ++} ++ ++void TemplateTable::daload() { ++ transition(itos, dtos); ++ if(UseBoundCheckInstruction) { ++ __ pop(SSR); //SSR:array FSR: index ++ __ dsll(FSR, FSR, 3); ++ __ daddu(FSR, SSR, FSR); ++ 
__ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize); ++ ++ __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound ++ __ dsll(AT, AT, 3); ++ __ daddu(AT, SSR, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize); ++ ++ __ warn("daload Unimplemented yet"); ++ __ gsldlec1(FSF, FSR, AT); ++ } else { ++ index_check(SSR, FSR); ++ __ dsll(AT, FSR, 3); ++ __ daddu(T9, SSR, AT); ++ __ access_load_at(T_DOUBLE, IN_HEAP | IS_ARRAY, noreg, Address(T9, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)), noreg, noreg); ++ } ++} ++ ++void TemplateTable::aaload() { ++ transition(itos, atos); ++ index_check(SSR, FSR); ++ __ dsll(FSR, FSR, UseCompressedOops ? Address::times_4 : Address::times_8); ++ __ daddu(FSR, SSR, FSR); ++ //add for compressedoops ++ do_oop_load(_masm, ++ Address(FSR, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), ++ FSR, ++ IS_ARRAY); ++} ++ ++void TemplateTable::baload() { ++ transition(itos, itos); ++ if(UseBoundCheckInstruction) { ++ __ pop(SSR); //SSR:array FSR:index ++ __ daddu(FSR, SSR, FSR); ++ __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)); //base ++ ++ __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); ++ __ daddu(AT, SSR, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_BYTE)); //bound ++ ++ __ warn("baload Unimplemented yet"); ++ __ gslble(FSR, FSR, AT); ++ } else { ++ index_check(SSR, FSR); ++ __ daddu(FSR, SSR, FSR); ++ __ access_load_at(T_BYTE, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)), noreg, noreg); ++ } ++} ++ ++void TemplateTable::caload() { ++ transition(itos, itos); ++ index_check(SSR, FSR); ++ __ dsll(FSR, FSR, Address::times_2); ++ __ daddu(FSR, SSR, FSR); ++ __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)), noreg, noreg); ++} ++ ++// iload followed by caload frequent pair ++// used register : T2 ++// T2 : index ++void TemplateTable::fast_icaload() { ++ transition(vtos, itos); ++ // load index out of locals ++ locals_index(T2); ++ __ lw(FSR, T2, 0); ++ index_check(SSR, FSR); ++ __ dsll(FSR, FSR, 1); ++ __ daddu(FSR, SSR, FSR); ++ __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)), noreg, noreg); ++} ++ ++void TemplateTable::saload() { ++ transition(itos, itos); ++ if(UseBoundCheckInstruction) { ++ __ pop(SSR); //SSR:array FSR: index ++ __ dsll(FSR, FSR, Address::times_2); ++ __ daddu(FSR, SSR, FSR); ++ __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_SHORT)); ++ ++ __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound ++ __ dsll(AT, AT, Address::times_2); ++ __ daddu(AT, SSR, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_SHORT)); ++ ++ __ warn("saload Unimplemented yet"); ++ __ gslhle(FSR, FSR, AT); ++ } else { ++ index_check(SSR, FSR); ++ __ dsll(FSR, FSR, Address::times_2); ++ __ daddu(FSR, SSR, FSR); ++ __ access_load_at(T_SHORT, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_SHORT)), noreg, noreg); ++ } ++} ++ ++void TemplateTable::iload(int n) { ++ transition(vtos, itos); ++ __ lw(FSR, iaddress(n)); ++} ++ ++void TemplateTable::lload(int n) { ++ transition(vtos, ltos); ++ __ ld(FSR, laddress(n)); ++} ++ ++void TemplateTable::fload(int n) { ++ transition(vtos, ftos); ++ __ lwc1(FSF, faddress(n)); ++} ++ ++void TemplateTable::dload(int n) { ++ transition(vtos, dtos); ++ __ ldc1(FSF, laddress(n)); ++} ++ ++void TemplateTable::aload(int n) { ++ 
transition(vtos, atos); ++ __ ld(FSR, aaddress(n)); ++} ++ ++void TemplateTable::aload_0() { ++ aload_0_internal(); ++} ++ ++void TemplateTable::nofast_aload_0() { ++ aload_0_internal(may_not_rewrite); ++} ++ ++// used register : T2, T3 ++// T2 : bytecode ++// T3 : folded code ++void TemplateTable::aload_0_internal(RewriteControl rc) { ++ transition(vtos, atos); ++ // According to bytecode histograms, the pairs: ++ // ++ // _aload_0, _fast_igetfield ++ // _aload_0, _fast_agetfield ++ // _aload_0, _fast_fgetfield ++ // ++ // occur frequently. If RewriteFrequentPairs is set, the (slow) ++ // _aload_0 bytecode checks if the next bytecode is either ++ // _fast_igetfield, _fast_agetfield or _fast_fgetfield and then ++ // rewrites the current bytecode into a pair bytecode; otherwise it ++ // rewrites the current bytecode into _fast_aload_0 that doesn't do ++ // the pair check anymore. ++ // ++ // Note: If the next bytecode is _getfield, the rewrite must be ++ // delayed, otherwise we may miss an opportunity for a pair. ++ // ++ // Also rewrite frequent pairs ++ // aload_0, aload_1 ++ // aload_0, iload_1 ++ // These bytecodes with a small amount of code are most profitable ++ // to rewrite ++ if (RewriteFrequentPairs && rc == may_rewrite) { ++ Label rewrite, done; ++ // get the next bytecode in T2 ++ __ lbu(T2, at_bcp(Bytecodes::length_for(Bytecodes::_aload_0))); ++ ++ // do actual aload_0 ++ aload(0); ++ ++ // if _getfield then wait with rewrite ++ __ move(AT, Bytecodes::_getfield); ++ __ beq(AT, T2, done); ++ __ delayed()->nop(); ++ ++ // if _igetfield then reqrite to _fast_iaccess_0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) == ++ Bytecodes::_aload_0, ++ "fix bytecode definition"); ++ __ move(T3, Bytecodes::_fast_iaccess_0); ++ __ move(AT, Bytecodes::_fast_igetfield); ++ __ beq(AT, T2, rewrite); ++ __ delayed()->nop(); ++ ++ // if _agetfield then reqrite to _fast_aaccess_0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) == ++ Bytecodes::_aload_0, ++ "fix bytecode definition"); ++ __ move(T3, Bytecodes::_fast_aaccess_0); ++ __ move(AT, Bytecodes::_fast_agetfield); ++ __ beq(AT, T2, rewrite); ++ __ delayed()->nop(); ++ ++ // if _fgetfield then reqrite to _fast_faccess_0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) == ++ Bytecodes::_aload_0, ++ "fix bytecode definition"); ++ __ move(T3, Bytecodes::_fast_faccess_0); ++ __ move(AT, Bytecodes::_fast_fgetfield); ++ __ beq(AT, T2, rewrite); ++ __ delayed()->nop(); ++ ++ // else rewrite to _fast_aload0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) == ++ Bytecodes::_aload_0, ++ "fix bytecode definition"); ++ __ move(T3, Bytecodes::_fast_aload_0); ++ ++ // rewrite ++ __ bind(rewrite); ++ patch_bytecode(Bytecodes::_aload_0, T3, T2, false); ++ ++ __ bind(done); ++ } else { ++ aload(0); ++ } ++} ++ ++void TemplateTable::istore() { ++ transition(itos, vtos); ++ locals_index(T2); ++ __ sw(FSR, T2, 0); ++} ++ ++void TemplateTable::lstore() { ++ transition(ltos, vtos); ++ locals_index(T2); ++ __ sd(FSR, T2, -wordSize); ++} ++ ++void TemplateTable::fstore() { ++ transition(ftos, vtos); ++ locals_index(T2); ++ __ swc1(FSF, T2, 0); ++} ++ ++void TemplateTable::dstore() { ++ transition(dtos, vtos); ++ locals_index(T2); ++ __ sdc1(FSF, T2, -wordSize); ++} ++ ++void TemplateTable::astore() { ++ transition(vtos, vtos); ++ __ pop_ptr(FSR); ++ locals_index(T2); ++ __ sd(FSR, T2, 0); ++} ++ ++void TemplateTable::wide_istore() { ++ transition(vtos, vtos); ++ __ pop_i(FSR); ++ locals_index_wide(T2); ++ __ sd(FSR, T2, 0); 
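++  // Note: wide_fstore and wide_dstore below simply delegate to
++  // wide_istore and wide_lstore, since the interpreter stores locals as
++  // raw 64-bit slot contents regardless of type in this port.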
++} ++ ++void TemplateTable::wide_lstore() { ++ transition(vtos, vtos); ++ __ pop_l(FSR); ++ locals_index_wide(T2); ++ __ sd(FSR, T2, -wordSize); ++} ++ ++void TemplateTable::wide_fstore() { ++ wide_istore(); ++} ++ ++void TemplateTable::wide_dstore() { ++ wide_lstore(); ++} ++ ++void TemplateTable::wide_astore() { ++ transition(vtos, vtos); ++ __ pop_ptr(FSR); ++ locals_index_wide(T2); ++ __ sd(FSR, T2, 0); ++} ++ ++// used register : T2 ++void TemplateTable::iastore() { ++ transition(itos, vtos); ++ __ pop_i(SSR); // T2: array SSR: index ++ if(UseBoundCheckInstruction) { ++ __ pop_ptr(T2); ++ __ dsll(SSR, SSR, Address::times_4); ++ __ daddu(SSR, T2, SSR); ++ __ addiu(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_INT)); // base ++ ++ __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes()); ++ __ dsll(AT, AT, Address::times_4); ++ __ daddu(AT, T2, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_INT)); //bound ++ ++ __ warn("iastore Unimplemented yet"); ++ __ gsswle(FSR, SSR, AT); ++ } else { ++ index_check(T2, SSR); // prefer index in SSR ++ __ dsll(SSR, SSR, Address::times_4); ++ __ daddu(T2, T2, SSR); ++ __ access_store_at(T_INT, IN_HEAP | IS_ARRAY, Address(T2, arrayOopDesc::base_offset_in_bytes(T_INT)), FSR, noreg, noreg); ++ } ++} ++ ++ ++ ++// used register T2, T3 ++void TemplateTable::lastore() { ++ transition(ltos, vtos); ++ __ pop_i (T2); ++ if(UseBoundCheckInstruction) { ++ __ pop_ptr(T3); ++ __ dsll(T2, T2, Address::times_8); ++ __ daddu(T2, T3, T2); ++ __ addiu(T2, T2, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize); // base ++ ++ __ lw(AT, T3, arrayOopDesc::length_offset_in_bytes()); ++ __ dsll(AT, AT, Address::times_8); ++ __ daddu(AT, T3, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize); //bound ++ ++ __ warn("lastore Unimplemented yet"); ++ __ gssdle(FSR, T2, AT); ++ } else { ++ index_check(T3, T2); ++ __ dsll(T2, T2, Address::times_8); ++ __ daddu(T3, T3, T2); ++ __ access_store_at(T_LONG, IN_HEAP | IS_ARRAY, Address(T3, arrayOopDesc::base_offset_in_bytes(T_LONG)), FSR, noreg, noreg); ++ } ++} ++ ++// used register T2 ++void TemplateTable::fastore() { ++ transition(ftos, vtos); ++ __ pop_i(SSR); ++ if(UseBoundCheckInstruction) { ++ __ pop_ptr(T2); ++ __ dsll(SSR, SSR, Address::times_4); ++ __ daddu(SSR, T2, SSR); ++ __ addiu(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); // base ++ ++ __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes()); ++ __ dsll(AT, AT, Address::times_4); ++ __ daddu(AT, T2, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); //bound ++ ++ __ warn("fastore Unimplemented yet"); ++ __ gsswlec1(FSF, SSR, AT); ++ } else { ++ index_check(T2, SSR); ++ __ dsll(SSR, SSR, Address::times_4); ++ __ daddu(T2, T2, SSR); ++ __ access_store_at(T_FLOAT, IN_HEAP | IS_ARRAY, Address(T2, arrayOopDesc::base_offset_in_bytes(T_FLOAT)), noreg, noreg, noreg); ++ } ++} ++ ++// used register T2, T3 ++void TemplateTable::dastore() { ++ transition(dtos, vtos); ++ __ pop_i (T2); ++ if(UseBoundCheckInstruction) { ++ __ pop_ptr(T3); ++ __ dsll(T2, T2, Address::times_8); ++ __ daddu(T2, T3, T2); ++ __ addiu(T2, T2, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize); // base ++ ++ __ lw(AT, T3, arrayOopDesc::length_offset_in_bytes()); ++ __ dsll(AT, AT, Address::times_8); ++ __ daddu(AT, T3, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize); //bound ++ ++ __ warn("dastore Unimplemented yet"); ++ __ gssdlec1(FSF, T2, AT); ++ } else { ++ index_check(T3, 
T2); ++ __ dsll(T2, T2, Address::times_8); ++ __ daddu(T3, T3, T2); ++ __ access_store_at(T_DOUBLE, IN_HEAP | IS_ARRAY, Address(T3, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)), noreg, noreg, noreg); ++ } ++} ++ ++// used register : T2, T3, T8 ++// T2 : array ++// T3 : subklass ++// T8 : supklass ++void TemplateTable::aastore() { ++ Label is_null, ok_is_subtype, done; ++ transition(vtos, vtos); ++ // stack: ..., array, index, value ++ __ ld(FSR, at_tos()); // Value ++ __ lw(SSR, at_tos_p1()); // Index ++ __ ld(T2, at_tos_p2()); // Array ++ ++ // index_check(T2, SSR); ++ index_check_without_pop(T2, SSR); ++ // do array store check - check for NULL value first ++ __ beq(FSR, R0, is_null); ++ __ delayed()->nop(); ++ ++ // Move subklass into T3 ++ //add for compressedoops ++ __ load_klass(T3, FSR); ++ // Move superklass into T8 ++ //add for compressedoops ++ __ load_klass(T8, T2); ++ __ ld(T8, Address(T8, ObjArrayKlass::element_klass_offset())); ++ // Compress array+index*4+12 into a single register. T2 ++ __ dsll(AT, SSR, UseCompressedOops? Address::times_4 : Address::times_8); ++ __ daddu(T2, T2, AT); ++ __ daddiu(T2, T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); ++ ++ // Generate subtype check. ++ // Superklass in T8. Subklass in T3. ++ __ gen_subtype_check(T8, T3, ok_is_subtype); ++ // Come here on failure ++ // object is at FSR ++ __ jmp(Interpreter::_throw_ArrayStoreException_entry); ++ __ delayed()->nop(); ++ // Come here on success ++ __ bind(ok_is_subtype); ++ do_oop_store(_masm, Address(T2, 0), FSR, IS_ARRAY); ++ __ b(done); ++ __ delayed()->nop(); ++ ++ // Have a NULL in FSR, T2=array, SSR=index. Store NULL at ary[idx] ++ __ bind(is_null); ++ __ profile_null_seen(T9); ++ __ dsll(AT, SSR, UseCompressedOops? Address::times_4 : Address::times_8); ++ __ daddu(T2, T2, AT); ++ do_oop_store(_masm, Address(T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), noreg, IS_ARRAY); ++ ++ __ bind(done); ++ __ daddiu(SP, SP, 3 * Interpreter::stackElementSize); ++} ++ ++void TemplateTable::bastore() { ++ transition(itos, vtos); ++ __ pop_i(SSR); ++ if(UseBoundCheckInstruction) { ++ guarantee(false, "unimplemented yet!"); ++ __ pop_ptr(T2); ++ __ daddu(SSR, T2, SSR); ++ __ addiu(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // base ++ ++ __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes()); ++ __ daddu(AT, T2, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_BYTE)); //bound ++ ++ __ warn("bastore Unimplemented yet"); ++ __ gssble(FSR, SSR, AT); ++ } else { ++ index_check(T2, SSR); ++ ++ // Need to check whether array is boolean or byte ++ // since both types share the bastore bytecode. 
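++    // Klass::layout_helper encodes the array element type;
++    // layout_helper_boolean_diffbit() is a one-bit mask that distinguishes
++    // the boolean[] layout helper from the byte[] one. When the bit is set
++    // the array is a boolean[] and only the lowest bit of the value is
++    // stored (the andi with 0x1 below).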
++ __ load_klass(T9, T2); ++ __ lw(T9, T9, in_bytes(Klass::layout_helper_offset())); ++ ++ int diffbit = Klass::layout_helper_boolean_diffbit(); ++ __ move(AT, diffbit); ++ ++ Label L_skip; ++ __ andr(AT, T9, AT); ++ __ beq(AT, R0, L_skip); ++ __ delayed()->nop(); ++ __ andi(FSR, FSR, 0x1); ++ __ bind(L_skip); ++ ++ __ daddu(SSR, T2, SSR); ++ __ access_store_at(T_BYTE, IN_HEAP | IS_ARRAY, Address(SSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)), FSR, noreg, noreg); ++ } ++} ++ ++void TemplateTable::castore() { ++ transition(itos, vtos); ++ __ pop_i(SSR); ++ if(UseBoundCheckInstruction) { ++ __ pop_ptr(T2); ++ __ dsll(SSR, SSR, Address::times_2); ++ __ daddu(SSR, T2, SSR); ++ __ addiu(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)); // base ++ ++ __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes()); ++ __ dsll(AT, AT, Address::times_2); ++ __ daddu(AT, T2, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_CHAR)); //bound ++ ++ __ warn("castore Unimplemented yet"); ++ __ gsshle(FSR, SSR, AT); ++ } else { ++ index_check(T2, SSR); ++ __ dsll(SSR, SSR, Address::times_2); ++ __ daddu(SSR, T2, SSR); ++ __ access_store_at(T_CHAR, IN_HEAP | IS_ARRAY, Address(SSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)), FSR, noreg, noreg); ++ } ++} ++ ++void TemplateTable::sastore() { ++ castore(); ++} ++ ++void TemplateTable::istore(int n) { ++ transition(itos, vtos); ++ __ sw(FSR, iaddress(n)); ++} ++ ++void TemplateTable::lstore(int n) { ++ transition(ltos, vtos); ++ __ sd(FSR, laddress(n)); ++} ++ ++void TemplateTable::fstore(int n) { ++ transition(ftos, vtos); ++ __ swc1(FSF, faddress(n)); ++} ++ ++void TemplateTable::dstore(int n) { ++ transition(dtos, vtos); ++ __ sdc1(FSF, laddress(n)); ++} ++ ++void TemplateTable::astore(int n) { ++ transition(vtos, vtos); ++ __ pop_ptr(FSR); ++ __ sd(FSR, aaddress(n)); ++} ++ ++void TemplateTable::pop() { ++ transition(vtos, vtos); ++ __ daddiu(SP, SP, Interpreter::stackElementSize); ++} ++ ++void TemplateTable::pop2() { ++ transition(vtos, vtos); ++ __ daddiu(SP, SP, 2 * Interpreter::stackElementSize); ++} ++ ++void TemplateTable::dup() { ++ transition(vtos, vtos); ++ // stack: ..., a ++ __ load_ptr(0, FSR); ++ __ push_ptr(FSR); ++ // stack: ..., a, a ++} ++ ++// blows FSR ++void TemplateTable::dup_x1() { ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ __ load_ptr(0, FSR); // load b ++ __ load_ptr(1, A5); // load a ++ __ store_ptr(1, FSR); // store b ++ __ store_ptr(0, A5); // store a ++ __ push_ptr(FSR); // push b ++ // stack: ..., b, a, b ++} ++ ++// blows FSR ++void TemplateTable::dup_x2() { ++ transition(vtos, vtos); ++ // stack: ..., a, b, c ++ __ load_ptr(0, FSR); // load c ++ __ load_ptr(2, A5); // load a ++ __ store_ptr(2, FSR); // store c in a ++ __ push_ptr(FSR); // push c ++ // stack: ..., c, b, c, c ++ __ load_ptr(2, FSR); // load b ++ __ store_ptr(2, A5); // store a in b ++ // stack: ..., c, a, c, c ++ __ store_ptr(1, FSR); // store b in c ++ // stack: ..., c, a, b, c ++} ++ ++// blows FSR ++void TemplateTable::dup2() { ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ __ load_ptr(1, FSR); // load a ++ __ push_ptr(FSR); // push a ++ __ load_ptr(1, FSR); // load b ++ __ push_ptr(FSR); // push b ++ // stack: ..., a, b, a, b ++} ++ ++// blows FSR ++void TemplateTable::dup2_x1() { ++ transition(vtos, vtos); ++ // stack: ..., a, b, c ++ __ load_ptr(0, T2); // load c ++ __ load_ptr(1, FSR); // load b ++ __ push_ptr(FSR); // push b ++ __ push_ptr(T2); // push c ++ // stack: ..., a, b, c, b, c ++ __ store_ptr(3, T2); // store c in b ++ 
// stack: ..., a, c, c, b, c ++ __ load_ptr(4, T2); // load a ++ __ store_ptr(2, T2); // store a in 2nd c ++ // stack: ..., a, c, a, b, c ++ __ store_ptr(4, FSR); // store b in a ++ // stack: ..., b, c, a, b, c ++ ++ // stack: ..., b, c, a, b, c ++} ++ ++// blows FSR, SSR ++void TemplateTable::dup2_x2() { ++ transition(vtos, vtos); ++ // stack: ..., a, b, c, d ++ // stack: ..., a, b, c, d ++ __ load_ptr(0, T2); // load d ++ __ load_ptr(1, FSR); // load c ++ __ push_ptr(FSR); // push c ++ __ push_ptr(T2); // push d ++ // stack: ..., a, b, c, d, c, d ++ __ load_ptr(4, FSR); // load b ++ __ store_ptr(2, FSR); // store b in d ++ __ store_ptr(4, T2); // store d in b ++ // stack: ..., a, d, c, b, c, d ++ __ load_ptr(5, T2); // load a ++ __ load_ptr(3, FSR); // load c ++ __ store_ptr(3, T2); // store a in c ++ __ store_ptr(5, FSR); // store c in a ++ // stack: ..., c, d, a, b, c, d ++ ++ // stack: ..., c, d, a, b, c, d ++} ++ ++// blows FSR ++void TemplateTable::swap() { ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ ++ __ load_ptr(1, A5); // load a ++ __ load_ptr(0, FSR); // load b ++ __ store_ptr(0, A5); // store a in b ++ __ store_ptr(1, FSR); // store b in a ++ ++ // stack: ..., b, a ++} ++ ++void TemplateTable::iop2(Operation op) { ++ transition(itos, itos); ++ ++ __ pop_i(SSR); ++ switch (op) { ++ case add : __ addu32(FSR, SSR, FSR); break; ++ case sub : __ subu32(FSR, SSR, FSR); break; ++ case mul : __ mul(FSR, SSR, FSR); break; ++ case _and : __ andr(FSR, SSR, FSR); break; ++ case _or : __ orr(FSR, SSR, FSR); break; ++ case _xor : __ xorr(FSR, SSR, FSR); break; ++ case shl : __ sllv(FSR, SSR, FSR); break; ++ case shr : __ srav(FSR, SSR, FSR); break; ++ case ushr : __ srlv(FSR, SSR, FSR); break; ++ default : ShouldNotReachHere(); ++ } ++} ++ ++// the result stored in FSR, SSR, ++// used registers : T2, T3 ++void TemplateTable::lop2(Operation op) { ++ transition(ltos, ltos); ++ __ pop_l(T2); ++ ++ switch (op) { ++ case add : __ daddu(FSR, T2, FSR); break; ++ case sub : __ dsubu(FSR, T2, FSR); break; ++ case _and: __ andr(FSR, T2, FSR); break; ++ case _or : __ orr(FSR, T2, FSR); break; ++ case _xor: __ xorr(FSR, T2, FSR); break; ++ default : ShouldNotReachHere(); ++ } ++} ++ ++// java require this bytecode could handle 0x80000000/-1, dont cause a overflow exception, ++// the result is 0x80000000 ++// the godson2 cpu do the same, so we need not handle this specially like x86 ++void TemplateTable::idiv() { ++ transition(itos, itos); ++ Label not_zero; ++ ++ __ bne(FSR, R0, not_zero); ++ __ delayed()->nop(); ++ __ jmp(Interpreter::_throw_ArithmeticException_entry); ++ __ delayed()->nop(); ++ __ bind(not_zero); ++ ++ __ pop_i(SSR); ++ if (UseLEXT1) { ++ __ gsdiv(FSR, SSR, FSR); ++ } else { ++ __ div(SSR, FSR); ++ __ mflo(FSR); ++ } ++} ++ ++void TemplateTable::irem() { ++ transition(itos, itos); ++ Label not_zero; ++ __ pop_i(SSR); ++ __ div(SSR, FSR); ++ ++ __ bne(FSR, R0, not_zero); ++ __ delayed()->nop(); ++ //__ brk(7); ++ __ jmp(Interpreter::_throw_ArithmeticException_entry); ++ __ delayed()->nop(); ++ ++ __ bind(not_zero); ++ __ mfhi(FSR); ++} ++ ++void TemplateTable::lmul() { ++ transition(ltos, ltos); ++ __ pop_l(T2); ++ if (UseLEXT1) { ++ __ gsdmult(FSR, T2, FSR); ++ } else { ++ __ dmult(T2, FSR); ++ __ mflo(FSR); ++ } ++} ++ ++// NOTE: i DONT use the Interpreter::_throw_ArithmeticException_entry ++void TemplateTable::ldiv() { ++ transition(ltos, ltos); ++ Label normal; ++ ++ __ bne(FSR, R0, normal); ++ __ delayed()->nop(); ++ ++ //__ brk(7); //generate FPE ++ __ 
jmp(Interpreter::_throw_ArithmeticException_entry); ++ __ delayed()->nop(); ++ ++ __ bind(normal); ++ __ pop_l(A2); ++ if (UseLEXT1) { ++ __ gsddiv(FSR, A2, FSR); ++ } else { ++ __ ddiv(A2, FSR); ++ __ mflo(FSR); ++ } ++} ++ ++// NOTE: i DONT use the Interpreter::_throw_ArithmeticException_entry ++void TemplateTable::lrem() { ++ transition(ltos, ltos); ++ Label normal; ++ ++ __ bne(FSR, R0, normal); ++ __ delayed()->nop(); ++ ++ __ jmp(Interpreter::_throw_ArithmeticException_entry); ++ __ delayed()->nop(); ++ ++ __ bind(normal); ++ __ pop_l (A2); ++ ++ if (UseLEXT1) { ++ __ gsdmod(FSR, A2, FSR); ++ } else { ++ __ ddiv(A2, FSR); ++ __ mfhi(FSR); ++ } ++} ++ ++// result in FSR ++// used registers : T0 ++void TemplateTable::lshl() { ++ transition(itos, ltos); ++ __ pop_l(T0); ++ __ dsllv(FSR, T0, FSR); ++} ++ ++// used registers : T0 ++void TemplateTable::lshr() { ++ transition(itos, ltos); ++ __ pop_l(T0); ++ __ dsrav(FSR, T0, FSR); ++} ++ ++// used registers : T0 ++void TemplateTable::lushr() { ++ transition(itos, ltos); ++ __ pop_l(T0); ++ __ dsrlv(FSR, T0, FSR); ++} ++ ++// result in FSF ++void TemplateTable::fop2(Operation op) { ++ transition(ftos, ftos); ++ switch (op) { ++ case add: ++ __ lwc1(FTF, at_sp()); ++ __ add_s(FSF, FTF, FSF); ++ break; ++ case sub: ++ __ lwc1(FTF, at_sp()); ++ __ sub_s(FSF, FTF, FSF); ++ break; ++ case mul: ++ __ lwc1(FTF, at_sp()); ++ __ mul_s(FSF, FTF, FSF); ++ break; ++ case div: ++ __ lwc1(FTF, at_sp()); ++ __ div_s(FSF, FTF, FSF); ++ break; ++ case rem: ++ __ mov_s(F13, FSF); ++ __ lwc1(F12, at_sp()); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem), 2); ++ break; ++ default : ShouldNotReachHere(); ++ } ++ ++ __ daddiu(SP, SP, 1 * wordSize); ++} ++ ++// result in SSF||FSF ++// i dont handle the strict flags ++void TemplateTable::dop2(Operation op) { ++ transition(dtos, dtos); ++ switch (op) { ++ case add: ++ __ ldc1(FTF, at_sp()); ++ __ add_d(FSF, FTF, FSF); ++ break; ++ case sub: ++ __ ldc1(FTF, at_sp()); ++ __ sub_d(FSF, FTF, FSF); ++ break; ++ case mul: ++ __ ldc1(FTF, at_sp()); ++ __ mul_d(FSF, FTF, FSF); ++ break; ++ case div: ++ __ ldc1(FTF, at_sp()); ++ __ div_d(FSF, FTF, FSF); ++ break; ++ case rem: ++ __ mov_d(F13, FSF); ++ __ ldc1(F12, at_sp()); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem), 2); ++ break; ++ default : ShouldNotReachHere(); ++ } ++ ++ __ daddiu(SP, SP, 2 * wordSize); ++} ++ ++void TemplateTable::ineg() { ++ transition(itos, itos); ++ __ subu32(FSR, R0, FSR); ++} ++ ++void TemplateTable::lneg() { ++ transition(ltos, ltos); ++ __ dsubu(FSR, R0, FSR); ++} ++ ++void TemplateTable::fneg() { ++ transition(ftos, ftos); ++ __ neg_s(FSF, FSF); ++} ++ ++void TemplateTable::dneg() { ++ transition(dtos, dtos); ++ __ neg_d(FSF, FSF); ++} ++ ++// used registers : T2 ++void TemplateTable::iinc() { ++ transition(vtos, vtos); ++ locals_index(T2); ++ __ lw(FSR, T2, 0); ++ __ lb(AT, at_bcp(2)); // get constant ++ __ daddu(FSR, FSR, AT); ++ __ sw(FSR, T2, 0); ++} ++ ++// used register : T2 ++void TemplateTable::wide_iinc() { ++ transition(vtos, vtos); ++ locals_index_wide(T2); ++ __ get_2_byte_integer_at_bcp(FSR, AT, 4); ++ __ hswap(FSR); ++ __ lw(AT, T2, 0); ++ __ daddu(FSR, AT, FSR); ++ __ sw(FSR, T2, 0); ++} ++ ++void TemplateTable::convert() { ++ // Checking ++#ifdef ASSERT ++ { ++ TosState tos_in = ilgl; ++ TosState tos_out = ilgl; ++ switch (bytecode()) { ++ case Bytecodes::_i2l: // fall through ++ case Bytecodes::_i2f: // fall through ++ case Bytecodes::_i2d: // fall through ++ case Bytecodes::_i2b: // 
fall through ++ case Bytecodes::_i2c: // fall through ++ case Bytecodes::_i2s: tos_in = itos; break; ++ case Bytecodes::_l2i: // fall through ++ case Bytecodes::_l2f: // fall through ++ case Bytecodes::_l2d: tos_in = ltos; break; ++ case Bytecodes::_f2i: // fall through ++ case Bytecodes::_f2l: // fall through ++ case Bytecodes::_f2d: tos_in = ftos; break; ++ case Bytecodes::_d2i: // fall through ++ case Bytecodes::_d2l: // fall through ++ case Bytecodes::_d2f: tos_in = dtos; break; ++ default : ShouldNotReachHere(); ++ } ++ switch (bytecode()) { ++ case Bytecodes::_l2i: // fall through ++ case Bytecodes::_f2i: // fall through ++ case Bytecodes::_d2i: // fall through ++ case Bytecodes::_i2b: // fall through ++ case Bytecodes::_i2c: // fall through ++ case Bytecodes::_i2s: tos_out = itos; break; ++ case Bytecodes::_i2l: // fall through ++ case Bytecodes::_f2l: // fall through ++ case Bytecodes::_d2l: tos_out = ltos; break; ++ case Bytecodes::_i2f: // fall through ++ case Bytecodes::_l2f: // fall through ++ case Bytecodes::_d2f: tos_out = ftos; break; ++ case Bytecodes::_i2d: // fall through ++ case Bytecodes::_l2d: // fall through ++ case Bytecodes::_f2d: tos_out = dtos; break; ++ default : ShouldNotReachHere(); ++ } ++ transition(tos_in, tos_out); ++ } ++#endif // ASSERT ++ ++ // Conversion ++ switch (bytecode()) { ++ case Bytecodes::_i2l: ++ __ sll(FSR, FSR, 0); ++ break; ++ case Bytecodes::_i2f: ++ __ mtc1(FSR, FSF); ++ __ cvt_s_w(FSF, FSF); ++ break; ++ case Bytecodes::_i2d: ++ __ mtc1(FSR, FSF); ++ __ cvt_d_w(FSF, FSF); ++ break; ++ case Bytecodes::_i2b: ++ __ seb(FSR, FSR); ++ break; ++ case Bytecodes::_i2c: ++ __ andi(FSR, FSR, 0xFFFF); // truncate upper 56 bits ++ break; ++ case Bytecodes::_i2s: ++ __ seh(FSR, FSR); ++ break; ++ case Bytecodes::_l2i: ++ __ sll(FSR, FSR, 0); ++ break; ++ case Bytecodes::_l2f: ++ __ dmtc1(FSR, FSF); ++ __ cvt_s_l(FSF, FSF); ++ break; ++ case Bytecodes::_l2d: ++ __ dmtc1(FSR, FSF); ++ __ cvt_d_l(FSF, FSF); ++ break; ++ case Bytecodes::_f2i: ++ { ++ Label L; ++ ++ __ trunc_w_s(F12, FSF); ++ __ move(AT, 0x7fffffff); ++ __ mfc1(FSR, F12); ++ __ c_un_s(FSF, FSF); //NaN? ++ __ movt(FSR, R0); ++ ++ __ bne(AT, FSR, L); ++ __ delayed()->lui(T9, 0x8000); ++ ++ __ mfc1(AT, FSF); ++ __ andr(AT, AT, T9); ++ ++ __ movn(FSR, T9, AT); ++ ++ __ bind(L); ++ } ++ break; ++ case Bytecodes::_f2l: ++ { ++ Label L; ++ ++ __ trunc_l_s(F12, FSF); ++ __ daddiu(AT, R0, -1); ++ __ dsrl(AT, AT, 1); ++ __ dmfc1(FSR, F12); ++ __ c_un_s(FSF, FSF); //NaN? ++ __ movt(FSR, R0); ++ ++ __ bne(AT, FSR, L); ++ __ delayed()->lui(T9, 0x8000); ++ ++ __ mfc1(AT, FSF); ++ __ andr(AT, AT, T9); ++ ++ __ dsll32(T9, T9, 0); ++ __ movn(FSR, T9, AT); ++ ++ __ bind(L); ++ } ++ break; ++ case Bytecodes::_f2d: ++ __ cvt_d_s(FSF, FSF); ++ break; ++ case Bytecodes::_d2i: ++ { ++ Label L; ++ ++ __ trunc_w_d(F12, FSF); ++ __ move(AT, 0x7fffffff); ++ __ mfc1(FSR, F12); ++ ++ __ bne(FSR, AT, L); ++ __ delayed()->mtc1(R0, F12); ++ ++ __ cvt_d_w(F12, F12); ++ __ c_ult_d(FSF, F12); ++ __ bc1f(L); ++ __ delayed()->addiu(T9, R0, -1); ++ ++ __ c_un_d(FSF, FSF); //NaN? ++ __ subu32(FSR, T9, AT); ++ __ movt(FSR, R0); ++ ++ __ bind(L); ++ } ++ break; ++ case Bytecodes::_d2l: ++ { ++ Label L; ++ ++ __ trunc_l_d(F12, FSF); ++ __ daddiu(AT, R0, -1); ++ __ dsrl(AT, AT, 1); ++ __ dmfc1(FSR, F12); ++ ++ __ bne(FSR, AT, L); ++ __ delayed()->mtc1(R0, F12); ++ ++ __ cvt_d_w(F12, F12); ++ __ c_ult_d(FSF, F12); ++ __ bc1f(L); ++ __ delayed()->daddiu(T9, R0, -1); ++ ++ __ c_un_d(FSF, FSF); //NaN? 
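++        // Result saturated and the value is negative or NaN: form
++        // min_jlong as -1 - max_jlong, then let the movt below force a
++        // NaN input to zero, matching Java d2l semantics.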
++ __ subu(FSR, T9, AT); ++ __ movt(FSR, R0); ++ ++ __ bind(L); ++ } ++ break; ++ case Bytecodes::_d2f: ++ __ cvt_s_d(FSF, FSF); ++ break; ++ default : ++ ShouldNotReachHere(); ++ } ++} ++ ++void TemplateTable::lcmp() { ++ transition(ltos, itos); ++ ++ __ pop(T0); ++ __ pop(R0); ++ ++ __ slt(AT, T0, FSR); ++ __ slt(FSR, FSR, T0); ++ __ subu(FSR, FSR, AT); ++} ++ ++void TemplateTable::float_cmp(bool is_float, int unordered_result) { ++ __ ori(FSR, R0, 1); ++ __ ori(AT, R0, 1); ++ ++ if (is_float) { ++ __ lwc1(FTF, at_sp()); ++ __ daddiu(SP, SP, 1 * wordSize); ++ if (unordered_result < 0) { ++ __ c_olt_s(FSF, FTF); ++ __ movf(FSR, R0); ++ __ c_ult_s(FTF, FSF); ++ } else { ++ __ c_ult_s(FSF, FTF); ++ __ movf(FSR, R0); ++ __ c_olt_s(FTF, FSF); ++ } ++ } else { ++ __ ldc1(FTF, at_sp()); ++ __ daddiu(SP, SP, 2 * wordSize); ++ if (unordered_result < 0) { ++ __ c_olt_d(FSF, FTF); ++ __ movf(FSR, R0); ++ __ c_ult_d(FTF, FSF); ++ } else { ++ __ c_ult_d(FSF, FTF); ++ __ movf(FSR, R0); ++ __ c_olt_d(FTF, FSF); ++ } ++ } ++ ++ __ movf(AT, R0); ++ __ subu(FSR, FSR, AT); ++} ++ ++ ++// used registers : T3, A7, Rnext ++// FSR : return bci, this is defined by the vm specification ++// T2 : MDO taken count ++// T3 : method ++// A7 : offset ++// Rnext : next bytecode, this is required by dispatch_base ++void TemplateTable::branch(bool is_jsr, bool is_wide) { ++ __ get_method(T3); ++ __ profile_taken_branch(A7, T2); // only C2 meaningful ++ ++ const ByteSize be_offset = MethodCounters::backedge_counter_offset() + ++ InvocationCounter::counter_offset(); ++ const ByteSize inv_offset = MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset(); ++ ++ // Load up T4 with the branch displacement ++ if (!is_wide) { ++ __ lb(A7, BCP, 1); ++ __ lbu(AT, BCP, 2); ++ __ dsll(A7, A7, 8); ++ __ orr(A7, A7, AT); ++ } else { ++ __ get_4_byte_integer_at_bcp(A7, AT, 1); ++ __ swap(A7); ++ } ++ ++ // Handle all the JSR stuff here, then exit. ++ // It's much shorter and cleaner than intermingling with the non-JSR ++ // normal-branch stuff occuring below. 
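++  // At this point A7 holds the signed branch displacement: for short
++  // branches, the sign-extended high operand byte shifted left by 8 and
++  // or'ed with the zero-extended low byte; for wide branches, the 4-byte
++  // operand converted to native byte order.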
++ if (is_jsr) { ++ // Pre-load the next target bytecode into Rnext ++ __ daddu(AT, BCP, A7); ++ __ lbu(Rnext, AT, 0); ++ ++ // compute return address as bci in FSR ++ __ daddiu(FSR, BCP, (is_wide?5:3) - in_bytes(ConstMethod::codes_offset())); ++ __ ld(AT, T3, in_bytes(Method::const_offset())); ++ __ dsubu(FSR, FSR, AT); ++ // Adjust the bcp in BCP by the displacement in A7 ++ __ daddu(BCP, BCP, A7); ++ // jsr returns atos that is not an oop ++ // Push return address ++ __ push_i(FSR); ++ // jsr returns vtos ++ __ dispatch_only_noverify(vtos); ++ ++ return; ++ } ++ ++ // Normal (non-jsr) branch handling ++ ++ // Adjust the bcp in S0 by the displacement in T4 ++ __ daddu(BCP, BCP, A7); ++ ++ assert(UseLoopCounter || !UseOnStackReplacement, "on-stack-replacement requires loop counters"); ++ Label backedge_counter_overflow; ++ Label profile_method; ++ Label dispatch; ++ if (UseLoopCounter) { ++ // increment backedge counter for backward branches ++ // T3: method ++ // T4: target offset ++ // BCP: target bcp ++ // LVP: locals pointer ++ __ bgtz(A7, dispatch); // check if forward or backward branch ++ __ delayed()->nop(); ++ ++ // check if MethodCounters exists ++ Label has_counters; ++ __ ld(AT, T3, in_bytes(Method::method_counters_offset())); // use AT as MDO, TEMP ++ __ bne(AT, R0, has_counters); ++ __ delayed()->nop(); ++ __ push(T3); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::build_method_counters), ++ T3); ++ __ pop(T3); ++ __ ld(AT, T3, in_bytes(Method::method_counters_offset())); // use AT as MDO, TEMP ++ __ beq(AT, R0, dispatch); ++ __ delayed()->nop(); ++ __ bind(has_counters); ++ ++ if (TieredCompilation) { ++ Label no_mdo; ++ int increment = InvocationCounter::count_increment; ++ int mask = ((1 << Tier0BackedgeNotifyFreqLog) - 1) << InvocationCounter::count_shift; ++ if (ProfileInterpreter) { ++ // Are we profiling? 
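++        // Prefer the MDO backedge counter when a MethodData* is present;
++        // otherwise fall through to the MethodCounters backedge counter.
++        // increment_mask_and_jump branches to backedge_counter_overflow
++        // each time the masked count wraps to zero, i.e. every
++        // 2^Tier0BackedgeNotifyFreqLog backedges.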
++ __ ld(T0, Address(T3, in_bytes(Method::method_data_offset()))); ++ __ beq(T0, R0, no_mdo); ++ __ delayed()->nop(); ++ // Increment the MDO backedge counter ++ const Address mdo_backedge_counter(T0, in_bytes(MethodData::backedge_counter_offset()) + ++ in_bytes(InvocationCounter::counter_offset())); ++ __ increment_mask_and_jump(mdo_backedge_counter, increment, mask, ++ T1, false, Assembler::zero, &backedge_counter_overflow); ++ __ beq(R0, R0, dispatch); ++ __ delayed()->nop(); ++ } ++ __ bind(no_mdo); ++ // Increment backedge counter in MethodCounters* ++ __ ld(T0, Address(T3, Method::method_counters_offset())); ++ __ increment_mask_and_jump(Address(T0, be_offset), increment, mask, ++ T1, false, Assembler::zero, &backedge_counter_overflow); ++ if (!UseOnStackReplacement) { ++ __ bind(backedge_counter_overflow); ++ } ++ } else { ++ // increment back edge counter ++ __ ld(T1, T3, in_bytes(Method::method_counters_offset())); ++ __ lw(T0, T1, in_bytes(be_offset)); ++ __ increment(T0, InvocationCounter::count_increment); ++ __ sw(T0, T1, in_bytes(be_offset)); ++ ++ // load invocation counter ++ __ lw(T1, T1, in_bytes(inv_offset)); ++ // buffer bit added, mask no needed ++ ++ // dadd backedge counter & invocation counter ++ __ daddu(T1, T1, T0); ++ ++ if (ProfileInterpreter) { ++ // Test to see if we should create a method data oop ++ // T1 : backedge counter & invocation counter ++ if (Assembler::is_simm16(InvocationCounter::InterpreterProfileLimit)) { ++ __ slti(AT, T1, InvocationCounter::InterpreterProfileLimit); ++ } else { ++ __ li(AT, (long)&InvocationCounter::InterpreterProfileLimit); ++ __ lw(AT, AT, 0); ++ __ slt(AT, T1, AT); ++ } ++ ++ __ bne(AT, R0, dispatch); ++ __ delayed()->nop(); ++ ++ // if no method data exists, go to profile method ++ __ test_method_data_pointer(T1, profile_method); ++ ++ if (UseOnStackReplacement) { ++ if (Assembler::is_simm16(InvocationCounter::InterpreterBackwardBranchLimit)) { ++ __ slti(AT, T2, InvocationCounter::InterpreterBackwardBranchLimit); ++ } else { ++ __ li(AT, (long)&InvocationCounter::InterpreterBackwardBranchLimit); ++ __ lw(AT, AT, 0); ++ __ slt(AT, T2, AT); ++ } ++ ++ __ bne(AT, R0, dispatch); ++ __ delayed()->nop(); ++ ++ // When ProfileInterpreter is on, the backedge_count comes ++ // from the methodDataOop, which value does not get reset on ++ // the call to frequency_counter_overflow(). ++ // To avoid excessive calls to the overflow routine while ++ // the method is being compiled, dadd a second test to make ++ // sure the overflow function is called only once every ++ // overflow_frequency. ++ const int overflow_frequency = 1024; ++ __ andi(AT, T2, overflow_frequency-1); ++ __ beq(AT, R0, backedge_counter_overflow); ++ __ delayed()->nop(); ++ } ++ } else { ++ if (UseOnStackReplacement) { ++ // check for overflow against AT, which is the sum of the counters ++ __ li(AT, (long)&InvocationCounter::InterpreterBackwardBranchLimit); ++ __ lw(AT, AT, 0); ++ __ slt(AT, T1, AT); ++ __ beq(AT, R0, backedge_counter_overflow); ++ __ delayed()->nop(); ++ } ++ } ++ } ++ __ bind(dispatch); ++ } ++ ++ // Pre-load the next target bytecode into Rnext ++ __ lbu(Rnext, BCP, 0); ++ ++ // continue with the bytecode @ target ++ // FSR: return bci for jsr's, unused otherwise ++ // Rnext: target bytecode ++ // BCP: target bcp ++ __ dispatch_only(vtos, true); ++ ++ if (UseLoopCounter) { ++ if (ProfileInterpreter) { ++ // Out-of-line code to allocate method data oop. 
++ __ bind(profile_method); ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method)); ++ __ set_method_data_pointer_for_bcp(); ++ __ b(dispatch); ++ __ delayed()->nop(); ++ } ++ ++ if (UseOnStackReplacement) { ++ // invocation counter overflow ++ __ bind(backedge_counter_overflow); ++ __ subu(A7, BCP, A7); // branch bcp ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::frequency_counter_overflow), A7); ++ ++ // V0: osr nmethod (osr ok) or NULL (osr not possible) ++ // V1: osr adapter frame return address ++ // LVP: locals pointer ++ // BCP: bcp ++ __ beq(V0, R0, dispatch); ++ __ delayed()->nop(); ++ // nmethod may have been invalidated (VM may block upon call_VM return) ++ __ lb(T3, V0, nmethod::state_offset()); ++ __ move(AT, nmethod::in_use); ++ __ bne(AT, T3, dispatch); ++ __ delayed()->nop(); ++ ++ // We have the address of an on stack replacement routine in rax. ++ // In preparation of invoking it, first we must migrate the locals ++ // and monitors from off the interpreter frame on the stack. ++ // Ensure to save the osr nmethod over the migration call, ++ // it will be preserved in Rnext. ++ __ move(Rnext, V0); ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin)); ++ ++ // V0 is OSR buffer, move it to expected parameter location ++ // refer to osrBufferPointer in c1_LIRAssembler_mips.cpp ++ __ move(T0, V0); ++ ++ // pop the interpreter frame ++ __ ld(A7, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize)); ++ //FIXME, shall we keep the return address on the stack? ++ __ leave(); // remove frame anchor ++ __ move(LVP, RA); ++ __ move(SP, A7); ++ ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ andr(SP , SP , AT); ++ ++ // push the (possibly adjusted) return address ++ //refer to osr_entry in c1_LIRAssembler_mips.cpp ++ __ ld(AT, Rnext, nmethod::osr_entry_point_offset()); ++ __ jr(AT); ++ __ delayed()->nop(); ++ } ++ } ++} ++ ++ ++void TemplateTable::if_0cmp(Condition cc) { ++ transition(itos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ switch(cc) { ++ case not_equal: ++ __ beq(FSR, R0, not_taken); ++ break; ++ case equal: ++ __ bne(FSR, R0, not_taken); ++ break; ++ case less: ++ __ bgez(FSR, not_taken); ++ break; ++ case less_equal: ++ __ bgtz(FSR, not_taken); ++ break; ++ case greater: ++ __ blez(FSR, not_taken); ++ break; ++ case greater_equal: ++ __ bltz(FSR, not_taken); ++ break; ++ } ++ __ delayed()->nop(); ++ ++ branch(false, false); ++ ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++void TemplateTable::if_icmp(Condition cc) { ++ transition(itos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ ++ __ pop_i(SSR); ++ switch(cc) { ++ case not_equal: ++ __ beq(SSR, FSR, not_taken); ++ break; ++ case equal: ++ __ bne(SSR, FSR, not_taken); ++ break; ++ case less: ++ __ slt(AT, SSR, FSR); ++ __ beq(AT, R0, not_taken); ++ break; ++ case less_equal: ++ __ slt(AT, FSR, SSR); ++ __ bne(AT, R0, not_taken); ++ break; ++ case greater: ++ __ slt(AT, FSR, SSR); ++ __ beq(AT, R0, not_taken); ++ break; ++ case greater_equal: ++ __ slt(AT, SSR, FSR); ++ __ bne(AT, R0, not_taken); ++ break; ++ } ++ __ delayed()->nop(); ++ ++ branch(false, false); ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++void TemplateTable::if_nullcmp(Condition cc) { ++ transition(atos, 
vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ switch(cc) { ++ case not_equal: ++ __ beq(FSR, R0, not_taken); ++ break; ++ case equal: ++ __ bne(FSR, R0, not_taken); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ __ delayed()->nop(); ++ ++ branch(false, false); ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++ ++void TemplateTable::if_acmp(Condition cc) { ++ transition(atos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ // __ lw(SSR, SP, 0); ++ __ pop_ptr(SSR); ++ switch(cc) { ++ case not_equal: ++ __ beq(SSR, FSR, not_taken); ++ break; ++ case equal: ++ __ bne(SSR, FSR, not_taken); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ __ delayed()->nop(); ++ ++ branch(false, false); ++ ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++// used registers : T1, T2, T3 ++// T1 : method ++// T2 : returb bci ++void TemplateTable::ret() { ++ transition(vtos, vtos); ++ ++ locals_index(T2); ++ __ ld(T2, T2, 0); ++ __ profile_ret(T2, T3); ++ ++ __ get_method(T1); ++ __ ld(BCP, T1, in_bytes(Method::const_offset())); ++ __ daddu(BCP, BCP, T2); ++ __ daddiu(BCP, BCP, in_bytes(ConstMethod::codes_offset())); ++ ++ __ dispatch_next(vtos, 0, true); ++} ++ ++// used registers : T1, T2, T3 ++// T1 : method ++// T2 : returb bci ++void TemplateTable::wide_ret() { ++ transition(vtos, vtos); ++ ++ locals_index_wide(T2); ++ __ ld(T2, T2, 0); // get return bci, compute return bcp ++ __ profile_ret(T2, T3); ++ ++ __ get_method(T1); ++ __ ld(BCP, T1, in_bytes(Method::const_offset())); ++ __ daddu(BCP, BCP, T2); ++ __ daddiu(BCP, BCP, in_bytes(ConstMethod::codes_offset())); ++ ++ __ dispatch_next(vtos, 0, true); ++} ++ ++// used register T2, T3, A7, Rnext ++// T2 : bytecode pointer ++// T3 : low ++// A7 : high ++// Rnext : dest bytecode, required by dispatch_base ++void TemplateTable::tableswitch() { ++ Label default_case, continue_execution; ++ transition(itos, vtos); ++ ++ // align BCP ++ __ daddiu(T2, BCP, BytesPerInt); ++ __ li(AT, -BytesPerInt); ++ __ andr(T2, T2, AT); ++ ++ // load lo & hi ++ __ lw(T3, T2, 1 * BytesPerInt); ++ __ swap(T3); ++ __ lw(A7, T2, 2 * BytesPerInt); ++ __ swap(A7); ++ ++ // check against lo & hi ++ __ slt(AT, FSR, T3); ++ __ bne(AT, R0, default_case); ++ __ delayed()->nop(); ++ ++ __ slt(AT, A7, FSR); ++ __ bne(AT, R0, default_case); ++ __ delayed()->nop(); ++ ++ // lookup dispatch offset, in A7 big endian ++ __ dsubu(FSR, FSR, T3); ++ __ dsll(AT, FSR, Address::times_4); ++ __ daddu(AT, T2, AT); ++ __ lw(A7, AT, 3 * BytesPerInt); ++ __ profile_switch_case(FSR, T9, T3); ++ ++ __ bind(continue_execution); ++ __ swap(A7); ++ __ daddu(BCP, BCP, A7); ++ __ lbu(Rnext, BCP, 0); ++ __ dispatch_only(vtos, true); ++ ++ // handle default ++ __ bind(default_case); ++ __ profile_switch_default(FSR); ++ __ lw(A7, T2, 0); ++ __ b(continue_execution); ++ __ delayed()->nop(); ++} ++ ++void TemplateTable::lookupswitch() { ++ transition(itos, itos); ++ __ stop("lookupswitch bytecode should have been rewritten"); ++} ++ ++// used registers : T2, T3, A7, Rnext ++// T2 : bytecode pointer ++// T3 : pair index ++// A7 : offset ++// Rnext : dest bytecode ++// the data after the opcode is the same as lookupswitch ++// see Rewriter::rewrite_method for more information ++void TemplateTable::fast_linearswitch() { ++ transition(itos, vtos); ++ Label loop_entry, loop, found, continue_execution; ++ ++ // swap FSR so we can avoid swapping the table 
entries ++ __ swap(FSR); ++ ++ // align BCP ++ __ daddiu(T2, BCP, BytesPerInt); ++ __ li(AT, -BytesPerInt); ++ __ andr(T2, T2, AT); ++ ++ // set counter ++ __ lw(T3, T2, BytesPerInt); ++ __ swap(T3); ++ __ b(loop_entry); ++ __ delayed()->nop(); ++ ++ // table search ++ __ bind(loop); ++ // get the entry value ++ __ dsll(AT, T3, Address::times_8); ++ __ daddu(AT, T2, AT); ++ __ lw(AT, AT, 2 * BytesPerInt); ++ ++ // found? ++ __ beq(FSR, AT, found); ++ __ delayed()->nop(); ++ ++ __ bind(loop_entry); ++ __ bgtz(T3, loop); ++ __ delayed()->daddiu(T3, T3, -1); ++ ++ // default case ++ __ profile_switch_default(FSR); ++ __ lw(A7, T2, 0); ++ __ b(continue_execution); ++ __ delayed()->nop(); ++ ++ // entry found -> get offset ++ __ bind(found); ++ __ dsll(AT, T3, Address::times_8); ++ __ daddu(AT, T2, AT); ++ __ lw(A7, AT, 3 * BytesPerInt); ++ __ profile_switch_case(T3, FSR, T2); ++ ++ // continue execution ++ __ bind(continue_execution); ++ __ swap(A7); ++ __ daddu(BCP, BCP, A7); ++ __ lbu(Rnext, BCP, 0); ++ __ dispatch_only(vtos, true); ++} ++ ++// used registers : T0, T1, T2, T3, A7, Rnext ++// T2 : pairs address(array) ++// Rnext : dest bytecode ++// the data after the opcode is the same as lookupswitch ++// see Rewriter::rewrite_method for more information ++void TemplateTable::fast_binaryswitch() { ++ transition(itos, vtos); ++ // Implementation using the following core algorithm: ++ // ++ // int binary_search(int key, LookupswitchPair* array, int n) { ++ // // Binary search according to "Methodik des Programmierens" by ++ // // Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985. ++ // int i = 0; ++ // int j = n; ++ // while (i+1 < j) { ++ // // invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q) ++ // // with Q: for all i: 0 <= i < n: key < a[i] ++ // // where a stands for the array and assuming that the (inexisting) ++ // // element a[n] is infinitely big. ++ // int h = (i + j) >> 1; ++ // // i < h < j ++ // if (key < array[h].fast_match()) { ++ // j = h; ++ // } else { ++ // i = h; ++ // } ++ // } ++ // // R: a[i] <= key < a[i+1] or Q ++ // // (i.e., if key is within array, i is the correct index) ++ // return i; ++ // } ++ ++ // register allocation ++ const Register array = T2; ++ const Register i = T3, j = A7; ++ const Register h = T1; ++ const Register temp = T0; ++ const Register key = FSR; ++ ++ // setup array ++ __ daddiu(array, BCP, 3*BytesPerInt); ++ __ li(AT, -BytesPerInt); ++ __ andr(array, array, AT); ++ ++ // initialize i & j ++ __ move(i, R0); ++ __ lw(j, array, - 1 * BytesPerInt); ++ // Convert j into native byteordering ++ __ swap(j); ++ ++ // and start ++ Label entry; ++ __ b(entry); ++ __ delayed()->nop(); ++ ++ // binary search loop ++ { ++ Label loop; ++ __ bind(loop); ++ // int h = (i + j) >> 1; ++ __ daddu(h, i, j); ++ __ dsrl(h, h, 1); ++ // if (key < array[h].fast_match()) { ++ // j = h; ++ // } else { ++ // i = h; ++ // } ++ // Convert array[h].match to native byte-ordering before compare ++ __ dsll(AT, h, Address::times_8); ++ __ daddu(AT, array, AT); ++ __ lw(temp, AT, 0 * BytesPerInt); ++ __ swap(temp); ++ ++ __ slt(AT, key, temp); ++ __ movz(i, h, AT); ++ __ movn(j, h, AT); ++ ++ // while (i+1 < j) ++ __ bind(entry); ++ __ daddiu(h, i, 1); ++ __ slt(AT, h, j); ++ __ bne(AT, R0, loop); ++ __ delayed()->nop(); ++ } ++ ++ // end of binary search, result index is i (must check again!) 
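++  // Loop postcondition: a[i].match <= key < a[i+1].match, or key is below
++  // every entry. i is therefore only a candidate: re-compare a[i].match
++  // with key and take the default offset on a mismatch.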
++ Label default_case; ++ // Convert array[i].match to native byte-ordering before compare ++ __ dsll(AT, i, Address::times_8); ++ __ daddu(AT, array, AT); ++ __ lw(temp, AT, 0 * BytesPerInt); ++ __ swap(temp); ++ __ bne(key, temp, default_case); ++ __ delayed()->nop(); ++ ++ // entry found -> j = offset ++ __ dsll(AT, i, Address::times_8); ++ __ daddu(AT, array, AT); ++ __ lw(j, AT, 1 * BytesPerInt); ++ __ profile_switch_case(i, key, array); ++ __ swap(j); ++ ++ __ daddu(BCP, BCP, j); ++ __ lbu(Rnext, BCP, 0); ++ __ dispatch_only(vtos, true); ++ ++ // default case -> j = default offset ++ __ bind(default_case); ++ __ profile_switch_default(i); ++ __ lw(j, array, - 2 * BytesPerInt); ++ __ swap(j); ++ __ daddu(BCP, BCP, j); ++ __ lbu(Rnext, BCP, 0); ++ __ dispatch_only(vtos, true); ++} ++ ++void TemplateTable::_return(TosState state) { ++ transition(state, state); ++ assert(_desc->calls_vm(), ++ "inconsistent calls_vm information"); // call in remove_activation ++ ++ if (_desc->bytecode() == Bytecodes::_return_register_finalizer) { ++ assert(state == vtos, "only valid state"); ++ __ ld(T1, aaddress(0)); ++ __ load_klass(LVP, T1); ++ __ lw(LVP, LVP, in_bytes(Klass::access_flags_offset())); ++ __ move(AT, JVM_ACC_HAS_FINALIZER); ++ __ andr(AT, AT, LVP); ++ Label skip_register_finalizer; ++ __ beq(AT, R0, skip_register_finalizer); ++ __ delayed()->nop(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::register_finalizer), T1); ++ __ bind(skip_register_finalizer); ++ } ++ ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ if (SafepointMechanism::uses_thread_local_poll() && _desc->bytecode() != Bytecodes::_return_register_finalizer) { ++ Label no_safepoint; ++ NOT_PRODUCT(__ block_comment("Thread-local Safepoint poll")); ++ __ lb(AT, thread, in_bytes(Thread::polling_page_offset())); ++ __ andi(AT, AT, SafepointMechanism::poll_bit()); ++ __ beq(AT, R0, no_safepoint); ++ __ delayed()->nop(); ++ __ push(state); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::at_safepoint)); ++ __ pop(state); ++ __ bind(no_safepoint); ++ } ++ ++ // Narrow result if state is itos but result type is smaller. ++ // Need to narrow in the return bytecode rather than in generate_return_entry ++ // since compiled code callers expect the result to already be narrowed. ++ if (state == itos) { ++ __ narrow(FSR); ++ } ++ ++ __ remove_activation(state, T9); ++ __ sync(); ++ ++ __ jr(T9); ++ __ delayed()->nop(); ++} ++ ++// ---------------------------------------------------------------------------- ++// Volatile variables demand their effects be made known to all CPU's ++// in order. Store buffers on most chips allow reads & writes to ++// reorder; the JMM's ReadAfterWrite.java test fails in -Xint mode ++// without some kind of memory barrier (i.e., it's not sufficient that ++// the interpreter does not reorder volatile references, the hardware ++// also must not reorder them). ++// ++// According to the new Java Memory Model (JMM): ++// (1) All volatiles are serialized wrt to each other. ALSO reads & ++// writes act as aquire & release, so: ++// (2) A read cannot let unrelated NON-volatile memory refs that ++// happen after the read float up to before the read. It's OK for ++// non-volatile memory refs that happen before the volatile read to ++// float down below it. ++// (3) Similar a volatile write cannot let unrelated NON-volatile ++// memory refs that happen BEFORE the write float down to after the ++// write. 
It's OK for non-volatile memory refs that happen after the ++// volatile write to float up before it. ++// ++// We only put in barriers around volatile refs (they are expensive), ++// not _between_ memory refs (that would require us to track the ++// flavor of the previous memory refs). Requirements (2) and (3) ++// require some barriers before volatile stores and after volatile ++// loads. These nearly cover requirement (1) but miss the ++// volatile-store-volatile-load case. This final case is placed after ++// volatile-stores although it could just as well go before ++// volatile-loads. ++void TemplateTable::volatile_barrier() { ++ if(os::is_MP()) __ sync(); ++} ++ ++// we dont shift left 2 bits in get_cache_and_index_at_bcp ++// for we always need shift the index we use it. the ConstantPoolCacheEntry ++// is 16-byte long, index is the index in ++// ConstantPoolCache, so cache + base_offset() + index * 16 is ++// the corresponding ConstantPoolCacheEntry ++// used registers : T2 ++// NOTE : the returned index need also shift left 4 to get the address! ++void TemplateTable::resolve_cache_and_index(int byte_no, ++ Register Rcache, ++ Register index, ++ size_t index_size) { ++ assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); ++ const Register temp = A1; ++ assert_different_registers(Rcache, index); ++ ++ Label resolved; ++ ++ Bytecodes::Code code = bytecode(); ++ switch (code) { ++ case Bytecodes::_nofast_getfield: code = Bytecodes::_getfield; break; ++ case Bytecodes::_nofast_putfield: code = Bytecodes::_putfield; break; ++ default: break; ++ } ++ ++ __ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, temp, byte_no, 1, index_size); ++ // is resolved? ++ int i = (int)code; ++ __ addiu(temp, temp, -i); ++ __ beq(temp, R0, resolved); ++ __ delayed()->nop(); ++ ++ // resolve first time through ++ address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache); ++ ++ __ move(temp, i); ++ __ call_VM(NOREG, entry, temp); ++ ++ // Update registers with resolved info ++ __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size); ++ __ bind(resolved); ++} ++ ++// The Rcache and index registers must be set before call ++void TemplateTable::load_field_cp_cache_entry(Register obj, ++ Register cache, ++ Register index, ++ Register off, ++ Register flags, ++ bool is_static = false) { ++ assert_different_registers(cache, index, flags, off); ++ ++ ByteSize cp_base_offset = ConstantPoolCache::base_offset(); ++ // Field offset ++ __ dsll(AT, index, Address::times_ptr); ++ __ daddu(AT, cache, AT); ++ __ ld(off, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::f2_offset())); ++ // Flags ++ __ ld(flags, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::flags_offset())); ++ ++ // klass overwrite register ++ if (is_static) { ++ __ ld(obj, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::f1_offset())); ++ const int mirror_offset = in_bytes(Klass::java_mirror_offset()); ++ __ ld(obj, Address(obj, mirror_offset)); ++ ++ __ resolve_oop_handle(obj, T9); ++ } ++} ++ ++// get the method, itable_index and flags of the current invoke ++void TemplateTable::load_invoke_cp_cache_entry(int byte_no, ++ Register method, ++ Register itable_index, ++ Register flags, ++ bool is_invokevirtual, ++ bool is_invokevfinal, /*unused*/ ++ bool is_invokedynamic) { ++ // setup registers ++ const Register cache = T3; ++ const Register index = T1; ++ assert_different_registers(method, flags); ++ assert_different_registers(method, cache, index); ++ 
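To restate the barrier policy from the JMM comment above volatile_barrier(): a barrier is needed before a volatile store, after a volatile store (this one also covers the volatile-store-then-volatile-load case), and after a volatile load. The following C++ sketch mirrors only that placement; the relaxed std::atomic accesses and seq_cst fences are stand-ins chosen for the illustration, not how the interpreter actually implements volatile fields:

#include <atomic>
#include <cstdint>

// A shared slot standing in for a Java volatile field (assumption of this sketch).
static std::atomic<int64_t> g_slot{0};

static int64_t volatile_load() {
  int64_t v = g_slot.load(std::memory_order_relaxed);
  // Barrier after a volatile load: later accesses may not float above it.
  std::atomic_thread_fence(std::memory_order_seq_cst);
  return v;
}

static void volatile_store(int64_t v) {
  // Barrier before a volatile store: earlier accesses may not float below it.
  std::atomic_thread_fence(std::memory_order_seq_cst);
  g_slot.store(v, std::memory_order_relaxed);
  // Extra barrier after the store covers the volatile-store -> volatile-load case.
  std::atomic_thread_fence(std::memory_order_seq_cst);
}

int main() {
  volatile_store(42);
  return volatile_load() == 42 ? 0 : 1;
}

In the code shown here the port issues the same full sync for every placement, which is why volatile_barrier() has only one flavor.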
assert_different_registers(itable_index, flags); ++ assert_different_registers(itable_index, cache, index); ++ assert(is_invokevirtual == (byte_no == f2_byte), "is invokevirtual flag redundant"); ++ // determine constant pool cache field offsets ++ const int method_offset = in_bytes( ++ ConstantPoolCache::base_offset() + ++ ((byte_no == f2_byte) ++ ? ConstantPoolCacheEntry::f2_offset() ++ : ConstantPoolCacheEntry::f1_offset())); ++ const int flags_offset = in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::flags_offset()); ++ // access constant pool cache fields ++ const int index_offset = in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::f2_offset()); ++ ++ size_t index_size = (is_invokedynamic ? sizeof(u4): sizeof(u2)); ++ resolve_cache_and_index(byte_no, cache, index, index_size); ++ ++ //assert(wordSize == 8, "adjust code below"); ++ // note we shift 4 not 2, for we get is the true inde ++ // of ConstantPoolCacheEntry, not the shifted 2-bit index as x86 version ++ __ dsll(AT, index, Address::times_ptr); ++ __ daddu(AT, cache, AT); ++ __ ld(method, AT, method_offset); ++ ++ if (itable_index != NOREG) { ++ __ ld(itable_index, AT, index_offset); ++ } ++ __ ld(flags, AT, flags_offset); ++} ++ ++// The registers cache and index expected to be set before call. ++// Correct values of the cache and index registers are preserved. ++void TemplateTable::jvmti_post_field_access(Register cache, Register index, ++ bool is_static, bool has_tos) { ++ // do the JVMTI work here to avoid disturbing the register state below ++ // We use c_rarg registers here because we want to use the register used in ++ // the call to the VM ++ if (JvmtiExport::can_post_field_access()) { ++ // Check to see if a field access watch has been set before we ++ // take the time to call into the VM. ++ Label L1; ++ // kill FSR ++ Register tmp1 = T2; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ assert_different_registers(cache, index, AT); ++ __ li(AT, (intptr_t)JvmtiExport::get_field_access_count_addr()); ++ __ lw(AT, AT, 0); ++ __ beq(AT, R0, L1); ++ __ delayed()->nop(); ++ ++ __ get_cache_and_index_at_bcp(tmp2, tmp3, 1); ++ ++ // cache entry pointer ++ __ daddiu(tmp2, tmp2, in_bytes(ConstantPoolCache::base_offset())); ++ __ shl(tmp3, LogBytesPerWord); ++ __ daddu(tmp2, tmp2, tmp3); ++ if (is_static) { ++ __ move(tmp1, R0); ++ } else { ++ __ ld(tmp1, SP, 0); ++ __ verify_oop(tmp1); ++ } ++ // tmp1: object pointer or NULL ++ // tmp2: cache entry pointer ++ // tmp3: jvalue object on the stack ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_access), ++ tmp1, tmp2, tmp3); ++ __ get_cache_and_index_at_bcp(cache, index, 1); ++ __ bind(L1); ++ } ++} ++ ++void TemplateTable::pop_and_check_object(Register r) { ++ __ pop_ptr(r); ++ __ null_check(r); // for field access must check obj. 
++ __ verify_oop(r); ++} ++ ++// used registers : T1, T2, T3, T1 ++// T1 : flags ++// T2 : off ++// T3 : obj ++// T1 : field address ++// The flags 31, 30, 29, 28 together build a 4 bit number 0 to 8 with the ++// following mapping to the TosState states: ++// btos: 0 ++// ctos: 1 ++// stos: 2 ++// itos: 3 ++// ltos: 4 ++// ftos: 5 ++// dtos: 6 ++// atos: 7 ++// vtos: 8 ++// see ConstantPoolCacheEntry::set_field for more info ++void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteControl rc) { ++ transition(vtos, vtos); ++ ++ const Register cache = T3; ++ const Register index = T0; ++ ++ const Register obj = T3; ++ const Register off = T2; ++ const Register flags = T1; ++ ++ const Register scratch = T8; ++ ++ resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); ++ jvmti_post_field_access(cache, index, is_static, false); ++ load_field_cp_cache_entry(obj, cache, index, off, flags, is_static); ++ ++ { ++ __ move(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, flags); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++ ++ if (!is_static) pop_and_check_object(obj); ++ __ daddu(index, obj, off); ++ ++ const Address field(index, 0); ++ ++ Label Done, notByte, notBool, notInt, notShort, notChar, ++ notLong, notFloat, notObj, notDouble; ++ ++ assert(btos == 0, "change code, btos != 0"); ++ __ dsrl(flags, flags, ConstantPoolCacheEntry::tos_state_shift); ++ __ andi(flags, flags, ConstantPoolCacheEntry::tos_state_mask); ++ __ bne(flags, R0, notByte); ++ __ delayed()->nop(); ++ ++ // btos ++ __ access_load_at(T_BYTE, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(btos); ++ ++ // Rewrite bytecode to be faster ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_bgetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ ++ __ bind(notByte); ++ __ move(AT, ztos); ++ __ bne(flags, AT, notBool); ++ __ delayed()->nop(); ++ ++ // ztos ++ __ access_load_at(T_BOOLEAN, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(ztos); ++ ++ // Rewrite bytecode to be faster ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_bgetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ ++ __ bind(notBool); ++ __ move(AT, itos); ++ __ bne(flags, AT, notInt); ++ __ delayed()->nop(); ++ ++ // itos ++ __ access_load_at(T_INT, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(itos); ++ ++ // Rewrite bytecode to be faster ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_igetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notInt); ++ __ move(AT, atos); ++ __ bne(flags, AT, notObj); ++ __ delayed()->nop(); ++ ++ // atos ++ //add for compressedoops ++ do_oop_load(_masm, Address(index, 0), FSR, IN_HEAP); ++ __ push(atos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_agetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notObj); ++ __ move(AT, ctos); ++ __ bne(flags, AT, notChar); ++ __ delayed()->nop(); ++ ++ // ctos ++ __ access_load_at(T_CHAR, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(ctos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_cgetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notChar); ++ __ move(AT, stos); ++ __ bne(flags, AT, notShort); ++ __ delayed()->nop(); ++ ++ // stos ++ __ access_load_at(T_SHORT, IN_HEAP, FSR, field, 
noreg, noreg); ++ __ push(stos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_sgetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notShort); ++ __ move(AT, ltos); ++ __ bne(flags, AT, notLong); ++ __ delayed()->nop(); ++ ++ // FIXME : the load/store should be atomic, we have no simple method to do this in mips32 ++ // ltos ++ __ access_load_at(T_LONG, IN_HEAP | MO_RELAXED, FSR, field, noreg, noreg); ++ __ push(ltos); ++ ++ // Don't rewrite to _fast_lgetfield for potential volatile case. ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notLong); ++ __ move(AT, ftos); ++ __ bne(flags, AT, notFloat); ++ __ delayed()->nop(); ++ ++ // ftos ++ __ access_load_at(T_FLOAT, IN_HEAP, noreg /* ftos */, field, noreg, noreg); ++ __ push(ftos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_fgetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notFloat); ++ __ move(AT, dtos); ++#ifdef ASSERT ++ __ bne(flags, AT, notDouble); ++ __ delayed()->nop(); ++#endif ++ ++ // dtos ++ __ access_load_at(T_DOUBLE, IN_HEAP, noreg /* dtos */, field, noreg, noreg); ++ __ push(dtos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_dgetfield, T3, T2); ++ } ++ ++#ifdef ASSERT ++ __ b(Done); ++ __ delayed()->nop(); ++ __ bind(notDouble); ++ __ stop("Bad state"); ++#endif ++ ++ __ bind(Done); ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++} ++ ++ ++void TemplateTable::getfield(int byte_no) { ++ getfield_or_static(byte_no, false); ++} ++ ++void TemplateTable::nofast_getfield(int byte_no) { ++ getfield_or_static(byte_no, false, may_not_rewrite); ++} ++ ++void TemplateTable::getstatic(int byte_no) { ++ getfield_or_static(byte_no, true); ++} ++ ++// The registers cache and index expected to be set before call. ++// The function may destroy various registers, just not the cache and index registers. ++void TemplateTable::jvmti_post_field_mod(Register cache, Register index, bool is_static) { ++ transition(vtos, vtos); ++ ++ ByteSize cp_base_offset = ConstantPoolCache::base_offset(); ++ ++ if (JvmtiExport::can_post_field_modification()) { ++ // Check to see if a field modification watch has been set before ++ // we take the time to call into the VM. ++ Label L1; ++ //kill AT, T1, T2, T3, T9 ++ Register tmp1 = T2; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ Register tmp4 = T9; ++ assert_different_registers(cache, index, tmp4); ++ ++ __ li(AT, JvmtiExport::get_field_modification_count_addr()); ++ __ lw(AT, AT, 0); ++ __ beq(AT, R0, L1); ++ __ delayed()->nop(); ++ ++ __ get_cache_and_index_at_bcp(tmp2, tmp4, 1); ++ ++ if (is_static) { ++ __ move(tmp1, R0); ++ } else { ++ // Life is harder. The stack holds the value on top, followed by ++ // the object. We don't know the size of the value, though; it ++ // could be one or two words depending on its type. As a result, ++ // we must find the type to determine where the object is. 
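The assembly that follows works out where that object is from the value's type: a long or double occupies two expression-stack slots, everything else one. The same computation in plain C++, with the slot size taken as 8 bytes purely as an assumption of this sketch (the interpreter uses Interpreter::expr_offset_in_bytes for the real offsets):

#include <cstddef>
#include <cstdint>

// Assumed for this sketch only: one interpreter expression-stack slot is
// 8 bytes wide; longs and doubles take two slots, every other value one.
static const size_t kSlotBytes = 8;

enum class Tos { btos, ztos, ctos, stos, itos, ftos, atos, ltos, dtos };

// The value sits on top of the stack and the receiver object right under it,
// so the object's address is sp plus one or two slots depending on the type.
static const uint8_t* object_slot(const uint8_t* sp, Tos tos) {
  size_t value_slots = (tos == Tos::ltos || tos == Tos::dtos) ? 2 : 1;
  return sp + value_slots * kSlotBytes;
}

int main() {
  uint8_t stack[32] = {};
  bool ok = object_slot(stack, Tos::itos) == stack + 8 &&
            object_slot(stack, Tos::dtos) == stack + 16;
  return ok ? 0 : 1;
}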
++ Label two_word, valsize_known; ++ __ dsll(AT, tmp4, Address::times_8); ++ __ daddu(AT, tmp2, AT); ++ __ ld(tmp3, AT, in_bytes(cp_base_offset + ++ ConstantPoolCacheEntry::flags_offset())); ++ __ shr(tmp3, ConstantPoolCacheEntry::tos_state_shift); ++ ++ ConstantPoolCacheEntry::verify_tos_state_shift(); ++ __ move(tmp1, SP); ++ __ move(AT, ltos); ++ __ beq(tmp3, AT, two_word); ++ __ delayed()->nop(); ++ __ move(AT, dtos); ++ __ beq(tmp3, AT, two_word); ++ __ delayed()->nop(); ++ __ b(valsize_known); ++ __ delayed()->daddiu(tmp1, tmp1, Interpreter::expr_offset_in_bytes(1) ); ++ ++ __ bind(two_word); ++ __ daddiu(tmp1, tmp1, Interpreter::expr_offset_in_bytes(2)); ++ ++ __ bind(valsize_known); ++ // setup object pointer ++ __ ld(tmp1, tmp1, 0*wordSize); ++ } ++ // cache entry pointer ++ __ daddiu(tmp2, tmp2, in_bytes(cp_base_offset)); ++ __ shl(tmp4, LogBytesPerWord); ++ __ daddu(tmp2, tmp2, tmp4); ++ // object (tos) ++ __ move(tmp3, SP); ++ // tmp1: object pointer set up above (NULL if static) ++ // tmp2: cache entry pointer ++ // tmp3: jvalue object on the stack ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_modification), ++ tmp1, tmp2, tmp3); ++ __ get_cache_and_index_at_bcp(cache, index, 1); ++ __ bind(L1); ++ } ++} ++ ++// used registers : T0, T1, T2, T3, T8 ++// T1 : flags ++// T2 : off ++// T3 : obj ++// T8 : volatile bit ++// see ConstantPoolCacheEntry::set_field for more info ++void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteControl rc) { ++ transition(vtos, vtos); ++ ++ const Register cache = T3; ++ const Register index = T0; ++ const Register obj = T3; ++ const Register off = T2; ++ const Register flags = T1; ++ const Register bc = T3; ++ ++ const Register scratch = T8; ++ ++ resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); ++ jvmti_post_field_mod(cache, index, is_static); ++ load_field_cp_cache_entry(obj, cache, index, off, flags, is_static); ++ ++ Label Done; ++ { ++ __ move(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, flags); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++ ++ ++ Label notByte, notBool, notInt, notShort, notChar, notLong, notFloat, notObj, notDouble; ++ ++ assert(btos == 0, "change code, btos != 0"); ++ ++ // btos ++ __ dsrl(flags, flags, ConstantPoolCacheEntry::tos_state_shift); ++ __ andi(flags, flags, ConstantPoolCacheEntry::tos_state_mask); ++ __ bne(flags, R0, notByte); ++ __ delayed()->nop(); ++ ++ __ pop(btos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(T9, obj, off); ++ __ access_store_at(T_BYTE, IN_HEAP, Address(T9), FSR, noreg, noreg); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_bputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // ztos ++ __ bind(notByte); ++ __ move(AT, ztos); ++ __ bne(flags, AT, notBool); ++ __ delayed()->nop(); ++ ++ __ pop(ztos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(T9, obj, off); ++ __ andi(FSR, FSR, 0x1); ++ __ access_store_at(T_BOOLEAN, IN_HEAP, Address(T9), FSR, noreg, noreg); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_zputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // itos ++ __ bind(notBool); ++ __ move(AT, itos); ++ __ bne(flags, AT, notInt); ++ __ delayed()->nop(); ++ ++ __ pop(itos); ++ if (!is_static) { ++ 
pop_and_check_object(obj); ++ } ++ __ daddu(T9, obj, off); ++ __ access_store_at(T_INT, IN_HEAP, Address(T9), FSR, noreg, noreg); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_iputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // atos ++ __ bind(notInt); ++ __ move(AT, atos); ++ __ bne(flags, AT, notObj); ++ __ delayed()->nop(); ++ ++ __ pop(atos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ ++ do_oop_store(_masm, Address(obj, off, Address::times_1, 0), FSR); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_aputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // ctos ++ __ bind(notObj); ++ __ move(AT, ctos); ++ __ bne(flags, AT, notChar); ++ __ delayed()->nop(); ++ ++ __ pop(ctos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(T9, obj, off); ++ __ access_store_at(T_CHAR, IN_HEAP, Address(T9), FSR, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_cputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // stos ++ __ bind(notChar); ++ __ move(AT, stos); ++ __ bne(flags, AT, notShort); ++ __ delayed()->nop(); ++ ++ __ pop(stos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(T9, obj, off); ++ __ access_store_at(T_SHORT, IN_HEAP, Address(T9), FSR, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_sputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // ltos ++ __ bind(notShort); ++ __ move(AT, ltos); ++ __ bne(flags, AT, notLong); ++ __ delayed()->nop(); ++ ++ __ pop(ltos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(T9, obj, off); ++ __ access_store_at(T_LONG, IN_HEAP, Address(T9), FSR, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_lputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // ftos ++ __ bind(notLong); ++ __ move(AT, ftos); ++ __ bne(flags, AT, notFloat); ++ __ delayed()->nop(); ++ ++ __ pop(ftos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(T9, obj, off); ++ __ access_store_at(T_FLOAT, IN_HEAP, Address(T9), noreg, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_fputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ ++ // dtos ++ __ bind(notFloat); ++ __ move(AT, dtos); ++#ifdef ASSERT ++ __ bne(flags, AT, notDouble); ++ __ delayed()->nop(); ++#endif ++ ++ __ pop(dtos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(T9, obj, off); ++ __ access_store_at(T_DOUBLE, IN_HEAP, Address(T9), noreg, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_dputfield, bc, off, true, byte_no); ++ } ++ ++#ifdef ASSERT ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notDouble); ++ __ stop("Bad state"); ++#endif ++ ++ __ bind(Done); ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++} ++ ++void TemplateTable::putfield(int byte_no) { ++ putfield_or_static(byte_no, false); ++} ++ ++void TemplateTable::nofast_putfield(int byte_no) { ++ putfield_or_static(byte_no, false, may_not_rewrite); ++} ++ ++void TemplateTable::putstatic(int byte_no) { ++ putfield_or_static(byte_no, true); ++} ++ ++// used registers : T1, T2, T3 
++// T1 : cp_entry ++// T2 : obj ++// T3 : value pointer ++void TemplateTable::jvmti_post_fast_field_mod() { ++ if (JvmtiExport::can_post_field_modification()) { ++ // Check to see if a field modification watch has been set before ++ // we take the time to call into the VM. ++ Label L2; ++ //kill AT, T1, T2, T3, T9 ++ Register tmp1 = T2; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ Register tmp4 = T9; ++ __ li(AT, JvmtiExport::get_field_modification_count_addr()); ++ __ lw(tmp3, AT, 0); ++ __ beq(tmp3, R0, L2); ++ __ delayed()->nop(); ++ __ pop_ptr(tmp1); ++ __ verify_oop(tmp1); ++ __ push_ptr(tmp1); ++ switch (bytecode()) { // load values into the jvalue object ++ case Bytecodes::_fast_aputfield: __ push_ptr(FSR); break; ++ case Bytecodes::_fast_bputfield: // fall through ++ case Bytecodes::_fast_zputfield: // fall through ++ case Bytecodes::_fast_sputfield: // fall through ++ case Bytecodes::_fast_cputfield: // fall through ++ case Bytecodes::_fast_iputfield: __ push_i(FSR); break; ++ case Bytecodes::_fast_dputfield: __ push_d(FSF); break; ++ case Bytecodes::_fast_fputfield: __ push_f(); break; ++ case Bytecodes::_fast_lputfield: __ push_l(FSR); break; ++ default: ShouldNotReachHere(); ++ } ++ __ move(tmp3, SP); ++ // access constant pool cache entry ++ __ get_cache_entry_pointer_at_bcp(tmp2, FSR, 1); ++ __ verify_oop(tmp1); ++ // tmp1: object pointer copied above ++ // tmp2: cache entry pointer ++ // tmp3: jvalue object on the stack ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_modification), ++ tmp1, tmp2, tmp3); ++ ++ switch (bytecode()) { // restore tos values ++ case Bytecodes::_fast_aputfield: __ pop_ptr(FSR); break; ++ case Bytecodes::_fast_bputfield: // fall through ++ case Bytecodes::_fast_zputfield: // fall through ++ case Bytecodes::_fast_sputfield: // fall through ++ case Bytecodes::_fast_cputfield: // fall through ++ case Bytecodes::_fast_iputfield: __ pop_i(FSR); break; ++ case Bytecodes::_fast_dputfield: __ pop_d(); break; ++ case Bytecodes::_fast_fputfield: __ pop_f(); break; ++ case Bytecodes::_fast_lputfield: __ pop_l(FSR); break; ++ default: break; ++ } ++ __ bind(L2); ++ } ++} ++ ++// used registers : T2, T3, T1 ++// T2 : index & off & field address ++// T3 : cache & obj ++// T1 : flags ++void TemplateTable::fast_storefield(TosState state) { ++ transition(state, vtos); ++ ++ const Register scratch = T8; ++ ++ ByteSize base = ConstantPoolCache::base_offset(); ++ ++ jvmti_post_fast_field_mod(); ++ ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(T3, T2, 1); ++ ++ // Must prevent reordering of the following cp cache loads with bytecode load ++ __ sync(); ++ ++ // test for volatile with T1 ++ __ dsll(AT, T2, Address::times_8); ++ __ daddu(AT, T3, AT); ++ __ ld(T1, AT, in_bytes(base + ConstantPoolCacheEntry::flags_offset())); ++ ++ // replace index with field offset from cache entry ++ __ ld(T2, AT, in_bytes(base + ConstantPoolCacheEntry::f2_offset())); ++ ++ Label Done; ++ { ++ __ move(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, T1); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++ ++ // Get object from stack ++ pop_and_check_object(T3); ++ ++ if (bytecode() != Bytecodes::_fast_aputfield) { ++ // field address ++ __ daddu(T2, T3, T2); ++ } ++ ++ // access field ++ switch (bytecode()) { ++ case Bytecodes::_fast_zputfield: ++ __ andi(FSR, FSR, 0x1); // boolean is true if LSB is 1 ++ 
__ access_store_at(T_BOOLEAN, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_bputfield: ++ __ access_store_at(T_BYTE, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_sputfield: ++ __ access_store_at(T_SHORT, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_cputfield: ++ __ access_store_at(T_CHAR, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_iputfield: ++ __ access_store_at(T_INT, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_lputfield: ++ __ access_store_at(T_LONG, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_fputfield: ++ __ access_store_at(T_FLOAT, IN_HEAP, Address(T2), noreg, noreg, noreg); ++ break; ++ case Bytecodes::_fast_dputfield: ++ __ access_store_at(T_DOUBLE, IN_HEAP, Address(T2), noreg, noreg, noreg); ++ break; ++ case Bytecodes::_fast_aputfield: ++ do_oop_store(_masm, Address(T3, T2, Address::times_1, 0), FSR); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++} ++ ++// used registers : T2, T3, T1 ++// T3 : cp_entry & cache ++// T2 : index & offset ++void TemplateTable::fast_accessfield(TosState state) { ++ transition(atos, state); ++ ++ const Register scratch = T8; ++ ++ // do the JVMTI work here to avoid disturbing the register state below ++ if (JvmtiExport::can_post_field_access()) { ++ // Check to see if a field access watch has been set before we take ++ // the time to call into the VM. ++ Label L1; ++ __ li(AT, (intptr_t)JvmtiExport::get_field_access_count_addr()); ++ __ lw(T3, AT, 0); ++ __ beq(T3, R0, L1); ++ __ delayed()->nop(); ++ // access constant pool cache entry ++ __ get_cache_entry_pointer_at_bcp(T3, T1, 1); ++ __ move(TSR, FSR); ++ __ verify_oop(FSR); ++ // FSR: object pointer copied above ++ // T3: cache entry pointer ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access), ++ FSR, T3); ++ __ move(FSR, TSR); ++ __ bind(L1); ++ } ++ ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(T3, T2, 1); ++ ++ // Must prevent reordering of the following cp cache loads with bytecode load ++ __ sync(); ++ ++ // replace index with field offset from cache entry ++ __ dsll(AT, T2, Address::times_8); ++ __ daddu(AT, T3, AT); ++ __ ld(T2, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset())); ++ ++ { ++ __ ld(AT, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); ++ __ move(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, AT); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++ ++ // FSR: object ++ __ verify_oop(FSR); ++ __ null_check(FSR); ++ // field addresses ++ __ daddu(FSR, FSR, T2); ++ ++ // access field ++ switch (bytecode()) { ++ case Bytecodes::_fast_bgetfield: ++ __ access_load_at(T_BYTE, IN_HEAP, FSR, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_sgetfield: ++ __ access_load_at(T_SHORT, IN_HEAP, FSR, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_cgetfield: ++ __ access_load_at(T_CHAR, IN_HEAP, FSR, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_igetfield: ++ __ access_load_at(T_INT, IN_HEAP, FSR, Address(FSR), noreg, noreg); ++ break; ++ case 
Bytecodes::_fast_lgetfield: ++ __ stop("should not be rewritten"); ++ break; ++ case Bytecodes::_fast_fgetfield: ++ __ access_load_at(T_FLOAT, IN_HEAP, noreg, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_dgetfield: ++ __ access_load_at(T_DOUBLE, IN_HEAP, noreg, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_agetfield: ++ //add for compressedoops ++ do_oop_load(_masm, Address(FSR, 0), FSR, IN_HEAP); ++ __ verify_oop(FSR); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++} ++ ++// generator for _fast_iaccess_0, _fast_aaccess_0, _fast_faccess_0 ++// used registers : T1, T2, T3, T1 ++// T1 : obj & field address ++// T2 : off ++// T3 : cache ++// T1 : index ++void TemplateTable::fast_xaccess(TosState state) { ++ transition(vtos, state); ++ ++ const Register scratch = T8; ++ ++ // get receiver ++ __ ld(T1, aaddress(0)); ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(T3, T2, 2); ++ __ dsll(AT, T2, Address::times_8); ++ __ daddu(AT, T3, AT); ++ __ ld(T2, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset())); ++ ++ { ++ __ ld(AT, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); ++ __ move(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, AT); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++ ++ // make sure exception is reported in correct bcp range (getfield is ++ // next instruction) ++ __ daddiu(BCP, BCP, 1); ++ __ null_check(T1); ++ __ daddu(T1, T1, T2); ++ ++ if (state == itos) { ++ __ access_load_at(T_INT, IN_HEAP, FSR, Address(T1), noreg, noreg); ++ } else if (state == atos) { ++ do_oop_load(_masm, Address(T1, 0), FSR, IN_HEAP); ++ __ verify_oop(FSR); ++ } else if (state == ftos) { ++ __ access_load_at(T_FLOAT, IN_HEAP, noreg, Address(T1), noreg, noreg); ++ } else { ++ ShouldNotReachHere(); ++ } ++ __ daddiu(BCP, BCP, -1); ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++} ++ ++ ++ ++//----------------------------------------------------------------------------- ++// Calls ++ ++void TemplateTable::count_calls(Register method, Register temp) { ++ // implemented elsewhere ++ ShouldNotReachHere(); ++} ++ ++// method, index, recv, flags: T1, T2, T3, T1 ++// byte_no = 2 for _invokevirtual, 1 else ++// T0 : return address ++// get the method & index of the invoke, and push the return address of ++// the invoke(first word in the frame) ++// this address is where the return code jmp to. ++// NOTE : this method will set T3&T1 as recv&flags ++void TemplateTable::prepare_invoke(int byte_no, ++ Register method, // linked method (or i-klass) ++ Register index, // itable index, MethodType, etc. 
++ Register recv, // if caller wants to see it ++ Register flags // if caller wants to test it ++ ) { ++ // determine flags ++ const Bytecodes::Code code = bytecode(); ++ const bool is_invokeinterface = code == Bytecodes::_invokeinterface; ++ const bool is_invokedynamic = code == Bytecodes::_invokedynamic; ++ const bool is_invokehandle = code == Bytecodes::_invokehandle; ++ const bool is_invokevirtual = code == Bytecodes::_invokevirtual; ++ const bool is_invokespecial = code == Bytecodes::_invokespecial; ++ const bool load_receiver = (recv != noreg); ++ const bool save_flags = (flags != noreg); ++ assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic),""); ++ assert(save_flags == (is_invokeinterface || is_invokevirtual), "need flags for vfinal"); ++ assert(flags == noreg || flags == T1, "error flags reg."); ++ assert(recv == noreg || recv == T3, "error recv reg."); ++ ++ // setup registers & access constant pool cache ++ if(recv == noreg) recv = T3; ++ if(flags == noreg) flags = T1; ++ assert_different_registers(method, index, recv, flags); ++ ++ // save 'interpreter return address' ++ __ save_bcp(); ++ ++ load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic); ++ ++ if (is_invokedynamic || is_invokehandle) { ++ Label L_no_push; ++ __ move(AT, (1 << ConstantPoolCacheEntry::has_appendix_shift)); ++ __ andr(AT, AT, flags); ++ __ beq(AT, R0, L_no_push); ++ __ delayed()->nop(); ++ // Push the appendix as a trailing parameter. ++ // This must be done before we get the receiver, ++ // since the parameter_size includes it. ++ Register tmp = SSR; ++ __ push(tmp); ++ __ move(tmp, index); ++ assert(ConstantPoolCacheEntry::_indy_resolved_references_appendix_offset == 0, "appendix expected at index+0"); ++ __ load_resolved_reference_at_index(index, tmp, recv); ++ __ pop(tmp); ++ __ push(index); // push appendix (MethodType, CallSite, etc.) ++ __ bind(L_no_push); ++ } ++ ++ // load receiver if needed (after appendix is pushed so parameter size is correct) ++ // Note: no return address pushed yet ++ if (load_receiver) { ++ __ move(AT, ConstantPoolCacheEntry::parameter_size_mask); ++ __ andr(recv, flags, AT); ++ // Since we won't push RA on stack, no_return_pc_pushed_yet should be 0. 
++ const int no_return_pc_pushed_yet = 0; // argument slot correction before we push return address ++ const int receiver_is_at_end = -1; // back off one slot to get receiver ++ Address recv_addr = __ argument_address(recv, no_return_pc_pushed_yet + receiver_is_at_end); ++ __ ld(recv, recv_addr); ++ __ verify_oop(recv); ++ } ++ if(save_flags) { ++ __ move(BCP, flags); ++ } ++ ++ // compute return type ++ __ dsrl(flags, flags, ConstantPoolCacheEntry::tos_state_shift); ++ __ andi(flags, flags, 0xf); ++ ++ // Make sure we don't need to mask flags for tos_state_shift after the above shift ++ ConstantPoolCacheEntry::verify_tos_state_shift(); ++ // load return address ++ { ++ const address table = (address) Interpreter::invoke_return_entry_table_for(code); ++ __ li(AT, (long)table); ++ __ dsll(flags, flags, LogBytesPerWord); ++ __ daddu(AT, AT, flags); ++ __ ld(RA, AT, 0); ++ } ++ ++ if (save_flags) { ++ __ move(flags, BCP); ++ __ restore_bcp(); ++ } ++} ++ ++// used registers : T0, T3, T1, T2 ++// T3 : recv, this two register using convention is by prepare_invoke ++// T1 : flags, klass ++// Rmethod : method, index must be Rmethod ++void TemplateTable::invokevirtual_helper(Register index, ++ Register recv, ++ Register flags) { ++ ++ assert_different_registers(index, recv, flags, T2); ++ ++ // Test for an invoke of a final method ++ Label notFinal; ++ __ move(AT, (1 << ConstantPoolCacheEntry::is_vfinal_shift)); ++ __ andr(AT, flags, AT); ++ __ beq(AT, R0, notFinal); ++ __ delayed()->nop(); ++ ++ Register method = index; // method must be Rmethod ++ assert(method == Rmethod, "methodOop must be Rmethod for interpreter calling convention"); ++ ++ // do the call - the index is actually the method to call ++ // the index is indeed methodOop, for this is vfinal, ++ // see ConstantPoolCacheEntry::set_method for more info ++ ++ ++ // It's final, need a null check here! 
++ __ null_check(recv); ++ ++ // profile this call ++ __ profile_final_call(T2); ++ ++ // T2: tmp, used for mdp ++ // method: callee ++ // T9: tmp ++ // is_virtual: true ++ __ profile_arguments_type(T2, method, T9, true); ++ ++ __ jump_from_interpreted(method, T2); ++ ++ __ bind(notFinal); ++ ++ // get receiver klass ++ __ null_check(recv, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(T2, recv); ++ ++ // profile this call ++ __ profile_virtual_call(T2, T0, T1); ++ ++ // get target methodOop & entry point ++ const int base = in_bytes(Klass::vtable_start_offset()); ++ assert(vtableEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); ++ __ dsll(AT, index, Address::times_ptr); ++ // T2: receiver ++ __ daddu(AT, T2, AT); ++ //this is a ualign read ++ __ ld(method, AT, base + vtableEntry::method_offset_in_bytes()); ++ __ profile_arguments_type(T2, method, T9, true); ++ __ jump_from_interpreted(method, T2); ++ ++} ++ ++void TemplateTable::invokevirtual(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f2_byte, "use this argument"); ++ prepare_invoke(byte_no, Rmethod, NOREG, T3, T1); ++ // now recv & flags in T3, T1 ++ invokevirtual_helper(Rmethod, T3, T1); ++} ++ ++// T9 : entry ++// Rmethod : method ++void TemplateTable::invokespecial(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ prepare_invoke(byte_no, Rmethod, NOREG, T3); ++ // now recv & flags in T3, T1 ++ __ verify_oop(T3); ++ __ null_check(T3); ++ __ profile_call(T9); ++ ++ // T8: tmp, used for mdp ++ // Rmethod: callee ++ // T9: tmp ++ // is_virtual: false ++ __ profile_arguments_type(T8, Rmethod, T9, false); ++ ++ __ jump_from_interpreted(Rmethod, T9); ++ __ move(T0, T3); ++} ++ ++void TemplateTable::invokestatic(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ prepare_invoke(byte_no, Rmethod, NOREG); ++ ++ __ profile_call(T9); ++ ++ // T8: tmp, used for mdp ++ // Rmethod: callee ++ // T9: tmp ++ // is_virtual: false ++ __ profile_arguments_type(T8, Rmethod, T9, false); ++ ++ __ jump_from_interpreted(Rmethod, T9); ++} ++ ++// i have no idea what to do here, now. for future change. FIXME. ++void TemplateTable::fast_invokevfinal(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f2_byte, "use this argument"); ++ __ stop("fast_invokevfinal not used on mips64"); ++} ++ ++// used registers : T0, T1, T2, T3, T1, A7 ++// T0 : itable, vtable, entry ++// T1 : interface ++// T3 : receiver ++// T1 : flags, klass ++// Rmethod : index, method, this is required by interpreter_entry ++void TemplateTable::invokeinterface(int byte_no) { ++ transition(vtos, vtos); ++ //this method will use T1-T4 and T0 ++ assert(byte_no == f1_byte, "use this argument"); ++ prepare_invoke(byte_no, T2, Rmethod, T3, T1); ++ // T2: reference klass (from f1) if interface method ++ // Rmethod: method (from f2) ++ // T3: receiver ++ // T1: flags ++ ++ // First check for Object case, then private interface method, ++ // then regular interface method. ++ ++ // Special case of invokeinterface called for virtual method of ++ // java.lang.Object. See cpCache.cpp for details. 
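Concretely, the checks that follow branch on two bits of the cached flags word: is_forced_virtual marks the java.lang.Object methods reached through invokeinterface, and is_vfinal marks private interface methods that resolve to a single final target; everything else takes the itable walk. A compact sketch of that three-way decision, with the shift positions written as placeholders rather than the real ConstantPoolCacheEntry constants:

#include <cstdint>
#include <cstdio>

// Illustrative shift positions only; the real values come from
// ConstantPoolCacheEntry and may differ.
static const int kForcedVirtualShift = 23;
static const int kVFinalShift        = 21;

enum class InterfaceDispatch {
  ObjectVirtual,   // invokeinterface on a java.lang.Object method
  PrivateDirect,   // private interface method, resolved to one final target
  ItableLookup     // regular interface method, needs the itable walk
};

static InterfaceDispatch classify(uint32_t flags) {
  if (flags & (1u << kForcedVirtualShift)) return InterfaceDispatch::ObjectVirtual;
  if (flags & (1u << kVFinalShift))        return InterfaceDispatch::PrivateDirect;
  return InterfaceDispatch::ItableLookup;
}

int main() {
  std::printf("%d\n", (int)classify(1u << kVFinalShift));  // prints 1 (PrivateDirect)
  return 0;
}

Only the last case needs the receiver-class and interface lookup_interface_method() path further below.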
++ Label notObjectMethod; ++ __ move(AT, (1 << ConstantPoolCacheEntry::is_forced_virtual_shift)); ++ __ andr(AT, T1, AT); ++ __ beq(AT, R0, notObjectMethod); ++ __ delayed()->nop(); ++ ++ invokevirtual_helper(Rmethod, T3, T1); ++ // no return from above ++ __ bind(notObjectMethod); ++ ++ Label no_such_interface; // for receiver subtype check ++ Register recvKlass; // used for exception processing ++ ++ // Check for private method invocation - indicated by vfinal ++ Label notVFinal; ++ __ move(AT, (1 << ConstantPoolCacheEntry::is_vfinal_shift)); ++ __ andr(AT, T1, AT); ++ __ beq(AT, R0, notVFinal); ++ __ delayed()->nop(); ++ ++ // Get receiver klass into FSR - also a null check ++ __ null_check(T3, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(FSR, T3); ++ ++ Label subtype; ++ __ check_klass_subtype(FSR, T2, T0, subtype); ++ // If we get here the typecheck failed ++ recvKlass = T1; ++ __ move(recvKlass, FSR); ++ __ b(no_such_interface); ++ __ delayed()->nop(); ++ ++ __ bind(subtype); ++ ++ // do the call - rbx is actually the method to call ++ ++ __ profile_final_call(T1); ++ __ profile_arguments_type(T1, Rmethod, T0, true); ++ ++ __ jump_from_interpreted(Rmethod, T1); ++ // no return from above ++ __ bind(notVFinal); ++ ++ // Get receiver klass into T1 - also a null check ++ __ restore_locals(); ++ __ null_check(T3, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(T1, T3); ++ ++ Label no_such_method; ++ ++ // Preserve method for throw_AbstractMethodErrorVerbose. ++ __ move(T3, Rmethod); ++ // Receiver subtype check against REFC. ++ // Superklass in T2. Subklass in T1. ++ __ lookup_interface_method(// inputs: rec. class, interface, itable index ++ T1, T2, noreg, ++ // outputs: scan temp. reg, scan temp. reg ++ T0, FSR, ++ no_such_interface, ++ /*return_method=*/false); ++ ++ ++ // profile this call ++ __ restore_bcp(); ++ __ profile_virtual_call(T1, T0, FSR); ++ ++ // Get declaring interface class from method, and itable index ++ __ ld_ptr(T2, Rmethod, in_bytes(Method::const_offset())); ++ __ ld_ptr(T2, T2, in_bytes(ConstMethod::constants_offset())); ++ __ ld_ptr(T2, T2, ConstantPool::pool_holder_offset_in_bytes()); ++ __ lw(Rmethod, Rmethod, in_bytes(Method::itable_index_offset())); ++ __ addiu(Rmethod, Rmethod, (-1) * Method::itable_index_max); ++ __ subu32(Rmethod, R0, Rmethod); ++ ++ // Preserve recvKlass for throw_AbstractMethodErrorVerbose. ++ __ move(FSR, T1); ++ __ lookup_interface_method(// inputs: rec. class, interface, itable index ++ FSR, T2, Rmethod, ++ // outputs: method, scan temp. reg ++ Rmethod, T0, ++ no_such_interface); ++ ++ // Rmethod: Method* to call ++ // T3: receiver ++ // Check for abstract method error ++ // Note: This should be done more efficiently via a throw_abstract_method_error ++ // interpreter entry point and a conditional jump to it in case of a null ++ // method. ++ __ beq(Rmethod, R0, no_such_method); ++ __ delayed()->nop(); ++ ++ __ profile_called_method(Rmethod, T0, T1); ++ __ profile_arguments_type(T1, Rmethod, T0, true); ++ ++ // do the call ++ // T3: receiver ++ // Rmethod: Method* ++ __ jump_from_interpreted(Rmethod, T1); ++ __ should_not_reach_here(); ++ ++ // exception handling code follows... ++ // note: must restore interpreter registers to canonical ++ // state for exception handling to work correctly! ++ ++ __ bind(no_such_method); ++ // throw exception ++ __ pop(Rmethod); // pop return address (pushed by prepare_invoke) ++ __ restore_bcp(); ++ __ restore_locals(); ++ // Pass arguments for generating a verbose error message. 
++ recvKlass = A1; ++ Register method = A2; ++ if (recvKlass != T1) { __ move(recvKlass, T1); } ++ if (method != T3) { __ move(method, T3); } ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodErrorVerbose), recvKlass, method); ++ // the call_VM checks for exception, so we should never return here. ++ __ should_not_reach_here(); ++ ++ __ bind(no_such_interface); ++ // throw exception ++ __ pop(Rmethod); // pop return address (pushed by prepare_invoke) ++ __ restore_bcp(); ++ __ restore_locals(); ++ // Pass arguments for generating a verbose error message. ++ if (recvKlass != T1) { __ move(recvKlass, T1); } ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_IncompatibleClassChangeErrorVerbose), recvKlass, T2); ++ // the call_VM checks for exception, so we should never return here. ++ __ should_not_reach_here(); ++} ++ ++ ++void TemplateTable::invokehandle(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ const Register T2_method = Rmethod; ++ const Register FSR_mtype = FSR; ++ const Register T3_recv = T3; ++ ++ prepare_invoke(byte_no, T2_method, FSR_mtype, T3_recv); ++ //??__ verify_method_ptr(T2_method); ++ __ verify_oop(T3_recv); ++ __ null_check(T3_recv); ++ ++ // T9: MethodType object (from cpool->resolved_references[f1], if necessary) ++ // T2_method: MH.invokeExact_MT method (from f2) ++ ++ // Note: T9 is already pushed (if necessary) by prepare_invoke ++ ++ // FIXME: profile the LambdaForm also ++ __ profile_final_call(T9); ++ ++ // T8: tmp, used for mdp ++ // T2_method: callee ++ // T9: tmp ++ // is_virtual: true ++ __ profile_arguments_type(T8, T2_method, T9, true); ++ ++ __ jump_from_interpreted(T2_method, T9); ++} ++ ++ void TemplateTable::invokedynamic(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ ++ //const Register Rmethod = T2; ++ const Register T2_callsite = T2; ++ ++ prepare_invoke(byte_no, Rmethod, T2_callsite); ++ ++ // T2: CallSite object (from cpool->resolved_references[f1]) ++ // Rmethod: MH.linkToCallSite method (from f2) ++ ++ // Note: T2_callsite is already pushed by prepare_invoke ++ // %%% should make a type profile for any invokedynamic that takes a ref argument ++ // profile this call ++ __ profile_call(T9); ++ ++ // T8: tmp, used for mdp ++ // Rmethod: callee ++ // T9: tmp ++ // is_virtual: false ++ __ profile_arguments_type(T8, Rmethod, T9, false); ++ ++ __ verify_oop(T2_callsite); ++ ++ __ jump_from_interpreted(Rmethod, T9); ++ } ++ ++//----------------------------------------------------------------------------- ++// Allocation ++// T1 : tags & buffer end & thread ++// T2 : object end ++// T3 : klass ++// T1 : object size ++// A1 : cpool ++// A2 : cp index ++// return object in FSR ++void TemplateTable::_new() { ++ transition(vtos, atos); ++ __ get_unsigned_2_byte_index_at_bcp(A2, 1); ++ ++ Label slow_case; ++ Label done; ++ Label initialize_header; ++ Label initialize_object; // including clearing the fields ++ Label allocate_shared; ++ ++ __ get_cpool_and_tags(A1, T1); ++ ++ // make sure the class we're about to instantiate has been resolved. 
++ // Note: slow_case does a pop of stack, which is why we loaded class/pushed above ++ const int tags_offset = Array::base_offset_in_bytes(); ++ if (UseLEXT1 && Assembler::is_simm(tags_offset, 8)) { ++ __ gslbx(AT, T1, A2, tags_offset); ++ } else { ++ __ daddu(T1, T1, A2); ++ __ lb(AT, T1, tags_offset); ++ } ++ if(os::is_MP()) { ++ __ sync(); // load acquire ++ } ++ __ daddiu(AT, AT, - (int)JVM_CONSTANT_Class); ++ __ bne(AT, R0, slow_case); ++ __ delayed()->nop(); ++ ++ // get InstanceKlass ++ __ load_resolved_klass_at_index(A1, A2, T3); ++ ++ // make sure klass is initialized & doesn't have finalizer ++ // make sure klass is fully initialized ++ __ lhu(T1, T3, in_bytes(InstanceKlass::init_state_offset())); ++ __ daddiu(AT, T1, - (int)InstanceKlass::fully_initialized); ++ __ bne(AT, R0, slow_case); ++ __ delayed()->nop(); ++ ++ // has_finalizer ++ __ lw(T0, T3, in_bytes(Klass::layout_helper_offset()) ); ++ __ andi(AT, T0, Klass::_lh_instance_slow_path_bit); ++ __ bne(AT, R0, slow_case); ++ __ delayed()->nop(); ++ ++ // Allocate the instance ++ // 1) Try to allocate in the TLAB ++ // 2) if fail and the object is large allocate in the shared Eden ++ // 3) if the above fails (or is not applicable), go to a slow case ++ // (creates a new TLAB, etc.) ++ ++ const bool allow_shared_alloc = ++ Universe::heap()->supports_inline_contig_alloc(); ++ ++#ifndef OPT_THREAD ++ const Register thread = T8; ++ if (UseTLAB || allow_shared_alloc) { ++ __ get_thread(thread); ++ } ++#else ++ const Register thread = TREG; ++#endif ++ ++ if (UseTLAB) { ++ // get tlab_top ++ __ ld(FSR, thread, in_bytes(JavaThread::tlab_top_offset())); ++ // get tlab_end ++ __ ld(AT, thread, in_bytes(JavaThread::tlab_end_offset())); ++ __ daddu(T2, FSR, T0); ++ __ slt(AT, AT, T2); ++ __ bne(AT, R0, allow_shared_alloc ? allocate_shared : slow_case); ++ __ delayed()->nop(); ++ __ sd(T2, thread, in_bytes(JavaThread::tlab_top_offset())); ++ ++ if (ZeroTLAB) { ++ // the fields have been already cleared ++ __ beq(R0, R0, initialize_header); ++ } else { ++ // initialize both the header and fields ++ __ beq(R0, R0, initialize_object); ++ } ++ __ delayed()->nop(); ++ } ++ ++ // Allocation in the shared Eden , if allowed ++ // T0 : instance size in words ++ if(allow_shared_alloc){ ++ __ bind(allocate_shared); ++ ++ Label done, retry; ++ Address heap_top(T1); ++ __ set64(T1, (long)Universe::heap()->top_addr()); ++ __ ld(FSR, heap_top); ++ ++ __ bind(retry); ++ __ set64(AT, (long)Universe::heap()->end_addr()); ++ __ ld(AT, AT, 0); ++ __ daddu(T2, FSR, T0); ++ __ slt(AT, AT, T2); ++ __ bne(AT, R0, slow_case); ++ __ delayed()->nop(); ++ ++ // Compare FSR with the top addr, and if still equal, store the new ++ // top addr in T2 at the address of the top addr pointer. Sets AT if was ++ // equal, and clears it otherwise. Use lock prefix for atomicity on MPs. ++ // ++ // FSR: object begin ++ // T2: object end ++ // T0: instance size in words ++ ++ // if someone beat us on the allocation, try again, otherwise continue ++ __ cmpxchg(heap_top, FSR, T2, AT, true, true, done, &retry); ++ ++ __ bind(done); ++ ++ __ incr_allocated_bytes(thread, T0, 0); ++ } ++ ++ if (UseTLAB || Universe::heap()->supports_inline_contig_alloc()) { ++ // The object is initialized before the header. If the object size is ++ // zero, go directly to the header initialization. 
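Taken together, the allocation path above is a bump-pointer scheme: bump the thread-local buffer if the object fits, otherwise try to CAS the shared eden top (retrying while other threads race), and fall back to the slow path when even that fails. A self-contained sketch of the same idea; the Tlab struct, the eden variables and the byte-based sizes are assumptions of this sketch, not HotSpot's data structures:

#include <atomic>
#include <cstddef>
#include <cstdint>

// Hypothetical thread-local allocation buffer for this sketch.
struct Tlab {
  uint8_t* top;
  uint8_t* end;
};

// Shared "eden" top/end, normally owned by the heap.
static std::atomic<uint8_t*> eden_top{nullptr};
static uint8_t*              eden_end = nullptr;

// 1) bump the TLAB pointer if the object fits,
// 2) otherwise CAS the shared eden top, retrying on contention,
// 3) otherwise report failure so the caller can take the slow path.
static void* allocate(Tlab& tlab, size_t size) {
  if (size <= static_cast<size_t>(tlab.end - tlab.top)) {   // TLAB fast path
    void* obj = tlab.top;
    tlab.top += size;
    return obj;
  }
  uint8_t* old_top = eden_top.load(std::memory_order_relaxed);
  for (;;) {                                                // shared eden, CAS retry loop
    if (size > static_cast<size_t>(eden_end - old_top)) {
      return nullptr;                                       // caller takes the slow path
    }
    uint8_t* new_top = old_top + size;
    if (eden_top.compare_exchange_weak(old_top, new_top,
                                       std::memory_order_relaxed)) {
      return old_top;                                       // we own [old_top, new_top)
    }
    // compare_exchange_weak refreshed old_top with the current value; retry.
  }
}

int main() {
  static uint8_t eden[1024];
  static uint8_t tlab_buf[32];
  eden_top.store(eden);
  eden_end = eden + sizeof(eden);
  Tlab tlab{tlab_buf, tlab_buf + sizeof(tlab_buf)};
  void* a = allocate(tlab, 16);   // fits in the TLAB
  void* b = allocate(tlab, 64);   // too big for what is left, goes to shared eden
  return (a != nullptr && b != nullptr) ? 0 : 1;
}

The cmpxchg retry loop in the interpreter plays the role of compare_exchange_weak here: whichever thread installs the new top owns the bytes between the old and the new value.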
++ __ bind(initialize_object); ++ __ set64(AT, - sizeof(oopDesc)); ++ __ daddu(T0, T0, AT); ++ __ beq(T0, R0, initialize_header); ++ __ delayed()->nop(); ++ ++ // initialize remaining object fields: T0 is a multiple of 2 ++ { ++ Label loop; ++ __ daddu(T1, FSR, T0); ++ __ daddiu(T1, T1, -oopSize); ++ ++ __ bind(loop); ++ __ sd(R0, T1, sizeof(oopDesc) + 0 * oopSize); ++ __ bne(T1, FSR, loop); //dont clear header ++ __ delayed()->daddiu(T1, T1, -oopSize); ++ } ++ ++ //klass in T3, ++ // initialize object header only. ++ __ bind(initialize_header); ++ if (UseBiasedLocking) { ++ __ ld(AT, T3, in_bytes(Klass::prototype_header_offset())); ++ __ sd(AT, FSR, oopDesc::mark_offset_in_bytes ()); ++ } else { ++ __ set64(AT, (long)markOopDesc::prototype()); ++ __ sd(AT, FSR, oopDesc::mark_offset_in_bytes()); ++ } ++ ++ __ store_klass_gap(FSR, R0); ++ __ store_klass(FSR, T3); ++ ++ { ++ SkipIfEqual skip_if(_masm, &DTraceAllocProbes, 0); ++ // Trigger dtrace event for fastpath ++ __ push(atos); ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), FSR); ++ __ pop(atos); ++ ++ } ++ __ b(done); ++ __ delayed()->nop(); ++ } ++ ++ // slow case ++ __ bind(slow_case); ++ __ get_constant_pool(A1); ++ __ get_unsigned_2_byte_index_at_bcp(A2, 1); ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), A1, A2); ++ ++ // continue ++ __ bind(done); ++ __ sync(); ++} ++ ++void TemplateTable::newarray() { ++ transition(itos, atos); ++ __ lbu(A1, at_bcp(1)); ++ //type, count ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray), A1, FSR); ++ __ sync(); ++} ++ ++void TemplateTable::anewarray() { ++ transition(itos, atos); ++ __ get_2_byte_integer_at_bcp(A2, AT, 1); ++ __ huswap(A2); ++ __ get_constant_pool(A1); ++ // cp, index, count ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray), A1, A2, FSR); ++ __ sync(); ++} ++ ++void TemplateTable::arraylength() { ++ transition(atos, itos); ++ __ null_check(FSR, arrayOopDesc::length_offset_in_bytes()); ++ __ lw(FSR, FSR, arrayOopDesc::length_offset_in_bytes()); ++} ++ ++// when invoke gen_subtype_check, super in T3, sub in T2, object in FSR(it's always) ++// T2 : sub klass ++// T3 : cpool ++// T3 : super klass ++void TemplateTable::checkcast() { ++ transition(atos, atos); ++ Label done, is_null, ok_is_subtype, quicked, resolved; ++ __ beq(FSR, R0, is_null); ++ __ delayed()->nop(); ++ ++ // Get cpool & tags index ++ __ get_cpool_and_tags(T3, T1); ++ __ get_2_byte_integer_at_bcp(T2, AT, 1); ++ __ huswap(T2); ++ ++ // See if bytecode has already been quicked ++ __ daddu(AT, T1, T2); ++ __ lb(AT, AT, Array::base_offset_in_bytes()); ++ if(os::is_MP()) { ++ __ sync(); // load acquire ++ } ++ __ daddiu(AT, AT, - (int)JVM_CONSTANT_Class); ++ __ beq(AT, R0, quicked); ++ __ delayed()->nop(); ++ ++ // In InterpreterRuntime::quicken_io_cc, lots of new classes may be loaded. ++ // Then, GC will move the object in V0 to another places in heap. ++ // Therefore, We should never save such an object in register. ++ // Instead, we should save it in the stack. It can be modified automatically by the GC thread. ++ // After GC, the object address in FSR is changed to a new place. 
++ // ++ __ push(atos); ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); ++ __ get_vm_result_2(T3, thread); ++ __ pop_ptr(FSR); ++ __ b(resolved); ++ __ delayed()->nop(); ++ ++ // klass already in cp, get superklass in T3 ++ __ bind(quicked); ++ __ load_resolved_klass_at_index(T3, T2, T3); ++ ++ __ bind(resolved); ++ ++ // get subklass in T2 ++ //add for compressedoops ++ __ load_klass(T2, FSR); ++ // Superklass in T3. Subklass in T2. ++ __ gen_subtype_check(T3, T2, ok_is_subtype); ++ ++ // Come here on failure ++ // object is at FSR ++ __ jmp(Interpreter::_throw_ClassCastException_entry); ++ __ delayed()->nop(); ++ ++ // Come here on success ++ __ bind(ok_is_subtype); ++ ++ // Collect counts on whether this check-cast sees NULLs a lot or not. ++ if (ProfileInterpreter) { ++ __ b(done); ++ __ delayed()->nop(); ++ __ bind(is_null); ++ __ profile_null_seen(T3); ++ } else { ++ __ bind(is_null); ++ } ++ __ bind(done); ++} ++ ++// i use T3 as cpool, T1 as tags, T2 as index ++// object always in FSR, superklass in T3, subklass in T2 ++void TemplateTable::instanceof() { ++ transition(atos, itos); ++ Label done, is_null, ok_is_subtype, quicked, resolved; ++ ++ __ beq(FSR, R0, is_null); ++ __ delayed()->nop(); ++ ++ // Get cpool & tags index ++ __ get_cpool_and_tags(T3, T1); ++ // get index ++ __ get_2_byte_integer_at_bcp(T2, AT, 1); ++ __ huswap(T2); ++ ++ // See if bytecode has already been quicked ++ // quicked ++ __ daddu(AT, T1, T2); ++ __ lb(AT, AT, Array::base_offset_in_bytes()); ++ if(os::is_MP()) { ++ __ sync(); // load acquire ++ } ++ __ daddiu(AT, AT, - (int)JVM_CONSTANT_Class); ++ __ beq(AT, R0, quicked); ++ __ delayed()->nop(); ++ ++ __ push(atos); ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); ++ __ get_vm_result_2(T3, thread); ++ __ pop_ptr(FSR); ++ __ b(resolved); ++ __ delayed()->nop(); ++ ++ // get superklass in T3, subklass in T2 ++ __ bind(quicked); ++ __ load_resolved_klass_at_index(T3, T2, T3); ++ ++ __ bind(resolved); ++ // get subklass in T2 ++ //add for compressedoops ++ __ load_klass(T2, FSR); ++ ++ // Superklass in T3. Subklass in T2. ++ __ gen_subtype_check(T3, T2, ok_is_subtype); ++ // Come here on failure ++ __ b(done); ++ __ delayed(); __ move(FSR, R0); ++ ++ // Come here on success ++ __ bind(ok_is_subtype); ++ __ move(FSR, 1); ++ ++ // Collect counts on whether this test sees NULLs a lot or not. ++ if (ProfileInterpreter) { ++ __ beq(R0, R0, done); ++ __ delayed()->nop(); ++ __ bind(is_null); ++ __ profile_null_seen(T3); ++ } else { ++ __ bind(is_null); // same as 'done' ++ } ++ __ bind(done); ++ // FSR = 0: obj == NULL or obj is not an instanceof the specified klass ++ // FSR = 1: obj != NULL and obj is an instanceof the specified klass ++} ++ ++//-------------------------------------------------------- ++//-------------------------------------------- ++// Breakpoints ++void TemplateTable::_breakpoint() { ++ // Note: We get here even if we are single stepping.. ++ // jbug inists on setting breakpoints at every bytecode ++ // even if we are in single step mode. 
++ ++ transition(vtos, vtos); ++ ++ // get the unpatched byte code ++ __ get_method(A1); ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::get_original_bytecode_at), ++ A1, BCP); ++ __ move(Rnext, V0); // Rnext will be used in dispatch_only_normal ++ ++ // post the breakpoint event ++ __ get_method(A1); ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint), A1, BCP); ++ ++ // complete the execution of original bytecode ++ __ dispatch_only_normal(vtos); ++} ++ ++//----------------------------------------------------------------------------- ++// Exceptions ++ ++void TemplateTable::athrow() { ++ transition(atos, vtos); ++ __ null_check(FSR); ++ __ jmp(Interpreter::throw_exception_entry()); ++ __ delayed()->nop(); ++} ++ ++//----------------------------------------------------------------------------- ++// Synchronization ++// ++// Note: monitorenter & exit are symmetric routines; which is reflected ++// in the assembly code structure as well ++// ++// Stack layout: ++// ++// [expressions ] <--- SP = expression stack top ++// .. ++// [expressions ] ++// [monitor entry] <--- monitor block top = expression stack bot ++// .. ++// [monitor entry] ++// [frame data ] <--- monitor block bot ++// ... ++// [return addr ] <--- FP ++ ++// we use T2 as monitor entry pointer, T3 as monitor top pointer, c_rarg0 as free slot pointer ++// object always in FSR ++void TemplateTable::monitorenter() { ++ transition(atos, vtos); ++ ++ // check for NULL object ++ __ null_check(FSR); ++ ++ const Address monitor_block_top(FP, frame::interpreter_frame_monitor_block_top_offset ++ * wordSize); ++ const int entry_size = (frame::interpreter_frame_monitor_size()* wordSize); ++ Label allocated; ++ ++ // initialize entry pointer ++ __ move(c_rarg0, R0); ++ ++ // find a free slot in the monitor block (result in c_rarg0) ++ { ++ Label entry, loop, exit; ++ __ ld(T2, monitor_block_top); ++ __ b(entry); ++ __ delayed()->daddiu(T3, FP, frame::interpreter_frame_initial_sp_offset * wordSize); ++ ++ // free slot? ++ __ bind(loop); ++ __ ld(AT, T2, BasicObjectLock::obj_offset_in_bytes()); ++ __ movz(c_rarg0, T2, AT); ++ ++ __ beq(FSR, AT, exit); ++ __ delayed()->nop(); ++ __ daddiu(T2, T2, entry_size); ++ ++ __ bind(entry); ++ __ bne(T3, T2, loop); ++ __ delayed()->nop(); ++ __ bind(exit); ++ } ++ ++ __ bne(c_rarg0, R0, allocated); ++ __ delayed()->nop(); ++ ++ // allocate one if there's no free slot ++ { ++ Label entry, loop; ++ // 1. compute new pointers // SP: old expression stack top ++ __ ld(c_rarg0, monitor_block_top); ++ __ daddiu(SP, SP, - entry_size); ++ __ daddiu(c_rarg0, c_rarg0, - entry_size); ++ __ sd(c_rarg0, monitor_block_top); ++ __ b(entry); ++ __ delayed(); __ move(T3, SP); ++ ++ // 2. move expression stack contents ++ __ bind(loop); ++ __ ld(AT, T3, entry_size); ++ __ sd(AT, T3, 0); ++ __ daddiu(T3, T3, wordSize); ++ __ bind(entry); ++ __ bne(T3, c_rarg0, loop); ++ __ delayed()->nop(); ++ } ++ ++ __ bind(allocated); ++ // Increment bcp to point to the next bytecode, ++ // so exception handling for async. exceptions work correctly. ++ // The object has already been poped from the stack, so the ++ // expression stack looks correct. ++ __ daddiu(BCP, BCP, 1); ++ __ sd(FSR, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ __ lock_object(c_rarg0); ++ // check to make sure this monitor doesn't cause stack overflow after locking ++ __ save_bcp(); // in case of exception ++ __ generate_stack_overflow_check(0); ++ // The bcp has already been incremented. 
Just need to dispatch to next instruction. ++ ++ __ dispatch_next(vtos); ++} ++ ++// T2 : top ++// c_rarg0 : entry ++void TemplateTable::monitorexit() { ++ transition(atos, vtos); ++ ++ __ null_check(FSR); ++ ++ const int entry_size =(frame::interpreter_frame_monitor_size()* wordSize); ++ Label found; ++ ++ // find matching slot ++ { ++ Label entry, loop; ++ __ ld(c_rarg0, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ b(entry); ++ __ delayed()->daddiu(T2, FP, frame::interpreter_frame_initial_sp_offset * wordSize); ++ ++ __ bind(loop); ++ __ ld(AT, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ __ beq(FSR, AT, found); ++ __ delayed()->nop(); ++ __ daddiu(c_rarg0, c_rarg0, entry_size); ++ __ bind(entry); ++ __ bne(T2, c_rarg0, loop); ++ __ delayed()->nop(); ++ } ++ ++ // error handling. Unlocking was not block-structured ++ Label end; ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ __ should_not_reach_here(); ++ ++ // call run-time routine ++ // c_rarg0: points to monitor entry ++ __ bind(found); ++ __ move(TSR, FSR); ++ __ unlock_object(c_rarg0); ++ __ move(FSR, TSR); ++ __ bind(end); ++} ++ ++ ++// Wide instructions ++void TemplateTable::wide() { ++ transition(vtos, vtos); ++ __ lbu(Rnext, at_bcp(1)); ++ __ dsll(T9, Rnext, Address::times_8); ++ __ li(AT, (long)Interpreter::_wentry_point); ++ __ daddu(AT, T9, AT); ++ __ ld(T9, AT, 0); ++ __ jr(T9); ++ __ delayed()->nop(); ++} ++ ++ ++void TemplateTable::multianewarray() { ++ transition(vtos, atos); ++ // last dim is on top of stack; we want address of first one: ++ // first_addr = last_addr + (ndims - 1) * wordSize ++ __ lbu(A1, at_bcp(3)); // dimension ++ __ daddiu(A1, A1, -1); ++ __ dsll(A1, A1, Address::times_8); ++ __ daddu(A1, SP, A1); // now A1 pointer to the count array on the stack ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray), A1); ++ __ lbu(AT, at_bcp(3)); ++ __ dsll(AT, AT, Address::times_8); ++ __ daddu(SP, SP, AT); ++ __ sync(); ++} ++#endif // !CC_INTERP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/templateTable_mips.hpp b/src/hotspot/cpu/mips/templateTable_mips.hpp +--- a/src/hotspot/cpu/mips/templateTable_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/templateTable_mips.hpp 2024-01-30 10:00:11.848098317 +0800 +@@ -0,0 +1,43 @@ ++/* ++ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_TEMPLATETABLE_MIPS_64_HPP ++#define CPU_MIPS_VM_TEMPLATETABLE_MIPS_64_HPP ++ ++ static void prepare_invoke(int byte_no, ++ Register method, // linked method (or i-klass) ++ Register index = noreg, // itable index, MethodType, etc. ++ Register recv = noreg, // if caller wants to see it ++ Register flags = noreg // if caller wants to test it ++ ); ++ static void invokevirtual_helper(Register index, Register recv, ++ Register flags); ++ static void volatile_barrier(); ++ ++ // Helpers ++ static void index_check(Register array, Register index); ++ static void index_check_without_pop(Register array, Register index); ++ ++#endif // CPU_MIPS_VM_TEMPLATETABLE_MIPS_64_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/vmreg_mips.cpp b/src/hotspot/cpu/mips/vmreg_mips.cpp +--- a/src/hotspot/cpu/mips/vmreg_mips.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/vmreg_mips.cpp 2024-01-30 10:00:11.848098317 +0800 +@@ -0,0 +1,51 @@ ++/* ++ * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "code/vmreg.hpp" ++ ++ ++ ++void VMRegImpl::set_regName() { ++ Register reg = ::as_Register(0); ++ int i; ++ for (i = 0; i < ConcreteRegisterImpl::max_gpr ; ) { ++ regName[i++] = reg->name(); ++ regName[i++] = reg->name(); ++ reg = reg->successor(); ++ } ++ ++ FloatRegister freg = ::as_FloatRegister(0); ++ for ( ; i < ConcreteRegisterImpl::max_fpr ; ) { ++ regName[i++] = freg->name(); ++ regName[i++] = freg->name(); ++ freg = freg->successor(); ++ } ++ ++ for ( ; i < ConcreteRegisterImpl::number_of_registers ; i ++ ) { ++ regName[i] = "NON-GPR-FPR"; ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/vmreg_mips.hpp b/src/hotspot/cpu/mips/vmreg_mips.hpp +--- a/src/hotspot/cpu/mips/vmreg_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/vmreg_mips.hpp 2024-01-30 10:00:11.848098317 +0800 +@@ -0,0 +1,56 @@ ++/* ++ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. 
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_VMREG_MIPS_HPP ++#define CPU_MIPS_VM_VMREG_MIPS_HPP ++ ++inline Register as_Register() { ++ assert( is_Register(), "must be"); ++ return ::as_Register(value() >> 1); ++} ++ ++inline FloatRegister as_FloatRegister() { ++ assert( is_FloatRegister(), "must be" ); ++ assert( is_even(value()), "must be" ); ++ return ::as_FloatRegister((value() - ConcreteRegisterImpl::max_gpr) >> 1); ++} ++ ++inline bool is_Register() { ++ return (unsigned int) value() < (unsigned int) ConcreteRegisterImpl::max_gpr; ++} ++ ++inline bool is_FloatRegister() { ++ return value() >= ConcreteRegisterImpl::max_gpr && value() < ConcreteRegisterImpl::max_fpr; ++} ++ ++inline bool is_concrete() { ++ assert(is_reg(), "must be"); ++ if(is_Register()) return true; ++ if(is_FloatRegister()) return true; ++ assert(false, "what register?"); ++ return false; ++} ++ ++#endif // CPU_MIPS_VM_VMREG_MIPS_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/vmreg_mips.inline.hpp b/src/hotspot/cpu/mips/vmreg_mips.inline.hpp +--- a/src/hotspot/cpu/mips/vmreg_mips.inline.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/vmreg_mips.inline.hpp 2024-01-30 10:00:11.848098317 +0800 +@@ -0,0 +1,38 @@ ++/* ++ * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_VMREG_MIPS_INLINE_HPP ++#define CPU_MIPS_VM_VMREG_MIPS_INLINE_HPP ++ ++inline VMReg RegisterImpl::as_VMReg() { ++ if( this==noreg ) return VMRegImpl::Bad(); ++ return VMRegImpl::as_VMReg(encoding() << 1 ); ++} ++ ++inline VMReg FloatRegisterImpl::as_VMReg() { ++ return VMRegImpl::as_VMReg((encoding() << 1) + ConcreteRegisterImpl::max_gpr); ++} ++ ++#endif // CPU_MIPS_VM_VMREG_MIPS_INLINE_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/vmStructs_mips.hpp b/src/hotspot/cpu/mips/vmStructs_mips.hpp +--- a/src/hotspot/cpu/mips/vmStructs_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/vmStructs_mips.hpp 2024-01-30 10:00:11.848098317 +0800 +@@ -0,0 +1,68 @@ ++/* ++ * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_VMSTRUCTS_MIPS_HPP ++#define CPU_MIPS_VM_VMSTRUCTS_MIPS_HPP ++ ++// These are the CPU-specific fields, types and integer ++// constants required by the Serviceability Agent. This file is ++// referenced by vmStructs.cpp. 
++ ++#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ ++ \ ++ /******************************/ \ ++ /* JavaCallWrapper */ \ ++ /******************************/ \ ++ /******************************/ \ ++ /* JavaFrameAnchor */ \ ++ /******************************/ \ ++ volatile_nonstatic_field(JavaFrameAnchor, _last_Java_fp, intptr_t*) \ ++ \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_STRUCTS_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++ ++#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_TYPES_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++ ++#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_INT_CONSTANTS_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_LONG_CONSTANTS_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++#endif // CPU_MIPS_VM_VMSTRUCTS_MIPS_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/vm_version_ext_mips.cpp b/src/hotspot/cpu/mips/vm_version_ext_mips.cpp +--- a/src/hotspot/cpu/mips/vm_version_ext_mips.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/vm_version_ext_mips.cpp 2024-01-30 10:00:11.848098317 +0800 +@@ -0,0 +1,90 @@ ++/* ++ * Copyright (c) 2013, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "memory/allocation.inline.hpp" ++#include "runtime/os.inline.hpp" ++#include "vm_version_ext_mips.hpp" ++ ++// VM_Version_Ext statics ++int VM_Version_Ext::_no_of_threads = 0; ++int VM_Version_Ext::_no_of_cores = 0; ++int VM_Version_Ext::_no_of_sockets = 0; ++bool VM_Version_Ext::_initialized = false; ++char VM_Version_Ext::_cpu_name[CPU_TYPE_DESC_BUF_SIZE] = {0}; ++char VM_Version_Ext::_cpu_desc[CPU_DETAILED_DESC_BUF_SIZE] = {0}; ++ ++void VM_Version_Ext::initialize_cpu_information(void) { ++ // do nothing if cpu info has been initialized ++ if (_initialized) { ++ return; ++ } ++ ++ _no_of_cores = os::processor_count(); ++ _no_of_threads = _no_of_cores; ++ _no_of_sockets = _no_of_cores; ++ if (is_loongson()) { ++ snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "Loongson MIPS"); ++ snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "Loongson MIPS %s", cpu_features()); ++ } else { ++ snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "MIPS"); ++ snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "MIPS %s", cpu_features()); ++ } ++ _initialized = true; ++} ++ ++int VM_Version_Ext::number_of_threads(void) { ++ initialize_cpu_information(); ++ return _no_of_threads; ++} ++ ++int VM_Version_Ext::number_of_cores(void) { ++ initialize_cpu_information(); ++ return _no_of_cores; ++} ++ ++int VM_Version_Ext::number_of_sockets(void) { ++ initialize_cpu_information(); ++ return _no_of_sockets; ++} ++ ++const char* VM_Version_Ext::cpu_name(void) { ++ initialize_cpu_information(); ++ char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_TYPE_DESC_BUF_SIZE, mtTracing); ++ if (NULL == tmp) { ++ return NULL; ++ } ++ strncpy(tmp, _cpu_name, CPU_TYPE_DESC_BUF_SIZE); ++ return tmp; ++} ++ ++const char* VM_Version_Ext::cpu_description(void) { ++ initialize_cpu_information(); ++ char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_DETAILED_DESC_BUF_SIZE, mtTracing); ++ if (NULL == tmp) { ++ return NULL; ++ } ++ strncpy(tmp, _cpu_desc, CPU_DETAILED_DESC_BUF_SIZE); ++ return tmp; ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/vm_version_ext_mips.hpp b/src/hotspot/cpu/mips/vm_version_ext_mips.hpp +--- a/src/hotspot/cpu/mips/vm_version_ext_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/vm_version_ext_mips.hpp 2024-01-30 10:00:11.848098317 +0800 +@@ -0,0 +1,54 @@ ++/* ++ * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2019, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_VM_VERSION_EXT_MIPS_HPP ++#define CPU_MIPS_VM_VM_VERSION_EXT_MIPS_HPP ++ ++#include "runtime/vm_version.hpp" ++#include "utilities/macros.hpp" ++ ++class VM_Version_Ext : public VM_Version { ++ private: ++ static const size_t CPU_TYPE_DESC_BUF_SIZE = 256; ++ static const size_t CPU_DETAILED_DESC_BUF_SIZE = 4096; ++ ++ static int _no_of_threads; ++ static int _no_of_cores; ++ static int _no_of_sockets; ++ static bool _initialized; ++ static char _cpu_name[CPU_TYPE_DESC_BUF_SIZE]; ++ static char _cpu_desc[CPU_DETAILED_DESC_BUF_SIZE]; ++ ++ public: ++ static int number_of_threads(void); ++ static int number_of_cores(void); ++ static int number_of_sockets(void); ++ ++ static const char* cpu_name(void); ++ static const char* cpu_description(void); ++ static void initialize_cpu_information(void); ++}; ++ ++#endif // CPU_MIPS_VM_VM_VERSION_EXT_MIPS_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/vm_version_mips.cpp b/src/hotspot/cpu/mips/vm_version_mips.cpp +--- a/src/hotspot/cpu/mips/vm_version_mips.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/vm_version_mips.cpp 2024-01-30 10:00:11.848098317 +0800 +@@ -0,0 +1,516 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "memory/resourceArea.hpp" ++#include "runtime/java.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/vm_version.hpp" ++#ifdef TARGET_OS_FAMILY_linux ++# include "os_linux.inline.hpp" ++#endif ++ ++int VM_Version::_cpuFeatures; ++const char* VM_Version::_features_str = ""; ++VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, }; ++volatile bool VM_Version::_is_determine_cpucfg_supported_running = false; ++bool VM_Version::_is_cpucfg_instruction_supported = true; ++bool VM_Version::_cpu_info_is_initialized = false; ++ ++static BufferBlob* stub_blob; ++static const int stub_size = 600; ++ ++extern "C" { ++ typedef void (*get_cpu_info_stub_t)(void*); ++} ++static get_cpu_info_stub_t get_cpu_info_stub = NULL; ++ ++ ++class VM_Version_StubGenerator: public StubCodeGenerator { ++ public: ++ ++ VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {} ++ ++ address generate_get_cpu_info() { ++ assert(!VM_Version::cpu_info_is_initialized(), "VM_Version should not be initialized"); ++ StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub"); ++# define __ _masm-> ++ ++ address start = __ pc(); ++ ++ __ enter(); ++ __ push(AT); ++ __ push(V0); ++ ++ __ li(AT, (long)0); ++ __ cpucfg(V0, AT); ++ __ lw(AT, A0, in_bytes(VM_Version::Loongson_Cpucfg_id0_offset())); ++ __ sw(V0, A0, in_bytes(VM_Version::Loongson_Cpucfg_id0_offset())); ++ ++ __ li(AT, 1); ++ __ cpucfg(V0, AT); ++ __ lw(AT, A0, in_bytes(VM_Version::Loongson_Cpucfg_id1_offset())); ++ __ sw(V0, A0, in_bytes(VM_Version::Loongson_Cpucfg_id1_offset())); ++ ++ __ li(AT, 2); ++ __ cpucfg(V0, AT); ++ __ lw(AT, A0, in_bytes(VM_Version::Loongson_Cpucfg_id2_offset())); ++ __ sw(V0, A0, in_bytes(VM_Version::Loongson_Cpucfg_id2_offset())); ++ ++ __ pop(V0); ++ __ pop(AT); ++ __ leave(); ++ __ jr(RA); ++ __ delayed()->nop(); ++# undef __ ++ ++ return start; ++ }; ++}; ++ ++uint32_t VM_Version::get_feature_flags_by_cpucfg() { ++ uint32_t result = 0; ++ if (_cpuid_info.cpucfg_info_id1.bits.MMI != 0) ++ result |= CPU_MMI; ++ if (_cpuid_info.cpucfg_info_id1.bits.MSA1 != 0) ++ result |= CPU_MSA1_0; ++ if (_cpuid_info.cpucfg_info_id1.bits.MSA2 != 0) ++ result |= CPU_MSA2_0; ++ if (_cpuid_info.cpucfg_info_id1.bits.CGP != 0) ++ result |= CPU_CGP; ++ if (_cpuid_info.cpucfg_info_id1.bits.LSX1 != 0) ++ result |= CPU_LSX1; ++ if (_cpuid_info.cpucfg_info_id1.bits.LSX2 != 0) ++ result |= CPU_LSX2; ++ if (_cpuid_info.cpucfg_info_id1.bits.LASX != 0) ++ result |= CPU_LASX; ++ if (_cpuid_info.cpucfg_info_id1.bits.LLSYNC != 0) ++ result |= CPU_LLSYNC; ++ if (_cpuid_info.cpucfg_info_id1.bits.TGTSYNC != 0) ++ result |= CPU_TGTSYNC; ++ if (_cpuid_info.cpucfg_info_id1.bits.MUALP != 0) ++ result |= CPU_MUALP; ++ if (_cpuid_info.cpucfg_info_id2.bits.LEXT1 != 0) ++ result |= CPU_LEXT1; ++ if (_cpuid_info.cpucfg_info_id2.bits.LEXT2 != 0) ++ result |= CPU_LEXT2; ++ if (_cpuid_info.cpucfg_info_id2.bits.LEXT3 != 0) ++ result |= CPU_LEXT3; ++ if (_cpuid_info.cpucfg_info_id2.bits.LAMO != 0) ++ result |= CPU_LAMO; ++ if (_cpuid_info.cpucfg_info_id2.bits.LPIXU != 0) ++ result |= CPU_LPIXU; ++ ++ result |= CPU_ULSYNC; ++ ++ return result; ++} ++ ++void read_cpu_info(const char *path, char *result) { ++ FILE *ptr; ++ char buf[1024]; ++ int i = 0; ++ if((ptr=fopen(path, "r")) != NULL) { ++ while(fgets(buf, 1024, ptr)!=NULL) { ++ strcat(result,buf); ++ i++; ++ if (i == 10) break; ++ } ++ fclose(ptr); ++ } else { ++ warning("Can't 
detect CPU info - cannot open %s", path); ++ } ++} ++ ++void strlwr(char *str) { ++ for (; *str!='\0'; str++) ++ *str = tolower(*str); ++} ++ ++int VM_Version::get_feature_flags_by_cpuinfo(int features) { ++ assert(!cpu_info_is_initialized(), "VM_Version should not be initialized"); ++ ++ char res[10240]; ++ int i; ++ memset(res, '\0', 10240 * sizeof(char)); ++ read_cpu_info("/proc/cpuinfo", res); ++ // res is converted to lower case ++ strlwr(res); ++ ++ if (strstr(res, "loongson")) { ++ // Loongson CPU ++ features |= CPU_LOONGSON; ++ ++ const struct Loongson_Cpuinfo loongson_cpuinfo[] = { ++ {L_3A1000, "3a1000"}, ++ {L_3B1500, "3b1500"}, ++ {L_3A2000, "3a2000"}, ++ {L_3B2000, "3b2000"}, ++ {L_3A3000, "3a3000"}, ++ {L_3B3000, "3b3000"}, ++ {L_2K1000, "2k1000"}, ++ {L_UNKNOWN, "unknown"} ++ }; ++ ++ // Loongson Family ++ int detected = 0; ++ for (i = 0; i <= L_UNKNOWN; i++) { ++ switch (i) { ++ // 3A1000 and 3B1500 may use an old kernel and further comparsion is needed ++ // test PRID REV in /proc/cpuinfo ++ // 3A1000: V0.5, model name: ICT Loongson-3A V0.5 FPU V0.1 ++ // 3B1500: V0.7, model name: ICT Loongson-3B V0.7 FPU V0.1 ++ case L_3A1000: ++ if (strstr(res, loongson_cpuinfo[i].match_str) || strstr(res, "loongson-3a v0.5")) { ++ features |= CPU_LOONGSON_GS464; ++ detected++; ++ //tty->print_cr("3A1000 platform"); ++ } ++ break; ++ case L_3B1500: ++ if (strstr(res, loongson_cpuinfo[i].match_str) || strstr(res, "loongson-3b v0.7")) { ++ features |= CPU_LOONGSON_GS464; ++ detected++; ++ //tty->print_cr("3B1500 platform"); ++ } ++ break; ++ case L_3A2000: ++ case L_3B2000: ++ case L_3A3000: ++ case L_3B3000: ++ if (strstr(res, loongson_cpuinfo[i].match_str)) { ++ features |= CPU_LOONGSON_GS464E; ++ detected++; ++ //tty->print_cr("3A2000/3A3000/3B2000/3B3000 platform"); ++ } ++ break; ++ case L_2K1000: ++ if (strstr(res, loongson_cpuinfo[i].match_str)) { ++ features |= CPU_LOONGSON_GS264; ++ detected++; ++ //tty->print_cr("2K1000 platform"); ++ } ++ break; ++ case L_UNKNOWN: ++ if (detected == 0) { ++ detected++; ++ //tty->print_cr("unknown Loongson platform"); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++ assert (detected == 1, "one and only one of LOONGSON_CPU_FAMILY should be detected"); ++ } else { // not Loongson ++ // Not Loongson CPU ++ //tty->print_cr("MIPS platform"); ++ } ++ ++ if (features & CPU_LOONGSON_GS264) { ++ features |= CPU_LEXT1; ++ features |= CPU_LEXT2; ++ features |= CPU_TGTSYNC; ++ features |= CPU_ULSYNC; ++ features |= CPU_MSA1_0; ++ features |= CPU_LSX1; ++ } else if (features & CPU_LOONGSON_GS464) { ++ features |= CPU_LEXT1; ++ features |= CPU_LLSYNC; ++ features |= CPU_TGTSYNC; ++ } else if (features & CPU_LOONGSON_GS464E) { ++ features |= CPU_LEXT1; ++ features |= CPU_LEXT2; ++ features |= CPU_LEXT3; ++ features |= CPU_TGTSYNC; ++ features |= CPU_ULSYNC; ++ } else if (features & CPU_LOONGSON) { ++ // unknow loongson ++ features |= CPU_LLSYNC; ++ features |= CPU_TGTSYNC; ++ features |= CPU_ULSYNC; ++ } ++ VM_Version::_cpu_info_is_initialized = true; ++ ++ return features; ++} ++ ++void VM_Version::get_processor_features() { ++ ++ clean_cpuFeatures(); ++ ++ // test if cpucfg instruction is supported ++ VM_Version::_is_determine_cpucfg_supported_running = true; ++ __asm__ __volatile__( ++ ".insn \n\t" ++ ".word (0xc8080118)\n\t" // cpucfg zero, zero ++ : ++ : ++ : ++ ); ++ VM_Version::_is_determine_cpucfg_supported_running = false; ++ ++ if (supports_cpucfg()) { ++ get_cpu_info_stub(&_cpuid_info); ++ _cpuFeatures = get_feature_flags_by_cpucfg(); 
++ // Only Loongson CPUs support cpucfg ++ _cpuFeatures |= CPU_LOONGSON; ++ } else { ++ _cpuFeatures = get_feature_flags_by_cpuinfo(0); ++ } ++ ++ _supports_cx8 = true; ++ ++ if (UseG1GC && FLAG_IS_DEFAULT(MaxGCPauseMillis)) { ++ FLAG_SET_CMDLINE(uintx, MaxGCPauseMillis, 650); ++ } ++ ++#ifdef COMPILER2 ++ if (MaxVectorSize > 0) { ++ if (!is_power_of_2(MaxVectorSize)) { ++ warning("MaxVectorSize must be a power of 2"); ++ MaxVectorSize = 8; ++ } ++ if (MaxVectorSize > 0 && supports_ps()) { ++ MaxVectorSize = 8; ++ } else { ++ MaxVectorSize = 0; ++ } ++ } ++ // ++ // Vector optimization of MIPS works in most cases, but cannot pass hotspot/test/compiler/6340864/TestFloatVect.java. ++ // Vector optimization was closed by default. ++ // The reasons: ++ // 1. The kernel does not have emulation of PS instructions yet, so the emulation of PS instructions must be done in JVM, see JVM_handle_linux_signal. ++ // 2. It seems the gcc4.4.7 had some bug related to ucontext_t, which is used in signal handler to emulate PS instructions. ++ // ++ if (FLAG_IS_DEFAULT(MaxVectorSize)) { ++ MaxVectorSize = 0; ++ } ++ ++#endif ++ ++ if (needs_llsync() && needs_tgtsync() && !needs_ulsync()) { ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 1000); ++ } ++ } else if (!needs_llsync() && needs_tgtsync() && needs_ulsync()) { ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 2000); ++ } ++ } else if (!needs_llsync() && !needs_tgtsync() && needs_ulsync()) { ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 3000); ++ } ++ } else if (needs_llsync() && !needs_tgtsync() && needs_ulsync()) { ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 4000); ++ } ++ } else if (needs_llsync() && needs_tgtsync() && needs_ulsync()) { ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 10000); ++ } ++ } else { ++ assert(false, "Should Not Reach Here, what is the cpu type?"); ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 10000); ++ } ++ } ++ ++ if (supports_lext1()) { ++ if (FLAG_IS_DEFAULT(UseLEXT1)) { ++ FLAG_SET_DEFAULT(UseLEXT1, true); ++ } ++ } else if (UseLEXT1) { ++ warning("LEXT1 instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseLEXT1, false); ++ } ++ ++ if (supports_lext2()) { ++ if (FLAG_IS_DEFAULT(UseLEXT2)) { ++ FLAG_SET_DEFAULT(UseLEXT2, true); ++ } ++ } else if (UseLEXT2) { ++ warning("LEXT2 instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseLEXT2, false); ++ } ++ ++ if (supports_lext3()) { ++ if (FLAG_IS_DEFAULT(UseLEXT3)) { ++ FLAG_SET_DEFAULT(UseLEXT3, true); ++ } ++ } else if (UseLEXT3) { ++ warning("LEXT3 instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseLEXT3, false); ++ } ++ ++ if (UseLEXT2) { ++ if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstructionMIPS64)) { ++ FLAG_SET_DEFAULT(UseCountTrailingZerosInstructionMIPS64, 1); ++ } ++ } else if (UseCountTrailingZerosInstructionMIPS64) { ++ if (!FLAG_IS_DEFAULT(UseCountTrailingZerosInstructionMIPS64)) ++ warning("ctz/dctz instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseCountTrailingZerosInstructionMIPS64, 0); ++ } ++ ++ if (TieredCompilation) { ++ if (!FLAG_IS_DEFAULT(TieredCompilation)) ++ warning("TieredCompilation not supported"); ++ FLAG_SET_DEFAULT(TieredCompilation, false); ++ } ++ ++ char buf[256]; ++ bool is_unknown_loongson_cpu = is_loongson() && !is_gs464() && !is_gs464e() && !is_gs264() && !supports_cpucfg(); ++ ++ // A note on the 
_features_string format: ++ // There are jtreg tests checking the _features_string for various properties. ++ // For some strange reason, these tests require the string to contain ++ // only _lowercase_ characters. Keep that in mind when being surprised ++ // about the unusual notation of features - and when adding new ones. ++ // Features may have one comma at the end. ++ // Furthermore, use one, and only one, separator space between features. ++ // Multiple spaces are considered separate tokens, messing up everything. ++ jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s, usesynclevel:%d", ++ (is_loongson() ? "mips-compatible loongson cpu" : "mips cpu"), ++ (is_gs464() ? ", gs464 (3a1000/3b1500)" : ""), ++ (is_gs464e() ? ", gs464e (3a2000/3a3000/3b2000/3b3000)" : ""), ++ (is_gs264() ? ", gs264 (2k1000)" : ""), ++ (is_unknown_loongson_cpu ? ", unknown loongson cpu" : ""), ++ (supports_dsp() ? ", dsp" : ""), ++ (supports_ps() ? ", ps" : ""), ++ (supports_3d() ? ", 3d" : ""), ++ (supports_mmi() ? ", mmi" : ""), ++ (supports_msa1_0() ? ", msa1_0" : ""), ++ (supports_msa2_0() ? ", msa2_0" : ""), ++ (supports_lsx1() ? ", lsx1" : ""), ++ (supports_lsx2() ? ", lsx2" : ""), ++ (supports_lasx() ? ", lasx" : ""), ++ (supports_lext1() ? ", lext1" : ""), ++ (supports_lext2() ? ", lext2" : ""), ++ (supports_lext3() ? ", lext3" : ""), ++ (supports_cgp() ? ", aes, crc, sha1, sha256, sha512" : ""), ++ (supports_lamo() ? ", lamo" : ""), ++ (supports_lpixu() ? ", lpixu" : ""), ++ (needs_llsync() ? ", llsync" : ""), ++ (needs_tgtsync() ? ", tgtsync": ""), ++ (needs_ulsync() ? ", ulsync": ""), ++ (supports_mualp() ? ", mualp" : ""), ++ UseSyncLevel); ++ _features_str = strdup(buf); ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchStyle, 1); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchLines, 1); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchStepSize, 64); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchDistance, 64); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocateInstancePrefetchLines)) { ++ FLAG_SET_DEFAULT(AllocateInstancePrefetchLines, 1); ++ } ++ ++ if (UseSHA) { ++ warning("SHA instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseSHA, false); ++ } ++ ++ if (UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics) { ++ warning("SHA intrinsics are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseSHA1Intrinsics, false); ++ FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); ++ FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); ++ } ++ ++ if (UseAES) { ++ if (!FLAG_IS_DEFAULT(UseAES)) { ++ warning("AES instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseAES, false); ++ } ++ } ++ ++ if (UseCRC32Intrinsics) { ++ if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics)) { ++ warning("CRC32Intrinsics instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseCRC32Intrinsics, false); ++ } ++ } ++ ++ if (UseCRC32CIntrinsics) { ++ if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics)) { ++ warning("CRC32CIntrinsics instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false); ++ } ++ } ++ ++ if (UseAESIntrinsics) { ++ if (!FLAG_IS_DEFAULT(UseAESIntrinsics)) { ++ warning("AES intrinsics are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseAESIntrinsics, false); ++ } ++ } ++ ++#ifdef COMPILER2 ++ if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { ++ 
UseMontgomeryMultiplyIntrinsic = true; ++ } ++ if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { ++ UseMontgomerySquareIntrinsic = true; ++ } ++#endif ++ ++ if (FLAG_IS_DEFAULT(UseFMA)) { ++ FLAG_SET_DEFAULT(UseFMA, true); ++ } ++ ++ UNSUPPORTED_OPTION(CriticalJNINatives); ++} ++ ++void VM_Version::initialize() { ++ ResourceMark rm; ++ // Making this stub must be FIRST use of assembler ++ ++ stub_blob = BufferBlob::create("get_cpu_info_stub", stub_size); ++ if (stub_blob == NULL) { ++ vm_exit_during_initialization("Unable to allocate get_cpu_info_stub"); ++ } ++ CodeBuffer c(stub_blob); ++ VM_Version_StubGenerator g(&c); ++ get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t, ++ g.generate_get_cpu_info()); ++ ++ get_processor_features(); ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/vm_version_mips.hpp b/src/hotspot/cpu/mips/vm_version_mips.hpp +--- a/src/hotspot/cpu/mips/vm_version_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/vm_version_mips.hpp 2024-01-30 10:00:11.848098317 +0800 +@@ -0,0 +1,221 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_VM_VERSION_MIPS_HPP ++#define CPU_MIPS_VM_VM_VERSION_MIPS_HPP ++ ++#include "runtime/abstract_vm_version.hpp" ++#include "runtime/globals_extension.hpp" ++#include "utilities/sizes.hpp" ++ ++class VM_Version: public Abstract_VM_Version { ++public: ++ ++ union Loongson_Cpucfg_Id1 { ++ uint32_t value; ++ struct { ++ uint32_t FP_CFG : 1, ++ FPREV : 3, ++ MMI : 1, ++ MSA1 : 1, ++ MSA2 : 1, ++ CGP : 1, ++ WRP : 1, ++ LSX1 : 1, ++ LSX2 : 1, ++ LASX : 1, ++ R6FXP : 1, ++ R6CRCP : 1, ++ R6FPP : 1, ++ CNT64 : 1, ++ LSLDR0 : 1, ++ LSPREF : 1, ++ LSPREFX : 1, ++ LSSYNCI : 1, ++ LSUCA : 1, ++ LLSYNC : 1, ++ TGTSYNC : 1, ++ LLEXC : 1, ++ SCRAND : 1, ++ MUALP : 1, ++ KMUALEn : 1, ++ ITLBT : 1, ++ LSUPERF : 1, ++ SFBP : 1, ++ CDMAP : 1, ++ : 1; ++ } bits; ++ }; ++ ++ union Loongson_Cpucfg_Id2 { ++ uint32_t value; ++ struct { ++ uint32_t LEXT1 : 1, ++ LEXT2 : 1, ++ LEXT3 : 1, ++ LSPW : 1, ++ LBT1 : 1, ++ LBT2 : 1, ++ LBT3 : 1, ++ LBTMMU : 1, ++ LPMP : 1, ++ LPMRev : 3, ++ LAMO : 1, ++ LPIXU : 1, ++ LPIXNU : 1, ++ LVZP : 1, ++ LVZRev : 3, ++ LGFTP : 1, ++ LGFTRev : 3, ++ LLFTP : 1, ++ LLFTRev : 3, ++ LCSRP : 1, ++ DISBLKLY : 1, ++ : 3; ++ } bits; ++ }; ++ ++protected: ++ ++ enum { ++ CPU_LOONGSON = (1 << 1), ++ CPU_LOONGSON_GS464 = (1 << 2), ++ CPU_LOONGSON_GS464E = (1 << 3), ++ CPU_LOONGSON_GS264 = (1 << 4), ++ CPU_MMI = (1 << 11), ++ CPU_MSA1_0 = (1 << 12), ++ CPU_MSA2_0 = (1 << 13), ++ CPU_CGP = (1 << 14), ++ CPU_LSX1 = (1 << 15), ++ CPU_LSX2 = (1 << 16), ++ CPU_LASX = (1 << 17), ++ CPU_LEXT1 = (1 << 18), ++ CPU_LEXT2 = (1 << 19), ++ CPU_LEXT3 = (1 << 20), ++ CPU_LAMO = (1 << 21), ++ CPU_LPIXU = (1 << 22), ++ CPU_LLSYNC = (1 << 23), ++ CPU_TGTSYNC = (1 << 24), ++ CPU_ULSYNC = (1 << 25), ++ CPU_MUALP = (1 << 26), ++ ++ //////////////////////add some other feature here////////////////// ++ } cpuFeatureFlags; ++ ++ enum Loongson_Family { ++ L_3A1000 = 0, ++ L_3B1500 = 1, ++ L_3A2000 = 2, ++ L_3B2000 = 3, ++ L_3A3000 = 4, ++ L_3B3000 = 5, ++ L_2K1000 = 6, ++ L_UNKNOWN = 7 ++ }; ++ ++ struct Loongson_Cpuinfo { ++ Loongson_Family id; ++ const char* const match_str; ++ }; ++ ++ static int _cpuFeatures; ++ static const char* _features_str; ++ static volatile bool _is_determine_cpucfg_supported_running; ++ static bool _is_cpucfg_instruction_supported; ++ static bool _cpu_info_is_initialized; ++ ++ struct CpuidInfo { ++ uint32_t cpucfg_info_id0; ++ Loongson_Cpucfg_Id1 cpucfg_info_id1; ++ Loongson_Cpucfg_Id2 cpucfg_info_id2; ++ uint32_t cpucfg_info_id3; ++ uint32_t cpucfg_info_id4; ++ uint32_t cpucfg_info_id5; ++ uint32_t cpucfg_info_id6; ++ uint32_t cpucfg_info_id8; ++ }; ++ ++ // The actual cpuid info block ++ static CpuidInfo _cpuid_info; ++ ++ static uint32_t get_feature_flags_by_cpucfg(); ++ static int get_feature_flags_by_cpuinfo(int features); ++ static void get_processor_features(); ++ ++public: ++ // Offsets for cpuid asm stub ++ static ByteSize Loongson_Cpucfg_id0_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id0); } ++ static ByteSize Loongson_Cpucfg_id1_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id1); } ++ static ByteSize Loongson_Cpucfg_id2_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id2); } ++ static ByteSize Loongson_Cpucfg_id3_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id3); } ++ static ByteSize Loongson_Cpucfg_id4_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id4); } ++ static ByteSize Loongson_Cpucfg_id5_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id5); } ++ static ByteSize 
Loongson_Cpucfg_id6_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id6); } ++ static ByteSize Loongson_Cpucfg_id8_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id8); } ++ ++ static bool is_determine_features_test_running() { return _is_determine_cpucfg_supported_running; } ++ ++ static void clean_cpuFeatures() { _cpuFeatures = 0; } ++ ++ // Initialization ++ static void initialize(); ++ ++ static bool cpu_info_is_initialized() { return _cpu_info_is_initialized; } ++ ++ static bool supports_cpucfg() { return _is_cpucfg_instruction_supported; } ++ static bool set_supports_cpucfg(bool value) { return _is_cpucfg_instruction_supported = value; } ++ ++ static bool is_loongson() { return _cpuFeatures & CPU_LOONGSON; } ++ static bool is_gs264() { return _cpuFeatures & CPU_LOONGSON_GS264; } ++ static bool is_gs464() { return _cpuFeatures & CPU_LOONGSON_GS464; } ++ static bool is_gs464e() { return _cpuFeatures & CPU_LOONGSON_GS464E; } ++ static bool supports_dsp() { return 0; /*not supported yet*/} ++ static bool supports_ps() { return 0; /*not supported yet*/} ++ static bool supports_3d() { return 0; /*not supported yet*/} ++ static bool supports_msa1_0() { return _cpuFeatures & CPU_MSA1_0; } ++ static bool supports_msa2_0() { return _cpuFeatures & CPU_MSA2_0; } ++ static bool supports_cgp() { return _cpuFeatures & CPU_CGP; } ++ static bool supports_mmi() { return _cpuFeatures & CPU_MMI; } ++ static bool supports_lsx1() { return _cpuFeatures & CPU_LSX1; } ++ static bool supports_lsx2() { return _cpuFeatures & CPU_LSX2; } ++ static bool supports_lasx() { return _cpuFeatures & CPU_LASX; } ++ static bool supports_lext1() { return _cpuFeatures & CPU_LEXT1; } ++ static bool supports_lext2() { return _cpuFeatures & CPU_LEXT2; } ++ static bool supports_lext3() { return _cpuFeatures & CPU_LEXT3; } ++ static bool supports_lamo() { return _cpuFeatures & CPU_LAMO; } ++ static bool supports_lpixu() { return _cpuFeatures & CPU_LPIXU; } ++ static bool needs_llsync() { return _cpuFeatures & CPU_LLSYNC; } ++ static bool needs_tgtsync() { return _cpuFeatures & CPU_TGTSYNC; } ++ static bool needs_ulsync() { return _cpuFeatures & CPU_ULSYNC; } ++ static bool supports_mualp() { return _cpuFeatures & CPU_MUALP; } ++ ++ //mips has no such instructions, use ll/sc instead ++ static bool supports_compare_and_exchange() { return false; } ++ ++ static const char* cpu_features() { return _features_str; } ++ ++}; ++ ++#endif // CPU_MIPS_VM_VM_VERSION_MIPS_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/mips/vtableStubs_mips_64.cpp b/src/hotspot/cpu/mips/vtableStubs_mips_64.cpp +--- a/src/hotspot/cpu/mips/vtableStubs_mips_64.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/cpu/mips/vtableStubs_mips_64.cpp 2024-01-30 10:00:11.848098317 +0800 +@@ -0,0 +1,340 @@ ++/* ++ * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/vtableStubs.hpp" ++#include "interp_masm_mips.hpp" ++#include "memory/resourceArea.hpp" ++#include "oops/compiledICHolder.hpp" ++#include "oops/klass.inline.hpp" ++#include "oops/klassVtable.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "vmreg_mips.inline.hpp" ++#ifdef COMPILER2 ++#include "opto/runtime.hpp" ++#endif ++ ++ ++// machine-dependent part of VtableStubs: create VtableStub of correct size and ++// initialize its code ++ ++#define __ masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++#ifndef PRODUCT ++extern "C" void bad_compiled_vtable_index(JavaThread* thread, oop receiver, int index); ++#endif ++ ++// used by compiler only; reciever in T0. ++// used registers : ++// Rmethod : receiver klass & method ++// NOTE: If this code is used by the C1, the receiver_location is always 0. ++// when reach here, receiver in T0, klass in T8 ++VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { ++ // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. ++ const int stub_code_length = code_size_limit(true); ++ VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index); ++ // Can be NULL if there is no free space in the code cache. ++ if (s == NULL) { ++ return NULL; ++ } ++ ++ // Count unused bytes in instruction sequences of variable size. ++ // We add them to the computed buffer size in order to avoid ++ // overflow in subsequently generated stubs. ++ address start_pc; ++ int slop_bytes = 0; ++ int slop_delta = 0; ++ int load_const_maxLen = 6*BytesPerInstWord; // load_const generates 6 instructions. Assume that as max size for li ++ // No variance was detected in vtable stub sizes. Setting index_dependent_slop == 0 will unveil any deviation from this observation. 
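The comments above describe why the stub generators track "slop": every variable-length instruction sequence is emitted against an assumed worst-case size, and the unused remainder is accumulated so the final bookkeeping can confirm the stub still fits its fixed-size buffer. A minimal stand-alone sketch of that accounting pattern, with invented names rather than the real MacroAssembler API, might look like this:

    #include <cassert>
    #include <cstdio>

    struct SlopTracker {
      int slop_bytes = 0;
      // 'estimate' is the assumed worst-case size of a sequence, 'emitted' the
      // size actually produced; the difference must never be negative.
      void account(int estimate, int emitted) {
        int slop_delta = estimate - emitted;
        assert(slop_delta >= 0 && "code size estimate too small, enlarge it");
        slop_bytes += slop_delta;
      }
    };

    int main() {
      SlopTracker t;
      t.account(6 * 4, 5 * 4);    // e.g. a constant load assumed at 6 instructions, 5 emitted
      t.account(18 * 4, 16 * 4);  // e.g. a virtual-method lookup assumed at 18 instructions
      std::printf("unused bytes carried forward: %d\n", t.slop_bytes);
      return 0;
    }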
++ const int index_dependent_slop = 0; ++ ++ ResourceMark rm; ++ CodeBuffer cb(s->entry_point(), stub_code_length); ++ MacroAssembler* masm = new MacroAssembler(&cb); ++ Register t1 = T8, t2 = Rmethod; ++#if (!defined(PRODUCT) && defined(COMPILER2)) ++ if (CountCompiledCalls) { ++ start_pc = __ pc(); ++ __ li(AT, SharedRuntime::nof_megamorphic_calls_addr()); ++ slop_delta = load_const_maxLen - (__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ __ lw(t1, AT , 0); ++ __ addiu(t1, t1, 1); ++ __ sw(t1, AT,0); ++ } ++#endif ++ ++ // get receiver (need to skip return address on top of stack) ++ //assert(receiver_location == T0->as_VMReg(), "receiver expected in T0"); ++ ++ // get receiver klass ++ address npe_addr = __ pc(); ++ //add for compressedoops ++ __ load_klass(t1, T0); ++ ++#ifndef PRODUCT ++ if (DebugVtables) { ++ Label L; ++ // check offset vs vtable length ++ __ lw(t2, t1, in_bytes(Klass::vtable_length_offset())); ++ assert(Assembler::is_simm16(vtable_index*vtableEntry::size()), "change this code"); ++ __ move(AT, vtable_index*vtableEntry::size()); ++ __ slt(AT, AT, t2); ++ __ bne(AT, R0, L); ++ __ delayed()->nop(); ++ __ move(A2, vtable_index); ++ __ move(A1, A0); ++ ++ // VTABLE TODO: find upper bound for call_VM length. ++ start_pc = __ pc(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), A1, A2); ++ const ptrdiff_t estimate = 512; ++ const ptrdiff_t codesize = __ pc() - start_pc; ++ slop_delta = estimate - codesize; // call_VM varies in length, depending on data ++ assert(slop_delta >= 0, "vtable #%d: Code size estimate (%d) for DebugVtables too small, required: %d", vtable_index, (int)estimate, (int)codesize); ++ __ bind(L); ++ } ++#endif // PRODUCT ++ const Register method = Rmethod; ++ ++ // load methodOop and target address ++ start_pc = __ pc(); ++ // lookup_virtual_method generates 18 instructions (worst case) ++ __ lookup_virtual_method(t1, vtable_index, method); ++ slop_delta = 18*BytesPerInstWord - (int)(__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ ++#ifndef PRODUCT ++ if (DebugVtables) { ++ Label L; ++ __ beq(method, R0, L); ++ __ delayed()->nop(); ++ __ ld(AT, method,in_bytes(Method::from_compiled_offset())); ++ __ bne(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("Vtable entry is NULL"); ++ __ bind(L); ++ } ++#endif // PRODUCT ++ ++ // T8: receiver klass ++ // T0: receiver ++ // Rmethod: methodOop ++ // T9: entry ++ address ame_addr = __ pc(); ++ __ ld_ptr(T9, method,in_bytes(Method::from_compiled_offset())); ++ __ jr(T9); ++ __ delayed()->nop(); ++ masm->flush(); ++ slop_bytes += index_dependent_slop; // add'l slop for size variance due to large itable offsets ++ bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, index_dependent_slop); ++ ++ return s; ++} ++ ++ ++// used registers : ++// T1 T2 ++// when reach here, the receiver in T0, klass in T1 ++VtableStub* VtableStubs::create_itable_stub(int itable_index) { ++ // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. ++ const int stub_code_length = code_size_limit(false); ++ VtableStub* s = new(stub_code_length) VtableStub(false, itable_index); ++ // Can be NULL if there is no free space in the code cache. 
++ if (s == NULL) { ++ return NULL; ++ } ++ // Count unused bytes in instruction sequences of variable size. ++ // We add them to the computed buffer size in order to avoid ++ // overflow in subsequently generated stubs. ++ address start_pc; ++ int slop_bytes = 0; ++ int slop_delta = 0; ++ int load_const_maxLen = 6*BytesPerInstWord; // load_const generates 6 instructions. Assume that as max size for li ++ ++ ResourceMark rm; ++ CodeBuffer cb(s->entry_point(), stub_code_length); ++ MacroAssembler *masm = new MacroAssembler(&cb); ++ ++ // we T8,T9 as temparary register, they are free from register allocator ++ Register t1 = T8, t2 = T2; ++ // Entry arguments: ++ // T1: Interface ++ // T0: Receiver ++ ++#if (!defined(PRODUCT) && defined(COMPILER2)) ++ if (CountCompiledCalls) { ++ start_pc = __ pc(); ++ __ li(AT, SharedRuntime::nof_megamorphic_calls_addr()); ++ slop_delta = load_const_maxLen - (__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ __ lw(T8, AT, 0); ++ __ addiu(T8, T8,1); ++ __ sw(T8, AT, 0); ++ } ++#endif // PRODUCT ++ ++ const Register holder_klass_reg = T1; // declaring interface klass (DECC) ++ const Register resolved_klass_reg = Rmethod; // resolved interface klass (REFC) ++ ++ const Register icholder_reg = T1; ++ __ ld_ptr(resolved_klass_reg, icholder_reg, CompiledICHolder::holder_klass_offset()); ++ __ ld_ptr(holder_klass_reg, icholder_reg, CompiledICHolder::holder_metadata_offset()); ++ ++ Label L_no_such_interface; ++ ++ // get receiver klass (also an implicit null-check) ++ address npe_addr = __ pc(); ++ __ load_klass(t1, T0); ++ { ++ // x86 use lookup_interface_method, but lookup_interface_method does not work on MIPS. ++ // No dynamic code size variance here, so slop_bytes is not needed. ++ const int base = in_bytes(Klass::vtable_start_offset()); ++ assert(vtableEntry::size() * wordSize == 8, "adjust the scaling in the code below"); ++ assert(Assembler::is_simm16(base), "change this code"); ++ __ daddiu(t2, t1, base); ++ __ lw(AT, t1, in_bytes(Klass::vtable_length_offset())); ++ __ dsll(AT, AT, Address::times_8); ++ __ daddu(t2, t2, AT); ++ if (HeapWordsPerLong > 1) { ++ __ round_to(t2, BytesPerLong); ++ } ++ ++ Label hit, entry; ++ __ bind(entry); ++ ++ // Check that the entry is non-null. A null entry means that ++ // the receiver class doesn't implement the interface, and wasn't the ++ // same as when the caller was compiled. ++ __ ld_ptr(AT, t2, itableOffsetEntry::interface_offset_in_bytes()); ++ __ beq(AT, R0, L_no_such_interface); ++ __ delayed()->nop(); ++ ++ __ bne(AT, resolved_klass_reg, entry); ++ __ delayed()->addiu(t2, t2, itableOffsetEntry::size() * wordSize); ++ ++ } ++ ++ // add for compressedoops ++ __ load_klass(t1, T0); ++ // compute itable entry offset (in words) ++ const int base = in_bytes(Klass::vtable_start_offset()); ++ __ daddiu(t2, t1, base); ++ __ lw(AT, t1, in_bytes(Klass::vtable_length_offset())); ++ __ dsll(AT, AT, Address::times_8); ++ __ daddu(t2, t2, AT); ++ if (HeapWordsPerLong > 1) { ++ __ round_to(t2, BytesPerLong); ++ } ++ ++ Label hit, entry; ++ __ bind(entry); ++ ++ // Check that the entry is non-null. A null entry means that ++ // the receiver class doesn't implement the interface, and wasn't the ++ // same as when the caller was compiled. 
++ __ ld_ptr(AT, t2, itableOffsetEntry::interface_offset_in_bytes()); ++ __ beq(AT, R0, L_no_such_interface); ++ __ delayed()->nop(); ++ ++ __ bne(AT, holder_klass_reg, entry); ++ __ delayed()->addiu(t2, t2, itableOffsetEntry::size() * wordSize); ++ ++ // We found a hit, move offset into T9 ++ __ ld_ptr(t2, t2, itableOffsetEntry::offset_offset_in_bytes() - itableOffsetEntry::size() * wordSize); ++ ++ // Compute itableMethodEntry. ++ const int method_offset = (itableMethodEntry::size() * wordSize * itable_index) + ++ itableMethodEntry::method_offset_in_bytes(); ++ ++ // Get methodOop and entrypoint for compiler ++ const Register method = Rmethod; ++ __ dsll(AT, t2, Address::times_1); ++ __ addu(AT, AT, t1 ); ++ start_pc = __ pc(); ++ __ set64(t1, method_offset); ++ slop_delta = load_const_maxLen - (__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ __ addu(AT, AT, t1 ); ++ __ ld_ptr(method, AT, 0); ++ ++#ifdef ASSERT ++ if (DebugVtables) { ++ Label L1; ++ __ beq(method, R0, L1); ++ __ delayed()->nop(); ++ __ ld(AT, method,in_bytes(Method::from_compiled_offset())); ++ __ bne(AT, R0, L1); ++ __ delayed()->nop(); ++ __ stop("methodOop is null"); ++ __ bind(L1); ++ } ++#endif // ASSERT ++ ++ // Rmethod: methodOop ++ // T0: receiver ++ // T9: entry point ++ address ame_addr = __ pc(); ++ __ ld_ptr(T9, method,in_bytes(Method::from_compiled_offset())); ++ __ jr(T9); ++ __ delayed()->nop(); ++ ++ __ bind(L_no_such_interface); ++ // Handle IncompatibleClassChangeError in itable stubs. ++ // More detailed error message. ++ // We force resolving of the call site by jumping to the "handle ++ // wrong method" stub, and so let the interpreter runtime do all the ++ // dirty work. 
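The two scan loops above implement the same search in assembly: walk the klass' itable offset entries until either a null interface is hit (not implemented, jump to L_no_such_interface) or the wanted interface is found, whose entry then supplies the offset of the corresponding method array. A compact C++ restatement of that search, using invented stand-in types rather than the real itable classes, is sketched here.

    #include <cstdio>

    struct InterfaceKlass {};  // stand-in for a resolved interface klass
    struct ItableOffsetEntry { const InterfaceKlass* interface; int offset; };

    // Returns the method-table offset for 'wanted', or -1 when the receiver's
    // class does not implement it (the L_no_such_interface case above).
    static int find_itable_offset(const ItableOffsetEntry* entries, const InterfaceKlass* wanted) {
      for (const ItableOffsetEntry* e = entries; e->interface != nullptr; ++e) {
        if (e->interface == wanted) return e->offset;
      }
      return -1;
    }

    int main() {
      InterfaceKlass comparable, runnable, serializable;
      ItableOffsetEntry itable[] = { { &comparable, 128 }, { &runnable, 160 }, { nullptr, 0 } };
      std::printf("%d %d\n", find_itable_offset(itable, &runnable),
                             find_itable_offset(itable, &serializable));
      return 0;
    }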
++ start_pc = __ pc(); ++ __ set64(T9, (long)SharedRuntime::get_handle_wrong_method_stub()); ++ slop_delta = load_const_maxLen - (__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ __ jr(T9); ++ __ delayed()->nop(); ++ ++ masm->flush(); ++ bookkeeping(masm, tty, s, npe_addr, ame_addr, false, itable_index, slop_bytes, 0); ++ ++ return s; ++} ++ ++// NOTE : whenever you change the code above, dont forget to change the const here ++int VtableStub::pd_code_alignment() { ++ const unsigned int icache_line_size = wordSize; ++ return icache_line_size; ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp +--- a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp 2024-01-30 10:00:11.851431611 +0800 +@@ -488,6 +488,9 @@ + } + } + ++void LIR_Assembler::emit_opCmpBranch(LIR_OpCmpBranch* op) { ++ ShouldNotReachHere(); ++} + + void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { + Bytecodes::Code code = op->bytecode(); +@@ -1608,6 +1611,10 @@ + __ bind(skip); + } + ++void LIR_Assembler::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type) { ++ ShouldNotReachHere(); ++} ++ + + void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, + CodeEmitInfo* info, bool pop_fpu_stack) { +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/ppc/c1_LIRGenerator_ppc.cpp b/src/hotspot/cpu/ppc/c1_LIRGenerator_ppc.cpp +--- a/src/hotspot/cpu/ppc/c1_LIRGenerator_ppc.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/cpu/ppc/c1_LIRGenerator_ppc.cpp 2024-01-30 10:00:11.851431611 +0800 +@@ -273,21 +273,29 @@ + __ move(temp, addr); + } + +- +-void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) { ++template ++void LIRGenerator::cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, int disp, int c, T tgt, CodeEmitInfo* info) { + LIR_Opr tmp = FrameMap::R0_opr; + __ load(new LIR_Address(base, disp, T_INT), tmp, info); +- __ cmp(condition, tmp, c); ++ __ cmp_branch(condition, tmp, c, T_INT, tgt); + } + ++// Explicit instantiation for all supported types. ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, CodeStub*, CodeEmitInfo*); + +-void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, +- int disp, BasicType type, CodeEmitInfo* info) { ++template ++void LIRGenerator::cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, T tgt, CodeEmitInfo* info) { + LIR_Opr tmp = FrameMap::R0_opr; + __ load(new LIR_Address(base, disp, type), tmp, info); +- __ cmp(condition, reg, tmp); ++ __ cmp_branch(condition, reg, tmp, type, tgt); + } + ++// Explicit instantiation for all supported types. 
++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, CodeStub*, CodeEmitInfo*); + + bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, jint c, LIR_Opr result, LIR_Opr tmp) { + assert(left != result, "should be different registers"); +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/ppc/c1_LIR_ppc.cpp b/src/hotspot/cpu/ppc/c1_LIR_ppc.cpp +--- a/src/hotspot/cpu/ppc/c1_LIR_ppc.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/cpu/ppc/c1_LIR_ppc.cpp 2024-01-30 10:00:11.851431611 +0800 +@@ -62,3 +62,24 @@ + #endif + } + #endif // PRODUCT ++ ++template ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, T tgt, CodeEmitInfo* info) { ++ cmp(condition, left, right, info); ++ branch(condition, type, tgt); ++} ++ ++// Explicit instantiation for all supported types. ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, Label*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, BlockBegin*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, CodeStub*, CodeEmitInfo*); ++ ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* unordered) { ++ cmp(condition, left, right); ++ branch(condition, type, block, unordered); ++} ++ ++void LIR_List::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { ++ cmp(condition, left, right); ++ cmove(condition, src1, src2, dst, type); ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp +--- a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp 2024-01-30 10:00:11.861431492 +0800 +@@ -379,6 +379,9 @@ + } + } + ++void LIR_Assembler::emit_opCmpBranch(LIR_OpCmpBranch* op) { ++ ShouldNotReachHere(); ++} + + void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { + LIR_Opr src = op->in_opr(); +@@ -1503,6 +1506,10 @@ + } + } + ++void LIR_Assembler::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type) { ++ ShouldNotReachHere(); ++} ++ + void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, + CodeEmitInfo* info, bool pop_fpu_stack) { + assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method"); +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/s390/c1_LIRGenerator_s390.cpp b/src/hotspot/cpu/s390/c1_LIRGenerator_s390.cpp +--- a/src/hotspot/cpu/s390/c1_LIRGenerator_s390.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/cpu/s390/c1_LIRGenerator_s390.cpp 2024-01-30 10:00:11.861431492 +0800 +@@ -213,16 +213,29 @@ + __ add((LIR_Opr)addr, LIR_OprFact::intConst(step), (LIR_Opr)addr); + } + +-void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) { ++template ++void 
LIRGenerator::cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, int disp, int c, T tgt, CodeEmitInfo* info) { + LIR_Opr scratch = FrameMap::Z_R1_opr; + __ load(new LIR_Address(base, disp, T_INT), scratch, info); +- __ cmp(condition, scratch, c); ++ __ cmp_branch(condition, scratch, c, T_INT, tgt); + } + +-void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info) { ++// Explicit instantiation for all supported types. ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, CodeStub*, CodeEmitInfo*); ++ ++template ++void LIRGenerator::cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, T tgt, CodeEmitInfo* info) { + __ cmp_reg_mem(condition, reg, new LIR_Address(base, disp, type), info); ++ __ branch(condition, type, tgt); + } + ++// Explicit instantiation for all supported types. ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, CodeStub*, CodeEmitInfo*); ++ + bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, jint c, LIR_Opr result, LIR_Opr tmp) { + if (tmp->is_valid()) { + if (is_power_of_2(c + 1)) { +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/s390/c1_LIR_s390.cpp b/src/hotspot/cpu/s390/c1_LIR_s390.cpp +--- a/src/hotspot/cpu/s390/c1_LIR_s390.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/cpu/s390/c1_LIR_s390.cpp 2024-01-30 10:00:11.861431492 +0800 +@@ -56,3 +56,23 @@ + } + #endif // PRODUCT + ++template ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, T tgt, CodeEmitInfo* info) { ++ cmp(condition, left, right, info); ++ branch(condition, type, tgt); ++} ++ ++// Explicit instantiation for all supported types. 
++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, Label*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, BlockBegin*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, CodeStub*, CodeEmitInfo*); ++ ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* unordered) { ++ cmp(condition, left, right); ++ branch(condition, type, block, unordered); ++} ++ ++void LIR_List::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { ++ cmp(condition, left, right); ++ cmove(condition, src1, src2, dst, type); ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/sparc/c1_LIRAssembler_sparc.cpp b/src/hotspot/cpu/sparc/c1_LIRAssembler_sparc.cpp +--- a/src/hotspot/cpu/sparc/c1_LIRAssembler_sparc.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/cpu/sparc/c1_LIRAssembler_sparc.cpp 2024-01-30 10:00:11.878097961 +0800 +@@ -599,6 +599,9 @@ + // The peephole pass fills the delay slot + } + ++void LIR_Assembler::emit_opCmpBranch(LIR_OpCmpBranch* op) { ++ ShouldNotReachHere(); ++} + + void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { + Bytecodes::Code code = op->bytecode(); +@@ -1638,6 +1641,9 @@ + __ bind(skip); + } + ++void LIR_Assembler::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type) { ++ ShouldNotReachHere(); ++} + + void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, CodeEmitInfo* info, bool pop_fpu_stack) { + assert(info == NULL, "unused on this code path"); +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/sparc/c1_LIRGenerator_sparc.cpp b/src/hotspot/cpu/sparc/c1_LIRGenerator_sparc.cpp +--- a/src/hotspot/cpu/sparc/c1_LIRGenerator_sparc.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/cpu/sparc/c1_LIRGenerator_sparc.cpp 2024-01-30 10:00:11.878097961 +0800 +@@ -267,19 +267,29 @@ + __ move(temp, addr); + } + +-void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) { ++template ++void LIRGenerator::cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, int disp, int c, T tgt, CodeEmitInfo* info) { + LIR_Opr o7opr = FrameMap::O7_opr; + __ load(new LIR_Address(base, disp, T_INT), o7opr, info); +- __ cmp(condition, o7opr, c); ++ __ cmp_branch(condition, o7opr, c, T_INT, tgt); + } + ++// Explicit instantiation for all supported types. 
++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, CodeStub*, CodeEmitInfo*); + +-void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info) { ++template ++void LIRGenerator::cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, T tgt, CodeEmitInfo* info) { + LIR_Opr o7opr = FrameMap::O7_opr; + __ load(new LIR_Address(base, disp, type), o7opr, info); +- __ cmp(condition, reg, o7opr); ++ __ cmp_branch(condition, reg, o7opr, type, tgt); + } + ++// Explicit instantiation for all supported types. ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, CodeStub*, CodeEmitInfo*); + + bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, int c, LIR_Opr result, LIR_Opr tmp) { + assert(left != result, "should be different registers"); +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/sparc/c1_LIR_sparc.cpp b/src/hotspot/cpu/sparc/c1_LIR_sparc.cpp +--- a/src/hotspot/cpu/sparc/c1_LIR_sparc.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/cpu/sparc/c1_LIR_sparc.cpp 2024-01-30 10:00:11.878097961 +0800 +@@ -54,3 +54,24 @@ + "wrong type for addresses"); + } + #endif // PRODUCT ++ ++template ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, T tgt, CodeEmitInfo* info) { ++ cmp(condition, left, right, info); ++ branch(condition, type, tgt); ++} ++ ++// Explicit instantiation for all supported types. 
++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, Label*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, BlockBegin*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, CodeStub*, CodeEmitInfo*); ++ ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* unordered) { ++ cmp(condition, left, right); ++ branch(condition, type, block, unordered); ++} ++ ++void LIR_List::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { ++ cmp(condition, left, right); ++ cmove(condition, src1, src2, dst, type); ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +--- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp 2024-01-30 10:00:11.888097840 +0800 +@@ -1442,6 +1442,10 @@ + } + } + ++void LIR_Assembler::emit_opCmpBranch(LIR_OpCmpBranch* op) { ++ ShouldNotReachHere(); ++} ++ + void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { + LIR_Opr src = op->in_opr(); + LIR_Opr dest = op->result_opr(); +@@ -2030,6 +2034,9 @@ + } + } + ++void LIR_Assembler::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type) { ++ ShouldNotReachHere(); ++} + + void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, CodeEmitInfo* info, bool pop_fpu_stack) { + assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method"); +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp +--- a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp 2024-01-30 10:00:11.888097840 +0800 +@@ -255,15 +255,27 @@ + __ add((LIR_Opr)addr, LIR_OprFact::intConst(step), (LIR_Opr)addr); + } + +-void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) { ++template ++void LIRGenerator::cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, int disp, int c, T tgt, CodeEmitInfo* info) { + __ cmp_mem_int(condition, base, disp, c, info); ++ __ branch(condition, T_INT, tgt); + } + ++// Explicit instantiation for all supported types. ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, CodeStub*, CodeEmitInfo*); + +-void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info) { ++template ++void LIRGenerator::cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, T tgt, CodeEmitInfo* info) { + __ cmp_reg_mem(condition, reg, new LIR_Address(base, disp, type), info); ++ __ branch(condition, type, tgt); + } + ++// Explicit instantiation for all supported types. 
++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, CodeStub*, CodeEmitInfo*); + + bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, jint c, LIR_Opr result, LIR_Opr tmp) { + if (tmp->is_valid() && c > 0 && c < max_jint) { +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/x86/c1_LIR_x86.cpp b/src/hotspot/cpu/x86/c1_LIR_x86.cpp +--- a/src/hotspot/cpu/x86/c1_LIR_x86.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/cpu/x86/c1_LIR_x86.cpp 2024-01-30 10:00:11.888097840 +0800 +@@ -72,3 +72,24 @@ + #endif + } + #endif // PRODUCT ++ ++template ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, T tgt, CodeEmitInfo* info) { ++ cmp(condition, left, right, info); ++ branch(condition, type, tgt); ++} ++ ++// Explicit instantiation for all supported types. ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, Label*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, BlockBegin*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, CodeStub*, CodeEmitInfo*); ++ ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* unordered) { ++ cmp(condition, left, right); ++ branch(condition, type, block, unordered); ++} ++ ++void LIR_List::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { ++ cmp(condition, left, right); ++ cmove(condition, src1, src2, dst, type); ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp +--- a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp 2024-01-30 10:00:11.891431134 +0800 +@@ -263,7 +263,8 @@ + #define __ ce->masm()-> + + void ZBarrierSetAssembler::generate_c1_load_barrier_test(LIR_Assembler* ce, +- LIR_Opr ref) const { ++ LIR_Opr ref, ++ LIR_Opr res) const { + __ testptr(ref->as_register(), address_bad_mask_from_thread(r15_thread)); + } + +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp +--- a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp 2024-01-30 10:00:11.891431134 +0800 +@@ -77,7 +77,8 @@ + + #ifdef COMPILER1 + void generate_c1_load_barrier_test(LIR_Assembler* ce, +- LIR_Opr ref) const; ++ LIR_Opr ref, ++ LIR_Opr res) const; + + void generate_c1_load_barrier_stub(LIR_Assembler* ce, + ZLoadBarrierStubC1* stub) const; +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/os/linux/os_linux.cpp b/src/hotspot/os/linux/os_linux.cpp +--- a/src/hotspot/os/linux/os_linux.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/os/linux/os_linux.cpp 2024-01-30 10:00:11.914764190 +0800 +@@ -23,6 +23,12 @@ + * + */ 
+ ++/* ++ * This file has been modified by Loongson Technology in 2021. These ++ * modifications are Copyright (c) 2021 Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + // no precompiled headers + #include "jvm.h" + #include "classfile/classLoader.hpp" +@@ -4076,6 +4082,8 @@ + IA64_ONLY(256 * M) + PPC_ONLY(4 * M) + S390_ONLY(1 * M) ++ MIPS64_ONLY(4 * M) ++ LOONGARCH64_ONLY(4 * M); //In MIPS _large_page_size is seted 4*M. // TODO: LA + SPARC_ONLY(4 * M); + #endif // ZERO + +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/os_cpu/linux_loongarch/assembler_linux_loongarch.cpp b/src/hotspot/os_cpu/linux_loongarch/assembler_linux_loongarch.cpp +--- a/src/hotspot/os_cpu/linux_loongarch/assembler_linux_loongarch.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/os_cpu/linux_loongarch/assembler_linux_loongarch.cpp 2024-01-30 10:00:11.931430657 +0800 +@@ -0,0 +1,24 @@ ++/* ++ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/os_cpu/linux_loongarch/atomic_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/atomic_linux_loongarch.hpp +--- a/src/hotspot/os_cpu/linux_loongarch/atomic_linux_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/os_cpu/linux_loongarch/atomic_linux_loongarch.hpp 2024-01-30 10:00:11.931430657 +0800 +@@ -0,0 +1,160 @@ ++/* ++ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_ATOMIC_LINUX_LOONGARCH_HPP ++#define OS_CPU_LINUX_LOONGARCH_ATOMIC_LINUX_LOONGARCH_HPP ++ ++#include "runtime/vm_version.hpp" ++ ++// Implementation of class atomic ++ ++template ++struct Atomic::PlatformAdd ++ : Atomic::AddAndFetch > ++{ ++ template ++ D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const { ++ //Unimplemented(); ++ return __sync_add_and_fetch(dest, add_value); ++ } ++}; ++ ++template<> ++template ++inline T Atomic::PlatformXchg<4>::operator()(T exchange_value, ++ T volatile* dest, ++ atomic_memory_order order) const { ++ T __ret, __tmp; ++ ++ STATIC_ASSERT(4 == sizeof(T)); ++ __asm__ __volatile__ ( ++ "1: ll.w %[__ret], %[__dest] \n\t" ++ " move %[__tmp], %[__val] \n\t" ++ " sc.w %[__tmp], %[__dest] \n\t" ++ " beqz %[__tmp], 1b \n\t" ++ ++ : [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp) ++ : [__dest] "ZC" (*(volatile jint*)dest), [__val] "r" (exchange_value) ++ : "memory" ++ ); ++ ++ return __ret; ++} ++ ++template<> ++template ++inline T Atomic::PlatformXchg<8>::operator()(T exchange_value, ++ T volatile* dest, ++ atomic_memory_order order) const { ++ STATIC_ASSERT(8 == sizeof(T)); ++ T __ret; ++ jlong __tmp; ++ __asm__ __volatile__ ( ++ "1: ll.d %[__ret], %[__dest] \n\t" ++ " move %[__tmp], %[__val] \n\t" ++ " sc.d %[__tmp], %[__dest] \n\t" ++ " beqz %[__tmp], 1b \n\t" ++ ++ : [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp) ++ : [__dest] "ZC" (*(volatile intptr_t*)dest), [__val] "r" (exchange_value) ++ : "memory" ++ ); ++ ++ return __ret; ++} ++ ++#if 0 ++template<> ++template ++inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value, ++ T volatile* dest, ++ T compare_value, ++ atomic_memory_order order) const { ++ STATIC_ASSERT(1 == sizeof(T)); ++} ++ ++#else ++// No direct support for cmpxchg of bytes; emulate using int. 
++template<> ++struct Atomic::PlatformCmpxchg<1> : Atomic::CmpxchgByteUsingInt {}; ++#endif ++ ++template<> ++template ++inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value, ++ T volatile* dest, ++ T compare_value, ++ atomic_memory_order order) const { ++ STATIC_ASSERT(4 == sizeof(T)); ++ T __prev; ++ jint __cmp; ++ ++ __asm__ __volatile__ ( ++ "1: ll.w %[__prev], %[__dest] \n\t" ++ " bne %[__prev], %[__old], 2f \n\t" ++ " move %[__cmp], $r0 \n\t" ++ " move %[__cmp], %[__new] \n\t" ++ " sc.w %[__cmp], %[__dest] \n\t" ++ " beqz %[__cmp], 1b \n\t" ++ "2: \n\t" ++ " dbar 0x700 \n\t" ++ ++ : [__prev] "=&r" (__prev), [__cmp] "=&r" (__cmp) ++ : [__dest] "ZC" (*(volatile jint*)dest), [__old] "r" (compare_value), [__new] "r" (exchange_value) ++ : "memory" ++ ); ++ ++ return __prev; ++} ++ ++template<> ++template ++inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value, ++ T volatile* dest, ++ T compare_value, ++ atomic_memory_order order) const { ++ STATIC_ASSERT(8 == sizeof(T)); ++ T __prev; ++ jlong __cmp; ++ ++ __asm__ __volatile__ ( ++ "1: ll.d %[__prev], %[__dest] \n\t" ++ " bne %[__prev], %[__old], 2f \n\t" ++ " move %[__cmp], $r0 \n\t" ++ " move %[__cmp], %[__new] \n\t" ++ " sc.d %[__cmp], %[__dest] \n\t" ++ " beqz %[__cmp], 1b \n\t" ++ "2: \n\t" ++ " dbar 0x700 \n\t" ++ ++ : [__prev] "=&r" (__prev), [__cmp] "=&r" (__cmp) ++ : [__dest] "ZC" (*(volatile jlong*)dest), [__old] "r" (compare_value), [__new] "r" (exchange_value) ++ : "memory" ++ ); ++ return __prev; ++} ++ ++ ++#endif // OS_CPU_LINUX_LOONGARCH_ATOMIC_LINUX_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/os_cpu/linux_loongarch/bytes_linux_loongarch.inline.hpp b/src/hotspot/os_cpu/linux_loongarch/bytes_linux_loongarch.inline.hpp +--- a/src/hotspot/os_cpu/linux_loongarch/bytes_linux_loongarch.inline.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/os_cpu/linux_loongarch/bytes_linux_loongarch.inline.hpp 2024-01-30 10:00:11.931430657 +0800 +@@ -0,0 +1,37 @@ ++/* ++ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_BYTES_LINUX_LOONGARCH_INLINE_HPP ++#define OS_CPU_LINUX_LOONGARCH_BYTES_LINUX_LOONGARCH_INLINE_HPP ++ ++#include ++ ++// Efficient swapping of data bytes from Java byte ++// ordering to native byte ordering and vice versa. 
++inline u2 Bytes::swap_u2(u2 x) { return bswap_16(x); } ++inline u4 Bytes::swap_u4(u4 x) { return bswap_32(x); } ++inline u8 Bytes::swap_u8(u8 x) { return bswap_64(x); } ++ ++#endif // OS_CPU_LINUX_LOONGARCH_BYTES_LINUX_LOONGARCH_INLINE_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/os_cpu/linux_loongarch/copy_linux_loongarch.inline.hpp b/src/hotspot/os_cpu/linux_loongarch/copy_linux_loongarch.inline.hpp +--- a/src/hotspot/os_cpu/linux_loongarch/copy_linux_loongarch.inline.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/os_cpu/linux_loongarch/copy_linux_loongarch.inline.hpp 2024-01-30 10:00:11.931430657 +0800 +@@ -0,0 +1,125 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_COPY_LINUX_LOONGARCH_INLINE_HPP ++#define OS_CPU_LINUX_LOONGARCH_COPY_LINUX_LOONGARCH_INLINE_HPP ++ ++static void pd_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ (void)memmove(to, from, count * HeapWordSize); ++} ++ ++static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ switch (count) { ++ case 8: to[7] = from[7]; ++ case 7: to[6] = from[6]; ++ case 6: to[5] = from[5]; ++ case 5: to[4] = from[4]; ++ case 4: to[3] = from[3]; ++ case 3: to[2] = from[2]; ++ case 2: to[1] = from[1]; ++ case 1: to[0] = from[0]; ++ case 0: break; ++ default: ++ (void)memcpy(to, from, count * HeapWordSize); ++ break; ++ } ++} ++ ++static void pd_disjoint_words_atomic(const HeapWord* from, HeapWord* to, size_t count) { ++ switch (count) { ++ case 8: to[7] = from[7]; ++ case 7: to[6] = from[6]; ++ case 6: to[5] = from[5]; ++ case 5: to[4] = from[4]; ++ case 4: to[3] = from[3]; ++ case 3: to[2] = from[2]; ++ case 2: to[1] = from[1]; ++ case 1: to[0] = from[0]; ++ case 0: break; ++ default: ++ while (count-- > 0) { ++ *to++ = *from++; ++ } ++ break; ++ } ++} ++ ++static void pd_aligned_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_words(from, to, count); ++} ++ ++static void pd_aligned_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_disjoint_words(from, to, count); ++} ++ ++static void pd_conjoint_bytes(const void* from, void* to, size_t count) { ++ (void)memmove(to, from, count); ++} ++ ++static void pd_conjoint_bytes_atomic(const void* from, void* to, size_t count) { ++ pd_conjoint_bytes(from, to, count); ++} ++ ++static void pd_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) { ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_conjoint_jints_atomic(const jint* from, jint* to, size_t count) { ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) { ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_conjoint_oops_atomic(const oop* from, oop* to, size_t count) { ++ //assert(!UseCompressedOops, "foo!"); ++ assert(HeapWordSize == BytesPerOop, "heapwords and oops must be the same size"); ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_bytes(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_bytes_atomic(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_jshorts(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_jshorts_atomic((jshort*)from, (jshort*)to, count); ++} ++ ++static void pd_arrayof_conjoint_jints(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_jints_atomic((jint*)from, (jint*)to, count); ++} ++ ++static void pd_arrayof_conjoint_jlongs(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_jlongs_atomic((jlong*)from, (jlong*)to, count); ++} ++ ++static void pd_arrayof_conjoint_oops(const HeapWord* from, HeapWord* to, size_t count) { ++ //assert(!UseCompressedOops, "foo!"); ++ assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size"); ++ pd_conjoint_oops_atomic((oop*)from, (oop*)to, count); ++} ++ ++#endif // OS_CPU_LINUX_LOONGARCH_COPY_LINUX_LOONGARCH_INLINE_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/os_cpu/linux_loongarch/globals_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/globals_linux_loongarch.hpp +--- 
a/src/hotspot/os_cpu/linux_loongarch/globals_linux_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/os_cpu/linux_loongarch/globals_linux_loongarch.hpp 2024-01-30 10:00:11.931430657 +0800 +@@ -0,0 +1,43 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_GLOBALS_LINUX_LOONGARCH_HPP ++#define OS_CPU_LINUX_LOONGARCH_GLOBALS_LINUX_LOONGARCH_HPP ++ ++// Sets the default values for platform dependent flags used by the runtime system. ++// (see globals.hpp) ++ ++define_pd_global(bool, DontYieldALot, false); ++define_pd_global(intx, ThreadStackSize, 2048); // 0 => use system default ++define_pd_global(intx, VMThreadStackSize, 2048); ++ ++define_pd_global(intx, CompilerThreadStackSize, 2048); ++ ++define_pd_global(uintx,JVMInvokeMethodSlack, 8192); ++ ++// Used on 64 bit platforms for UseCompressedOops base address ++define_pd_global(uintx,HeapBaseMinAddress, 2*G); ++ ++#endif // OS_CPU_LINUX_LOONGARCH_GLOBALS_LINUX_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/os_cpu/linux_loongarch/linux_loongarch.s b/src/hotspot/os_cpu/linux_loongarch/linux_loongarch.s +--- a/src/hotspot/os_cpu/linux_loongarch/linux_loongarch.s 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/os_cpu/linux_loongarch/linux_loongarch.s 2024-01-30 10:00:11.931430657 +0800 +@@ -0,0 +1,25 @@ ++# ++# Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved. ++# Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++# ++# This code is free software; you can redistribute it and/or modify it ++# under the terms of the GNU General Public License version 2 only, as ++# published by the Free Software Foundation. ++# ++# This code is distributed in the hope that it will be useful, but WITHOUT ++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++# version 2 for more details (a copy is included in the LICENSE file that ++# accompanied this code). ++# ++# You should have received a copy of the GNU General Public License version ++# 2 along with this work; if not, write to the Free Software Foundation, ++# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++# ++# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++# or visit www.oracle.com if you need additional information or have any ++# questions. ++# ++ ++ +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/os_cpu/linux_loongarch/orderAccess_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/orderAccess_linux_loongarch.hpp +--- a/src/hotspot/os_cpu/linux_loongarch/orderAccess_linux_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/os_cpu/linux_loongarch/orderAccess_linux_loongarch.hpp 2024-01-30 10:00:11.931430657 +0800 +@@ -0,0 +1,51 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_ORDERACCESS_LINUX_LOONGARCH_HPP ++#define OS_CPU_LINUX_LOONGARCH_ORDERACCESS_LINUX_LOONGARCH_HPP ++ ++#include "runtime/os.hpp" ++ ++// Included in orderAccess.hpp header file. ++ ++// Implementation of class OrderAccess. ++#define inlasm_sync(v) if (os::is_ActiveCoresMP()) \ ++ __asm__ __volatile__ ("nop" : : : "memory"); \ ++ else \ ++ __asm__ __volatile__ ("dbar %0" : :"K"(v) : "memory"); ++ ++inline void OrderAccess::loadload() { inlasm_sync(0x15); } ++inline void OrderAccess::storestore() { inlasm_sync(0x1a); } ++inline void OrderAccess::loadstore() { inlasm_sync(0x16); } ++inline void OrderAccess::storeload() { inlasm_sync(0x19); } ++ ++inline void OrderAccess::acquire() { inlasm_sync(0x14); } ++inline void OrderAccess::release() { inlasm_sync(0x12); } ++inline void OrderAccess::fence() { inlasm_sync(0x10); } ++ ++ ++#undef inlasm_sync ++ ++#endif // OS_CPU_LINUX_LOONGARCH_ORDERACCESS_LINUX_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.cpp b/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.cpp +--- a/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.cpp 2024-01-30 10:00:11.931430657 +0800 +@@ -0,0 +1,710 @@ ++/* ++ * Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++// no precompiled headers ++#include "asm/macroAssembler.hpp" ++#include "classfile/classLoader.hpp" ++#include "classfile/systemDictionary.hpp" ++#include "classfile/vmSymbols.hpp" ++#include "code/icBuffer.hpp" ++#include "code/vtableStubs.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/allocation.inline.hpp" ++#include "os_share_linux.hpp" ++#include "prims/jniFastGetField.hpp" ++#include "prims/jvm_misc.hpp" ++#include "runtime/arguments.hpp" ++#include "runtime/extendedPC.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/java.hpp" ++#include "runtime/javaCalls.hpp" ++#include "runtime/mutexLocker.hpp" ++#include "runtime/osThread.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++#include "runtime/timer.hpp" ++#include "utilities/events.hpp" ++#include "utilities/vmError.hpp" ++#include "compiler/disassembler.hpp" ++ ++// put OS-includes here ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++ ++#define REG_SP 3 ++#define REG_FP 22 ++ ++NOINLINE address os::current_stack_pointer() { ++ register void *sp __asm__ ("$r3"); ++ return (address) sp; ++} ++ ++char* os::non_memory_address_word() { ++ // Must never look like an address returned by reserve_memory, ++ // even in its subfields (as defined by the CPU immediate fields, ++ // if the CPU splits constants across multiple instructions). ++ ++ return (char*) -1; ++} ++ ++address os::Linux::ucontext_get_pc(const ucontext_t * uc) { ++ return (address)uc->uc_mcontext.__pc; ++} ++ ++void os::Linux::ucontext_set_pc(ucontext_t * uc, address pc) { ++ uc->uc_mcontext.__pc = (intptr_t)pc; ++} ++ ++intptr_t* os::Linux::ucontext_get_sp(const ucontext_t * uc) { ++ return (intptr_t*)uc->uc_mcontext.__gregs[REG_SP]; ++} ++ ++intptr_t* os::Linux::ucontext_get_fp(const ucontext_t * uc) { ++ return (intptr_t*)uc->uc_mcontext.__gregs[REG_FP]; ++} ++ ++// For Forte Analyzer AsyncGetCallTrace profiling support - thread ++// is currently interrupted by SIGPROF. ++// os::Solaris::fetch_frame_from_ucontext() tries to skip nested signal ++// frames. Currently we don't do that on Linux, so it's the same as ++// os::fetch_frame_from_context(). 
++ExtendedPC os::Linux::fetch_frame_from_ucontext(Thread* thread, ++ const ucontext_t* uc, intptr_t** ret_sp, intptr_t** ret_fp) { ++ ++ assert(thread != NULL, "just checking"); ++ assert(ret_sp != NULL, "just checking"); ++ assert(ret_fp != NULL, "just checking"); ++ ++ return os::fetch_frame_from_context(uc, ret_sp, ret_fp); ++} ++ ++ExtendedPC os::fetch_frame_from_context(const void* ucVoid, ++ intptr_t** ret_sp, intptr_t** ret_fp) { ++ ++ ExtendedPC epc; ++ ucontext_t* uc = (ucontext_t*)ucVoid; ++ ++ if (uc != NULL) { ++ epc = ExtendedPC(os::Linux::ucontext_get_pc(uc)); ++ if (ret_sp) *ret_sp = os::Linux::ucontext_get_sp(uc); ++ if (ret_fp) *ret_fp = os::Linux::ucontext_get_fp(uc); ++ } else { ++ // construct empty ExtendedPC for return value checking ++ epc = ExtendedPC(NULL); ++ if (ret_sp) *ret_sp = (intptr_t *)NULL; ++ if (ret_fp) *ret_fp = (intptr_t *)NULL; ++ } ++ ++ return epc; ++} ++ ++frame os::fetch_frame_from_context(const void* ucVoid) { ++ intptr_t* sp; ++ intptr_t* fp; ++ ExtendedPC epc = fetch_frame_from_context(ucVoid, &sp, &fp); ++ return frame(sp, fp, epc.pc()); ++} ++ ++bool os::Linux::get_frame_at_stack_banging_point(JavaThread* thread, ucontext_t* uc, frame* fr) { ++ address pc = (address) os::Linux::ucontext_get_pc(uc); ++ if (Interpreter::contains(pc)) { ++ // interpreter performs stack banging after the fixed frame header has ++ // been generated while the compilers perform it before. To maintain ++ // semantic consistency between interpreted and compiled frames, the ++ // method returns the Java sender of the current frame. ++ *fr = os::fetch_frame_from_context(uc); ++ if (!fr->is_first_java_frame()) { ++ assert(fr->safe_for_sender(thread), "Safety check"); ++ *fr = fr->java_sender(); ++ } ++ } else { ++ // more complex code with compiled code ++ assert(!Interpreter::contains(pc), "Interpreted methods should have been handled above"); ++ CodeBlob* cb = CodeCache::find_blob(pc); ++ if (cb == NULL || !cb->is_nmethod() || cb->is_frame_complete_at(pc)) { ++ // Not sure where the pc points to, fallback to default ++ // stack overflow handling ++ return false; ++ } else { ++ // In compiled code, the stack banging is performed before LR ++ // has been saved in the frame. RA is live, and SP and FP ++ // belong to the caller. ++ intptr_t* fp = os::Linux::ucontext_get_fp(uc); ++ intptr_t* sp = os::Linux::ucontext_get_sp(uc); ++ address pc = (address)(uc->uc_mcontext.__gregs[1]); ++ *fr = frame(sp, fp, pc); ++ if (!fr->is_java_frame()) { ++ assert(fr->safe_for_sender(thread), "Safety check"); ++ assert(!fr->is_first_frame(), "Safety check"); ++ *fr = fr->java_sender(); ++ } ++ } ++ } ++ assert(fr->is_java_frame(), "Safety check"); ++ return true; ++} ++ ++// By default, gcc always save frame pointer on stack. 
It may get ++// turned off by -fomit-frame-pointer, ++frame os::get_sender_for_C_frame(frame* fr) { ++ return frame(fr->sender_sp(), fr->link(), fr->sender_pc()); ++} ++ ++frame os::current_frame() { ++ intptr_t *fp = ((intptr_t **)__builtin_frame_address(0))[frame::native_frame_link_offset]; ++ frame myframe((intptr_t*)os::current_stack_pointer(), ++ (intptr_t*)fp, ++ CAST_FROM_FN_PTR(address, os::current_frame)); ++ if (os::is_first_C_frame(&myframe)) { ++ // stack is not walkable ++ return frame(); ++ } else { ++ return os::get_sender_for_C_frame(&myframe); ++ } ++} ++ ++extern "C" int ++JVM_handle_linux_signal(int sig, ++ siginfo_t* info, ++ void* ucVoid, ++ int abort_if_unrecognized) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("Signal: signo=%d, sicode=%d, sierrno=%d, siaddr=%lx", ++ info->si_signo, ++ info->si_code, ++ info->si_errno, ++ info->si_addr); ++#endif ++ ++ ucontext_t* uc = (ucontext_t*) ucVoid; ++ ++ Thread* t = Thread::current_or_null_safe(); ++ ++ SignalHandlerMark shm(t); ++ ++ // Note: it's not uncommon that JNI code uses signal/sigset to install ++ // then restore certain signal handler (e.g. to temporarily block SIGPIPE, ++ // or have a SIGILL handler when detecting CPU type). When that happens, ++ // JVM_handle_linux_signal() might be invoked with junk info/ucVoid. To ++ // avoid unnecessary crash when libjsig is not preloaded, try handle signals ++ // that do not require siginfo/ucontext first. ++ ++ if (sig == SIGPIPE/* || sig == SIGXFSZ*/) { ++ // allow chained handler to go first ++ if (os::Linux::chained_handler(sig, info, ucVoid)) { ++ return true; ++ } else { ++ if (PrintMiscellaneous && (WizardMode || Verbose)) { ++ warning("Ignoring SIGPIPE - see bug 4229104"); ++ } ++ return true; ++ } ++ } ++ ++#ifdef CAN_SHOW_REGISTERS_ON_ASSERT ++ if ((sig == SIGSEGV || sig == SIGBUS) && info != NULL && info->si_addr == g_assert_poison) { ++ handle_assert_poison_fault(ucVoid, info->si_addr); ++ return 1; ++ } ++#endif ++ ++ JavaThread* thread = NULL; ++ VMThread* vmthread = NULL; ++ if (os::Linux::signal_handlers_are_installed) { ++ if (t != NULL ){ ++ if(t->is_Java_thread()) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("this thread is a java thread"); ++#endif ++ thread = (JavaThread*)t; ++ } ++ else if(t->is_VM_thread()){ ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("this thread is a VM thread\n"); ++#endif ++ vmthread = (VMThread *)t; ++ } ++ } ++ } ++ ++ // Handle SafeFetch faults: ++ if (uc != NULL) { ++ address const pc = (address) os::Linux::ucontext_get_pc(uc); ++ if (pc && StubRoutines::is_safefetch_fault(pc)) { ++ os::Linux::ucontext_set_pc(uc, StubRoutines::continuation_for_safefetch_fault(pc)); ++ return 1; ++ } ++ } ++ ++ // decide if this trap can be handled by a stub ++ address stub = NULL; ++ address pc = NULL; ++ ++ pc = (address) os::Linux::ucontext_get_pc(uc); ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("pc=%lx", pc); ++ os::print_context(tty, uc); ++#endif ++ //%note os_trap_1 ++ if (info != NULL && uc != NULL && thread != NULL) { ++ pc = (address) os::Linux::ucontext_get_pc(uc); ++ ++ // Handle ALL stack overflow variations here ++ if (sig == SIGSEGV) { ++ address addr = (address) info->si_addr; ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("handle all stack overflow variations: "); ++ /*tty->print("addr = %lx, stack base = %lx, stack top = %lx\n", ++ addr, ++ thread->stack_base(), ++ thread->stack_base() - thread->stack_size()); ++ */ ++#endif ++ ++ // check if fault address is within thread stack ++ if (thread->on_local_stack(addr)) { ++ 
// stack overflow ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("stack exception check \n"); ++#endif ++ if (thread->in_stack_yellow_reserved_zone(addr)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("exception addr is in yellow zone\n"); ++#endif ++ if (thread->thread_state() == _thread_in_Java) { ++ if (thread->in_stack_reserved_zone(addr)) { ++ frame fr; ++ if (os::Linux::get_frame_at_stack_banging_point(thread, uc, &fr)) { ++ assert(fr.is_java_frame(), "Must be a Java frame"); ++ frame activation = ++ SharedRuntime::look_for_reserved_stack_annotated_method(thread, fr); ++ if (activation.sp() != NULL) { ++ thread->disable_stack_reserved_zone(); ++ if (activation.is_interpreted_frame()) { ++ thread->set_reserved_stack_activation((address)( ++ activation.fp() + frame::interpreter_frame_initial_sp_offset)); ++ } else { ++ thread->set_reserved_stack_activation((address)activation.unextended_sp()); ++ } ++ return 1; ++ } ++ } ++ } ++ // Throw a stack overflow exception. Guard pages will be reenabled ++ // while unwinding the stack. ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("this thread is in java\n"); ++#endif ++ thread->disable_stack_yellow_reserved_zone(); ++ stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::STACK_OVERFLOW); ++ } else { ++ // Thread was in the vm or native code. Return and try to finish. ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("this thread is in vm or native codes and return\n"); ++#endif ++ thread->disable_stack_yellow_reserved_zone(); ++ return 1; ++ } ++ } else if (thread->in_stack_red_zone(addr)) { ++ // Fatal red zone violation. Disable the guard pages and fall through ++ // to handle_unexpected_exception way down below. ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("exception addr is in red zone\n"); ++#endif ++ thread->disable_stack_red_zone(); ++ tty->print_raw_cr("An irrecoverable stack overflow has occurred."); ++ ++ // This is a likely cause, but hard to verify. Let's just print ++ // it as a hint. ++ tty->print_raw_cr("Please check if any of your loaded .so files has " ++ "enabled executable stack (see man page execstack(8))"); ++ } else { ++ // Accessing stack address below sp may cause SEGV if current ++ // thread has MAP_GROWSDOWN stack. This should only happen when ++ // current thread was created by user code with MAP_GROWSDOWN flag ++ // and then attached to VM. See notes in os_linux.cpp. ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("exception addr is neither in yellow zone nor in the red one\n"); ++#endif ++ if (thread->osthread()->expanding_stack() == 0) { ++ thread->osthread()->set_expanding_stack(); ++ if (os::Linux::manually_expand_stack(thread, addr)) { ++ thread->osthread()->clear_expanding_stack(); ++ return 1; ++ } ++ thread->osthread()->clear_expanding_stack(); ++ } else { ++ fatal("recursive segv. expanding stack."); ++ } ++ } ++ } ++ } // sig == SIGSEGV ++ ++ if (thread->thread_state() == _thread_in_Java) { ++ // Java thread running in Java code => find exception handler if any ++ // a fault inside compiled code, the interpreter, or a stub ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("java thread running in java code\n"); ++#endif ++ ++ // Handle signal from NativeJump::patch_verified_entry(). 
++ if (sig == SIGILL && nativeInstruction_at(pc)->is_sigill_zombie_not_entrant()) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("verified entry = %lx, sig=%d", nativeInstruction_at(pc), sig); ++#endif ++ stub = SharedRuntime::get_handle_wrong_method_stub(); ++ } else if (sig == SIGSEGV && os::is_poll_address((address)info->si_addr)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("polling address = %lx, sig=%d", os::get_polling_page(), sig); ++#endif ++ stub = SharedRuntime::get_poll_stub(pc); ++ } else if (sig == SIGBUS /* && info->si_code == BUS_OBJERR */) { ++ // BugId 4454115: A read from a MappedByteBuffer can fault ++ // here if the underlying file has been truncated. ++ // Do not crash the VM in such a case. ++ CodeBlob* cb = CodeCache::find_blob_unsafe(pc); ++ CompiledMethod* nm = (cb != NULL) ? cb->as_compiled_method_or_null() : NULL; ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("cb = %lx, nm = %lx\n", cb, nm); ++#endif ++ if (nm != NULL && nm->has_unsafe_access()) { ++ address next_pc = (address)((unsigned long)pc + sizeof(unsigned int)); ++ stub = SharedRuntime::handle_unsafe_access(thread, next_pc); ++ } ++ } else if (sig == SIGFPE /* && info->si_code == FPE_INTDIV */) { ++ // HACK: si_code does not work on linux 2.2.12-20!!! ++ int op = pc[0] & 0x3f; ++ int op1 = pc[3] & 0x3f; ++ //FIXME, Must port to LA code!! ++ switch (op) { ++ case 0x1e: //ddiv ++ case 0x1f: //ddivu ++ case 0x1a: //div ++ case 0x1b: //divu ++ case 0x34: //trap ++ // In LA, div_by_zero exception can only be triggered by explicit 'trap'. ++ stub = SharedRuntime::continuation_for_implicit_exception(thread, ++ pc, ++ SharedRuntime::IMPLICIT_DIVIDE_BY_ZERO); ++ break; ++ default: ++ // TODO: handle more cases if we are using other x86 instructions ++ // that can generate SIGFPE signal on linux. ++ tty->print_cr("unknown opcode 0x%X -0x%X with SIGFPE.", op, op1); ++ //fatal("please update this code."); ++ } ++ } else if (sig == SIGSEGV && ++ !MacroAssembler::needs_explicit_null_check((intptr_t)info->si_addr)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("continuation for implicit exception\n"); ++#endif ++ // Determination of interpreter/vtable stub/compiled code null exception ++ stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL); ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("continuation_for_implicit_exception stub: %lx", stub); ++#endif ++ } ++ } else if (thread->thread_state() == _thread_in_vm && ++ sig == SIGBUS && /* info->si_code == BUS_OBJERR && */ ++ thread->doing_unsafe_access()) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("SIGBUS in vm thread \n"); ++#endif ++ address next_pc = (address)((unsigned long)pc + sizeof(unsigned int)); ++ stub = SharedRuntime::handle_unsafe_access(thread, next_pc); ++ } ++ ++ // jni_fast_GetField can trap at certain pc's if a GC kicks in ++ // and the heap gets shrunk before the field access. ++ if ((sig == SIGSEGV) || (sig == SIGBUS)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("jni fast get trap: "); ++#endif ++ address addr = JNI_FastGetField::find_slowcase_pc(pc); ++ if (addr != (address)-1) { ++ stub = addr; ++ } ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("addr = %d, stub = %lx", addr, stub); ++#endif ++ } ++ ++ // Check to see if we caught the safepoint code in the ++ // process of write protecting the memory serialization page. ++ // It write enables the page immediately after protecting it ++ // so we can just return to retry the write. 
++ if ((sig == SIGSEGV) && ++ os::is_memory_serialize_page(thread, (address) info->si_addr)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("write protecting the memory serialiazation page\n"); ++#endif ++ // Block current thread until the memory serialize page permission restored. ++ os::block_on_serialize_page_trap(); ++ return true; ++ } ++ } ++ ++ if (stub != NULL) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("resolved stub=%lx\n",stub); ++#endif ++ // save all thread context in case we need to restore it ++ if (thread != NULL) thread->set_saved_exception_pc(pc); ++ ++ os::Linux::ucontext_set_pc(uc, stub); ++ return true; ++ } ++ ++ // signal-chaining ++ if (os::Linux::chained_handler(sig, info, ucVoid)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("signal chaining\n"); ++#endif ++ return true; ++ } ++ ++ if (!abort_if_unrecognized) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("abort becauce of unrecognized\n"); ++#endif ++ // caller wants another chance, so give it to him ++ return false; ++ } ++ ++ if (pc == NULL && uc != NULL) { ++ pc = os::Linux::ucontext_get_pc(uc); ++ } ++ ++ // unmask current signal ++ sigset_t newset; ++ sigemptyset(&newset); ++ sigaddset(&newset, sig); ++ sigprocmask(SIG_UNBLOCK, &newset, NULL); ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("VMError in signal handler\n"); ++#endif ++ VMError::report_and_die(t, sig, pc, info, ucVoid); ++ ++ ShouldNotReachHere(); ++ return true; // Mute compiler ++} ++ ++void os::Linux::init_thread_fpu_state(void) { ++} ++ ++int os::Linux::get_fpu_control_word(void) { ++ return 0; // mute compiler ++} ++ ++void os::Linux::set_fpu_control_word(int fpu_control) { ++} ++ ++bool os::is_allocatable(size_t bytes) { ++ ++ if (bytes < 2 * G) { ++ return true; ++ } ++ ++ char* addr = reserve_memory(bytes, NULL); ++ ++ if (addr != NULL) { ++ release_memory(addr, bytes); ++ } ++ ++ return addr != NULL; ++} ++ ++//////////////////////////////////////////////////////////////////////////////// ++// thread stack ++ ++// Minimum usable stack sizes required to get to user code. Space for ++// HotSpot guard pages is added later. ++size_t os::Posix::_compiler_thread_min_stack_allowed = 48 * K; ++size_t os::Posix::_java_thread_min_stack_allowed = 40 * K; ++size_t os::Posix::_vm_internal_thread_min_stack_allowed = 64 * K; ++ ++// Return default stack size for thr_type ++size_t os::Posix::default_stack_size(os::ThreadType thr_type) { ++ // Default stack size (compiler thread needs larger stack) ++ size_t s = (thr_type == os::compiler_thread ? 
2 * M : 512 * K); ++ return s; ++} ++ ++///////////////////////////////////////////////////////////////////////////// ++// helper functions for fatal error handler ++void os::print_register_info(outputStream *st, const void *context) { ++ if (context == NULL) return; ++ ++ ucontext_t *uc = (ucontext_t*)context; ++ ++ st->print_cr("Register to memory mapping:"); ++ st->cr(); ++ // this is horrendously verbose but the layout of the registers in the ++ // // context does not match how we defined our abstract Register set, so ++ // // we can't just iterate through the gregs area ++ // ++ // // this is only for the "general purpose" registers ++ st->print("ZERO=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[0]); ++ st->print("RA=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[1]); ++ st->print("TP=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[2]); ++ st->print("SP=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[3]); ++ st->cr(); ++ st->print("A0=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[4]); ++ st->print("A1=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[5]); ++ st->print("A2=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[6]); ++ st->print("A3=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[7]); ++ st->cr(); ++ st->print("A4=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[8]); ++ st->print("A5=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[9]); ++ st->print("A6=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[10]); ++ st->print("A7=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[11]); ++ st->cr(); ++ st->print("T0=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[12]); ++ st->print("T1=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[13]); ++ st->print("T2=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[14]); ++ st->print("T3=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[15]); ++ st->cr(); ++ st->print("T4=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[16]); ++ st->print("T5=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[17]); ++ st->print("T6=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[18]); ++ st->print("T7=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[19]); ++ st->cr(); ++ st->print("T8=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[20]); ++ st->print("RX=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[21]); ++ st->print("FP=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[22]); ++ st->print("S0=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[23]); ++ st->cr(); ++ st->print("S1=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[24]); ++ st->print("S2=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[25]); ++ st->print("S3=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[26]); ++ st->print("S4=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[27]); ++ st->cr(); ++ st->print("S5=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[28]); ++ st->print("S6=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[29]); ++ st->print("S7=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[30]); ++ st->print("S8=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[31]); ++ st->cr(); ++ ++} ++ ++void os::print_context(outputStream *st, const void *context) { ++ if (context == NULL) return; ++ ++ const ucontext_t *uc = (const ucontext_t*)context; ++ st->print_cr("Registers:"); ++ st->print( "ZERO=" 
INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[0]); ++ st->print(", RA=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[1]); ++ st->print(", TP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[2]); ++ st->print(", SP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[3]); ++ st->cr(); ++ st->print( "A0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[4]); ++ st->print(", A1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[5]); ++ st->print(", A2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[6]); ++ st->print(", A3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[7]); ++ st->cr(); ++ st->print( "A4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[8]); ++ st->print(", A5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[9]); ++ st->print(", A6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[10]); ++ st->print(", A7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[11]); ++ st->cr(); ++ st->print( "T0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[12]); ++ st->print(", T1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[13]); ++ st->print(", T2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[14]); ++ st->print(", T3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[15]); ++ st->cr(); ++ st->print( "T4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[16]); ++ st->print(", T5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[17]); ++ st->print(", T6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[18]); ++ st->print(", T7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[19]); ++ st->cr(); ++ st->print( "T8=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[20]); ++ st->print(", RX=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[21]); ++ st->print(", FP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[22]); ++ st->print(", S0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[23]); ++ st->cr(); ++ st->print( "S1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[24]); ++ st->print(", S2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[25]); ++ st->print(", S3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[26]); ++ st->print(", S4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[27]); ++ st->cr(); ++ st->print( "S5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[28]); ++ st->print(", S6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[29]); ++ st->print(", S7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[30]); ++ st->print(", S8=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[31]); ++ st->cr(); ++ st->cr(); ++ ++ intptr_t *sp = (intptr_t *)os::Linux::ucontext_get_sp(uc); ++ st->print_cr("Top of Stack: (sp=" PTR_FORMAT ")", p2i(sp)); ++ print_hex_dump(st, (address)(sp - 32), (address)(sp + 32), sizeof(intptr_t)); ++ st->cr(); ++ ++ // Note: it may be unsafe to inspect memory near pc. For example, pc may ++ // point to garbage if entry point in an nmethod is corrupted. Leave ++ // this at the end, and hope for the best. ++ address pc = os::Linux::ucontext_get_pc(uc); ++ st->print_cr("Instructions: (pc=" PTR_FORMAT ")", p2i(pc)); ++ print_hex_dump(st, pc - 64, pc + 64, sizeof(char)); ++ Disassembler::decode(pc - 80, pc + 80, st); ++} ++ ++void os::setup_fpu() { ++ // no use for LA ++} ++ ++#ifndef PRODUCT ++void os::verify_stack_alignment() { ++ assert(((intptr_t)os::current_stack_pointer() & (StackAlignmentInBytes-1)) == 0, "incorrect stack alignment"); ++} ++#endif ++ ++int os::extra_bang_size_in_bytes() { ++ // LA does not require the additional stack bang. 
++ return 0; ++} ++ ++bool os::is_ActiveCoresMP() { ++ return UseActiveCoresMP && _initial_active_processor_count == 1; ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.hpp +--- a/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.hpp 2024-01-30 10:00:11.931430657 +0800 +@@ -0,0 +1,38 @@ ++/* ++ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_OS_LINUX_LOONGARCH_HPP ++#define OS_CPU_LINUX_LOONGARCH_OS_LINUX_LOONGARCH_HPP ++ ++ static void setup_fpu(); ++ static bool is_allocatable(size_t bytes); ++ ++ // Used to register dynamic code cache area with the OS ++ // Note: Currently only used in 64 bit Windows implementations ++ static bool register_code_area(char *low, char *high) { return true; } ++ ++ static bool is_ActiveCoresMP(); ++ ++#endif // OS_CPU_LINUX_LOONGARCH_OS_LINUX_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/os_cpu/linux_loongarch/prefetch_linux_loongarch.inline.hpp b/src/hotspot/os_cpu/linux_loongarch/prefetch_linux_loongarch.inline.hpp +--- a/src/hotspot/os_cpu/linux_loongarch/prefetch_linux_loongarch.inline.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/os_cpu/linux_loongarch/prefetch_linux_loongarch.inline.hpp 2024-01-30 10:00:11.931430657 +0800 +@@ -0,0 +1,56 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_PREFETCH_LINUX_LOONGARCH_INLINE_HPP ++#define OS_CPU_LINUX_LOONGARCH_PREFETCH_LINUX_LOONGARCH_INLINE_HPP ++ ++ ++inline void Prefetch::read (void *loc, intx interval) { ++// According to previous and present SPECjbb2015 score, ++// comment prefetch is better than if (interval >= 0) prefetch branch. ++// So choose comment prefetch as the base line. ++#if 0 ++ __asm__ __volatile__ ( ++ " preld 0, %[__loc] \n" ++ : ++ : [__loc] "m"( *((address)loc + interval) ) ++ : "memory" ++ ); ++#endif ++} ++ ++inline void Prefetch::write(void *loc, intx interval) { ++// Ditto ++#if 0 ++ __asm__ __volatile__ ( ++ " preld 8, %[__loc] \n" ++ : ++ : [__loc] "m"( *((address)loc + interval) ) ++ : "memory" ++ ); ++#endif ++} ++ ++#endif // OS_CPU_LINUX_LOONGARCH_PREFETCH_LINUX_LOONGARCH_INLINE_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.cpp b/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.cpp +--- a/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.cpp 2024-01-30 10:00:11.931430657 +0800 +@@ -0,0 +1,116 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "memory/metaspaceShared.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/thread.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++ ++void JavaThread::pd_initialize() ++{ ++ _anchor.clear(); ++} ++ ++frame JavaThread::pd_last_frame() { ++ assert(has_last_Java_frame(), "must have last_Java_sp() when suspended"); ++ if (_anchor.last_Java_pc() != NULL) { ++ return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp(), _anchor.last_Java_pc()); ++ } else { ++ // This will pick up pc from sp ++ return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp()); ++ } ++} ++ ++// For Forte Analyzer AsyncGetCallTrace profiling support - thread is ++// currently interrupted by SIGPROF ++bool JavaThread::pd_get_top_frame_for_signal_handler(frame* fr_addr, ++ void* ucontext, bool isInJava) { ++ ++ assert(Thread::current() == this, "caller must be current thread"); ++ return pd_get_top_frame(fr_addr, ucontext, isInJava); ++} ++ ++ ++bool JavaThread::pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava) { ++ return pd_get_top_frame(fr_addr, ucontext, isInJava); ++} ++ ++bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava) { ++ assert(this->is_Java_thread(), "must be JavaThread"); ++ JavaThread* jt = (JavaThread *)this; ++ ++ // If we have a last_Java_frame, then we should use it even if ++ // isInJava == true. It should be more reliable than ucontext info. ++ if (jt->has_last_Java_frame() && jt->frame_anchor()->walkable()) { ++ *fr_addr = jt->pd_last_frame(); ++ return true; ++ } ++ ++ // At this point, we don't have a last_Java_frame, so ++ // we try to glean some information out of the ucontext ++ // if we were running Java code when SIGPROF came in. ++ if (isInJava) { ++ ucontext_t* uc = (ucontext_t*) ucontext; ++ ++ intptr_t* ret_fp; ++ intptr_t* ret_sp; ++ ExtendedPC addr = os::Linux::fetch_frame_from_ucontext(this, uc, ++ &ret_sp, &ret_fp); ++ if (addr.pc() == NULL || ret_sp == NULL ) { ++ // ucontext wasn't useful ++ return false; ++ } ++ ++ if (MetaspaceShared::is_in_trampoline_frame(addr.pc())) { ++ // In the middle of a trampoline call. Bail out for safety. ++ // This happens rarely so shouldn't affect profiling. ++ return false; ++ } ++ ++ frame ret_frame(ret_sp, ret_fp, addr.pc()); ++ if (!ret_frame.safe_for_sender(jt)) { ++#ifdef COMPILER2 ++ // C2 and JVMCI use ebp as a general register see if NULL fp helps ++ frame ret_frame2(ret_sp, NULL, addr.pc()); ++ if (!ret_frame2.safe_for_sender(jt)) { ++ // nothing else to try if the frame isn't good ++ return false; ++ } ++ ret_frame = ret_frame2; ++#else ++ // nothing else to try if the frame isn't good ++ return false; ++#endif // COMPILER2_OR_JVMCI ++ } ++ *fr_addr = ret_frame; ++ return true; ++ } ++ ++ // nothing else to try ++ return false; ++} ++ ++void JavaThread::cache_global_variables() { } +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.hpp +--- a/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.hpp 2024-01-30 10:00:11.931430657 +0800 +@@ -0,0 +1,66 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. 
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_VM_THREAD_LINUX_LOONGARCH_HPP ++#define OS_CPU_LINUX_LOONGARCH_VM_THREAD_LINUX_LOONGARCH_HPP ++ ++ private: ++ void pd_initialize(); ++ ++ frame pd_last_frame(); ++ ++ public: ++ // Mutators are highly dangerous.... ++ intptr_t* last_Java_fp() { return _anchor.last_Java_fp(); } ++ void set_last_Java_fp(intptr_t* fp) { _anchor.set_last_Java_fp(fp); } ++ ++ void set_base_of_stack_pointer(intptr_t* base_sp) { ++ } ++ ++ static ByteSize last_Java_fp_offset() { ++ return byte_offset_of(JavaThread, _anchor) + JavaFrameAnchor::last_Java_fp_offset(); ++ } ++ ++ intptr_t* base_of_stack_pointer() { ++ return NULL; ++ } ++ void record_base_of_stack_pointer() { ++ } ++ ++ bool pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext, ++ bool isInJava); ++ ++ bool pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava); ++private: ++ bool pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava); ++public: ++ ++ // These routines are only used on cpu architectures that ++ // have separate register stacks (Itanium). ++ static bool register_stack_overflow() { return false; } ++ static void enable_register_stack_guard() {} ++ static void disable_register_stack_guard() {} ++ ++#endif // OS_CPU_LINUX_LOONGARCH_VM_THREAD_LINUX_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/os_cpu/linux_loongarch/vmStructs_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/vmStructs_linux_loongarch.hpp +--- a/src/hotspot/os_cpu/linux_loongarch/vmStructs_linux_loongarch.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/os_cpu/linux_loongarch/vmStructs_linux_loongarch.hpp 2024-01-30 10:00:11.931430657 +0800 +@@ -0,0 +1,55 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_VMSTRUCTS_LINUX_LOONGARCH_HPP ++#define OS_CPU_LINUX_LOONGARCH_VMSTRUCTS_LINUX_LOONGARCH_HPP ++ ++// These are the OS and CPU-specific fields, types and integer ++// constants required by the Serviceability Agent. This file is ++// referenced by vmStructs.cpp. ++ ++#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ ++ \ ++ /******************************/ \ ++ /* Threads (NOTE: incomplete) */ \ ++ /******************************/ \ ++ nonstatic_field(OSThread, _thread_id, pid_t) \ ++ nonstatic_field(OSThread, _pthread_id, pthread_t) ++ ++ ++#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ ++ \ ++ /**********************/ \ ++ /* Posix Thread IDs */ \ ++ /**********************/ \ ++ \ ++ declare_integer_type(pid_t) \ ++ declare_unsigned_integer_type(pthread_t) ++ ++#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++ ++#define VM_LONG_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++ ++#endif // OS_CPU_LINUX_LOONGARCH_VMSTRUCTS_LINUX_LOONGARCH_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/os_cpu/linux_loongarch/vm_version_linux_loongarch.cpp b/src/hotspot/os_cpu/linux_loongarch/vm_version_linux_loongarch.cpp +--- a/src/hotspot/os_cpu/linux_loongarch/vm_version_linux_loongarch.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/os_cpu/linux_loongarch/vm_version_linux_loongarch.cpp 2024-01-30 10:00:11.931430657 +0800 +@@ -0,0 +1,93 @@ ++/* ++ * Copyright (c) 2006, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "runtime/os.hpp" ++#include "runtime/vm_version.hpp" ++ ++#include ++#include ++ ++#ifndef HWCAP_LOONGARCH_LAM ++#define HWCAP_LOONGARCH_LAM (1 << 1) ++#endif ++ ++#ifndef HWCAP_LOONGARCH_UAL ++#define HWCAP_LOONGARCH_UAL (1 << 2) ++#endif ++ ++#ifndef HWCAP_LOONGARCH_LSX ++#define HWCAP_LOONGARCH_LSX (1 << 4) ++#endif ++ ++#ifndef HWCAP_LOONGARCH_LASX ++#define HWCAP_LOONGARCH_LASX (1 << 5) ++#endif ++ ++#ifndef HWCAP_LOONGARCH_COMPLEX ++#define HWCAP_LOONGARCH_COMPLEX (1 << 7) ++#endif ++ ++#ifndef HWCAP_LOONGARCH_CRYPTO ++#define HWCAP_LOONGARCH_CRYPTO (1 << 8) ++#endif ++ ++#ifndef HWCAP_LOONGARCH_LBT_X86 ++#define HWCAP_LOONGARCH_LBT_X86 (1 << 10) ++#endif ++ ++#ifndef HWCAP_LOONGARCH_LBT_ARM ++#define HWCAP_LOONGARCH_LBT_ARM (1 << 11) ++#endif ++ ++#ifndef HWCAP_LOONGARCH_LBT_MIPS ++#define HWCAP_LOONGARCH_LBT_MIPS (1 << 12) ++#endif ++ ++void VM_Version::get_os_cpu_info() { ++ ++ uint64_t auxv = getauxval(AT_HWCAP); ++ ++ STATIC_ASSERT(CPU_LAM == HWCAP_LOONGARCH_LAM); ++ STATIC_ASSERT(CPU_UAL == HWCAP_LOONGARCH_UAL); ++ STATIC_ASSERT(CPU_LSX == HWCAP_LOONGARCH_LSX); ++ STATIC_ASSERT(CPU_LASX == HWCAP_LOONGARCH_LASX); ++ STATIC_ASSERT(CPU_COMPLEX == HWCAP_LOONGARCH_COMPLEX); ++ STATIC_ASSERT(CPU_CRYPTO == HWCAP_LOONGARCH_CRYPTO); ++ STATIC_ASSERT(CPU_LBT_X86 == HWCAP_LOONGARCH_LBT_X86); ++ STATIC_ASSERT(CPU_LBT_ARM == HWCAP_LOONGARCH_LBT_ARM); ++ STATIC_ASSERT(CPU_LBT_MIPS == HWCAP_LOONGARCH_LBT_MIPS); ++ ++ _features = auxv & ( ++ HWCAP_LOONGARCH_LAM | ++ HWCAP_LOONGARCH_UAL | ++ HWCAP_LOONGARCH_LSX | ++ HWCAP_LOONGARCH_LASX | ++ HWCAP_LOONGARCH_COMPLEX | ++ HWCAP_LOONGARCH_CRYPTO | ++ HWCAP_LOONGARCH_LBT_X86 | ++ HWCAP_LOONGARCH_LBT_ARM | ++ HWCAP_LOONGARCH_LBT_MIPS); ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/os_cpu/linux_mips/assembler_linux_mips.cpp b/src/hotspot/os_cpu/linux_mips/assembler_linux_mips.cpp +--- a/src/hotspot/os_cpu/linux_mips/assembler_linux_mips.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/os_cpu/linux_mips/assembler_linux_mips.cpp 2024-01-30 10:00:11.931430657 +0800 +@@ -0,0 +1,24 @@ ++/* ++ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/os_cpu/linux_mips/atomic_linux_mips.hpp b/src/hotspot/os_cpu/linux_mips/atomic_linux_mips.hpp +--- a/src/hotspot/os_cpu/linux_mips/atomic_linux_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/os_cpu/linux_mips/atomic_linux_mips.hpp 2024-01-30 10:00:11.931430657 +0800 +@@ -0,0 +1,191 @@ ++/* ++ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_ATOMIC_LINUX_MIPS_HPP ++#define OS_CPU_LINUX_MIPS_VM_ATOMIC_LINUX_MIPS_HPP ++ ++#include "runtime/vm_version.hpp" ++ ++// Implementation of class atomic ++ ++template ++struct Atomic::PlatformAdd ++ : Atomic::AddAndFetch > ++{ ++ template ++ D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const { ++ //Unimplemented(); ++ return __sync_add_and_fetch(dest, add_value); ++ } ++}; ++ ++template<> ++template ++inline T Atomic::PlatformXchg<4>::operator()(T exchange_value, ++ T volatile* dest, ++ atomic_memory_order order) const { ++ T __ret, __tmp; ++ ++ STATIC_ASSERT(4 == sizeof(T)); ++ __asm__ __volatile__ ( ++ " .set push\n\t" ++ " .set mips64\n\t" ++ " .set noreorder\n\t" ++ ++ "1: sync\n\t" ++ " ll %[__ret], %[__dest] \n\t" ++ " move %[__tmp], %[__val] \n\t" ++ " sc %[__tmp], %[__dest] \n\t" ++ " beqz %[__tmp], 1b \n\t" ++ " nop \n\t" ++ ++ " .set pop\n\t" ++ ++ : [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp) ++ : [__dest] "m" (*(volatile jint*)dest), [__val] "r" (exchange_value) ++ : "memory" ++ ); ++ ++ return __ret; ++} ++ ++template<> ++template ++inline T Atomic::PlatformXchg<8>::operator()(T exchange_value, ++ T volatile* dest, ++ atomic_memory_order order) const { ++ STATIC_ASSERT(8 == sizeof(T)); ++ T __ret; ++ jlong __tmp; ++ __asm__ __volatile__ ( ++ " .set push\n\t" ++ " .set mips64\n\t" ++ " .set noreorder\n\t" ++ ++ "1: sync\n\t" ++ " lld %[__ret], %[__dest] \n\t" ++ " move %[__tmp], %[__val] \n\t" ++ " scd %[__tmp], %[__dest] \n\t" ++ " beqz %[__tmp], 1b \n\t" ++ " nop \n\t" ++ ++ " .set pop\n\t" ++ ++ : [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp) ++ : [__dest] "m" (*(volatile intptr_t*)dest), [__val] "r" (exchange_value) ++ : "memory" ++ ); ++ return __ret; ++} ++ ++#if 0 ++template<> ++template ++inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value, ++ T volatile* dest, ++ T compare_value, ++ atomic_memory_order order) const { ++ STATIC_ASSERT(1 == 
sizeof(T)); ++} ++ ++#else ++// No direct support for cmpxchg of bytes; emulate using int. ++template<> ++struct Atomic::PlatformCmpxchg<1> : Atomic::CmpxchgByteUsingInt {}; ++#endif ++ ++template<> ++template ++inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value, ++ T volatile* dest, ++ T compare_value, ++ atomic_memory_order order) const { ++ STATIC_ASSERT(4 == sizeof(T)); ++ T __prev; ++ jint __cmp; ++ ++ __asm__ __volatile__ ( ++ " .set push\n\t" ++ " .set mips64\n\t" ++ " .set noreorder\n\t" ++ ++ "1:sync \n\t" ++ " ll %[__prev], %[__dest] \n\t" ++ " bne %[__prev], %[__old], 2f \n\t" ++ " move %[__cmp], $0 \n\t" ++ " move %[__cmp], %[__new] \n\t" ++ " sc %[__cmp], %[__dest] \n\t" ++ " beqz %[__cmp], 1b \n\t" ++ " nop \n\t" ++ "2: \n\t" ++ " sync \n\t" ++ ++ " .set pop\n\t" ++ ++ : [__prev] "=&r" (__prev), [__cmp] "=&r" (__cmp) ++ : [__dest] "m" (*(volatile jint*)dest), [__old] "r" (compare_value), [__new] "r" (exchange_value) ++ : "memory" ++ ); ++ ++ return __prev; ++} ++ ++template<> ++template ++inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value, ++ T volatile* dest, ++ T compare_value, ++ atomic_memory_order order) const { ++ STATIC_ASSERT(8 == sizeof(T)); ++ T __prev; ++ jlong __cmp; ++ ++ __asm__ __volatile__ ( ++ " .set push\n\t" ++ " .set mips64\n\t" ++ " .set noreorder\n\t" ++ ++ "1:sync \n\t" ++ " lld %[__prev], %[__dest] \n\t" ++ " bne %[__prev], %[__old], 2f \n\t" ++ " move %[__cmp], $0 \n\t" ++ " move %[__cmp], %[__new] \n\t" ++ " scd %[__cmp], %[__dest] \n\t" ++ " beqz %[__cmp], 1b \n\t" ++ " nop \n\t" ++ "2: \n\t" ++ " sync \n\t" ++ ++ " .set pop\n\t" ++ ++ : [__prev] "=&r" (__prev), [__cmp] "=&r" (__cmp) ++ : [__dest] "m" (*(volatile jlong*)dest), [__old] "r" (compare_value), [__new] "r" (exchange_value) ++ : "memory" ++ ); ++ return __prev; ++} ++ ++ ++#endif // OS_CPU_LINUX_MIPS_VM_ATOMIC_LINUX_MIPS_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/os_cpu/linux_mips/bytes_linux_mips.inline.hpp b/src/hotspot/os_cpu/linux_mips/bytes_linux_mips.inline.hpp +--- a/src/hotspot/os_cpu/linux_mips/bytes_linux_mips.inline.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/os_cpu/linux_mips/bytes_linux_mips.inline.hpp 2024-01-30 10:00:11.931430657 +0800 +@@ -0,0 +1,37 @@ ++/* ++ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_BYTES_LINUX_MIPS_INLINE_HPP ++#define OS_CPU_LINUX_MIPS_VM_BYTES_LINUX_MIPS_INLINE_HPP ++ ++#include ++ ++// Efficient swapping of data bytes from Java byte ++// ordering to native byte ordering and vice versa. ++inline u2 Bytes::swap_u2(u2 x) { return bswap_16(x); } ++inline u4 Bytes::swap_u4(u4 x) { return bswap_32(x); } ++inline u8 Bytes::swap_u8(u8 x) { return bswap_64(x); } ++ ++#endif // OS_CPU_LINUX_MIPS_VM_BYTES_LINUX_MIPS_INLINE_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/os_cpu/linux_mips/copy_linux_mips.inline.hpp b/src/hotspot/os_cpu/linux_mips/copy_linux_mips.inline.hpp +--- a/src/hotspot/os_cpu/linux_mips/copy_linux_mips.inline.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/os_cpu/linux_mips/copy_linux_mips.inline.hpp 2024-01-30 10:00:11.931430657 +0800 +@@ -0,0 +1,125 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_COPY_LINUX_MIPS_INLINE_HPP ++#define OS_CPU_LINUX_MIPS_VM_COPY_LINUX_MIPS_INLINE_HPP ++ ++static void pd_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ (void)memmove(to, from, count * HeapWordSize); ++} ++ ++static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ switch (count) { ++ case 8: to[7] = from[7]; ++ case 7: to[6] = from[6]; ++ case 6: to[5] = from[5]; ++ case 5: to[4] = from[4]; ++ case 4: to[3] = from[3]; ++ case 3: to[2] = from[2]; ++ case 2: to[1] = from[1]; ++ case 1: to[0] = from[0]; ++ case 0: break; ++ default: ++ (void)memcpy(to, from, count * HeapWordSize); ++ break; ++ } ++} ++ ++static void pd_disjoint_words_atomic(const HeapWord* from, HeapWord* to, size_t count) { ++ switch (count) { ++ case 8: to[7] = from[7]; ++ case 7: to[6] = from[6]; ++ case 6: to[5] = from[5]; ++ case 5: to[4] = from[4]; ++ case 4: to[3] = from[3]; ++ case 3: to[2] = from[2]; ++ case 2: to[1] = from[1]; ++ case 1: to[0] = from[0]; ++ case 0: break; ++ default: ++ while (count-- > 0) { ++ *to++ = *from++; ++ } ++ break; ++ } ++} ++ ++static void pd_aligned_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_words(from, to, count); ++} ++ ++static void pd_aligned_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_disjoint_words(from, to, count); ++} ++ ++static void pd_conjoint_bytes(const void* from, void* to, size_t count) { ++ (void)memmove(to, from, count); ++} ++ ++static void pd_conjoint_bytes_atomic(const void* from, void* to, size_t count) { ++ pd_conjoint_bytes(from, to, count); ++} ++ ++static void pd_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) { ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_conjoint_jints_atomic(const jint* from, jint* to, size_t count) { ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) { ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_conjoint_oops_atomic(const oop* from, oop* to, size_t count) { ++ //assert(!UseCompressedOops, "foo!"); ++ assert(HeapWordSize == BytesPerOop, "heapwords and oops must be the same size"); ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_bytes(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_bytes_atomic(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_jshorts(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_jshorts_atomic((jshort*)from, (jshort*)to, count); ++} ++ ++static void pd_arrayof_conjoint_jints(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_jints_atomic((jint*)from, (jint*)to, count); ++} ++ ++static void pd_arrayof_conjoint_jlongs(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_jlongs_atomic((jlong*)from, (jlong*)to, count); ++} ++ ++static void pd_arrayof_conjoint_oops(const HeapWord* from, HeapWord* to, size_t count) { ++ //assert(!UseCompressedOops, "foo!"); ++ assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size"); ++ pd_conjoint_oops_atomic((oop*)from, (oop*)to, count); ++} ++ ++#endif // OS_CPU_LINUX_MIPS_VM_COPY_LINUX_MIPS_INLINE_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/os_cpu/linux_mips/globals_linux_mips.hpp b/src/hotspot/os_cpu/linux_mips/globals_linux_mips.hpp +--- a/src/hotspot/os_cpu/linux_mips/globals_linux_mips.hpp 1970-01-01 
08:00:00.000000000 +0800 ++++ b/src/hotspot/os_cpu/linux_mips/globals_linux_mips.hpp 2024-01-30 10:00:11.931430657 +0800 +@@ -0,0 +1,51 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_GLOBALS_LINUX_MIPS_HPP ++#define OS_CPU_LINUX_MIPS_VM_GLOBALS_LINUX_MIPS_HPP ++ ++// Sets the default values for platform dependent flags used by the runtime system. ++// (see globals.hpp) ++ ++define_pd_global(bool, DontYieldALot, false); ++#ifdef MIPS64 ++define_pd_global(intx, ThreadStackSize, 1024); // 0 => use system default ++define_pd_global(intx, VMThreadStackSize, 1024); ++#else ++// ThreadStackSize 320 allows a couple of test cases to run while ++// keeping the number of threads that can be created high. System ++// default ThreadStackSize appears to be 512 which is too big. ++define_pd_global(intx, ThreadStackSize, 320); ++define_pd_global(intx, VMThreadStackSize, 512); ++#endif // MIPS64 ++ ++define_pd_global(intx, CompilerThreadStackSize, 0); ++ ++define_pd_global(uintx,JVMInvokeMethodSlack, 8192); ++ ++// Used on 64 bit platforms for UseCompressedOops base address ++define_pd_global(uintx,HeapBaseMinAddress, 2*G); ++ ++#endif // OS_CPU_LINUX_MIPS_VM_GLOBALS_LINUX_MIPS_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/os_cpu/linux_mips/linux_mips.s b/src/hotspot/os_cpu/linux_mips/linux_mips.s +--- a/src/hotspot/os_cpu/linux_mips/linux_mips.s 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/os_cpu/linux_mips/linux_mips.s 2024-01-30 10:00:11.931430657 +0800 +@@ -0,0 +1,25 @@ ++# ++# Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved. ++# Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++# ++# This code is free software; you can redistribute it and/or modify it ++# under the terms of the GNU General Public License version 2 only, as ++# published by the Free Software Foundation. ++# ++# This code is distributed in the hope that it will be useful, but WITHOUT ++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++# version 2 for more details (a copy is included in the LICENSE file that ++# accompanied this code). 
++# ++# You should have received a copy of the GNU General Public License version ++# 2 along with this work; if not, write to the Free Software Foundation, ++# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++# ++# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++# or visit www.oracle.com if you need additional information or have any ++# questions. ++# ++ ++ +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/os_cpu/linux_mips/orderAccess_linux_mips.hpp b/src/hotspot/os_cpu/linux_mips/orderAccess_linux_mips.hpp +--- a/src/hotspot/os_cpu/linux_mips/orderAccess_linux_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/os_cpu/linux_mips/orderAccess_linux_mips.hpp 2024-01-30 10:00:11.931430657 +0800 +@@ -0,0 +1,51 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_ORDERACCESS_LINUX_MIPS_HPP ++#define OS_CPU_LINUX_MIPS_VM_ORDERACCESS_LINUX_MIPS_HPP ++ ++#include "runtime/os.hpp" ++ ++// Included in orderAccess.hpp header file. ++ ++// Implementation of class OrderAccess. ++#define inlasm_sync() if (os::is_ActiveCoresMP()) \ ++ __asm__ __volatile__ ("nop" : : : "memory"); \ ++ else \ ++ __asm__ __volatile__ ("sync" : : : "memory"); ++ ++inline void OrderAccess::loadload() { inlasm_sync(); } ++inline void OrderAccess::storestore() { inlasm_sync(); } ++inline void OrderAccess::loadstore() { inlasm_sync(); } ++inline void OrderAccess::storeload() { inlasm_sync(); } ++ ++inline void OrderAccess::acquire() { inlasm_sync(); } ++inline void OrderAccess::release() { inlasm_sync(); } ++inline void OrderAccess::fence() { inlasm_sync(); } ++ ++ ++#undef inlasm_sync ++ ++#endif // OS_CPU_LINUX_MIPS_VM_ORDERACCESS_LINUX_MIPS_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/os_cpu/linux_mips/os_linux_mips.cpp b/src/hotspot/os_cpu/linux_mips/os_linux_mips.cpp +--- a/src/hotspot/os_cpu/linux_mips/os_linux_mips.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/os_cpu/linux_mips/os_linux_mips.cpp 2024-01-30 10:00:11.931430657 +0800 +@@ -0,0 +1,1020 @@ ++/* ++ * Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++// no precompiled headers ++#include "asm/macroAssembler.hpp" ++#include "classfile/classLoader.hpp" ++#include "classfile/systemDictionary.hpp" ++#include "classfile/vmSymbols.hpp" ++#include "code/icBuffer.hpp" ++#include "code/vtableStubs.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/allocation.inline.hpp" ++#include "os_share_linux.hpp" ++#include "prims/jniFastGetField.hpp" ++#include "prims/jvm_misc.hpp" ++#include "runtime/arguments.hpp" ++#include "runtime/extendedPC.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/java.hpp" ++#include "runtime/javaCalls.hpp" ++#include "runtime/mutexLocker.hpp" ++#include "runtime/osThread.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++#include "runtime/timer.hpp" ++#include "utilities/events.hpp" ++#include "utilities/vmError.hpp" ++#include "compiler/disassembler.hpp" ++ ++// put OS-includes here ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++ ++#define REG_SP 29 ++#define REG_FP 30 ++ ++address os::current_stack_pointer() { ++ register void *sp __asm__ ("$29"); ++ return (address) sp; ++} ++ ++char* os::non_memory_address_word() { ++ // Must never look like an address returned by reserve_memory, ++ // even in its subfields (as defined by the CPU immediate fields, ++ // if the CPU splits constants across multiple instructions). ++ ++ return (char*) -1; ++} ++ ++address os::Linux::ucontext_get_pc(const ucontext_t * uc) { ++ return (address)uc->uc_mcontext.pc; ++} ++ ++void os::Linux::ucontext_set_pc(ucontext_t * uc, address pc) { ++ uc->uc_mcontext.pc = (intptr_t)pc; ++} ++ ++intptr_t* os::Linux::ucontext_get_sp(const ucontext_t * uc) { ++ return (intptr_t*)uc->uc_mcontext.gregs[REG_SP]; ++} ++ ++intptr_t* os::Linux::ucontext_get_fp(const ucontext_t * uc) { ++ return (intptr_t*)uc->uc_mcontext.gregs[REG_FP]; ++} ++ ++// For Forte Analyzer AsyncGetCallTrace profiling support - thread ++// is currently interrupted by SIGPROF. ++// os::Solaris::fetch_frame_from_ucontext() tries to skip nested signal ++// frames. Currently we don't do that on Linux, so it's the same as ++// os::fetch_frame_from_context(). 
++ExtendedPC os::Linux::fetch_frame_from_ucontext(Thread* thread, ++ const ucontext_t* uc, intptr_t** ret_sp, intptr_t** ret_fp) { ++ ++ assert(thread != NULL, "just checking"); ++ assert(ret_sp != NULL, "just checking"); ++ assert(ret_fp != NULL, "just checking"); ++ ++ return os::fetch_frame_from_context(uc, ret_sp, ret_fp); ++} ++ ++ExtendedPC os::fetch_frame_from_context(const void* ucVoid, ++ intptr_t** ret_sp, intptr_t** ret_fp) { ++ ++ ExtendedPC epc; ++ ucontext_t* uc = (ucontext_t*)ucVoid; ++ ++ if (uc != NULL) { ++ epc = ExtendedPC(os::Linux::ucontext_get_pc(uc)); ++ if (ret_sp) *ret_sp = os::Linux::ucontext_get_sp(uc); ++ if (ret_fp) *ret_fp = os::Linux::ucontext_get_fp(uc); ++ } else { ++ // construct empty ExtendedPC for return value checking ++ epc = ExtendedPC(NULL); ++ if (ret_sp) *ret_sp = (intptr_t *)NULL; ++ if (ret_fp) *ret_fp = (intptr_t *)NULL; ++ } ++ ++ return epc; ++} ++ ++frame os::fetch_frame_from_context(const void* ucVoid) { ++ intptr_t* sp; ++ intptr_t* fp; ++ ExtendedPC epc = fetch_frame_from_context(ucVoid, &sp, &fp); ++ return frame(sp, fp, epc.pc()); ++} ++ ++bool os::Linux::get_frame_at_stack_banging_point(JavaThread* thread, ucontext_t* uc, frame* fr) { ++ address pc = (address) os::Linux::ucontext_get_pc(uc); ++ if (Interpreter::contains(pc)) { ++ // interpreter performs stack banging after the fixed frame header has ++ // been generated while the compilers perform it before. To maintain ++ // semantic consistency between interpreted and compiled frames, the ++ // method returns the Java sender of the current frame. ++ *fr = os::fetch_frame_from_context(uc); ++ if (!fr->is_first_java_frame()) { ++ assert(fr->safe_for_sender(thread), "Safety check"); ++ *fr = fr->java_sender(); ++ } ++ } else { ++ // more complex code with compiled code ++ assert(!Interpreter::contains(pc), "Interpreted methods should have been handled above"); ++ CodeBlob* cb = CodeCache::find_blob(pc); ++ if (cb == NULL || !cb->is_nmethod() || cb->is_frame_complete_at(pc)) { ++ // Not sure where the pc points to, fallback to default ++ // stack overflow handling ++ return false; ++ } else { ++ // In compiled code, the stack banging is performed before LR ++ // has been saved in the frame. RA is live, and SP and FP ++ // belong to the caller. ++ intptr_t* fp = os::Linux::ucontext_get_fp(uc); ++ intptr_t* sp = os::Linux::ucontext_get_sp(uc); ++ address pc = (address)(uc->uc_mcontext.gregs[31]); ++ *fr = frame(sp, fp, pc); ++ if (!fr->is_java_frame()) { ++ assert(fr->safe_for_sender(thread), "Safety check"); ++ assert(!fr->is_first_frame(), "Safety check"); ++ *fr = fr->java_sender(); ++ } ++ } ++ } ++ assert(fr->is_java_frame(), "Safety check"); ++ return true; ++} ++ ++// By default, gcc always save frame pointer (%ebp/%rbp) on stack. 
It may get ++// turned off by -fomit-frame-pointer, ++frame os::get_sender_for_C_frame(frame* fr) { ++ return frame(fr->sender_sp(), fr->link(), fr->sender_pc()); ++} ++ ++//intptr_t* _get_previous_fp() { ++intptr_t* __attribute__((noinline)) os::get_previous_fp() { ++ int *pc; ++ intptr_t sp; ++ int *pc_limit = (int*)(void*)&os::get_previous_fp; ++ int insn; ++ ++ { ++ l_pc:; ++ pc = (int*)&&l_pc; ++ __asm__ __volatile__ ("move %0, $sp" : "=r" (sp)); ++ } ++ ++ do { ++ insn = *pc; ++ switch(bitfield(insn, 16, 16)) { ++ case 0x27bd: /* addiu $sp,$sp,-i */ ++ case 0x67bd: /* daddiu $sp,$sp,-i */ ++ assert ((short)bitfield(insn, 0, 16)<0, "bad frame"); ++ sp -= (short)bitfield(insn, 0, 16); ++ return (intptr_t*)sp; ++ } ++ --pc; ++ } while (pc>=pc_limit); // The initial value of pc may be equal to pc_limit, because of GCC optimization. ++ ++ ShouldNotReachHere(); ++ return NULL; // mute compiler ++} ++ ++ ++frame os::current_frame() { ++ intptr_t* fp = (intptr_t*)get_previous_fp(); ++ frame myframe((intptr_t*)os::current_stack_pointer(), ++ (intptr_t*)fp, ++ CAST_FROM_FN_PTR(address, os::current_frame)); ++ if (os::is_first_C_frame(&myframe)) { ++ // stack is not walkable ++ return frame(); ++ } else { ++ return os::get_sender_for_C_frame(&myframe); ++ } ++} ++ ++//x86 add 2 new assemble function here! ++extern "C" int ++JVM_handle_linux_signal(int sig, ++ siginfo_t* info, ++ void* ucVoid, ++ int abort_if_unrecognized) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("Signal: signo=%d, sicode=%d, sierrno=%d, siaddr=%lx", ++ info->si_signo, ++ info->si_code, ++ info->si_errno, ++ info->si_addr); ++#endif ++ ++ ucontext_t* uc = (ucontext_t*) ucVoid; ++ ++ Thread* t = Thread::current_or_null_safe(); ++ ++ SignalHandlerMark shm(t); ++ ++ // Note: it's not uncommon that JNI code uses signal/sigset to install ++ // then restore certain signal handler (e.g. to temporarily block SIGPIPE, ++ // or have a SIGILL handler when detecting CPU type). When that happens, ++ // JVM_handle_linux_signal() might be invoked with junk info/ucVoid. To ++ // avoid unnecessary crash when libjsig is not preloaded, try handle signals ++ // that do not require siginfo/ucontext first. 
++ ++ if (sig == SIGPIPE/* || sig == SIGXFSZ*/) { ++ // allow chained handler to go first ++ if (os::Linux::chained_handler(sig, info, ucVoid)) { ++ return true; ++ } else { ++ if (PrintMiscellaneous && (WizardMode || Verbose)) { ++ warning("Ignoring SIGPIPE - see bug 4229104"); ++ } ++ return true; ++ } ++ } ++ ++#ifdef CAN_SHOW_REGISTERS_ON_ASSERT ++ if ((sig == SIGSEGV || sig == SIGBUS) && info != NULL && info->si_addr == g_assert_poison) { ++ handle_assert_poison_fault(ucVoid, info->si_addr); ++ return 1; ++ } ++#endif ++ ++ JavaThread* thread = NULL; ++ VMThread* vmthread = NULL; ++ if (os::Linux::signal_handlers_are_installed) { ++ if (t != NULL ){ ++ if(t->is_Java_thread()) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("this thread is a java thread"); ++#endif ++ thread = (JavaThread*)t; ++ } ++ else if(t->is_VM_thread()){ ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("this thread is a VM thread\n"); ++#endif ++ vmthread = (VMThread *)t; ++ } ++ } ++ } ++ ++ // Handle SafeFetch faults: ++ if (uc != NULL) { ++ address const pc = (address) os::Linux::ucontext_get_pc(uc); ++ if (pc && StubRoutines::is_safefetch_fault(pc)) { ++ os::Linux::ucontext_set_pc(uc, StubRoutines::continuation_for_safefetch_fault(pc)); ++ return 1; ++ } ++ } ++ ++ // decide if this trap can be handled by a stub ++ address stub = NULL; ++ address pc = NULL; ++ ++ pc = (address) os::Linux::ucontext_get_pc(uc); ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("pc=%lx", pc); ++ os::print_context(tty, uc); ++#endif ++ //%note os_trap_1 ++ if (info != NULL && uc != NULL && thread != NULL) { ++ pc = (address) os::Linux::ucontext_get_pc(uc); ++ ++ // Handle ALL stack overflow variations here ++ if (sig == SIGSEGV) { ++ address addr = (address) info->si_addr; ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("handle all stack overflow variations: "); ++ /*tty->print("addr = %lx, stack base = %lx, stack top = %lx\n", ++ addr, ++ thread->stack_base(), ++ thread->stack_base() - thread->stack_size()); ++ */ ++#endif ++ ++ // check if fault address is within thread stack ++ if (thread->on_local_stack(addr)) { ++ // stack overflow ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("stack exception check \n"); ++#endif ++ if (thread->in_stack_yellow_reserved_zone(addr)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("exception addr is in yellow zone\n"); ++#endif ++ if (thread->thread_state() == _thread_in_Java) { ++ if (thread->in_stack_reserved_zone(addr)) { ++ frame fr; ++ if (os::Linux::get_frame_at_stack_banging_point(thread, uc, &fr)) { ++ assert(fr.is_java_frame(), "Must be a Java frame"); ++ frame activation = ++ SharedRuntime::look_for_reserved_stack_annotated_method(thread, fr); ++ if (activation.sp() != NULL) { ++ thread->disable_stack_reserved_zone(); ++ if (activation.is_interpreted_frame()) { ++ thread->set_reserved_stack_activation((address)( ++ activation.fp() + frame::interpreter_frame_initial_sp_offset)); ++ } else { ++ thread->set_reserved_stack_activation((address)activation.unextended_sp()); ++ } ++ return 1; ++ } ++ } ++ } ++ // Throw a stack overflow exception. Guard pages will be reenabled ++ // while unwinding the stack. ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("this thread is in java\n"); ++#endif ++ thread->disable_stack_yellow_reserved_zone(); ++ stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::STACK_OVERFLOW); ++ } else { ++ // Thread was in the vm or native code. Return and try to finish. 
++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("this thread is in vm or native codes and return\n"); ++#endif ++ thread->disable_stack_yellow_reserved_zone(); ++ return 1; ++ } ++ } else if (thread->in_stack_red_zone(addr)) { ++ // Fatal red zone violation. Disable the guard pages and fall through ++ // to handle_unexpected_exception way down below. ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("exception addr is in red zone\n"); ++#endif ++ thread->disable_stack_red_zone(); ++ tty->print_raw_cr("An irrecoverable stack overflow has occurred."); ++ ++ // This is a likely cause, but hard to verify. Let's just print ++ // it as a hint. ++ tty->print_raw_cr("Please check if any of your loaded .so files has " ++ "enabled executable stack (see man page execstack(8))"); ++ } else { ++ // Accessing stack address below sp may cause SEGV if current ++ // thread has MAP_GROWSDOWN stack. This should only happen when ++ // current thread was created by user code with MAP_GROWSDOWN flag ++ // and then attached to VM. See notes in os_linux.cpp. ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("exception addr is neither in yellow zone nor in the red one\n"); ++#endif ++ if (thread->osthread()->expanding_stack() == 0) { ++ thread->osthread()->set_expanding_stack(); ++ if (os::Linux::manually_expand_stack(thread, addr)) { ++ thread->osthread()->clear_expanding_stack(); ++ return 1; ++ } ++ thread->osthread()->clear_expanding_stack(); ++ } else { ++ fatal("recursive segv. expanding stack."); ++ } ++ } ++ } //addr < ++ } //sig == SIGSEGV ++ ++ if (thread->thread_state() == _thread_in_Java) { ++ // Java thread running in Java code => find exception handler if any ++ // a fault inside compiled code, the interpreter, or a stub ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("java thread running in java code\n"); ++#endif ++ ++ // Handle signal from NativeJump::patch_verified_entry(). ++ if (sig == SIGILL && nativeInstruction_at(pc)->is_sigill_zombie_not_entrant()) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("verified entry = %lx, sig=%d", nativeInstruction_at(pc), sig); ++#endif ++ stub = SharedRuntime::get_handle_wrong_method_stub(); ++ } else if (sig == SIGSEGV && os::is_poll_address((address)info->si_addr)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("polling address = %lx, sig=%d", os::get_polling_page(), sig); ++#endif ++ stub = SharedRuntime::get_poll_stub(pc); ++ } else if (sig == SIGBUS /* && info->si_code == BUS_OBJERR */) { ++ // BugId 4454115: A read from a MappedByteBuffer can fault ++ // here if the underlying file has been truncated. ++ // Do not crash the VM in such a case. ++ CodeBlob* cb = CodeCache::find_blob_unsafe(pc); ++ CompiledMethod* nm = (cb != NULL) ? cb->as_compiled_method_or_null() : NULL; ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("cb = %lx, nm = %lx\n", cb, nm); ++#endif ++ if (nm != NULL && nm->has_unsafe_access()) { ++ address next_pc = (address)((unsigned long)pc + sizeof(unsigned int)); ++ stub = SharedRuntime::handle_unsafe_access(thread, next_pc); ++ } ++ } else if (sig == SIGFPE /* && info->si_code == FPE_INTDIV */) { ++ // HACK: si_code does not work on linux 2.2.12-20!!! ++ int op = pc[0] & 0x3f; ++ int op1 = pc[3] & 0x3f; ++ //FIXME, Must port to mips code!! ++ switch (op) { ++ case 0x1e: //ddiv ++ case 0x1f: //ddivu ++ case 0x1a: //div ++ case 0x1b: //divu ++ case 0x34: //trap ++ /* In MIPS, div_by_zero exception can only be triggered by explicit 'trap'. 
++ * Ref: [c1_LIRAssembler_mips.cpp] arithmetic_idiv() ++ */ ++ stub = SharedRuntime::continuation_for_implicit_exception(thread, ++ pc, ++ SharedRuntime::IMPLICIT_DIVIDE_BY_ZERO); ++ break; ++ default: ++ // TODO: handle more cases if we are using other x86 instructions ++ // that can generate SIGFPE signal on linux. ++ tty->print_cr("unknown opcode 0x%X -0x%X with SIGFPE.", op, op1); ++ //fatal("please update this code."); ++ } ++ } else if (sig == SIGSEGV && ++ !MacroAssembler::needs_explicit_null_check((intptr_t)info->si_addr)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("continuation for implicit exception\n"); ++#endif ++ // Determination of interpreter/vtable stub/compiled code null exception ++ stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL); ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("continuation_for_implicit_exception stub: %lx", stub); ++#endif ++ } else if (/*thread->thread_state() == _thread_in_Java && */sig == SIGILL) { ++ //Since kernel does not have emulation of PS instructions yet, the emulation must be handled here. ++ //The method is to trigger kernel emulation of float emulation. ++ int inst = *(int*)pc; ++ int ops = (inst >> 26) & 0x3f; ++ int ops_fmt = (inst >> 21) & 0x1f; ++ int op = inst & 0x3f; ++ if (ops == Assembler::cop1_op && ops_fmt == Assembler::ps_fmt) { ++ int ft, fs, fd; ++ ft = (inst >> 16) & 0x1f; ++ fs = (inst >> 11) & 0x1f; ++ fd = (inst >> 6) & 0x1f; ++ float ft_upper, ft_lower, fs_upper, fs_lower, fd_upper, fd_lower; ++ double ft_value, fs_value, fd_value; ++ ft_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[ft]; ++ fs_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[fs]; ++ __asm__ __volatile__ ( ++ "cvt.s.pl %0, %4\n\t" ++ "cvt.s.pu %1, %4\n\t" ++ "cvt.s.pl %2, %5\n\t" ++ "cvt.s.pu %3, %5\n\t" ++ : "=f" (fs_lower), "=f" (fs_upper), "=f" (ft_lower), "=f" (ft_upper) ++ : "f" (fs_value), "f" (ft_value) ++ ); ++ ++ switch (op) { ++ case Assembler::fadd_op: ++ __asm__ __volatile__ ( ++ "add.s %1, %3, %5\n\t" ++ "add.s %2, %4, %6\n\t" ++ "pll.ps %0, %1, %2\n\t" ++ : "=f" (fd_value), "=f" (fd_upper), "=f" (fd_lower) ++ : "f" (fs_upper), "f" (fs_lower), "f" (ft_upper), "f" (ft_lower) ++ ); ++ uc->uc_mcontext.fpregs.fp_r.fp_dregs[fd] = fd_value; ++ stub = pc + 4; ++ break; ++ case Assembler::fsub_op: ++ //fd = fs - ft ++ __asm__ __volatile__ ( ++ "sub.s %1, %3, %5\n\t" ++ "sub.s %2, %4, %6\n\t" ++ "pll.ps %0, %1, %2\n\t" ++ : "=f" (fd_value), "=f" (fd_upper), "=f" (fd_lower) ++ : "f" (fs_upper), "f" (fs_lower), "f" (ft_upper), "f" (ft_lower) ++ ); ++ uc->uc_mcontext.fpregs.fp_r.fp_dregs[fd] = fd_value; ++ stub = pc + 4; ++ break; ++ case Assembler::fmul_op: ++ __asm__ __volatile__ ( ++ "mul.s %1, %3, %5\n\t" ++ "mul.s %2, %4, %6\n\t" ++ "pll.ps %0, %1, %2\n\t" ++ : "=f" (fd_value), "=f" (fd_upper), "=f" (fd_lower) ++ : "f" (fs_upper), "f" (fs_lower), "f" (ft_upper), "f" (ft_lower) ++ ); ++ uc->uc_mcontext.fpregs.fp_r.fp_dregs[fd] = fd_value; ++ stub = pc + 4; ++ break; ++ default: ++ tty->print_cr("unknown cop1 opcode 0x%x with SIGILL.", op); ++ } ++ } else if (ops == Assembler::cop1x_op /*&& op == Assembler::nmadd_ps_op*/) { ++ // madd.ps is not used, the code below were not tested ++ int fr, ft, fs, fd; ++ float fr_upper, fr_lower, fs_upper, fs_lower, ft_upper, ft_lower, fd_upper, fd_lower; ++ double fr_value, ft_value, fs_value, fd_value; ++ switch (op) { ++ case Assembler::madd_ps_op: ++ // fd = (fs * ft) + fr ++ fr = (inst >> 21) & 0x1f; ++ ft = (inst >> 16) & 0x1f; ++ fs = (inst >> 11) & 0x1f; ++ fd = 
(inst >> 6) & 0x1f; ++ fr_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[fr]; ++ ft_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[ft]; ++ fs_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[fs]; ++ __asm__ __volatile__ ( ++ "cvt.s.pu %3, %9\n\t" ++ "cvt.s.pl %4, %9\n\t" ++ "cvt.s.pu %5, %10\n\t" ++ "cvt.s.pl %6, %10\n\t" ++ "cvt.s.pu %7, %11\n\t" ++ "cvt.s.pl %8, %11\n\t" ++ "madd.s %1, %3, %5, %7\n\t" ++ "madd.s %2, %4, %6, %8\n\t" ++ "pll.ps %0, %1, %2\n\t" ++ : "=f" (fd_value), "=f" (fd_upper), "=f" (fd_lower), "=f" (fr_upper), "=f" (fr_lower), "=f" (fs_upper), "=f" (fs_lower), "=f" (ft_upper), "=f" (ft_lower) ++ : "f" (fr_value)/*9*/, "f" (fs_value)/*10*/, "f" (ft_value)/*11*/ ++ ); ++ uc->uc_mcontext.fpregs.fp_r.fp_dregs[fd] = fd_value; ++ stub = pc + 4; ++ break; ++ default: ++ tty->print_cr("unknown cop1x opcode 0x%x with SIGILL.", op); ++ } ++ } ++ } //SIGILL ++ } else if (sig == SIGILL && VM_Version::is_determine_features_test_running()) { ++ // thread->thread_state() != _thread_in_Java ++ // SIGILL must be caused by VM_Version::determine_features(). ++ VM_Version::set_supports_cpucfg(false); ++ stub = pc + 4; // continue with next instruction. ++ } else if (thread->thread_state() == _thread_in_vm && ++ sig == SIGBUS && /* info->si_code == BUS_OBJERR && */ ++ thread->doing_unsafe_access()) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("SIGBUS in vm thread \n"); ++#endif ++ address next_pc = (address)((unsigned long)pc + sizeof(unsigned int)); ++ stub = SharedRuntime::handle_unsafe_access(thread, next_pc); ++ } ++ ++ // jni_fast_GetField can trap at certain pc's if a GC kicks in ++ // and the heap gets shrunk before the field access. ++ if ((sig == SIGSEGV) || (sig == SIGBUS)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("jni fast get trap: "); ++#endif ++ address addr = JNI_FastGetField::find_slowcase_pc(pc); ++ if (addr != (address)-1) { ++ stub = addr; ++ } ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("addr = %d, stub = %lx", addr, stub); ++#endif ++ } ++ ++ // Check to see if we caught the safepoint code in the ++ // process of write protecting the memory serialization page. ++ // It write enables the page immediately after protecting it ++ // so we can just return to retry the write. ++ if ((sig == SIGSEGV) && ++ os::is_memory_serialize_page(thread, (address) info->si_addr)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("write protecting the memory serialiazation page\n"); ++#endif ++ // Block current thread until the memory serialize page permission restored. ++ os::block_on_serialize_page_trap(); ++ return true; ++ } ++ } ++ ++ // Execution protection violation ++ // ++ // This should be kept as the last step in the triage. We don't ++ // have a dedicated trap number for a no-execute fault, so be ++ // conservative and allow other handlers the first shot. ++ // ++ // Note: We don't test that info->si_code == SEGV_ACCERR here. ++ // this si_code is so generic that it is almost meaningless; and ++ // the si_code for this condition may change in the future. ++ // Furthermore, a false-positive should be harmless. 
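++  // In outline: if the faulting pc matches the fault address (or the instruction
++  // spans into a non-executable page), the code below remaps the page RWX and
++  // retries the faulting instruction.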
++ if (UnguardOnExecutionViolation > 0 && ++ //(sig == SIGSEGV || sig == SIGBUS) && ++ //uc->uc_mcontext.gregs[REG_TRAPNO] == trap_page_fault) { ++ (sig == SIGSEGV || sig == SIGBUS ++#ifdef OPT_RANGECHECK ++ || sig == SIGSYS ++#endif ++ ) && ++ //(uc->uc_mcontext.cause == 2 || uc->uc_mcontext.cause == 3)) { ++ (uc->uc_mcontext.hi1 == 2 || uc->uc_mcontext.hi1 == 3)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("execution protection violation\n"); ++#endif ++ ++ int page_size = os::vm_page_size(); ++ address addr = (address) info->si_addr; ++ address pc = os::Linux::ucontext_get_pc(uc); ++ // Make sure the pc and the faulting address are sane. ++ // ++ // If an instruction spans a page boundary, and the page containing ++ // the beginning of the instruction is executable but the following ++ // page is not, the pc and the faulting address might be slightly ++ // different - we still want to unguard the 2nd page in this case. ++ // ++ // 15 bytes seems to be a (very) safe value for max instruction size. ++ bool pc_is_near_addr = ++ (pointer_delta((void*) addr, (void*) pc, sizeof(char)) < 15); ++Untested("Unimplemented yet"); ++ bool instr_spans_page_boundary = ++/* ++ (align_size_down((intptr_t) pc ^ (intptr_t) addr, ++ (intptr_t) page_size) > 0); ++*/ ++ (align_down((intptr_t) pc ^ (intptr_t) addr, ++ (intptr_t) page_size) > 0); ++ ++ if (pc == addr || (pc_is_near_addr && instr_spans_page_boundary)) { ++ static volatile address last_addr = ++ (address) os::non_memory_address_word(); ++ ++ // In conservative mode, don't unguard unless the address is in the VM ++ if (addr != last_addr && ++ (UnguardOnExecutionViolation > 1 || os::address_is_in_vm(addr))) { ++ ++ // Set memory to RWX and retry ++Untested("Unimplemented yet"); ++/* ++ address page_start = ++ (address) align_size_down((intptr_t) addr, (intptr_t) page_size); ++*/ ++ address page_start = align_down(addr, page_size); ++ bool res = os::protect_memory((char*) page_start, page_size, ++ os::MEM_PROT_RWX); ++ ++ if (PrintMiscellaneous && Verbose) { ++ char buf[256]; ++ jio_snprintf(buf, sizeof(buf), "Execution protection violation " ++ "at " INTPTR_FORMAT ++ ", unguarding " INTPTR_FORMAT ": %s, errno=%d", addr, ++ page_start, (res ? "success" : "failed"), errno); ++ tty->print_raw_cr(buf); ++ } ++ stub = pc; ++ ++ // Set last_addr so if we fault again at the same address, we don't end ++ // up in an endless loop. ++ // ++ // There are two potential complications here. Two threads trapping at ++ // the same address at the same time could cause one of the threads to ++ // think it already unguarded, and abort the VM. Likely very rare. ++ // ++ // The other race involves two threads alternately trapping at ++ // different addresses and failing to unguard the page, resulting in ++ // an endless loop. This condition is probably even more unlikely than ++ // the first. ++ // ++ // Although both cases could be avoided by using locks or thread local ++ // last_addr, these solutions are unnecessary complication: this ++ // handler is a best-effort safety net, not a complete solution. It is ++ // disabled by default and should only be used as a workaround in case ++ // we missed any no-execute-unsafe VM code. 
++ ++ last_addr = addr; ++ } ++ } ++ } ++ ++ if (stub != NULL) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("resolved stub=%lx\n",stub); ++#endif ++ // save all thread context in case we need to restore it ++ if (thread != NULL) thread->set_saved_exception_pc(pc); ++ ++ os::Linux::ucontext_set_pc(uc, stub); ++ return true; ++ } ++ ++ // signal-chaining ++ if (os::Linux::chained_handler(sig, info, ucVoid)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("signal chaining\n"); ++#endif ++ return true; ++ } ++ ++ if (!abort_if_unrecognized) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("abort becauce of unrecognized\n"); ++#endif ++ // caller wants another chance, so give it to him ++ return false; ++ } ++ ++ if (pc == NULL && uc != NULL) { ++ pc = os::Linux::ucontext_get_pc(uc); ++ } ++ ++ // unmask current signal ++ sigset_t newset; ++ sigemptyset(&newset); ++ sigaddset(&newset, sig); ++ sigprocmask(SIG_UNBLOCK, &newset, NULL); ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("VMError in signal handler\n"); ++#endif ++ VMError::report_and_die(t, sig, pc, info, ucVoid); ++ ++ ShouldNotReachHere(); ++ return true; // Mute compiler ++} ++ ++// FCSR:...|24| 23 |22|21|... ++// ...|FS|FCC0|FO|FN|... ++void os::Linux::init_thread_fpu_state(void) { ++ if (SetFSFOFN == 999) ++ return; ++ int fs = (SetFSFOFN / 100)? 1:0; ++ int fo = ((SetFSFOFN % 100) / 10)? 1:0; ++ int fn = (SetFSFOFN % 10)? 1:0; ++ int mask = fs << 24 | fo << 22 | fn << 21; ++ ++ int fcsr = get_fpu_control_word(); ++ fcsr = fcsr | mask; ++ set_fpu_control_word(fcsr); ++ /* ++ if (fcsr != get_fpu_control_word()) ++ tty->print_cr(" fail to set to %lx, get_fpu_control_word:%lx", fcsr, get_fpu_control_word()); ++ */ ++} ++ ++int os::Linux::get_fpu_control_word(void) { ++ int fcsr; ++ __asm__ __volatile__ ( ++ ".set noat;" ++ "daddiu %0, $0, 0;" ++ "cfc1 %0, $31;" ++ : "=r" (fcsr) ++ ); ++ return fcsr; ++} ++ ++void os::Linux::set_fpu_control_word(int fpu_control) { ++ __asm__ __volatile__ ( ++ ".set noat;" ++ "ctc1 %0, $31;" ++ : ++ : "r" (fpu_control) ++ ); ++} ++ ++bool os::is_allocatable(size_t bytes) { ++ ++ if (bytes < 2 * G) { ++ return true; ++ } ++ ++ char* addr = reserve_memory(bytes, NULL); ++ ++ if (addr != NULL) { ++ release_memory(addr, bytes); ++ } ++ ++ return addr != NULL; ++} ++ ++//////////////////////////////////////////////////////////////////////////////// ++// thread stack ++ ++//size_t os::Linux::min_stack_allowed = 96 * K; ++size_t os::Posix::_compiler_thread_min_stack_allowed = 48 * K; ++size_t os::Posix::_java_thread_min_stack_allowed = 40 * K; ++size_t os::Posix::_vm_internal_thread_min_stack_allowed = 64 * K; ++ ++ ++/* ++// Test if pthread library can support variable thread stack size. LinuxThreads ++// in fixed stack mode allocates 2M fixed slot for each thread. LinuxThreads ++// in floating stack mode and NPTL support variable stack size. ++bool os::Linux::supports_variable_stack_size() { ++ if (os::Linux::is_NPTL()) { ++ // NPTL, yes ++ return true; ++ ++ } else { ++ // Note: We can't control default stack size when creating a thread. ++ // If we use non-default stack size (pthread_attr_setstacksize), both ++ // floating stack and non-floating stack LinuxThreads will return the ++ // same value. This makes it impossible to implement this function by ++ // detecting thread stack size directly. ++ // ++ // An alternative approach is to check %gs. Fixed-stack LinuxThreads ++ // do not use %gs, so its value is 0. 
Floating-stack LinuxThreads use ++ // %gs (either as LDT selector or GDT selector, depending on kernel) ++ // to access thread specific data. ++ // ++ // Note that %gs is a reserved glibc register since early 2001, so ++ // applications are not allowed to change its value (Ulrich Drepper from ++ // Redhat confirmed that all known offenders have been modified to use ++ // either %fs or TSD). In the worst case scenario, when VM is embedded in ++ // a native application that plays with %gs, we might see non-zero %gs ++ // even LinuxThreads is running in fixed stack mode. As the result, we'll ++ // return true and skip _thread_safety_check(), so we may not be able to ++ // detect stack-heap collisions. But otherwise it's harmless. ++ // ++ return false; ++ } ++} ++*/ ++ ++// Return default stack size for thr_type ++size_t os::Posix::default_stack_size(os::ThreadType thr_type) { ++ // Default stack size (compiler thread needs larger stack) ++ size_t s = (thr_type == os::compiler_thread ? 2 * M : 512 * K); ++ return s; ++} ++ ++///////////////////////////////////////////////////////////////////////////// ++// helper functions for fatal error handler ++void os::print_register_info(outputStream *st, const void *context) { ++ if (context == NULL) return; ++ ++ ucontext_t *uc = (ucontext_t*)context; ++ ++ st->print_cr("Register to memory mapping:"); ++ st->cr(); ++ // this is horrendously verbose but the layout of the registers in the ++ // // context does not match how we defined our abstract Register set, so ++ // // we can't just iterate through the gregs area ++ // ++ // // this is only for the "general purpose" registers ++ st->print("R0=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[0]); ++ st->print("AT=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[1]); ++ st->print("V0=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[2]); ++ st->print("V1=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[3]); ++ st->cr(); ++ st->print("A0=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[4]); ++ st->print("A1=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[5]); ++ st->print("A2=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[6]); ++ st->print("A3=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[7]); ++ st->cr(); ++ st->print("A4=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[8]); ++ st->print("A5=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[9]); ++ st->print("A6=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[10]); ++ st->print("A7=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[11]); ++ st->cr(); ++ st->print("T0=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[12]); ++ st->print("T1=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[13]); ++ st->print("T2=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[14]); ++ st->print("T3=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[15]); ++ st->cr(); ++ st->print("S0=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[16]); ++ st->print("S1=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[17]); ++ st->print("S2=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[18]); ++ st->print("S3=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[19]); ++ st->cr(); ++ st->print("S4=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[20]); ++ st->print("S5=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[21]); ++ st->print("S6=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[22]); ++ st->print("S7=" ); 
print_location(st, (intptr_t)uc->uc_mcontext.gregs[23]); ++ st->cr(); ++ st->print("T8=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[24]); ++ st->print("T9=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[25]); ++ st->print("K0=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[26]); ++ st->print("K1=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[27]); ++ st->cr(); ++ st->print("GP=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[28]); ++ st->print("SP=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[29]); ++ st->print("FP=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[30]); ++ st->print("RA=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[31]); ++ st->cr(); ++ ++} ++ ++void os::print_context(outputStream *st, const void *context) { ++ if (context == NULL) return; ++ ++ const ucontext_t *uc = (const ucontext_t*)context; ++ st->print_cr("Registers:"); ++ st->print( "R0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[0]); ++ st->print(", AT=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[1]); ++ st->print(", V0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[2]); ++ st->print(", V1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[3]); ++ st->cr(); ++ st->print( "A0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[4]); ++ st->print(", A1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[5]); ++ st->print(", A2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[6]); ++ st->print(", A3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[7]); ++ st->cr(); ++ st->print( "A4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[8]); ++ st->print(", A5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[9]); ++ st->print(", A6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[10]); ++ st->print(", A7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[11]); ++ st->cr(); ++ st->print( "T0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[12]); ++ st->print(", T1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[13]); ++ st->print(", T2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[14]); ++ st->print(", T3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[15]); ++ st->cr(); ++ st->print( "S0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[16]); ++ st->print(", S1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[17]); ++ st->print(", S2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[18]); ++ st->print(", S3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[19]); ++ st->cr(); ++ st->print( "S4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[20]); ++ st->print(", S5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[21]); ++ st->print(", S6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[22]); ++ st->print(", S7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[23]); ++ st->cr(); ++ st->print( "T8=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[24]); ++ st->print(", T9=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[25]); ++ st->print(", K0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[26]); ++ st->print(", K1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[27]); ++ st->cr(); ++ st->print( "GP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[28]); ++ st->print(", SP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[29]); ++ st->print(", FP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[30]); ++ st->print(", RA=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[31]); ++ st->cr(); ++ st->cr(); ++ ++ intptr_t *sp = (intptr_t *)os::Linux::ucontext_get_sp(uc); ++ st->print_cr("Top of Stack: (sp=" PTR_FORMAT ")", p2i(sp)); ++ print_hex_dump(st, (address)(sp 
- 32), (address)(sp + 32), sizeof(intptr_t)); ++ st->cr(); ++ ++ // Note: it may be unsafe to inspect memory near pc. For example, pc may ++ // point to garbage if entry point in an nmethod is corrupted. Leave ++ // this at the end, and hope for the best. ++ address pc = os::Linux::ucontext_get_pc(uc); ++ st->print_cr("Instructions: (pc=" PTR_FORMAT ")", p2i(pc)); ++ print_hex_dump(st, pc - 64, pc + 64, sizeof(char)); ++ Disassembler::decode(pc - 80, pc + 80, st); ++} ++ ++void os::setup_fpu() { ++ /* ++ //no use for MIPS ++ int fcsr; ++ address fpu_cntrl = StubRoutines::addr_fpu_cntrl_wrd_std(); ++ __asm__ __volatile__ ( ++ ".set noat;" ++ "cfc1 %0, $31;" ++ "sw %0, 0(%1);" ++ : "=r" (fcsr) ++ : "r" (fpu_cntrl) ++ : "memory" ++ ); ++ printf("fpu_cntrl: %lx\n", fpu_cntrl); ++ */ ++} ++ ++#ifndef PRODUCT ++void os::verify_stack_alignment() { ++ assert(((intptr_t)os::current_stack_pointer() & (StackAlignmentInBytes-1)) == 0, "incorrect stack alignment"); ++} ++#endif ++ ++int os::extra_bang_size_in_bytes() { ++ // MIPS does not require the additional stack bang. ++ return 0; ++} ++ ++bool os::is_ActiveCoresMP() { ++ return UseActiveCoresMP && _initial_active_processor_count == 1; ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/os_cpu/linux_mips/os_linux_mips.hpp b/src/hotspot/os_cpu/linux_mips/os_linux_mips.hpp +--- a/src/hotspot/os_cpu/linux_mips/os_linux_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/os_cpu/linux_mips/os_linux_mips.hpp 2024-01-30 10:00:11.931430657 +0800 +@@ -0,0 +1,39 @@ ++/* ++ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_OS_LINUX_MIPS_HPP ++#define OS_CPU_LINUX_MIPS_VM_OS_LINUX_MIPS_HPP ++ ++ static void setup_fpu(); ++ static bool is_allocatable(size_t bytes); ++ static intptr_t *get_previous_fp(); ++ ++ // Used to register dynamic code cache area with the OS ++ // Note: Currently only used in 64 bit Windows implementations ++ static bool register_code_area(char *low, char *high) { return true; } ++ ++ static bool is_ActiveCoresMP(); ++ ++#endif // OS_CPU_LINUX_MIPS_VM_OS_LINUX_MIPS_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/os_cpu/linux_mips/prefetch_linux_mips.inline.hpp b/src/hotspot/os_cpu/linux_mips/prefetch_linux_mips.inline.hpp +--- a/src/hotspot/os_cpu/linux_mips/prefetch_linux_mips.inline.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/os_cpu/linux_mips/prefetch_linux_mips.inline.hpp 2024-01-30 10:00:11.931430657 +0800 +@@ -0,0 +1,58 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_PREFETCH_LINUX_MIPS_INLINE_HPP ++#define OS_CPU_LINUX_MIPS_VM_PREFETCH_LINUX_MIPS_INLINE_HPP ++ ++ ++inline void Prefetch::read (void *loc, intx interval) { ++ // 'pref' is implemented as NOP in Loongson 3A ++ __asm__ __volatile__ ( ++ " .set push\n" ++ " .set mips32\n" ++ " .set noreorder\n" ++ " pref 0, 0(%[__loc]) \n" ++ " .set pop\n" ++ : [__loc] "=&r"(loc) ++ : ++ : "memory" ++ ); ++} ++ ++inline void Prefetch::write(void *loc, intx interval) { ++ __asm__ __volatile__ ( ++ " .set push\n" ++ " .set mips32\n" ++ " .set noreorder\n" ++ " pref 1, 0(%[__loc]) \n" ++ " .set pop\n" ++ : [__loc] "=&r"(loc) ++ : ++ : "memory" ++ ); ++ ++} ++ ++#endif // OS_CPU_LINUX_MIPS_VM_PREFETCH_LINUX_MIPS_INLINE_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/os_cpu/linux_mips/thread_linux_mips.cpp b/src/hotspot/os_cpu/linux_mips/thread_linux_mips.cpp +--- a/src/hotspot/os_cpu/linux_mips/thread_linux_mips.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/os_cpu/linux_mips/thread_linux_mips.cpp 2024-01-30 10:00:11.931430657 +0800 +@@ -0,0 +1,117 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "compiler/compileBroker.hpp" ++#include "memory/metaspaceShared.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/thread.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++ ++void JavaThread::pd_initialize() ++{ ++ _anchor.clear(); ++} ++ ++frame JavaThread::pd_last_frame() { ++ assert(has_last_Java_frame(), "must have last_Java_sp() when suspended"); ++ if (_anchor.last_Java_pc() != NULL) { ++ return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp(), _anchor.last_Java_pc()); ++ } else { ++ // This will pick up pc from sp ++ return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp()); ++ } ++} ++ ++// For Forte Analyzer AsyncGetCallTrace profiling support - thread is ++// currently interrupted by SIGPROF ++bool JavaThread::pd_get_top_frame_for_signal_handler(frame* fr_addr, ++ void* ucontext, bool isInJava) { ++ ++ assert(Thread::current() == this, "caller must be current thread"); ++ return pd_get_top_frame(fr_addr, ucontext, isInJava); ++} ++ ++ ++bool JavaThread::pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava) { ++ return pd_get_top_frame(fr_addr, ucontext, isInJava); ++} ++ ++bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava) { ++ assert(this->is_Java_thread(), "must be JavaThread"); ++ JavaThread* jt = (JavaThread *)this; ++ ++ // If we have a last_Java_frame, then we should use it even if ++ // isInJava == true. It should be more reliable than ucontext info. ++ if (jt->has_last_Java_frame() && jt->frame_anchor()->walkable()) { ++ *fr_addr = jt->pd_last_frame(); ++ return true; ++ } ++ ++ // At this point, we don't have a last_Java_frame, so ++ // we try to glean some information out of the ucontext ++ // if we were running Java code when SIGPROF came in. ++ if (isInJava) { ++ ucontext_t* uc = (ucontext_t*) ucontext; ++ ++ intptr_t* ret_fp; ++ intptr_t* ret_sp; ++ ExtendedPC addr = os::Linux::fetch_frame_from_ucontext(this, uc, ++ &ret_sp, &ret_fp); ++ if (addr.pc() == NULL || ret_sp == NULL ) { ++ // ucontext wasn't useful ++ return false; ++ } ++ ++ if (MetaspaceShared::is_in_trampoline_frame(addr.pc())) { ++ // In the middle of a trampoline call. Bail out for safety. ++ // This happens rarely so shouldn't affect profiling. 
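++      // The sample is simply dropped; callers treat a false return as
++      // "no walkable frame" for this tick.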
++ return false; ++ } ++ ++ frame ret_frame(ret_sp, ret_fp, addr.pc()); ++ if (!ret_frame.safe_for_sender(jt)) { ++#ifdef COMPILER2 ++ // C2 and JVMCI use ebp as a general register see if NULL fp helps ++ frame ret_frame2(ret_sp, NULL, addr.pc()); ++ if (!ret_frame2.safe_for_sender(jt)) { ++ // nothing else to try if the frame isn't good ++ return false; ++ } ++ ret_frame = ret_frame2; ++#else ++ // nothing else to try if the frame isn't good ++ return false; ++#endif // COMPILER2_OR_JVMCI ++ } ++ *fr_addr = ret_frame; ++ return true; ++ } ++ ++ // nothing else to try ++ return false; ++} ++ ++void JavaThread::cache_global_variables() { } +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/os_cpu/linux_mips/thread_linux_mips.hpp b/src/hotspot/os_cpu/linux_mips/thread_linux_mips.hpp +--- a/src/hotspot/os_cpu/linux_mips/thread_linux_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/os_cpu/linux_mips/thread_linux_mips.hpp 2024-01-30 10:00:11.931430657 +0800 +@@ -0,0 +1,66 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_THREAD_LINUX_MIPS_HPP ++#define OS_CPU_LINUX_MIPS_VM_THREAD_LINUX_MIPS_HPP ++ ++ private: ++ void pd_initialize(); ++ ++ frame pd_last_frame(); ++ ++ public: ++ // Mutators are highly dangerous.... ++ intptr_t* last_Java_fp() { return _anchor.last_Java_fp(); } ++ void set_last_Java_fp(intptr_t* fp) { _anchor.set_last_Java_fp(fp); } ++ ++ void set_base_of_stack_pointer(intptr_t* base_sp) { ++ } ++ ++ static ByteSize last_Java_fp_offset() { ++ return byte_offset_of(JavaThread, _anchor) + JavaFrameAnchor::last_Java_fp_offset(); ++ } ++ ++ intptr_t* base_of_stack_pointer() { ++ return NULL; ++ } ++ void record_base_of_stack_pointer() { ++ } ++ ++ bool pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext, ++ bool isInJava); ++ ++ bool pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava); ++private: ++ bool pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava); ++public: ++ ++ // These routines are only used on cpu architectures that ++ // have separate register stacks (Itanium). 
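++  // MIPS has no separate register stack, so these are no-ops that never
++  // report an overflow.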
++ static bool register_stack_overflow() { return false; } ++ static void enable_register_stack_guard() {} ++ static void disable_register_stack_guard() {} ++ ++#endif // OS_CPU_LINUX_MIPS_VM_THREAD_LINUX_MIPS_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/os_cpu/linux_mips/vmStructs_linux_mips.hpp b/src/hotspot/os_cpu/linux_mips/vmStructs_linux_mips.hpp +--- a/src/hotspot/os_cpu/linux_mips/vmStructs_linux_mips.hpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/os_cpu/linux_mips/vmStructs_linux_mips.hpp 2024-01-30 10:00:11.931430657 +0800 +@@ -0,0 +1,55 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_VMSTRUCTS_LINUX_MIPS_HPP ++#define OS_CPU_LINUX_MIPS_VM_VMSTRUCTS_LINUX_MIPS_HPP ++ ++// These are the OS and CPU-specific fields, types and integer ++// constants required by the Serviceability Agent. This file is ++// referenced by vmStructs.cpp. 
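++// For linux_mips only the OSThread thread ids (_thread_id, _pthread_id) are
++// exposed in the macros below.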
++ ++#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ ++ \ ++ /******************************/ \ ++ /* Threads (NOTE: incomplete) */ \ ++ /******************************/ \ ++ nonstatic_field(OSThread, _thread_id, pid_t) \ ++ nonstatic_field(OSThread, _pthread_id, pthread_t) ++ ++ ++#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ ++ \ ++ /**********************/ \ ++ /* Posix Thread IDs */ \ ++ /**********************/ \ ++ \ ++ declare_integer_type(pid_t) \ ++ declare_unsigned_integer_type(pthread_t) ++ ++#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++ ++#define VM_LONG_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++ ++#endif // OS_CPU_LINUX_MIPS_VM_VMSTRUCTS_LINUX_MIPS_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/os_cpu/linux_mips/vm_version_linux_mips.cpp b/src/hotspot/os_cpu/linux_mips/vm_version_linux_mips.cpp +--- a/src/hotspot/os_cpu/linux_mips/vm_version_linux_mips.cpp 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/hotspot/os_cpu/linux_mips/vm_version_linux_mips.cpp 2024-01-30 10:00:11.931430657 +0800 +@@ -0,0 +1,28 @@ ++/* ++ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2019, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "runtime/os.hpp" ++#include "runtime/vm_version.hpp" +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/os_cpu/linux_x86/gc/z/zGlobals_linux_x86.hpp b/src/hotspot/os_cpu/linux_x86/gc/z/zGlobals_linux_x86.hpp +--- a/src/hotspot/os_cpu/linux_x86/gc/z/zGlobals_linux_x86.hpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/os_cpu/linux_x86/gc/z/zGlobals_linux_x86.hpp 2024-01-30 10:00:11.934763950 +0800 +@@ -85,4 +85,6 @@ + + const size_t ZPlatformCacheLineSize = 64; + ++const bool ZPlatformLoadBarrierTestResultInRegister = false; ++ + #endif // OS_CPU_LINUX_X86_ZGLOBALS_LINUX_X86_HPP +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/asm/codeBuffer.cpp b/src/hotspot/share/asm/codeBuffer.cpp +--- a/src/hotspot/share/asm/codeBuffer.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/asm/codeBuffer.cpp 2024-01-30 10:00:11.944763831 +0800 +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2023. These ++ * modifications are Copyright (c) 2018, 2023, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "asm/codeBuffer.hpp" + #include "compiler/disassembler.hpp" +@@ -351,6 +357,7 @@ + assert(rtype == relocInfo::none || + rtype == relocInfo::runtime_call_type || + rtype == relocInfo::internal_word_type|| ++ NOT_ZERO(MIPS64_ONLY(rtype == relocInfo::internal_pc_type ||)) + rtype == relocInfo::section_word_type || + rtype == relocInfo::external_word_type, + "code needs relocation information"); +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/c1/c1_Compiler.cpp b/src/hotspot/share/c1/c1_Compiler.cpp +--- a/src/hotspot/share/c1/c1_Compiler.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/c1/c1_Compiler.cpp 2024-01-30 10:00:11.944763831 +0800 +@@ -44,6 +44,12 @@ + #include "utilities/bitMap.inline.hpp" + #include "utilities/macros.hpp" + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + + Compiler::Compiler() : AbstractCompiler(compiler_c1) { + } +@@ -211,7 +217,7 @@ + case vmIntrinsics::_updateCRC32: + case vmIntrinsics::_updateBytesCRC32: + case vmIntrinsics::_updateByteBufferCRC32: +-#if defined(SPARC) || defined(S390) || defined(PPC64) || defined(AARCH64) ++#if defined(SPARC) || defined(S390) || defined(PPC64) || defined(AARCH64) || defined(LOONGARCH64) + case vmIntrinsics::_updateBytesCRC32C: + case vmIntrinsics::_updateDirectByteBufferCRC32C: + #endif +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/c1/c1_LinearScan.cpp b/src/hotspot/share/c1/c1_LinearScan.cpp +--- a/src/hotspot/share/c1/c1_LinearScan.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/c1/c1_LinearScan.cpp 2024-01-30 10:00:11.948097125 +0800 +@@ -35,6 +35,12 @@ + #include "runtime/timerTrace.hpp" + #include "utilities/bitMap.inline.hpp" + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #ifndef PRODUCT + + static LinearScanStatistic _stat_before_alloc; +@@ -1258,6 +1264,23 @@ + } + break; + } ++ case lir_cmp_cmove: { ++ assert(op->as_Op4() != NULL, "lir_cmp_cmove must be LIR_Op4"); ++ LIR_Op4* cmove = (LIR_Op4*)op; ++ ++ LIR_Opr move_from = cmove->in_opr3(); ++ LIR_Opr move_to = cmove->result_opr(); ++ ++ if (move_to->is_register() && move_from->is_register()) { ++ Interval* from = interval_at(reg_num(move_from)); ++ Interval* to = interval_at(reg_num(move_to)); ++ if (from != NULL && to != NULL) { ++ to->set_register_hint(from); ++ TRACE_LINEAR_SCAN(4, tty->print_cr("operation at op_id %d: added hint from interval %d to %d", cmove->id(), from->reg_num(), to->reg_num())); ++ } ++ } ++ break; ++ } + default: + break; + } +@@ -3350,7 +3373,9 @@ + check_live = (move->patch_code() == lir_patch_none); + } + LIR_OpBranch* branch = op->as_OpBranch(); +- if (branch != NULL && branch->stub() != NULL && branch->stub()->is_exception_throw_stub()) { ++ LIR_OpCmpBranch* cmp_branch = op->as_OpCmpBranch(); ++ if ((branch != NULL && branch->stub() != NULL && branch->stub()->is_exception_throw_stub()) || ++ (cmp_branch != NULL && cmp_branch->stub() != NULL && cmp_branch->stub()->is_exception_throw_stub())) { + // Don't bother checking the stub in this case since the + // exception stub will never return to normal control flow. + check_live = false; +@@ -6206,6 +6231,16 @@ + if (branch->ublock() == target_from) { + branch->change_ublock(target_to); + } ++ } else if (op->code() == lir_cmp_branch || op->code() == lir_cmp_float_branch) { ++ assert(op->as_OpCmpBranch() != NULL, "branch must be of type LIR_OpCmpBranch"); ++ LIR_OpCmpBranch* branch = (LIR_OpCmpBranch*)op; ++ ++ if (branch->block() == target_from) { ++ branch->change_block(target_to); ++ } ++ if (branch->ublock() == target_from) { ++ branch->change_ublock(target_to); ++ } + } + } + } +@@ -6328,6 +6363,20 @@ + } + } + } ++ } else if (prev_op->code() == lir_cmp_branch || prev_op->code() == lir_cmp_float_branch) { ++ assert(prev_op->as_OpCmpBranch() != NULL, "branch must be of type LIR_OpCmpBranch"); ++ LIR_OpCmpBranch* prev_branch = (LIR_OpCmpBranch*)prev_op; ++ ++ if (prev_branch->stub() == NULL) { ++ if (prev_branch->block() == code->at(i + 1) && prev_branch->info() == NULL) { ++ TRACE_LINEAR_SCAN(3, tty->print_cr("Negating conditional branch and deleting unconditional branch at end of block B%d", block->block_id())); ++ ++ // eliminate a conditional branch to the immediate successor ++ prev_branch->change_block(last_branch->block()); ++ prev_branch->negate_cond(); ++ instructions->trunc_to(instructions->length() - 1); ++ } ++ } + } + } + } +@@ -6403,6 +6452,13 @@ + assert(op_branch->block() == NULL || code->find(op_branch->block()) != -1, "branch target not valid"); + assert(op_branch->ublock() == NULL || code->find(op_branch->ublock()) != -1, "branch target not valid"); + } ++ ++ LIR_OpCmpBranch* op_cmp_branch = instructions->at(j)->as_OpCmpBranch(); ++ ++ if (op_cmp_branch != NULL) { ++ assert(op_cmp_branch->block() == NULL || code->find(op_cmp_branch->block()) != -1, "branch target not valid"); ++ assert(op_cmp_branch->ublock() == NULL || code->find(op_cmp_branch->ublock()) != -1, "branch target not valid"); ++ } + } + + for (j = 0; j < block->number_of_sux() - 1; j++) { +@@ -6647,6 +6703,24 @@ + break; + } + ++ case lir_cmp_branch: ++ case lir_cmp_float_branch: { ++ LIR_OpCmpBranch* branch = op->as_OpCmpBranch(); ++ if (branch->block() == NULL) { ++ inc_counter(counter_stub_branch); ++ } else { ++ 
inc_counter(counter_cond_branch); ++ } ++ inc_counter(counter_cmp); ++ break; ++ } ++ ++ case lir_cmp_cmove: { ++ inc_counter(counter_misc_inst); ++ inc_counter(counter_cmp); ++ break; ++ } ++ + case lir_neg: + case lir_add: + case lir_sub: +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/c1/c1_LIRAssembler.cpp b/src/hotspot/share/c1/c1_LIRAssembler.cpp +--- a/src/hotspot/share/c1/c1_LIRAssembler.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/c1/c1_LIRAssembler.cpp 2024-01-30 10:00:11.948097125 +0800 +@@ -777,6 +777,18 @@ + } + + ++void LIR_Assembler::emit_op4(LIR_Op4* op) { ++ switch (op->code()) { ++ case lir_cmp_cmove: ++ cmp_cmove(op->condition(), op->in_opr1(), op->in_opr2(), op->in_opr3(), op->in_opr4(), op->result_opr(), op->type()); ++ break; ++ ++ default: ++ Unimplemented(); ++ break; ++ } ++} ++ + void LIR_Assembler::build_frame() { + _masm->build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes()); + } +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/c1/c1_LIRAssembler.hpp b/src/hotspot/share/c1/c1_LIRAssembler.hpp +--- a/src/hotspot/share/c1/c1_LIRAssembler.hpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/c1/c1_LIRAssembler.hpp 2024-01-30 10:00:11.948097125 +0800 +@@ -190,7 +190,9 @@ + void emit_op1(LIR_Op1* op); + void emit_op2(LIR_Op2* op); + void emit_op3(LIR_Op3* op); ++ void emit_op4(LIR_Op4* op); + void emit_opBranch(LIR_OpBranch* op); ++ void emit_opCmpBranch(LIR_OpCmpBranch* op); + void emit_opLabel(LIR_OpLabel* op); + void emit_arraycopy(LIR_OpArrayCopy* op); + void emit_updatecrc32(LIR_OpUpdateCRC32* op); +@@ -223,6 +225,7 @@ + void comp_mem_op(LIR_Opr src, LIR_Opr result, BasicType type, CodeEmitInfo* info); // info set for null exceptions + void comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr result, LIR_Op2* op); + void cmove(LIR_Condition code, LIR_Opr left, LIR_Opr right, LIR_Opr result, BasicType type); ++ void cmp_cmove(LIR_Condition code, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type); + + void call( LIR_OpJavaCall* op, relocInfo::relocType rtype); + void ic_call( LIR_OpJavaCall* op); +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/c1/c1_LIR.cpp b/src/hotspot/share/c1/c1_LIR.cpp +--- a/src/hotspot/share/c1/c1_LIR.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/c1/c1_LIR.cpp 2024-01-30 10:00:11.948097125 +0800 +@@ -250,6 +250,18 @@ + #endif + } + ++void LIR_Op4::verify() const { ++#ifdef ASSERT ++ switch (code()) { ++ case lir_cmp_cmove: ++ break; ++ ++ default: ++ assert(!result_opr()->is_register() || !result_opr()->is_oop_register(), ++ "can't produce oops from arith"); ++ } ++#endif ++} + + LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block) + : LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) +@@ -308,6 +320,56 @@ + } + + ++LIR_OpCmpBranch::LIR_OpCmpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, CodeStub* stub, CodeEmitInfo* info) ++ : LIR_Op2(lir_cmp_branch, cond, left, right, info) ++ , _label(stub->entry()) ++ , _block(NULL) ++ , _ublock(NULL) ++ , _stub(stub) { ++} ++ ++LIR_OpCmpBranch::LIR_OpCmpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, BlockBegin* block, CodeEmitInfo* info) ++ : LIR_Op2(lir_cmp_branch, cond, left, right, info) ++ , _label(block->label()) ++ , _block(block) ++ , _ublock(NULL) ++ , _stub(NULL) { ++} ++ 
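++// Float variant: control transfers to 'ublock' when the comparison is
++// unordered (i.e. one operand is NaN).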
++LIR_OpCmpBranch::LIR_OpCmpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, BlockBegin* block, BlockBegin* ublock, CodeEmitInfo* info) ++ : LIR_Op2(lir_cmp_float_branch, cond, left, right, info) ++ , _label(block->label()) ++ , _block(block) ++ , _ublock(ublock) ++ , _stub(NULL) { ++} ++ ++void LIR_OpCmpBranch::change_block(BlockBegin* b) { ++ assert(_block != NULL, "must have old block"); ++ assert(_block->label() == label(), "must be equal"); ++ ++ _block = b; ++ _label = b->label(); ++} ++ ++void LIR_OpCmpBranch::change_ublock(BlockBegin* b) { ++ assert(_ublock != NULL, "must have old block"); ++ ++ _ublock = b; ++} ++ ++void LIR_OpCmpBranch::negate_cond() { ++ switch (condition()) { ++ case lir_cond_equal: set_condition(lir_cond_notEqual); break; ++ case lir_cond_notEqual: set_condition(lir_cond_equal); break; ++ case lir_cond_less: set_condition(lir_cond_greaterEqual); break; ++ case lir_cond_lessEqual: set_condition(lir_cond_greater); break; ++ case lir_cond_greaterEqual: set_condition(lir_cond_less); break; ++ case lir_cond_greater: set_condition(lir_cond_lessEqual); break; ++ default: ShouldNotReachHere(); ++ } ++} ++ + LIR_OpTypeCheck::LIR_OpTypeCheck(LIR_Code code, LIR_Opr result, LIR_Opr object, ciKlass* klass, + LIR_Opr tmp1, LIR_Opr tmp2, LIR_Opr tmp3, + bool fast_check, CodeEmitInfo* info_for_exception, CodeEmitInfo* info_for_patch, +@@ -509,10 +571,7 @@ + assert(opConvert->_info == NULL, "must be"); + if (opConvert->_opr->is_valid()) do_input(opConvert->_opr); + if (opConvert->_result->is_valid()) do_output(opConvert->_result); +-#ifdef PPC32 +- if (opConvert->_tmp1->is_valid()) do_temp(opConvert->_tmp1); +- if (opConvert->_tmp2->is_valid()) do_temp(opConvert->_tmp2); +-#endif ++ if (opConvert->_tmp->is_valid()) do_temp(opConvert->_tmp); + do_stub(opConvert->_stub); + + break; +@@ -611,6 +670,25 @@ + break; + } + ++// LIR_OpCmpBranch; ++ case lir_cmp_branch: // may have info, input and result register always invalid ++ case lir_cmp_float_branch: // may have info, input and result register always invalid ++ { ++ assert(op->as_OpCmpBranch() != NULL, "must be"); ++ LIR_OpCmpBranch* opCmpBranch = (LIR_OpCmpBranch*)op; ++ assert(opCmpBranch->_tmp2->is_illegal() && opCmpBranch->_tmp3->is_illegal() && ++ opCmpBranch->_tmp4->is_illegal() && opCmpBranch->_tmp5->is_illegal(), "not used"); ++ ++ if (opCmpBranch->_info) do_info(opCmpBranch->_info); ++ if (opCmpBranch->_opr1->is_valid()) do_input(opCmpBranch->_opr1); ++ if (opCmpBranch->_opr2->is_valid()) do_input(opCmpBranch->_opr2); ++ if (opCmpBranch->_tmp1->is_valid()) do_temp(opCmpBranch->_tmp1); ++ if (opCmpBranch->_stub != NULL) opCmpBranch->stub()->visit(this); ++ assert(opCmpBranch->_result->is_illegal(), "not used"); ++ ++ break; ++ } ++ + // special handling for cmove: right input operand must not be equal + // to the result operand, otherwise the backend fails + case lir_cmove: +@@ -711,6 +789,29 @@ + break; + } + ++// LIR_Op4 ++ // special handling for cmp cmove: src2(opr4) operand must not be equal ++ // to the result operand, otherwise the backend fails ++ case lir_cmp_cmove: ++ { ++ assert(op->as_Op4() != NULL, "must be"); ++ LIR_Op4* op4 = (LIR_Op4*)op; ++ ++ assert(op4->_info == NULL, "not used"); ++ assert(op4->_opr1->is_valid() && op4->_opr2->is_valid() && ++ op4->_opr3->is_valid() && op4->_opr4->is_valid() && ++ op4->_result->is_valid(), "used"); ++ ++ do_input(op4->_opr1); ++ do_input(op4->_opr2); ++ do_input(op4->_opr3); ++ do_input(op4->_opr4); ++ do_temp(op4->_opr4); ++ do_output(op4->_result); ++ ++ 
break; ++ } ++ + // LIR_OpJavaCall + case lir_static_call: + case lir_optvirtual_call: +@@ -1028,6 +1129,13 @@ + masm->emit_op2(this); + } + ++void LIR_OpCmpBranch::emit_code(LIR_Assembler* masm) { ++ masm->emit_opCmpBranch(this); ++ if (stub()) { ++ masm->append_code_stub(stub()); ++ } ++} ++ + void LIR_OpAllocArray::emit_code(LIR_Assembler* masm) { + masm->emit_alloc_array(this); + masm->append_code_stub(stub()); +@@ -1048,6 +1156,10 @@ + masm->emit_op3(this); + } + ++void LIR_Op4::emit_code(LIR_Assembler* masm) { ++ masm->emit_op4(this); ++} ++ + void LIR_OpLock::emit_code(LIR_Assembler* masm) { + masm->emit_lock(this); + if (stub()) { +@@ -1424,8 +1536,7 @@ + if (deoptimize_on_null) { + // Emit an explicit null check and deoptimize if opr is null + CodeStub* deopt = new DeoptimizeStub(info, Deoptimization::Reason_null_check, Deoptimization::Action_none); +- cmp(lir_cond_equal, opr, LIR_OprFact::oopConst(NULL)); +- branch(lir_cond_equal, T_OBJECT, deopt); ++ cmp_branch(lir_cond_equal, opr, LIR_OprFact::oopConst(NULL), T_OBJECT, deopt); + } else { + // Emit an implicit null check + append(new LIR_Op1(lir_null_check, opr, info)); +@@ -1680,6 +1791,8 @@ + case lir_cmp_l2i: s = "cmp_l2i"; break; + case lir_ucmp_fd2i: s = "ucomp_fd2i"; break; + case lir_cmp_fd2i: s = "comp_fd2i"; break; ++ case lir_cmp_branch: s = "cmp_branch"; break; ++ case lir_cmp_float_branch: s = "cmp_fbranch"; break; + case lir_cmove: s = "cmove"; break; + case lir_add: s = "add"; break; + case lir_sub: s = "sub"; break; +@@ -1705,6 +1818,8 @@ + case lir_irem: s = "irem"; break; + case lir_fmad: s = "fmad"; break; + case lir_fmaf: s = "fmaf"; break; ++ // LIR_Op4 ++ case lir_cmp_cmove: s = "cmp_cmove"; break; + // LIR_OpJavaCall + case lir_static_call: s = "static"; break; + case lir_optvirtual_call: s = "optvirtual"; break; +@@ -1856,6 +1971,26 @@ + } + } + ++// LIR_OpCmpBranch ++void LIR_OpCmpBranch::print_instr(outputStream* out) const { ++ print_condition(out, condition()); out->print(" "); ++ in_opr1()->print(out); out->print(" "); ++ in_opr2()->print(out); out->print(" "); ++ if (block() != NULL) { ++ out->print("[B%d] ", block()->block_id()); ++ } else if (stub() != NULL) { ++ out->print("["); ++ stub()->print_name(out); ++ out->print(": " INTPTR_FORMAT "]", p2i(stub())); ++ if (stub()->info() != NULL) out->print(" [bci:%d]", stub()->info()->stack()->bci()); ++ } else { ++ out->print("[label:" INTPTR_FORMAT "] ", p2i(label())); ++ } ++ if (ublock() != NULL) { ++ out->print("unordered: [B%d] ", ublock()->block_id()); ++ } ++} ++ + void LIR_Op::print_condition(outputStream* out, LIR_Condition cond) { + switch(cond) { + case lir_cond_equal: out->print("[EQ]"); break; +@@ -1876,12 +2011,9 @@ + print_bytecode(out, bytecode()); + in_opr()->print(out); out->print(" "); + result_opr()->print(out); out->print(" "); +-#ifdef PPC32 +- if(tmp1()->is_valid()) { +- tmp1()->print(out); out->print(" "); +- tmp2()->print(out); out->print(" "); ++ if(tmp()->is_valid()) { ++ tmp()->print(out); out->print(" "); + } +-#endif + } + + void LIR_OpConvert::print_bytecode(outputStream* out, Bytecodes::Code code) { +@@ -1978,6 +2110,19 @@ + result_opr()->print(out); + } + ++ ++// LIR_Op4 ++void LIR_Op4::print_instr(outputStream* out) const { ++ if (code() == lir_cmp_cmove) { ++ print_condition(out, condition()); out->print(" "); ++ } ++ in_opr1()->print(out); out->print(" "); ++ in_opr2()->print(out); out->print(" "); ++ in_opr3()->print(out); out->print(" "); ++ in_opr4()->print(out); out->print(" "); ++ result_opr()->print(out); ++} ++ 
+ + void LIR_OpLock::print_instr(outputStream* out) const { + hdr_opr()->print(out); out->print(" "); +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/c1/c1_LIRGenerator.cpp b/src/hotspot/share/c1/c1_LIRGenerator.cpp +--- a/src/hotspot/share/c1/c1_LIRGenerator.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/c1/c1_LIRGenerator.cpp 2024-01-30 10:00:11.948097125 +0800 +@@ -480,13 +480,11 @@ + CodeEmitInfo* null_check_info, CodeEmitInfo* range_check_info) { + CodeStub* stub = new RangeCheckStub(range_check_info, index, array); + if (index->is_constant()) { +- cmp_mem_int(lir_cond_belowEqual, array, arrayOopDesc::length_offset_in_bytes(), +- index->as_jint(), null_check_info); +- __ branch(lir_cond_belowEqual, T_INT, stub); // forward branch +- } else { +- cmp_reg_mem(lir_cond_aboveEqual, index, array, +- arrayOopDesc::length_offset_in_bytes(), T_INT, null_check_info); +- __ branch(lir_cond_aboveEqual, T_INT, stub); // forward branch ++ cmp_mem_int_branch(lir_cond_belowEqual, array, arrayOopDesc::length_offset_in_bytes(), ++ index->as_jint(), stub, null_check_info); // forward branch ++ } else { ++ cmp_reg_mem_branch(lir_cond_aboveEqual, index, array, arrayOopDesc::length_offset_in_bytes(), ++ T_INT, stub, null_check_info); // forward branch + } + } + +@@ -494,12 +492,11 @@ + void LIRGenerator::nio_range_check(LIR_Opr buffer, LIR_Opr index, LIR_Opr result, CodeEmitInfo* info) { + CodeStub* stub = new RangeCheckStub(info, index); + if (index->is_constant()) { +- cmp_mem_int(lir_cond_belowEqual, buffer, java_nio_Buffer::limit_offset(), index->as_jint(), info); +- __ branch(lir_cond_belowEqual, T_INT, stub); // forward branch ++ cmp_mem_int_branch(lir_cond_belowEqual, buffer, java_nio_Buffer::limit_offset(), ++ index->as_jint(), stub, info); // forward branch + } else { +- cmp_reg_mem(lir_cond_aboveEqual, index, buffer, +- java_nio_Buffer::limit_offset(), T_INT, info); +- __ branch(lir_cond_aboveEqual, T_INT, stub); // forward branch ++ cmp_reg_mem_branch(lir_cond_aboveEqual, index, buffer, java_nio_Buffer::limit_offset(), ++ T_INT, stub, info); // forward branch + } + __ move(index, result); + } +@@ -935,7 +932,7 @@ + return tmp; + } + +-void LIRGenerator::profile_branch(If* if_instr, If::Condition cond) { ++void LIRGenerator::profile_branch(If* if_instr, If::Condition cond, LIR_Opr left, LIR_Opr right) { + if (if_instr->should_profile()) { + ciMethod* method = if_instr->profiled_method(); + assert(method != NULL, "method should be set if branch is profiled"); +@@ -956,10 +953,17 @@ + __ metadata2reg(md->constant_encoding(), md_reg); + + LIR_Opr data_offset_reg = new_pointer_register(); +- __ cmove(lir_cond(cond), +- LIR_OprFact::intptrConst(taken_count_offset), +- LIR_OprFact::intptrConst(not_taken_count_offset), +- data_offset_reg, as_BasicType(if_instr->x()->type())); ++ if (left == LIR_OprFact::illegalOpr && right == LIR_OprFact::illegalOpr) { ++ __ cmove(lir_cond(cond), ++ LIR_OprFact::intptrConst(taken_count_offset), ++ LIR_OprFact::intptrConst(not_taken_count_offset), ++ data_offset_reg, as_BasicType(if_instr->x()->type())); ++ } else { ++ __ cmp_cmove(lir_cond(cond), left, right, ++ LIR_OprFact::intptrConst(taken_count_offset), ++ LIR_OprFact::intptrConst(not_taken_count_offset), ++ data_offset_reg, as_BasicType(if_instr->x()->type())); ++ } + + // MDO cells are intptr_t, so the data_reg width is arch-dependent. 
+ LIR_Opr data_reg = new_pointer_register(); +@@ -1316,8 +1320,8 @@ + } + + __ move(new LIR_Address(rcvr.result(), java_lang_Class::klass_offset_in_bytes(), T_ADDRESS), temp, info); +- __ cmp(lir_cond_notEqual, temp, LIR_OprFact::metadataConst(0)); +- __ cmove(lir_cond_notEqual, LIR_OprFact::intConst(0), LIR_OprFact::intConst(1), result, T_BOOLEAN); ++ __ cmp_cmove(lir_cond_notEqual, temp, LIR_OprFact::metadataConst(0), ++ LIR_OprFact::intConst(0), LIR_OprFact::intConst(1), result, T_BOOLEAN); + } + + +@@ -1599,8 +1603,8 @@ + + if (GenerateRangeChecks && needs_range_check) { + if (use_length) { +- __ cmp(lir_cond_belowEqual, length.result(), index.result()); +- __ branch(lir_cond_belowEqual, T_INT, new RangeCheckStub(range_check_info, index.result(), array.result())); ++ CodeStub* stub = new RangeCheckStub(range_check_info, index.result(), array.result()); ++ __ cmp_branch(lir_cond_belowEqual, length.result(), index.result(), T_INT, stub); + } else { + array_range_check(array.result(), index.result(), null_check_info, range_check_info); + // range_check also does the null check +@@ -1778,12 +1782,9 @@ + CodeEmitInfo* info = state_for(x); + CodeStub* stub = new RangeCheckStub(info, index.result()); + if (index.result()->is_constant()) { +- cmp_mem_int(lir_cond_belowEqual, buf.result(), java_nio_Buffer::limit_offset(), index.result()->as_jint(), info); +- __ branch(lir_cond_belowEqual, T_INT, stub); ++ cmp_mem_int_branch(lir_cond_belowEqual, buf.result(), java_nio_Buffer::limit_offset(), index.result()->as_jint(), stub, info); + } else { +- cmp_reg_mem(lir_cond_aboveEqual, index.result(), buf.result(), +- java_nio_Buffer::limit_offset(), T_INT, info); +- __ branch(lir_cond_aboveEqual, T_INT, stub); ++ cmp_reg_mem_branch(lir_cond_aboveEqual, index.result(), buf.result(), java_nio_Buffer::limit_offset(), T_INT, stub, info); + } + __ move(index.result(), result); + } else { +@@ -1861,8 +1862,8 @@ + } else if (use_length) { + // TODO: use a (modified) version of array_range_check that does not require a + // constant length to be loaded to a register +- __ cmp(lir_cond_belowEqual, length.result(), index.result()); +- __ branch(lir_cond_belowEqual, T_INT, new RangeCheckStub(range_check_info, index.result(), array.result())); ++ CodeStub* stub = new RangeCheckStub(range_check_info, index.result(), array.result()); ++ __ cmp_branch(lir_cond_belowEqual, length.result(), index.result(), T_INT, stub); + } else { + array_range_check(array.result(), index.result(), null_check_info, range_check_info); + // The range check performs the null check, so clear it out for the load +@@ -2235,19 +2236,14 @@ + int high_key = one_range->high_key(); + BlockBegin* dest = one_range->sux(); + if (low_key == high_key) { +- __ cmp(lir_cond_equal, value, low_key); +- __ branch(lir_cond_equal, T_INT, dest); ++ __ cmp_branch(lir_cond_equal, value, low_key, T_INT, dest); + } else if (high_key - low_key == 1) { +- __ cmp(lir_cond_equal, value, low_key); +- __ branch(lir_cond_equal, T_INT, dest); +- __ cmp(lir_cond_equal, value, high_key); +- __ branch(lir_cond_equal, T_INT, dest); ++ __ cmp_branch(lir_cond_equal, value, low_key, T_INT, dest); ++ __ cmp_branch(lir_cond_equal, value, high_key, T_INT, dest); + } else { + LabelObj* L = new LabelObj(); +- __ cmp(lir_cond_less, value, low_key); +- __ branch(lir_cond_less, T_INT, L->label()); +- __ cmp(lir_cond_lessEqual, value, high_key); +- __ branch(lir_cond_lessEqual, T_INT, dest); ++ __ cmp_branch(lir_cond_less, value, low_key, T_INT, L->label()); ++ __ 
cmp_branch(lir_cond_lessEqual, value, high_key, T_INT, dest); + __ branch_destination(L->label()); + } + } +@@ -2347,12 +2343,11 @@ + __ move(LIR_OprFact::intptrConst(default_count_offset), data_offset_reg); + for (int i = 0; i < len; i++) { + int count_offset = md->byte_offset_of_slot(data, MultiBranchData::case_count_offset(i)); +- __ cmp(lir_cond_equal, value, i + lo_key); + __ move(data_offset_reg, tmp_reg); +- __ cmove(lir_cond_equal, +- LIR_OprFact::intptrConst(count_offset), +- tmp_reg, +- data_offset_reg, T_INT); ++ __ cmp_cmove(lir_cond_equal, value, LIR_OprFact::intConst(i + lo_key), ++ LIR_OprFact::intptrConst(count_offset), ++ tmp_reg, ++ data_offset_reg, T_INT); + } + + LIR_Opr data_reg = new_pointer_register(); +@@ -2366,8 +2361,7 @@ + do_SwitchRanges(create_lookup_ranges(x), value, x->default_sux()); + } else { + for (int i = 0; i < len; i++) { +- __ cmp(lir_cond_equal, value, i + lo_key); +- __ branch(lir_cond_equal, T_INT, x->sux_at(i)); ++ __ cmp_branch(lir_cond_equal, value, i + lo_key, T_INT, x->sux_at(i)); + } + __ jump(x->default_sux()); + } +@@ -2405,12 +2399,11 @@ + __ move(LIR_OprFact::intptrConst(default_count_offset), data_offset_reg); + for (int i = 0; i < len; i++) { + int count_offset = md->byte_offset_of_slot(data, MultiBranchData::case_count_offset(i)); +- __ cmp(lir_cond_equal, value, x->key_at(i)); + __ move(data_offset_reg, tmp_reg); +- __ cmove(lir_cond_equal, +- LIR_OprFact::intptrConst(count_offset), +- tmp_reg, +- data_offset_reg, T_INT); ++ __ cmp_cmove(lir_cond_equal, value, LIR_OprFact::intConst(x->key_at(i)), ++ LIR_OprFact::intptrConst(count_offset), ++ tmp_reg, ++ data_offset_reg, T_INT); + } + + LIR_Opr data_reg = new_pointer_register(); +@@ -2425,8 +2418,7 @@ + } else { + int len = x->length(); + for (int i = 0; i < len; i++) { +- __ cmp(lir_cond_equal, value, x->key_at(i)); +- __ branch(lir_cond_equal, T_INT, x->sux_at(i)); ++ __ cmp_branch(lir_cond_equal, value, x->key_at(i), T_INT, x->sux_at(i)); + } + __ jump(x->default_sux()); + } +@@ -2936,8 +2928,8 @@ + f_val.dont_load_item(); + LIR_Opr reg = rlock_result(x); + +- __ cmp(lir_cond(x->cond()), left.result(), right.result()); +- __ cmove(lir_cond(x->cond()), t_val.result(), f_val.result(), reg, as_BasicType(x->x()->type())); ++ __ cmp_cmove(lir_cond(x->cond()), left.result(), right.result(), ++ t_val.result(), f_val.result(), reg, as_BasicType(x->x()->type())); + } + + #ifdef JFR_HAVE_INTRINSICS +@@ -2981,8 +2973,7 @@ + __ move(LIR_OprFact::oopConst(NULL), result); + LIR_Opr jobj = new_register(T_METADATA); + __ move_wide(jobj_addr, jobj); +- __ cmp(lir_cond_equal, jobj, LIR_OprFact::metadataConst(0)); +- __ branch(lir_cond_equal, T_OBJECT, L_end->label()); ++ __ cmp_branch(lir_cond_equal, jobj, LIR_OprFact::metadataConst(0), T_OBJECT, L_end->label()); + + access_load(IN_NATIVE, T_OBJECT, LIR_OprFact::address(new LIR_Address(jobj, T_OBJECT)), result); + +@@ -3287,21 +3278,24 @@ + + void LIRGenerator::increment_backedge_counter_conditionally(LIR_Condition cond, LIR_Opr left, LIR_Opr right, CodeEmitInfo* info, int left_bci, int right_bci, int bci) { + if (compilation()->count_backedges()) { ++ LIR_Opr step = new_register(T_INT); ++ LIR_Opr plus_one = LIR_OprFact::intConst(InvocationCounter::count_increment); ++ LIR_Opr zero = LIR_OprFact::intConst(0); + #if defined(X86) && !defined(_LP64) + // BEWARE! On 32-bit x86 cmp clobbers its left argument so we need a temp copy. 
+ LIR_Opr left_copy = new_register(left->type()); + __ move(left, left_copy); + __ cmp(cond, left_copy, right); +-#else +- __ cmp(cond, left, right); +-#endif +- LIR_Opr step = new_register(T_INT); +- LIR_Opr plus_one = LIR_OprFact::intConst(InvocationCounter::count_increment); +- LIR_Opr zero = LIR_OprFact::intConst(0); + __ cmove(cond, + (left_bci < bci) ? plus_one : zero, + (right_bci < bci) ? plus_one : zero, + step, left->type()); ++#else ++ __ cmp_cmove(cond, left, right, ++ (left_bci < bci) ? plus_one : zero, ++ (right_bci < bci) ? plus_one : zero, ++ step, left->type()); ++#endif + increment_backedge_counter(info, step, bci); + } + } +@@ -3340,8 +3334,7 @@ + // DeoptimizeStub will reexecute from the current state in code info. + CodeStub* deopt = new DeoptimizeStub(info, Deoptimization::Reason_tenured, + Deoptimization::Action_make_not_entrant); +- __ cmp(lir_cond_lessEqual, result, LIR_OprFact::intConst(0)); +- __ branch(lir_cond_lessEqual, T_INT, deopt); ++ __ cmp_branch(lir_cond_lessEqual, result, LIR_OprFact::intConst(0), T_INT, deopt); + } + } + +@@ -3387,8 +3380,7 @@ + int freq = frequency << InvocationCounter::count_shift; + if (freq == 0) { + if (!step->is_constant()) { +- __ cmp(lir_cond_notEqual, step, LIR_OprFact::intConst(0)); +- __ branch(lir_cond_notEqual, T_ILLEGAL, overflow); ++ __ cmp_branch(lir_cond_notEqual, step, LIR_OprFact::intConst(0), T_ILLEGAL, overflow); + } else { + __ branch(lir_cond_always, T_ILLEGAL, overflow); + } +@@ -3396,12 +3388,11 @@ + LIR_Opr mask = load_immediate(freq, T_INT); + if (!step->is_constant()) { + // If step is 0, make sure the overflow check below always fails +- __ cmp(lir_cond_notEqual, step, LIR_OprFact::intConst(0)); +- __ cmove(lir_cond_notEqual, result, LIR_OprFact::intConst(InvocationCounter::count_increment), result, T_INT); ++ __ cmp_cmove(lir_cond_notEqual, step, LIR_OprFact::intConst(0), ++ result, LIR_OprFact::intConst(InvocationCounter::count_increment), result, T_INT); + } + __ logical_and(result, mask, result); +- __ cmp(lir_cond_equal, result, LIR_OprFact::intConst(0)); +- __ branch(lir_cond_equal, T_INT, overflow); ++ __ cmp_branch(lir_cond_equal, result, LIR_OprFact::intConst(0), T_INT, overflow); + } + __ branch_destination(overflow->continuation()); + } +@@ -3514,8 +3505,7 @@ + CodeEmitInfo *info = state_for(x, x->state()); + CodeStub* stub = new PredicateFailedStub(info); + +- __ cmp(lir_cond(cond), left, right); +- __ branch(lir_cond(cond), right->type(), stub); ++ __ cmp_branch(lir_cond(cond), left, right, right->type(), stub); + } + } + +@@ -3662,8 +3652,8 @@ + __ move(new LIR_Address(klass, in_bytes(Klass::layout_helper_offset()), T_INT), layout); + int diffbit = Klass::layout_helper_boolean_diffbit(); + __ logical_and(layout, LIR_OprFact::intConst(diffbit), layout); +- __ cmp(lir_cond_notEqual, layout, LIR_OprFact::intConst(0)); +- __ cmove(lir_cond_notEqual, value_fixed, value, value_fixed, T_BYTE); ++ __ cmp_cmove(lir_cond_notEqual, layout, LIR_OprFact::intConst(0), ++ value_fixed, value, value_fixed, T_BYTE); + value = value_fixed; + return value; + } +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/c1/c1_LIRGenerator.hpp b/src/hotspot/share/c1/c1_LIRGenerator.hpp +--- a/src/hotspot/share/c1/c1_LIRGenerator.hpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/c1/c1_LIRGenerator.hpp 2024-01-30 10:00:11.948097125 +0800 +@@ -363,8 +363,10 @@ + void new_instance (LIR_Opr dst, ciInstanceKlass* klass, bool is_unresolved, LIR_Opr scratch1, LIR_Opr 
scratch2, LIR_Opr scratch3, LIR_Opr scratch4, LIR_Opr klass_reg, CodeEmitInfo* info); + + // machine dependent +- void cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info); +- void cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info); ++ template <typename T> ++ void cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, int disp, int c, T tgt, CodeEmitInfo* info); ++ template <typename T> ++ void cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, T tgt, CodeEmitInfo* info); + + void arraycopy_helper(Intrinsic* x, int* flags, ciArrayKlass** expected_type); + +@@ -391,7 +393,7 @@ + + LIR_Opr safepoint_poll_register(); + +- void profile_branch(If* if_instr, If::Condition cond); ++ void profile_branch(If* if_instr, If::Condition cond, LIR_Opr left = LIR_OprFact::illegalOpr, LIR_Opr right = LIR_OprFact::illegalOpr); + void increment_event_counter_impl(CodeEmitInfo* info, + ciMethod *method, LIR_Opr step, int frequency, + int bci, bool backedge, bool notify); +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/c1/c1_LIR.hpp b/src/hotspot/share/c1/c1_LIR.hpp +--- a/src/hotspot/share/c1/c1_LIR.hpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/c1/c1_LIR.hpp 2024-01-30 10:00:11.948097125 +0800 +@@ -864,9 +864,11 @@ + class LIR_OpAllocObj; + class LIR_OpRoundFP; + class LIR_Op2; ++class LIR_OpCmpBranch; + class LIR_OpDelay; + class LIR_Op3; + class LIR_OpAllocArray; ++class LIR_Op4; + class LIR_OpCall; + class LIR_OpJavaCall; + class LIR_OpRTCall; +@@ -933,6 +935,8 @@ + , lir_cmp_l2i + , lir_ucmp_fd2i + , lir_cmp_fd2i ++ , lir_cmp_branch ++ , lir_cmp_float_branch + , lir_cmove + , lir_add + , lir_sub +@@ -964,6 +968,9 @@ + , lir_fmad + , lir_fmaf + , end_op3 ++ , begin_op4 ++ , lir_cmp_cmove ++ , end_op4 + , begin_opJavaCall + , lir_static_call + , lir_optvirtual_call +@@ -1128,12 +1135,14 @@ + virtual LIR_OpAllocObj* as_OpAllocObj() { return NULL; } + virtual LIR_OpRoundFP* as_OpRoundFP() { return NULL; } + virtual LIR_OpBranch* as_OpBranch() { return NULL; } ++ virtual LIR_OpCmpBranch* as_OpCmpBranch() { return NULL; } + virtual LIR_OpRTCall* as_OpRTCall() { return NULL; } + virtual LIR_OpConvert* as_OpConvert() { return NULL; } + virtual LIR_Op0* as_Op0() { return NULL; } + virtual LIR_Op1* as_Op1() { return NULL; } + virtual LIR_Op2* as_Op2() { return NULL; } + virtual LIR_Op3* as_Op3() { return NULL; } ++ virtual LIR_Op4* as_Op4() { return NULL; } + virtual LIR_OpArrayCopy* as_OpArrayCopy() { return NULL; } + virtual LIR_OpUpdateCRC32* as_OpUpdateCRC32() { return NULL; } + virtual LIR_OpTypeCheck* as_OpTypeCheck() { return NULL; } +@@ -1463,15 +1472,18 @@ + private: + Bytecodes::Code _bytecode; + ConversionStub* _stub; ++ LIR_Opr _tmp; + + public: +- LIR_OpConvert(Bytecodes::Code code, LIR_Opr opr, LIR_Opr result, ConversionStub* stub) ++ LIR_OpConvert(Bytecodes::Code code, LIR_Opr opr, LIR_Opr result, ConversionStub* stub, LIR_Opr tmp) + : LIR_Op1(lir_convert, opr, result) + , _stub(stub) +- , _bytecode(code) {} ++ , _bytecode(code) ++ , _tmp(tmp) {} + + Bytecodes::Code bytecode() const { return _bytecode; } + ConversionStub* stub() const { return _stub; } ++ LIR_Opr tmp() const { return _tmp; } + + virtual void emit_code(LIR_Assembler* masm); + virtual LIR_OpConvert* as_OpConvert() { return this; } +@@ -1626,7 +1638,7 @@ + , _tmp3(LIR_OprFact::illegalOpr) + , _tmp4(LIR_OprFact::illegalOpr) + , _tmp5(LIR_OprFact::illegalOpr) 
{ +- assert(code == lir_cmp || code == lir_assert, "code check"); ++ assert(code == lir_cmp || code == lir_cmp_branch || code == lir_cmp_float_branch || code == lir_assert, "code check"); + } + + LIR_Op2(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) +@@ -1658,7 +1670,7 @@ + , _tmp3(LIR_OprFact::illegalOpr) + , _tmp4(LIR_OprFact::illegalOpr) + , _tmp5(LIR_OprFact::illegalOpr) { +- assert(code != lir_cmp && is_in_range(code, begin_op2, end_op2), "code check"); ++ assert((code != lir_cmp && code != lir_cmp_branch && code != lir_cmp_float_branch) && is_in_range(code, begin_op2, end_op2), "code check"); + } + + LIR_Op2(LIR_Code code, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, LIR_Opr tmp1, LIR_Opr tmp2 = LIR_OprFact::illegalOpr, +@@ -1674,7 +1686,7 @@ + , _tmp3(tmp3) + , _tmp4(tmp4) + , _tmp5(tmp5) { +- assert(code != lir_cmp && is_in_range(code, begin_op2, end_op2), "code check"); ++ assert((code != lir_cmp && code != lir_cmp_branch && code != lir_cmp_float_branch) && is_in_range(code, begin_op2, end_op2), "code check"); + } + + LIR_Opr in_opr1() const { return _opr1; } +@@ -1686,10 +1698,12 @@ + LIR_Opr tmp4_opr() const { return _tmp4; } + LIR_Opr tmp5_opr() const { return _tmp5; } + LIR_Condition condition() const { +- assert(code() == lir_cmp || code() == lir_cmove || code() == lir_assert, "only valid for cmp and cmove and assert"); return _condition; ++ assert(code() == lir_cmp || code() == lir_cmp_branch || code() == lir_cmp_float_branch || code() == lir_cmove || code() == lir_assert, "only valid for cmp and cmove and assert"); ++ return _condition; + } + void set_condition(LIR_Condition condition) { +- assert(code() == lir_cmp || code() == lir_cmove, "only valid for cmp and cmove"); _condition = condition; ++ assert(code() == lir_cmp || code() == lir_cmp_branch || code() == lir_cmp_float_branch || code() == lir_cmove, "only valid for cmp and cmove"); ++ _condition = condition; + } + + void set_fpu_stack_size(int size) { _fpu_stack_size = size; } +@@ -1703,6 +1717,43 @@ + virtual void print_instr(outputStream* out) const PRODUCT_RETURN; + }; + ++class LIR_OpCmpBranch: public LIR_Op2 { ++ friend class LIR_OpVisitState; ++ ++ private: ++ Label* _label; ++ BlockBegin* _block; // if this is a branch to a block, this is the block ++ BlockBegin* _ublock; // if this is a float-branch, this is the unorderd block ++ CodeStub* _stub; // if this is a branch to a stub, this is the stub ++ ++ public: ++ LIR_OpCmpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, Label* lbl, CodeEmitInfo* info = NULL) ++ : LIR_Op2(lir_cmp_branch, cond, left, right, info) ++ , _label(lbl) ++ , _block(NULL) ++ , _ublock(NULL) ++ , _stub(NULL) { } ++ ++ LIR_OpCmpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, CodeStub* stub, CodeEmitInfo* info = NULL); ++ LIR_OpCmpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, BlockBegin* block, CodeEmitInfo* info = NULL); ++ ++ // for unordered comparisons ++ LIR_OpCmpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, BlockBegin* block, BlockBegin* ublock, CodeEmitInfo* info = NULL); ++ ++ Label* label() const { return _label; } ++ BlockBegin* block() const { return _block; } ++ BlockBegin* ublock() const { return _ublock; } ++ CodeStub* stub() const { return _stub; } ++ ++ void change_block(BlockBegin* b); ++ void change_ublock(BlockBegin* b); ++ void negate_cond(); ++ ++ virtual void emit_code(LIR_Assembler* masm); ++ virtual LIR_OpCmpBranch* as_OpCmpBranch() { return this; } ++ virtual void 
print_instr(outputStream* out) const PRODUCT_RETURN; ++}; ++ + class LIR_OpAllocArray : public LIR_Op { + friend class LIR_OpVisitState; + +@@ -1767,6 +1818,48 @@ + }; + + ++class LIR_Op4: public LIR_Op { ++ friend class LIR_OpVisitState; ++ ++ private: ++ LIR_Opr _opr1; ++ LIR_Opr _opr2; ++ LIR_Opr _opr3; ++ LIR_Opr _opr4; ++ BasicType _type; ++ LIR_Condition _condition; ++ ++ void verify() const; ++ ++ public: ++ LIR_Op4(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr opr3, LIR_Opr opr4, LIR_Opr result, BasicType type) ++ : LIR_Op(code, result, NULL) ++ , _opr1(opr1) ++ , _opr2(opr2) ++ , _opr3(opr3) ++ , _opr4(opr4) ++ , _type(type) ++ , _condition(condition) { ++ assert(is_in_range(code, begin_op4, end_op4), "code check"); ++ assert(type != T_ILLEGAL, "cmove should have type"); ++ } ++ LIR_Opr in_opr1() const { return _opr1; } ++ LIR_Opr in_opr2() const { return _opr2; } ++ LIR_Opr in_opr3() const { return _opr3; } ++ LIR_Opr in_opr4() const { return _opr4; } ++ BasicType type() const { return _type; } ++ LIR_Condition condition() const { ++ assert(code() == lir_cmp_cmove, "only valid for cmp cmove"); return _condition; ++ } ++ void set_condition(LIR_Condition condition) { ++ assert(code() == lir_cmp_cmove, "only valid for cmp cmove"); _condition = condition; ++ } ++ ++ virtual void emit_code(LIR_Assembler* masm); ++ virtual LIR_Op4* as_Op4() { return this; } ++ virtual void print_instr(outputStream* out) const PRODUCT_RETURN; ++}; ++ + //-------------------------------- + class LabelObj: public CompilationResourceObj { + private: +@@ -2115,7 +2208,9 @@ + + void safepoint(LIR_Opr tmp, CodeEmitInfo* info) { append(new LIR_Op1(lir_safepoint, tmp, info)); } + +- void convert(Bytecodes::Code code, LIR_Opr left, LIR_Opr dst, ConversionStub* stub = NULL/*, bool is_32bit = false*/) { append(new LIR_OpConvert(code, left, dst, stub)); } ++ void convert(Bytecodes::Code code, LIR_Opr left, LIR_Opr dst, ConversionStub* stub = NULL, LIR_Opr tmp = LIR_OprFact::illegalOpr) { ++ append(new LIR_OpConvert(code, left, dst, stub, tmp)); ++ } + + void logical_and (LIR_Opr left, LIR_Opr right, LIR_Opr dst) { append(new LIR_Op2(lir_logic_and, left, right, dst)); } + void logical_or (LIR_Opr left, LIR_Opr right, LIR_Opr dst) { append(new LIR_Op2(lir_logic_or, left, right, dst)); } +@@ -2146,6 +2241,15 @@ + cmp(condition, left, LIR_OprFact::intConst(right), info); + } + ++ // machine dependent ++ template <typename T> ++ void cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, T tgt, CodeEmitInfo* info = NULL); ++ template <typename T> ++ void cmp_branch(LIR_Condition condition, LIR_Opr left, int right, BasicType type, T tgt, CodeEmitInfo* info = NULL) { ++ cmp_branch(condition, left, LIR_OprFact::intConst(right), type, tgt, info); ++ } ++ void cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* unordered); ++ + void cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info); + void cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Address* addr, CodeEmitInfo* info); + +@@ -2153,6 +2257,9 @@ + append(new LIR_Op2(lir_cmove, condition, src1, src2, dst, type)); + } + ++ // machine dependent ++ void cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type); ++ + void cas_long(LIR_Opr addr, LIR_Opr cmp_value, LIR_Opr new_value, + LIR_Opr t1, LIR_Opr t2, LIR_Opr result = LIR_OprFact::illegalOpr); + void cas_obj(LIR_Opr addr, LIR_Opr 
cmp_value, LIR_Opr new_value, +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/code/nmethod.cpp b/src/hotspot/share/code/nmethod.cpp +--- a/src/hotspot/share/code/nmethod.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/code/nmethod.cpp 2024-01-30 10:00:11.968096887 +0800 +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021. These ++ * modifications are Copyright (c) 2018, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "jvm.h" + #include "code/codeCache.hpp" +@@ -2155,7 +2161,8 @@ + //verify_interrupt_point(iter.addr()); + break; + case relocInfo::runtime_call_type: +- case relocInfo::runtime_call_w_cp_type: { ++ NOT_MIPS64(case relocInfo::runtime_call_w_cp_type:) ++ { + address destination = iter.reloc()->value(); + // Right now there is no way to find out which entries support + // an interrupt point. It would be nice if we had this +@@ -2392,7 +2399,8 @@ + return st.as_string(); + } + case relocInfo::runtime_call_type: +- case relocInfo::runtime_call_w_cp_type: { ++ NOT_MIPS64(case relocInfo::runtime_call_w_cp_type:) ++ { + stringStream st; + st.print("runtime_call"); + CallRelocation* r = (CallRelocation*)iter.reloc(); +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/code/relocInfo.cpp b/src/hotspot/share/code/relocInfo.cpp +--- a/src/hotspot/share/code/relocInfo.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/code/relocInfo.cpp 2024-01-30 10:00:11.971430179 +0800 +@@ -433,6 +433,7 @@ + _cached_value = x0==0? NULL: address_from_scaled_offset(x0, point); + } + ++#ifndef MIPS64 + void runtime_call_w_cp_Relocation::pack_data_to(CodeSection * dest) { + short* p = pack_1_int_to((short *)dest->locs_end(), (jint)(_offset >> 2)); + dest->set_locs_end((relocInfo*) p); +@@ -441,6 +442,7 @@ + void runtime_call_w_cp_Relocation::unpack_data() { + _offset = unpack_1_int() << 2; + } ++#endif + + void static_stub_Relocation::pack_data_to(CodeSection* dest) { + short* p = (short*) dest->locs_end(); +@@ -910,7 +912,7 @@ + break; + } + case relocInfo::runtime_call_type: +- case relocInfo::runtime_call_w_cp_type: ++ NOT_MIPS64(case relocInfo::runtime_call_w_cp_type:) + { + CallRelocation* r = (CallRelocation*) reloc(); + tty->print(" | [destination=" INTPTR_FORMAT "]", p2i(r->destination())); +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/code/relocInfo.hpp b/src/hotspot/share/code/relocInfo.hpp +--- a/src/hotspot/share/code/relocInfo.hpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/code/relocInfo.hpp 2024-01-30 10:00:11.971430179 +0800 +@@ -269,7 +269,11 @@ + poll_return_type = 11, // polling instruction for safepoints at return + metadata_type = 12, // metadata that used to be oops + trampoline_stub_type = 13, // stub-entry for trampoline ++#ifndef MIPS64 + runtime_call_w_cp_type = 14, // Runtime call which may load its target from the constant pool ++#else ++ internal_pc_type = 14, // tag for internal data ++#endif + data_prefix_tag = 15, // tag for a prefix (carries data arguments) + type_mask = 15 // A mask which selects only the above values + }; +@@ -304,13 +308,13 @@ + visitor(static_call) \ + visitor(static_stub) \ + visitor(runtime_call) \ +- visitor(runtime_call_w_cp) \ ++ NOT_MIPS64(visitor(runtime_call_w_cp)) \ + visitor(external_word) \ + visitor(internal_word) \ + 
visitor(poll) \ + visitor(poll_return) \ +- visitor(section_word) \ + visitor(trampoline_stub) \ ++ NOT_MIPS64(visitor(section_word))MIPS64_ONLY(ZERO_ONLY(visitor(section_word))NOT_ZERO(visitor(internal_pc))) + + + public: +@@ -1174,6 +1178,15 @@ + }; + + ++#ifdef MIPS64 ++// to handle the set_last_java_frame pc ++class internal_pc_Relocation : public Relocation { ++ relocInfo::relocType type() { return relocInfo::internal_pc_type; } ++ public: ++ address pc() { return pd_get_address_from_code(); } ++ void fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest); ++}; ++#else + class runtime_call_w_cp_Relocation : public CallRelocation { + relocInfo::relocType type() { return relocInfo::runtime_call_w_cp_type; } + +@@ -1202,6 +1215,7 @@ + void pack_data_to(CodeSection * dest); + void unpack_data(); + }; ++#endif + + // Trampoline Relocations. + // A trampoline allows to encode a small branch in the code, even if there +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/code/vtableStubs.cpp b/src/hotspot/share/code/vtableStubs.cpp +--- a/src/hotspot/share/code/vtableStubs.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/code/vtableStubs.cpp 2024-01-30 10:00:11.971430179 +0800 +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2019, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "code/vtableStubs.hpp" + #include "compiler/compileBroker.hpp" +@@ -98,7 +104,11 @@ + + #if defined(PRODUCT) + // These values are good for the PRODUCT case (no tracing). ++#if defined MIPS64 || defined LOONGARCH64 ++ static const int first_vtableStub_size = 128; ++#else + static const int first_vtableStub_size = 64; ++#endif + static const int first_itableStub_size = 256; + #else + // These values are good for the non-PRODUCT case (when tracing can be switched on). +@@ -109,6 +119,7 @@ + // vtable itable + // aarch64: 460 324 + // arm: ? ? ++ // mips64: 728 328 + // ppc (linux, BE): 404 288 + // ppc (linux, LE): 356 276 + // ppc (AIX): 416 296 +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/gc/g1/c1/g1BarrierSetC1.cpp b/src/hotspot/share/gc/g1/c1/g1BarrierSetC1.cpp +--- a/src/hotspot/share/gc/g1/c1/g1BarrierSetC1.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/gc/g1/c1/g1BarrierSetC1.cpp 2024-01-30 10:00:11.981430060 +0800 +@@ -74,7 +74,6 @@ + // Read the marking-in-progress flag. 
+ LIR_Opr flag_val = gen->new_register(T_INT); + __ load(mark_active_flag_addr, flag_val); +- __ cmp(lir_cond_notEqual, flag_val, LIR_OprFact::intConst(0)); + + LIR_PatchCode pre_val_patch_code = lir_patch_none; + +@@ -103,7 +102,7 @@ + slow = new G1PreBarrierStub(pre_val); + } + +- __ branch(lir_cond_notEqual, T_INT, slow); ++ __ cmp_branch(lir_cond_notEqual, flag_val, LIR_OprFact::intConst(0), T_INT, slow); + __ branch_destination(slow->continuation()); + } + +@@ -168,10 +167,9 @@ + } + assert(new_val->is_register(), "must be a register at this point"); + +- __ cmp(lir_cond_notEqual, xor_shift_res, LIR_OprFact::intptrConst(NULL_WORD)); +- + CodeStub* slow = new G1PostBarrierStub(addr, new_val); +- __ branch(lir_cond_notEqual, LP64_ONLY(T_LONG) NOT_LP64(T_INT), slow); ++ __ cmp_branch(lir_cond_notEqual, xor_shift_res, LIR_OprFact::intptrConst(NULL_WORD), ++ LP64_ONLY(T_LONG) NOT_LP64(T_INT), slow); + __ branch_destination(slow->continuation()); + } + +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/gc/g1/g1FullGCMarker.inline.hpp b/src/hotspot/share/gc/g1/g1FullGCMarker.inline.hpp +--- a/src/hotspot/share/gc/g1/g1FullGCMarker.inline.hpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/gc/g1/g1FullGCMarker.inline.hpp 2024-01-30 10:00:11.991429941 +0800 +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022. These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_GC_G1_G1MARKSTACK_INLINE_HPP + #define SHARE_VM_GC_G1_G1MARKSTACK_INLINE_HPP + +@@ -71,6 +77,7 @@ + _oop_stack.push(obj); + assert(_bitmap->is_marked(obj), "Must be marked now - map self"); + } else { ++ DEBUG_ONLY(OrderAccess::loadload()); + assert(_bitmap->is_marked(obj) || G1ArchiveAllocator::is_closed_archive_object(obj), + "Must be marked by other or closed archive object"); + } +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/gc/parallel/psPromotionManager.inline.hpp b/src/hotspot/share/gc/parallel/psPromotionManager.inline.hpp +--- a/src/hotspot/share/gc/parallel/psPromotionManager.inline.hpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/gc/parallel/psPromotionManager.inline.hpp 2024-01-30 10:00:12.004763116 +0800 +@@ -51,8 +51,9 @@ + inline void PSPromotionManager::claim_or_forward_internal_depth(T* p) { + if (p != NULL) { // XXX: error if p != NULL here + oop o = RawAccess::oop_load(p); +- if (o->is_forwarded()) { +- o = o->forwardee(); ++ markOop m = o->mark_raw(); ++ if (m->is_marked()) { ++ o = (oop) m->decode_pointer(); + // Card mark + if (PSScavenge::is_obj_in_young(o)) { + PSScavenge::card_table()->inline_write_ref_field_gc(p, o); +@@ -282,13 +283,17 @@ + assert(should_scavenge(p, true), "revisiting object?"); + + oop o = RawAccess::oop_load(p); +- oop new_obj = o->is_forwarded() +- ? o->forwardee() +- : copy_to_survivor_space(o); ++ oop new_obj; ++ markOop m = o->mark_raw(); ++ if (m->is_marked()) { ++ new_obj = (oop) m->decode_pointer(); ++ } else { ++ new_obj = copy_to_survivor_space(o); ++ } + + // This code must come after the CAS test, or it will print incorrect + // information. 
+- if (log_develop_is_enabled(Trace, gc, scavenge) && o->is_forwarded()) { ++ if (log_develop_is_enabled(Trace, gc, scavenge) && m->is_marked()) { + log_develop_trace(gc, scavenge)("{%s %s " PTR_FORMAT " -> " PTR_FORMAT " (%d)}", + "forwarding", + new_obj->klass()->internal_name(), p2i((void *)o), p2i((void *)new_obj), new_obj->size()); +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/gc/parallel/psScavenge.inline.hpp b/src/hotspot/share/gc/parallel/psScavenge.inline.hpp +--- a/src/hotspot/share/gc/parallel/psScavenge.inline.hpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/gc/parallel/psScavenge.inline.hpp 2024-01-30 10:00:12.008096410 +0800 +@@ -104,8 +104,9 @@ + + oop o = *p; + oop new_obj; +- if (o->is_forwarded()) { +- new_obj = o->forwardee(); ++ markOop m = o->mark_raw(); ++ if (m->is_marked()) { ++ new_obj = (oop) m->decode_pointer(); + } else { + new_obj = _pm->copy_to_survivor_space(o); + } +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/gc/shared/c1/barrierSetC1.cpp b/src/hotspot/share/gc/shared/c1/barrierSetC1.cpp +--- a/src/hotspot/share/gc/shared/c1/barrierSetC1.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/gc/shared/c1/barrierSetC1.cpp 2024-01-30 10:00:12.011429704 +0800 +@@ -192,8 +192,7 @@ + /* Normalize boolean value returned by unsafe operation, i.e., value != 0 ? value = true : value false. */ + if (mask_boolean) { + LabelObj* equalZeroLabel = new LabelObj(); +- __ cmp(lir_cond_equal, result, 0); +- __ branch(lir_cond_equal, T_BOOLEAN, equalZeroLabel->label()); ++ __ cmp_branch(lir_cond_equal, result, 0, T_BOOLEAN, equalZeroLabel->label()); + __ move(LIR_OprFact::intConst(1), result); + __ branch_destination(equalZeroLabel->label()); + } +@@ -320,14 +319,12 @@ + referent_off = gen->new_register(T_LONG); + __ move(LIR_OprFact::longConst(java_lang_ref_Reference::referent_offset), referent_off); + } +- __ cmp(lir_cond_notEqual, offset, referent_off); +- __ branch(lir_cond_notEqual, offset->type(), cont->label()); ++ __ cmp_branch(lir_cond_notEqual, offset, referent_off, offset->type(), cont->label()); + } + if (gen_source_check) { + // offset is a const and equals referent offset + // if (source == null) -> continue +- __ cmp(lir_cond_equal, base_reg, LIR_OprFact::oopConst(NULL)); +- __ branch(lir_cond_equal, T_OBJECT, cont->label()); ++ __ cmp_branch(lir_cond_equal, base_reg, LIR_OprFact::oopConst(NULL), T_OBJECT, cont->label()); + } + LIR_Opr src_klass = gen->new_register(T_METADATA); + if (gen_type_check) { +@@ -337,8 +334,7 @@ + LIR_Address* reference_type_addr = new LIR_Address(src_klass, in_bytes(InstanceKlass::reference_type_offset()), T_BYTE); + LIR_Opr reference_type = gen->new_register(T_INT); + __ move(reference_type_addr, reference_type); +- __ cmp(lir_cond_equal, reference_type, LIR_OprFact::intConst(REF_NONE)); +- __ branch(lir_cond_equal, T_INT, cont->label()); ++ __ cmp_branch(lir_cond_equal, reference_type, LIR_OprFact::intConst(REF_NONE), T_INT, cont->label()); + } + } + } +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/gc/shared/c1/cardTableBarrierSetC1.cpp b/src/hotspot/share/gc/shared/c1/cardTableBarrierSetC1.cpp +--- a/src/hotspot/share/gc/shared/c1/cardTableBarrierSetC1.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/gc/shared/c1/cardTableBarrierSetC1.cpp 2024-01-30 10:00:12.011429704 +0800 +@@ -89,8 +89,7 @@ + __ move(card_addr, cur_value); + + LabelObj* 
L_already_dirty = new LabelObj(); +- __ cmp(lir_cond_equal, cur_value, dirty); +- __ branch(lir_cond_equal, T_BYTE, L_already_dirty->label()); ++ __ cmp_branch(lir_cond_equal, cur_value, dirty, T_BYTE, L_already_dirty->label()); + __ move(dirty, card_addr); + __ branch_destination(L_already_dirty->label()); + } else { +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/gc/shenandoah/c1/shenandoahBarrierSetC1.cpp b/src/hotspot/share/gc/shenandoah/c1/shenandoahBarrierSetC1.cpp +--- a/src/hotspot/share/gc/shenandoah/c1/shenandoahBarrierSetC1.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/gc/shenandoah/c1/shenandoahBarrierSetC1.cpp 2024-01-30 10:00:12.021429583 +0800 +@@ -73,7 +73,6 @@ + // Read the marking-in-progress flag. + LIR_Opr flag_val = gen->new_register(T_INT); + __ load(mark_active_flag_addr, flag_val); +- __ cmp(lir_cond_notEqual, flag_val, LIR_OprFact::intConst(0)); + + LIR_PatchCode pre_val_patch_code = lir_patch_none; + +@@ -101,7 +100,7 @@ + slow = new ShenandoahPreBarrierStub(pre_val); + } + +- __ branch(lir_cond_notEqual, T_INT, slow); ++ __ cmp_branch(lir_cond_notEqual, flag_val, LIR_OprFact::intConst(0), T_INT, slow); + __ branch_destination(slow->continuation()); + } + +@@ -144,10 +143,9 @@ + __ logical_and(flag_val, mask_reg, masked_flag); + flag_val = masked_flag; + } +- __ cmp(lir_cond_notEqual, flag_val, LIR_OprFact::intConst(0)); + + CodeStub* slow = new ShenandoahLoadReferenceBarrierStub(obj, addr, result, tmp1, tmp2); +- __ branch(lir_cond_notEqual, T_INT, slow); ++ __ cmp_branch(lir_cond_notEqual, flag_val, LIR_OprFact::intConst(0), T_INT, slow); + __ branch_destination(slow->continuation()); + + return result; +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp b/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp +--- a/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp 2024-01-30 10:00:12.031429464 +0800 +@@ -105,15 +105,20 @@ + + virtual void visit(LIR_OpVisitState* state) { + state->do_input(_opr); ++ if (_result->is_valid()) { ++ state->do_temp(_opr); ++ state->do_output(_result); ++ } + } + + virtual void emit_code(LIR_Assembler* ce) { +- ZBarrierSet::assembler()->generate_c1_load_barrier_test(ce, _opr); ++ ZBarrierSet::assembler()->generate_c1_load_barrier_test(ce, _opr, result_opr()); + } + + virtual void print_instr(outputStream* out) const { + _opr->print(out); + out->print(" "); ++ result_opr()->print(out); + } + + #ifndef PRODUCT +@@ -149,13 +154,21 @@ + #endif + + void ZBarrierSetC1::load_barrier(LIRAccess& access, LIR_Opr result) const { ++ LIR_Op* op = new LIR_OpZLoadBarrierTest(result); ++ + // Fast path +- __ append(new LIR_OpZLoadBarrierTest(result)); ++ __ append(op); + + // Slow path + const address runtime_stub = load_barrier_on_oop_field_preloaded_runtime_stub(access.decorators()); + CodeStub* const stub = new ZLoadBarrierStubC1(access, result, runtime_stub); +- __ branch(lir_cond_notEqual, T_ADDRESS, stub); ++ if (ZPlatformLoadBarrierTestResultInRegister) { ++ LIR_Opr res = access.gen()->new_register(result->type()); ++ op->set_result_opr(res); ++ __ cmp_branch(lir_cond_notEqual, res, LIR_OprFact::intptrConst(NULL_WORD), T_ADDRESS, stub); ++ } else { ++ __ branch(lir_cond_notEqual, T_ADDRESS, stub); ++ } + __ branch_destination(stub->continuation()); + } + +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck 
a/src/hotspot/share/interpreter/interpreterRuntime.cpp b/src/hotspot/share/interpreter/interpreterRuntime.cpp +--- a/src/hotspot/share/interpreter/interpreterRuntime.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/interpreter/interpreterRuntime.cpp 2024-01-30 10:00:12.041429345 +0800 +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2018, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "classfile/javaClasses.inline.hpp" + #include "classfile/systemDictionary.hpp" +@@ -1497,7 +1503,7 @@ + // preparing the same method will be sure to see non-null entry & mirror. + IRT_END + +-#if defined(IA32) || defined(AMD64) || defined(ARM) ++#if defined(IA32) || defined(AMD64) || defined(ARM) || defined(MIPS64) || defined(LOONGARCH64) + IRT_LEAF(void, InterpreterRuntime::popframe_move_outgoing_args(JavaThread* thread, void* src_address, void* dest_address)) + if (src_address == dest_address) { + return; +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/interpreter/interpreterRuntime.hpp b/src/hotspot/share/interpreter/interpreterRuntime.hpp +--- a/src/hotspot/share/interpreter/interpreterRuntime.hpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/interpreter/interpreterRuntime.hpp 2024-01-30 10:00:12.041429345 +0800 +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2018, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_INTERPRETER_INTERPRETERRUNTIME_HPP + #define SHARE_VM_INTERPRETER_INTERPRETERRUNTIME_HPP + +@@ -146,7 +152,7 @@ + Method* method, + intptr_t* from, intptr_t* to); + +-#if defined(IA32) || defined(AMD64) || defined(ARM) ++#if defined(IA32) || defined(AMD64) || defined(ARM) || defined(MIPS64) || defined(LOONGARCH64) + // Popframe support (only needed on x86, AMD64 and ARM) + static void popframe_move_outgoing_args(JavaThread* thread, void* src_address, void* dest_address); + #endif +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/interpreter/templateInterpreterGenerator.hpp b/src/hotspot/share/interpreter/templateInterpreterGenerator.hpp +--- a/src/hotspot/share/interpreter/templateInterpreterGenerator.hpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/interpreter/templateInterpreterGenerator.hpp 2024-01-30 10:00:12.044762639 +0800 +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021. These ++ * modifications are Copyright (c) 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #ifndef SHARE_VM_INTERPRETER_TEMPLATEINTERPRETERGENERATOR_HPP + #define SHARE_VM_INTERPRETER_TEMPLATEINTERPRETERGENERATOR_HPP + +@@ -114,9 +120,9 @@ + void restore_native_result(void); + #endif // SPARC + +-#ifdef AARCH64 ++#if defined(AARCH64) || defined(MIPS64) || defined(LOONGARCH64) + void generate_transcendental_entry(AbstractInterpreter::MethodKind kind, int fpargs); +-#endif // AARCH64 ++#endif // AARCH64 || MIPS64 || LOONGARCH64 + + #ifdef PPC + void lock_method(Register Rflags, Register Rscratch1, Register Rscratch2, bool flags_preloaded=false); +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp b/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp +--- a/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp 2024-01-30 10:00:12.054762520 +0800 +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2019, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_JFR_UTILITIES_JFRBIGENDIAN_HPP + #define SHARE_VM_JFR_UTILITIES_JFRBIGENDIAN_HPP + +@@ -102,7 +108,7 @@ + inline bool JfrBigEndian::platform_supports_unaligned_reads(void) { + #if defined(IA32) || defined(AMD64) || defined(PPC) || defined(S390) + return true; +-#elif defined(SPARC) || defined(ARM) || defined(AARCH64) ++#elif defined(SPARC) || defined(ARM) || defined(AARCH64) || defined(MIPS) || defined(LOONGARCH) + return false; + #else + #warning "Unconfigured platform" +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/jvmci/vmStructs_jvmci.cpp b/src/hotspot/share/jvmci/vmStructs_jvmci.cpp +--- a/src/hotspot/share/jvmci/vmStructs_jvmci.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/jvmci/vmStructs_jvmci.cpp 2024-01-30 10:00:12.061429106 +0800 +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #include "precompiled.hpp" + #include "code/codeBlob.hpp" + #include "compiler/abstractCompiler.hpp" +@@ -714,6 +720,35 @@ + + #endif + ++ ++#ifdef LOONGARCH64 ++ ++#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ ++ volatile_nonstatic_field(JavaFrameAnchor, _last_Java_fp, intptr_t*) ++ ++#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \ ++ declare_constant(VM_Version::CPU_LA32) \ ++ declare_constant(VM_Version::CPU_LA64) \ ++ declare_constant(VM_Version::CPU_LLEXC) \ ++ declare_constant(VM_Version::CPU_SCDLY) \ ++ declare_constant(VM_Version::CPU_LLDBAR) \ ++ declare_constant(VM_Version::CPU_LBT_X86) \ ++ declare_constant(VM_Version::CPU_LBT_ARM) \ ++ declare_constant(VM_Version::CPU_LBT_MIPS) \ ++ declare_constant(VM_Version::CPU_CCDMA) \ ++ declare_constant(VM_Version::CPU_COMPLEX) \ ++ declare_constant(VM_Version::CPU_FP) \ ++ declare_constant(VM_Version::CPU_CRYPTO) \ ++ declare_constant(VM_Version::CPU_LSX) \ ++ declare_constant(VM_Version::CPU_LASX) \ ++ declare_constant(VM_Version::CPU_LAM) \ ++ declare_constant(VM_Version::CPU_LLSYNC) \ ++ declare_constant(VM_Version::CPU_TGTSYNC) \ ++ declare_constant(VM_Version::CPU_ULSYNC) \ ++ declare_constant(VM_Version::CPU_UAL) ++ ++#endif ++ + + #ifdef X86 + +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/memory/metaspace.cpp b/src/hotspot/share/memory/metaspace.cpp +--- a/src/hotspot/share/memory/metaspace.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/memory/metaspace.cpp 2024-01-30 10:00:12.064762400 +0800 +@@ -1083,12 +1083,12 @@ + // Don't use large pages for the class space. + bool large_pages = false; + +-#if !(defined(AARCH64) || defined(PPC64)) ++#if !(defined(AARCH64) || defined(PPC64) || defined(MIPS64) || defined(LOONGARCH64)) + ReservedSpace metaspace_rs = ReservedSpace(compressed_class_space_size(), + _reserve_alignment, + large_pages, + requested_addr); +-#else // AARCH64 || PPC64 ++#else // AARCH64 || PPC64 || MIPS64 || LOONGARCH64 + + ReservedSpace metaspace_rs; + +@@ -1114,7 +1114,8 @@ + // below 32g to get a zerobased CCS. For simplicity we reuse the search + // strategy for AARCH64. + +- size_t increment = AARCH64_ONLY(4*)G; ++ // MIPS: Cannot mmap for 1G space at 4G position, and prepare for future optimization. ++ size_t increment = AARCH64_ONLY(4*)MIPS64_ONLY(4*)LOONGARCH64_ONLY(4*)G; + for (char *a = align_up(requested_addr, increment); + a < (char*)(1024*G); + a += increment) { +@@ -1145,7 +1146,7 @@ + } + } + +-#endif // AARCH64 || PPC64 ++#endif // AARCH64 || PPC64 || MIPS64 || LOONGARCH64 + + if (!metaspace_rs.is_reserved()) { + #if INCLUDE_CDS +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/oops/oop.inline.hpp b/src/hotspot/share/oops/oop.inline.hpp +--- a/src/hotspot/share/oops/oop.inline.hpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/oops/oop.inline.hpp 2024-01-30 10:00:12.074762281 +0800 +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022. These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #ifndef SHARE_VM_OOPS_OOP_INLINE_HPP + #define SHARE_VM_OOPS_OOP_INLINE_HPP + +@@ -389,7 +395,7 @@ + // forwarding pointer. + oldMark = curMark; + } +- return forwardee(); ++ return (oop)oldMark->decode_pointer(); + } + + // Note that the forwardee is not the same thing as the displaced_mark. +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/opto/compile.hpp b/src/hotspot/share/opto/compile.hpp +--- a/src/hotspot/share/opto/compile.hpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/opto/compile.hpp 2024-01-30 10:00:12.081428868 +0800 +@@ -1204,7 +1204,7 @@ + bool in_scratch_emit_size() const { return _in_scratch_emit_size; } + + enum ScratchBufferBlob { +-#if defined(PPC64) ++#if defined(PPC64) || defined(MIPS64) || defined(LOONGARCH64) + MAX_inst_size = 2048, + #else + MAX_inst_size = 1024, +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/opto/output.cpp b/src/hotspot/share/opto/output.cpp +--- a/src/hotspot/share/opto/output.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/opto/output.cpp 2024-01-30 10:00:12.094762043 +0800 +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021. These ++ * modifications are Copyright (c) 2019, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "asm/assembler.inline.hpp" + #include "asm/macroAssembler.inline.hpp" +@@ -731,6 +737,27 @@ + // Add the safepoint in the DebugInfoRecorder + if( !mach->is_MachCall() ) { + mcall = NULL; ++#if defined(MIPS) || defined(LOONGARCH) ++ // safepoint_pc_offset should point to the last instruction in the safepoint. ++ // On x86 and SPARC, a safepoint contains only one instruction. ++ // However, on MIPS we should add the size of the safepoint to current_offset. ++ // 0x2d6ff22c: lw s2, 0x14(s2) ++ // last_pd->pc_offset()=308, pc_offset=304, bci=64 ++ // last_pd->pc_offset()=312, pc_offset=312, bci=64 ++ // src/hotspot/share/code/debugInfoRec.cpp:295, assert(last_pd->pc_offset() == pc_offset, "must be last pc") ++ // ++ // ;; Safepoint: ++ // ---> pc_offset=304 ++ // 0x2d6ff230: lui at, 0x2b7a ; OopMap{s2=Oop s5=Oop t4=Oop off=308} ++ // ;*goto ++ // ; - java.util.Hashtable::get@64 (line 353) ++ // ---> last_pd(308) ++ // 0x2d6ff234: lw at, 0xffffc100(at) ;*goto ++ // ; - java.util.Hashtable::get@64 (line 353) ++ // ; {poll} ++ // 0x2d6ff238: addiu s0, zero, 0x0 ++ safepoint_pc_offset += sfn->size(_regalloc) - 4; ++#endif + debug_info()->add_safepoint(safepoint_pc_offset, sfn->_oop_map); + } else { + mcall = mach->as_MachCall(); +@@ -1393,6 +1420,22 @@ + DEBUG_ONLY(uint instr_offset = cb->insts_size()); + n->emit(*cb, _regalloc); + current_offset = cb->insts_size(); ++#if defined(MIPS) || defined(LOONGARCH) ++ if (!n->is_Proj() && (cb->insts()->end() != badAddress)) { ++ // For MIPS, the first instruction of the previous node (usually an instruction sequence) is sometimes ++ // not the instruction that accesses memory, so an adjustment is needed. previous_offset points to the ++ // instruction that accesses memory. Instruction size is 4. cb->insts_size() and ++ // cb->insts()->end() are the location of the current instruction.
++ int adjust = 4; ++ NativeInstruction* inst = (NativeInstruction*) (cb->insts()->end() - 4); ++ if (inst->is_sync()) { ++ // a sync may be the last instruction, see store_B_immI_enc_sync ++ adjust += 4; ++ inst = (NativeInstruction*) (cb->insts()->end() - 8); ++ } ++ previous_offset = current_offset - adjust; ++ } ++#endif + + // Above we only verified that there is enough space in the instruction section. + // However, the instruction may emit stubs that cause code buffer expansion. +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/opto/type.cpp b/src/hotspot/share/opto/type.cpp +--- a/src/hotspot/share/opto/type.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/opto/type.cpp 2024-01-30 10:00:12.101428630 +0800 +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "ci/ciMethodData.hpp" + #include "ci/ciTypeFlow.hpp" +@@ -78,6 +84,12 @@ + { Bad, T_ILLEGAL, "vectorx:", false, 0, relocInfo::none }, // VectorX + { Bad, T_ILLEGAL, "vectory:", false, 0, relocInfo::none }, // VectorY + { Bad, T_ILLEGAL, "vectorz:", false, 0, relocInfo::none }, // VectorZ ++#elif defined(LOONGARCH64) ++ { Bad, T_ILLEGAL, "vectors:", false, 0, relocInfo::none }, // VectorS ++ { Bad, T_ILLEGAL, "vectord:", false, 0, relocInfo::none }, // VectorD ++ { Bad, T_ILLEGAL, "vectorx:", false, Op_VecX, relocInfo::none }, // VectorX ++ { Bad, T_ILLEGAL, "vectory:", false, Op_VecY, relocInfo::none }, // VectorY ++ { Bad, T_ILLEGAL, "vectorz:", false, 0, relocInfo::none }, // VectorZ + #else // all other + { Bad, T_ILLEGAL, "vectors:", false, Op_VecS, relocInfo::none }, // VectorS + { Bad, T_ILLEGAL, "vectord:", false, Op_VecD, relocInfo::none }, // VectorD +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/runtime/java.cpp b/src/hotspot/share/runtime/java.cpp +--- a/src/hotspot/share/runtime/java.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/runtime/java.cpp 2024-01-30 10:00:12.118095097 +0800 +@@ -68,6 +68,7 @@ + #include "runtime/thread.inline.hpp" + #include "runtime/timer.hpp" + #include "runtime/vmOperations.hpp" ++#include "runtime/vmThread.hpp" + #include "services/memTracker.hpp" + #include "utilities/dtrace.hpp" + #include "utilities/globalDefinitions.hpp" +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/runtime/objectMonitor.cpp b/src/hotspot/share/runtime/objectMonitor.cpp +--- a/src/hotspot/share/runtime/objectMonitor.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/runtime/objectMonitor.cpp 2024-01-30 10:00:12.121428391 +0800 +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2023, These ++ * modifications are Copyright (c) 2023, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "classfile/vmSymbols.hpp" + #include "jfr/jfrEvents.hpp" +@@ -308,6 +314,9 @@ + } + + assert(_owner != Self, "invariant"); ++ // The load of _succ for the "_succ != current" assertion may be reordered before the preceding ++ // "if (_succ == current) _succ = nullptr"; the expected order is to clear _succ first and then assert.
++ DEBUG_ONLY(LOONGARCH64_ONLY(__asm__ __volatile__ ("dbar 0x700\n");)MIPS64_ONLY(OrderAccess::loadload();)) + assert(_succ != Self, "invariant"); + assert(Self->is_Java_thread(), "invariant"); + JavaThread * jt = (JavaThread *) Self; +@@ -469,6 +478,7 @@ + } + + // The Spin failed -- Enqueue and park the thread ... ++ DEBUG_ONLY(LOONGARCH64_ONLY(__asm__ __volatile__ ("dbar 0x700\n");)MIPS64_ONLY(OrderAccess::loadload();)) + assert(_succ != Self, "invariant"); + assert(_owner != Self, "invariant"); + assert(_Responsible != Self, "invariant"); +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/runtime/os.cpp b/src/hotspot/share/runtime/os.cpp +--- a/src/hotspot/share/runtime/os.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/runtime/os.cpp 2024-01-30 10:00:12.121428391 +0800 +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2019, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "jvm.h" + #include "classfile/classLoader.hpp" +@@ -1242,7 +1248,8 @@ + if ((uintptr_t)fr->sender_sp() == (uintptr_t)-1 || is_pointer_bad(fr->sender_sp())) return true; + + uintptr_t old_fp = (uintptr_t)fr->link_or_null(); +- if (old_fp == 0 || old_fp == (uintptr_t)-1 || old_fp == ufp || ++ // The check for old_fp and ufp is harmful on LoongArch and MIPS due to their special ABIs. ++ if (old_fp == 0 || old_fp == (uintptr_t)-1 NOT_LOONGARCH64_AND_MIPS64(|| old_fp == ufp) || + is_pointer_bad(fr->link_or_null())) return true; + + // stack grows downwards; if old_fp is below current fp or if the stack +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/runtime/sharedRuntimeTrig.cpp b/src/hotspot/share/runtime/sharedRuntimeTrig.cpp +--- a/src/hotspot/share/runtime/sharedRuntimeTrig.cpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/runtime/sharedRuntimeTrig.cpp 2024-01-30 10:00:12.128094978 +0800 +@@ -22,6 +22,13 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2015, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ ++ + #include "precompiled.hpp" + #include "jni.h" + #include "runtime/interfaceSupport.inline.hpp" +@@ -512,6 +519,14 @@ + * sin(x) = x + (S1*x + (x *(r-y/2)+y)) + */ + ++#if defined(MIPS)|| defined(LOONGARCH) ++#undef S1 ++#undef S2 ++#undef S3 ++#undef S4 ++#undef S5 ++#undef S6 ++#endif + static const double + S1 = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */ + S2 = 8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */ +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/utilities/globalDefinitions.hpp b/src/hotspot/share/utilities/globalDefinitions.hpp +--- a/src/hotspot/share/utilities/globalDefinitions.hpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/utilities/globalDefinitions.hpp 2024-01-30 10:00:12.141428153 +0800 +@@ -1161,6 +1161,15 @@ + return log2_long(x); + } + ++#if defined(MIPS64) || defined(LOONGARCH64) ++// returns integer round-up to the nearest multiple of s (s must be a power of two) ++inline intptr_t round_to(intptr_t x, uintx s) { ++ assert(is_power_of_2(s), "s must be a power of 2: " JLONG_FORMAT, x); ++ const uintx m = s - 1; ++ return mask_bits(x + m, ~m); ++} ++#endif ++ + inline bool is_odd (intx x) { return x & 1; } + inline bool is_even(intx x) { return !is_odd(x); } + +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/hotspot/share/utilities/macros.hpp b/src/hotspot/share/utilities/macros.hpp +--- a/src/hotspot/share/utilities/macros.hpp 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/hotspot/share/utilities/macros.hpp 2024-01-30 10:00:12.144761447 +0800 +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021. These ++ * modifications are Copyright (c) 2018, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #ifndef SHARE_VM_UTILITIES_MACROS_HPP + #define SHARE_VM_UTILITIES_MACROS_HPP + +@@ -535,6 +541,38 @@ + #define NOT_SPARC(code) code + #endif + ++#ifdef MIPS64 ++#ifndef MIPS ++#define MIPS ++#endif ++#define MIPS64_ONLY(code) code ++#define NOT_MIPS64(code) ++#else ++#undef MIPS ++#define MIPS64_ONLY(code) ++#define NOT_MIPS64(code) code ++#endif ++ ++#ifdef LOONGARCH64 ++#ifndef LOONGARCH ++#define LOONGARCH ++#endif ++#define LOONGARCH64_ONLY(code) code ++#define NOT_LOONGARCH64(code) ++#else ++#undef LOONGARCH ++#define LOONGARCH64_ONLY(code) ++#define NOT_LOONGARCH64(code) code ++#endif ++ ++#if defined(MIPS64) || defined(LOONGARCH64) ++#define LOONGARCH64_AND_MIPS64_ONLY(code) code ++#define NOT_LOONGARCH64_AND_MIPS64(code) ++#else ++#define LOONGARCH64_AND_MIPS64_ONLY(code) ++#define NOT_LOONGARCH64_AND_MIPS64(code) code ++#endif ++ + #if defined(PPC32) || defined(PPC64) + #ifndef PPC + #define PPC +@@ -627,16 +665,34 @@ + // OS_CPU_HEADER(vmStructs) --> vmStructs_linux_sparc.hpp + // + // basename.hpp / basename.inline.hpp ++#if defined(MIPS) && !defined(ZERO) ++#define CPU_HEADER_H(basename) XSTR(basename ## _mips.h) ++#define CPU_HEADER(basename) XSTR(basename ## _mips.hpp) ++#define CPU_HEADER_INLINE(basename) XSTR(basename ## _mips.inline.hpp) ++#elif defined(LOONGARCH) && !defined(ZERO) ++#define CPU_HEADER_H(basename) XSTR(basename ## _loongarch.h) ++#define CPU_HEADER(basename) XSTR(basename ## _loongarch.hpp) ++#define CPU_HEADER_INLINE(basename) XSTR(basename ## _loongarch.inline.hpp) ++#else + #define CPU_HEADER_H(basename) XSTR(CPU_HEADER_STEM(basename).h) + #define CPU_HEADER(basename) XSTR(CPU_HEADER_STEM(basename).hpp) + #define CPU_HEADER_INLINE(basename) XSTR(CPU_HEADER_STEM(basename).inline.hpp) ++#endif + // basename.hpp / basename.inline.hpp + #define OS_HEADER_H(basename) XSTR(OS_HEADER_STEM(basename).h) + #define OS_HEADER(basename) XSTR(OS_HEADER_STEM(basename).hpp) + #define OS_HEADER_INLINE(basename) XSTR(OS_HEADER_STEM(basename).inline.hpp) + // basename.hpp / basename.inline.hpp ++#if defined(MIPS) && !defined(ZERO) ++#define OS_CPU_HEADER(basename) XSTR(basename ## _linux_mips.hpp) ++#define OS_CPU_HEADER_INLINE(basename) XSTR(basename ## _linux_mips.inline.hpp) ++#elif defined(LOONGARCH) && !defined(ZERO) ++#define OS_CPU_HEADER(basename) XSTR(basename ## _linux_loongarch.hpp) ++#define OS_CPU_HEADER_INLINE(basename) XSTR(basename ## _linux_loongarch.inline.hpp) ++#else + #define OS_CPU_HEADER(basename) XSTR(OS_CPU_HEADER_STEM(basename).hpp) + #define OS_CPU_HEADER_INLINE(basename) XSTR(OS_CPU_HEADER_STEM(basename).inline.hpp) ++#endif + // basename.hpp / basename.inline.hpp + #define COMPILER_HEADER(basename) XSTR(COMPILER_HEADER_STEM(basename).hpp) + #define COMPILER_HEADER_INLINE(basename) XSTR(COMPILER_HEADER_STEM(basename).inline.hpp) +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h b/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h +--- a/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h 2024-01-30 10:00:13.224748568 +0800 +@@ -22,6 +22,13 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022. These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ * ++ */ ++ + #ifndef _LIBPROC_H_ + #define _LIBPROC_H_ + +@@ -37,13 +44,17 @@ + #include + #define user_regs_struct pt_regs + #endif +-#if defined(aarch64) || defined(arm64) ++#if defined(aarch64) || defined(arm64) || defined(loongarch64) + #include + #define user_regs_struct user_pt_regs + #elif defined(arm) + #include + #define user_regs_struct pt_regs + #endif ++#if defined(mips) || defined(mipsel) || defined(mips64) || defined(mips64el) ++#include ++#define user_regs_struct pt_regs ++#endif + + // This C bool type must be int for compatibility with Linux calls and + // it would be a mistake to equivalence it to C++ bool on many platforms +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c +--- a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c 2024-01-30 10:00:13.224748568 +0800 +@@ -22,6 +22,13 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022. These ++ * modifications are Copyright (c) 2021, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ * ++ */ ++ + #include + #include "libproc.h" + #include "proc_service.h" +@@ -54,10 +61,18 @@ + #include "sun_jvm_hotspot_debugger_ppc64_PPC64ThreadContext.h" + #endif + ++#if defined(mips64) || defined(mips64el) ++#include "sun_jvm_hotspot_debugger_mips64_MIPS64ThreadContext.h" ++#endif ++ + #ifdef aarch64 + #include "sun_jvm_hotspot_debugger_aarch64_AARCH64ThreadContext.h" + #endif + ++#ifdef loongarch64 ++#include "sun_jvm_hotspot_debugger_loongarch64_LOONGARCH64ThreadContext.h" ++#endif ++ + static jfieldID p_ps_prochandle_ID = 0; + static jfieldID threadList_ID = 0; + static jfieldID loadObjectList_ID = 0; +@@ -397,7 +412,7 @@ + return (err == PS_OK)? 
array : 0; + } + +-#if defined(i386) || defined(amd64) || defined(sparc) || defined(sparcv9) | defined(ppc64) || defined(ppc64le) || defined(aarch64) ++#if defined(i386) || defined(amd64) || defined(sparc) || defined(sparcv9) | defined(ppc64) || defined(ppc64le) || defined(aarch64) || defined(loongarch64) + JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLocal_getThreadIntegerRegisterSet0 + (JNIEnv *env, jobject this_obj, jint lwp_id) { + +@@ -425,9 +440,15 @@ + #if defined(sparc) || defined(sparcv9) + #define NPRGREG sun_jvm_hotspot_debugger_sparc_SPARCThreadContext_NPRGREG + #endif ++#ifdef loongarch64 ++#define NPRGREG sun_jvm_hotspot_debugger_loongarch64_LOONGARCH64ThreadContext_NPRGREG ++#endif + #if defined(ppc64) || defined(ppc64le) + #define NPRGREG sun_jvm_hotspot_debugger_ppc64_PPC64ThreadContext_NPRGREG + #endif ++#if defined(mips64) || defined(mips64el) ++#define NPRGREG sun_jvm_hotspot_debugger_mips64_MIPS64ThreadContext_NPRGREG ++#endif + + + array = (*env)->NewLongArray(env, NPRGREG); +@@ -534,6 +555,18 @@ + } + #endif /* aarch64 */ + ++#if defined(loongarch64) ++ ++#define REG_INDEX(reg) sun_jvm_hotspot_debugger_loongarch64_LOONGARCH64ThreadContext_##reg ++ ++ { ++ int i; ++ for (i = 0; i < 31; i++) ++ regs[i] = gregs.regs[i]; ++ regs[REG_INDEX(PC)] = gregs.csr_era; ++ } ++#endif /* loongarch64 */ ++ + #if defined(ppc64) || defined(ppc64le) + #define REG_INDEX(reg) sun_jvm_hotspot_debugger_ppc64_PPC64ThreadContext_##reg + +@@ -574,6 +607,45 @@ + + #endif + ++#if defined(mips64) || defined(mips64el) ++ ++#define REG_INDEX(reg) sun_jvm_hotspot_debugger_mips64_MIPS64ThreadContext_##reg ++ ++ regs[REG_INDEX(ZERO)] = gregs.regs[0]; ++ regs[REG_INDEX(AT)] = gregs.regs[1]; ++ regs[REG_INDEX(V0)] = gregs.regs[2]; ++ regs[REG_INDEX(V1)] = gregs.regs[3]; ++ regs[REG_INDEX(A0)] = gregs.regs[4]; ++ regs[REG_INDEX(A1)] = gregs.regs[5]; ++ regs[REG_INDEX(A2)] = gregs.regs[6]; ++ regs[REG_INDEX(A3)] = gregs.regs[7]; ++ regs[REG_INDEX(T0)] = gregs.regs[8]; ++ regs[REG_INDEX(T1)] = gregs.regs[9]; ++ regs[REG_INDEX(T2)] = gregs.regs[10]; ++ regs[REG_INDEX(T3)] = gregs.regs[11]; ++ regs[REG_INDEX(T4)] = gregs.regs[12]; ++ regs[REG_INDEX(T5)] = gregs.regs[13]; ++ regs[REG_INDEX(T6)] = gregs.regs[14]; ++ regs[REG_INDEX(T7)] = gregs.regs[15]; ++ regs[REG_INDEX(S0)] = gregs.regs[16]; ++ regs[REG_INDEX(S1)] = gregs.regs[17]; ++ regs[REG_INDEX(S2)] = gregs.regs[18]; ++ regs[REG_INDEX(S3)] = gregs.regs[19]; ++ regs[REG_INDEX(S4)] = gregs.regs[20]; ++ regs[REG_INDEX(S5)] = gregs.regs[21]; ++ regs[REG_INDEX(S6)] = gregs.regs[22]; ++ regs[REG_INDEX(S7)] = gregs.regs[23]; ++ regs[REG_INDEX(T8)] = gregs.regs[24]; ++ regs[REG_INDEX(T9)] = gregs.regs[25]; ++ regs[REG_INDEX(K0)] = gregs.regs[26]; ++ regs[REG_INDEX(K1)] = gregs.regs[27]; ++ regs[REG_INDEX(GP)] = gregs.regs[28]; ++ regs[REG_INDEX(SP)] = gregs.regs[29]; ++ regs[REG_INDEX(FP)] = gregs.regs[30]; ++ regs[REG_INDEX(S8)] = gregs.regs[30]; ++ regs[REG_INDEX(RA)] = gregs.regs[31]; ++#endif /* mips */ ++ + (*env)->ReleaseLongArrayElements(env, array, regs, JNI_COMMIT); + return array; + } +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c +--- a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c 2024-01-30 10:00:13.224748568 +0800 +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has 
been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include + #include + #include +@@ -142,7 +148,7 @@ + #define PTRACE_GETREGS_REQ PT_GETREGS + #endif + +-#ifdef PTRACE_GETREGS_REQ ++#if defined(PTRACE_GETREGS_REQ) && !defined(loongarch64) + if (ptrace_getregs(PTRACE_GETREGS_REQ, pid, user, NULL) < 0) { + print_debug("ptrace(PTRACE_GETREGS, ...) failed for lwp %d\n", pid); + return false; +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java 2024-01-30 10:00:13.238081742 +0800 +@@ -23,6 +23,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2019, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + package sun.jvm.hotspot.debugger.linux; + + import java.io.*; +@@ -34,12 +40,16 @@ + import sun.jvm.hotspot.debugger.amd64.*; + import sun.jvm.hotspot.debugger.aarch64.*; + import sun.jvm.hotspot.debugger.sparc.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; + import sun.jvm.hotspot.debugger.ppc64.*; + import sun.jvm.hotspot.debugger.linux.x86.*; + import sun.jvm.hotspot.debugger.linux.amd64.*; + import sun.jvm.hotspot.debugger.linux.sparc.*; + import sun.jvm.hotspot.debugger.linux.ppc64.*; + import sun.jvm.hotspot.debugger.linux.aarch64.*; ++import sun.jvm.hotspot.debugger.linux.mips64.*; ++import sun.jvm.hotspot.debugger.linux.loongarch64.*; + import sun.jvm.hotspot.utilities.*; + + class LinuxCDebugger implements CDebugger { +@@ -102,7 +112,21 @@ + Address pc = context.getRegisterAsAddress(SPARCThreadContext.R_O7); + if (pc == null) return null; + return new LinuxSPARCCFrame(dbg, sp, pc, LinuxDebuggerLocal.getAddressSize()); +- } else if (cpu.equals("ppc64")) { ++ } else if (cpu.equals("mips64")) { ++ MIPS64ThreadContext context = (MIPS64ThreadContext) thread.getContext(); ++ Address sp = context.getRegisterAsAddress(MIPS64ThreadContext.SP); ++ if (sp == null) return null; ++ Address pc = context.getRegisterAsAddress(MIPS64ThreadContext.PC); ++ if (pc == null) return null; ++ return new LinuxMIPS64CFrame(dbg, sp, pc); ++ } else if (cpu.equals("loongarch64")) { ++ LOONGARCH64ThreadContext context = (LOONGARCH64ThreadContext) thread.getContext(); ++ Address fp = context.getRegisterAsAddress(LOONGARCH64ThreadContext.FP); ++ if (fp == null) return null; ++ Address pc = context.getRegisterAsAddress(LOONGARCH64ThreadContext.PC); ++ if (pc == null) return null; ++ return new LinuxLOONGARCH64CFrame(dbg, fp, pc); ++ } else if (cpu.equals("ppc64")) { + PPC64ThreadContext context = (PPC64ThreadContext) thread.getContext(); + Address sp = context.getRegisterAsAddress(PPC64ThreadContext.SP); + if (sp == null) return null; +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThreadContextFactory.java 
b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThreadContextFactory.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThreadContextFactory.java 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThreadContextFactory.java 2024-01-30 10:00:13.241415036 +0800 +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2019, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + package sun.jvm.hotspot.debugger.linux; + + import java.lang.reflect.*; +@@ -30,6 +36,8 @@ + import sun.jvm.hotspot.debugger.linux.x86.*; + import sun.jvm.hotspot.debugger.linux.ppc64.*; + import sun.jvm.hotspot.debugger.linux.sparc.*; ++import sun.jvm.hotspot.debugger.linux.mips64.*; ++import sun.jvm.hotspot.debugger.linux.loongarch64.*; + + class LinuxThreadContextFactory { + static ThreadContext createThreadContext(LinuxDebugger dbg) { +@@ -40,7 +48,11 @@ + return new LinuxAMD64ThreadContext(dbg); + } else if (cpu.equals("sparc")) { + return new LinuxSPARCThreadContext(dbg); +- } else if (cpu.equals("ppc64")) { ++ } else if (cpu.equals("mips64")) { ++ return new LinuxMIPS64ThreadContext(dbg); ++ } else if (cpu.equals("loongarch64")) { ++ return new LinuxLOONGARCH64ThreadContext(dbg); ++ } else if (cpu.equals("ppc64")) { + return new LinuxPPC64ThreadContext(dbg); + } else { + try { +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64CFrame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64CFrame.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64CFrame.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64CFrame.java 2024-01-30 10:00:13.241415036 +0800 +@@ -0,0 +1,92 @@ ++/* ++ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.linux.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.linux.*; ++import sun.jvm.hotspot.debugger.cdbg.*; ++import sun.jvm.hotspot.debugger.cdbg.basic.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; ++ ++final public class LinuxLOONGARCH64CFrame extends BasicCFrame { ++ // package/class internals only ++ public LinuxLOONGARCH64CFrame(LinuxDebugger dbg, Address fp, Address pc) { ++ super(dbg.getCDebugger()); ++ this.fp = fp; ++ this.pc = pc; ++ this.dbg = dbg; ++ } ++ ++ // override base class impl to avoid ELF parsing ++ public ClosestSymbol closestSymbolToPC() { ++ // try native lookup in debugger. ++ return dbg.lookup(dbg.getAddressValue(pc())); ++ } ++ ++ public Address pc() { ++ return pc; ++ } ++ ++ public Address localVariableBase() { ++ return fp; ++ } ++ ++ public CFrame sender(ThreadProxy thread) { ++ LOONGARCH64ThreadContext context = (LOONGARCH64ThreadContext) thread.getContext(); ++ Address sp = context.getRegisterAsAddress(LOONGARCH64ThreadContext.SP); ++ Address nextFP; ++ Address nextPC; ++ ++ if ((fp == null) || fp.lessThan(sp)) { ++ return null; ++ } ++ ++ try { ++ nextFP = fp.getAddressAt(-2 * ADDRESS_SIZE); ++ } catch (Exception e) { ++ return null; ++ } ++ if (nextFP == null) { ++ return null; ++ } ++ ++ try { ++ nextPC = fp.getAddressAt(-1 * ADDRESS_SIZE); ++ } catch (Exception e) { ++ return null; ++ } ++ if (nextPC == null) { ++ return null; ++ } ++ ++ return new LinuxLOONGARCH64CFrame(dbg, nextFP, nextPC); ++ } ++ ++ private static final int ADDRESS_SIZE = 8; ++ private Address pc; ++ private Address fp; ++ private LinuxDebugger dbg; ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64ThreadContext.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64ThreadContext.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64ThreadContext.java 2024-01-30 10:00:13.241415036 +0800 +@@ -0,0 +1,47 @@ ++/* ++ * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.linux.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; ++import sun.jvm.hotspot.debugger.linux.*; ++ ++public class LinuxLOONGARCH64ThreadContext extends LOONGARCH64ThreadContext { ++ private LinuxDebugger debugger; ++ ++ public LinuxLOONGARCH64ThreadContext(LinuxDebugger debugger) { ++ super(); ++ this.debugger = debugger; ++ } ++ ++ public void setRegisterAsAddress(int index, Address value) { ++ setRegister(index, debugger.getAddressValue(value)); ++ } ++ ++ public Address getRegisterAsAddress(int index) { ++ return debugger.newAddress(getRegister(index)); ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64CFrame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64CFrame.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64CFrame.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64CFrame.java 2024-01-30 10:00:13.241415036 +0800 +@@ -0,0 +1,80 @@ ++/* ++ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.linux.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.linux.*; ++import sun.jvm.hotspot.debugger.cdbg.*; ++import sun.jvm.hotspot.debugger.cdbg.basic.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++ ++final public class LinuxMIPS64CFrame extends BasicCFrame { ++ // package/class internals only ++ public LinuxMIPS64CFrame(LinuxDebugger dbg, Address ebp, Address pc) { ++ super(dbg.getCDebugger()); ++ this.ebp = ebp; ++ this.pc = pc; ++ this.dbg = dbg; ++ } ++ ++ // override base class impl to avoid ELF parsing ++ public ClosestSymbol closestSymbolToPC() { ++ // try native lookup in debugger. 
++ return dbg.lookup(dbg.getAddressValue(pc())); ++ } ++ ++ public Address pc() { ++ return pc; ++ } ++ ++ public Address localVariableBase() { ++ return ebp; ++ } ++ ++ public CFrame sender(ThreadProxy thread) { ++ MIPS64ThreadContext context = (MIPS64ThreadContext) thread.getContext(); ++ Address esp = context.getRegisterAsAddress(MIPS64ThreadContext.SP); ++ ++ if ( (ebp == null) || ebp.lessThan(esp) ) { ++ return null; ++ } ++ ++ Address nextEBP = ebp.getAddressAt( 0 * ADDRESS_SIZE); ++ if (nextEBP == null) { ++ return null; ++ } ++ Address nextPC = ebp.getAddressAt( 1 * ADDRESS_SIZE); ++ if (nextPC == null) { ++ return null; ++ } ++ return new LinuxMIPS64CFrame(dbg, nextEBP, nextPC); ++ } ++ ++ private static final int ADDRESS_SIZE = 4; ++ private Address pc; ++ private Address ebp; ++ private LinuxDebugger dbg; ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64ThreadContext.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64ThreadContext.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64ThreadContext.java 2024-01-30 10:00:13.241415036 +0800 +@@ -0,0 +1,47 @@ ++/* ++ * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.linux.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.debugger.linux.*; ++ ++public class LinuxMIPS64ThreadContext extends MIPS64ThreadContext { ++ private LinuxDebugger debugger; ++ ++ public LinuxMIPS64ThreadContext(LinuxDebugger debugger) { ++ super(); ++ this.debugger = debugger; ++ } ++ ++ public void setRegisterAsAddress(int index, Address value) { ++ setRegister(index, debugger.getAddressValue(value)); ++ } ++ ++ public Address getRegisterAsAddress(int index) { ++ return debugger.newAddress(getRegister(index)); ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/loongarch64/LOONGARCH64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/loongarch64/LOONGARCH64ThreadContext.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/loongarch64/LOONGARCH64ThreadContext.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/loongarch64/LOONGARCH64ThreadContext.java 2024-01-30 10:00:13.241415036 +0800 +@@ -0,0 +1,128 @@ ++/* ++ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.loongarch64; ++ ++import java.lang.annotation.Native; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.cdbg.*; ++ ++/** Specifies the thread context on loongarch64 platforms; only a sub-portion ++ of the context is guaranteed to be present on all operating ++ systems. */ ++ ++public abstract class LOONGARCH64ThreadContext implements ThreadContext { ++ ++ // NOTE: the indices for the various registers must be maintained as ++ // listed across various operating systems. However, only a small ++ // subset of the registers' values are guaranteed to be present (and ++ // must be present for the SA's stack walking to work): EAX, EBX, ++ // ECX, EDX, ESI, EDI, EBP, ESP, and EIP. ++ ++ // One instance of the Native annotation is enough to trigger header generation ++ // for this file. 
++ @Native ++ public static final int ZERO = 0; ++ public static final int RA = 1; ++ public static final int TP = 2; ++ public static final int SP = 3; ++ public static final int A0 = 4; ++ public static final int A1 = 5; ++ public static final int A2 = 6; ++ public static final int A3 = 7; ++ public static final int A4 = 8; ++ public static final int A5 = 9; ++ public static final int A6 = 10; ++ public static final int A7 = 11; ++ public static final int T0 = 12; ++ public static final int T1 = 13; ++ public static final int T2 = 14; ++ public static final int T3 = 15; ++ public static final int T4 = 16; ++ public static final int T5 = 17; ++ public static final int T6 = 18; ++ public static final int T7 = 19; ++ public static final int T8 = 20; ++ public static final int RX = 21; ++ public static final int FP = 22; ++ public static final int S0 = 23; ++ public static final int S1 = 24; ++ public static final int S2 = 25; ++ public static final int S3 = 26; ++ public static final int S4 = 27; ++ public static final int S5 = 28; ++ public static final int S6 = 29; ++ public static final int S7 = 30; ++ public static final int S8 = 31; ++ public static final int PC = 32; ++ public static final int NPRGREG = 33; ++ ++ private static final String[] regNames = { ++ "ZERO", "RA", "TP", "SP", ++ "A0", "A1", "A2", "A3", ++ "A4", "A5", "A6", "A7", ++ "T0", "T1", "T2", "T3", ++ "T4", "T5", "T6", "T7", ++ "T8", "RX", "FP", "S0", ++ "S1", "S2", "S3", "S4", ++ "S5", "S6", "S7", "S8", ++ "PC" ++ }; ++ ++ private long[] data; ++ ++ public LOONGARCH64ThreadContext() { ++ data = new long[NPRGREG]; ++ } ++ ++ public int getNumRegisters() { ++ return NPRGREG; ++ } ++ ++ public String getRegisterName(int index) { ++ return regNames[index]; ++ } ++ ++ public void setRegister(int index, long value) { ++ data[index] = value; ++ } ++ ++ public long getRegister(int index) { ++ return data[index]; ++ } ++ ++ public CFrame getTopFrame(Debugger dbg) { ++ return null; ++ } ++ ++ /** This can't be implemented in this class since we would have to ++ tie the implementation to, for example, the debugging system */ ++ public abstract void setRegisterAsAddress(int index, Address value); ++ ++ /** This can't be implemented in this class since we would have to ++ tie the implementation to, for example, the debugging system */ ++ public abstract Address getRegisterAsAddress(int index); ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionLOONGARCH64.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionLOONGARCH64.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionLOONGARCH64.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionLOONGARCH64.java 2024-01-30 10:00:13.234748449 +0800 +@@ -0,0 +1,41 @@ ++/* ++ * Copyright (c) 2000, 2008, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger; ++ ++public class MachineDescriptionLOONGARCH64 extends MachineDescriptionTwosComplement implements MachineDescription { ++ public long getAddressSize() { ++ return 8; ++ } ++ ++ ++ public boolean isBigEndian() { ++ return false; ++ } ++ ++ public boolean isLP64() { ++ return true; ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionMIPS64.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionMIPS64.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionMIPS64.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionMIPS64.java 2024-01-30 10:00:13.234748449 +0800 +@@ -0,0 +1,41 @@ ++/* ++ * Copyright (c) 2000, 2008, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.debugger; ++ ++public class MachineDescriptionMIPS64 extends MachineDescriptionTwosComplement implements MachineDescription { ++ public long getAddressSize() { ++ return 8; ++ } ++ ++ ++ public boolean isBigEndian() { ++ return "big".equals(System.getProperty("sun.cpu.endian")); ++ } ++ ++ public boolean isLP64() { ++ return true; ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/mips64/MIPS64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/mips64/MIPS64ThreadContext.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/mips64/MIPS64ThreadContext.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/mips64/MIPS64ThreadContext.java 2024-01-30 10:00:13.241415036 +0800 +@@ -0,0 +1,128 @@ ++/* ++ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.mips64; ++ ++import java.lang.annotation.Native; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.cdbg.*; ++ ++/** Specifies the thread context on mips64 platforms; only a sub-portion ++ of the context is guaranteed to be present on all operating ++ systems. */ ++ ++public abstract class MIPS64ThreadContext implements ThreadContext { ++ ++ // NOTE: the indices for the various registers must be maintained as ++ // listed across various operating systems. However, only a small ++ // subset of the registers' values are guaranteed to be present (and ++ // must be present for the SA's stack walking to work): EAX, EBX, ++ // ECX, EDX, ESI, EDI, EBP, ESP, and EIP. ++ ++ // One instance of the Native annotation is enough to trigger header generation ++ // for this file. 
++ @Native ++ public static final int ZERO = 0; ++ public static final int AT = 1; ++ public static final int V0 = 2; ++ public static final int V1 = 3; ++ public static final int A0 = 4; ++ public static final int A1 = 5; ++ public static final int A2 = 6; ++ public static final int A3 = 7; ++ public static final int T0 = 8; ++ public static final int T1 = 9; ++ public static final int T2 = 10; ++ public static final int T3 = 11; ++ public static final int T4 = 12; ++ public static final int T5 = 13; ++ public static final int T6 = 14; ++ public static final int T7 = 15; ++ public static final int S0 = 16; ++ public static final int S1 = 17; ++ public static final int S2 = 18; ++ public static final int S3 = 19; ++ public static final int S4 = 20; ++ public static final int S5 = 21; ++ public static final int S6 = 22; ++ public static final int S7 = 23; ++ public static final int T8 = 24; ++ public static final int T9 = 25; ++ public static final int K0 = 26; ++ public static final int K1 = 27; ++ public static final int GP = 28; ++ public static final int SP = 29; ++ public static final int FP = 30; ++ public static final int RA = 31; ++ public static final int PC = 32; ++ public static final int NPRGREG = 33; ++ ++ private static final String[] regNames = { ++ "ZERO", "AT", "V0", "V1", ++ "A0", "A1", "A2", "A3", ++ "T0", "T1", "T2", "T3", ++ "T4", "T5", "T6", "T7", ++ "S0", "S1", "S2", "S3", ++ "S4", "S5", "S6", "S7", ++ "T8", "T9", "K0", "K1", ++ "GP", "SP", "FP", "RA", ++ "PC" ++ }; ++ ++ private long[] data; ++ ++ public MIPS64ThreadContext() { ++ data = new long[NPRGREG]; ++ } ++ ++ public int getNumRegisters() { ++ return NPRGREG; ++ } ++ ++ public String getRegisterName(int index) { ++ return regNames[index]; ++ } ++ ++ public void setRegister(int index, long value) { ++ data[index] = value; ++ } ++ ++ public long getRegister(int index) { ++ return data[index]; ++ } ++ ++ public CFrame getTopFrame(Debugger dbg) { ++ return null; ++ } ++ ++ /** This can't be implemented in this class since we would have to ++ tie the implementation to, for example, the debugging system */ ++ public abstract void setRegisterAsAddress(int index, Address value); ++ ++ /** This can't be implemented in this class since we would have to ++ tie the implementation to, for example, the debugging system */ ++ public abstract Address getRegisterAsAddress(int index); ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/posix/elf/ELFHeader.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/posix/elf/ELFHeader.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/posix/elf/ELFHeader.java 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/posix/elf/ELFHeader.java 2024-01-30 10:00:13.241415036 +0800 +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2019, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + package sun.jvm.hotspot.debugger.posix.elf; + + import java.io.FileInputStream; +@@ -63,6 +69,8 @@ + public static final int ARCH_i860 = 7; + /** MIPS architecture type. */ + public static final int ARCH_MIPS = 8; ++ /** LOONGARCH architecture type. */ ++ public static final int ARCH_LOONGARCH = 9; + + /** Returns a file type which is defined by the file type constants. 
*/ + public short getFileType(); +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadContext.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadContext.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadContext.java 2024-01-30 10:00:13.241415036 +0800 +@@ -0,0 +1,47 @@ ++/* ++ * Copyright (c) 2002, 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.proc.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; ++import sun.jvm.hotspot.debugger.proc.*; ++ ++public class ProcLOONGARCH64ThreadContext extends LOONGARCH64ThreadContext { ++ private ProcDebugger debugger; ++ ++ public ProcLOONGARCH64ThreadContext(ProcDebugger debugger) { ++ super(); ++ this.debugger = debugger; ++ } ++ ++ public void setRegisterAsAddress(int index, Address value) { ++ setRegister(index, debugger.getAddressValue(value)); ++ } ++ ++ public Address getRegisterAsAddress(int index) { ++ return debugger.newAddress(getRegister(index)); ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadFactory.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadFactory.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadFactory.java 2024-01-30 10:00:13.241415036 +0800 +@@ -0,0 +1,45 @@ ++/* ++ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.proc.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.proc.*; ++ ++public class ProcLOONGARCH64ThreadFactory implements ProcThreadFactory { ++ private ProcDebugger debugger; ++ ++ public ProcLOONGARCH64ThreadFactory(ProcDebugger debugger) { ++ this.debugger = debugger; ++ } ++ ++ public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { ++ return new ProcLOONGARCH64Thread(debugger, threadIdentifierAddr); ++ } ++ ++ public ThreadProxy createThreadWrapper(long id) { ++ return new ProcLOONGARCH64Thread(debugger, id); ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64Thread.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64Thread.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64Thread.java 2024-01-30 10:00:13.241415036 +0800 +@@ -0,0 +1,92 @@ ++/* ++ * Copyright (c) 2002, 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.proc.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; ++import sun.jvm.hotspot.debugger.proc.*; ++import sun.jvm.hotspot.utilities.*; ++ ++public class ProcLOONGARCH64Thread implements ThreadProxy { ++ private ProcDebugger debugger; ++ private int id; ++ ++ public ProcLOONGARCH64Thread(ProcDebugger debugger, Address addr) { ++ this.debugger = debugger; ++ ++ // FIXME: the size here should be configurable. However, making it ++ // so would produce a dependency on the "types" package from the ++ // debugger package, which is not desired. ++ this.id = (int) addr.getCIntegerAt(0, 4, true); ++ } ++ ++ public ProcLOONGARCH64Thread(ProcDebugger debugger, long id) { ++ this.debugger = debugger; ++ this.id = (int) id; ++ } ++ ++ public ThreadContext getContext() throws IllegalThreadStateException { ++ ProcLOONGARCH64ThreadContext context = new ProcLOONGARCH64ThreadContext(debugger); ++ long[] regs = debugger.getThreadIntegerRegisterSet(id); ++ /* ++ _NGREG in reg.h is defined to be 19. Because we have included ++ debug registers LOONGARCH64ThreadContext.NPRGREG is 25. ++ */ ++ ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(regs.length <= LOONGARCH64ThreadContext.NPRGREG, "size of register set is greater than " + LOONGARCH64ThreadContext.NPRGREG); ++ } ++ for (int i = 0; i < regs.length; i++) { ++ context.setRegister(i, regs[i]); ++ } ++ return context; ++ } ++ ++ public boolean canSetContext() throws DebuggerException { ++ return false; ++ } ++ ++ public void setContext(ThreadContext context) ++ throws IllegalThreadStateException, DebuggerException { ++ throw new DebuggerException("Unimplemented"); ++ } ++ ++ public String toString() { ++ return "t@" + id; ++ } ++ ++ public boolean equals(Object obj) { ++ if ((obj == null) || !(obj instanceof ProcLOONGARCH64Thread)) { ++ return false; ++ } ++ ++ return (((ProcLOONGARCH64Thread) obj).id == id); ++ } ++ ++ public int hashCode() { ++ return id; ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadContext.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadContext.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadContext.java 2024-01-30 10:00:13.241415036 +0800 +@@ -0,0 +1,47 @@ ++/* ++ * Copyright (c) 2002, 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.proc.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.debugger.proc.*; ++ ++public class ProcMIPS64ThreadContext extends MIPS64ThreadContext { ++ private ProcDebugger debugger; ++ ++ public ProcMIPS64ThreadContext(ProcDebugger debugger) { ++ super(); ++ this.debugger = debugger; ++ } ++ ++ public void setRegisterAsAddress(int index, Address value) { ++ setRegister(index, debugger.getAddressValue(value)); ++ } ++ ++ public Address getRegisterAsAddress(int index) { ++ return debugger.newAddress(getRegister(index)); ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadFactory.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadFactory.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadFactory.java 2024-01-30 10:00:13.241415036 +0800 +@@ -0,0 +1,45 @@ ++/* ++ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.proc.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.proc.*; ++ ++public class ProcMIPS64ThreadFactory implements ProcThreadFactory { ++ private ProcDebugger debugger; ++ ++ public ProcMIPS64ThreadFactory(ProcDebugger debugger) { ++ this.debugger = debugger; ++ } ++ ++ public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { ++ return new ProcMIPS64Thread(debugger, threadIdentifierAddr); ++ } ++ ++ public ThreadProxy createThreadWrapper(long id) { ++ return new ProcMIPS64Thread(debugger, id); ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64Thread.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64Thread.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64Thread.java 2024-01-30 10:00:13.241415036 +0800 +@@ -0,0 +1,92 @@ ++/* ++ * Copyright (c) 2002, 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.proc.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.debugger.proc.*; ++import sun.jvm.hotspot.utilities.*; ++ ++public class ProcMIPS64Thread implements ThreadProxy { ++ private ProcDebugger debugger; ++ private int id; ++ ++ public ProcMIPS64Thread(ProcDebugger debugger, Address addr) { ++ this.debugger = debugger; ++ ++ // FIXME: the size here should be configurable. However, making it ++ // so would produce a dependency on the "types" package from the ++ // debugger package, which is not desired. ++ this.id = (int) addr.getCIntegerAt(0, 4, true); ++ } ++ ++ public ProcMIPS64Thread(ProcDebugger debugger, long id) { ++ this.debugger = debugger; ++ this.id = (int) id; ++ } ++ ++ public ThreadContext getContext() throws IllegalThreadStateException { ++ ProcMIPS64ThreadContext context = new ProcMIPS64ThreadContext(debugger); ++ long[] regs = debugger.getThreadIntegerRegisterSet(id); ++ /* ++ _NGREG in reg.h is defined to be 19. Because we have included ++ debug registers MIPS64ThreadContext.NPRGREG is 25. 
++ */ ++ ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(regs.length <= MIPS64ThreadContext.NPRGREG, "size of register set is greater than " + MIPS64ThreadContext.NPRGREG); ++ } ++ for (int i = 0; i < regs.length; i++) { ++ context.setRegister(i, regs[i]); ++ } ++ return context; ++ } ++ ++ public boolean canSetContext() throws DebuggerException { ++ return false; ++ } ++ ++ public void setContext(ThreadContext context) ++ throws IllegalThreadStateException, DebuggerException { ++ throw new DebuggerException("Unimplemented"); ++ } ++ ++ public String toString() { ++ return "t@" + id; ++ } ++ ++ public boolean equals(Object obj) { ++ if ((obj == null) || !(obj instanceof ProcMIPS64Thread)) { ++ return false; ++ } ++ ++ return (((ProcMIPS64Thread) obj).id == id); ++ } ++ ++ public int hashCode() { ++ return id; ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java 2024-01-30 10:00:13.241415036 +0800 +@@ -32,11 +32,13 @@ + import sun.jvm.hotspot.debugger.cdbg.*; + import sun.jvm.hotspot.debugger.proc.amd64.*; + import sun.jvm.hotspot.debugger.proc.aarch64.*; ++import sun.jvm.hotspot.debugger.proc.mips64.*; + import sun.jvm.hotspot.debugger.proc.sparc.*; + import sun.jvm.hotspot.debugger.proc.ppc64.*; + import sun.jvm.hotspot.debugger.proc.x86.*; + import sun.jvm.hotspot.debugger.ppc64.*; + import sun.jvm.hotspot.debugger.amd64.*; ++import sun.jvm.hotspot.debugger.mips64.*; + import sun.jvm.hotspot.debugger.aarch64.*; + import sun.jvm.hotspot.debugger.sparc.*; + import sun.jvm.hotspot.debugger.x86.*; +@@ -90,6 +92,10 @@ + threadFactory = new ProcAMD64ThreadFactory(this); + pcRegIndex = AMD64ThreadContext.RIP; + fpRegIndex = AMD64ThreadContext.RBP; ++ } else if (cpu.equals("mips64") || cpu.equals("mips64el")) { ++ threadFactory = new ProcMIPS64ThreadFactory(this); ++ pcRegIndex = MIPS64ThreadContext.PC; ++ fpRegIndex = MIPS64ThreadContext.FP; + } else if (cpu.equals("aarch64")) { + threadFactory = new ProcAARCH64ThreadFactory(this); + pcRegIndex = AARCH64ThreadContext.PC; +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadContext.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadContext.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadContext.java 2024-01-30 10:00:13.244748330 +0800 +@@ -0,0 +1,51 @@ ++/* ++ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.remote.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; ++import sun.jvm.hotspot.debugger.remote.*; ++ ++public class RemoteLOONGARCH64ThreadContext extends LOONGARCH64ThreadContext { ++ private RemoteDebuggerClient debugger; ++ ++ public RemoteLOONGARCH64ThreadContext(RemoteDebuggerClient debugger) { ++ super(); ++ this.debugger = debugger; ++ } ++ ++ /** This can't be implemented in this class since we would have to ++ tie the implementation to, for example, the debugging system */ ++ public void setRegisterAsAddress(int index, Address value) { ++ setRegister(index, debugger.getAddressValue(value)); ++ } ++ ++ /** This can't be implemented in this class since we would have to ++ tie the implementation to, for example, the debugging system */ ++ public Address getRegisterAsAddress(int index) { ++ return debugger.newAddress(getRegister(index)); ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadFactory.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadFactory.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadFactory.java 2024-01-30 10:00:13.244748330 +0800 +@@ -0,0 +1,45 @@ ++/* ++ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.remote.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.remote.*; ++ ++public class RemoteLOONGARCH64ThreadFactory implements RemoteThreadFactory { ++ private RemoteDebuggerClient debugger; ++ ++ public RemoteLOONGARCH64ThreadFactory(RemoteDebuggerClient debugger) { ++ this.debugger = debugger; ++ } ++ ++ public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { ++ return new RemoteLOONGARCH64Thread(debugger, threadIdentifierAddr); ++ } ++ ++ public ThreadProxy createThreadWrapper(long id) { ++ return new RemoteLOONGARCH64Thread(debugger, id); ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64Thread.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64Thread.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64Thread.java 2024-01-30 10:00:13.244748330 +0800 +@@ -0,0 +1,54 @@ ++/* ++ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.remote.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; ++import sun.jvm.hotspot.debugger.remote.*; ++import sun.jvm.hotspot.utilities.*; ++ ++public class RemoteLOONGARCH64Thread extends RemoteThread { ++ public RemoteLOONGARCH64Thread(RemoteDebuggerClient debugger, Address addr) { ++ super(debugger, addr); ++ } ++ ++ public RemoteLOONGARCH64Thread(RemoteDebuggerClient debugger, long id) { ++ super(debugger, id); ++ } ++ ++ public ThreadContext getContext() throws IllegalThreadStateException { ++ RemoteLOONGARCH64ThreadContext context = new RemoteLOONGARCH64ThreadContext(debugger); ++ long[] regs = (addr != null)? 
debugger.getThreadIntegerRegisterSet(addr) : ++ debugger.getThreadIntegerRegisterSet(id); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(regs.length == LOONGARCH64ThreadContext.NPRGREG, "size of register set must match"); ++ } ++ for (int i = 0; i < regs.length; i++) { ++ context.setRegister(i, regs[i]); ++ } ++ return context; ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadContext.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadContext.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadContext.java 2024-01-30 10:00:13.244748330 +0800 +@@ -0,0 +1,51 @@ ++/* ++ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.remote.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.debugger.remote.*; ++ ++public class RemoteMIPS64ThreadContext extends MIPS64ThreadContext { ++ private RemoteDebuggerClient debugger; ++ ++ public RemoteMIPS64ThreadContext(RemoteDebuggerClient debugger) { ++ super(); ++ this.debugger = debugger; ++ } ++ ++ /** This can't be implemented in this class since we would have to ++ tie the implementation to, for example, the debugging system */ ++ public void setRegisterAsAddress(int index, Address value) { ++ setRegister(index, debugger.getAddressValue(value)); ++ } ++ ++ /** This can't be implemented in this class since we would have to ++ tie the implementation to, for example, the debugging system */ ++ public Address getRegisterAsAddress(int index) { ++ return debugger.newAddress(getRegister(index)); ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadFactory.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadFactory.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadFactory.java 2024-01-30 10:00:13.244748330 +0800 +@@ -0,0 +1,45 @@ ++/* ++ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.remote.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.remote.*; ++ ++public class RemoteMIPS64ThreadFactory implements RemoteThreadFactory { ++ private RemoteDebuggerClient debugger; ++ ++ public RemoteMIPS64ThreadFactory(RemoteDebuggerClient debugger) { ++ this.debugger = debugger; ++ } ++ ++ public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { ++ return new RemoteMIPS64Thread(debugger, threadIdentifierAddr); ++ } ++ ++ public ThreadProxy createThreadWrapper(long id) { ++ return new RemoteMIPS64Thread(debugger, id); ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64Thread.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64Thread.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64Thread.java 2024-01-30 10:00:13.244748330 +0800 +@@ -0,0 +1,54 @@ ++/* ++ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.remote.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.debugger.remote.*; ++import sun.jvm.hotspot.utilities.*; ++ ++public class RemoteMIPS64Thread extends RemoteThread { ++ public RemoteMIPS64Thread(RemoteDebuggerClient debugger, Address addr) { ++ super(debugger, addr); ++ } ++ ++ public RemoteMIPS64Thread(RemoteDebuggerClient debugger, long id) { ++ super(debugger, id); ++ } ++ ++ public ThreadContext getContext() throws IllegalThreadStateException { ++ RemoteMIPS64ThreadContext context = new RemoteMIPS64ThreadContext(debugger); ++ long[] regs = (addr != null)? 
debugger.getThreadIntegerRegisterSet(addr) : ++ debugger.getThreadIntegerRegisterSet(id); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(regs.length == MIPS64ThreadContext.NPRGREG, "size of register set must match"); ++ } ++ for (int i = 0; i < regs.length; i++) { ++ context.setRegister(i, regs[i]); ++ } ++ return context; ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java 2024-01-30 10:00:13.241415036 +0800 +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2019, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + package sun.jvm.hotspot.debugger.remote; + + import java.rmi.*; +@@ -34,6 +40,8 @@ + import sun.jvm.hotspot.debugger.remote.x86.*; + import sun.jvm.hotspot.debugger.remote.amd64.*; + import sun.jvm.hotspot.debugger.remote.ppc64.*; ++import sun.jvm.hotspot.debugger.remote.mips64.*; ++import sun.jvm.hotspot.debugger.remote.loongarch64.*; + + /** An implementation of Debugger which wraps a + RemoteDebugger, providing remote debugging via RMI. +@@ -76,6 +84,16 @@ + cachePageSize = 4096; + cacheNumPages = parseCacheNumPagesProperty(cacheSize / cachePageSize); + unalignedAccessesOkay = true; ++ } else if (cpu.equals("mips64") || cpu.equals("mips64el")) { ++ threadFactory = new RemoteMIPS64ThreadFactory(this); ++ cachePageSize = 4096; ++ cacheNumPages = parseCacheNumPagesProperty(cacheSize / cachePageSize); ++ unalignedAccessesOkay = true; ++ } else if (cpu.equals("loongarch64")) { ++ threadFactory = new RemoteLOONGARCH64ThreadFactory(this); ++ cachePageSize = 4096; ++ cacheNumPages = parseCacheNumPagesProperty(cacheSize / cachePageSize); ++ unalignedAccessesOkay = true; + } else { + try { + Class tf = Class.forName("sun.jvm.hotspot.debugger.remote." + +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java 2024-01-30 10:00:13.228081861 +0800 +@@ -23,6 +23,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021. These ++ * modifications are Copyright (c) 2018, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ * ++ */ + package sun.jvm.hotspot; + + import java.rmi.RemoteException; +@@ -39,6 +45,8 @@ + import sun.jvm.hotspot.debugger.MachineDescriptionIntelX86; + import sun.jvm.hotspot.debugger.MachineDescriptionSPARC32Bit; + import sun.jvm.hotspot.debugger.MachineDescriptionSPARC64Bit; ++import sun.jvm.hotspot.debugger.MachineDescriptionMIPS64; ++import sun.jvm.hotspot.debugger.MachineDescriptionLOONGARCH64; + import sun.jvm.hotspot.debugger.NoSuchSymbolException; + import sun.jvm.hotspot.debugger.bsd.BsdDebuggerLocal; + import sun.jvm.hotspot.debugger.linux.LinuxDebuggerLocal; +@@ -598,6 +606,10 @@ + } else { + machDesc = new MachineDescriptionSPARC32Bit(); + } ++ } else if (cpu.equals("mips64")) { ++ machDesc = new MachineDescriptionMIPS64(); ++ } else if (cpu.equals("loongarch64")) { ++ machDesc = new MachineDescriptionLOONGARCH64(); + } else { + try { + machDesc = (MachineDescription) +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_loongarch64/LinuxLOONGARCH64JavaThreadPDAccess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_loongarch64/LinuxLOONGARCH64JavaThreadPDAccess.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_loongarch64/LinuxLOONGARCH64JavaThreadPDAccess.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_loongarch64/LinuxLOONGARCH64JavaThreadPDAccess.java 2024-01-30 10:00:13.264748090 +0800 +@@ -0,0 +1,133 @@ ++/* ++ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.linux_loongarch64; ++ ++import java.io.*; ++import java.util.*; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; ++import sun.jvm.hotspot.runtime.*; ++import sun.jvm.hotspot.runtime.loongarch64.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.utilities.*; ++ ++public class LinuxLOONGARCH64JavaThreadPDAccess implements JavaThreadPDAccess { ++ private static AddressField lastJavaFPField; ++ private static AddressField osThreadField; ++ ++ // Field from OSThread ++ private static CIntegerField osThreadThreadIDField; ++ ++ // This is currently unneeded but is being kept in case we change ++ // the currentFrameGuess algorithm ++ private static final long GUESS_SCAN_RANGE = 128 * 1024; ++ ++ static { ++ VM.registerVMInitializedObserver(new Observer() { ++ public void update(Observable o, Object data) { ++ initialize(VM.getVM().getTypeDataBase()); ++ } ++ }); ++ } ++ ++ private static synchronized void initialize(TypeDataBase db) { ++ Type type = db.lookupType("JavaThread"); ++ osThreadField = type.getAddressField("_osthread"); ++ ++ Type anchorType = db.lookupType("JavaFrameAnchor"); ++ lastJavaFPField = anchorType.getAddressField("_last_Java_fp"); ++ ++ Type osThreadType = db.lookupType("OSThread"); ++ osThreadThreadIDField = osThreadType.getCIntegerField("_thread_id"); ++ } ++ ++ public Address getLastJavaFP(Address addr) { ++ return lastJavaFPField.getValue(addr.addOffsetTo(sun.jvm.hotspot.runtime.JavaThread.getAnchorField().getOffset())); ++ } ++ ++ public Address getLastJavaPC(Address addr) { ++ return null; ++ } ++ ++ public Address getBaseOfStackPointer(Address addr) { ++ return null; ++ } ++ ++ public Frame getLastFramePD(JavaThread thread, Address addr) { ++ Address fp = thread.getLastJavaFP(); ++ if (fp == null) { ++ return null; // no information ++ } ++ return new LOONGARCH64Frame(thread.getLastJavaSP(), fp); ++ } ++ ++ public RegisterMap newRegisterMap(JavaThread thread, boolean updateMap) { ++ return new LOONGARCH64RegisterMap(thread, updateMap); ++ } ++ ++ public Frame getCurrentFrameGuess(JavaThread thread, Address addr) { ++ ThreadProxy t = getThreadProxy(addr); ++ LOONGARCH64ThreadContext context = (LOONGARCH64ThreadContext) t.getContext(); ++ LOONGARCH64CurrentFrameGuess guesser = new LOONGARCH64CurrentFrameGuess(context, thread); ++ if (!guesser.run(GUESS_SCAN_RANGE)) { ++ return null; ++ } ++ if (guesser.getPC() == null) { ++ return new LOONGARCH64Frame(guesser.getSP(), guesser.getFP()); ++ } else { ++ return new LOONGARCH64Frame(guesser.getSP(), guesser.getFP(), guesser.getPC()); ++ } ++ } ++ ++ public void printThreadIDOn(Address addr, PrintStream tty) { ++ tty.print(getThreadProxy(addr)); ++ } ++ ++ public void printInfoOn(Address threadAddr, PrintStream tty) { ++ tty.print("Thread id: "); ++ printThreadIDOn(threadAddr, tty); ++ // tty.println("\nPostJavaState: " + getPostJavaState(threadAddr)); ++ } ++ ++ public Address getLastSP(Address addr) { ++ ThreadProxy t = getThreadProxy(addr); ++ LOONGARCH64ThreadContext context = (LOONGARCH64ThreadContext) t.getContext(); ++ return context.getRegisterAsAddress(LOONGARCH64ThreadContext.SP); ++ } ++ ++ public ThreadProxy getThreadProxy(Address addr) { ++ // Addr is the address of the JavaThread. 
++ // Fetch the OSThread (for now and for simplicity, not making a ++ // separate "OSThread" class in this package) ++ Address osThreadAddr = osThreadField.getValue(addr); ++ // Get the address of the _thread_id from the OSThread ++ Address threadIdAddr = osThreadAddr.addOffsetTo(osThreadThreadIDField.getOffset()); ++ ++ JVMDebugger debugger = VM.getVM().getDebugger(); ++ return debugger.getThreadForIdentifierAddress(threadIdAddr); ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_mips64/LinuxMIPS64JavaThreadPDAccess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_mips64/LinuxMIPS64JavaThreadPDAccess.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_mips64/LinuxMIPS64JavaThreadPDAccess.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_mips64/LinuxMIPS64JavaThreadPDAccess.java 2024-01-30 10:00:13.264748090 +0800 +@@ -0,0 +1,133 @@ ++/* ++ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.linux_mips64; ++ ++import java.io.*; ++import java.util.*; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.runtime.*; ++import sun.jvm.hotspot.runtime.mips64.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.utilities.*; ++ ++public class LinuxMIPS64JavaThreadPDAccess implements JavaThreadPDAccess { ++ private static AddressField osThreadField; ++ ++ // Field from OSThread ++ private static CIntegerField osThreadThreadIDField; ++ ++ // This is currently unneeded but is being kept in case we change ++ // the currentFrameGuess algorithm ++ private static final long GUESS_SCAN_RANGE = 128 * 1024; ++ ++ static { ++ VM.registerVMInitializedObserver(new Observer() { ++ public void update(Observable o, Object data) { ++ initialize(VM.getVM().getTypeDataBase()); ++ } ++ }); ++ } ++ ++ private static synchronized void initialize(TypeDataBase db) { ++ Type type = db.lookupType("JavaThread"); ++ osThreadField = type.getAddressField("_osthread"); ++ ++ Type osThreadType = db.lookupType("OSThread"); ++ osThreadThreadIDField = osThreadType.getCIntegerField("_thread_id"); ++ } ++ ++ public Address getLastJavaFP(Address addr) { ++ return null; ++ } ++ ++ public Address getLastJavaPC(Address addr) { ++ return null; ++ } ++ ++ public Address getBaseOfStackPointer(Address addr) { ++ return null; ++ } ++ ++ public Frame getLastFramePD(JavaThread thread, Address addr) { ++ Address fp = thread.getLastJavaFP(); ++ if (fp == null) { ++ return null; // no information ++ } ++ return new MIPS64Frame(thread.getLastJavaSP(), fp); ++ } ++ ++ public RegisterMap newRegisterMap(JavaThread thread, boolean updateMap) { ++ return new MIPS64RegisterMap(thread, updateMap); ++ } ++ ++ public Frame getCurrentFrameGuess(JavaThread thread, Address addr) { ++ ThreadProxy t = getThreadProxy(addr); ++ MIPS64ThreadContext context = (MIPS64ThreadContext) t.getContext(); ++ MIPS64CurrentFrameGuess guesser = new MIPS64CurrentFrameGuess(context, thread); ++ if (!guesser.run(GUESS_SCAN_RANGE)) { ++ return null; ++ } ++ if (guesser.getPC() == null) { ++ return new MIPS64Frame(guesser.getSP(), guesser.getFP()); ++ } else { ++ return new MIPS64Frame(guesser.getSP(), guesser.getFP(), guesser.getPC()); ++ } ++ } ++ ++ public void printThreadIDOn(Address addr, PrintStream tty) { ++ tty.print(getThreadProxy(addr)); ++ } ++ ++ public void printInfoOn(Address threadAddr, PrintStream tty) { ++ tty.print("Thread id: "); ++ printThreadIDOn(threadAddr, tty); ++ // tty.println("\nPostJavaState: " + getPostJavaState(threadAddr)); ++ } ++ ++ public Address getLastSP(Address addr) { ++ ThreadProxy t = getThreadProxy(addr); ++ MIPS64ThreadContext context = (MIPS64ThreadContext) t.getContext(); ++ return context.getRegisterAsAddress(MIPS64ThreadContext.SP); ++ } ++ ++ public Address getLastFP(Address addr) { ++ return getLastSP(addr).getAddressAt(0); ++ } ++ ++ public ThreadProxy getThreadProxy(Address addr) { ++ // Addr is the address of the JavaThread. 
++ // Fetch the OSThread (for now and for simplicity, not making a ++ // separate "OSThread" class in this package) ++ Address osThreadAddr = osThreadField.getValue(addr); ++ // Get the address of the _thread_id from the OSThread ++ Address threadIdAddr = osThreadAddr.addOffsetTo(osThreadThreadIDField.getOffset()); ++ ++ JVMDebugger debugger = VM.getVM().getDebugger(); ++ return debugger.getThreadForIdentifierAddress(threadIdAddr); ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64CurrentFrameGuess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64CurrentFrameGuess.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64CurrentFrameGuess.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64CurrentFrameGuess.java 2024-01-30 10:00:13.264748090 +0800 +@@ -0,0 +1,250 @@ ++/* ++ * Copyright (c) 2001, 2006, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; ++import sun.jvm.hotspot.code.*; ++import sun.jvm.hotspot.interpreter.*; ++import sun.jvm.hotspot.runtime.*; ++ ++/**

++   Should be able to be used on all loongarch64 platforms we support
++   (Win32, Solaris/loongarch64, and soon Linux) to implement JavaThread's
++   "currentFrameGuess()" functionality. Input is an LOONGARCH64ThreadContext;
++   output is SP, FP, and PC for an LOONGARCH64Frame. Instantiation of the
++   LOONGARCH64Frame is left to the caller, since we may need to subclass
++   LOONGARCH64Frame to support signal handler frames on Unix platforms.
++
++   Algorithm is to walk up the stack within a given range (say,
++   512K at most) looking for a plausible PC and SP for a Java frame,
++   also considering those coming in from the context. If we find a PC
++   that belongs to the VM (i.e., in generated code like the
++   interpreter or CodeCache) then we try to find an associated EBP.
++   We repeat this until we either find a complete frame or run out of
++   stack to look at.
*/ ++ ++public class LOONGARCH64CurrentFrameGuess { ++ private LOONGARCH64ThreadContext context; ++ private JavaThread thread; ++ private Address spFound; ++ private Address fpFound; ++ private Address pcFound; ++ ++ private static final boolean DEBUG = System.getProperty("sun.jvm.hotspot.runtime.loongarch64.LOONGARCH64Frame.DEBUG") ++ != null; ++ ++ public LOONGARCH64CurrentFrameGuess(LOONGARCH64ThreadContext context, ++ JavaThread thread) { ++ this.context = context; ++ this.thread = thread; ++ } ++ ++ /** Returns false if not able to find a frame within a reasonable range. */ ++ public boolean run(long regionInBytesToSearch) { ++ Address sp = context.getRegisterAsAddress(LOONGARCH64ThreadContext.SP); ++ Address pc = context.getRegisterAsAddress(LOONGARCH64ThreadContext.PC); ++ Address fp = context.getRegisterAsAddress(LOONGARCH64ThreadContext.FP); ++ if (sp == null) { ++ // Bail out if no last java frame eithe ++ if (thread.getLastJavaSP() != null) { ++ setValues(thread.getLastJavaSP(), thread.getLastJavaFP(), null); ++ return true; ++ } ++ // Bail out ++ return false; ++ } ++ Address end = sp.addOffsetTo(regionInBytesToSearch); ++ VM vm = VM.getVM(); ++ ++ setValues(null, null, null); // Assume we're not going to find anything ++ ++ if (vm.isJavaPCDbg(pc)) { ++ if (vm.isClientCompiler()) { ++ // If the topmost frame is a Java frame, we are (pretty much) ++ // guaranteed to have a viable EBP. We should be more robust ++ // than this (we have the potential for losing entire threads' ++ // stack traces) but need to see how much work we really have ++ // to do here. Searching the stack for an (SP, FP) pair is ++ // hard since it's easy to misinterpret inter-frame stack ++ // pointers as base-of-frame pointers; we also don't know the ++ // sizes of C1 frames (not registered in the nmethod) so can't ++ // derive them from ESP. ++ ++ setValues(sp, fp, pc); ++ return true; ++ } else { ++ if (vm.getInterpreter().contains(pc)) { ++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: choosing interpreter frame: sp = " + ++ sp + ", fp = " + fp + ", pc = " + pc); ++ } ++ setValues(sp, fp, pc); ++ return true; ++ } ++ ++ // For the server compiler, EBP is not guaranteed to be valid ++ // for compiled code. In addition, an earlier attempt at a ++ // non-searching algorithm (see below) failed because the ++ // stack pointer from the thread context was pointing ++ // (considerably) beyond the ostensible end of the stack, into ++ // garbage; walking from the topmost frame back caused a crash. ++ // ++ // This algorithm takes the current PC as a given and tries to ++ // find the correct corresponding SP by walking up the stack ++ // and repeatedly performing stackwalks (very inefficient). ++ // ++ // FIXME: there is something wrong with stackwalking across ++ // adapter frames...this is likely to be the root cause of the ++ // failure with the simpler algorithm below. ++ ++ for (long offset = 0; ++ offset < regionInBytesToSearch; ++ offset += vm.getAddressSize()) { ++ try { ++ Address curSP = sp.addOffsetTo(offset); ++ Frame frame = new LOONGARCH64Frame(curSP, null, pc); ++ RegisterMap map = thread.newRegisterMap(false); ++ while (frame != null) { ++ if (frame.isEntryFrame() && frame.entryFrameIsFirst()) { ++ // We were able to traverse all the way to the ++ // bottommost Java frame. ++ // This sp looks good. Keep it. 
++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: Choosing sp = " + curSP + ", pc = " + pc); ++ } ++ setValues(curSP, null, pc); ++ return true; ++ } ++ frame = frame.sender(map); ++ } ++ } catch (Exception e) { ++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: Exception " + e + " at offset " + offset); ++ } ++ // Bad SP. Try another. ++ } ++ } ++ ++ // Were not able to find a plausible SP to go with this PC. ++ // Bail out. ++ return false; ++ ++ /* ++ // Original algorithm which does not work because SP was ++ // pointing beyond where it should have: ++ ++ // For the server compiler, EBP is not guaranteed to be valid ++ // for compiled code. We see whether the PC is in the ++ // interpreter and take care of that, otherwise we run code ++ // (unfortunately) duplicated from LOONGARCH64Frame.senderForCompiledFrame. ++ ++ CodeCache cc = vm.getCodeCache(); ++ if (cc.contains(pc)) { ++ CodeBlob cb = cc.findBlob(pc); ++ ++ // See if we can derive a frame pointer from SP and PC ++ // NOTE: This is the code duplicated from LOONGARCH64Frame ++ Address saved_fp = null; ++ int llink_offset = cb.getLinkOffset(); ++ if (llink_offset >= 0) { ++ // Restore base-pointer, since next frame might be an interpreter frame. ++ Address fp_addr = sp.addOffsetTo(VM.getVM().getAddressSize() * llink_offset); ++ saved_fp = fp_addr.getAddressAt(0); ++ } ++ ++ setValues(sp, saved_fp, pc); ++ return true; ++ } ++ */ ++ } ++ } else { ++ // If the current program counter was not known to us as a Java ++ // PC, we currently assume that we are in the run-time system ++ // and attempt to look to thread-local storage for saved ESP and ++ // EBP. Note that if these are null (because we were, in fact, ++ // in Java code, i.e., vtable stubs or similar, and the SA ++ // didn't have enough insight into the target VM to understand ++ // that) then we are going to lose the entire stack trace for ++ // the thread, which is sub-optimal. FIXME. ++ ++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: choosing last Java frame: sp = " + ++ thread.getLastJavaSP() + ", fp = " + thread.getLastJavaFP()); ++ } ++ if (thread.getLastJavaSP() == null) { ++ return false; // No known Java frames on stack ++ } ++ ++ // The runtime has a nasty habit of not saving fp in the frame ++ // anchor, leaving us to grovel about in the stack to find a ++ // plausible address. Fortunately, this only happens in ++ // compiled code; there we always have a valid PC, and we always ++ // push LR and FP onto the stack as a pair, with FP at the lower ++ // address. ++ pc = thread.getLastJavaPC(); ++ fp = thread.getLastJavaFP(); ++ sp = thread.getLastJavaSP(); ++ ++ if (fp == null) { ++ CodeCache cc = vm.getCodeCache(); ++ if (cc.contains(pc)) { ++ CodeBlob cb = cc.findBlob(pc); ++ if (DEBUG) { ++ System.out.println("FP is null. Found blob frame size " + cb.getFrameSize()); ++ } ++ // See if we can derive a frame pointer from SP and PC ++ long link_offset = cb.getFrameSize() - 2 * VM.getVM().getAddressSize(); ++ if (link_offset >= 0) { ++ fp = sp.addOffsetTo(link_offset); ++ } ++ } ++ } ++ ++ // We found a PC in the frame anchor. Check that it's plausible, and ++ // if it is, use it. 
++ if (vm.isJavaPCDbg(pc)) { ++ setValues(sp, fp, pc); ++ } else { ++ setValues(sp, fp, null); ++ } ++ ++ return true; ++ } ++ } ++ ++ public Address getSP() { return spFound; } ++ public Address getFP() { return fpFound; } ++ /** May be null if getting values from thread-local storage; take ++ care to call the correct LOONGARCH64Frame constructor to recover this if ++ necessary */ ++ public Address getPC() { return pcFound; } ++ ++ private void setValues(Address sp, Address fp, Address pc) { ++ spFound = sp; ++ fpFound = fp; ++ pcFound = pc; ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64Frame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64Frame.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64Frame.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64Frame.java 2024-01-30 10:00:13.264748090 +0800 +@@ -0,0 +1,526 @@ ++/* ++ * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.loongarch64; ++ ++import java.util.*; ++import sun.jvm.hotspot.code.*; ++import sun.jvm.hotspot.compiler.*; ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.oops.*; ++import sun.jvm.hotspot.runtime.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.utilities.*; ++ ++/** Specialization of and implementation of abstract methods of the ++ Frame class for the loongarch64 family of CPUs. 
*/ ++ ++public class LOONGARCH64Frame extends Frame { ++ private static final boolean DEBUG; ++ static { ++ DEBUG = System.getProperty("sun.jvm.hotspot.runtime.loongarch64.LOONGARCH64Frame.DEBUG") != null; ++ } ++ ++ // Java frames ++ private static final int JAVA_FRAME_LINK_OFFSET = 0; ++ private static final int JAVA_FRAME_RETURN_ADDR_OFFSET = 1; ++ private static final int JAVA_FRAME_SENDER_SP_OFFSET = 2; ++ ++ // Native frames ++ private static final int NATIVE_FRAME_LINK_OFFSET = -2; ++ private static final int NATIVE_FRAME_RETURN_ADDR_OFFSET = -1; ++ private static final int NATIVE_FRAME_SENDER_SP_OFFSET = 0; ++ ++ // Interpreter frames ++ private static final int INTERPRETER_FRAME_SENDER_SP_OFFSET = -1; ++ private static final int INTERPRETER_FRAME_LAST_SP_OFFSET = INTERPRETER_FRAME_SENDER_SP_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_LOCALS_OFFSET = INTERPRETER_FRAME_LAST_SP_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_METHOD_OFFSET = INTERPRETER_FRAME_LOCALS_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_MIRROR_OFFSET = INTERPRETER_FRAME_METHOD_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_MDX_OFFSET = INTERPRETER_FRAME_MIRROR_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_CACHE_OFFSET = INTERPRETER_FRAME_MDX_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_BCX_OFFSET = INTERPRETER_FRAME_CACHE_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_INITIAL_SP_OFFSET = INTERPRETER_FRAME_BCX_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; ++ private static final int INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; ++ ++ // Entry frames ++ private static final int ENTRY_FRAME_CALL_WRAPPER_OFFSET = -9; ++ ++ private static VMReg fp = new VMReg(22 << 1); ++ ++ // an additional field beyond sp and pc: ++ Address raw_fp; // frame pointer ++ private Address raw_unextendedSP; ++ ++ private LOONGARCH64Frame() { ++ } ++ ++ private void adjustForDeopt() { ++ if ( pc != null) { ++ // Look for a deopt pc and if it is deopted convert to original pc ++ CodeBlob cb = VM.getVM().getCodeCache().findBlob(pc); ++ if (cb != null && cb.isJavaMethod()) { ++ NMethod nm = (NMethod) cb; ++ if (pc.equals(nm.deoptHandlerBegin())) { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(this.getUnextendedSP() != null, "null SP in Java frame"); ++ } ++ // adjust pc if frame is deoptimized. 
++ pc = this.getUnextendedSP().getAddressAt(nm.origPCOffset()); ++ deoptimized = true; ++ } ++ } ++ } ++ } ++ ++ public LOONGARCH64Frame(Address raw_sp, Address raw_fp, Address pc) { ++ this.raw_sp = raw_sp; ++ this.raw_unextendedSP = raw_sp; ++ this.raw_fp = raw_fp; ++ this.pc = pc; ++ adjustUnextendedSP(); ++ ++ // Frame must be fully constructed before this call ++ adjustForDeopt(); ++ ++ if (DEBUG) { ++ System.out.println("LOONGARCH64Frame(sp, fp, pc): " + this); ++ dumpStack(); ++ } ++ } ++ ++ public LOONGARCH64Frame(Address raw_sp, Address raw_fp) { ++ this.raw_sp = raw_sp; ++ this.raw_unextendedSP = raw_sp; ++ this.raw_fp = raw_fp; ++ this.pc = raw_fp.getAddressAt(1 * VM.getVM().getAddressSize()); ++ adjustUnextendedSP(); ++ ++ // Frame must be fully constructed before this call ++ adjustForDeopt(); ++ ++ if (DEBUG) { ++ System.out.println("LOONGARCH64Frame(sp, fp): " + this); ++ dumpStack(); ++ } ++ } ++ ++ public LOONGARCH64Frame(Address raw_sp, Address raw_unextendedSp, Address raw_fp, Address pc) { ++ this.raw_sp = raw_sp; ++ this.raw_unextendedSP = raw_unextendedSp; ++ this.raw_fp = raw_fp; ++ this.pc = pc; ++ adjustUnextendedSP(); ++ ++ // Frame must be fully constructed before this call ++ adjustForDeopt(); ++ ++ if (DEBUG) { ++ System.out.println("LOONGARCH64Frame(sp, unextendedSP, fp, pc): " + this); ++ dumpStack(); ++ } ++ ++ } ++ ++ public Object clone() { ++ LOONGARCH64Frame frame = new LOONGARCH64Frame(); ++ frame.raw_sp = raw_sp; ++ frame.raw_unextendedSP = raw_unextendedSP; ++ frame.raw_fp = raw_fp; ++ frame.pc = pc; ++ frame.deoptimized = deoptimized; ++ return frame; ++ } ++ ++ public boolean equals(Object arg) { ++ if (arg == null) { ++ return false; ++ } ++ ++ if (!(arg instanceof LOONGARCH64Frame)) { ++ return false; ++ } ++ ++ LOONGARCH64Frame other = (LOONGARCH64Frame) arg; ++ ++ return (AddressOps.equal(getSP(), other.getSP()) && ++ AddressOps.equal(getUnextendedSP(), other.getUnextendedSP()) && ++ AddressOps.equal(getFP(), other.getFP()) && ++ AddressOps.equal(getPC(), other.getPC())); ++ } ++ ++ public int hashCode() { ++ if (raw_sp == null) { ++ return 0; ++ } ++ ++ return raw_sp.hashCode(); ++ } ++ ++ public String toString() { ++ return "sp: " + (getSP() == null? "null" : getSP().toString()) + ++ ", unextendedSP: " + (getUnextendedSP() == null? "null" : getUnextendedSP().toString()) + ++ ", fp: " + (getFP() == null? "null" : getFP().toString()) + ++ ", pc: " + (pc == null? "null" : pc.toString()); ++ } ++ ++ // accessors for the instance variables ++ public Address getFP() { return raw_fp; } ++ public Address getSP() { return raw_sp; } ++ public Address getID() { return raw_sp; } ++ ++ // FIXME: not implemented yet (should be done for Solaris/LOONGARCH) ++ public boolean isSignalHandlerFrameDbg() { return false; } ++ public int getSignalNumberDbg() { return 0; } ++ public String getSignalNameDbg() { return null; } ++ ++ public boolean isInterpretedFrameValid() { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(isInterpretedFrame(), "Not an interpreted frame"); ++ } ++ ++ // These are reasonable sanity checks ++ if (getFP() == null || getFP().andWithMask(0x3) != null) { ++ return false; ++ } ++ ++ if (getSP() == null || getSP().andWithMask(0x3) != null) { ++ return false; ++ } ++ ++ if (getFP().addOffsetTo(INTERPRETER_FRAME_INITIAL_SP_OFFSET * VM.getVM().getAddressSize()).lessThan(getSP())) { ++ return false; ++ } ++ ++ // These are hacks to keep us out of trouble. 
++ // The problem with these is that they mask other problems ++ if (getFP().lessThanOrEqual(getSP())) { ++ // this attempts to deal with unsigned comparison above ++ return false; ++ } ++ ++ if (getFP().minus(getSP()) > 4096 * VM.getVM().getAddressSize()) { ++ // stack frames shouldn't be large. ++ return false; ++ } ++ ++ return true; ++ } ++ ++ // FIXME: not applicable in current system ++ // void patch_pc(Thread* thread, address pc); ++ ++ public Frame sender(RegisterMap regMap, CodeBlob cb) { ++ LOONGARCH64RegisterMap map = (LOONGARCH64RegisterMap) regMap; ++ ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map != null, "map must be set"); ++ } ++ ++ // Default is we done have to follow them. The sender_for_xxx will ++ // update it accordingly ++ map.setIncludeArgumentOops(false); ++ ++ if (isEntryFrame()) return senderForEntryFrame(map); ++ if (isInterpretedFrame()) return senderForInterpreterFrame(map); ++ ++ if(cb == null) { ++ cb = VM.getVM().getCodeCache().findBlob(getPC()); ++ } else { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(cb.equals(VM.getVM().getCodeCache().findBlob(getPC())), "Must be the same"); ++ } ++ } ++ ++ if (cb != null) { ++ return senderForCompiledFrame(map, cb); ++ } ++ ++ // Must be native-compiled frame, i.e. the marshaling code for native ++ // methods that exists in the core system. ++ return new LOONGARCH64Frame(getSenderSP(), getLink(), getSenderPC()); ++ } ++ ++ private Frame senderForEntryFrame(LOONGARCH64RegisterMap map) { ++ if (DEBUG) { ++ System.out.println("senderForEntryFrame"); ++ } ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map != null, "map must be set"); ++ } ++ // Java frame called from C; skip all C frames and return top C ++ // frame of that chunk as the sender ++ LOONGARCH64JavaCallWrapper jcw = (LOONGARCH64JavaCallWrapper) getEntryFrameCallWrapper(); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(!entryFrameIsFirst(), "next Java fp must be non zero"); ++ Assert.that(jcw.getLastJavaSP().greaterThan(getSP()), "must be above this frame on stack"); ++ } ++ LOONGARCH64Frame fr; ++ if (jcw.getLastJavaPC() != null) { ++ fr = new LOONGARCH64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP(), jcw.getLastJavaPC()); ++ } else { ++ fr = new LOONGARCH64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP()); ++ } ++ map.clear(); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map.getIncludeArgumentOops(), "should be set by clear"); ++ } ++ return fr; ++ } ++ ++ //------------------------------------------------------------------------------ ++ // frame::adjust_unextended_sp ++ private void adjustUnextendedSP() { ++ // On loongarch, sites calling method handle intrinsics and lambda forms are treated ++ // as any other call site. Therefore, no special action is needed when we are ++ // returning to any of these call sites. ++ ++ CodeBlob cb = cb(); ++ NMethod senderNm = (cb == null) ? null : cb.asNMethodOrNull(); ++ if (senderNm != null) { ++ // If the sender PC is a deoptimization point, get the original PC. 
++ if (senderNm.isDeoptEntry(getPC()) || ++ senderNm.isDeoptMhEntry(getPC())) { ++ // DEBUG_ONLY(verifyDeoptriginalPc(senderNm, raw_unextendedSp)); ++ } ++ } ++ } ++ ++ private Frame senderForInterpreterFrame(LOONGARCH64RegisterMap map) { ++ if (DEBUG) { ++ System.out.println("senderForInterpreterFrame"); ++ } ++ Address unextendedSP = addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); ++ Address sp = getSenderSP(); ++ // We do not need to update the callee-save register mapping because above ++ // us is either another interpreter frame or a converter-frame, but never ++ // directly a compiled frame. ++ // 11/24/04 SFG. With the removal of adapter frames this is no longer true. ++ // However c2 no longer uses callee save register for java calls so there ++ // are no callee register to find. ++ ++ if (map.getUpdateMap()) ++ updateMapWithSavedLink(map, addressOfStackSlot(JAVA_FRAME_LINK_OFFSET)); ++ ++ return new LOONGARCH64Frame(sp, unextendedSP, getLink(), getSenderPC()); ++ } ++ ++ private void updateMapWithSavedLink(RegisterMap map, Address savedFPAddr) { ++ map.setLocation(fp, savedFPAddr); ++ } ++ ++ private Frame senderForCompiledFrame(LOONGARCH64RegisterMap map, CodeBlob cb) { ++ if (DEBUG) { ++ System.out.println("senderForCompiledFrame"); ++ } ++ ++ // ++ // NOTE: some of this code is (unfortunately) duplicated in LOONGARCH64CurrentFrameGuess ++ // ++ ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map != null, "map must be set"); ++ } ++ ++ // frame owned by optimizing compiler ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(cb.getFrameSize() >= 0, "must have non-zero frame size"); ++ } ++ Address senderSP = getUnextendedSP().addOffsetTo(cb.getFrameSize()); ++ ++ // On Intel the return_address is always the word on the stack ++ Address senderPC = senderSP.getAddressAt(-1 * VM.getVM().getAddressSize()); ++ ++ // This is the saved value of EBP which may or may not really be an FP. ++ // It is only an FP if the sender is an interpreter frame (or C1?). ++ Address savedFPAddr = senderSP.addOffsetTo(- JAVA_FRAME_SENDER_SP_OFFSET * VM.getVM().getAddressSize()); ++ ++ if (map.getUpdateMap()) { ++ // Tell GC to use argument oopmaps for some runtime stubs that need it. ++ // For C1, the runtime stub might not have oop maps, so set this flag ++ // outside of update_register_map. ++ map.setIncludeArgumentOops(cb.callerMustGCArguments()); ++ ++ if (cb.getOopMaps() != null) { ++ ImmutableOopMapSet.updateRegisterMap(this, cb, map, true); ++ } ++ ++ // Since the prolog does the save and restore of EBP there is no oopmap ++ // for it so we must fill in its location as if there was an oopmap entry ++ // since if our caller was compiled code there could be live jvm state in it. ++ updateMapWithSavedLink(map, savedFPAddr); ++ } ++ ++ return new LOONGARCH64Frame(senderSP, savedFPAddr.getAddressAt(0), senderPC); ++ } ++ ++ protected boolean hasSenderPD() { ++ // FIXME ++ // Check for null ebp? Need to do some tests. 
++ return true; ++ } ++ ++ public long frameSize() { ++ return (getSenderSP().minus(getSP()) / VM.getVM().getAddressSize()); ++ } ++ ++ public Address getLink() { ++ if (isJavaFrame()) ++ return addressOfStackSlot(JAVA_FRAME_LINK_OFFSET).getAddressAt(0); ++ return addressOfStackSlot(NATIVE_FRAME_LINK_OFFSET).getAddressAt(0); ++ } ++ ++ public Address getUnextendedSP() { return raw_unextendedSP; } ++ ++ // Return address: ++ public Address getSenderPCAddr() { ++ if (isJavaFrame()) ++ return addressOfStackSlot(JAVA_FRAME_RETURN_ADDR_OFFSET); ++ return addressOfStackSlot(NATIVE_FRAME_RETURN_ADDR_OFFSET); ++ } ++ ++ public Address getSenderPC() { return getSenderPCAddr().getAddressAt(0); } ++ ++ public Address getSenderSP() { ++ if (isJavaFrame()) ++ return addressOfStackSlot(JAVA_FRAME_SENDER_SP_OFFSET); ++ return addressOfStackSlot(NATIVE_FRAME_SENDER_SP_OFFSET); ++ } ++ ++ public Address addressOfInterpreterFrameLocals() { ++ return addressOfStackSlot(INTERPRETER_FRAME_LOCALS_OFFSET); ++ } ++ ++ private Address addressOfInterpreterFrameBCX() { ++ return addressOfStackSlot(INTERPRETER_FRAME_BCX_OFFSET); ++ } ++ ++ public int getInterpreterFrameBCI() { ++ // FIXME: this is not atomic with respect to GC and is unsuitable ++ // for use in a non-debugging, or reflective, system. Need to ++ // figure out how to express this. ++ Address bcp = addressOfInterpreterFrameBCX().getAddressAt(0); ++ Address methodHandle = addressOfInterpreterFrameMethod().getAddressAt(0); ++ Method method = (Method)Metadata.instantiateWrapperFor(methodHandle); ++ return bcpToBci(bcp, method); ++ } ++ ++ public Address addressOfInterpreterFrameMDX() { ++ return addressOfStackSlot(INTERPRETER_FRAME_MDX_OFFSET); ++ } ++ ++ // FIXME ++ //inline int frame::interpreter_frame_monitor_size() { ++ // return BasicObjectLock::size(); ++ //} ++ ++ // expression stack ++ // (the max_stack arguments are used by the GC; see class FrameClosure) ++ ++ public Address addressOfInterpreterFrameExpressionStack() { ++ Address monitorEnd = interpreterFrameMonitorEnd().address(); ++ return monitorEnd.addOffsetTo(-1 * VM.getVM().getAddressSize()); ++ } ++ ++ public int getInterpreterFrameExpressionStackDirection() { return -1; } ++ ++ // top of expression stack ++ public Address addressOfInterpreterFrameTOS() { ++ return getSP(); ++ } ++ ++ /** Expression stack from top down */ ++ public Address addressOfInterpreterFrameTOSAt(int slot) { ++ return addressOfInterpreterFrameTOS().addOffsetTo(slot * VM.getVM().getAddressSize()); ++ } ++ ++ public Address getInterpreterFrameSenderSP() { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(isInterpretedFrame(), "interpreted frame expected"); ++ } ++ return addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); ++ } ++ ++ // Monitors ++ public BasicObjectLock interpreterFrameMonitorBegin() { ++ return new BasicObjectLock(addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET)); ++ } ++ ++ public BasicObjectLock interpreterFrameMonitorEnd() { ++ Address result = addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET).getAddressAt(0); ++ if (Assert.ASSERTS_ENABLED) { ++ // make sure the pointer points inside the frame ++ Assert.that(AddressOps.gt(getFP(), result), "result must < than frame pointer"); ++ Assert.that(AddressOps.lte(getSP(), result), "result must >= than stack pointer"); ++ } ++ return new BasicObjectLock(result); ++ } ++ ++ public int interpreterFrameMonitorSize() { ++ return BasicObjectLock.size(); ++ } ++ ++ // Method ++ public Address 
addressOfInterpreterFrameMethod() { ++ return addressOfStackSlot(INTERPRETER_FRAME_METHOD_OFFSET); ++ } ++ ++ // Constant pool cache ++ public Address addressOfInterpreterFrameCPCache() { ++ return addressOfStackSlot(INTERPRETER_FRAME_CACHE_OFFSET); ++ } ++ ++ // Entry frames ++ public JavaCallWrapper getEntryFrameCallWrapper() { ++ return new LOONGARCH64JavaCallWrapper(addressOfStackSlot(ENTRY_FRAME_CALL_WRAPPER_OFFSET).getAddressAt(0)); ++ } ++ ++ protected Address addressOfSavedOopResult() { ++ // offset is 2 for compiler2 and 3 for compiler1 ++ return getSP().addOffsetTo((VM.getVM().isClientCompiler() ? 2 : 3) * ++ VM.getVM().getAddressSize()); ++ } ++ ++ protected Address addressOfSavedReceiver() { ++ return getSP().addOffsetTo(-4 * VM.getVM().getAddressSize()); ++ } ++ ++ private void dumpStack() { ++ if (getFP() != null) { ++ for (Address addr = getSP().addOffsetTo(-5 * VM.getVM().getAddressSize()); ++ AddressOps.lte(addr, getFP().addOffsetTo(5 * VM.getVM().getAddressSize())); ++ addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { ++ System.out.println(addr + ": " + addr.getAddressAt(0)); ++ } ++ } else { ++ for (Address addr = getSP().addOffsetTo(-5 * VM.getVM().getAddressSize()); ++ AddressOps.lte(addr, getSP().addOffsetTo(20 * VM.getVM().getAddressSize())); ++ addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { ++ System.out.println(addr + ": " + addr.getAddressAt(0)); ++ } ++ } ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64JavaCallWrapper.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64JavaCallWrapper.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64JavaCallWrapper.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64JavaCallWrapper.java 2024-01-30 10:00:13.264748090 +0800 +@@ -0,0 +1,57 @@ ++/* ++ * Copyright (c) 2001, 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.loongarch64; ++ ++import java.util.*; ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.runtime.*; ++ ++public class LOONGARCH64JavaCallWrapper extends JavaCallWrapper { ++ private static AddressField lastJavaFPField; ++ ++ static { ++ VM.registerVMInitializedObserver(new Observer() { ++ public void update(Observable o, Object data) { ++ initialize(VM.getVM().getTypeDataBase()); ++ } ++ }); ++ } ++ ++ private static synchronized void initialize(TypeDataBase db) { ++ Type type = db.lookupType("JavaFrameAnchor"); ++ ++ lastJavaFPField = type.getAddressField("_last_Java_fp"); ++ } ++ ++ public LOONGARCH64JavaCallWrapper(Address addr) { ++ super(addr); ++ } ++ ++ public Address getLastJavaFP() { ++ return lastJavaFPField.getValue(addr.addOffsetTo(anchorField.getOffset())); ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64RegisterMap.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64RegisterMap.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64RegisterMap.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64RegisterMap.java 2024-01-30 10:00:13.264748090 +0800 +@@ -0,0 +1,52 @@ ++/* ++ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.runtime.*; ++ ++public class LOONGARCH64RegisterMap extends RegisterMap { ++ ++ /** This is the only public constructor */ ++ public LOONGARCH64RegisterMap(JavaThread thread, boolean updateMap) { ++ super(thread, updateMap); ++ } ++ ++ protected LOONGARCH64RegisterMap(RegisterMap map) { ++ super(map); ++ } ++ ++ public Object clone() { ++ LOONGARCH64RegisterMap retval = new LOONGARCH64RegisterMap(this); ++ return retval; ++ } ++ ++ // no PD state to clear or copy: ++ protected void clearPD() {} ++ protected void initializePD() {} ++ protected void initializeFromPD(RegisterMap map) {} ++ protected Address getLocationPD(VMReg reg) { return null; } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64CurrentFrameGuess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64CurrentFrameGuess.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64CurrentFrameGuess.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64CurrentFrameGuess.java 2024-01-30 10:00:13.264748090 +0800 +@@ -0,0 +1,217 @@ ++/* ++ * Copyright (c) 2001, 2006, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.code.*; ++import sun.jvm.hotspot.interpreter.*; ++import sun.jvm.hotspot.runtime.*; ++ ++/**

Should be able to be used on all mips64 platforms we support ++ (Win32, Solaris/mips64, and soon Linux) to implement JavaThread's ++ "currentFrameGuess()" functionality. Input is an MIPS64ThreadContext; ++ output is SP, FP, and PC for an MIPS64Frame. Instantiation of the ++ MIPS64Frame is left to the caller, since we may need to subclass ++ MIPS64Frame to support signal handler frames on Unix platforms. ++ ++ Algorithm is to walk up the stack within a given range (say, ++ 512K at most) looking for a plausible PC and SP for a Java frame, ++ also considering those coming in from the context. If we find a PC ++ that belongs to the VM (i.e., in generated code like the ++ interpreter or CodeCache) then we try to find an associated EBP. ++ We repeat this until we either find a complete frame or run out of ++ stack to look at.
*/ ++ ++public class MIPS64CurrentFrameGuess { ++ private MIPS64ThreadContext context; ++ private JavaThread thread; ++ private Address spFound; ++ private Address fpFound; ++ private Address pcFound; ++ ++ private static final boolean DEBUG = System.getProperty("sun.jvm.hotspot.runtime.mips64.MIPS64Frame.DEBUG") ++ != null; ++ ++ public MIPS64CurrentFrameGuess(MIPS64ThreadContext context, ++ JavaThread thread) { ++ this.context = context; ++ this.thread = thread; ++ } ++ ++ /** Returns false if not able to find a frame within a reasonable range. */ ++ public boolean run(long regionInBytesToSearch) { ++ Address sp = context.getRegisterAsAddress(MIPS64ThreadContext.SP); ++ Address pc = context.getRegisterAsAddress(MIPS64ThreadContext.PC); ++ Address fp = context.getRegisterAsAddress(MIPS64ThreadContext.FP); ++ if (sp == null) { ++ // Bail out if no last java frame either ++ if (thread.getLastJavaSP() != null) { ++ setValues(thread.getLastJavaSP(), thread.getLastJavaFP(), null); ++ return true; ++ } ++ // Bail out ++ return false; ++ } ++ Address end = sp.addOffsetTo(regionInBytesToSearch); ++ VM vm = VM.getVM(); ++ ++ setValues(null, null, null); // Assume we're not going to find anything ++ ++ if (vm.isJavaPCDbg(pc)) { ++ if (vm.isClientCompiler()) { ++ // If the topmost frame is a Java frame, we are (pretty much) ++ // guaranteed to have a viable EBP. We should be more robust ++ // than this (we have the potential for losing entire threads' ++ // stack traces) but need to see how much work we really have ++ // to do here. Searching the stack for an (SP, FP) pair is ++ // hard since it's easy to misinterpret inter-frame stack ++ // pointers as base-of-frame pointers; we also don't know the ++ // sizes of C1 frames (not registered in the nmethod) so can't ++ // derive them from ESP. ++ ++ setValues(sp, fp, pc); ++ return true; ++ } else { ++ if (vm.getInterpreter().contains(pc)) { ++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: choosing interpreter frame: sp = " + ++ sp + ", fp = " + fp + ", pc = " + pc); ++ } ++ setValues(sp, fp, pc); ++ return true; ++ } ++ ++ // For the server compiler, EBP is not guaranteed to be valid ++ // for compiled code. In addition, an earlier attempt at a ++ // non-searching algorithm (see below) failed because the ++ // stack pointer from the thread context was pointing ++ // (considerably) beyond the ostensible end of the stack, into ++ // garbage; walking from the topmost frame back caused a crash. ++ // ++ // This algorithm takes the current PC as a given and tries to ++ // find the correct corresponding SP by walking up the stack ++ // and repeatedly performing stackwalks (very inefficient). ++ // ++ // FIXME: there is something wrong with stackwalking across ++ // adapter frames...this is likely to be the root cause of the ++ // failure with the simpler algorithm below. ++ ++ for (long offset = 0; ++ offset < regionInBytesToSearch; ++ offset += vm.getAddressSize()) { ++ try { ++ Address curSP = sp.addOffsetTo(offset); ++ Frame frame = new MIPS64Frame(curSP, null, pc); ++ RegisterMap map = thread.newRegisterMap(false); ++ while (frame != null) { ++ if (frame.isEntryFrame() && frame.entryFrameIsFirst()) { ++ // We were able to traverse all the way to the ++ // bottommost Java frame. ++ // This sp looks good. Keep it.
++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: Choosing sp = " + curSP + ", pc = " + pc); ++ } ++ setValues(curSP, null, pc); ++ return true; ++ } ++ frame = frame.sender(map); ++ } ++ } catch (Exception e) { ++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: Exception " + e + " at offset " + offset); ++ } ++ // Bad SP. Try another. ++ } ++ } ++ ++ // Were not able to find a plausible SP to go with this PC. ++ // Bail out. ++ return false; ++ ++ /* ++ // Original algorithm which does not work because SP was ++ // pointing beyond where it should have: ++ ++ // For the server compiler, EBP is not guaranteed to be valid ++ // for compiled code. We see whether the PC is in the ++ // interpreter and take care of that, otherwise we run code ++ // (unfortunately) duplicated from MIPS64Frame.senderForCompiledFrame. ++ ++ CodeCache cc = vm.getCodeCache(); ++ if (cc.contains(pc)) { ++ CodeBlob cb = cc.findBlob(pc); ++ ++ // See if we can derive a frame pointer from SP and PC ++ // NOTE: This is the code duplicated from MIPS64Frame ++ Address saved_fp = null; ++ int llink_offset = cb.getLinkOffset(); ++ if (llink_offset >= 0) { ++ // Restore base-pointer, since next frame might be an interpreter frame. ++ Address fp_addr = sp.addOffsetTo(VM.getVM().getAddressSize() * llink_offset); ++ saved_fp = fp_addr.getAddressAt(0); ++ } ++ ++ setValues(sp, saved_fp, pc); ++ return true; ++ } ++ */ ++ } ++ } else { ++ // If the current program counter was not known to us as a Java ++ // PC, we currently assume that we are in the run-time system ++ // and attempt to look to thread-local storage for saved ESP and ++ // EBP. Note that if these are null (because we were, in fact, ++ // in Java code, i.e., vtable stubs or similar, and the SA ++ // didn't have enough insight into the target VM to understand ++ // that) then we are going to lose the entire stack trace for ++ // the thread, which is sub-optimal. FIXME. ++ ++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: choosing last Java frame: sp = " + ++ thread.getLastJavaSP() + ", fp = " + thread.getLastJavaFP()); ++ } ++ if (thread.getLastJavaSP() == null) { ++ return false; // No known Java frames on stack ++ } ++ setValues(thread.getLastJavaSP(), thread.getLastJavaFP(), null); ++ return true; ++ } ++ } ++ ++ public Address getSP() { return spFound; } ++ public Address getFP() { return fpFound; } ++ /** May be null if getting values from thread-local storage; take ++ care to call the correct MIPS64Frame constructor to recover this if ++ necessary */ ++ public Address getPC() { return pcFound; } ++ ++ private void setValues(Address sp, Address fp, Address pc) { ++ spFound = sp; ++ fpFound = fp; ++ pcFound = pc; ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64Frame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64Frame.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64Frame.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64Frame.java 2024-01-30 10:00:13.264748090 +0800 +@@ -0,0 +1,537 @@ ++/* ++ * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.mips64; ++ ++import java.util.*; ++import sun.jvm.hotspot.code.*; ++import sun.jvm.hotspot.compiler.*; ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.oops.*; ++import sun.jvm.hotspot.runtime.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.utilities.*; ++ ++/** Specialization of and implementation of abstract methods of the ++ Frame class for the mips64 family of CPUs. */ ++ ++public class MIPS64Frame extends Frame { ++ private static final boolean DEBUG; ++ static { ++ DEBUG = System.getProperty("sun.jvm.hotspot.runtime.mips64.MIPS64Frame.DEBUG") != null; ++ } ++ ++ // All frames ++ private static final int LINK_OFFSET = 0; ++ private static final int RETURN_ADDR_OFFSET = 1; ++ private static final int SENDER_SP_OFFSET = 2; ++ ++ // Interpreter frames ++ private static final int INTERPRETER_FRAME_SENDER_SP_OFFSET = -1; ++ private static final int INTERPRETER_FRAME_LAST_SP_OFFSET = INTERPRETER_FRAME_SENDER_SP_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_METHOD_OFFSET = INTERPRETER_FRAME_LAST_SP_OFFSET - 1; ++ private static int INTERPRETER_FRAME_MIRROR_OFFSET; ++ private static int INTERPRETER_FRAME_MDX_OFFSET; // Non-core builds only ++ private static int INTERPRETER_FRAME_CACHE_OFFSET; ++ private static int INTERPRETER_FRAME_LOCALS_OFFSET; ++ private static int INTERPRETER_FRAME_BCX_OFFSET; ++ private static int INTERPRETER_FRAME_INITIAL_SP_OFFSET; ++ private static int INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET; ++ private static int INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET; ++ ++ // Entry frames ++ private static int ENTRY_FRAME_CALL_WRAPPER_OFFSET; ++ ++ private static VMReg rbp; ++ ++ static { ++ VM.registerVMInitializedObserver(new Observer() { ++ public void update(Observable o, Object data) { ++ initialize(VM.getVM().getTypeDataBase()); ++ } ++ }); ++ } ++ ++ private static synchronized void initialize(TypeDataBase db) { ++ INTERPRETER_FRAME_MIRROR_OFFSET = INTERPRETER_FRAME_METHOD_OFFSET - 1; ++ INTERPRETER_FRAME_MDX_OFFSET = INTERPRETER_FRAME_MIRROR_OFFSET - 1; ++ INTERPRETER_FRAME_CACHE_OFFSET = INTERPRETER_FRAME_MDX_OFFSET - 1; ++ INTERPRETER_FRAME_LOCALS_OFFSET = INTERPRETER_FRAME_CACHE_OFFSET - 1; ++ INTERPRETER_FRAME_BCX_OFFSET = INTERPRETER_FRAME_LOCALS_OFFSET - 1; ++ INTERPRETER_FRAME_INITIAL_SP_OFFSET = INTERPRETER_FRAME_BCX_OFFSET - 1; ++ INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; ++ INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; ++ ++ ENTRY_FRAME_CALL_WRAPPER_OFFSET = 
db.lookupIntConstant("frame::entry_frame_call_wrapper_offset"); ++ if (VM.getVM().getAddressSize() == 4) { ++ rbp = new VMReg(5); ++ } else { ++ rbp = new VMReg(5 << 1); ++ } ++ } ++ ++ ++ // an additional field beyond sp and pc: ++ Address raw_fp; // frame pointer ++ private Address raw_unextendedSP; ++ ++ private MIPS64Frame() { ++ } ++ ++ private void adjustForDeopt() { ++ if ( pc != null) { ++ // Look for a deopt pc and if it is deopted convert to original pc ++ CodeBlob cb = VM.getVM().getCodeCache().findBlob(pc); ++ if (cb != null && cb.isJavaMethod()) { ++ NMethod nm = (NMethod) cb; ++ if (pc.equals(nm.deoptHandlerBegin())) { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(this.getUnextendedSP() != null, "null SP in Java frame"); ++ } ++ // adjust pc if frame is deoptimized. ++ pc = this.getUnextendedSP().getAddressAt(nm.origPCOffset()); ++ deoptimized = true; ++ } ++ } ++ } ++ } ++ ++ public MIPS64Frame(Address raw_sp, Address raw_fp, Address pc) { ++ this.raw_sp = raw_sp; ++ this.raw_unextendedSP = raw_sp; ++ this.raw_fp = raw_fp; ++ this.pc = pc; ++ adjustUnextendedSP(); ++ ++ // Frame must be fully constructed before this call ++ adjustForDeopt(); ++ ++ if (DEBUG) { ++ System.out.println("MIPS64Frame(sp, fp, pc): " + this); ++ dumpStack(); ++ } ++ } ++ ++ public MIPS64Frame(Address raw_sp, Address raw_fp) { ++ this.raw_sp = raw_sp; ++ this.raw_unextendedSP = raw_sp; ++ this.raw_fp = raw_fp; ++ this.pc = raw_sp.getAddressAt(-1 * VM.getVM().getAddressSize()); ++ adjustUnextendedSP(); ++ ++ // Frame must be fully constructed before this call ++ adjustForDeopt(); ++ ++ if (DEBUG) { ++ System.out.println("MIPS64Frame(sp, fp): " + this); ++ dumpStack(); ++ } ++ } ++ ++ public MIPS64Frame(Address raw_sp, Address raw_unextendedSp, Address raw_fp, Address pc) { ++ this.raw_sp = raw_sp; ++ this.raw_unextendedSP = raw_unextendedSp; ++ this.raw_fp = raw_fp; ++ this.pc = pc; ++ adjustUnextendedSP(); ++ ++ // Frame must be fully constructed before this call ++ adjustForDeopt(); ++ ++ if (DEBUG) { ++ System.out.println("MIPS64Frame(sp, unextendedSP, fp, pc): " + this); ++ dumpStack(); ++ } ++ ++ } ++ ++ public Object clone() { ++ MIPS64Frame frame = new MIPS64Frame(); ++ frame.raw_sp = raw_sp; ++ frame.raw_unextendedSP = raw_unextendedSP; ++ frame.raw_fp = raw_fp; ++ frame.pc = pc; ++ frame.deoptimized = deoptimized; ++ return frame; ++ } ++ ++ public boolean equals(Object arg) { ++ if (arg == null) { ++ return false; ++ } ++ ++ if (!(arg instanceof MIPS64Frame)) { ++ return false; ++ } ++ ++ MIPS64Frame other = (MIPS64Frame) arg; ++ ++ return (AddressOps.equal(getSP(), other.getSP()) && ++ AddressOps.equal(getUnextendedSP(), other.getUnextendedSP()) && ++ AddressOps.equal(getFP(), other.getFP()) && ++ AddressOps.equal(getPC(), other.getPC())); ++ } ++ ++ public int hashCode() { ++ if (raw_sp == null) { ++ return 0; ++ } ++ ++ return raw_sp.hashCode(); ++ } ++ ++ public String toString() { ++ return "sp: " + (getSP() == null? "null" : getSP().toString()) + ++ ", unextendedSP: " + (getUnextendedSP() == null? "null" : getUnextendedSP().toString()) + ++ ", fp: " + (getFP() == null? "null" : getFP().toString()) + ++ ", pc: " + (pc == null? 
"null" : pc.toString()); ++ } ++ ++ // accessors for the instance variables ++ public Address getFP() { return raw_fp; } ++ public Address getSP() { return raw_sp; } ++ public Address getID() { return raw_sp; } ++ ++ // FIXME: not implemented yet (should be done for Solaris/MIPS) ++ public boolean isSignalHandlerFrameDbg() { return false; } ++ public int getSignalNumberDbg() { return 0; } ++ public String getSignalNameDbg() { return null; } ++ ++ public boolean isInterpretedFrameValid() { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(isInterpretedFrame(), "Not an interpreted frame"); ++ } ++ ++ // These are reasonable sanity checks ++ if (getFP() == null || getFP().andWithMask(0x3) != null) { ++ return false; ++ } ++ ++ if (getSP() == null || getSP().andWithMask(0x3) != null) { ++ return false; ++ } ++ ++ if (getFP().addOffsetTo(INTERPRETER_FRAME_INITIAL_SP_OFFSET * VM.getVM().getAddressSize()).lessThan(getSP())) { ++ return false; ++ } ++ ++ // These are hacks to keep us out of trouble. ++ // The problem with these is that they mask other problems ++ if (getFP().lessThanOrEqual(getSP())) { ++ // this attempts to deal with unsigned comparison above ++ return false; ++ } ++ ++ if (getFP().minus(getSP()) > 4096 * VM.getVM().getAddressSize()) { ++ // stack frames shouldn't be large. ++ return false; ++ } ++ ++ return true; ++ } ++ ++ // FIXME: not applicable in current system ++ // void patch_pc(Thread* thread, address pc); ++ ++ public Frame sender(RegisterMap regMap, CodeBlob cb) { ++ MIPS64RegisterMap map = (MIPS64RegisterMap) regMap; ++ ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map != null, "map must be set"); ++ } ++ ++ // Default is we done have to follow them. The sender_for_xxx will ++ // update it accordingly ++ map.setIncludeArgumentOops(false); ++ ++ if (isEntryFrame()) return senderForEntryFrame(map); ++ if (isInterpretedFrame()) return senderForInterpreterFrame(map); ++ ++ if(cb == null) { ++ cb = VM.getVM().getCodeCache().findBlob(getPC()); ++ } else { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(cb.equals(VM.getVM().getCodeCache().findBlob(getPC())), "Must be the same"); ++ } ++ } ++ ++ if (cb != null) { ++ return senderForCompiledFrame(map, cb); ++ } ++ ++ // Must be native-compiled frame, i.e. the marshaling code for native ++ // methods that exists in the core system. 
++ return new MIPS64Frame(getSenderSP(), getLink(), getSenderPC()); ++ } ++ ++ private Frame senderForEntryFrame(MIPS64RegisterMap map) { ++ if (DEBUG) { ++ System.out.println("senderForEntryFrame"); ++ } ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map != null, "map must be set"); ++ } ++ // Java frame called from C; skip all C frames and return top C ++ // frame of that chunk as the sender ++ MIPS64JavaCallWrapper jcw = (MIPS64JavaCallWrapper) getEntryFrameCallWrapper(); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(!entryFrameIsFirst(), "next Java fp must be non zero"); ++ Assert.that(jcw.getLastJavaSP().greaterThan(getSP()), "must be above this frame on stack"); ++ } ++ MIPS64Frame fr; ++ if (jcw.getLastJavaPC() != null) { ++ fr = new MIPS64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP(), jcw.getLastJavaPC()); ++ } else { ++ fr = new MIPS64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP()); ++ } ++ map.clear(); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map.getIncludeArgumentOops(), "should be set by clear"); ++ } ++ return fr; ++ } ++ ++ //------------------------------------------------------------------------------ ++ // frame::adjust_unextended_sp ++ private void adjustUnextendedSP() { ++ // On mips, sites calling method handle intrinsics and lambda forms are treated ++ // as any other call site. Therefore, no special action is needed when we are ++ // returning to any of these call sites. ++ ++ CodeBlob cb = cb(); ++ NMethod senderNm = (cb == null) ? null : cb.asNMethodOrNull(); ++ if (senderNm != null) { ++ // If the sender PC is a deoptimization point, get the original PC. ++ if (senderNm.isDeoptEntry(getPC()) || ++ senderNm.isDeoptMhEntry(getPC())) { ++ // DEBUG_ONLY(verifyDeoptriginalPc(senderNm, raw_unextendedSp)); ++ } ++ } ++ } ++ ++ private Frame senderForInterpreterFrame(MIPS64RegisterMap map) { ++ if (DEBUG) { ++ System.out.println("senderForInterpreterFrame"); ++ } ++ Address unextendedSP = addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); ++ Address sp = addressOfStackSlot(SENDER_SP_OFFSET); ++ // We do not need to update the callee-save register mapping because above ++ // us is either another interpreter frame or a converter-frame, but never ++ // directly a compiled frame. ++ // 11/24/04 SFG. With the removal of adapter frames this is no longer true. ++ // However c2 no longer uses callee save register for java calls so there ++ // are no callee register to find. ++ ++ if (map.getUpdateMap()) ++ updateMapWithSavedLink(map, addressOfStackSlot(LINK_OFFSET)); ++ ++ return new MIPS64Frame(sp, unextendedSP, getLink(), getSenderPC()); ++ } ++ ++ private void updateMapWithSavedLink(RegisterMap map, Address savedFPAddr) { ++ map.setLocation(rbp, savedFPAddr); ++ } ++ ++ private Frame senderForCompiledFrame(MIPS64RegisterMap map, CodeBlob cb) { ++ if (DEBUG) { ++ System.out.println("senderForCompiledFrame"); ++ } ++ ++ // ++ // NOTE: some of this code is (unfortunately) duplicated in MIPS64CurrentFrameGuess ++ // ++ ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map != null, "map must be set"); ++ } ++ ++ // frame owned by optimizing compiler ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(cb.getFrameSize() >= 0, "must have non-zero frame size"); ++ } ++ Address senderSP = getUnextendedSP().addOffsetTo(cb.getFrameSize()); ++ ++ // On Intel the return_address is always the word on the stack ++ Address senderPC = senderSP.getAddressAt(-1 * VM.getVM().getAddressSize()); ++ ++ // This is the saved value of EBP which may or may not really be an FP. 
++ // It is only an FP if the sender is an interpreter frame (or C1?). ++ Address savedFPAddr = senderSP.addOffsetTo(- SENDER_SP_OFFSET * VM.getVM().getAddressSize()); ++ ++ if (map.getUpdateMap()) { ++ // Tell GC to use argument oopmaps for some runtime stubs that need it. ++ // For C1, the runtime stub might not have oop maps, so set this flag ++ // outside of update_register_map. ++ map.setIncludeArgumentOops(cb.callerMustGCArguments()); ++ ++ if (cb.getOopMaps() != null) { ++ ImmutableOopMapSet.updateRegisterMap(this, cb, map, true); ++ } ++ ++ // Since the prolog does the save and restore of EBP there is no oopmap ++ // for it so we must fill in its location as if there was an oopmap entry ++ // since if our caller was compiled code there could be live jvm state in it. ++ updateMapWithSavedLink(map, savedFPAddr); ++ } ++ ++ return new MIPS64Frame(senderSP, savedFPAddr.getAddressAt(0), senderPC); ++ } ++ ++ protected boolean hasSenderPD() { ++ // FIXME ++ // Check for null ebp? Need to do some tests. ++ return true; ++ } ++ ++ public long frameSize() { ++ return (getSenderSP().minus(getSP()) / VM.getVM().getAddressSize()); ++ } ++ ++ public Address getLink() { ++ return addressOfStackSlot(LINK_OFFSET).getAddressAt(0); ++ } ++ ++ public Address getUnextendedSP() { return raw_unextendedSP; } ++ ++ // Return address: ++ public Address getSenderPCAddr() { return addressOfStackSlot(RETURN_ADDR_OFFSET); } ++ public Address getSenderPC() { return getSenderPCAddr().getAddressAt(0); } ++ ++ public Address getSenderSP() { return addressOfStackSlot(SENDER_SP_OFFSET); } ++ ++ public Address addressOfInterpreterFrameLocals() { ++ return addressOfStackSlot(INTERPRETER_FRAME_LOCALS_OFFSET); ++ } ++ ++ private Address addressOfInterpreterFrameBCX() { ++ return addressOfStackSlot(INTERPRETER_FRAME_BCX_OFFSET); ++ } ++ ++ public int getInterpreterFrameBCI() { ++ // FIXME: this is not atomic with respect to GC and is unsuitable ++ // for use in a non-debugging, or reflective, system. Need to ++ // figure out how to express this. 
++ Address bcp = addressOfInterpreterFrameBCX().getAddressAt(0); ++ Address methodHandle = addressOfInterpreterFrameMethod().getAddressAt(0); ++ Method method = (Method)Metadata.instantiateWrapperFor(methodHandle); ++ return bcpToBci(bcp, method); ++ } ++ ++ public Address addressOfInterpreterFrameMDX() { ++ return addressOfStackSlot(INTERPRETER_FRAME_MDX_OFFSET); ++ } ++ ++ // FIXME ++ //inline int frame::interpreter_frame_monitor_size() { ++ // return BasicObjectLock::size(); ++ //} ++ ++ // expression stack ++ // (the max_stack arguments are used by the GC; see class FrameClosure) ++ ++ public Address addressOfInterpreterFrameExpressionStack() { ++ Address monitorEnd = interpreterFrameMonitorEnd().address(); ++ return monitorEnd.addOffsetTo(-1 * VM.getVM().getAddressSize()); ++ } ++ ++ public int getInterpreterFrameExpressionStackDirection() { return -1; } ++ ++ // top of expression stack ++ public Address addressOfInterpreterFrameTOS() { ++ return getSP(); ++ } ++ ++ /** Expression stack from top down */ ++ public Address addressOfInterpreterFrameTOSAt(int slot) { ++ return addressOfInterpreterFrameTOS().addOffsetTo(slot * VM.getVM().getAddressSize()); ++ } ++ ++ public Address getInterpreterFrameSenderSP() { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(isInterpretedFrame(), "interpreted frame expected"); ++ } ++ return addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); ++ } ++ ++ // Monitors ++ public BasicObjectLock interpreterFrameMonitorBegin() { ++ return new BasicObjectLock(addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET)); ++ } ++ ++ public BasicObjectLock interpreterFrameMonitorEnd() { ++ Address result = addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET).getAddressAt(0); ++ if (Assert.ASSERTS_ENABLED) { ++ // make sure the pointer points inside the frame ++ Assert.that(AddressOps.gt(getFP(), result), "result must < than frame pointer"); ++ Assert.that(AddressOps.lte(getSP(), result), "result must >= than stack pointer"); ++ } ++ return new BasicObjectLock(result); ++ } ++ ++ public int interpreterFrameMonitorSize() { ++ return BasicObjectLock.size(); ++ } ++ ++ // Method ++ public Address addressOfInterpreterFrameMethod() { ++ return addressOfStackSlot(INTERPRETER_FRAME_METHOD_OFFSET); ++ } ++ ++ // Constant pool cache ++ public Address addressOfInterpreterFrameCPCache() { ++ return addressOfStackSlot(INTERPRETER_FRAME_CACHE_OFFSET); ++ } ++ ++ // Entry frames ++ public JavaCallWrapper getEntryFrameCallWrapper() { ++ return new MIPS64JavaCallWrapper(addressOfStackSlot(ENTRY_FRAME_CALL_WRAPPER_OFFSET).getAddressAt(0)); ++ } ++ ++ protected Address addressOfSavedOopResult() { ++ // offset is 2 for compiler2 and 3 for compiler1 ++ return getSP().addOffsetTo((VM.getVM().isClientCompiler() ? 
2 : 3) * ++ VM.getVM().getAddressSize()); ++ } ++ ++ protected Address addressOfSavedReceiver() { ++ return getSP().addOffsetTo(-4 * VM.getVM().getAddressSize()); ++ } ++ ++ private void dumpStack() { ++ if (getFP() != null) { ++ for (Address addr = getSP().addOffsetTo(-5 * VM.getVM().getAddressSize()); ++ AddressOps.lte(addr, getFP().addOffsetTo(5 * VM.getVM().getAddressSize())); ++ addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { ++ System.out.println(addr + ": " + addr.getAddressAt(0)); ++ } ++ } else { ++ for (Address addr = getSP().addOffsetTo(-5 * VM.getVM().getAddressSize()); ++ AddressOps.lte(addr, getSP().addOffsetTo(20 * VM.getVM().getAddressSize())); ++ addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { ++ System.out.println(addr + ": " + addr.getAddressAt(0)); ++ } ++ } ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64JavaCallWrapper.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64JavaCallWrapper.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64JavaCallWrapper.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64JavaCallWrapper.java 2024-01-30 10:00:13.264748090 +0800 +@@ -0,0 +1,57 @@ ++/* ++ * Copyright (c) 2001, 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.mips64; ++ ++import java.util.*; ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.runtime.*; ++ ++public class MIPS64JavaCallWrapper extends JavaCallWrapper { ++ private static AddressField lastJavaFPField; ++ ++ static { ++ VM.registerVMInitializedObserver(new Observer() { ++ public void update(Observable o, Object data) { ++ initialize(VM.getVM().getTypeDataBase()); ++ } ++ }); ++ } ++ ++ private static synchronized void initialize(TypeDataBase db) { ++ Type type = db.lookupType("JavaFrameAnchor"); ++ ++ lastJavaFPField = type.getAddressField("_last_Java_fp"); ++ } ++ ++ public MIPS64JavaCallWrapper(Address addr) { ++ super(addr); ++ } ++ ++ public Address getLastJavaFP() { ++ return lastJavaFPField.getValue(addr.addOffsetTo(anchorField.getOffset())); ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64RegisterMap.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64RegisterMap.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64RegisterMap.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64RegisterMap.java 2024-01-30 10:00:13.268081384 +0800 +@@ -0,0 +1,52 @@ ++/* ++ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.runtime.*; ++ ++public class MIPS64RegisterMap extends RegisterMap { ++ ++ /** This is the only public constructor */ ++ public MIPS64RegisterMap(JavaThread thread, boolean updateMap) { ++ super(thread, updateMap); ++ } ++ ++ protected MIPS64RegisterMap(RegisterMap map) { ++ super(map); ++ } ++ ++ public Object clone() { ++ MIPS64RegisterMap retval = new MIPS64RegisterMap(this); ++ return retval; ++ } ++ ++ // no PD state to clear or copy: ++ protected void clearPD() {} ++ protected void initializePD() {} ++ protected void initializeFromPD(RegisterMap map) {} ++ protected Address getLocationPD(VMReg reg) { return null; } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java 2024-01-30 10:00:13.264748090 +0800 +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2019, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + package sun.jvm.hotspot.runtime; + + import java.util.*; +@@ -39,6 +45,8 @@ + import sun.jvm.hotspot.runtime.linux_amd64.LinuxAMD64JavaThreadPDAccess; + import sun.jvm.hotspot.runtime.linux_aarch64.LinuxAARCH64JavaThreadPDAccess; + import sun.jvm.hotspot.runtime.linux_ppc64.LinuxPPC64JavaThreadPDAccess; ++import sun.jvm.hotspot.runtime.linux_mips64.LinuxMIPS64JavaThreadPDAccess; ++import sun.jvm.hotspot.runtime.linux_loongarch64.LinuxLOONGARCH64JavaThreadPDAccess; + import sun.jvm.hotspot.runtime.linux_sparc.LinuxSPARCJavaThreadPDAccess; + import sun.jvm.hotspot.runtime.bsd_x86.BsdX86JavaThreadPDAccess; + import sun.jvm.hotspot.runtime.bsd_amd64.BsdAMD64JavaThreadPDAccess; +@@ -99,6 +107,10 @@ + access = new LinuxPPC64JavaThreadPDAccess(); + } else if (cpu.equals("aarch64")) { + access = new LinuxAARCH64JavaThreadPDAccess(); ++ } else if (cpu.equals("mips64")) { ++ access = new LinuxMIPS64JavaThreadPDAccess(); ++ } else if (cpu.equals("loongarch64")) { ++ access = new LinuxLOONGARCH64JavaThreadPDAccess(); + } else { + try { + access = (JavaThreadPDAccess) +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java 2024-01-30 10:00:13.274747971 +0800 +@@ -22,6 +22,13 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021. These ++ * modifications are Copyright (c) 2018, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ * ++ */ ++ + package sun.jvm.hotspot.utilities; + + /** Provides canonicalized OS and CPU information for the rest of the +@@ -54,7 +61,7 @@ + + public static boolean knownCPU(String cpu) { + final String[] KNOWN = +- new String[] {"i386", "x86", "x86_64", "amd64", "sparc", "sparcv9", "ppc64", "ppc64le", "aarch64"}; ++ new String[] {"i386", "x86", "x86_64", "amd64", "sparc", "sparcv9", "ppc64", "ppc64le", "aarch64", "mips64", "mips64el", "loongarch64"}; + + for(String s : KNOWN) { + if(s.equals(cpu)) +@@ -101,6 +108,12 @@ + if (cpu.equals("ppc64le")) + return "ppc64"; + ++ if (cpu.equals("mips64el")) ++ return "mips64"; ++ ++ if (cpu.equals("loongarch64")) ++ return "loongarch64"; ++ + return cpu; + + } +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotJVMCIBackendFactory.java b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotJVMCIBackendFactory.java +--- a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotJVMCIBackendFactory.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotJVMCIBackendFactory.java 2024-01-30 10:00:13.304747615 +0800 +@@ -0,0 +1,220 @@ ++/* ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++package jdk.vm.ci.hotspot.loongarch64; ++ ++import static java.util.Collections.emptyMap; ++import static jdk.vm.ci.common.InitTimer.timer; ++ ++import java.util.EnumSet; ++import java.util.Map; ++ ++import jdk.vm.ci.loongarch64.LoongArch64; ++import jdk.vm.ci.loongarch64.LoongArch64.CPUFeature; ++import jdk.vm.ci.code.Architecture; ++import jdk.vm.ci.code.RegisterConfig; ++import jdk.vm.ci.code.TargetDescription; ++import jdk.vm.ci.code.stack.StackIntrospection; ++import jdk.vm.ci.common.InitTimer; ++import jdk.vm.ci.hotspot.HotSpotCodeCacheProvider; ++import jdk.vm.ci.hotspot.HotSpotConstantReflectionProvider; ++import jdk.vm.ci.hotspot.HotSpotJVMCIBackendFactory; ++import jdk.vm.ci.hotspot.HotSpotJVMCIRuntime; ++import jdk.vm.ci.hotspot.HotSpotMetaAccessProvider; ++import jdk.vm.ci.hotspot.HotSpotStackIntrospection; ++import jdk.vm.ci.meta.ConstantReflectionProvider; ++import jdk.vm.ci.runtime.JVMCIBackend; ++ ++public class LoongArch64HotSpotJVMCIBackendFactory implements HotSpotJVMCIBackendFactory { ++ ++ protected EnumSet computeFeatures(@SuppressWarnings("unused") LoongArch64HotSpotVMConfig config) { ++ // Configure the feature set using the HotSpot flag settings. ++ EnumSet features = EnumSet.noneOf(LoongArch64.CPUFeature.class); ++ ++ if ((config.vmVersionFeatures & config.loongarch64LA32) != 0) { ++ features.add(LoongArch64.CPUFeature.LA32); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64LA64) != 0) { ++ features.add(LoongArch64.CPUFeature.LA64); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64LLEXC) != 0) { ++ features.add(LoongArch64.CPUFeature.LLEXC); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64SCDLY) != 0) { ++ features.add(LoongArch64.CPUFeature.SCDLY); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64LLDBAR) != 0) { ++ features.add(LoongArch64.CPUFeature.LLDBAR); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64LBT_X86) != 0) { ++ features.add(LoongArch64.CPUFeature.LBT_X86); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64LBT_ARM) != 0) { ++ features.add(LoongArch64.CPUFeature.LBT_ARM); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64LBT_MIPS) != 0) { ++ features.add(LoongArch64.CPUFeature.LBT_MIPS); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64CCDMA) != 0) { ++ features.add(LoongArch64.CPUFeature.CCDMA); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64COMPLEX) != 0) { ++ features.add(LoongArch64.CPUFeature.COMPLEX); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64FP) != 0) { ++ features.add(LoongArch64.CPUFeature.FP); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64CRYPTO) != 0) { ++ features.add(LoongArch64.CPUFeature.CRYPTO); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64LSX) != 0) { ++ features.add(LoongArch64.CPUFeature.LSX); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64LASX) != 0) { ++ features.add(LoongArch64.CPUFeature.LASX); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64LAM) != 0) { ++ features.add(LoongArch64.CPUFeature.LAM); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64LLSYNC) != 0) { ++ features.add(LoongArch64.CPUFeature.LLSYNC); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64TGTSYNC) != 0) { ++ features.add(LoongArch64.CPUFeature.TGTSYNC); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64ULSYNC) != 0) { ++ features.add(LoongArch64.CPUFeature.ULSYNC); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64UAL) != 
0) { ++ features.add(LoongArch64.CPUFeature.UAL); ++ } ++ ++ return features; ++ } ++ ++ protected EnumSet computeFlags(@SuppressWarnings("unused") LoongArch64HotSpotVMConfig config) { ++ EnumSet flags = EnumSet.noneOf(LoongArch64.Flag.class); ++ ++ if (config.useLSX) { ++ flags.add(LoongArch64.Flag.useLSX); ++ } ++ ++ if (config.useLASX) { ++ flags.add(LoongArch64.Flag.useLASX); ++ } ++ ++ return flags; ++ } ++ ++ protected TargetDescription createTarget(LoongArch64HotSpotVMConfig config) { ++ final int stackFrameAlignment = 16; ++ final int implicitNullCheckLimit = 4096; ++ final boolean inlineObjects = true; ++ Architecture arch = new LoongArch64(computeFeatures(config), computeFlags(config)); ++ return new TargetDescription(arch, true, stackFrameAlignment, implicitNullCheckLimit, inlineObjects); ++ } ++ ++ protected HotSpotConstantReflectionProvider createConstantReflection(HotSpotJVMCIRuntime runtime) { ++ return new HotSpotConstantReflectionProvider(runtime); ++ } ++ ++ protected RegisterConfig createRegisterConfig(LoongArch64HotSpotVMConfig config, TargetDescription target) { ++ return new LoongArch64HotSpotRegisterConfig(target, config.useCompressedOops); ++ } ++ ++ protected HotSpotCodeCacheProvider createCodeCache(HotSpotJVMCIRuntime runtime, TargetDescription target, RegisterConfig regConfig) { ++ return new HotSpotCodeCacheProvider(runtime, runtime.getConfig(), target, regConfig); ++ } ++ ++ protected HotSpotMetaAccessProvider createMetaAccess(HotSpotJVMCIRuntime runtime) { ++ return new HotSpotMetaAccessProvider(runtime); ++ } ++ ++ @Override ++ public String getArchitecture() { ++ return "loongarch64"; ++ } ++ ++ @Override ++ public String toString() { ++ return "JVMCIBackend:" + getArchitecture(); ++ } ++ ++ @Override ++ @SuppressWarnings("try") ++ public JVMCIBackend createJVMCIBackend(HotSpotJVMCIRuntime runtime, JVMCIBackend host) { ++ ++ assert host == null; ++ LoongArch64HotSpotVMConfig config = new LoongArch64HotSpotVMConfig(runtime.getConfigStore()); ++ TargetDescription target = createTarget(config); ++ ++ RegisterConfig regConfig; ++ HotSpotCodeCacheProvider codeCache; ++ ConstantReflectionProvider constantReflection; ++ HotSpotMetaAccessProvider metaAccess; ++ StackIntrospection stackIntrospection; ++ try (InitTimer t = timer("create providers")) { ++ try (InitTimer rt = timer("create MetaAccess provider")) { ++ metaAccess = createMetaAccess(runtime); ++ } ++ try (InitTimer rt = timer("create RegisterConfig")) { ++ regConfig = createRegisterConfig(config, target); ++ } ++ try (InitTimer rt = timer("create CodeCache provider")) { ++ codeCache = createCodeCache(runtime, target, regConfig); ++ } ++ try (InitTimer rt = timer("create ConstantReflection provider")) { ++ constantReflection = createConstantReflection(runtime); ++ } ++ try (InitTimer rt = timer("create StackIntrospection provider")) { ++ stackIntrospection = new HotSpotStackIntrospection(runtime); ++ } ++ } ++ try (InitTimer rt = timer("instantiate backend")) { ++ return createBackend(metaAccess, codeCache, constantReflection, stackIntrospection); ++ } ++ } ++ ++ protected JVMCIBackend createBackend(HotSpotMetaAccessProvider metaAccess, HotSpotCodeCacheProvider codeCache, ConstantReflectionProvider constantReflection, ++ StackIntrospection stackIntrospection) { ++ return new JVMCIBackend(metaAccess, codeCache, constantReflection, stackIntrospection); ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck 
a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotRegisterConfig.java b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotRegisterConfig.java +--- a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotRegisterConfig.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotRegisterConfig.java 2024-01-30 10:00:13.304747615 +0800 +@@ -0,0 +1,297 @@ ++/* ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++package jdk.vm.ci.hotspot.loongarch64; ++ ++import static jdk.vm.ci.loongarch64.LoongArch64.ra; ++import static jdk.vm.ci.loongarch64.LoongArch64.a0; ++import static jdk.vm.ci.loongarch64.LoongArch64.a1; ++import static jdk.vm.ci.loongarch64.LoongArch64.a2; ++import static jdk.vm.ci.loongarch64.LoongArch64.a3; ++import static jdk.vm.ci.loongarch64.LoongArch64.a4; ++import static jdk.vm.ci.loongarch64.LoongArch64.a5; ++import static jdk.vm.ci.loongarch64.LoongArch64.a6; ++import static jdk.vm.ci.loongarch64.LoongArch64.a7; ++import static jdk.vm.ci.loongarch64.LoongArch64.SCR1; ++import static jdk.vm.ci.loongarch64.LoongArch64.SCR2; ++import static jdk.vm.ci.loongarch64.LoongArch64.t0; ++import static jdk.vm.ci.loongarch64.LoongArch64.v0; ++import static jdk.vm.ci.loongarch64.LoongArch64.s5; ++import static jdk.vm.ci.loongarch64.LoongArch64.s6; ++import static jdk.vm.ci.loongarch64.LoongArch64.sp; ++import static jdk.vm.ci.loongarch64.LoongArch64.fp; ++import static jdk.vm.ci.loongarch64.LoongArch64.tp; ++import static jdk.vm.ci.loongarch64.LoongArch64.rx; ++import static jdk.vm.ci.loongarch64.LoongArch64.f0; ++import static jdk.vm.ci.loongarch64.LoongArch64.f1; ++import static jdk.vm.ci.loongarch64.LoongArch64.f2; ++import static jdk.vm.ci.loongarch64.LoongArch64.f3; ++import static jdk.vm.ci.loongarch64.LoongArch64.f4; ++import static jdk.vm.ci.loongarch64.LoongArch64.f5; ++import static jdk.vm.ci.loongarch64.LoongArch64.f6; ++import static jdk.vm.ci.loongarch64.LoongArch64.f7; ++import static jdk.vm.ci.loongarch64.LoongArch64.fv0; ++import static jdk.vm.ci.loongarch64.LoongArch64.zero; ++ ++import java.util.ArrayList; ++import java.util.HashSet; ++import java.util.List; ++import java.util.Set; ++ ++import jdk.vm.ci.loongarch64.LoongArch64; ++import jdk.vm.ci.code.Architecture; ++import jdk.vm.ci.code.CallingConvention; ++import jdk.vm.ci.code.CallingConvention.Type; ++import jdk.vm.ci.code.Register; ++import jdk.vm.ci.code.RegisterArray; ++import jdk.vm.ci.code.RegisterAttributes; ++import jdk.vm.ci.code.RegisterConfig; ++import jdk.vm.ci.code.StackSlot; ++import jdk.vm.ci.code.TargetDescription; ++import jdk.vm.ci.code.ValueKindFactory; ++import jdk.vm.ci.common.JVMCIError; ++import jdk.vm.ci.hotspot.HotSpotCallingConventionType; ++import jdk.vm.ci.meta.AllocatableValue; ++import jdk.vm.ci.meta.JavaKind; ++import jdk.vm.ci.meta.JavaType; ++import jdk.vm.ci.meta.PlatformKind; ++import jdk.vm.ci.meta.Value; ++import jdk.vm.ci.meta.ValueKind; ++ ++public class LoongArch64HotSpotRegisterConfig implements RegisterConfig { ++ ++ private final TargetDescription target; ++ ++ private final RegisterArray allocatable; ++ ++ /** ++ * The caller saved registers always include all parameter registers. 
++ */ ++ private final RegisterArray callerSaved; ++ ++ private final boolean allAllocatableAreCallerSaved; ++ ++ private final RegisterAttributes[] attributesMap; ++ ++ @Override ++ public RegisterArray getAllocatableRegisters() { ++ return allocatable; ++ } ++ ++ @Override ++ public RegisterArray filterAllocatableRegisters(PlatformKind kind, RegisterArray registers) { ++ ArrayList<Register> list = new ArrayList<>(); ++ for (Register reg : registers) { ++ if (target.arch.canStoreValue(reg.getRegisterCategory(), kind)) { ++ list.add(reg); ++ } ++ } ++ ++ return new RegisterArray(list); ++ } ++ ++ @Override ++ public RegisterAttributes[] getAttributesMap() { ++ return attributesMap.clone(); ++ } ++ ++ private final RegisterArray javaGeneralParameterRegisters = new RegisterArray(t0, a0, a1, a2, a3, a4, a5, a6, a7); ++ private final RegisterArray nativeGeneralParameterRegisters = new RegisterArray(a0, a1, a2, a3, a4, a5, a6, a7); ++ private final RegisterArray floatParameterRegisters = new RegisterArray(f0, f1, f2, f3, f4, f5, f6, f7); ++ ++ public static final Register heapBaseRegister = s5; ++ public static final Register TREG = s6; ++ ++ private static final RegisterArray reservedRegisters = new RegisterArray(fp, ra, zero, sp, tp, rx, SCR1, SCR2, TREG); ++ ++ private static RegisterArray initAllocatable(Architecture arch, boolean reserveForHeapBase) { ++ RegisterArray allRegisters = arch.getAvailableValueRegisters(); ++ Register[] registers = new Register[allRegisters.size() - reservedRegisters.size() - (reserveForHeapBase ? 1 : 0)]; ++ List<Register> reservedRegistersList = reservedRegisters.asList(); ++ ++ int idx = 0; ++ for (Register reg : allRegisters) { ++ if (reservedRegistersList.contains(reg)) { ++ // skip reserved registers ++ continue; ++ } ++ if (reserveForHeapBase && reg.equals(heapBaseRegister)) { ++ // skip heap base register ++ continue; ++ } ++ ++ registers[idx++] = reg; ++ } ++ ++ assert idx == registers.length; ++ return new RegisterArray(registers); ++ } ++ ++ public LoongArch64HotSpotRegisterConfig(TargetDescription target, boolean useCompressedOops) { ++ this(target, initAllocatable(target.arch, useCompressedOops)); ++ assert callerSaved.size() >= allocatable.size(); ++ } ++ ++ public LoongArch64HotSpotRegisterConfig(TargetDescription target, RegisterArray allocatable) { ++ this.target = target; ++ ++ this.allocatable = allocatable; ++ Set<Register> callerSaveSet = new HashSet<>(); ++ allocatable.addTo(callerSaveSet); ++ floatParameterRegisters.addTo(callerSaveSet); ++ javaGeneralParameterRegisters.addTo(callerSaveSet); ++ nativeGeneralParameterRegisters.addTo(callerSaveSet); ++ callerSaved = new RegisterArray(callerSaveSet); ++ ++ allAllocatableAreCallerSaved = true; ++ attributesMap = RegisterAttributes.createMap(this, LoongArch64.allRegisters); ++ } ++ ++ @Override ++ public RegisterArray getCallerSaveRegisters() { ++ return callerSaved; ++ } ++ ++ @Override ++ public RegisterArray getCalleeSaveRegisters() { ++ return null; ++ } ++ ++ @Override ++ public boolean areAllAllocatableRegistersCallerSaved() { ++ return allAllocatableAreCallerSaved; ++ } ++ ++ @Override ++ public CallingConvention getCallingConvention(Type type, JavaType returnType, JavaType[] parameterTypes, ValueKindFactory<?> valueKindFactory) { ++ HotSpotCallingConventionType hotspotType = (HotSpotCallingConventionType) type; ++ if (type == HotSpotCallingConventionType.NativeCall) { ++ return callingConvention(nativeGeneralParameterRegisters, returnType, parameterTypes, hotspotType, valueKindFactory); ++ } ++ // On x64, parameter 
locations are the same whether viewed ++ // from the caller or callee perspective ++ return callingConvention(javaGeneralParameterRegisters, returnType, parameterTypes, hotspotType, valueKindFactory); ++ } ++ ++ @Override ++ public RegisterArray getCallingConventionRegisters(Type type, JavaKind kind) { ++ HotSpotCallingConventionType hotspotType = (HotSpotCallingConventionType) type; ++ switch (kind) { ++ case Boolean: ++ case Byte: ++ case Short: ++ case Char: ++ case Int: ++ case Long: ++ case Object: ++ return hotspotType == HotSpotCallingConventionType.NativeCall ? nativeGeneralParameterRegisters : javaGeneralParameterRegisters; ++ case Float: ++ case Double: ++ return floatParameterRegisters; ++ default: ++ throw JVMCIError.shouldNotReachHere(); ++ } ++ } ++ ++ private CallingConvention callingConvention(RegisterArray generalParameterRegisters, JavaType returnType, JavaType[] parameterTypes, HotSpotCallingConventionType type, ++ ValueKindFactory<?> valueKindFactory) { ++ AllocatableValue[] locations = new AllocatableValue[parameterTypes.length]; ++ ++ int currentGeneral = 0; ++ int currentFloat = 0; ++ int currentStackOffset = 0; ++ ++ for (int i = 0; i < parameterTypes.length; i++) { ++ final JavaKind kind = parameterTypes[i].getJavaKind().getStackKind(); ++ ++ switch (kind) { ++ case Byte: ++ case Boolean: ++ case Short: ++ case Char: ++ case Int: ++ case Long: ++ case Object: ++ if (currentGeneral < generalParameterRegisters.size()) { ++ Register register = generalParameterRegisters.get(currentGeneral++); ++ locations[i] = register.asValue(valueKindFactory.getValueKind(kind)); ++ } ++ break; ++ case Float: ++ case Double: ++ if (currentFloat < floatParameterRegisters.size()) { ++ Register register = floatParameterRegisters.get(currentFloat++); ++ locations[i] = register.asValue(valueKindFactory.getValueKind(kind)); ++ } else if (currentGeneral < generalParameterRegisters.size()) { ++ Register register = generalParameterRegisters.get(currentGeneral++); ++ locations[i] = register.asValue(valueKindFactory.getValueKind(kind)); ++ } ++ break; ++ default: ++ throw JVMCIError.shouldNotReachHere(); ++ } ++ ++ if (locations[i] == null) { ++ ValueKind<?> valueKind = valueKindFactory.getValueKind(kind); ++ locations[i] = StackSlot.get(valueKind, currentStackOffset, !type.out); ++ currentStackOffset += Math.max(valueKind.getPlatformKind().getSizeInBytes(), target.wordSize); ++ } ++ } ++ ++ JavaKind returnKind = returnType == null ? JavaKind.Void : returnType.getJavaKind(); ++ AllocatableValue returnLocation = returnKind == JavaKind.Void ? 
Value.ILLEGAL : getReturnRegister(returnKind).asValue(valueKindFactory.getValueKind(returnKind.getStackKind())); ++ return new CallingConvention(currentStackOffset, returnLocation, locations); ++ } ++ ++ @Override ++ public Register getReturnRegister(JavaKind kind) { ++ switch (kind) { ++ case Boolean: ++ case Byte: ++ case Char: ++ case Short: ++ case Int: ++ case Long: ++ case Object: ++ return v0; ++ case Float: ++ case Double: ++ return fv0; ++ case Void: ++ case Illegal: ++ return null; ++ default: ++ throw new UnsupportedOperationException("no return register for type " + kind); ++ } ++ } ++ ++ @Override ++ public Register getFrameRegister() { ++ return sp; ++ } ++ ++ @Override ++ public String toString() { ++ return String.format("Allocatable: " + getAllocatableRegisters() + "%n" + "CallerSave: " + getCallerSaveRegisters() + "%n"); ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotVMConfig.java b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotVMConfig.java +--- a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotVMConfig.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotVMConfig.java 2024-01-30 10:00:13.304747615 +0800 +@@ -0,0 +1,77 @@ ++/* ++ * Copyright (c) 2016, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++package jdk.vm.ci.hotspot.loongarch64; ++ ++import jdk.vm.ci.hotspot.HotSpotVMConfigAccess; ++import jdk.vm.ci.hotspot.HotSpotVMConfigStore; ++import jdk.vm.ci.services.Services; ++ ++/** ++ * Used to access native configuration details. ++ * ++ * All non-static, public fields in this class are so that they can be compiled as constants. ++ */ ++class LoongArch64HotSpotVMConfig extends HotSpotVMConfigAccess { ++ ++ LoongArch64HotSpotVMConfig(HotSpotVMConfigStore config) { ++ super(config); ++ } ++ ++ final boolean useCompressedOops = getFlag("UseCompressedOops", Boolean.class); ++ ++ // CPU Capabilities ++ ++ /* ++ * These flags are set based on the corresponding command line flags. 
++ */ ++ final boolean useLSX = getFlag("UseLSX", Boolean.class); ++ final boolean useLASX = getFlag("UseLASX", Boolean.class); ++ ++ final long vmVersionFeatures = getFieldValue("Abstract_VM_Version::_features", Long.class, "uint64_t"); ++ ++ /* ++ * These flags are set if the corresponding support is in the hardware. ++ */ ++ // Checkstyle: stop ++ final long loongarch64LA32 = getConstant("VM_Version::CPU_LA32", Long.class); ++ final long loongarch64LA64 = getConstant("VM_Version::CPU_LA64", Long.class); ++ final long loongarch64LLEXC = getConstant("VM_Version::CPU_LLEXC", Long.class); ++ final long loongarch64SCDLY = getConstant("VM_Version::CPU_SCDLY", Long.class); ++ final long loongarch64LLDBAR = getConstant("VM_Version::CPU_LLDBAR", Long.class); ++ final long loongarch64LBT_X86 = getConstant("VM_Version::CPU_LBT_X86", Long.class); ++ final long loongarch64LBT_ARM = getConstant("VM_Version::CPU_LBT_ARM", Long.class); ++ final long loongarch64LBT_MIPS = getConstant("VM_Version::CPU_LBT_MIPS", Long.class); ++ final long loongarch64CCDMA = getConstant("VM_Version::CPU_CCDMA", Long.class); ++ final long loongarch64COMPLEX = getConstant("VM_Version::CPU_COMPLEX", Long.class); ++ final long loongarch64FP = getConstant("VM_Version::CPU_FP", Long.class); ++ final long loongarch64CRYPTO = getConstant("VM_Version::CPU_CRYPTO", Long.class); ++ final long loongarch64LSX = getConstant("VM_Version::CPU_LSX", Long.class); ++ final long loongarch64LASX = getConstant("VM_Version::CPU_LASX", Long.class); ++ final long loongarch64LAM = getConstant("VM_Version::CPU_LAM", Long.class); ++ final long loongarch64LLSYNC = getConstant("VM_Version::CPU_LLSYNC", Long.class); ++ final long loongarch64TGTSYNC = getConstant("VM_Version::CPU_TGTSYNC", Long.class); ++ final long loongarch64ULSYNC = getConstant("VM_Version::CPU_ULSYNC", Long.class); ++ final long loongarch64UAL = getConstant("VM_Version::CPU_UAL", Long.class); ++ // Checkstyle: resume ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/package-info.java b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/package-info.java +--- a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/package-info.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/package-info.java 2024-01-30 10:00:13.304747615 +0800 +@@ -0,0 +1,28 @@ ++/* ++ * Copyright (c) 2018, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++/** ++ * The LoongArch64 HotSpot specific portions of the JVMCI API. ++ */ ++package jdk.vm.ci.hotspot.loongarch64; +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/LoongArch64.java b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/LoongArch64.java +--- a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/LoongArch64.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/LoongArch64.java 2024-01-30 10:00:13.308080909 +0800 +@@ -0,0 +1,247 @@ ++/* ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++package jdk.vm.ci.loongarch64; ++ ++import java.nio.ByteOrder; ++import java.util.EnumSet; ++ ++import jdk.vm.ci.code.Architecture; ++import jdk.vm.ci.code.Register; ++import jdk.vm.ci.code.Register.RegisterCategory; ++import jdk.vm.ci.code.RegisterArray; ++import jdk.vm.ci.meta.JavaKind; ++import jdk.vm.ci.meta.PlatformKind; ++ ++/** ++ * Represents the LoongArch64 architecture. 
++ */ ++public class LoongArch64 extends Architecture { ++ ++ public static final RegisterCategory CPU = new RegisterCategory("CPU"); ++ ++ // General purpose CPU registers ++ public static final Register zero = new Register(0, 0, "r0", CPU); ++ public static final Register ra = new Register(1, 1, "r1", CPU); ++ public static final Register tp = new Register(2, 2, "r2", CPU); ++ public static final Register sp = new Register(3, 3, "r3", CPU); ++ public static final Register a0 = new Register(4, 4, "r4", CPU); ++ public static final Register a1 = new Register(5, 5, "r5", CPU); ++ public static final Register a2 = new Register(6, 6, "r6", CPU); ++ public static final Register a3 = new Register(7, 7, "r7", CPU); ++ public static final Register a4 = new Register(8, 8, "r8", CPU); ++ public static final Register a5 = new Register(9, 9, "r9", CPU); ++ public static final Register a6 = new Register(10, 10, "r10", CPU); ++ public static final Register a7 = new Register(11, 11, "r11", CPU); ++ public static final Register t0 = new Register(12, 12, "r12", CPU); ++ public static final Register t1 = new Register(13, 13, "r13", CPU); ++ public static final Register t2 = new Register(14, 14, "r14", CPU); ++ public static final Register t3 = new Register(15, 15, "r15", CPU); ++ public static final Register t4 = new Register(16, 16, "r16", CPU); ++ public static final Register t5 = new Register(17, 17, "r17", CPU); ++ public static final Register t6 = new Register(18, 18, "r18", CPU); ++ public static final Register t7 = new Register(19, 19, "r19", CPU); ++ public static final Register t8 = new Register(20, 20, "r20", CPU); ++ public static final Register rx = new Register(21, 21, "r21", CPU); ++ public static final Register fp = new Register(22, 22, "r22", CPU); ++ public static final Register s0 = new Register(23, 23, "r23", CPU); ++ public static final Register s1 = new Register(24, 24, "r24", CPU); ++ public static final Register s2 = new Register(25, 25, "r25", CPU); ++ public static final Register s3 = new Register(26, 26, "r26", CPU); ++ public static final Register s4 = new Register(27, 27, "r27", CPU); ++ public static final Register s5 = new Register(28, 28, "r28", CPU); ++ public static final Register s6 = new Register(29, 29, "r29", CPU); ++ public static final Register s7 = new Register(30, 30, "r30", CPU); ++ public static final Register s8 = new Register(31, 31, "r31", CPU); ++ ++ public static final Register SCR1 = t7; ++ public static final Register SCR2 = t4; ++ public static final Register v0 = a0; ++ ++ // @formatter:off ++ public static final RegisterArray cpuRegisters = new RegisterArray( ++ zero, ra, tp, sp, a0, a1, a2, a3, ++ a4, a5, a6, a7, t0, t1, t2, t3, ++ t4, t5, t6, t7, t8, rx, fp, s0, ++ s1, s2, s3, s4, s5, s6, s7, s8 ++ ); ++ // @formatter:on ++ ++ public static final RegisterCategory SIMD = new RegisterCategory("SIMD"); ++ ++ // Simd registers ++ public static final Register f0 = new Register(32, 0, "f0", SIMD); ++ public static final Register f1 = new Register(33, 1, "f1", SIMD); ++ public static final Register f2 = new Register(34, 2, "f2", SIMD); ++ public static final Register f3 = new Register(35, 3, "f3", SIMD); ++ public static final Register f4 = new Register(36, 4, "f4", SIMD); ++ public static final Register f5 = new Register(37, 5, "f5", SIMD); ++ public static final Register f6 = new Register(38, 6, "f6", SIMD); ++ public static final Register f7 = new Register(39, 7, "f7", SIMD); ++ public static final Register f8 = new Register(40, 8, "f8", SIMD); ++ public 
static final Register f9 = new Register(41, 9, "f9", SIMD); ++ public static final Register f10 = new Register(42, 10, "f10", SIMD); ++ public static final Register f11 = new Register(43, 11, "f11", SIMD); ++ public static final Register f12 = new Register(44, 12, "f12", SIMD); ++ public static final Register f13 = new Register(45, 13, "f13", SIMD); ++ public static final Register f14 = new Register(46, 14, "f14", SIMD); ++ public static final Register f15 = new Register(47, 15, "f15", SIMD); ++ public static final Register f16 = new Register(48, 16, "f16", SIMD); ++ public static final Register f17 = new Register(49, 17, "f17", SIMD); ++ public static final Register f18 = new Register(50, 18, "f18", SIMD); ++ public static final Register f19 = new Register(51, 19, "f19", SIMD); ++ public static final Register f20 = new Register(52, 20, "f20", SIMD); ++ public static final Register f21 = new Register(53, 21, "f21", SIMD); ++ public static final Register f22 = new Register(54, 22, "f22", SIMD); ++ public static final Register f23 = new Register(55, 23, "f23", SIMD); ++ public static final Register f24 = new Register(56, 24, "f24", SIMD); ++ public static final Register f25 = new Register(57, 25, "f25", SIMD); ++ public static final Register f26 = new Register(58, 26, "f26", SIMD); ++ public static final Register f27 = new Register(59, 27, "f27", SIMD); ++ public static final Register f28 = new Register(60, 28, "f28", SIMD); ++ public static final Register f29 = new Register(61, 29, "f29", SIMD); ++ public static final Register f30 = new Register(62, 30, "f30", SIMD); ++ public static final Register f31 = new Register(63, 31, "f31", SIMD); ++ ++ public static final Register fv0 = f0; ++ ++ // @formatter:off ++ public static final RegisterArray simdRegisters = new RegisterArray( ++ f0, f1, f2, f3, f4, f5, f6, f7, ++ f8, f9, f10, f11, f12, f13, f14, f15, ++ f16, f17, f18, f19, f20, f21, f22, f23, ++ f24, f25, f26, f27, f28, f29, f30, f31 ++ ); ++ // @formatter:on ++ ++ // @formatter:off ++ public static final RegisterArray allRegisters = new RegisterArray( ++ zero, ra, tp, sp, a0, a1, a2, a3, ++ a4, a5, a6, a7, t0, t1, t2, t3, ++ t4, t5, t6, t7, t8, rx, fp, s0, ++ s1, s2, s3, s4, s5, s6, s7, s8, ++ ++ f0, f1, f2, f3, f4, f5, f6, f7, ++ f8, f9, f10, f11, f12, f13, f14, f15, ++ f16, f17, f18, f19, f20, f21, f22, f23, ++ f24, f25, f26, f27, f28, f29, f30, f31 ++ ); ++ // @formatter:on ++ ++ /** ++ * Basic set of CPU features mirroring what is returned from the cpuid instruction. See: ++ * {@code VM_Version::cpuFeatureFlags}. ++ */ ++ public enum CPUFeature { ++ LA32, ++ LA64, ++ LLEXC, ++ SCDLY, ++ LLDBAR, ++ LBT_X86, ++ LBT_ARM, ++ LBT_MIPS, ++ CCDMA, ++ COMPLEX, ++ FP, ++ CRYPTO, ++ LSX, ++ LASX, ++ LAM, ++ LLSYNC, ++ TGTSYNC, ++ ULSYNC, ++ UAL ++ } ++ ++ private final EnumSet<CPUFeature> features; ++ ++ /** ++ * Set of flags to control code emission. 
++ */ ++ public enum Flag { ++ useLSX, ++ useLASX ++ } ++ ++ private final EnumSet<Flag> flags; ++ ++ public LoongArch64(EnumSet<CPUFeature> features, EnumSet<Flag> flags) { ++ super("loongarch64", LoongArch64Kind.QWORD, ByteOrder.LITTLE_ENDIAN, true, allRegisters, 0, 0, 0); ++ this.features = features; ++ this.flags = flags; ++ } ++ ++ public EnumSet<CPUFeature> getFeatures() { ++ return features; ++ } ++ ++ public EnumSet<Flag> getFlags() { ++ return flags; ++ } ++ ++ @Override ++ public PlatformKind getPlatformKind(JavaKind javaKind) { ++ switch (javaKind) { ++ case Boolean: ++ case Byte: ++ return LoongArch64Kind.BYTE; ++ case Short: ++ case Char: ++ return LoongArch64Kind.WORD; ++ case Int: ++ return LoongArch64Kind.DWORD; ++ case Long: ++ case Object: ++ return LoongArch64Kind.QWORD; ++ case Float: ++ return LoongArch64Kind.SINGLE; ++ case Double: ++ return LoongArch64Kind.DOUBLE; ++ default: ++ return null; ++ } ++ } ++ ++ @Override ++ public boolean canStoreValue(RegisterCategory category, PlatformKind platformKind) { ++ LoongArch64Kind kind = (LoongArch64Kind) platformKind; ++ if (kind.isInteger()) { ++ return category.equals(CPU); ++ } else if (kind.isSIMD()) { ++ return category.equals(SIMD); ++ } ++ return false; ++ } ++ ++ @Override ++ public LoongArch64Kind getLargestStorableKind(RegisterCategory category) { ++ if (category.equals(CPU)) { ++ return LoongArch64Kind.QWORD; ++ } else if (category.equals(SIMD)) { ++ return LoongArch64Kind.V256_QWORD; ++ } else { ++ return null; ++ } ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/LoongArch64Kind.java b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/LoongArch64Kind.java +--- a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/LoongArch64Kind.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/LoongArch64Kind.java 2024-01-30 10:00:13.308080909 +0800 +@@ -0,0 +1,163 @@ ++/* ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++package jdk.vm.ci.loongarch64; ++ ++import jdk.vm.ci.meta.PlatformKind; ++ ++public enum LoongArch64Kind implements PlatformKind { ++ ++ // scalar ++ BYTE(1), ++ WORD(2), ++ DWORD(4), ++ QWORD(8), ++ UBYTE(1), ++ UWORD(2), ++ UDWORD(4), ++ SINGLE(4), ++ DOUBLE(8), ++ ++ // SIMD ++ V128_BYTE(16, BYTE), ++ V128_WORD(16, WORD), ++ V128_DWORD(16, DWORD), ++ V128_QWORD(16, QWORD), ++ V128_SINGLE(16, SINGLE), ++ V128_DOUBLE(16, DOUBLE), ++ V256_BYTE(32, BYTE), ++ V256_WORD(32, WORD), ++ V256_DWORD(32, DWORD), ++ V256_QWORD(32, QWORD), ++ V256_SINGLE(32, SINGLE), ++ V256_DOUBLE(32, DOUBLE); ++ ++ private final int size; ++ private final int vectorLength; ++ ++ private final LoongArch64Kind scalar; ++ private final EnumKey<LoongArch64Kind> key = new EnumKey<>(this); ++ ++ LoongArch64Kind(int size) { ++ this.size = size; ++ this.scalar = this; ++ this.vectorLength = 1; ++ } ++ ++ LoongArch64Kind(int size, LoongArch64Kind scalar) { ++ this.size = size; ++ this.scalar = scalar; ++ ++ assert size % scalar.size == 0; ++ this.vectorLength = size / scalar.size; ++ } ++ ++ public LoongArch64Kind getScalar() { ++ return scalar; ++ } ++ ++ @Override ++ public int getSizeInBytes() { ++ return size; ++ } ++ ++ @Override ++ public int getVectorLength() { ++ return vectorLength; ++ } ++ ++ @Override ++ public Key getKey() { ++ return key; ++ } ++ ++ public boolean isInteger() { ++ switch (this) { ++ case BYTE: ++ case WORD: ++ case DWORD: ++ case QWORD: ++ case UBYTE: ++ case UWORD: ++ case UDWORD: ++ return true; ++ default: ++ return false; ++ } ++ } ++ ++ public boolean isSIMD() { ++ switch (this) { ++ case SINGLE: ++ case DOUBLE: ++ case V128_BYTE: ++ case V128_WORD: ++ case V128_DWORD: ++ case V128_QWORD: ++ case V128_SINGLE: ++ case V128_DOUBLE: ++ case V256_BYTE: ++ case V256_WORD: ++ case V256_DWORD: ++ case V256_QWORD: ++ case V256_SINGLE: ++ case V256_DOUBLE: ++ return true; ++ default: ++ return false; ++ } ++ } ++ ++ @Override ++ public char getTypeChar() { ++ switch (this) { ++ case BYTE: ++ return 'b'; ++ case WORD: ++ return 'w'; ++ case DWORD: ++ return 'd'; ++ case QWORD: ++ return 'q'; ++ case SINGLE: ++ return 'S'; ++ case DOUBLE: ++ return 'D'; ++ case V128_BYTE: ++ case V128_WORD: ++ case V128_DWORD: ++ case V128_QWORD: ++ case V128_SINGLE: ++ case V128_DOUBLE: ++ case V256_BYTE: ++ case V256_WORD: ++ case V256_DWORD: ++ case V256_QWORD: ++ case V256_SINGLE: ++ case V256_DOUBLE: ++ return 'v'; ++ default: ++ return '-'; ++ } ++ } ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/package-info.java b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/package-info.java +--- a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/package-info.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/package-info.java 2024-01-30 10:00:13.308080909 +0800 +@@ -0,0 +1,28 @@ ++/* ++ * Copyright (c) 2018, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++/** ++ * The LoongArch64 platform independent portions of the JVMCI API. ++ */ ++package jdk.vm.ci.loongarch64; +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/jdk.internal.vm.ci/share/classes/module-info.java b/src/jdk.internal.vm.ci/share/classes/module-info.java +--- a/src/jdk.internal.vm.ci/share/classes/module-info.java 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/jdk.internal.vm.ci/share/classes/module-info.java 2024-01-30 10:00:13.314747494 +0800 +@@ -23,6 +23,12 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + module jdk.internal.vm.ci { + exports jdk.vm.ci.services to jdk.internal.vm.compiler; + exports jdk.vm.ci.runtime to +@@ -37,6 +43,7 @@ + + provides jdk.vm.ci.hotspot.HotSpotJVMCIBackendFactory with + jdk.vm.ci.hotspot.aarch64.AArch64HotSpotJVMCIBackendFactory, ++ jdk.vm.ci.hotspot.loongarch64.LoongArch64HotSpotJVMCIBackendFactory, + jdk.vm.ci.hotspot.amd64.AMD64HotSpotJVMCIBackendFactory, + jdk.vm.ci.hotspot.sparc.SPARCHotSpotJVMCIBackendFactory; + } +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/src/utils/hsdis/Makefile b/src/utils/hsdis/Makefile +--- a/src/utils/hsdis/Makefile 2024-01-10 05:19:49.000000000 +0800 ++++ b/src/utils/hsdis/Makefile 2024-01-30 10:00:13.851407763 +0800 +@@ -94,6 +94,9 @@ + endif + CFLAGS += -O + DLDFLAGS += -shared ++ifeq ($(ARCH), mips64) ++DLDFLAGS += -Wl,-z,noexecstack ++endif + LDFLAGS += -ldl + OUTFLAGS += -o $@ + else +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/test/hotspot/jtreg/compiler/cpuflags/TestAESIntrinsicsOnSupportedConfig.java b/test/hotspot/jtreg/compiler/cpuflags/TestAESIntrinsicsOnSupportedConfig.java +--- a/test/hotspot/jtreg/compiler/cpuflags/TestAESIntrinsicsOnSupportedConfig.java 2024-01-10 05:19:49.000000000 +0800 ++++ b/test/hotspot/jtreg/compiler/cpuflags/TestAESIntrinsicsOnSupportedConfig.java 2024-01-30 10:00:13.961406452 +0800 +@@ -22,11 +22,17 @@ + */ + + /* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ ++/* + * @test + * @library /test/lib / + * @modules java.base/jdk.internal.misc + * java.management +- * @requires vm.cpu.features ~= ".*aes.*" & !vm.graal.enabled ++ * @requires (vm.cpu.features ~= ".*aes.*" | os.arch == "loongarch64") & !vm.graal.enabled + * @build sun.hotspot.WhiteBox + * @run driver ClassFileInstaller sun.hotspot.WhiteBox + * sun.hotspot.WhiteBox$WhiteBoxPermission +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/test/hotspot/jtreg/compiler/cpuflags/TestAESIntrinsicsOnUnsupportedConfig.java b/test/hotspot/jtreg/compiler/cpuflags/TestAESIntrinsicsOnUnsupportedConfig.java +--- a/test/hotspot/jtreg/compiler/cpuflags/TestAESIntrinsicsOnUnsupportedConfig.java 2024-01-10 05:19:49.000000000 +0800 ++++ b/test/hotspot/jtreg/compiler/cpuflags/TestAESIntrinsicsOnUnsupportedConfig.java 2024-01-30 10:00:13.961406452 +0800 +@@ -22,13 +22,19 @@ + */ + + /* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ ++/* + * @test + * @library /test/lib / + * @modules java.base/jdk.internal.misc + * java.management + * + * @build sun.hotspot.WhiteBox +- * @requires !(vm.cpu.features ~= ".*aes.*") ++ * @requires !(vm.cpu.features ~= ".*aes.*" | os.arch == "loongarch64") + * @requires vm.compiler1.enabled | !vm.graal.enabled + * @run driver ClassFileInstaller sun.hotspot.WhiteBox + * sun.hotspot.WhiteBox$WhiteBoxPermission +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java +--- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java 2024-01-10 05:19:49.000000000 +0800 ++++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java 2024-01-30 10:00:13.974739627 +0800 +@@ -21,6 +21,12 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + package compiler.intrinsics.sha.cli.testcases; + + import compiler.intrinsics.sha.cli.SHAOptionsBase; +@@ -32,19 +38,20 @@ + + /** + * Generic test case for SHA-related options targeted to any CPU except +- * AArch64, PPC, S390x, SPARC and X86. ++ * AArch64, PPC, S390x, SPARC, LoongArch64 and X86. + */ + public class GenericTestCaseForOtherCPU extends + SHAOptionsBase.TestCase { + public GenericTestCaseForOtherCPU(String optionName) { +- // Execute the test case on any CPU except AArch64, PPC, S390x, SPARC and X86. ++ // Execute the test case on any CPU except AArch64, PPC, S390x, SPARC, LoongArch64 and X86. 
+ super(optionName, new NotPredicate( + new OrPredicate(Platform::isAArch64, + new OrPredicate(Platform::isS390x, + new OrPredicate(Platform::isSparc, + new OrPredicate(Platform::isPPC, ++ new OrPredicate(Platform::isLoongArch64, + new OrPredicate(Platform::isX64, +- Platform::isX86))))))); ++ Platform::isX86)))))))); + } + + @Override +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/CodeInstallationTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/CodeInstallationTest.java +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/CodeInstallationTest.java 2024-01-10 05:19:49.000000000 +0800 ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/CodeInstallationTest.java 2024-01-30 10:00:13.984739508 +0800 +@@ -29,6 +29,7 @@ + import jdk.vm.ci.code.TargetDescription; + import jdk.vm.ci.code.test.amd64.AMD64TestAssembler; + import jdk.vm.ci.code.test.sparc.SPARCTestAssembler; ++import jdk.vm.ci.code.test.loongarch64.LoongArch64TestAssembler; + import jdk.vm.ci.hotspot.HotSpotCompiledCode; + import jdk.vm.ci.hotspot.HotSpotJVMCIRuntime; + import jdk.vm.ci.hotspot.HotSpotResolvedJavaMethod; +@@ -37,6 +38,7 @@ + import jdk.vm.ci.runtime.JVMCI; + import jdk.vm.ci.runtime.JVMCIBackend; + import jdk.vm.ci.sparc.SPARC; ++import jdk.vm.ci.loongarch64.LoongArch64; + import org.junit.Assert; + + import java.lang.reflect.Method; +@@ -72,6 +74,8 @@ + return new AMD64TestAssembler(codeCache, config); + } else if (arch instanceof SPARC) { + return new SPARCTestAssembler(codeCache, config); ++ } else if (arch instanceof LoongArch64) { ++ return new LoongArch64TestAssembler(codeCache, config); + } else { + Assert.fail("unsupported architecture"); + return null; +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/DataPatchTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/DataPatchTest.java +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/DataPatchTest.java 2024-01-10 05:19:49.000000000 +0800 ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/DataPatchTest.java 2024-01-30 10:00:13.984739508 +0800 +@@ -23,7 +23,7 @@ + + /** + * @test +- * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9") ++ * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9" | vm.simpleArch == "loongarch64") + * @library / + * @modules jdk.internal.vm.ci/jdk.vm.ci.hotspot + * jdk.internal.vm.ci/jdk.vm.ci.meta +@@ -32,7 +32,8 @@ + * jdk.internal.vm.ci/jdk.vm.ci.runtime + * jdk.internal.vm.ci/jdk.vm.ci.amd64 + * jdk.internal.vm.ci/jdk.vm.ci.sparc +- * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java ++ * jdk.internal.vm.ci/jdk.vm.ci.loongarch64 ++ * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java loongarch64/LoongArch64TestAssembler.java + * @run junit/othervm -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI -Djvmci.Compiler=null jdk.vm.ci.code.test.DataPatchTest + */ + +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck 
a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/InterpreterFrameSizeTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/InterpreterFrameSizeTest.java +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/InterpreterFrameSizeTest.java 2024-01-10 05:19:49.000000000 +0800 ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/InterpreterFrameSizeTest.java 2024-01-30 10:00:13.984739508 +0800 +@@ -23,7 +23,7 @@ + + /** + * @test +- * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9") ++ * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9" | vm.simpleArch == "loongarch64") + * @modules jdk.internal.vm.ci/jdk.vm.ci.hotspot + * jdk.internal.vm.ci/jdk.vm.ci.code + * jdk.internal.vm.ci/jdk.vm.ci.code.site +@@ -32,7 +32,8 @@ + * jdk.internal.vm.ci/jdk.vm.ci.common + * jdk.internal.vm.ci/jdk.vm.ci.amd64 + * jdk.internal.vm.ci/jdk.vm.ci.sparc +- * @compile CodeInstallationTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java ++ * jdk.internal.vm.ci/jdk.vm.ci.loongarch64 ++ * @compile CodeInstallationTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java loongarch64/LoongArch64TestAssembler.java + * @run junit/othervm -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI -Djvmci.Compiler=null jdk.vm.ci.code.test.InterpreterFrameSizeTest + */ + +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/loongarch64/LoongArch64TestAssembler.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/loongarch64/LoongArch64TestAssembler.java +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/loongarch64/LoongArch64TestAssembler.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/loongarch64/LoongArch64TestAssembler.java 2024-01-30 10:00:13.984739508 +0800 +@@ -0,0 +1,568 @@ ++/* ++ * Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++ ++package jdk.vm.ci.code.test.loongarch64; ++ ++import jdk.vm.ci.loongarch64.LoongArch64; ++import jdk.vm.ci.loongarch64.LoongArch64Kind; ++import jdk.vm.ci.code.CallingConvention; ++import jdk.vm.ci.code.CodeCacheProvider; ++import jdk.vm.ci.code.DebugInfo; ++import jdk.vm.ci.code.Register; ++import jdk.vm.ci.code.RegisterArray; ++import jdk.vm.ci.code.RegisterValue; ++import jdk.vm.ci.code.StackSlot; ++import jdk.vm.ci.code.site.ConstantReference; ++import jdk.vm.ci.code.site.DataSectionReference; ++import jdk.vm.ci.code.test.TestAssembler; ++import jdk.vm.ci.code.test.TestHotSpotVMConfig; ++import jdk.vm.ci.hotspot.HotSpotCallingConventionType; ++import jdk.vm.ci.hotspot.HotSpotConstant; ++import jdk.vm.ci.hotspot.HotSpotForeignCallTarget; ++import jdk.vm.ci.meta.AllocatableValue; ++import jdk.vm.ci.meta.JavaKind; ++import jdk.vm.ci.meta.VMConstant; ++ ++public class LoongArch64TestAssembler extends TestAssembler { ++ ++ private static final Register scratchRegister = LoongArch64.SCR1; ++ private static final Register doubleScratch = LoongArch64.f23; ++ private static final RegisterArray nativeGeneralParameterRegisters = new RegisterArray(LoongArch64.a0, ++ LoongArch64.a1, LoongArch64.a2, ++ LoongArch64.a3, LoongArch64.a4, ++ LoongArch64.a5, LoongArch64.a6, ++ LoongArch64.a7); ++ private static final RegisterArray floatParameterRegisters = new RegisterArray(LoongArch64.f0, ++ LoongArch64.f1, LoongArch64.f2, ++ LoongArch64.f3, LoongArch64.f4, ++ LoongArch64.f5, LoongArch64.f6, ++ LoongArch64.f7); ++ private static int currentGeneral = 0; ++ private static int currentFloat = 0; ++ public LoongArch64TestAssembler(CodeCacheProvider codeCache, TestHotSpotVMConfig config) { ++ super(codeCache, config, ++ 16 /* initialFrameSize */, 16 /* stackAlignment */, ++ LoongArch64Kind.UDWORD /* narrowOopKind */, ++ /* registers */ ++ LoongArch64.a0, LoongArch64.a1, LoongArch64.a2, LoongArch64.a3, ++ LoongArch64.a4, LoongArch64.a5, LoongArch64.a6, LoongArch64.a7); ++ } ++ ++ private static int low(int x, int l) { ++ assert l < 32; ++ return (x >> 0) & ((1 << l)-1); ++ } ++ ++ private static int low16(int x) { ++ return low(x, 16); ++ } ++ ++ private void emitNop() { ++ code.emitInt(0x3400000); ++ } ++ ++ private void emitPcaddu12i(Register rj, int si20) { ++ // pcaddu12i ++ code.emitInt((0b0001110 << 25) ++ | (low(si20, 20) << 5) ++ | rj.encoding); ++ } ++ ++ private void emitAdd(Register rd, Register rj, Register rk) { ++ // add_d ++ code.emitInt((0b00000000000100001 << 15) ++ | (rk.encoding << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitAdd(Register rd, Register rj, int si12) { ++ // addi_d ++ code.emitInt((0b0000001011 << 22) ++ | (low(si12, 12) << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitSub(Register rd, Register rj, Register rk) { ++ // sub_d ++ code.emitInt((0b00000000000100011 << 15) ++ | (rk.encoding << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitShiftLeft(Register rd, Register rj, int shift) { ++ // slli_d ++ code.emitInt((0b00000000010000 << 18) ++ | (low(( (0b01 << 6) | shift ), 8) << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitLu12i_w(Register rj, int imm20) { ++ // lu12i_w ++ code.emitInt((0b0001010 << 25) ++ | (low(imm20, 20)<<5) ++ | rj.encoding); ++ } ++ ++ private void emitOri(Register rd, Register rj, int ui12) { ++ // ori ++ code.emitInt((0b0000001110 << 22) ++ | (low(ui12, 12) << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void 
emitLu32i_d(Register rj, int imm20) { ++ // lu32i_d ++ code.emitInt((0b0001011 << 25) ++ | (low(imm20, 20)<<5) ++ | rj.encoding); ++ } ++ ++ private void emitLu52i_d(Register rd, Register rj, int imm12) { ++ // lu52i_d ++ code.emitInt((0b0000001100 << 22) ++ | (low(imm12, 12) << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitLoadImmediate(Register rd, int imm32) { ++ emitLu12i_w(rd, (imm32 >> 12) & 0xfffff); ++ emitOri(rd, rd, imm32 & 0xfff); ++ } ++ ++ private void emitLi52(Register rj, long imm) { ++ emitLu12i_w(rj, (int) ((imm >> 12) & 0xfffff)); ++ emitOri(rj, rj, (int) (imm & 0xfff)); ++ emitLu32i_d(rj, (int) ((imm >> 32) & 0xfffff)); ++ } ++ ++ private void emitLi64(Register rj, long imm) { ++ emitLu12i_w(rj, (int) ((imm >> 12) & 0xfffff)); ++ emitOri(rj, rj, (int) (imm & 0xfff)); ++ emitLu32i_d(rj, (int) ((imm >> 32) & 0xfffff)); ++ emitLu52i_d(rj, rj, (int) ((imm >> 52) & 0xfff)); ++ } ++ ++ private void emitOr(Register rd, Register rj, Register rk) { ++ // orr ++ code.emitInt((0b00000000000101010 << 15) ++ | (rk.encoding << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitMove(Register rd, Register rs) { ++ // move ++ emitOr(rd, rs, LoongArch64.zero); ++ } ++ ++ private void emitMovfr2gr(Register rd, LoongArch64Kind kind, Register rj) { ++ // movfr2gr_s/movfr2gr_d ++ int opc = 0; ++ switch (kind) { ++ case SINGLE: opc = 0b0000000100010100101101; break; ++ case DOUBLE: opc = 0b0000000100010100101110; break; ++ default: throw new IllegalArgumentException(); ++ } ++ code.emitInt((opc << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitLoadRegister(Register rd, LoongArch64Kind kind, Register rj, int offset) { ++ // load ++ assert offset >= 0; ++ int opc = 0; ++ switch (kind) { ++ case BYTE: opc = 0b0010100000; break; ++ case WORD: opc = 0b0010100001; break; ++ case DWORD: opc = 0b0010100010; break; ++ case QWORD: opc = 0b0010100011; break; ++ case UDWORD: opc = 0b0010101010; break; ++ case SINGLE: opc = 0b0010101100; break; ++ case DOUBLE: opc = 0b0010101110; break; ++ default: throw new IllegalArgumentException(); ++ } ++ code.emitInt((opc << 22) ++ | (low(offset, 12) << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitStoreRegister(Register rd, LoongArch64Kind kind, Register rj, int offset) { ++ // store ++ assert offset >= 0; ++ int opc = 0; ++ switch (kind) { ++ case BYTE: opc = 0b0010100100; break; ++ case WORD: opc = 0b0010100101; break; ++ case DWORD: opc = 0b0010100110; break; ++ case QWORD: opc = 0b0010100111; break; ++ case SINGLE: opc = 0b0010101101; break; ++ case DOUBLE: opc = 0b0010101111; break; ++ default: throw new IllegalArgumentException(); ++ } ++ code.emitInt((opc << 22) ++ | (low(offset, 12) << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitJirl(Register rd, Register rj, int offs) { ++ // jirl ++ code.emitInt((0b010011 << 26) ++ | (low16(offs >> 2) << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ @Override ++ public void emitGrowStack(int size) { ++ assert size % 16 == 0; ++ if (size > -4096 && size < 0) { ++ emitAdd(LoongArch64.sp, LoongArch64.sp, -size); ++ } else if (size == 0) { ++ // No-op ++ } else if (size < 4096) { ++ emitAdd(LoongArch64.sp, LoongArch64.sp, -size); ++ } else if (size < 65535) { ++ emitLoadImmediate(scratchRegister, size); ++ emitSub(LoongArch64.sp, LoongArch64.sp, scratchRegister); ++ } else { ++ throw new IllegalArgumentException(); ++ } ++ } ++ ++ @Override ++ public void emitPrologue() { ++ 
// Must be patchable by NativeJump::patch_verified_entry ++ emitNop(); ++ emitGrowStack(32); ++ emitStoreRegister(LoongArch64.ra, LoongArch64Kind.QWORD, LoongArch64.sp, 24); ++ emitStoreRegister(LoongArch64.fp, LoongArch64Kind.QWORD, LoongArch64.sp, 16); ++ emitGrowStack(-16); ++ emitMove(LoongArch64.fp, LoongArch64.sp); ++ setDeoptRescueSlot(newStackSlot(LoongArch64Kind.QWORD)); ++ } ++ ++ @Override ++ public void emitEpilogue() { ++ recordMark(config.MARKID_DEOPT_HANDLER_ENTRY); ++ recordCall(new HotSpotForeignCallTarget(config.handleDeoptStub), 4*4, true, null); ++ emitCall(0xdeaddeaddeadL); ++ } ++ ++ @Override ++ public void emitCallPrologue(CallingConvention cc, Object... prim) { ++ emitGrowStack(cc.getStackSize()); ++ frameSize += cc.getStackSize(); ++ AllocatableValue[] args = cc.getArguments(); ++ for (int i = 0; i < args.length; i++) { ++ emitLoad(args[i], prim[i]); ++ } ++ currentGeneral = 0; ++ currentFloat = 0; ++ } ++ ++ @Override ++ public void emitCallEpilogue(CallingConvention cc) { ++ emitGrowStack(-cc.getStackSize()); ++ frameSize -= cc.getStackSize(); ++ } ++ ++ @Override ++ public void emitCall(long addr) { ++ // long call (absolute) ++ // lu12i_w(T4, split_low20(value >> 12)); ++ // lu32i_d(T4, split_low20(value >> 32)); ++ // jirl(RA, T4, split_low12(value)); ++ emitLu12i_w(LoongArch64.t4, (int) ((addr >> 12) & 0xfffff)); ++ emitLu32i_d(LoongArch64.t4, (int) ((addr >> 32) & 0xfffff)); ++ emitJirl(LoongArch64.ra, LoongArch64.t4, (int) (addr & 0xfff)); ++ } ++ ++ @Override ++ public void emitLoad(AllocatableValue av, Object prim) { ++ if (av instanceof RegisterValue) { ++ Register reg = ((RegisterValue) av).getRegister(); ++ if (prim instanceof Float) { ++ if (currentFloat < floatParameterRegisters.size()) { ++ currentFloat++; ++ emitLoadFloat(reg, (Float) prim); ++ } else if (currentGeneral < nativeGeneralParameterRegisters.size()) { ++ currentGeneral++; ++ emitLoadFloat(doubleScratch, (Float) prim); ++ emitMovfr2gr(reg, LoongArch64Kind.SINGLE, doubleScratch); ++ } ++ } else if (prim instanceof Double) { ++ if (currentFloat < floatParameterRegisters.size()) { ++ currentFloat++; ++ emitLoadDouble(reg, (Double) prim); ++ } else if (currentGeneral < nativeGeneralParameterRegisters.size()) { ++ currentGeneral++; ++ emitLoadDouble(doubleScratch, (Double) prim); ++ emitMovfr2gr(reg, LoongArch64Kind.DOUBLE, doubleScratch); ++ } ++ } else if (prim instanceof Integer) { ++ emitLoadInt(reg, (Integer) prim); ++ } else if (prim instanceof Long) { ++ emitLoadLong(reg, (Long) prim); ++ } ++ } else if (av instanceof StackSlot) { ++ StackSlot slot = (StackSlot) av; ++ if (prim instanceof Float) { ++ emitFloatToStack(slot, emitLoadFloat(doubleScratch, (Float) prim)); ++ } else if (prim instanceof Double) { ++ emitDoubleToStack(slot, emitLoadDouble(doubleScratch, (Double) prim)); ++ } else if (prim instanceof Integer) { ++ emitIntToStack(slot, emitLoadInt(scratchRegister, (Integer) prim)); ++ } else if (prim instanceof Long) { ++ emitLongToStack(slot, emitLoadLong(scratchRegister, (Long) prim)); ++ } else { ++ assert false : "Unimplemented"; ++ } ++ } else { ++ throw new IllegalArgumentException("Unknown value " + av); ++ } ++ } ++ ++ @Override ++ public Register emitLoadPointer(HotSpotConstant c) { ++ recordDataPatchInCode(new ConstantReference((VMConstant) c)); ++ ++ Register ret = newRegister(); ++ // need to match patchable_li52 instruction sequence ++ // lu12i_ori_lu32i ++ emitLi52(ret, 0xdeaddead); ++ return ret; ++ } ++ ++ @Override ++ public Register emitLoadPointer(Register b, 
int offset) { ++ Register ret = newRegister(); ++ emitLoadRegister(ret, LoongArch64Kind.QWORD, b, offset); ++ return ret; ++ } ++ ++ @Override ++ public Register emitLoadNarrowPointer(DataSectionReference ref) { ++ recordDataPatchInCode(ref); ++ ++ Register ret = newRegister(); ++ emitPcaddu12i(ret, 0xdead >> 12); ++ emitAdd(ret, ret, 0xdead & 0xfff); ++ emitLoadRegister(ret, LoongArch64Kind.UDWORD, ret, 0); ++ return ret; ++ } ++ ++ @Override ++ public Register emitLoadPointer(DataSectionReference ref) { ++ recordDataPatchInCode(ref); ++ ++ Register ret = newRegister(); ++ emitPcaddu12i(ret, 0xdead >> 12); ++ emitAdd(ret, ret, 0xdead & 0xfff); ++ emitLoadRegister(ret, LoongArch64Kind.QWORD, ret, 0); ++ return ret; ++ } ++ ++ private Register emitLoadDouble(Register reg, double c) { ++ DataSectionReference ref = new DataSectionReference(); ++ ref.setOffset(data.position()); ++ data.emitDouble(c); ++ ++ recordDataPatchInCode(ref); ++ emitPcaddu12i(scratchRegister, 0xdead >> 12); ++ emitAdd(scratchRegister, scratchRegister, 0xdead & 0xfff); ++ emitLoadRegister(reg, LoongArch64Kind.DOUBLE, scratchRegister, 0); ++ return reg; ++ } ++ ++ private Register emitLoadFloat(Register reg, float c) { ++ DataSectionReference ref = new DataSectionReference(); ++ ref.setOffset(data.position()); ++ data.emitFloat(c); ++ ++ recordDataPatchInCode(ref); ++ emitPcaddu12i(scratchRegister, 0xdead >> 12); ++ emitAdd(scratchRegister, scratchRegister, 0xdead & 0xfff); ++ emitLoadRegister(reg, LoongArch64Kind.SINGLE, scratchRegister, 0); ++ return reg; ++ } ++ ++ @Override ++ public Register emitLoadFloat(float c) { ++ Register ret = LoongArch64.fv0; ++ return emitLoadFloat(ret, c); ++ } ++ ++ private Register emitLoadLong(Register reg, long c) { ++ emitLi64(reg, c); ++ return reg; ++ } ++ ++ @Override ++ public Register emitLoadLong(long c) { ++ Register ret = newRegister(); ++ return emitLoadLong(ret, c); ++ } ++ ++ private Register emitLoadInt(Register reg, int c) { ++ emitLoadImmediate(reg, c); ++ return reg; ++ } ++ ++ @Override ++ public Register emitLoadInt(int c) { ++ Register ret = newRegister(); ++ return emitLoadInt(ret, c); ++ } ++ ++ @Override ++ public Register emitIntArg0() { ++ return codeCache.getRegisterConfig() ++ .getCallingConventionRegisters(HotSpotCallingConventionType.JavaCall, JavaKind.Int) ++ .get(0); ++ } ++ ++ @Override ++ public Register emitIntArg1() { ++ return codeCache.getRegisterConfig() ++ .getCallingConventionRegisters(HotSpotCallingConventionType.JavaCall, JavaKind.Int) ++ .get(1); ++ } ++ ++ @Override ++ public Register emitIntAdd(Register a, Register b) { ++ emitAdd(a, a, b); ++ return a; ++ } ++ ++ @Override ++ public void emitTrap(DebugInfo info) { ++ // Dereference null pointer ++ emitMove(scratchRegister, LoongArch64.zero); ++ recordImplicitException(info); ++ emitLoadRegister(LoongArch64.zero, LoongArch64Kind.QWORD, scratchRegister, 0); ++ } ++ ++ @Override ++ public void emitIntRet(Register a) { ++ emitMove(LoongArch64.v0, a); ++ emitMove(LoongArch64.sp, LoongArch64.fp); ++ emitLoadRegister(LoongArch64.ra, LoongArch64Kind.QWORD, LoongArch64.sp, 8); ++ emitLoadRegister(LoongArch64.fp, LoongArch64Kind.QWORD, LoongArch64.sp, 0); ++ emitGrowStack(-16); ++ emitJirl(LoongArch64.zero, LoongArch64.ra, 0); ++ } ++ ++ @Override ++ public void emitFloatRet(Register a) { ++ assert a == LoongArch64.fv0 : "Unimplemented move " + a; ++ emitMove(LoongArch64.sp, LoongArch64.fp); ++ emitLoadRegister(LoongArch64.ra, LoongArch64Kind.QWORD, LoongArch64.sp, 8); ++ 
emitLoadRegister(LoongArch64.fp, LoongArch64Kind.QWORD, LoongArch64.sp, 0); ++ emitGrowStack(-16); ++ emitJirl(LoongArch64.zero, LoongArch64.ra, 0); ++ } ++ ++ @Override ++ public void emitPointerRet(Register a) { ++ emitIntRet(a); ++ } ++ ++ @Override ++ public StackSlot emitPointerToStack(Register a) { ++ return emitLongToStack(a); ++ } ++ ++ @Override ++ public StackSlot emitNarrowPointerToStack(Register a) { ++ return emitIntToStack(a); ++ } ++ ++ @Override ++ public Register emitUncompressPointer(Register compressed, long base, int shift) { ++ if (shift > 0) { ++ emitShiftLeft(compressed, compressed, shift); ++ } ++ ++ if (base != 0) { ++ emitLoadLong(scratchRegister, base); ++ emitAdd(compressed, compressed, scratchRegister); ++ } ++ ++ return compressed; ++ } ++ ++ private StackSlot emitDoubleToStack(StackSlot slot, Register a) { ++ emitStoreRegister(a, LoongArch64Kind.DOUBLE, LoongArch64.sp, slot.getOffset(frameSize)); ++ return slot; ++ } ++ ++ @Override ++ public StackSlot emitDoubleToStack(Register a) { ++ StackSlot ret = newStackSlot(LoongArch64Kind.DOUBLE); ++ return emitDoubleToStack(ret, a); ++ } ++ ++ private StackSlot emitFloatToStack(StackSlot slot, Register a) { ++ emitStoreRegister(a, LoongArch64Kind.SINGLE, LoongArch64.sp, slot.getOffset(frameSize)); ++ return slot; ++ } ++ ++ @Override ++ public StackSlot emitFloatToStack(Register a) { ++ StackSlot ret = newStackSlot(LoongArch64Kind.SINGLE); ++ return emitFloatToStack(ret, a); ++ } ++ ++ private StackSlot emitIntToStack(StackSlot slot, Register a) { ++ emitStoreRegister(a, LoongArch64Kind.DWORD, LoongArch64.sp, slot.getOffset(frameSize)); ++ return slot; ++ } ++ ++ @Override ++ public StackSlot emitIntToStack(Register a) { ++ StackSlot ret = newStackSlot(LoongArch64Kind.DWORD); ++ return emitIntToStack(ret, a); ++ } ++ ++ private StackSlot emitLongToStack(StackSlot slot, Register a) { ++ emitStoreRegister(a, LoongArch64Kind.QWORD, LoongArch64.sp, slot.getOffset(frameSize)); ++ return slot; ++ } ++ ++ @Override ++ public StackSlot emitLongToStack(Register a) { ++ StackSlot ret = newStackSlot(LoongArch64Kind.QWORD); ++ return emitLongToStack(ret, a); ++ } ++ ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/MaxOopMapStackOffsetTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/MaxOopMapStackOffsetTest.java +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/MaxOopMapStackOffsetTest.java 2024-01-10 05:19:49.000000000 +0800 ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/MaxOopMapStackOffsetTest.java 2024-01-30 10:00:13.984739508 +0800 +@@ -23,7 +23,7 @@ + + /** + * @test +- * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9") ++ * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9" | vm.simpleArch == "loongarch64") + * @library / + * @modules jdk.internal.vm.ci/jdk.vm.ci.hotspot + * jdk.internal.vm.ci/jdk.vm.ci.meta +@@ -33,7 +33,8 @@ + * jdk.internal.vm.ci/jdk.vm.ci.runtime + * jdk.internal.vm.ci/jdk.vm.ci.amd64 + * jdk.internal.vm.ci/jdk.vm.ci.sparc +- * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java ++ * jdk.internal.vm.ci/jdk.vm.ci.loongarch64 ++ * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java 
amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java loongarch64/LoongArch64TestAssembler.java + * @run junit/othervm -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI -Djvmci.Compiler=null jdk.vm.ci.code.test.MaxOopMapStackOffsetTest + */ + +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/NativeCallTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/NativeCallTest.java +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/NativeCallTest.java 2024-01-10 05:19:49.000000000 +0800 ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/NativeCallTest.java 2024-01-30 10:00:13.984739508 +0800 +@@ -23,7 +23,7 @@ + + /** + * @test +- * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9") ++ * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9" | vm.simpleArch == "loongarch64") + * @library /test/lib / + * @modules jdk.internal.vm.ci/jdk.vm.ci.hotspot + * jdk.internal.vm.ci/jdk.vm.ci.code +@@ -33,7 +33,8 @@ + * jdk.internal.vm.ci/jdk.vm.ci.common + * jdk.internal.vm.ci/jdk.vm.ci.amd64 + * jdk.internal.vm.ci/jdk.vm.ci.sparc +- * @compile CodeInstallationTest.java TestHotSpotVMConfig.java NativeCallTest.java TestAssembler.java sparc/SPARCTestAssembler.java amd64/AMD64TestAssembler.java ++ * jdk.internal.vm.ci/jdk.vm.ci.loongarch64 ++ * @compile CodeInstallationTest.java TestHotSpotVMConfig.java NativeCallTest.java TestAssembler.java sparc/SPARCTestAssembler.java amd64/AMD64TestAssembler.java loongarch64/LoongArch64TestAssembler.java + * @run junit/othervm/native -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI -Xbootclasspath/a:. 
jdk.vm.ci.code.test.NativeCallTest + */ + package jdk.vm.ci.code.test; +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleCodeInstallationTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleCodeInstallationTest.java +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleCodeInstallationTest.java 2024-01-10 05:19:49.000000000 +0800 ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleCodeInstallationTest.java 2024-01-30 10:00:13.984739508 +0800 +@@ -23,7 +23,7 @@ + + /** + * @test +- * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9") ++ * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9" | vm.simpleArch == "loongarch64") + * @library / + * @modules jdk.internal.vm.ci/jdk.vm.ci.hotspot + * jdk.internal.vm.ci/jdk.vm.ci.meta +@@ -32,7 +32,8 @@ + * jdk.internal.vm.ci/jdk.vm.ci.runtime + * jdk.internal.vm.ci/jdk.vm.ci.amd64 + * jdk.internal.vm.ci/jdk.vm.ci.sparc +- * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java ++ * jdk.internal.vm.ci/jdk.vm.ci.loongarch64 ++ * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java loongarch64/LoongArch64TestAssembler.java + * @run junit/othervm -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI -Djvmci.Compiler=null jdk.vm.ci.code.test.SimpleCodeInstallationTest + */ + +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleDebugInfoTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleDebugInfoTest.java +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleDebugInfoTest.java 2024-01-10 05:19:49.000000000 +0800 ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleDebugInfoTest.java 2024-01-30 10:00:13.984739508 +0800 +@@ -23,7 +23,7 @@ + + /** + * @test +- * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9") ++ * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9" | vm.simpleArch == "loongarch64") + * @library / + * @modules jdk.internal.vm.ci/jdk.vm.ci.hotspot + * jdk.internal.vm.ci/jdk.vm.ci.meta +@@ -32,7 +32,8 @@ + * jdk.internal.vm.ci/jdk.vm.ci.runtime + * jdk.internal.vm.ci/jdk.vm.ci.amd64 + * jdk.internal.vm.ci/jdk.vm.ci.sparc +- * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java ++ * jdk.internal.vm.ci/jdk.vm.ci.loongarch64 ++ * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java loongarch64/LoongArch64TestAssembler.java + * @run junit/othervm -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI -Djvmci.Compiler=null jdk.vm.ci.code.test.SimpleDebugInfoTest + */ + +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/VirtualObjectDebugInfoTest.java 
b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/VirtualObjectDebugInfoTest.java +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/VirtualObjectDebugInfoTest.java 2024-01-10 05:19:49.000000000 +0800 ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/VirtualObjectDebugInfoTest.java 2024-01-30 10:00:13.984739508 +0800 +@@ -23,7 +23,7 @@ + + /** + * @test +- * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9") ++ * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9" | vm.simpleArch == "loongarch64") + * @library / + * @modules jdk.internal.vm.ci/jdk.vm.ci.hotspot + * jdk.internal.vm.ci/jdk.vm.ci.meta +@@ -32,7 +32,8 @@ + * jdk.internal.vm.ci/jdk.vm.ci.runtime + * jdk.internal.vm.ci/jdk.vm.ci.amd64 + * jdk.internal.vm.ci/jdk.vm.ci.sparc +- * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java ++ * jdk.internal.vm.ci/jdk.vm.ci.loongarch64 ++ * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java loongarch64/LoongArch64TestAssembler.java + * @run junit/othervm -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI -Djvmci.Compiler=null jdk.vm.ci.code.test.VirtualObjectDebugInfoTest + */ + +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java b/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java +--- a/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java 2024-01-10 05:19:49.000000000 +0800 ++++ b/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java 2024-01-30 10:00:14.021405737 +0800 +@@ -21,10 +21,17 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + + /* @test + * @bug 8167409 + * @requires (os.arch != "aarch64") & (os.arch != "arm") ++ * @requires (os.arch != "mips64el") & (os.arch != "loongarch64") + * @run main/othervm/native -Xcomp -XX:+CriticalJNINatives compiler.runtime.criticalnatives.argumentcorruption.CheckLongArgs + */ + package compiler.runtime.criticalnatives.argumentcorruption; +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java b/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java +--- a/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java 2024-01-10 05:19:49.000000000 +0800 ++++ b/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java 2024-01-30 10:00:14.021405737 +0800 +@@ -21,10 +21,17 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + + /* @test + * @bug 8167408 + * @requires (os.arch != "aarch64") & (os.arch != "arm") ++ * @requires (os.arch != "mips64el") & (os.arch != "loongarch64") + * @run main/othervm/native -Xcomp -XX:+CriticalJNINatives compiler.runtime.criticalnatives.lookup.LookUp + */ + package compiler.runtime.criticalnatives.lookup; +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java b/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java +--- a/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java 2024-01-10 05:19:49.000000000 +0800 ++++ b/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java 2024-01-30 10:00:14.024739030 +0800 +@@ -21,6 +21,12 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + package compiler.testlibrary.sha.predicate; + + import jdk.test.lib.Platform; +@@ -63,10 +69,12 @@ + = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha1" }, null), + new OrPredicate(new CPUSpecificPredicate("s390.*", new String[] { "sha1" }, null), + new OrPredicate(new CPUSpecificPredicate("sparc.*", new String[] { "sha1" }, null), ++ // Basic instructions are used to implement SHA1 Intrinsics on LA, so "sha1" feature is not needed. ++ new OrPredicate(new CPUSpecificPredicate("loongarch64.*", null, null), + // x86 variants + new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "sha" }, null), + new OrPredicate(new CPUSpecificPredicate("i386.*", new String[] { "sha" }, null), +- new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null)))))); ++ new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null))))))); + + public static final BooleanSupplier SHA256_INSTRUCTION_AVAILABLE + = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha256" }, null), +@@ -74,12 +82,14 @@ + new OrPredicate(new CPUSpecificPredicate("sparc.*", new String[] { "sha256" }, null), + new OrPredicate(new CPUSpecificPredicate("ppc64.*", new String[] { "sha" }, null), + new OrPredicate(new CPUSpecificPredicate("ppc64le.*", new String[] { "sha" }, null), ++ // Basic instructions are used to implement SHA256 Intrinsics on LA, so "sha256" feature is not needed. 
++ new OrPredicate(new CPUSpecificPredicate("loongarch64.*", null, null), + // x86 variants + new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "sha" }, null), + new OrPredicate(new CPUSpecificPredicate("i386.*", new String[] { "sha" }, null), + new OrPredicate(new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null), + new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "avx2", "bmi2" }, null), +- new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null)))))))))); ++ new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null))))))))))); + + public static final BooleanSupplier SHA512_INSTRUCTION_AVAILABLE + = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha512" }, null), +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java b/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java +--- a/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java 2024-01-10 05:19:49.000000000 +0800 ++++ b/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java 2024-01-30 10:00:14.074738434 +0800 +@@ -22,6 +22,12 @@ + */ + + /* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ ++/* + * @test ReservedStackTest + * + * @requires vm.opt.DeoptimizeALot != true +@@ -239,7 +245,7 @@ + return Platform.isAix() || + (Platform.isLinux() && + (Platform.isPPC() || Platform.isS390x() || Platform.isX64() || +- Platform.isX86())) || ++ Platform.isX86() || Platform.isMIPS() || Platform.isLoongArch64())) || + Platform.isOSX() || + Platform.isSolaris(); + } +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java b/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java +--- a/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java 2024-01-10 05:19:49.000000000 +0800 ++++ b/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java 2024-01-30 10:00:14.158070775 +0800 +@@ -45,7 +45,7 @@ + */ + public class TestMutuallyExclusivePlatformPredicates { + private static enum MethodGroup { +- ARCH("isAArch64", "isARM", "isPPC", "isS390x", "isSparc", "isX64", "isX86"), ++ ARCH("isAArch64", "isARM", "isPPC", "isS390x", "isSparc", "isX64", "isX86", "isMIPS", "isLoongArch64"), + BITNESS("is32bit", "is64bit"), + OS("isAix", "isLinux", "isOSX", "isSolaris", "isWindows"), + VM_TYPE("isClient", "isServer", "isGraal", "isMinimal", "isZero", "isEmbedded"), +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/test/jdk/jdk/jfr/event/os/TestCPUInformation.java b/test/jdk/jdk/jfr/event/os/TestCPUInformation.java +--- a/test/jdk/jdk/jfr/event/os/TestCPUInformation.java 2024-01-10 05:19:49.000000000 +0800 ++++ b/test/jdk/jdk/jfr/event/os/TestCPUInformation.java 2024-01-30 10:00:15.451388695 +0800 +@@ -23,6 +23,12 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + package jdk.jfr.event.os; + + import java.util.List; +@@ -54,8 +60,8 @@ + Events.assertField(event, "hwThreads").atLeast(1); + Events.assertField(event, "cores").atLeast(1); + Events.assertField(event, "sockets").atLeast(1); +- Events.assertField(event, "cpu").containsAny("Intel", "AMD", "Unknown x86", "sparc", "ARM", "PPC", "PowerPC", "AArch64", "s390"); +- Events.assertField(event, "description").containsAny("Intel", "AMD", "Unknown x86", "SPARC", "ARM", "PPC", "PowerPC", "AArch64", "s390"); ++ Events.assertField(event, "cpu").containsAny("Intel", "AMD", "Unknown x86", "sparc", "ARM", "PPC", "PowerPC", "AArch64", "s390", "MIPS", "LoongArch"); ++ Events.assertField(event, "description").containsAny("Intel", "AMD", "Unknown x86", "SPARC", "ARM", "PPC", "PowerPC", "AArch64", "s390", "MIPS", "LoongArch"); + } + } + } +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/test/jdk/sun/security/pkcs11/PKCS11Test.java b/test/jdk/sun/security/pkcs11/PKCS11Test.java +--- a/test/jdk/sun/security/pkcs11/PKCS11Test.java 2024-01-10 05:19:49.000000000 +0800 ++++ b/test/jdk/sun/security/pkcs11/PKCS11Test.java 2024-01-30 10:00:15.654719606 +0800 +@@ -21,6 +21,12 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2021, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + // common infrastructure for SunPKCS11 tests + + import java.io.BufferedReader; +@@ -747,6 +753,9 @@ + "/usr/lib64/" }); + osMap.put("Linux-ppc64-64", new String[] { "/usr/lib64/" }); + osMap.put("Linux-ppc64le-64", new String[] { "/usr/lib64/" }); ++ osMap.put("Linux-mips64el-64", new String[]{"/usr/lib64/"}); ++ osMap.put("Linux-loongarch64-64", new String[]{"/usr/lib/loongarch64-linux-gnu/", ++ "/usr/lib64/" }); + osMap.put("Linux-s390x-64", new String[] { "/usr/lib64/" }); + osMap.put("Windows-x86-32", new String[] {}); + osMap.put("Windows-amd64-64", new String[] {}); +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/test/lib/jdk/test/lib/Platform.java b/test/lib/jdk/test/lib/Platform.java +--- a/test/lib/jdk/test/lib/Platform.java 2024-01-10 05:19:49.000000000 +0800 ++++ b/test/lib/jdk/test/lib/Platform.java 2024-01-30 10:00:16.081381187 +0800 +@@ -21,6 +21,12 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2019, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + package jdk.test.lib; + + import java.io.FileNotFoundException; +@@ -226,6 +232,14 @@ + return isArch("(i386)|(x86(?!_64))"); + } + ++ public static boolean isLoongArch64() { ++ return isArch("loongarch64"); ++ } ++ ++ public static boolean isMIPS() { ++ return isArch("mips.*"); ++ } ++ + public static String getOsArch() { + return osArch; + } +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/test/micro/org/openjdk/bench/java/lang/RotateBenchmark.java b/test/micro/org/openjdk/bench/java/lang/RotateBenchmark.java +--- a/test/micro/org/openjdk/bench/java/lang/RotateBenchmark.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/test/micro/org/openjdk/bench/java/lang/RotateBenchmark.java 2024-01-30 10:00:16.094714362 +0800 +@@ -0,0 +1,87 @@ ++// ++// Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. ++// ++// ++package org.openjdk.bench.java.lang; ++ ++import java.util.Random; ++import java.util.concurrent.TimeUnit; ++import org.openjdk.jmh.annotations.*; ++import org.openjdk.jmh.infra.Blackhole; ++ ++@OutputTimeUnit(TimeUnit.MILLISECONDS) ++@State(Scope.Thread) ++@BenchmarkMode(Mode.Throughput) ++public class RotateBenchmark { ++ ++ @Param({"1024"}) ++ public int TESTSIZE; ++ ++ @Param({"20"}) ++ public int SHIFT; ++ ++ public long [] larr; ++ public int [] iarr; ++ ++ public long [] lres; ++ public int [] ires; ++ ++ ++ @Setup(Level.Trial) ++ public void BmSetup() { ++ Random r = new Random(1024); ++ larr = new long[TESTSIZE]; ++ iarr = new int[TESTSIZE]; ++ lres = new long[TESTSIZE]; ++ ires = new int[TESTSIZE]; ++ ++ for (int i = 0; i < TESTSIZE; i++) { ++ larr[i] = r.nextLong(); ++ } ++ ++ for (int i = 0; i < TESTSIZE; i++) { ++ iarr[i] = r.nextInt(); ++ } ++ } ++ ++ @Benchmark ++ public void testRotateLeftI() { ++ for (int i = 0; i < TESTSIZE; i++) ++ ires[i] = Integer.rotateLeft(iarr[i], SHIFT); ++ } ++ @Benchmark ++ public void testRotateRightI() { ++ for (int i = 0; i < TESTSIZE; i++) ++ ires[i] = Integer.rotateRight(iarr[i], SHIFT); ++ } ++ @Benchmark ++ public void testRotateLeftL() { ++ for (int i = 0; i < TESTSIZE; i++) ++ lres[i] = Long.rotateLeft(larr[i], SHIFT); ++ } ++ @Benchmark ++ public void testRotateRightL() { ++ for (int i = 0; i < TESTSIZE; i++) ++ lres[i] = Long.rotateRight(larr[i], SHIFT); ++ } ++ ++} +diff -Naur -x .git -x .github -x .gitattributes -x .gitignore -x .jcheck a/test/micro/org/openjdk/bench/vm/compiler/MacroLogicOpt.java b/test/micro/org/openjdk/bench/vm/compiler/MacroLogicOpt.java +--- a/test/micro/org/openjdk/bench/vm/compiler/MacroLogicOpt.java 1970-01-01 08:00:00.000000000 +0800 ++++ b/test/micro/org/openjdk/bench/vm/compiler/MacroLogicOpt.java 2024-01-30 10:00:16.094714362 +0800 +@@ -0,0 +1,125 @@ ++/* ++ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++package org.openjdk.bench.vm.compiler; ++ ++import org.openjdk.jmh.annotations.*; ++import org.openjdk.jmh.infra.*; ++ ++import java.util.concurrent.TimeUnit; ++import java.util.Random; ++ ++@BenchmarkMode(Mode.Throughput) ++@OutputTimeUnit(TimeUnit.SECONDS) ++@State(Scope.Thread) ++public class MacroLogicOpt { ++ @Param({"64","128","256","512","1024","2048","4096"}) private int VECLEN; ++ ++ private int [] ai = new int[VECLEN]; ++ private int [] bi = new int[VECLEN]; ++ private int [] ci = new int[VECLEN]; ++ private int [] ri = new int[VECLEN]; ++ ++ private long [] al = new long[VECLEN]; ++ private long [] bl = new long[VECLEN]; ++ private long [] cl = new long[VECLEN]; ++ private long [] dl = new long[VECLEN]; ++ private long [] el = new long[VECLEN]; ++ private long [] fl = new long[VECLEN]; ++ private long [] rl = new long[VECLEN]; ++ ++ private Random r = new Random(); ++ ++ @Setup ++ public void init() { ++ ai = new int[VECLEN]; ++ bi = new int[VECLEN]; ++ ci = new int[VECLEN]; ++ ri = new int[VECLEN]; ++ ++ al = new long[VECLEN]; ++ bl = new long[VECLEN]; ++ cl = new long[VECLEN]; ++ dl = new long[VECLEN]; ++ el = new long[VECLEN]; ++ fl = new long[VECLEN]; ++ rl = new long[VECLEN]; ++ for (int i=0; i FINGERPRINT_MAP = new HashMap<>() { diff --git a/jdk-updates-jdk11u-jdk-11.0.22-ga.tar.xz b/jdk-updates-jdk11u-jdk-11.0.22-ga.tar.xz deleted file mode 100644 index 78684a6b6786cdb8bb6da27dfcda3fed126946db..0000000000000000000000000000000000000000 Binary files a/jdk-updates-jdk11u-jdk-11.0.22-ga.tar.xz and /dev/null differ diff --git a/jdk-updates-jdk11u-jdk-11.0.23-ga.tar.xz b/jdk-updates-jdk11u-jdk-11.0.23-ga.tar.xz new file mode 100644 index 0000000000000000000000000000000000000000..921bdb22895680fb23af8e04daff21e532027915 Binary files /dev/null and b/jdk-updates-jdk11u-jdk-11.0.23-ga.tar.xz differ diff --git a/openjdk-11.spec b/openjdk-11.spec index 720b4d76e7179a06ba5867015d3f2b9c82245b8e..f32c013106d8515c2e8db7df741f337f8956f859 100644 --- a/openjdk-11.spec +++ b/openjdk-11.spec @@ -55,6 +55,7 @@ %global aarch64 aarch64 %global riscv64 riscv64 +%global ppc64le ppc64le # By default, we build a debug build during main build on JIT architectures %if %{with slowdebug} @@ -110,15 +111,21 @@ %ifarch %{aarch64} %global archinstall aarch64 %endif +%ifarch loongarch64 +%global archinstall loongarch64 +%endif %ifarch %{riscv64} %global archinstall riscv64 %endif +%ifarch %{ppc64le} +%global archinstall ppc64le +%endif %global with_systemtap 1 # New Version-String scheme-style defines %global majorver 11 -%global securityver 22 +%global securityver 23 # buildjdkver is usually same as %%{majorver}, # but in time of bootstrap of next jdk, it is majorver-1, # and this it is better to change it here, on single place @@ -134,12 +141,12 @@ %global origin_nice OpenJDK %global top_level_dir_name %{origin} %global minorver 0 -%global buildver 7 +%global buildver 9 %global patchver 0 %global project jdk-updates %global repo jdk11u -%global revision jdk-11.0.22-ga +%global revision jdk-11.0.23-ga %global full_revision %{project}-%{repo}-%{revision} # priority must be 7 digits in total # setting to
1, so debug ones can have 0 @@ -561,9 +568,11 @@ exit 0 %{_jvmdir}/%{sdkdir -- %{?1}}/bin/jstatd %{_jvmdir}/%{sdkdir -- %{?1}}/bin/rmic %{_jvmdir}/%{sdkdir -- %{?1}}/bin/serialver +%ifnarch loongarch64 %ifarch %{aarch64} x86_64 %{_jvmdir}/%{sdkdir -- %{?1}}/bin/jaotc %endif +%endif %{_jvmdir}/%{sdkdir -- %{?1}}/include %{_jvmdir}/%{sdkdir -- %{?1}}/lib/ct.sym %if %{with_systemtap} @@ -803,6 +812,14 @@ Source11: nss.cfg.in # due to memory leak). Patch1000: rh1648249-add_commented_out_nss_cfg_provider_to_java_security.patch +############################################ +# +# LoongArch64 specific patches +# +############################################ + +Patch2001: LoongArch64-support.patch + ############################################# # # OpenJDK specific patches @@ -893,7 +910,7 @@ Patch92: 8295068-SSLEngine-throws-NPE-parsing-Certificate.patch # riscv64 specific patches # ############################################ -Patch2000: 2000-Add-riscv64-support-based-on-bishengjdk-riscv-branch.patch +Patch2000: Add-riscv64-support.patch BuildRequires: elfutils-extra BuildRequires: autoconf @@ -1126,6 +1143,7 @@ fi pushd %{top_level_dir_name} # OpenJDK patches +%ifnarch loongarch64 ppc64le %ifarch riscv64 %patch2000 -p1 %else @@ -1190,6 +1208,10 @@ pushd %{top_level_dir_name} %patch91 -p1 %patch92 -p1 %endif +%endif +%ifarch loongarch64 +%patch2001 -p1 +%endif popd # openjdk # %patch1000 @@ -1280,8 +1302,10 @@ bash ../configure \ --with-version-build=%{buildver} \ --with-version-pre="" \ --with-version-opt="" \ +%ifnarch loongarch64 ppc64le --with-vendor-version-string="%{vendor_version_string}" \ --with-vendor-name="Bisheng" \ +%endif --with-vendor-url="https://openeuler.org/" \ --with-vendor-bug-url="https://gitee.com/src-openeuler/openjdk-11/issues/" \ --with-vendor-vm-bug-url="https://gitee.com/src-openeuler/openjdk-11/issues/" \ @@ -1395,6 +1419,7 @@ done # javaCalls.cpp:58 should map to: # http://hg.openjdk.java.net/jdk8u/jdk8u/hotspot/file/ff3b27e6bcc2/src/share/vm/runtime/javaCalls.cpp#l58 # Using line number 1 might cause build problems. See: +%ifnarch loongarch64 gdb -q "$JAVA_HOME/bin/java" < - 1:11.0.23.9-0 +- modified 8224675-Late-GC-barrier-insertion-for-ZGC.patch +- modified delete_expired_certificates.patch + +* Wed Mar 13 2024 jiahua.yu - 1:11.0.22.7-3 +- init support for arch ppc64le + +* Mon Feb 26 2024 misaka00251 - 1:11.0.22.7-2 +- Fix build on riscv64 + +* Tue Feb 20 2024 Leslie Zhai - 1:11.0.22.7-1 +- init support of LoongArch64 + * Wed Jan 17 2024 DXwangg - 1:11.0.22.7-0 - update to 11.0.22+7(GA) - modified delete_expired_certificates.patch
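For reference, the long-call sequence added above in LoongArch64TestAssembler.emitCall() forms an absolute target out of three immediates: lu12i_w carries bits 31..12, lu32i_d carries bits 51..32, and the jirl offset carries bits 11..0. The following is a minimal, self-contained sketch of that masking arithmetic only; it is not part of the patch, the class name and example address are illustrative, and the sign-extension and instruction-encoding details handled by the real assembler are not modeled.

public class Li52SplitSketch {
    public static void main(String[] args) {
        long addr = 0x00007f123456789cL;             // hypothetical code address (fits in 52 bits)

        int low12  = (int) (addr & 0xfff);           // jirl offset field
        int mid20  = (int) ((addr >> 12) & 0xfffff); // lu12i_w immediate
        int high20 = (int) ((addr >> 32) & 0xfffff); // lu32i_d immediate

        // Together the three fields cover exactly bits 0..51 of the target.
        long rebuilt = ((long) high20 << 32) | ((long) mid20 << 12) | low12;
        if (rebuilt != (addr & 0xFFFFFFFFFFFFFL)) {
            throw new AssertionError("split/reassembly mismatch");
        }
        System.out.printf("lu12i_w=0x%05x lu32i_d=0x%05x jirl=0x%03x%n", mid20, high20, low12);
    }
}

The same 20+12+20-bit decomposition underlies the li52 sequence that emitLoadPointer() emits via emitLi52(); its fixed three-instruction footprint is what the "need to match patchable_li52 instruction sequence" comment refers to, since a constant-size sequence can be patched in place.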